diff globalneighbors/read.py @ 0:5dba84370182

initial commit; half-working prototype
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 12:03:39 -0700
parents
children 1b94f3bf97e5
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/globalneighbors/read.py	Sat Jun 24 12:03:39 2017 -0700
@@ -0,0 +1,54 @@
+"""
+I/O
+"""
+
+import csv
+import sys
+from .schema import cast_row
+from .schema import types
+
+# TSV notes for python `csv` library:
+# https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
+
+# String types that the readers in this module treat as a file path.
+# `basestring` exists only on Python 2; fall back to `str` alone so that
+# importing this module on Python 3 does not raise NameError.
+try:
+    string = (str, basestring)
+except NameError:
+    string = (str,)
+
+def read_tsv_generator(f):
+    """lazily yield each row of tab-separated file `f` as a list of fields"""
+
+    # QUOTE_NONE: quote characters are ordinary data, not field delimiters
+    tsv_rows = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
+    for fields in tsv_rows:
+        yield fields
+
+def read_tsv(f):
+    """
+    read all tab-separated rows into a list
+
+    `f` is either a path (opened for reading here and closed on return)
+    or an already-open file-like object
+    """
+
+    if not isinstance(f, string):
+        return list(read_tsv_generator(f))
+
+    # given a path: open it and re-enter with the file object
+    with open(f) as tsv_file:
+        return read_tsv(tsv_file)
+
+
+def read_cities(f, fields=None, types=types):
+    """
+    read and cast cities into a form we want
+
+    f -- open file (or other iterable of lines) of tab-separated rows
+    fields -- optional container of keys to keep in each cast row;
+              if None or empty, each cast row is yielded whole
+    types -- passed through to `cast_row`
+
+    Yields one cast row per input row.  If `cast_row` raises
+    AssertionError, the zero-based row index is written to stderr
+    and the error is re-raised.
+    """
+
+    for index, row in enumerate(read_tsv_generator(f)):
+        try:
+            cast = cast_row(row, types=types)
+        except AssertionError:
+            # newline-terminate the message so the traceback that
+            # follows starts on its own line
+            sys.stderr.write("Error, row {}\n".format(index))
+            raise
+        if fields:
+            # `cast` must be dict-like here, since `.items()` is used
+            cast = {key: value for key, value in cast.items()
+                    if key in fields}
+        yield cast
+
+def read_city_list(f, fields=None, types=types):
+    """
+    read cities as a list
+
+    `f` is either a path (opened for reading here and closed on return)
+    or an already-open file-like object; `fields` and `types` are
+    passed through to `read_cities`
+    """
+
+    if not isinstance(f, string):
+        return list(read_cities(f, fields=fields, types=types))
+
+    # given a path: open it and re-enter with the file object
+    with open(f) as city_file:
+        return read_city_list(city_file, fields=fields, types=types)