Mercurial > hg > GlobalNeighbors
diff globalneighbors/read.py @ 0:5dba84370182
initial commit; half-working prototype
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 12:03:39 -0700 |
parents | |
children | 1b94f3bf97e5 |
line wrap: on
line diff
"""
I/O
"""

import csv
import sys
from .schema import cast_row
from .schema import types

# TSV notes for python `csv` library:
# https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072

# Types that are treated as a filesystem path rather than an open file.
# `basestring` only exists on Python 2; fall back to `str` alone so that
# importing this module does not raise NameError on Python 3.
try:
    string = (str, basestring)  # noqa: F821 (Python 2 only)
except NameError:
    string = (str,)


def read_tsv_generator(f):
    """lazily yield rows of tab-separated values from `f`

    `f` is handed straight to `csv.reader`, so it must be an open file
    or another iterable of lines (a path is NOT opened here).
    Quote characters get no special treatment (`csv.QUOTE_NONE`).
    """

    reader = csv.reader(f,
                        delimiter='\t',
                        quoting=csv.QUOTE_NONE)
    for row in reader:
        yield row


def read_tsv(f):
    """read tab-separated values from `f` into a list of rows

    `f` may be a path (an instance of `string`), in which case the file
    is opened and closed here, or an already-open file / iterable of
    lines, which is consumed but not closed.
    """

    if isinstance(f, string):
        # the list is fully built inside the `with`, before the file closes
        with open(f) as f_:
            return read_tsv(f_)

    return list(read_tsv_generator(f))


def read_cities(f, fields=None, types=types):
    """read and cast cities into a form we want

    Generator: every row of `f` (an open file or iterable of lines; a
    path is not opened here) is passed through
    `cast_row(row, types=types)` and the result is yielded.

    If `fields` is given and non-empty, only the items of the cast row
    whose key is in `fields` are kept.

    If `cast_row` raises `AssertionError`, the 0-based index of the
    offending row is written to stderr and the exception is re-raised.
    """

    for index, row in enumerate(read_tsv_generator(f)):
        try:
            cast = cast_row(row, types=types)
        except AssertionError:
            # terminate the line so it does not run into the traceback
            sys.stderr.write("Error, row {}\n".format(index))
            raise
        if fields:
            # NOTE(review): relies on `cast_row` returning a mapping
            # (something with `.items()`) -- confirm against `schema`
            cast = {key: value for key, value in cast.items()
                    if key in fields}
        yield cast


def read_city_list(f, fields=None, types=types):
    """read cities as a list

    Same as `read_cities`, but `f` may also be a path (an instance of
    `string`), which is opened and closed here, and the result is a
    fully materialized list instead of a generator.
    """

    if isinstance(f, string):
        # the list is fully built inside the `with`, before the file closes
        with open(f) as f_:
            return read_city_list(f_, fields=fields, types=types)

    return list(read_cities(f, fields=fields, types=types))