Mercurial > hg > GlobalNeighbors
comparison globalneighbors/read.py @ 0:5dba84370182
initial commit; half-working prototype
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 12:03:39 -0700 |
parents | |
children | 1b94f3bf97e5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5dba84370182 |
---|---|
1 """ | |
2 I/O | |
3 """ | |
4 | |
5 import csv | |
6 import sys | |
7 from .schema import cast_row | |
8 from .schema import types | |
9 | |
10 # TSV notes for python `csv` library: | |
11 # https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072 | |
12 | |
# Types treated as "a path" (rather than an open file) by the readers below.
# `basestring` only exists on Python 2; on Python 3 referencing it raises
# NameError, so fall back to plain `str` there.
try:
    string = (str, basestring)
except NameError:
    string = (str,)
14 | |
def read_tsv_generator(f):
    """lazily yield each row of tab-separated values read from `f`

    `f` is handed directly to `csv.reader`; quote characters are not
    given any special meaning (`csv.QUOTE_NONE`).
    """

    rows = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
    for fields in rows:
        yield fields
23 | |
def read_tsv(f):
    """read every row of tab-separated values from `f` into a list

    `f` may be an open file, or a path (an instance of `string`) which
    is opened for reading and closed again before returning.
    """

    if not isinstance(f, string):
        # already a file-like object: drain the row generator
        return list(read_tsv_generator(f))

    with open(f) as handle:
        return list(read_tsv_generator(handle))
30 | |
31 | |
def read_cities(f, fields=None, types=types):
    """read and cast cities into a form we want

    Lazily yields one cast row for each row of tab-separated values
    read from `f`.

    f -- source of tab-separated rows, as accepted by `read_tsv_generator`
    fields -- optional collection of keys to keep in each cast row;
              when empty or None every key is kept
    types -- passed through to `cast_row`

    If `cast_row` raises AssertionError, the 0-based index of the
    offending row is written to stderr and the error is re-raised.
    """

    for index, row in enumerate(read_tsv_generator(f)):
        try:
            cast = cast_row(row, types=types)
        except AssertionError:
            # terminate the line so the message does not run into
            # whatever is printed next (e.g. the traceback)
            sys.stderr.write("Error, row {}\n".format(index))
            raise
        if fields:
            # keep only the requested keys
            cast = {key: value for key, value in cast.items()
                    if key in fields}
        yield cast
45 | |
def read_city_list(f, fields=None, types=types):
    """read cities as a list

    `f` may be an open file, or a path (an instance of `string`) which
    is opened for reading and closed again before returning.
    `fields` and `types` are forwarded unchanged to `read_cities`.
    """

    if not isinstance(f, string):
        # already a file-like object: collect everything `read_cities` yields
        return list(read_cities(f, fields=fields, types=types))

    with open(f) as handle:
        return list(read_cities(handle, fields=fields, types=types))