comparison globalneighbors/read.py @ 0:5dba84370182

initial commit; half-working prototype
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 12:03:39 -0700
parents
children 1b94f3bf97e5
comparison
equal deleted inserted replaced
-1:000000000000 0:5dba84370182
1 """
2 I/O
3 """
4
5 import csv
6 import sys
7 from .schema import cast_row
8 from .schema import types
9
10 # TSV notes for python `csv` library:
11 # https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
12
# types treated as "a path" by the readers below;
# `basestring` exists only on Python 2, so fall back to `str` alone on Python 3
try:
    string = (str, basestring)
except NameError:
    string = (str,)
14
def read_tsv_generator(f):
    """lazily yield each row of tab-separated values from `f` as a list of strings"""

    # QUOTE_NONE: quote characters are ordinary data, never field delimiters
    tsv_options = {'delimiter': '\t',
                   'quoting': csv.QUOTE_NONE}
    for fields in csv.reader(f, **tsv_options):
        yield fields
23
def read_tsv(f):
    """
    read every row of tab-separated values into a list;
    `f` is either a path (opened and closed here)
    or an already-open file / iterable of lines
    """

    if not isinstance(f, string):
        return list(read_tsv_generator(f))

    # `f` is a path: read everything before the file is closed
    with open(f) as tsv_file:
        return list(read_tsv_generator(tsv_file))
30
31
def read_cities(f, fields=None, types=types):
    """
    read and cast cities into a form we want

    f -- open file (or other iterable of lines) of tab-separated rows
    fields -- if given and non-empty, only these keys are kept in each city
    types -- passed through to `cast_row`

    Yields one cast row per input row.  If `cast_row` raises an
    `AssertionError`, the 0-based row index is written to stderr
    and the error is re-raised.
    """

    for index, row in enumerate(read_tsv_generator(f)):
        try:
            cast = cast_row(row, types=types)
        except AssertionError:
            # end the line so the traceback that follows starts on its own line
            sys.stderr.write("Error, row {}\n".format(index))
            raise
        if fields:
            # keep only the requested keys
            cast = {key: value for key, value in cast.items()
                    if key in fields}
        yield cast
45
def read_city_list(f, fields=None, types=types):
    """
    read cities as a list;
    `f` is either a path (opened and closed here)
    or anything `read_cities` accepts directly
    """

    if not isinstance(f, string):
        return list(read_cities(f, fields=fields, types=types))

    # `f` is a path: consume all cities before the file is closed
    with open(f) as city_file:
        return list(read_cities(city_file, fields=fields, types=types))