Mercurial > hg > GlobalNeighbors
annotate globalneighbors/read.py @ 4:8e130b7bfed9
remove unintended boilerplate
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 14:48:55 -0700 |
parents | 1b94f3bf97e5 |
children | 7e27e874655b |
rev | line source |
---|---|
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 I/O |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 import csv |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 import sys |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 from .schema import cast_row |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 from .schema import types |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 # TSV notes for python `csv` library: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 # https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 string = (str, basestring) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 |
1 | 15 |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
16 def read_tsv_generator(f): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
17 """read tab-separated values from file `f` into memory""" |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
19 reader = csv.reader(f, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
20 delimiter='\t', |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 quoting=csv.QUOTE_NONE) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
22 for row in reader: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
23 yield row |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
24 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
25 def read_tsv(f): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
26 if isinstance(f, string): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
27 with open(f) as f_: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
28 return read_tsv(f_) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
29 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
30 return [row for row in read_tsv_generator(f)] |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
31 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
32 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
33 def read_cities(f, fields=None, types=types): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
34 """read and cast cities into a form we want""" |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
35 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
36 for index, row in enumerate(read_tsv_generator(f)): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
37 try: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
38 cast = cast_row(row, types=types) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
39 except AssertionError: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
40 sys.stderr.write("Error, row {}".format(index)) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
41 raise |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
42 if fields: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
43 cast = {key: value for key, value in cast.items() |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
44 if key in fields} |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
45 yield cast |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
46 |
1 | 47 |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
48 def read_city_list(f, fields=None, types=types): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
49 """read cities as a list""" |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
50 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
51 if isinstance(f, string): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
52 with open(f) as f_: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
53 return read_city_list(f_, fields=fields, types=types) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
54 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
55 return [city for city in |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
56 read_cities(f, fields=fields, types=types)] |