Mercurial > hg > GlobalNeighbors
annotate tests/test_read.py @ 23:6891c5523b69
load with neighbors :)
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 25 Jun 2017 18:13:43 -0700 |
parents | 49aae0c0293b |
children |
rev | line source |
---|---|
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 test data reading + loading |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 import os |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 import unittest |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 from globalneighbors import schema |
3 | 10 from globalneighbors.locations import locations |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 from globalneighbors.read import read_tsv |
3 | 12 from globalneighbors.read import read_cities |
1 | 13 from globalneighbors.read import read_city_list |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
15 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
16 here = os.path.dirname(os.path.abspath(__file__)) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
17 data = os.path.join(here, 'data') |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
19 class TestDataRead(unittest.TestCase): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
20 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 # created with |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
22 # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
23 test_tsv = os.path.join(data, 'sample.tsv') |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
24 test_tsv_lines = 10 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
25 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
26 # full dataset: test with caution |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
27 full_tsv = os.path.join(data, 'cities1000.txt') |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
28 full_tsv_lines = 149092 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
29 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
30 def test_read_tsv(self): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
31 """test reading a tsv file chunk""" |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
32 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
33 assert os.path.isfile(self.test_tsv) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
34 sample = read_tsv(self.test_tsv) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
35 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
36 assert len(sample) == 10 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
37 for row in sample: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
38 assert len(row) == len(schema.descriptions) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
39 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
40 def test_full_dataset(self): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
41 """ensure we can operate on the full dataset""" |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
42 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
43 assert os.path.isfile(self.full_tsv) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
44 cities = read_tsv(self.full_tsv) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
45 assert len(cities) == self.full_tsv_lines |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
46 for row in cities: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
47 assert len(row) == len(schema.descriptions) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
48 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
49 # cast the data into types we want |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
50 for row in cities: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
51 row = schema.cast_row(row, types=schema.types) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
52 |
1 | 53 def test_read_unicode(self): |
54 """ensure we can read the cities as unicode""" | |
55 | |
56 cities = read_city_list(self.full_tsv) | |
3 | 57 for city in cities: |
58 for field in schema.unicode_fields: | |
59 assert isinstance(city[field], unicode) | |
60 | |
61 def test_iterative_locations(self): | |
62 """assert we can read into locations as a generator""" | |
63 | |
64 with open(self.test_tsv) as f: | |
65 cities = locations(read_cities(f)) | |
66 for geonameid, (lat, lon) in cities.items(): | |
67 assert -90. <= lat <= 90. | |
68 assert -180. <= lon <= 180. | |
69 assert type(geonameid) == schema.types['geonameid'] | |
1 | 70 |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
71 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
72 if __name__ == '__main__': |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
73 unittest.main() |