annotate tests/test_read.py @ 23:6891c5523b69

load with neighbors :)
author Jeff Hammel <k0scist@gmail.com>
date Sun, 25 Jun 2017 18:13:43 -0700
parents 49aae0c0293b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 test data reading + loading
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 import os
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 import unittest
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 from globalneighbors import schema
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
10 from globalneighbors.locations import locations
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 from globalneighbors.read import read_tsv
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
12 from globalneighbors.read import read_cities
1
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
13 from globalneighbors.read import read_city_list
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# directory containing this test module
here = os.path.dirname(os.path.abspath(__file__))
# fixture files are looked up in the `data` directory next to this module
data = os.path.join(here, 'data')
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
class TestDataRead(unittest.TestCase):
    """Tests for reading and loading the tab-separated city data files."""

    # created with
    # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv
    test_tsv = os.path.join(data, 'sample.tsv')
    test_tsv_lines = 10

    # full dataset: test with caution
    full_tsv = os.path.join(data, 'cities1000.txt')
    full_tsv_lines = 149092

    def test_read_tsv(self):
        """test reading a tsv file chunk"""

        self.assertTrue(os.path.isfile(self.test_tsv))
        sample = read_tsv(self.test_tsv)

        # use the class attribute so the expected row count lives in one place
        self.assertEqual(len(sample), self.test_tsv_lines)
        for row in sample:
            # every row must have one value per schema column
            self.assertEqual(len(row), len(schema.descriptions))

    def test_full_dataset(self):
        """ensure we can operate on the full dataset"""

        self.assertTrue(os.path.isfile(self.full_tsv))
        cities = read_tsv(self.full_tsv)
        self.assertEqual(len(cities), self.full_tsv_lines)
        for row in cities:
            self.assertEqual(len(row), len(schema.descriptions))

        # cast the data into types we want;
        # the result is discarded: the check is only that casting every
        # row completes without raising
        for row in cities:
            schema.cast_row(row, types=schema.types)

    def test_read_unicode(self):
        """ensure we can read the cities as unicode"""

        cities = read_city_list(self.full_tsv)
        for city in cities:
            for field in schema.unicode_fields:
                # NOTE: `unicode` is the Python 2 builtin text type, so this
                # test as written requires Python 2
                self.assertIsInstance(city[field], unicode)

    def test_iterative_locations(self):
        """assert we can read into locations as a generator"""

        with open(self.test_tsv) as f:
            cities = locations(read_cities(f))
            # iterate while the file is still open, so the checks work
            # whether or not `locations` consumed the input eagerly
            for geonameid, (lat, lon) in cities.items():
                # latitude and longitude must be valid geographic coordinates
                self.assertGreaterEqual(lat, -90.)
                self.assertLessEqual(lat, 90.)
                self.assertGreaterEqual(lon, -180.)
                self.assertLessEqual(lon, 180.)
                self.assertIsInstance(geonameid, schema.types['geonameid'])
1
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
70
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
71
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
if __name__ == '__main__':
    # run the test suite when this file is executed directly
    unittest.main()