annotate tests/test_read.py @ 1:1b94f3bf97e5

* limit distance function * start gridding * improve unicode handling
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 14:02:14 -0700
parents 5dba84370182
children 49aae0c0293b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 test data reading + loading
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 import os
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 import unittest
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 from globalneighbors import schema
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 from globalneighbors.read import read_tsv
1
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
11 from globalneighbors.read import read_city_list
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14 here = os.path.dirname(os.path.abspath(__file__))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15 data = os.path.join(here, 'data')
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17 class TestDataRead(unittest.TestCase):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 # created with
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20 # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21 test_tsv = os.path.join(data, 'sample.tsv')
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
22 test_tsv_lines = 10
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24 # full dataset: test with caution
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25 full_tsv = os.path.join(data, 'cities1000.txt')
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
26 full_tsv_lines = 149092
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
27
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
28 def test_read_tsv(self):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
29 """test reading a tsv file chunk"""
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
30
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
31 assert os.path.isfile(self.test_tsv)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
32 sample = read_tsv(self.test_tsv)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
33
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
34 assert len(sample) == 10
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
35 for row in sample:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
36 assert len(row) == len(schema.descriptions)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
37
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
38 def test_full_dataset(self):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
39 """ensure we can operate on the full dataset"""
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
40
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
41 assert os.path.isfile(self.full_tsv)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
42 cities = read_tsv(self.full_tsv)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
43 assert len(cities) == self.full_tsv_lines
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
44 for row in cities:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
45 assert len(row) == len(schema.descriptions)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
46
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
47 # cast the data into types we want
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
48 for row in cities:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
49 row = schema.cast_row(row, types=schema.types)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
50
1
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
51 def test_read_unicode(self):
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
52 """ensure we can read the cities as unicode"""
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
53
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
54 cities = read_city_list(self.full_tsv)
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
55
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
56
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
57 if __name__ == '__main__':
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
58 unittest.main()