comparison tests/test_data.py @ 0:5dba84370182

initial commit; half-working prototype
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 12:03:39 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5dba84370182
1 #!/usr/bin/env python
2
3 """
4 test data integrity; ensure we know what
5 data we're dealing with
6 """
7
8 import os
9 import unittest
10 from globalneighbors.locations import locations
11 from globalneighbors.read import read_cities
12 from globalneighbors.schema import primary_key
13
# absolute directory that contains this test module
here = os.path.split(os.path.abspath(__file__))[0]

# directory holding the data files the tests read
data = os.path.join(here, 'data')

# number of city records the tests expect to read from the full data file
full_tsv_lines = 149092
17
class DataIntegrityTest(unittest.TestCase):
    """
    Integrity checks on the city data files used by the tests.

    The data file paths are built, and asserted to exist, while the class
    body executes, so a missing file fails when this module is imported
    rather than inside an individual test.
    """

    sample_tsv = os.path.join(data, 'sample.tsv')
    assert os.path.exists(sample_tsv)
    full_tsv = os.path.join(data, 'cities1000.txt')
    assert os.path.exists(full_tsv)

    def _read_full_cities(self):
        """
        Read every city from `full_tsv` and return them as a list.

        Fails the calling test if the number of cities read differs
        from `full_tsv_lines`.
        """

        with open(self.full_tsv) as f:
            cities = list(read_cities(f))
        self.assertEqual(len(cities), full_tsv_lines)
        return cities

    def test_primary_key(self):
        """ensure we have a unique key to identify cities"""

        cities = self._read_full_cities()
        n_cities = len(cities)

        # show we have duplicate names
        nameset = set(city['name'] for city in cities)
        self.assertNotEqual(len(nameset), n_cities)
        asciinameset = set(city['asciiname'] for city in cities)
        self.assertNotEqual(len(asciinameset), n_cities)

        # show we do have a unique key
        geonameids = set(city['geonameid'] for city in cities)
        self.assertEqual(len(geonameids), n_cities)

        # is ('asciiname', 'country code') unique?
        nameccset = set((city['asciiname'], city['country code'])
                        for city in cities)
        self.assertNotEqual(len(nameccset), n_cities)  # Nope!

    def test_latlon(self):
        """
        ensure all latitudes and longitudes are in the
        range lat=(-90..90) and lon=(-180..180)
        """

        # read cities
        cities = self._read_full_cities()

        # make a location map
        city_locations = locations(cities)

        # ensure our data is valid
        lat_min, lat_max = -90., 90.
        lon_min, lon_max = -180., 180.

        # `.items()` works on Python 2 and 3; `.iteritems()` is
        # Python 2 only and raises AttributeError on Python 3
        for geoid, (lat, lon) in city_locations.items():
            self.assertIsInstance(geoid, int)
            self.assertTrue(lat_min <= lat <= lat_max,
                            "latitude out of range for %r: %r" % (geoid, lat))
            self.assertTrue(lon_min <= lon <= lon_max,
                            "longitude out of range for %r: %r" % (geoid, lon))
70
71
# run the tests in this module when the file is executed as a script
if __name__ == '__main__':
    unittest.main()