Mercurial > hg > GlobalNeighbors
comparison tests/test_data.py @ 0:5dba84370182
initial commit; half-working prototype
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 12:03:39 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5dba84370182 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 test data integrity; ensure we know what | |
5 data we're dealing with | |
6 """ | |
7 | |
8 import os | |
9 import unittest | |
10 from globalneighbors.locations import locations | |
11 from globalneighbors.read import read_cities | |
12 from globalneighbors.schema import primary_key | |
13 | |
# directory containing this test module
here = os.path.split(os.path.abspath(__file__))[0]

# fixture files live in the `data` directory next to this module
data = os.path.join(here, 'data')

# number of cities expected when the full dataset is read
full_tsv_lines = 149092
17 | |
class DataIntegrityTest(unittest.TestCase):
    """
    sanity checks on the city data files shipped with the tests

    Both tests load the full dataset (`cities1000.txt`) through
    `read_cities` and first confirm that the expected number of
    cities (`full_tsv_lines`) was read.
    """

    # Fixture paths.  The existence checks run while the class body is
    # executed (i.e. at import time), so a missing data file aborts the
    # module before any test runs.
    sample_tsv = os.path.join(data, 'sample.tsv')
    assert os.path.exists(sample_tsv)
    full_tsv = os.path.join(data, 'cities1000.txt')
    assert os.path.exists(full_tsv)

    def _read_full_cities(self):
        """
        return every city read from `full_tsv` as a list;
        fails the calling test if the count is not `full_tsv_lines`
        """

        with open(self.full_tsv) as f:
            cities = list(read_cities(f))
        self.assertEqual(len(cities), full_tsv_lines,
                         "unexpected number of cities read")
        return cities

    def test_primary_key(self):
        """ensure we have a unique key to identify cities"""

        cities = self._read_full_cities()
        n_cities = len(cities)

        # show we have duplicate names
        nameset = set(city['name'] for city in cities)
        self.assertNotEqual(len(nameset), n_cities,
                            "'name' is unexpectedly unique")
        asciinameset = set(city['asciiname'] for city in cities)
        self.assertNotEqual(len(asciinameset), n_cities,
                            "'asciiname' is unexpectedly unique")

        # show we do have a unique key
        geonameids = set(city['geonameid'] for city in cities)
        self.assertEqual(len(geonameids), n_cities,
                         "'geonameid' is not unique")

        # is ('asciiname', 'country code') unique?  Nope!
        nameccset = set((city['asciiname'], city['country code'])
                        for city in cities)
        self.assertNotEqual(len(nameccset), n_cities,
                            "('asciiname', 'country code') is "
                            "unexpectedly unique")

    def test_latlon(self):
        """
        ensure all latitudes and longitudes are in the
        range lat=(-90..90) and lon=(-180..180)
        """

        # read cities
        cities = self._read_full_cities()

        # make a location map
        # (presumably a dict of geonameid -> (lat, lon); confirm
        # against globalneighbors.locations)
        city_locations = locations(cities)

        # ensure our data is valid
        latrange = (-90., 90.)
        lonrange = (-180., 180.)

        # `.items()` instead of `.iteritems()`: the latter exists only
        # on Python 2 dicts, while `.items()` works on Python 2 and 3
        for geoid, (lat, lon) in city_locations.items():
            self.assertIsInstance(geoid, int)
            self.assertTrue(latrange[0] <= lat <= latrange[-1],
                            "latitude out of range for %r: %r"
                            % (geoid, lat))
            self.assertTrue(lonrange[0] <= lon <= lonrange[-1],
                            "longitude out of range for %r: %r"
                            % (geoid, lon))
71 | |
# run the tests in this module when it is executed as a script
if __name__ == '__main__':
    unittest.main()