Mercurial > hg > GlobalNeighbors
diff tests/test_data.py @ 0:5dba84370182
initial commit; half-working prototype
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 12:03:39 -0700 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python

"""
test data integrity; ensure we know what
data we're dealing with
"""

import os
import unittest
from globalneighbors.locations import locations
from globalneighbors.read import read_cities
from globalneighbors.schema import primary_key

here = os.path.dirname(os.path.abspath(__file__))
data = os.path.join(here, 'data')

# number of records `read_cities` is expected to yield from the full data file
full_tsv_lines = 149092


class DataIntegrityTest(unittest.TestCase):
    """Sanity checks on the city data files shipped in the `data` directory."""

    # fixture paths; the existence checks run when the class body is
    # executed, i.e. at import time, before any test is collected
    sample_tsv = os.path.join(data, 'sample.tsv')
    assert os.path.exists(sample_tsv)
    full_tsv = os.path.join(data, 'cities1000.txt')
    assert os.path.exists(full_tsv)

    def _read_full_cities(self):
        """
        read every city from the full data file and
        check that we got the expected number of them
        """

        with open(self.full_tsv) as f:
            cities = list(read_cities(f))
        self.assertEqual(len(cities), full_tsv_lines)
        return cities

    def test_primary_key(self):
        """ensure we have a unique key to identify cities"""

        cities = self._read_full_cities()
        n_cities = len(cities)

        # show we have duplicate names
        names = set(city['name'] for city in cities)
        self.assertNotEqual(len(names), n_cities)
        asciinames = set(city['asciiname'] for city in cities)
        self.assertNotEqual(len(asciinames), n_cities)

        # show we do have a unique key
        geonameids = set(city['geonameid'] for city in cities)
        self.assertEqual(len(geonameids), n_cities)

        # is ('asciiname', 'country code') unique?
        name_country_pairs = set((city['asciiname'], city['country code'])
                                 for city in cities)
        self.assertNotEqual(len(name_country_pairs), n_cities)  # Nope!

    def test_latlon(self):
        """
        ensure all latitudes and longitudes are in the
        range lat=(-90..90) and lon=(-180..180)
        """

        # read cities
        cities = self._read_full_cities()

        # make a location map
        city_locations = locations(cities)

        # ensure our data is valid (bounds are inclusive)
        latrange = (-90., 90.)
        lonrange = (-180., 180.)

        # `.items()` rather than `.iteritems()`: the latter exists only on
        # Python 2 dicts, whereas `.items()` works on Python 2 and 3 alike
        for geoid, (lat, lon) in city_locations.items():
            self.assertIsInstance(geoid, int)
            self.assertTrue(
                latrange[0] <= lat <= latrange[-1],
                'latitude %r out of range for geonameid %r' % (lat, geoid))
            self.assertTrue(
                lonrange[0] <= lon <= lonrange[-1],
                'longitude %r out of range for geonameid %r' % (lon, geoid))


if __name__ == '__main__':
    unittest.main()