Mercurial > hg > GlobalNeighbors
view tests/test_data.py @ 0:5dba84370182
initial commit; half-working prototype
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 12:03:39 -0700 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python

"""
test data integrity;
ensure we know what data we're dealing with
"""

import os
import unittest
from globalneighbors.locations import locations
from globalneighbors.read import read_cities
from globalneighbors.schema import primary_key

here = os.path.dirname(os.path.abspath(__file__))
data = os.path.join(here, 'data')

# number of cities `read_cities` is expected to yield from `cities1000.txt`
full_tsv_lines = 149092


class DataIntegrityTest(unittest.TestCase):
    """Sanity checks on the bundled city data files."""

    # The fixtures are checked when the class body is evaluated, so a
    # missing data file surfaces as soon as this module is imported.
    sample_tsv = os.path.join(data, 'sample.tsv')
    assert os.path.exists(sample_tsv)
    full_tsv = os.path.join(data, 'cities1000.txt')
    assert os.path.exists(full_tsv)

    def _read_full_cities(self):
        """
        Read every city from `full_tsv`, check the expected count,
        and return the cities as a list.
        """
        with open(self.full_tsv) as f:
            cities = list(read_cities(f))
        self.assertEqual(len(cities), full_tsv_lines)
        return cities

    def test_primary_key(self):
        """ensure we have a unique key to identify cities"""

        cities = self._read_full_cities()
        n_cities = len(cities)

        # show we have duplicate names
        nameset = {city['name'] for city in cities}
        self.assertNotEqual(len(nameset), n_cities)
        asciinameset = {city['asciiname'] for city in cities}
        self.assertNotEqual(len(asciinameset), n_cities)

        # show we do have a unique key
        geonameids = {city['geonameid'] for city in cities}
        self.assertEqual(len(geonameids), n_cities)

        # is ('asciiname', 'country code') unique?
        nameccset = {(city['asciiname'], city['country code'])
                     for city in cities}
        self.assertNotEqual(len(nameccset), n_cities)  # Nope!

    def test_latlon(self):
        """
        ensure all latitudes and longitudes are in the range
        lat=(-90..90) and lon=(-180..180)
        """

        # read cities
        cities = self._read_full_cities()

        # make a location map
        city_locations = locations(cities)

        # ensure our data is valid
        latrange = (-90., 90.)
        lonrange = (-180., 180.)
        # `.items()` rather than `.iteritems()`: the latter does not
        # exist on Python 3 dictionaries
        for geoid, (lat, lon) in city_locations.items():
            self.assertIsInstance(geoid, int)
            self.assertTrue(
                latrange[0] <= lat <= latrange[-1],
                "latitude %r out of range for geonameid %r" % (lat, geoid))
            self.assertTrue(
                lonrange[0] <= lon <= lonrange[-1],
                "longitude %r out of range for geonameid %r" % (lon, geoid))


if __name__ == '__main__':
    unittest.main()