diff tests/test_data.py @ 0:5dba84370182

initial commit; half-working prototype
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 12:03:39 -0700
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_data.py	Sat Jun 24 12:03:39 2017 -0700
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+"""
+test data integrity;  ensure we know what
+data we're dealing with
+"""
+
+import os
+import unittest
+from globalneighbors.locations import locations
+from globalneighbors.read import read_cities
+from globalneighbors.schema import primary_key
+
+here = os.path.dirname(os.path.abspath(__file__))  # directory of this file
+data = os.path.join(here, 'data')  # fixture directory next to this file
+full_tsv_lines = 149092  # expected number of cities read from cities1000.txt
+
+class DataIntegrityTest(unittest.TestCase):
+    """sanity checks on the city data files in the `data` directory"""
+    sample_tsv = os.path.join(data, 'sample.tsv')
+    assert os.path.exists(sample_tsv)
+    full_tsv = os.path.join(data, 'cities1000.txt')
+    assert os.path.exists(full_tsv)
+
+    def test_primary_key(self):
+        """ensure geonameid uniquely identifies a city while names do not"""
+
+        with open(self.full_tsv) as f:
+            cities = list(read_cities(f))
+        n_cities = len(cities)
+        assert n_cities == full_tsv_lines
+
+        # show we have duplicate names
+        nameset = set(city['name'] for city in cities)
+        assert len(nameset) != n_cities
+        asciinameset = set(city['asciiname'] for city in cities)
+        assert len(asciinameset) != n_cities
+
+        # show we do have a unique key
+        geonameids = set(city['geonameid'] for city in cities)
+        assert len(geonameids) == n_cities
+
+        # is ('asciiname', 'country code') unique?
+        nameccset = set((city['asciiname'], city['country code'])
+                        for city in cities)
+        assert len(nameccset) != n_cities  # Nope!
+
+    def test_latlon(self):
+        """
+        ensure all latitudes and longitudes are in the
+        range lat=(-90..90) and lon=(-180..180)
+        """
+
+        # read cities
+        with open(self.full_tsv) as f:
+            cities = list(read_cities(f))
+        n_cities = len(cities)
+        assert n_cities == full_tsv_lines
+
+        # make a location map
+        city_locations = locations(cities)
+
+        # ensure our data is valid; .items() works on python 2 and 3
+        latrange = (-90., 90.)
+        lonrange = (-180., 180.)
+        for geoid, (lat, lon) in city_locations.items():
+            assert isinstance(geoid, int)
+            assert latrange[0] <= lat <= latrange[-1]
+            assert lonrange[0] <= lon <= lonrange[-1]
+
+
+if __name__ == '__main__':
+    unittest.main()  # run this module's tests when executed as a script