Mercurial > hg > GlobalNeighbors
view tests/test_distance.py @ 3:49aae0c0293b
improved test coverage
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 14:48:31 -0700 |
parents | 1b94f3bf97e5 |
children | 254195d0bac2 |
line wrap: on
line source
#!/usr/bin/env python

"""
test distance calculation
"""

import math
import os
import unittest
from globalneighbors import distance
from globalneighbors.constants import Rearth
from globalneighbors.locations import locations
from globalneighbors.read import read_cities
from globalneighbors.read import read_city_list
from globalneighbors.schema import primary_key

here = os.path.dirname(os.path.abspath(__file__))
data = os.path.join(here, 'data')
full_tsv_lines = 149092


class DistanceTests(unittest.TestCase):
    """Tests for the haversine distance and neighbor calculations."""

    # created with
    # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv
    test_tsv = os.path.join(data, 'sample.tsv')
    test_tsv_lines = 10

    # full dataset: test with caution
    full_tsv = os.path.join(data, 'cities1000.txt')
    full_tsv_lines = 149092

    # here's a smaller one
    moderate_tsv = os.path.join(data, '10000cities.tsv')

    def test_haversine(self):
        """equator-to-pole distance is a quarter circle at any longitude"""

        # a simple canned case: equator to pole
        lat1 = 0.
        lat2 = 90.
        lon2 = 70.  # undefined, technically (every meridian meets the pole)

        # NOTE(review): no `r` is passed, so this assumes `haversine`
        # defaults to a unit-radius sphere -- confirm against `distance`.
        expected_distance = 0.5*math.pi

        for lon1 in range(-135, 135, 15):
            # the starting longitude must not affect the result
            radians = [distance.deg_to_rad(degrees)
                       for degrees in (lat1, lon1, lat2, lon2)]
            # absolute error; comparing with `==` would yield a boolean
            # and make the tolerance check below meaningless
            error = abs(distance.haversine(*radians) - expected_distance)
            self.assertLess(error, 1e-4)

    def test_distance(self):
        """test distance between two known cities"""

        # Source:https://en.wikipedia.org/wiki/List_of_cities_by_latitude
        # http://www.distancefromto.net/distance-from-new-york-to-chicago-us
        # (latitude, longitude) in degrees
        new_york = (40.71278, -74.00594)
        chicago = (41.85003, -87.65005)
        ref_distance = 1149.

        args = [distance.deg_to_rad(i)
                for i in list(new_york) + list(chicago)]
        calculated = distance.haversine(*args, r=Rearth)

        # Allow some error for circular projection approximation
        error = abs(calculated - ref_distance)/ref_distance
        self.assertLess(error, 0.01)

    def test_distances(self):
        """ensure pairwise distances can be calculated and iterated over"""

        # parse the data
        self.assertTrue(os.path.exists(self.test_tsv))
        cities = read_city_list(self.test_tsv)
        self.assertEqual(len(cities), self.test_tsv_lines)
        city_locations = locations(cities)
        self.assertEqual(len(city_locations), self.test_tsv_lines)

        # calculate all the neighbors
        # WARNING: n**2 algorithm;
        # too computationally intensive for the full data set
        for key, value in distance.calculate_distances(city_locations,
                                                       r=Rearth):
            # for now, just make sure we can iterate over them
            pass

    def test_neighbors(self):
        """ensure each city's neighbors are ordered by increasing distance"""

        # parse the data
        self.assertTrue(os.path.exists(self.test_tsv))
        cities = read_city_list(self.test_tsv)
        city_locations = locations(cities)

        # calculate the neighbors
        neighbors = distance.calculate_neighbors(city_locations,
                                                 k=self.test_tsv_lines)
        self.assertEqual(len(neighbors), self.test_tsv_lines)

        # ensure distance increases for each thing
        for src, value in neighbors.items():
            # the distance is the last element of each neighbor entry
            distances = [i[-1] for i in value]
            # every city except the source itself
            self.assertEqual(len(distances), self.test_tsv_lines - 1)
            self.assertEqual(distances, sorted(distances))

    def test_10000cities(self):
        """a moderate size test"""

        self.assertTrue(os.path.exists(self.moderate_tsv))
        with open(self.moderate_tsv) as f:
            cities = locations(read_cities(f))

        # test over different values of # of neighbors
        for k in (10, 100, 1000):
            neighbors = distance.calculate_neighbors(cities, k=k)
            # an empty result would make the checks below vacuous
            self.assertTrue(neighbors)

            for value in neighbors.values():
                # ensure you have no more neighbors than you ask for
                self.assertLessEqual(len(value), k)

                # assert distances increase
                distances = [i[-1] for i in value]
                self.assertEqual(distances, sorted(distances))


if __name__ == '__main__':
    unittest.main()