Mercurial > hg > GlobalNeighbors
view tests/test_distance.py @ 22:e69cb496324e
we have a data dump
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 25 Jun 2017 17:45:19 -0700 |
parents | 27925261c137 |
children |
line wrap: on
line source
#!/usr/bin/env python """ test distance calculation """ import math import os import random import unittest from globalneighbors import distance from globalneighbors.constants import Rearth from globalneighbors.locations import locations from globalneighbors.read import read_cities from globalneighbors.read import read_city_list from globalneighbors.schema import primary_key here = os.path.dirname(os.path.abspath(__file__)) data = os.path.join(here, 'data') full_tsv_lines = 149092 class DistanceTests(unittest.TestCase): # created with # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv test_tsv = os.path.join(data, 'sample.tsv') test_tsv_lines = 10 # full dataset: test with caution full_tsv = os.path.join(data, 'cities1000.txt') full_tsv_lines = 149092 # here's a smaller one moderate_tsv = os.path.join(data, '10000cities.tsv') def test_haversine(self): # a simple canned case # equator to pole lat1 = 0. lat2 = 90. lon2 = 70. # undefined, technically expected_distance = 0.5*math.pi for lon1 in range(-135, 135, 15): radians = [distance.deg_to_rad(degrees) for degrees in (lat1, lon2, lat2, lon2)] error = (distance.haversine(*radians) == expected_distance) assert error < 1e-4 def test_distance(self): """test distance between two known cities""" # Source:https://en.wikipedia.org/wiki/List_of_cities_by_latitude # http://www.distancefromto.net/distance-from-new-york-to-chicago-us chicago = (40.71278, -74.00594) new_york = (41.85003, -87.65005) ref_distance = 1149. args = [distance.deg_to_rad(i) for i in list(chicago) + list(new_york)] calculated = distance.haversine(*args, r=Rearth) # Allow some error for circular projection approximation error = abs(calculated - ref_distance)/ref_distance assert error < 0.01 def test_distances(self): """"ensure disances monotonically decay""" # parse the data assert os.path.exists(self.test_tsv) cities = read_city_list(self.test_tsv) assert len(cities) == self.test_tsv_lines city_locations = locations(cities) assert len(city_locations) == self.test_tsv_lines # calculate all the neighbors # WARNING: n*2 algorithm Too computationally intensive # for full data set for key, value in distance.calculate_distances(city_locations, r=Rearth): # for now, just make sure we can iterate over them pass def test_neighbors(self): # parse the data tsv = os.path.join(data, 'sample.tsv') assert os.path.exists(tsv) cities = read_city_list(tsv) city_locations = locations(cities) # calculate the neighbors neighbors = distance.calculate_neighbors(city_locations, k=self.test_tsv_lines) assert len(neighbors) == self.test_tsv_lines # ensure distance increases for each thing for src, value in neighbors.items(): distances = [i[-1] for i in value] assert len(distances) == self.test_tsv_lines - 1 for i in range(1, len(distances)): assert distances[i] >= distances[i-1] def test_10000cities(self): """a moderate size test""" assert os.path.exists(self.moderate_tsv) with open(self.moderate_tsv) as f: cities = locations(read_cities(f)) # test over different values of # of neighbors for k in (10, 100, 1000): neighbors = distance.calculate_neighbors(cities, k=k) # ensure you have no more neighbors than you ask for assert max([len(value) for value in neighbors.values()]) <= k # assert distances increase for key, value in neighbors.items(): distances = [i[-1] for i in value] assert distances == sorted(distances) neighbors = [i[0] for i in value] assert key not in neighbors assert len(set(neighbors)) == len(neighbors) def test_insert_distances(self): """test insert distances algorithm""" values = [(i, random.random()) for i in range(1500)] for k in (10, 100, 1000): _distances = [] for i, value in values: distance.insert_distance(_distances, i, value, k) # since k is < 1500 assert len(_distances) == k ordered = [value[-1] for value in _distances] assert sorted(ordered) == ordered if __name__ == '__main__': unittest.main()