view tests/test_distance.py @ 3:49aae0c0293b

improved test coverage
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 14:48:31 -0700
parents 1b94f3bf97e5
children 254195d0bac2
line wrap: on
line source

#!/usr/bin/env python

"""
test distance calculation
"""

import math
import os
import unittest
from globalneighbors import distance
from globalneighbors.constants import Rearth
from globalneighbors.locations import locations
from globalneighbors.read import read_cities
from globalneighbors.read import read_city_list
from globalneighbors.schema import primary_key

# expected number of lines in the full dataset
# (same value as DistanceTests.full_tsv_lines)
full_tsv_lines = 149092

# directory containing this test module ...
here = os.path.split(os.path.abspath(__file__))[0]
# ... and the test data directory that sits beside it
data = os.path.join(here, 'data')


class DistanceTests(unittest.TestCase):
    """Tests for the great-circle distance and nearest-neighbor helpers."""

    # created with
    # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv
    test_tsv = os.path.join(data, 'sample.tsv')
    test_tsv_lines = 10

    # full dataset:  test with caution
    full_tsv = os.path.join(data, 'cities1000.txt')
    full_tsv_lines = 149092

    # here's a smaller one
    moderate_tsv = os.path.join(data, '10000cities.tsv')

    def test_haversine(self):
        """test the equator-to-pole distance for a range of longitudes"""

        # a simple canned case
        # equator to pole
        lat1 = 0.
        lat2 = 90.
        lon2 = 70.  # undefined, technically
        # a quarter of a great circle; this presumes `haversine`
        # defaults to a unit radius when `r` is not given
        expected_distance = 0.5*math.pi

        # the result must not depend on the starting longitude
        for lon1 in range(-135, 135, 15):
            radians = [distance.deg_to_rad(degrees)
                       for degrees in (lat1, float(lon1), lat2, lon2)]
            error = abs(distance.haversine(*radians) - expected_distance)
            self.assertLess(error, 1e-4)

    def test_distance(self):
        """test distance between two known cities"""

        # Source:https://en.wikipedia.org/wiki/List_of_cities_by_latitude
        # http://www.distancefromto.net/distance-from-new-york-to-chicago-us
        # (latitude, longitude) in degrees
        new_york = (40.71278,
                    -74.00594)

        chicago = (41.85003,
                   -87.65005)
        ref_distance = 1149.

        args = [distance.deg_to_rad(i) for i in
                list(new_york) + list(chicago)]
        calculated = distance.haversine(*args, r=Rearth)

        # Allow some error for circular projection approximation
        error = abs(calculated - ref_distance)/ref_distance

        self.assertLess(error, 0.01)

    def test_distances(self):
        """ensure pairwise distances can be calculated for the sample data"""

        # parse the data
        self.assertTrue(os.path.exists(self.test_tsv))
        cities = read_city_list(self.test_tsv)
        self.assertEqual(len(cities), self.test_tsv_lines)
        city_locations = locations(cities)
        self.assertEqual(len(city_locations), self.test_tsv_lines)

        # calculate all the neighbors
        # WARNING: n**2 algorithm; too computationally intensive
        # for the full data set
        for key, value in distance.calculate_distances(city_locations,
                                                       r=Rearth):
            # for now, just make sure we can iterate over them
            pass

    def test_neighbors(self):
        """ensure each city's neighbors are ordered by increasing distance"""

        # parse the data
        self.assertTrue(os.path.exists(self.test_tsv))
        cities = read_city_list(self.test_tsv)
        city_locations = locations(cities)

        # calculate the neighbors
        neighbors = distance.calculate_neighbors(city_locations,
                                                 k=self.test_tsv_lines)
        self.assertEqual(len(neighbors), self.test_tsv_lines)

        # ensure distance increases for each thing
        for src, value in neighbors.items():
            # the distance is the last element of each neighbor entry
            distances = [i[-1] for i in value]
            # every other city in the sample, but not the city itself
            self.assertEqual(len(distances), self.test_tsv_lines - 1)
            self.assertEqual(distances, sorted(distances))

    def test_10000cities(self):
        """a moderate size test"""

        self.assertTrue(os.path.exists(self.moderate_tsv))
        with open(self.moderate_tsv) as f:
            cities = locations(read_cities(f))

        # test over different values of # of neighbors
        for k in (10, 100, 1000):
            neighbors = distance.calculate_neighbors(cities,
                                                     k=k)

            for value in neighbors.values():
                # ensure you have no more neighbors than you ask for
                self.assertLessEqual(len(value), k)

                # assert distances increase
                distances = [i[-1] for i in value]
                self.assertEqual(distances, sorted(distances))


# run the tests in this module when the file is executed as a script
if __name__ == '__main__':
    unittest.main()