annotate tests/test_distance.py @ 22:e69cb496324e

we have a data dump
author Jeff Hammel <k0scist@gmail.com>
date Sun, 25 Jun 2017 17:45:19 -0700
parents 27925261c137
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 test distance calculation
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 import math
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 import os
7
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
9 import random
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 import unittest
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 from globalneighbors import distance
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12 from globalneighbors.constants import Rearth
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13 from globalneighbors.locations import locations
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
14 from globalneighbors.read import read_cities
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15 from globalneighbors.read import read_city_list
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16 from globalneighbors.schema import primary_key
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18 here = os.path.dirname(os.path.abspath(__file__))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 data = os.path.join(here, 'data')
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20 full_tsv_lines = 149092
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
22
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23 class DistanceTests(unittest.TestCase):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25 # created with
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
26 # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
27 test_tsv = os.path.join(data, 'sample.tsv')
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
28 test_tsv_lines = 10
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
29
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
30 # full dataset: test with caution
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
31 full_tsv = os.path.join(data, 'cities1000.txt')
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
32 full_tsv_lines = 149092
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
33
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
34 # here's a smaller one
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
35 moderate_tsv = os.path.join(data, '10000cities.tsv')
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
36
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
37 def test_haversine(self):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
38
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
39 # a simple canned case
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
40 # equator to pole
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
41 lat1 = 0.
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
42 lat2 = 90.
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
43 lon2 = 70. # undefined, technically
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
44 expected_distance = 0.5*math.pi
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
45
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
46 for lon1 in range(-135, 135, 15):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
47 radians = [distance.deg_to_rad(degrees)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
48 for degrees in (lat1, lon2, lat2, lon2)]
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
49 error = (distance.haversine(*radians) == expected_distance)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
50 assert error < 1e-4
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
51
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
52 def test_distance(self):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
53 """test distance between two known cities"""
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
54
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
55 # Source:https://en.wikipedia.org/wiki/List_of_cities_by_latitude
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
56 # http://www.distancefromto.net/distance-from-new-york-to-chicago-us
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
57 chicago = (40.71278,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
58 -74.00594)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
59
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
60 new_york = (41.85003,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
61 -87.65005)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
62 ref_distance = 1149.
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
63
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
64 args = [distance.deg_to_rad(i) for i in
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
65 list(chicago) + list(new_york)]
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
66 calculated = distance.haversine(*args, r=Rearth)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
67
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
68 # Allow some error for circular projection approximation
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
69 error = abs(calculated - ref_distance)/ref_distance
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
70
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
71 assert error < 0.01
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
72
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
73 def test_distances(self):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
74 """"ensure disances monotonically decay"""
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
75
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
76 # parse the data
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
77 assert os.path.exists(self.test_tsv)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
78 cities = read_city_list(self.test_tsv)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
79 assert len(cities) == self.test_tsv_lines
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
80 city_locations = locations(cities)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
81 assert len(city_locations) == self.test_tsv_lines
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
82
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
83 # calculate all the neighbors
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
84 # WARNING: n*2 algorithm Too computationally intensive
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
85 # for full data set
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
86 for key, value in distance.calculate_distances(city_locations,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
87 r=Rearth):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
88 # for now, just make sure we can iterate over them
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
89 pass
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
90
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
91 def test_neighbors(self):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
92
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
93 # parse the data
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
94 tsv = os.path.join(data, 'sample.tsv')
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
95 assert os.path.exists(tsv)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
96 cities = read_city_list(tsv)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
97 city_locations = locations(cities)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
98
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
99 # calculate the neighbors
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
100 neighbors = distance.calculate_neighbors(city_locations,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
101 k=self.test_tsv_lines)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
102 assert len(neighbors) == self.test_tsv_lines
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
103
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
104 # ensure distance increases for each thing
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
105 for src, value in neighbors.items():
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
106 distances = [i[-1] for i in value]
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
107 assert len(distances) == self.test_tsv_lines - 1
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
108 for i in range(1, len(distances)):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
109 assert distances[i] >= distances[i-1]
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
110
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
111 def test_10000cities(self):
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
112 """a moderate size test"""
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
113
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
114 assert os.path.exists(self.moderate_tsv)
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
115 with open(self.moderate_tsv) as f:
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
116 cities = locations(read_cities(f))
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
117
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
118 # test over different values of # of neighbors
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
119 for k in (10, 100, 1000):
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
120 neighbors = distance.calculate_neighbors(cities,
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
121 k=k)
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
122
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
123 # ensure you have no more neighbors than you ask for
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
124 assert max([len(value) for value in neighbors.values()]) <= k
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
125
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
126 # assert distances increase
14
27925261c137 fix broken tests including an aggregious case where we add ourselves as a neighbor to ourself
Jeff Hammel <k0scist@gmail.com>
parents: 7
diff changeset
127 for key, value in neighbors.items():
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
128 distances = [i[-1] for i in value]
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
129 assert distances == sorted(distances)
14
27925261c137 fix broken tests including an aggregious case where we add ourselves as a neighbor to ourself
Jeff Hammel <k0scist@gmail.com>
parents: 7
diff changeset
130 neighbors = [i[0] for i in value]
27925261c137 fix broken tests including an aggregious case where we add ourselves as a neighbor to ourself
Jeff Hammel <k0scist@gmail.com>
parents: 7
diff changeset
131 assert key not in neighbors
27925261c137 fix broken tests including an aggregious case where we add ourselves as a neighbor to ourself
Jeff Hammel <k0scist@gmail.com>
parents: 7
diff changeset
132 assert len(set(neighbors)) == len(neighbors)
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
133
7
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
134 def test_insert_distances(self):
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
135 """test insert distances algorithm"""
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
136
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
137 values = [(i, random.random())
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
138 for i in range(1500)]
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
139 for k in (10, 100, 1000):
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
140 _distances = []
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
141 for i, value in values:
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
142 distance.insert_distance(_distances,
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
143 i,
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
144 value,
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
145 k)
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
146 # since k is < 1500
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
147 assert len(_distances) == k
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
148 ordered = [value[-1] for value in _distances]
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
149 assert sorted(ordered) == ordered
254195d0bac2 partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
150
1
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
151
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
152 if __name__ == '__main__':
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
153 unittest.main()