Mercurial > hg > GlobalNeighbors
annotate tests/test_distance.py @ 25:991bce6b6881 default tip
[knn] placeholder for planning session
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 17 Sep 2017 14:35:50 -0700 |
parents | 27925261c137 |
children |
rev | line source |
---|---|
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 test distance calculation |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 import math |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 import os |
7
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
9 import random |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 import unittest |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 from globalneighbors import distance |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 from globalneighbors.constants import Rearth |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 from globalneighbors.locations import locations |
3 | 14 from globalneighbors.read import read_cities |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
15 from globalneighbors.read import read_city_list |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
16 from globalneighbors.schema import primary_key |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
17 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 here = os.path.dirname(os.path.abspath(__file__)) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
19 data = os.path.join(here, 'data') |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
20 full_tsv_lines = 149092 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 |
3 | 22 |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
23 class DistanceTests(unittest.TestCase): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
24 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
25 # created with |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
26 # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
27 test_tsv = os.path.join(data, 'sample.tsv') |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
28 test_tsv_lines = 10 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
29 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
30 # full dataset: test with caution |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
31 full_tsv = os.path.join(data, 'cities1000.txt') |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
32 full_tsv_lines = 149092 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
33 |
3 | 34 # here's a smaller one |
35 moderate_tsv = os.path.join(data, '10000cities.tsv') | |
36 | |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
37 def test_haversine(self): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
38 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
39 # a simple canned case |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
40 # equator to pole |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
41 lat1 = 0. |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
42 lat2 = 90. |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
43 lon2 = 70. # undefined, technically |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
44 expected_distance = 0.5*math.pi |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
45 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
46 for lon1 in range(-135, 135, 15): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
47 radians = [distance.deg_to_rad(degrees) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
48 for degrees in (lat1, lon2, lat2, lon2)] |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
49 error = (distance.haversine(*radians) == expected_distance) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
50 assert error < 1e-4 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
51 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
52 def test_distance(self): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
53 """test distance between two known cities""" |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
54 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
55 # Source:https://en.wikipedia.org/wiki/List_of_cities_by_latitude |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
56 # http://www.distancefromto.net/distance-from-new-york-to-chicago-us |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
57 chicago = (40.71278, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
58 -74.00594) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
59 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
60 new_york = (41.85003, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
61 -87.65005) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
62 ref_distance = 1149. |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
63 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
64 args = [distance.deg_to_rad(i) for i in |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
65 list(chicago) + list(new_york)] |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
66 calculated = distance.haversine(*args, r=Rearth) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
67 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
68 # Allow some error for circular projection approximation |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
69 error = abs(calculated - ref_distance)/ref_distance |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
70 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
71 assert error < 0.01 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
72 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
73 def test_distances(self): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
74 """"ensure disances monotonically decay""" |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
75 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
76 # parse the data |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
77 assert os.path.exists(self.test_tsv) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
78 cities = read_city_list(self.test_tsv) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
79 assert len(cities) == self.test_tsv_lines |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
80 city_locations = locations(cities) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
81 assert len(city_locations) == self.test_tsv_lines |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
82 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
83 # calculate all the neighbors |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
84 # WARNING: n*2 algorithm Too computationally intensive |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
85 # for full data set |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
86 for key, value in distance.calculate_distances(city_locations, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
87 r=Rearth): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
88 # for now, just make sure we can iterate over them |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
89 pass |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
90 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
91 def test_neighbors(self): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
92 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
93 # parse the data |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
94 tsv = os.path.join(data, 'sample.tsv') |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
95 assert os.path.exists(tsv) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
96 cities = read_city_list(tsv) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
97 city_locations = locations(cities) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
98 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
99 # calculate the neighbors |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
100 neighbors = distance.calculate_neighbors(city_locations, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
101 k=self.test_tsv_lines) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
102 assert len(neighbors) == self.test_tsv_lines |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
103 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
104 # ensure distance increases for each thing |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
105 for src, value in neighbors.items(): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
106 distances = [i[-1] for i in value] |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
107 assert len(distances) == self.test_tsv_lines - 1 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
108 for i in range(1, len(distances)): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
109 assert distances[i] >= distances[i-1] |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
110 |
3 | 111 def test_10000cities(self): |
112 """a moderate size test""" | |
113 | |
114 assert os.path.exists(self.moderate_tsv) | |
115 with open(self.moderate_tsv) as f: | |
116 cities = locations(read_cities(f)) | |
117 | |
118 # test over different values of # of neighbors | |
119 for k in (10, 100, 1000): | |
120 neighbors = distance.calculate_neighbors(cities, | |
121 k=k) | |
122 | |
123 # ensure you have no more neighbors than you ask for | |
124 assert max([len(value) for value in neighbors.values()]) <= k | |
125 | |
126 # assert distances increase | |
14
27925261c137
fix broken tests including an aggregious case where we add ourselves as a neighbor to ourself
Jeff Hammel <k0scist@gmail.com>
parents:
7
diff
changeset
|
127 for key, value in neighbors.items(): |
3 | 128 distances = [i[-1] for i in value] |
129 assert distances == sorted(distances) | |
14
27925261c137
fix broken tests including an aggregious case where we add ourselves as a neighbor to ourself
Jeff Hammel <k0scist@gmail.com>
parents:
7
diff
changeset
|
130 neighbors = [i[0] for i in value] |
27925261c137
fix broken tests including an aggregious case where we add ourselves as a neighbor to ourself
Jeff Hammel <k0scist@gmail.com>
parents:
7
diff
changeset
|
131 assert key not in neighbors |
27925261c137
fix broken tests including an aggregious case where we add ourselves as a neighbor to ourself
Jeff Hammel <k0scist@gmail.com>
parents:
7
diff
changeset
|
132 assert len(set(neighbors)) == len(neighbors) |
3 | 133 |
7
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
134 def test_insert_distances(self): |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
135 """test insert distances algorithm""" |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
136 |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
137 values = [(i, random.random()) |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
138 for i in range(1500)] |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
139 for k in (10, 100, 1000): |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
140 _distances = [] |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
141 for i, value in values: |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
142 distance.insert_distance(_distances, |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
143 i, |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
144 value, |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
145 k) |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
146 # since k is < 1500 |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
147 assert len(_distances) == k |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
148 ordered = [value[-1] for value in _distances] |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
149 assert sorted(ordered) == ordered |
254195d0bac2
partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
Jeff Hammel <k0scist@gmail.com>
parents:
3
diff
changeset
|
150 |
1 | 151 |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
152 if __name__ == '__main__': |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
153 unittest.main() |