comparison tests/test_distance.py @ 3:49aae0c0293b

improved test coverage
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 14:48:31 -0700
parents 1b94f3bf97e5
children 254195d0bac2
comparison
legend: equal | deleted | inserted | replaced
2:50ee13cddf58 3:49aae0c0293b
8 import os 8 import os
9 import unittest 9 import unittest
10 from globalneighbors import distance 10 from globalneighbors import distance
11 from globalneighbors.constants import Rearth 11 from globalneighbors.constants import Rearth
12 from globalneighbors.locations import locations 12 from globalneighbors.locations import locations
13 from globalneighbors.read import read_cities
13 from globalneighbors.read import read_city_list 14 from globalneighbors.read import read_city_list
14 from globalneighbors.schema import primary_key 15 from globalneighbors.schema import primary_key
15 16
16 here = os.path.dirname(os.path.abspath(__file__)) 17 here = os.path.dirname(os.path.abspath(__file__))
17 data = os.path.join(here, 'data') 18 data = os.path.join(here, 'data')
18 full_tsv_lines = 149092 19 full_tsv_lines = 149092
20
19 21
20 class DistanceTests(unittest.TestCase): 22 class DistanceTests(unittest.TestCase):
21 23
22 # created with 24 # created with
23 # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv 25 # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv
25 test_tsv_lines = 10 27 test_tsv_lines = 10
26 28
27 # full dataset: test with caution 29 # full dataset: test with caution
28 full_tsv = os.path.join(data, 'cities1000.txt') 30 full_tsv = os.path.join(data, 'cities1000.txt')
29 full_tsv_lines = 149092 31 full_tsv_lines = 149092
32
33 # here's a smaller one
34 moderate_tsv = os.path.join(data, '10000cities.tsv')
30 35
31 def test_haversine(self): 36 def test_haversine(self):
32 37
33 # a simple canned case 38 # a simple canned case
34 # equator to pole 39 # equator to pole
100 distances = [i[-1] for i in value] 105 distances = [i[-1] for i in value]
101 assert len(distances) == self.test_tsv_lines - 1 106 assert len(distances) == self.test_tsv_lines - 1
102 for i in range(1, len(distances)): 107 for i in range(1, len(distances)):
103 assert distances[i] >= distances[i-1] 108 assert distances[i] >= distances[i-1]
104 109
110 def test_10000cities(self):
111 """a moderate size test"""
112
113 assert os.path.exists(self.moderate_tsv)
114 with open(self.moderate_tsv) as f:
115 cities = locations(read_cities(f))
116
117 # test over different values of # of neighbors
118 for k in (10, 100, 1000):
119 neighbors = distance.calculate_neighbors(cities,
120 k=k)
121
122 # ensure you have no more neighbors than you ask for
123 assert max([len(value) for value in neighbors.values()]) <= k
124
125 # assert distances increase
126 for value in neighbors.values():
127 distances = [i[-1] for i in value]
128 assert distances == sorted(distances)
129
105 130
106 if __name__ == '__main__': 131 if __name__ == '__main__':
107 unittest.main() 132 unittest.main()