Mercurial > hg > GlobalNeighbors
annotate globalneighbors/distance.py @ 3:49aae0c0293b
improved test coverage
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 14:48:31 -0700 |
parents | 1b94f3bf97e5 |
children | 7e27e874655b |
rev | line source |
---|---|
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 distance functionality |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 import argparse |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 import json |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 import sys |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 import time |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 from math import asin, sin, cos, sqrt, pi, fabs |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 from .cli import CitiesParser |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 from .constants import Rearth |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 from .locations import locations |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 from .read import read_cities |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 from .schema import fields |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
15 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
16 DEGREESTORADIANS = pi/180. |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
17 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
19 def haversine(lat1, lon1, lat2, lon2, r=1): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
20 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 see |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
22 https://en.wikipedia.org/wiki/Haversine_formula |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
23 Coordinates in radians |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
24 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
25 return 2*r*asin( |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
26 sqrt(sin(0.5*(lat2-lat1))**2 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
27 +cos(lat1)*cos(lat2)*(sin(0.5*(lon2-lon1))**2) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
28 )) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
29 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
30 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
31 def deg_to_rad(degrees): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
32 return degrees*DEGREESTORADIANS |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
33 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
34 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
35 def calculate_distances(locations, r=Rearth): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
36 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
37 WARNING! This is an N-squared approach |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
38 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
39 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
40 # convert to rad |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
41 rad_locations = [(location, tuple([deg_to_rad(i) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
42 for i in latlon])) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
43 for location, latlon |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
44 in locations.items()] |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
45 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
46 # use haversince function on N-body problem |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
47 for index, loc1 in enumerate(rad_locations): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
48 id1, (lat1, lon1) = loc1 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
49 for loc2 in rad_locations[index+1:]: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
50 id2, (lat2, lon2) = loc2 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
51 key = (id1, id2) if id2 > id1 else (id2, id1) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
52 yield (key, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
53 haversine(lat1, lon1, lat2, lon2, r=r)) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
54 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
55 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
56 def calculate_neighbors(locations, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
57 k=10, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
58 lat_tol=1., |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
59 lon_tol=1., |
3 | 60 output=None, |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
61 neighbors=None): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
62 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
63 calculate `k` nearest neighbors for each location |
3 | 64 |
65 locations -- dict of `geoid: (lat, lon)` | |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
66 """ |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
67 neighbors = neighbors or {} |
3 | 68 items = locations.items() # copy |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
69 index = 0 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
70 n_items = len(items) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
71 start = int(time.time()) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
72 while items: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
73 index += 1 |
3 | 74 if output and not index % output: |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
75 # output status counter |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
76 now = int(time.time()) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
77 duration = now - start |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
78 start = now |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
79 print ('{},{},{},{}'.format(index, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
80 len(items), |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
81 n_items, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
82 duration)) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
83 id1, (lat1, lon1) = items.pop() |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
84 for loc2 in items: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
85 id2, (lat2, lon2) = loc2 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
86 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
87 # filter out locations based on latlon boxing |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
88 if fabs(lat2 - lat1) > lat_tol: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
89 continue |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
90 if fabs(lon2 - lon1) > lon_tol: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
91 continue |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
92 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
93 # determine distance |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
94 args = [deg_to_rad(i) for i in |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
95 (lat1, lon1, lat2, lon2)] |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
96 new_distance = haversine(*args, r=Rearth) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
97 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
98 # insert in order |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
99 for i in (id1, id2): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
100 distances = neighbors.setdefault(i, []) |
1 | 101 if len(distances) == k and new_distance >= distances[-1][-1]: |
102 continue | |
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
103 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
104 # TODO: Binary Search Tree |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
105 for _index, (geoid, old_distance) in enumerate(distances): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
106 if new_distance < old_distance: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
107 distances.insert(_index, (i, new_distance)) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
108 if len(distances) == k+1: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
109 distances.pop() |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
110 break |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
111 else: |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
112 distances.append((i, new_distance)) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
113 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
114 return neighbors |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
115 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
116 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
117 def main(args=sys.argv[1:]): |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
118 """CLI""" |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
119 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
120 # parse command line arguments |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
121 description = """write nearest neighborfiles""" |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
122 parser = CitiesParser(description=description) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
123 parser.add_argument('output', type=argparse.FileType('w'), |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
124 help="output file to dump JSON to") |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
125 parser.add_argument('--counter', '--output-counter', |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
126 dest='output_counter', |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
127 type=int, default=100, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
128 help="how often to output progress updates [DEFAULT: %(default)s]") |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
129 parser.add_argument('-k', dest='k', |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
130 type=int, default=50, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
131 help="number of neighbors to determine [DEFAULT: %(default)s]") |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
132 options = parser.parse_args(args) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
133 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
134 # parse cities |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
135 cities = list(read_cities(options.cities, fields=fields)) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
136 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
137 # get locations |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
138 city_locations = locations(cities) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
139 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
140 # calculate neighbors |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
141 neighbors = calculate_neighbors(city_locations, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
142 k=options.k, |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
143 output=options.output_counter) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
144 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
145 # output |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
146 options.output.write(json.dumps(neighbors, indent=2)) |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
147 |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
148 if __name__ == '__main__': |
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
149 main() |