annotate globalneighbors/distance.py @ 5:7e27e874655b

test a larger grid + move distance insertion to its own function
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 15:16:10 -0700
parents 49aae0c0293b
children 316e1d54ffd4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 distance functionality
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 import argparse
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 import json
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 import sys
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 import time
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 from math import asin, sin, cos, sqrt, pi, fabs
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 from .cli import CitiesParser
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 from .constants import Rearth
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12 from .locations import locations
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13 from .read import read_cities
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14 from .schema import fields
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16 DEGREESTORADIANS = pi/180.
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 def haversine(lat1, lon1, lat2, lon2, r=1):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21 see
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
22 https://en.wikipedia.org/wiki/Haversine_formula
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23 Coordinates in radians
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25 return 2*r*asin(
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
26 sqrt(sin(0.5*(lat2-lat1))**2
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
27 +cos(lat1)*cos(lat2)*(sin(0.5*(lon2-lon1))**2)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
28 ))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
29
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
30
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
31 def deg_to_rad(degrees):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
32 return degrees*DEGREESTORADIANS
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
33
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
34
5
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
35 def insert_distance(distances, i, new_distance, k):
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
36 """
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
37 insert `(i, new_distance)` into `distances`
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
38 keeping distances in order with maximum of size `k`
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
39 """
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
40
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
41 # TODO: Binary Search Tree
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
42 for _index, (geoid, old_distance) in enumerate(distances):
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
43 if new_distance < old_distance:
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
44 distances.insert(_index, (i, new_distance))
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
45 if len(distances) == k+1:
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
46 distances.pop()
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
47 break
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
48 else:
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
49 distances.append((i, new_distance))
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
50
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
51
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
52 def calculate_distances(locations, r=Rearth):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
53 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
54 WARNING! This is an N-squared approach
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
55 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
56
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
57 # convert to rad
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
58 rad_locations = [(location, tuple([deg_to_rad(i)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
59 for i in latlon]))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
60 for location, latlon
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
61 in locations.items()]
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
62
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
63 # use haversince function on N-body problem
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
64 for index, loc1 in enumerate(rad_locations):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
65 id1, (lat1, lon1) = loc1
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
66 for loc2 in rad_locations[index+1:]:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
67 id2, (lat2, lon2) = loc2
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
68 key = (id1, id2) if id2 > id1 else (id2, id1)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
69 yield (key,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
70 haversine(lat1, lon1, lat2, lon2, r=r))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
71
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
72
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
73 def calculate_neighbors(locations,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
74 k=10,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
75 lat_tol=1.,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
76 lon_tol=1.,
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
77 output=None,
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
78 neighbors=None):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
79 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
80 calculate `k` nearest neighbors for each location
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
81
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
82 locations -- dict of `geoid: (lat, lon)`
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
83 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
84 neighbors = neighbors or {}
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
85 items = locations.items() # copy
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
86 index = 0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
87 n_items = len(items)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
88 start = int(time.time())
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
89 while items:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
90 index += 1
3
49aae0c0293b improved test coverage
Jeff Hammel <k0scist@gmail.com>
parents: 1
diff changeset
91 if output and not index % output:
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
92 # output status counter
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
93 now = int(time.time())
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
94 duration = now - start
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
95 start = now
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
96 print ('{},{},{},{}'.format(index,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
97 len(items),
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
98 n_items,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
99 duration))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
100 id1, (lat1, lon1) = items.pop()
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
101 for loc2 in items:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
102 id2, (lat2, lon2) = loc2
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
103
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
104 # filter out locations based on latlon boxing
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
105 if fabs(lat2 - lat1) > lat_tol:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
106 continue
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
107 if fabs(lon2 - lon1) > lon_tol:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
108 continue
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
109
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
110 # determine distance
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
111 args = [deg_to_rad(i) for i in
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
112 (lat1, lon1, lat2, lon2)]
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
113 new_distance = haversine(*args, r=Rearth)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
114
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
115 # insert in order
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
116 for i in (id1, id2):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
117 distances = neighbors.setdefault(i, [])
1
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
118 if len(distances) == k and new_distance >= distances[-1][-1]:
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
119 continue
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
120
5
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
121 insert_distance(distances, i, new_distance, k)
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
122
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
123 return neighbors
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
124
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
125
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
126 def main(args=sys.argv[1:]):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
127 """CLI"""
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
128
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
129 # parse command line arguments
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
130 description = """write nearest neighborfiles"""
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
131 parser = CitiesParser(description=description)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
132 parser.add_argument('output', type=argparse.FileType('w'),
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
133 help="output file to dump JSON to")
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
134 parser.add_argument('--counter', '--output-counter',
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
135 dest='output_counter',
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
136 type=int, default=100,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
137 help="how often to output progress updates [DEFAULT: %(default)s]")
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
138 parser.add_argument('-k', dest='k',
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
139 type=int, default=50,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
140 help="number of neighbors to determine [DEFAULT: %(default)s]")
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
141 options = parser.parse_args(args)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
142
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
143 # parse cities
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
144 cities = list(read_cities(options.cities, fields=fields))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
145
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
146 # get locations
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
147 city_locations = locations(cities)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
148
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
149 # calculate neighbors
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
150 neighbors = calculate_neighbors(city_locations,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
151 k=options.k,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
152 output=options.output_counter)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
153
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
154 # output
5
7e27e874655b test a larger grid + move distance insertion to its own function
Jeff Hammel <k0scist@gmail.com>
parents: 3
diff changeset
155 options.output.write(json.dumps(neighbors))
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
156
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
157 if __name__ == '__main__':
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
158 main()