Mercurial > hg > GlobalNeighbors
annotate globalneighbors/distance.py @ 2:50ee13cddf58
remove this test; the datafile was too large
| author | Jeff Hammel <k0scist@gmail.com> |
|---|---|
| date | Sat, 24 Jun 2017 14:04:00 -0700 |
| parents | 1b94f3bf97e5 |
| children | 49aae0c0293b |
| rev | line source |
|---|---|
|
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 """ |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 distance functionality |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 """ |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 import argparse |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 import json |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 import sys |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 import time |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 from math import asin, sin, cos, sqrt, pi, fabs |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 from .cli import CitiesParser |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 from .constants import Rearth |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 from .locations import locations |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 from .read import read_cities |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 from .schema import fields |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
15 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
16 DEGREESTORADIANS = pi/180. |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
17 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
19 def haversine(lat1, lon1, lat2, lon2, r=1): |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
20 """ |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 see |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
22 https://en.wikipedia.org/wiki/Haversine_formula |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
23 Coordinates in radians |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
24 """ |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
25 return 2*r*asin( |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
26 sqrt(sin(0.5*(lat2-lat1))**2 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
27 +cos(lat1)*cos(lat2)*(sin(0.5*(lon2-lon1))**2) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
28 )) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
29 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
30 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
31 def deg_to_rad(degrees): |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
32 return degrees*DEGREESTORADIANS |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
33 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
34 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
35 def calculate_distances(locations, r=Rearth): |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
36 """ |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
37 WARNING! This is an N-squared approach |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
38 """ |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
39 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
40 # convert to rad |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
41 rad_locations = [(location, tuple([deg_to_rad(i) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
42 for i in latlon])) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
43 for location, latlon |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
44 in locations.items()] |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
45 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
46 # use haversince function on N-body problem |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
47 for index, loc1 in enumerate(rad_locations): |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
48 id1, (lat1, lon1) = loc1 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
49 for loc2 in rad_locations[index+1:]: |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
50 id2, (lat2, lon2) = loc2 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
51 key = (id1, id2) if id2 > id1 else (id2, id1) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
52 yield (key, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
53 haversine(lat1, lon1, lat2, lon2, r=r)) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
54 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
55 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
56 def calculate_neighbors(locations, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
57 k=10, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
58 lat_tol=1., |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
59 lon_tol=1., |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
60 output=1000, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
61 neighbors=None): |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
62 """ |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
63 calculate `k` nearest neighbors for each location |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
64 """ |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
65 neighbors = neighbors or {} |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
66 items = locations.items() |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
67 index = 0 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
68 n_items = len(items) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
69 start = int(time.time()) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
70 while items: |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
71 index += 1 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
72 if not index % output: |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
73 # output status counter |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
74 now = int(time.time()) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
75 duration = now - start |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
76 start = now |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
77 print ('{},{},{},{}'.format(index, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
78 len(items), |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
79 n_items, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
80 duration)) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
81 id1, (lat1, lon1) = items.pop() |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
82 for loc2 in items: |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
83 id2, (lat2, lon2) = loc2 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
84 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
85 # filter out locations based on latlon boxing |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
86 if fabs(lat2 - lat1) > lat_tol: |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
87 continue |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
88 if fabs(lon2 - lon1) > lon_tol: |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
89 continue |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
90 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
91 # determine distance |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
92 args = [deg_to_rad(i) for i in |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
93 (lat1, lon1, lat2, lon2)] |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
94 new_distance = haversine(*args, r=Rearth) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
95 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
96 # insert in order |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
97 for i in (id1, id2): |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
98 distances = neighbors.setdefault(i, []) |
| 1 | 99 if len(distances) == k and new_distance >= distances[-1][-1]: |
| 100 continue | |
|
0
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
101 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
102 # TODO: Binary Search Tree |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
103 for _index, (geoid, old_distance) in enumerate(distances): |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
104 if new_distance < old_distance: |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
105 distances.insert(_index, (i, new_distance)) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
106 if len(distances) == k+1: |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
107 distances.pop() |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
108 break |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
109 else: |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
110 distances.append((i, new_distance)) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
111 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
112 return neighbors |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
113 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
114 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
115 def main(args=sys.argv[1:]): |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
116 """CLI""" |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
117 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
118 # parse command line arguments |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
119 description = """write nearest neighborfiles""" |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
120 parser = CitiesParser(description=description) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
121 parser.add_argument('output', type=argparse.FileType('w'), |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
122 help="output file to dump JSON to") |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
123 parser.add_argument('--counter', '--output-counter', |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
124 dest='output_counter', |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
125 type=int, default=100, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
126 help="how often to output progress updates [DEFAULT: %(default)s]") |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
127 parser.add_argument('-k', dest='k', |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
128 type=int, default=50, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
129 help="number of neighbors to determine [DEFAULT: %(default)s]") |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
130 options = parser.parse_args(args) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
131 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
132 # parse cities |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
133 cities = list(read_cities(options.cities, fields=fields)) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
134 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
135 # get locations |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
136 city_locations = locations(cities) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
137 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
138 # calculate neighbors |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
139 neighbors = calculate_neighbors(city_locations, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
140 k=options.k, |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
141 output=options.output_counter) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
142 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
143 # output |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
144 options.output.write(json.dumps(neighbors, indent=2)) |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
145 |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
146 if __name__ == '__main__': |
|
5dba84370182
initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
147 main() |
