Mercurial > hg > GlobalNeighbors
comparison globalneighbors/distance.py @ 0:5dba84370182
initial commit; half-working prototype
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 12:03:39 -0700 |
parents | |
children | 1b94f3bf97e5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5dba84370182 |
---|---|
1 """ | |
2 distance functionality | |
3 """ | |
4 | |
5 import argparse | |
6 import json | |
7 import sys | |
8 import time | |
9 from math import asin, sin, cos, sqrt, pi, fabs | |
10 from .cli import CitiesParser | |
11 from .constants import Rearth | |
12 from .locations import locations | |
13 from .read import read_cities | |
14 from .schema import fields | |
15 | |
16 DEGREESTORADIANS = pi/180. | |
17 | |
18 | |
19 def haversine(lat1, lon1, lat2, lon2, r=1): | |
20 """ | |
21 see | |
22 https://en.wikipedia.org/wiki/Haversine_formula | |
23 Coordinates in radians | |
24 """ | |
25 return 2*r*asin( | |
26 sqrt(sin(0.5*(lat2-lat1))**2 | |
27 +cos(lat1)*cos(lat2)*(sin(0.5*(lon2-lon1))**2) | |
28 )) | |
29 | |
30 | |
31 def deg_to_rad(degrees): | |
32 return degrees*DEGREESTORADIANS | |
33 | |
34 | |
35 def calculate_distances(locations, r=Rearth): | |
36 """ | |
37 WARNING! This is an N-squared approach | |
38 """ | |
39 | |
40 # convert to rad | |
41 rad_locations = [(location, tuple([deg_to_rad(i) | |
42 for i in latlon])) | |
43 for location, latlon | |
44 in locations.items()] | |
45 | |
46 # use haversince function on N-body problem | |
47 for index, loc1 in enumerate(rad_locations): | |
48 id1, (lat1, lon1) = loc1 | |
49 for loc2 in rad_locations[index+1:]: | |
50 id2, (lat2, lon2) = loc2 | |
51 key = (id1, id2) if id2 > id1 else (id2, id1) | |
52 yield (key, | |
53 haversine(lat1, lon1, lat2, lon2, r=r)) | |
54 | |
55 | |
56 def calculate_neighbors(locations, | |
57 k=10, | |
58 lat_tol=1., | |
59 lon_tol=1., | |
60 output=1000, | |
61 neighbors=None): | |
62 """ | |
63 calculate `k` nearest neighbors for each location | |
64 """ | |
65 neighbors = neighbors or {} | |
66 items = locations.items() | |
67 index = 0 | |
68 n_items = len(items) | |
69 start = int(time.time()) | |
70 while items: | |
71 index += 1 | |
72 if not index % output: | |
73 # output status counter | |
74 now = int(time.time()) | |
75 duration = now - start | |
76 start = now | |
77 print ('{},{},{},{}'.format(index, | |
78 len(items), | |
79 n_items, | |
80 duration)) | |
81 id1, (lat1, lon1) = items.pop() | |
82 for loc2 in items: | |
83 id2, (lat2, lon2) = loc2 | |
84 | |
85 # filter out locations based on latlon boxing | |
86 if fabs(lat2 - lat1) > lat_tol: | |
87 continue | |
88 if fabs(lon2 - lon1) > lon_tol: | |
89 continue | |
90 | |
91 # determine distance | |
92 args = [deg_to_rad(i) for i in | |
93 (lat1, lon1, lat2, lon2)] | |
94 new_distance = haversine(*args, r=Rearth) | |
95 | |
96 # insert in order | |
97 for i in (id1, id2): | |
98 distances = neighbors.setdefault(i, []) | |
99 if len(distances) == k and new_distance > distances[-1][-1]: | |
100 break | |
101 | |
102 # TODO: Binary Search Tree | |
103 for _index, (geoid, old_distance) in enumerate(distances): | |
104 if new_distance < old_distance: | |
105 distances.insert(_index, (i, new_distance)) | |
106 if len(distances) == k+1: | |
107 distances.pop() | |
108 break | |
109 else: | |
110 distances.append((i, new_distance)) | |
111 | |
112 print ("DONE") | |
113 sys.stdout.flush() | |
114 import pdb; pdb.set_trace() | |
115 return neighbors | |
116 | |
117 | |
118 def main(args=sys.argv[1:]): | |
119 """CLI""" | |
120 | |
121 # parse command line arguments | |
122 description = """write nearest neighborfiles""" | |
123 parser = CitiesParser(description=description) | |
124 parser.add_argument('output', type=argparse.FileType('w'), | |
125 help="output file to dump JSON to") | |
126 parser.add_argument('--counter', '--output-counter', | |
127 dest='output_counter', | |
128 type=int, default=100, | |
129 help="how often to output progress updates [DEFAULT: %(default)s]") | |
130 parser.add_argument('-k', dest='k', | |
131 type=int, default=50, | |
132 help="number of neighbors to determine [DEFAULT: %(default)s]") | |
133 options = parser.parse_args(args) | |
134 | |
135 # parse cities | |
136 cities = list(read_cities(options.cities, fields=fields)) | |
137 | |
138 # get locations | |
139 city_locations = locations(cities) | |
140 | |
141 # calculate neighbors | |
142 neighbors = calculate_neighbors(city_locations, | |
143 k=options.k, | |
144 output=options.output_counter) | |
145 | |
146 # output | |
147 print ("AFTER") | |
148 sys.stddout.flush() | |
149 options.output.write(json.dumps(neighbors, indent=2)) | |
150 | |
151 if __name__ == '__main__': | |
152 main() |