annotate globalneighbors/distance.py @ 1:1b94f3bf97e5

* limit distance function * start gridding * improve unicode handling
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 14:02:14 -0700
parents 5dba84370182
children 49aae0c0293b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 distance functionality
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 import argparse
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 import json
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 import sys
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 import time
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 from math import asin, sin, cos, sqrt, pi, fabs
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 from .cli import CitiesParser
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 from .constants import Rearth
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12 from .locations import locations
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13 from .read import read_cities
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14 from .schema import fields
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16 DEGREESTORADIANS = pi/180.
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 def haversine(lat1, lon1, lat2, lon2, r=1):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21 see
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
22 https://en.wikipedia.org/wiki/Haversine_formula
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23 Coordinates in radians
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25 return 2*r*asin(
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
26 sqrt(sin(0.5*(lat2-lat1))**2
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
27 +cos(lat1)*cos(lat2)*(sin(0.5*(lon2-lon1))**2)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
28 ))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
29
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
30
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
31 def deg_to_rad(degrees):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
32 return degrees*DEGREESTORADIANS
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
33
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
34
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
35 def calculate_distances(locations, r=Rearth):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
36 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
37 WARNING! This is an N-squared approach
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
38 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
39
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
40 # convert to rad
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
41 rad_locations = [(location, tuple([deg_to_rad(i)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
42 for i in latlon]))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
43 for location, latlon
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
44 in locations.items()]
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
45
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
46 # use haversince function on N-body problem
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
47 for index, loc1 in enumerate(rad_locations):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
48 id1, (lat1, lon1) = loc1
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
49 for loc2 in rad_locations[index+1:]:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
50 id2, (lat2, lon2) = loc2
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
51 key = (id1, id2) if id2 > id1 else (id2, id1)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
52 yield (key,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
53 haversine(lat1, lon1, lat2, lon2, r=r))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
54
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
55
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
56 def calculate_neighbors(locations,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
57 k=10,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
58 lat_tol=1.,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
59 lon_tol=1.,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
60 output=1000,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
61 neighbors=None):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
62 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
63 calculate `k` nearest neighbors for each location
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
64 """
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
65 neighbors = neighbors or {}
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
66 items = locations.items()
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
67 index = 0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
68 n_items = len(items)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
69 start = int(time.time())
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
70 while items:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
71 index += 1
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
72 if not index % output:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
73 # output status counter
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
74 now = int(time.time())
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
75 duration = now - start
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
76 start = now
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
77 print ('{},{},{},{}'.format(index,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
78 len(items),
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
79 n_items,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
80 duration))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
81 id1, (lat1, lon1) = items.pop()
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
82 for loc2 in items:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
83 id2, (lat2, lon2) = loc2
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
84
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
85 # filter out locations based on latlon boxing
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
86 if fabs(lat2 - lat1) > lat_tol:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
87 continue
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
88 if fabs(lon2 - lon1) > lon_tol:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
89 continue
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
90
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
91 # determine distance
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
92 args = [deg_to_rad(i) for i in
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
93 (lat1, lon1, lat2, lon2)]
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
94 new_distance = haversine(*args, r=Rearth)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
95
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
96 # insert in order
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
97 for i in (id1, id2):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
98 distances = neighbors.setdefault(i, [])
1
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
99 if len(distances) == k and new_distance >= distances[-1][-1]:
1b94f3bf97e5 * limit distance function
Jeff Hammel <k0scist@gmail.com>
parents: 0
diff changeset
100 continue
0
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
101
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
102 # TODO: Binary Search Tree
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
103 for _index, (geoid, old_distance) in enumerate(distances):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
104 if new_distance < old_distance:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
105 distances.insert(_index, (i, new_distance))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
106 if len(distances) == k+1:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
107 distances.pop()
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
108 break
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
109 else:
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
110 distances.append((i, new_distance))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
111
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
112 return neighbors
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
113
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
114
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
115 def main(args=sys.argv[1:]):
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
116 """CLI"""
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
117
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
118 # parse command line arguments
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
119 description = """write nearest neighborfiles"""
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
120 parser = CitiesParser(description=description)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
121 parser.add_argument('output', type=argparse.FileType('w'),
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
122 help="output file to dump JSON to")
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
123 parser.add_argument('--counter', '--output-counter',
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
124 dest='output_counter',
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
125 type=int, default=100,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
126 help="how often to output progress updates [DEFAULT: %(default)s]")
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
127 parser.add_argument('-k', dest='k',
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
128 type=int, default=50,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
129 help="number of neighbors to determine [DEFAULT: %(default)s]")
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
130 options = parser.parse_args(args)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
131
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
132 # parse cities
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
133 cities = list(read_cities(options.cities, fields=fields))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
134
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
135 # get locations
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
136 city_locations = locations(cities)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
137
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
138 # calculate neighbors
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
139 neighbors = calculate_neighbors(city_locations,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
140 k=options.k,
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
141 output=options.output_counter)
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
142
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
143 # output
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
144 options.output.write(json.dumps(neighbors, indent=2))
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
145
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
146 if __name__ == '__main__':
5dba84370182 initial commit; half-working prototype
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
147 main()