comparison globalneighbors/distance.py @ 0:5dba84370182

initial commit; half-working prototype
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 12:03:39 -0700
parents
children 1b94f3bf97e5
comparison
equal deleted inserted replaced
-1:000000000000 0:5dba84370182
1 """
2 distance functionality
3 """
4
5 import argparse
6 import json
7 import sys
8 import time
9 from math import asin, sin, cos, sqrt, pi, fabs
10 from .cli import CitiesParser
11 from .constants import Rearth
12 from .locations import locations
13 from .read import read_cities
14 from .schema import fields
15
16 DEGREESTORADIANS = pi/180.
17
18
19 def haversine(lat1, lon1, lat2, lon2, r=1):
20 """
21 see
22 https://en.wikipedia.org/wiki/Haversine_formula
23 Coordinates in radians
24 """
25 return 2*r*asin(
26 sqrt(sin(0.5*(lat2-lat1))**2
27 +cos(lat1)*cos(lat2)*(sin(0.5*(lon2-lon1))**2)
28 ))
29
30
31 def deg_to_rad(degrees):
32 return degrees*DEGREESTORADIANS
33
34
35 def calculate_distances(locations, r=Rearth):
36 """
37 WARNING! This is an N-squared approach
38 """
39
40 # convert to rad
41 rad_locations = [(location, tuple([deg_to_rad(i)
42 for i in latlon]))
43 for location, latlon
44 in locations.items()]
45
46 # use haversince function on N-body problem
47 for index, loc1 in enumerate(rad_locations):
48 id1, (lat1, lon1) = loc1
49 for loc2 in rad_locations[index+1:]:
50 id2, (lat2, lon2) = loc2
51 key = (id1, id2) if id2 > id1 else (id2, id1)
52 yield (key,
53 haversine(lat1, lon1, lat2, lon2, r=r))
54
55
56 def calculate_neighbors(locations,
57 k=10,
58 lat_tol=1.,
59 lon_tol=1.,
60 output=1000,
61 neighbors=None):
62 """
63 calculate `k` nearest neighbors for each location
64 """
65 neighbors = neighbors or {}
66 items = locations.items()
67 index = 0
68 n_items = len(items)
69 start = int(time.time())
70 while items:
71 index += 1
72 if not index % output:
73 # output status counter
74 now = int(time.time())
75 duration = now - start
76 start = now
77 print ('{},{},{},{}'.format(index,
78 len(items),
79 n_items,
80 duration))
81 id1, (lat1, lon1) = items.pop()
82 for loc2 in items:
83 id2, (lat2, lon2) = loc2
84
85 # filter out locations based on latlon boxing
86 if fabs(lat2 - lat1) > lat_tol:
87 continue
88 if fabs(lon2 - lon1) > lon_tol:
89 continue
90
91 # determine distance
92 args = [deg_to_rad(i) for i in
93 (lat1, lon1, lat2, lon2)]
94 new_distance = haversine(*args, r=Rearth)
95
96 # insert in order
97 for i in (id1, id2):
98 distances = neighbors.setdefault(i, [])
99 if len(distances) == k and new_distance > distances[-1][-1]:
100 break
101
102 # TODO: Binary Search Tree
103 for _index, (geoid, old_distance) in enumerate(distances):
104 if new_distance < old_distance:
105 distances.insert(_index, (i, new_distance))
106 if len(distances) == k+1:
107 distances.pop()
108 break
109 else:
110 distances.append((i, new_distance))
111
112 print ("DONE")
113 sys.stdout.flush()
114 import pdb; pdb.set_trace()
115 return neighbors
116
117
118 def main(args=sys.argv[1:]):
119 """CLI"""
120
121 # parse command line arguments
122 description = """write nearest neighborfiles"""
123 parser = CitiesParser(description=description)
124 parser.add_argument('output', type=argparse.FileType('w'),
125 help="output file to dump JSON to")
126 parser.add_argument('--counter', '--output-counter',
127 dest='output_counter',
128 type=int, default=100,
129 help="how often to output progress updates [DEFAULT: %(default)s]")
130 parser.add_argument('-k', dest='k',
131 type=int, default=50,
132 help="number of neighbors to determine [DEFAULT: %(default)s]")
133 options = parser.parse_args(args)
134
135 # parse cities
136 cities = list(read_cities(options.cities, fields=fields))
137
138 # get locations
139 city_locations = locations(cities)
140
141 # calculate neighbors
142 neighbors = calculate_neighbors(city_locations,
143 k=options.k,
144 output=options.output_counter)
145
146 # output
147 print ("AFTER")
148 sys.stddout.flush()
149 options.output.write(json.dumps(neighbors, indent=2))
150
151 if __name__ == '__main__':
152 main()