Mercurial > hg > GlobalNeighbors
diff globalneighbors/distance.py @ 0:5dba84370182
initial commit; half-working prototype
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 12:03:39 -0700 |
parents | |
children | 1b94f3bf97e5 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/globalneighbors/distance.py Sat Jun 24 12:03:39 2017 -0700 @@ -0,0 +1,152 @@ +""" +distance functionality +""" + +import argparse +import json +import sys +import time +from math import asin, sin, cos, sqrt, pi, fabs +from .cli import CitiesParser +from .constants import Rearth +from .locations import locations +from .read import read_cities +from .schema import fields + +DEGREESTORADIANS = pi/180. + + +def haversine(lat1, lon1, lat2, lon2, r=1): + """ + see + https://en.wikipedia.org/wiki/Haversine_formula + Coordinates in radians + """ + return 2*r*asin( + sqrt(sin(0.5*(lat2-lat1))**2 + +cos(lat1)*cos(lat2)*(sin(0.5*(lon2-lon1))**2) + )) + + +def deg_to_rad(degrees): + return degrees*DEGREESTORADIANS + + +def calculate_distances(locations, r=Rearth): + """ + WARNING! This is an N-squared approach + """ + + # convert to rad + rad_locations = [(location, tuple([deg_to_rad(i) + for i in latlon])) + for location, latlon + in locations.items()] + + # use haversince function on N-body problem + for index, loc1 in enumerate(rad_locations): + id1, (lat1, lon1) = loc1 + for loc2 in rad_locations[index+1:]: + id2, (lat2, lon2) = loc2 + key = (id1, id2) if id2 > id1 else (id2, id1) + yield (key, + haversine(lat1, lon1, lat2, lon2, r=r)) + + +def calculate_neighbors(locations, + k=10, + lat_tol=1., + lon_tol=1., + output=1000, + neighbors=None): + """ + calculate `k` nearest neighbors for each location + """ + neighbors = neighbors or {} + items = locations.items() + index = 0 + n_items = len(items) + start = int(time.time()) + while items: + index += 1 + if not index % output: + # output status counter + now = int(time.time()) + duration = now - start + start = now + print ('{},{},{},{}'.format(index, + len(items), + n_items, + duration)) + id1, (lat1, lon1) = items.pop() + for loc2 in items: + id2, (lat2, lon2) = loc2 + + # filter out locations based on latlon boxing + if fabs(lat2 - lat1) > lat_tol: + continue + if fabs(lon2 - lon1) > lon_tol: + continue + + # determine distance + args = [deg_to_rad(i) for i in + (lat1, lon1, lat2, lon2)] + new_distance = haversine(*args, r=Rearth) + + # insert in order + for i in (id1, id2): + distances = neighbors.setdefault(i, []) + if len(distances) == k and new_distance > distances[-1][-1]: + break + + # TODO: Binary Search Tree + for _index, (geoid, old_distance) in enumerate(distances): + if new_distance < old_distance: + distances.insert(_index, (i, new_distance)) + if len(distances) == k+1: + distances.pop() + break + else: + distances.append((i, new_distance)) + + print ("DONE") + sys.stdout.flush() + import pdb; pdb.set_trace() + return neighbors + + +def main(args=sys.argv[1:]): + """CLI""" + + # parse command line arguments + description = """write nearest neighborfiles""" + parser = CitiesParser(description=description) + parser.add_argument('output', type=argparse.FileType('w'), + help="output file to dump JSON to") + parser.add_argument('--counter', '--output-counter', + dest='output_counter', + type=int, default=100, + help="how often to output progress updates [DEFAULT: %(default)s]") + parser.add_argument('-k', dest='k', + type=int, default=50, + help="number of neighbors to determine [DEFAULT: %(default)s]") + options = parser.parse_args(args) + + # parse cities + cities = list(read_cities(options.cities, fields=fields)) + + # get locations + city_locations = locations(cities) + + # calculate neighbors + neighbors = calculate_neighbors(city_locations, + k=options.k, + output=options.output_counter) + + # output + print ("AFTER") + sys.stddout.flush() + options.output.write(json.dumps(neighbors, indent=2)) + +if __name__ == '__main__': + main()