diff globalneighbors/distance.py @ 0:5dba84370182

initial commit; half-working prototype
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 12:03:39 -0700
parents
children 1b94f3bf97e5
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/globalneighbors/distance.py	Sat Jun 24 12:03:39 2017 -0700
@@ -0,0 +1,152 @@
+"""
+distance functionality
+"""
+
+import argparse
+import json
+import sys
+import time
+from math import asin, sin, cos, sqrt, pi, fabs
+from .cli import CitiesParser
+from .constants import Rearth
+from .locations import locations
+from .read import read_cities
+from .schema import fields
+
+DEGREESTORADIANS = pi/180.
+
+
+def haversine(lat1, lon1, lat2, lon2, r=1):
+    """
+    see
+    https://en.wikipedia.org/wiki/Haversine_formula
+    Coordinates in radians
+    """
+    return 2*r*asin(
+        sqrt(sin(0.5*(lat2-lat1))**2
+             +cos(lat1)*cos(lat2)*(sin(0.5*(lon2-lon1))**2)
+        ))
+
+
+def deg_to_rad(degrees):
+    return degrees*DEGREESTORADIANS
+
+
+def calculate_distances(locations, r=Rearth):
+    """
+    WARNING! This is an N-squared approach
+    """
+
+    # convert to rad
+    rad_locations = [(location, tuple([deg_to_rad(i)
+                                       for i in latlon]))
+                      for location, latlon
+                      in locations.items()]
+
+    # use haversince function on N-body problem
+    for index, loc1 in enumerate(rad_locations):
+        id1, (lat1, lon1) = loc1
+        for loc2 in rad_locations[index+1:]:
+            id2, (lat2, lon2) = loc2
+            key = (id1, id2) if id2 > id1 else (id2, id1)
+            yield (key,
+                   haversine(lat1, lon1, lat2, lon2, r=r))
+
+
+def calculate_neighbors(locations,
+                        k=10,
+                        lat_tol=1.,
+                        lon_tol=1.,
+                        output=1000,
+                        neighbors=None):
+    """
+    calculate `k` nearest neighbors for each location
+    """
+    neighbors = neighbors or {}
+    items = locations.items()
+    index = 0
+    n_items = len(items)
+    start = int(time.time())
+    while items:
+        index += 1
+        if not index % output:
+            # output status counter
+            now = int(time.time())
+            duration =  now - start
+            start = now
+            print ('{},{},{},{}'.format(index,
+                                        len(items),
+                                        n_items,
+                                        duration))
+        id1, (lat1, lon1) = items.pop()
+        for loc2 in items:
+            id2, (lat2, lon2) = loc2
+
+            # filter out locations based on latlon boxing
+            if fabs(lat2 - lat1) > lat_tol:
+                 continue
+            if fabs(lon2 - lon1) > lon_tol:
+                 continue
+
+            # determine distance
+            args = [deg_to_rad(i) for i in
+                    (lat1, lon1, lat2, lon2)]
+            new_distance = haversine(*args, r=Rearth)
+
+            # insert in order
+            for i in (id1, id2):
+                distances = neighbors.setdefault(i, [])
+                if len(distances) == k and new_distance > distances[-1][-1]:
+                    break
+
+                # TODO: Binary Search Tree
+                for _index, (geoid, old_distance) in enumerate(distances):
+                    if new_distance < old_distance:
+                        distances.insert(_index, (i, new_distance))
+                        if len(distances) == k+1:
+                            distances.pop()
+                        break
+                else:
+                    distances.append((i, new_distance))
+
+    print ("DONE")
+    sys.stdout.flush()
+    import pdb; pdb.set_trace()
+    return neighbors
+
+
+def main(args=sys.argv[1:]):
+    """CLI"""
+
+    # parse command line arguments
+    description = """write nearest neighborfiles"""
+    parser = CitiesParser(description=description)
+    parser.add_argument('output', type=argparse.FileType('w'),
+                        help="output file to dump JSON to")
+    parser.add_argument('--counter', '--output-counter',
+                        dest='output_counter',
+                        type=int, default=100,
+                        help="how often to output progress updates [DEFAULT: %(default)s]")
+    parser.add_argument('-k', dest='k',
+                        type=int, default=50,
+                        help="number of neighbors to determine [DEFAULT: %(default)s]")
+    options = parser.parse_args(args)
+
+    # parse cities
+    cities = list(read_cities(options.cities, fields=fields))
+
+    # get locations
+    city_locations = locations(cities)
+
+    # calculate neighbors
+    neighbors = calculate_neighbors(city_locations,
+                                    k=options.k,
+                                    output=options.output_counter)
+
+    # output
+    print ("AFTER")
+    sys.stddout.flush()
+    options.output.write(json.dumps(neighbors, indent=2))
+
+if __name__ == '__main__':
+    main()