view globalneighbors/distance.py @ 7:254195d0bac2

partial implementation of autocomplete using jqueryui; easyautocomplete.com may be more what we want
author Jeff Hammel <k0scist@gmail.com>
date Sun, 25 Jun 2017 09:13:48 -0700
parents 316e1d54ffd4
children e3d6919130ca
line wrap: on
line source

"""
distance functionality
"""

import argparse
import json
import sys
import time
from math import asin, sin, cos, sqrt, pi, fabs
from .cli import CitiesParser
from .constants import Rearth
from .locations import locations
from .read import read_cities
from .schema import fields

DEGREESTORADIANS = pi/180.


def haversine(lat1, lon1, lat2, lon2, r=1):
    """
    see
    https://en.wikipedia.org/wiki/Haversine_formula
    Coordinates in radians
    """
    return 2*r*asin(
        sqrt(sin(0.5*(lat2-lat1))**2
             +cos(lat1)*cos(lat2)*(sin(0.5*(lon2-lon1))**2)
        ))


def deg_to_rad(degrees):
    return degrees*DEGREESTORADIANS


def insert_distance(distances, i, new_distance, k):
    """
    insert `(i, new_distance)` into `distances`
    keeping distances in order with maximum of size `k`
    """

    if len(distances) == k and new_distance >= distances[-1][-1]:
        return

     # TODO: Binary Search Tree
    for _index, (geoid, old_distance) in enumerate(distances):
        if new_distance < old_distance:
            distances.insert(_index, (i, new_distance))
            if len(distances) == k+1:
                distances.pop()
            break
    else:
        distances.append((i, new_distance))


def insert_distance_bisect(distances, i, new_distance, k):
    if not distances:
        distances.append((i, new_distance))
        return
    if new_distance >= distances[-1][-1]:
        if len(distances) < k:
            distances.append((i, new_distance))
        return
    indices = [0, len(distances)]
    while True:
        midpoint = int((indices[-1] - indices[0])/2)


def calculate_distances(locations, r=Rearth):
    """
    WARNING! This is an N-squared approach
    """

    # convert to rad
    rad_locations = [(location, tuple([deg_to_rad(i)
                                       for i in latlon]))
                      for location, latlon
                      in locations.items()]

    # use haversince function on N-body problem
    for index, loc1 in enumerate(rad_locations):
        id1, (lat1, lon1) = loc1
        for loc2 in rad_locations[index+1:]:
            id2, (lat2, lon2) = loc2
            key = (id1, id2) if id2 > id1 else (id2, id1)
            yield (key,
                   haversine(lat1, lon1, lat2, lon2, r=r))


def calculate_neighbors(locations,
                        k=10,
                        lat_tol=1.,
                        lon_tol=1.,
                        output=None,
                        neighbors=None):
    """
    calculate `k` nearest neighbors for each location

    locations -- dict of `geoid: (lat, lon)`
    """
    neighbors = neighbors or {}
    items = locations.items()  # copy
    index = 0
    n_items = len(items)
    start = int(time.time())
    while items:
        index += 1
        if output and not index % output:
            # output status counter
            now = int(time.time())
            duration =  now - start
            start = now
            print ('{},{},{},{}'.format(index,
                                        len(items),
                                        n_items,
                                        duration))
        id1, (lat1, lon1) = items.pop()
        for loc2 in items:
            id2, (lat2, lon2) = loc2

            # filter out locations based on latlon boxing
            if fabs(lat2 - lat1) > lat_tol:
                 continue
            if fabs(lon2 - lon1) > lon_tol:
                 continue

            # determine distance
            args = [deg_to_rad(i) for i in
                    (lat1, lon1, lat2, lon2)]
            new_distance = haversine(*args, r=Rearth)

            # insert in order
            for i in (id1, id2):
                distances = neighbors.setdefault(i, [])

                insert_distance(distances, i, new_distance, k)

    return neighbors


def main(args=sys.argv[1:]):
    """CLI"""

    # parse command line arguments
    description = """write nearest neighborfiles"""
    parser = CitiesParser(description=description)
    parser.add_argument('output', type=argparse.FileType('w'),
                        help="output file to dump JSON to")
    parser.add_argument('--counter', '--output-counter',
                        dest='output_counter',
                        type=int, default=100,
                        help="how often to output progress updates [DEFAULT: %(default)s]")
    parser.add_argument('-k', dest='k',
                        type=int, default=50,
                        help="number of neighbors to determine [DEFAULT: %(default)s]")
    options = parser.parse_args(args)

    # parse cities
    cities = list(read_cities(options.cities, fields=fields))

    # get locations
    city_locations = locations(cities)

    # calculate neighbors
    neighbors = calculate_neighbors(city_locations,
                                    k=options.k,
                                    output=options.output_counter)

    # output
    options.output.write(json.dumps(neighbors))

if __name__ == '__main__':
    main()