# HG changeset patch # User Jeff Hammel # Date 1498338134 25200 # Node ID 1b94f3bf97e57257a0d9f1f7f42bfd0d6910d69b # Parent 5dba843701828063267b7640d61b2e73b3e3aa80 * limit distance function * start gridding * improve unicode handling diff -r 5dba84370182 -r 1b94f3bf97e5 globalneighbors/distance.py --- a/globalneighbors/distance.py Sat Jun 24 12:03:39 2017 -0700 +++ b/globalneighbors/distance.py Sat Jun 24 14:02:14 2017 -0700 @@ -96,8 +96,8 @@ # insert in order for i in (id1, id2): distances = neighbors.setdefault(i, []) - if len(distances) == k and new_distance > distances[-1][-1]: - break + if len(distances) == k and new_distance >= distances[-1][-1]: + continue # TODO: Binary Search Tree for _index, (geoid, old_distance) in enumerate(distances): @@ -109,9 +109,6 @@ else: distances.append((i, new_distance)) - print ("DONE") - sys.stdout.flush() - import pdb; pdb.set_trace() return neighbors @@ -144,8 +141,6 @@ output=options.output_counter) # output - print ("AFTER") - sys.stddout.flush() options.output.write(json.dumps(neighbors, indent=2)) if __name__ == '__main__': diff -r 5dba84370182 -r 1b94f3bf97e5 globalneighbors/grid.py --- a/globalneighbors/grid.py Sat Jun 24 12:03:39 2017 -0700 +++ b/globalneighbors/grid.py Sat Jun 24 14:02:14 2017 -0700 @@ -13,18 +13,18 @@ self.create_grid() def create_grid(self): - self.grid = [[] for _ in xrange(self.n[-1]) - for _ in xrange(self.n[0])] + self.grid = [] + for _ in xrange(self.n[0]): + self.grid.append([set() for _ in xrange(self.n[-1])]) def add(self, geoid, lat, lon): latlon = (lat, lon) - self[self.index(lat, lon)].append(geoid) + self[self.index(lat, lon)].add(geoid) def __getitem__(self, index): """ index -- 2-tuple or list of i and j indices """ - import pdb; pdb.set_trace() return self.grid[index[0]][index[1]] def index(self, lat, lon): @@ -35,3 +35,5 @@ """ return neighbors of points i, j """ + if i: + raise NotImplementedError('TODO') diff -r 5dba84370182 -r 1b94f3bf97e5 globalneighbors/read.py --- a/globalneighbors/read.py Sat Jun 24 12:03:39 2017 -0700 +++ b/globalneighbors/read.py Sat Jun 24 14:02:14 2017 -0700 @@ -12,6 +12,7 @@ string = (str, basestring) + def read_tsv_generator(f): """read tab-separated values from file `f` into memory""" @@ -43,6 +44,7 @@ if key in fields} yield cast + def read_city_list(f, fields=None, types=types): """read cities as a list""" diff -r 5dba84370182 -r 1b94f3bf97e5 globalneighbors/schema.py --- a/globalneighbors/schema.py Sat Jun 24 12:03:39 2017 -0700 +++ b/globalneighbors/schema.py Sat Jun 24 14:02:14 2017 -0700 @@ -69,6 +69,8 @@ name = 'asciiname' primary_key = 'geonameid' +# fields that should be unicode +unicode_fields = ('name', 'asciiname') def cast_row(row, types=types): """ @@ -90,4 +92,6 @@ retval[key] = None else: raise + if key in unicode_fields: + retval[key] = retval[key].decode('utf-8') return retval diff -r 5dba84370182 -r 1b94f3bf97e5 globalneighbors/web.py --- a/globalneighbors/web.py Sat Jun 24 12:03:39 2017 -0700 +++ b/globalneighbors/web.py Sat Jun 24 14:02:14 2017 -0700 @@ -19,6 +19,23 @@ from .schema import name +def autocomplete(cities, startswith=None): + """autocomplete function for city names""" + ### TODO: sort once, ahead of time + + if startswith: + retval = [] + for i in cities: + try: + if i[name].startswith(startswith): + retval.append(i[name]) + except Exception as e: + import pdb; pdb.set_trace() + return sorted(retval) + else: + return sorted([i[name] for i in cities]) + + class Handler(object): """base class for HTTP handler""" @@ -45,12 +62,8 @@ def cities(self, startswith=None): """return list of cities""" - - if startswith: - return sorted([i[name] for i in self._cities - if i[name].startswith(startswith)]) - else: - return sorted([i[name] for i in self._cities]) + return autocomplete(self._cities, + startswith=startswith) def GET(self, request): return Response(content_type=self.content_type, diff -r 5dba84370182 -r 1b94f3bf97e5 tests/common.py --- a/tests/common.py Sat Jun 24 12:03:39 2017 -0700 +++ b/tests/common.py Sat Jun 24 14:02:14 2017 -0700 @@ -2,3 +2,11 @@ common test functionality """ +import os + +here = os.path.dirname(os.path.abspath(__file__)) +data = os.path.join(here, 'data') + +def datafile(*path): + """return path to a test data file""" + return os.path.join(data, *path) diff -r 5dba84370182 -r 1b94f3bf97e5 tests/test_distance.py --- a/tests/test_distance.py Sat Jun 24 12:03:39 2017 -0700 +++ b/tests/test_distance.py Sat Jun 24 14:02:14 2017 -0700 @@ -102,5 +102,6 @@ for i in range(1, len(distances)): assert distances[i] >= distances[i-1] + if __name__ == '__main__': unittest.main() diff -r 5dba84370182 -r 1b94f3bf97e5 tests/test_grid.py --- a/tests/test_grid.py Sat Jun 24 12:03:39 2017 -0700 +++ b/tests/test_grid.py Sat Jun 24 14:02:14 2017 -0700 @@ -4,18 +4,28 @@ test that we can grid a solution """ +import os import unittest +from common import datafile from globalneighbors.grid import LatLonGrid +from globalneighbors.locations import locations +from globalneighbors.read import read_city_list + class TestGrid(unittest.TestCase): """test gridding functionality""" + ### test functions + def test_dimensions(self): # make a 2 degree grid grid = LatLonGrid(90, 180) assert grid.n == (90, 180) assert grid.d == (2., 2.) + assert len(grid.grid) == 90 + for row in grid.grid: + assert len(row) == 180 def test_insertion(self): @@ -25,7 +35,23 @@ i, j = grid.index(*coord) assert i == 1 assert j == 3 - assert grid[(i,j)] == [1234] + assert grid[(i,j)] == set([1234]) + + def test_sample(self): + + samplefile = datafile('sample.tsv') + assert os.path.exists(samplefile) + city_locations = locations(read_city_list(samplefile)) + self.grid_locations(city_locations) + + ### generic (utility) functions + + def grid_locations(self, locations): + """grid locations + test created grid""" + + grid = LatLonGrid(8, 8) + for geoid, (lat, lon) in locations.items(): + grid.add(geoid, lat, lon) if __name__ == '__main__': unittest.main() diff -r 5dba84370182 -r 1b94f3bf97e5 tests/test_read.py --- a/tests/test_read.py Sat Jun 24 12:03:39 2017 -0700 +++ b/tests/test_read.py Sat Jun 24 14:02:14 2017 -0700 @@ -8,6 +8,7 @@ import unittest from globalneighbors import schema from globalneighbors.read import read_tsv +from globalneighbors.read import read_city_list here = os.path.dirname(os.path.abspath(__file__)) @@ -47,6 +48,11 @@ for row in cities: row = schema.cast_row(row, types=schema.types) + def test_read_unicode(self): + """ensure we can read the cities as unicode""" + + cities = read_city_list(self.full_tsv) + if __name__ == '__main__': unittest.main() diff -r 5dba84370182 -r 1b94f3bf97e5 tests/test_web.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_web.py Sat Jun 24 14:02:14 2017 -0700 @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +""" +test web functionality +""" + +import os +import unittest +from common import datafile +from globalneighbors.read import read_city_list +from globalneighbors.web import autocomplete + + +class WebFunctionalityTest(unittest.TestCase): + + def test_autcomplete(self): + """test autocomplete underlying functionality""" + + # read base data + cityfile = datafile('cities1000.txt') + assert os.path.exists(cityfile) + cities = read_city_list(cityfile) + + # Let's look for Chicago + q = u'Ch' + results = autocomplete(cities, q) + assert all([result.startswith(q) + for result in results]) + assert sorted(results) == results + assert 'Chicago' in results + + +if __name__ == '__main__': + unittest.main()