changeset 1:1b94f3bf97e5

* limit distance function
* start gridding
* improve unicode handling
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 14:02:14 -0700
parents 5dba84370182
children 50ee13cddf58
files globalneighbors/distance.py globalneighbors/grid.py globalneighbors/read.py globalneighbors/schema.py globalneighbors/web.py tests/common.py tests/test_distance.py tests/test_grid.py tests/test_read.py tests/test_web.py
diffstat 10 files changed, 109 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/globalneighbors/distance.py	Sat Jun 24 12:03:39 2017 -0700
+++ b/globalneighbors/distance.py	Sat Jun 24 14:02:14 2017 -0700
@@ -96,8 +96,8 @@
             # insert in order
             for i in (id1, id2):
                 distances = neighbors.setdefault(i, [])
-                if len(distances) == k and new_distance > distances[-1][-1]:
-                    break
+                if len(distances) == k and new_distance >= distances[-1][-1]:
+                    continue
 
                 # TODO: Binary Search Tree
                 for _index, (geoid, old_distance) in enumerate(distances):
@@ -109,9 +109,6 @@
                 else:
                     distances.append((i, new_distance))
 
-    print ("DONE")
-    sys.stdout.flush()
-    import pdb; pdb.set_trace()
     return neighbors
 
 
@@ -144,8 +141,6 @@
                                     output=options.output_counter)
 
     # output
-    print ("AFTER")
-    sys.stddout.flush()
     options.output.write(json.dumps(neighbors, indent=2))
 
 if __name__ == '__main__':
--- a/globalneighbors/grid.py	Sat Jun 24 12:03:39 2017 -0700
+++ b/globalneighbors/grid.py	Sat Jun 24 14:02:14 2017 -0700
@@ -13,18 +13,18 @@
         self.create_grid()
 
     def create_grid(self):
-        self.grid = [[] for _ in xrange(self.n[-1])
-                     for _ in xrange(self.n[0])]
+        self.grid = []
+        for _ in xrange(self.n[0]):
+            self.grid.append([set() for _ in xrange(self.n[-1])])
 
     def add(self, geoid, lat, lon):
         latlon = (lat, lon)
-        self[self.index(lat, lon)].append(geoid)
+        self[self.index(lat, lon)].add(geoid)
 
     def __getitem__(self, index):
         """
         index -- 2-tuple or list of i and j indices
         """
-        import pdb; pdb.set_trace()
         return self.grid[index[0]][index[1]]
 
     def index(self, lat, lon):
@@ -35,3 +35,5 @@
         """
         return neighbors of points i, j
         """
+        if i:
+            raise NotImplementedError('TODO')
--- a/globalneighbors/read.py	Sat Jun 24 12:03:39 2017 -0700
+++ b/globalneighbors/read.py	Sat Jun 24 14:02:14 2017 -0700
@@ -12,6 +12,7 @@
 
 string = (str, basestring)
 
+
 def read_tsv_generator(f):
     """read tab-separated values from file `f` into memory"""
 
@@ -43,6 +44,7 @@
                     if key in fields}
         yield cast
 
+
 def read_city_list(f, fields=None, types=types):
     """read cities as a list"""
 
--- a/globalneighbors/schema.py	Sat Jun 24 12:03:39 2017 -0700
+++ b/globalneighbors/schema.py	Sat Jun 24 14:02:14 2017 -0700
@@ -69,6 +69,8 @@
 name = 'asciiname'
 primary_key = 'geonameid'
 
+# fields that should be unicode
+unicode_fields = ('name', 'asciiname')
 
 def cast_row(row, types=types):
     """
@@ -90,4 +92,6 @@
                 retval[key] = None
             else:
                 raise
+        if key in unicode_fields:
+            retval[key] = retval[key].decode('utf-8')
     return retval
--- a/globalneighbors/web.py	Sat Jun 24 12:03:39 2017 -0700
+++ b/globalneighbors/web.py	Sat Jun 24 14:02:14 2017 -0700
@@ -19,6 +19,23 @@
 from .schema import name
 
 
+def autocomplete(cities, startswith=None):
+    """autocomplete function for city names"""
+    ### TODO: sort once, ahead of time
+
+    if startswith:
+        retval = []
+        for i in cities:
+            try:
+                if i[name].startswith(startswith):
+                    retval.append(i[name])
+            except Exception as e:
+                import pdb; pdb.set_trace()
+        return sorted(retval)
+    else:
+        return sorted([i[name] for i in cities])
+
+
 class Handler(object):
     """base class for HTTP handler"""
 
@@ -45,12 +62,8 @@
 
     def cities(self, startswith=None):
         """return list of cities"""
-
-        if startswith:
-            return sorted([i[name] for i in self._cities
-                           if i[name].startswith(startswith)])
-        else:
-            return sorted([i[name] for i in self._cities])
+        return autocomplete(self._cities,
+                            startswith=startswith)
 
     def GET(self, request):
         return Response(content_type=self.content_type,
--- a/tests/common.py	Sat Jun 24 12:03:39 2017 -0700
+++ b/tests/common.py	Sat Jun 24 14:02:14 2017 -0700
@@ -2,3 +2,11 @@
 common test functionality
 """
 
+import os
+
+here = os.path.dirname(os.path.abspath(__file__))
+data = os.path.join(here, 'data')
+
+def datafile(*path):
+    """return path to a test data file"""
+    return os.path.join(data, *path)
--- a/tests/test_distance.py	Sat Jun 24 12:03:39 2017 -0700
+++ b/tests/test_distance.py	Sat Jun 24 14:02:14 2017 -0700
@@ -102,5 +102,6 @@
             for i in range(1, len(distances)):
                 assert distances[i] >= distances[i-1]
 
+
 if __name__ == '__main__':
     unittest.main()
--- a/tests/test_grid.py	Sat Jun 24 12:03:39 2017 -0700
+++ b/tests/test_grid.py	Sat Jun 24 14:02:14 2017 -0700
@@ -4,18 +4,28 @@
 test that we can grid a solution
 """
 
+import os
 import unittest
+from common import datafile
 from globalneighbors.grid import LatLonGrid
+from globalneighbors.locations import locations
+from globalneighbors.read import read_city_list
+
 
 class TestGrid(unittest.TestCase):
     """test gridding functionality"""
 
+    ### test functions
+
     def test_dimensions(self):
 
         # make a 2 degree grid
         grid = LatLonGrid(90, 180)
         assert grid.n == (90, 180)
         assert grid.d == (2., 2.)
+        assert len(grid.grid) == 90
+        for row in grid.grid:
+            assert len(row) == 180
 
     def test_insertion(self):
 
@@ -25,7 +35,23 @@
         i, j = grid.index(*coord)
         assert i == 1
         assert j == 3
-        assert grid[(i,j)] == [1234]
+        assert grid[(i,j)] == set([1234])
+
+    def test_sample(self):
+
+        samplefile = datafile('sample.tsv')
+        assert os.path.exists(samplefile)
+        city_locations = locations(read_city_list(samplefile))
+        self.grid_locations(city_locations)
+
+    ### generic (utility) functions
+
+    def grid_locations(self, locations):
+        """grid locations + test created grid"""
+
+        grid = LatLonGrid(8, 8)
+        for geoid, (lat, lon) in locations.items():
+            grid.add(geoid, lat, lon)
 
 if __name__ == '__main__':
     unittest.main()
--- a/tests/test_read.py	Sat Jun 24 12:03:39 2017 -0700
+++ b/tests/test_read.py	Sat Jun 24 14:02:14 2017 -0700
@@ -8,6 +8,7 @@
 import unittest
 from globalneighbors import schema
 from globalneighbors.read import read_tsv
+from globalneighbors.read import read_city_list
 
 
 here = os.path.dirname(os.path.abspath(__file__))
@@ -47,6 +48,11 @@
         for row in cities:
             row = schema.cast_row(row, types=schema.types)
 
+    def test_read_unicode(self):
+        """ensure we can read the cities as unicode"""
+
+        cities = read_city_list(self.full_tsv)
+
 
 if __name__ == '__main__':
     unittest.main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_web.py	Sat Jun 24 14:02:14 2017 -0700
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+
+"""
+test web functionality
+"""
+
+import os
+import unittest
+from common import datafile
+from globalneighbors.read import read_city_list
+from globalneighbors.web import autocomplete
+
+
+class WebFunctionalityTest(unittest.TestCase):
+
+    def test_autcomplete(self):
+        """test autocomplete underlying functionality"""
+
+        # read base data
+        cityfile = datafile('cities1000.txt')
+        assert os.path.exists(cityfile)
+        cities = read_city_list(cityfile)
+
+        # Let's look for Chicago
+        q = u'Ch'
+        results = autocomplete(cities, q)
+        assert all([result.startswith(q)
+                    for result in results])
+        assert sorted(results) == results
+        assert 'Chicago' in results
+
+
+if __name__ == '__main__':
+    unittest.main()