changeset 22:e69cb496324e

we have a data dump
author Jeff Hammel <k0scist@gmail.com>
date Sun, 25 Jun 2017 17:45:19 -0700
parents 22c384fe954d
children 6891c5523b69
files globalneighbors/distance.py globalneighbors/neighbors.py globalneighbors/web.py tests/test_write.py
diffstat 4 files changed, 106 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/globalneighbors/distance.py	Sun Jun 25 16:28:56 2017 -0700
+++ b/globalneighbors/distance.py	Sun Jun 25 17:45:19 2017 -0700
@@ -158,6 +158,11 @@
     return neighbors
 
 
+def write_neighbors(fp, neighbors):
+    """write one `<key> <JSON value>` line to `fp` per item of `neighbors`"""
+    for key, value in neighbors.items():  # `.items()` works on Python 2 and 3
+        fp.write("{key} {value}\n".format(key=key, value=json.dumps(value)))
+
 def main(args=sys.argv[1:]):
     """CLI"""
 
@@ -179,11 +184,11 @@
                         help="number of neighbors to determine [DEFAULT: %(default)s]")
     options = parser.parse_args(args)
 
-    # parse cities
-    cities = list(read_cities(options.cities, fields=fields))
+    # get locations
+    city_locations = locations(read_cities(options.cities, fields=fields))
+    options.cities.close()
+    options.output.close()
 
-    # get locations
-    city_locations = locations(cities)
 
     # calculate neighbors
     neighbors = calculate_neighbors(city_locations,
@@ -195,7 +200,9 @@
     # output
     print ("Outputting neighbors")
     sys.stdout.flush()
-    options.output.write(json.dumps(neighbors))
+    import pdb; pdb.set_trace()
+    with open(options.output.name, 'w') as f:
+        f.write(json.dumps(neighbors))
 
 if __name__ == '__main__':
     main()
--- a/globalneighbors/neighbors.py	Sun Jun 25 16:28:56 2017 -0700
+++ b/globalneighbors/neighbors.py	Sun Jun 25 17:45:19 2017 -0700
@@ -2,10 +2,15 @@
 read neighbors file;
 this should be in the form of:
 
-`{geoid: [(geoid_closest_neighbor, distance),
-          (geoid_2nd_closest_neighbor, distance),
-          ...]
- }`
+`geoid [(geoid_closest_neighbor, distance), (geoid_2nd_closest_neighbor, distance), ...]`
+
+One record *PER LINE*.  This format was chosen because it is easier to
+read and write iteratively than a single JSON document.
+
+While CSV could be made to fit this model, because
+there are both distances and geo IDs as pairs, it is not
+the most natural fit.  So we'll settle for our own data model.
+No, it's not the best, but so be it (for now).
 """
 
 import json
@@ -21,6 +26,9 @@
 
     retval = {}
     for line in f:
-        data = json.loads(line)
-        retval.update(data)
+        key, value = line.split(None, 1)
+        key = int(key)
+        data = json.loads(value)
+        data = [tuple(item) for item in data]
+        retval[key] = data
     return retval
--- a/globalneighbors/web.py	Sun Jun 25 16:28:56 2017 -0700
+++ b/globalneighbors/web.py	Sun Jun 25 17:45:19 2017 -0700
@@ -108,6 +108,19 @@
                         body=json.dumps(self.cities(
                             startswith=request.GET.get('term'))))
 
+class NeighborsHandler(Handler):
+    content_type = 'application/json'  # body: JSON list of neighbors
+
+    def __init__(self, neighbors):
+        self.neighbors = neighbors  # mapping of geoid -> neighbor list
+
+    def GET(self, request):
+        try:  # the neighbors reader keys by int; query values are strings
+            found = self.neighbors.get(int(request.GET.get('geoid')), [])
+        except (TypeError, ValueError):  # missing or non-numeric geoid
+            found = []
+        return Response(content_type=self.content_type, body=json.dumps(found))
+
 
 class GlobalHandler(Handler):
     """WSGI HTTP Handler"""
@@ -126,7 +139,9 @@
                                      fields=fields)
         self.locations = locations(self.cities)
         if neighbors_file:
-            pass  # TODO
+            self.neighbors = read_neighbors_file(neighbors_file)
+        else:
+            self.neighbors = None
 
         # get country codes
         self.country_codes = sorted(set([city['country code']
@@ -166,6 +181,8 @@
                 if not city:
                     return
                 variables = dict(city=city)
+                if self.neighbors:
+                    import pdb; pdb.set_trace()
                 return Response(content_type=self.content_type,
                                 body=self.citypage.render(variables))
             except ValueError:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_write.py	Sun Jun 25 17:45:19 2017 -0700
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+
+"""
+test writing + reading distances
+"""
+
+import os
+import shutil
+import tempfile
+import unittest
+from common import datafile
+from globalneighbors.distance import calculate_neighbors
+from globalneighbors.distance import write_neighbors
+from globalneighbors.locations import locations
+from globalneighbors.neighbors import read_neighbors_file
+from globalneighbors.read import read_cities
+
+
+class TestDistanceReadWrite(unittest.TestCase):
+    """round-trip neighbors through `write_neighbors` + `read_neighbors_file`"""
+
+    def test_10000(self):
+        """test 10000 cities"""
+
+        # read locations
+        citiesfile = datafile('10000cities.tsv')
+        self.assertTrue(os.path.exists(citiesfile))
+        with open(citiesfile) as f:
+            city_locations = locations(read_cities(f))
+
+        # calculate neighbors
+        neighbors = calculate_neighbors(city_locations,
+                                        k=50,
+                                        lat_tol=2.,
+                                        lon_tol=2.)
+
+        # make a staging area
+        tmpdir = tempfile.mkdtemp()
+        try:
+            # write the neighbors
+            outfile = os.path.join(tmpdir, 'neighbors.dat')
+            self.assertFalse(os.path.exists(outfile))
+            with open(outfile, 'w') as f:
+                write_neighbors(f, neighbors)
+            self.assertTrue(os.path.exists(outfile))
+
+            # read the neighbors
+            with open(outfile) as f:
+                new_neighbors = read_neighbors_file(f)
+        finally:
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+        # they should be equal
+        self.assertEqual(len(neighbors), len(new_neighbors))
+        self.assertEqual(sorted(neighbors), sorted(new_neighbors))
+        for key in neighbors:
+            # compare entry by entry so a failure shows only the mismatch
+            self.assertEqual(neighbors[key], new_neighbors[key])
+
+
+if __name__ == '__main__':
+    unittest.main()  # run the tests when this file is executed directly