diff globalneighbors/neighbors.py @ 22:e69cb496324e

we have a data dump
author Jeff Hammel <k0scist@gmail.com>
date Sun, 25 Jun 2017 17:45:19 -0700
parents 2fef925fbf37
children 6891c5523b69
line wrap: on
line diff
--- a/globalneighbors/neighbors.py	Sun Jun 25 16:28:56 2017 -0700
+++ b/globalneighbors/neighbors.py	Sun Jun 25 17:45:19 2017 -0700
@@ -2,10 +2,15 @@
 read neighbors file;
 this should be in the form of:
 
-`{geoid: [(geoid_closest_neighbor, distance),
-          (geoid_2nd_closest_neighbor, distance),
-          ...]
- }`
+`geoid [(geoid_closest_neighbor, distance), (geoid_2nd_closest_neighbor, distance), ...]`
+
+*PER LINE*.  This format was chosen because it is easier to read
+and write iteratively, line by line, than a single JSON document.
+
+While CSV could be made to fit this model, each neighbor is a
+(geo ID, distance) pair, so CSV is not the most natural fit.
+So we'll settle for our own data model.
+No, it's not the best, but so be it (for now).
 """
 
 import json
@@ -21,6 +26,9 @@
 
     retval = {}
     for line in f:
-        data = json.loads(line)
-        retval.update(data)
+        key, value = line.split(None, 1)
+        key = int(key)
+        data = json.loads(value)
+        data = [tuple(item) for item in data]
+        retval[key] = data
     return retval