diff tests/test_read.py @ 0:5dba84370182

initial commit; half-working prototype
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 12:03:39 -0700
parents
children 1b94f3bf97e5
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_read.py	Sat Jun 24 12:03:39 2017 -0700
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+"""
+test data reading + loading
+"""
+
+import os
+import unittest
+from globalneighbors import schema
+from globalneighbors.read import read_tsv
+
+
+here = os.path.dirname(os.path.abspath(__file__))  # directory of this file
+data = os.path.join(here, 'data')  # 'data' directory next to this file
+
+class TestDataRead(unittest.TestCase):
+    """check that `read_tsv` loads the TSV fixtures in the data directory"""
+
+    # created with
+    # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv
+    test_tsv = os.path.join(data, 'sample.tsv')
+    test_tsv_lines = 10
+
+    # full dataset:  test with caution
+    full_tsv = os.path.join(data, 'cities1000.txt')
+    full_tsv_lines = 149092
+
+    def _read_and_check(self, path, lines):
+        """read `path`; expect `lines` rows of len(schema.descriptions) fields"""
+        self.assertTrue(os.path.isfile(path), "missing fixture: %s" % path)
+        rows = read_tsv(path)
+        self.assertEqual(len(rows), lines)
+        width = len(schema.descriptions)
+        for row in rows:
+            self.assertEqual(len(row), width)
+        return rows
+
+    def test_read_tsv(self):
+        """test reading a tsv file chunk"""
+        self._read_and_check(self.test_tsv, self.test_tsv_lines)
+
+    def test_full_dataset(self):
+        """ensure we can operate on the full dataset"""
+        cities = self._read_and_check(self.full_tsv, self.full_tsv_lines)
+        # cast the data into types we want; the result is discarded, so
+        # this only checks that every row casts without raising
+        for row in cities:
+            schema.cast_row(row, types=schema.types)
+
+
+if __name__ == '__main__':  # only when this file is run as a script
+    unittest.main()