Mercurial > hg > GlobalNeighbors
diff tests/test_read.py @ 0:5dba84370182
initial commit; half-working prototype
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 24 Jun 2017 12:03:39 -0700 |
parents | |
children | 1b94f3bf97e5 |
line wrap: on
line diff
#!/usr/bin/env python
# tests/test_read.py -- new file in this changeset (diffed against /dev/null)

"""
test data reading + loading
"""

import os
import unittest
from globalneighbors import schema
from globalneighbors.read import read_tsv


# fixtures live in the `data` directory next to this test module
here = os.path.dirname(os.path.abspath(__file__))
data = os.path.join(here, 'data')


class TestDataRead(unittest.TestCase):
    """Exercise `read_tsv` against the sample and the full TSV fixtures."""

    # created with
    # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv
    test_tsv = os.path.join(data, 'sample.tsv')
    test_tsv_lines = 10

    # full dataset: test with caution
    full_tsv = os.path.join(data, 'cities1000.txt')
    full_tsv_lines = 149092

    def assert_rows_match_schema(self, rows):
        """Assert every row has as many fields as `schema.descriptions`."""

        expected = len(schema.descriptions)
        for row in rows:
            self.assertEqual(len(row), expected)

    def test_read_tsv(self):
        """test reading a tsv file chunk"""

        self.assertTrue(os.path.isfile(self.test_tsv),
                        "missing fixture: %s" % self.test_tsv)
        sample = read_tsv(self.test_tsv)

        # compare against the declared fixture size rather than a
        # second, hard-coded copy of the same number
        self.assertEqual(len(sample), self.test_tsv_lines)
        self.assert_rows_match_schema(sample)

    def test_full_dataset(self):
        """ensure we can operate on the full dataset"""

        self.assertTrue(os.path.isfile(self.full_tsv),
                        "missing fixture: %s" % self.full_tsv)
        cities = read_tsv(self.full_tsv)
        self.assertEqual(len(cities), self.full_tsv_lines)
        self.assert_rows_match_schema(cities)

        # cast the data into types we want; the result is not inspected,
        # so this only verifies that casting every row does not raise
        for row in cities:
            schema.cast_row(row, types=schema.types)


if __name__ == '__main__':
    unittest.main()