view tests/test_read.py @ 22:e69cb496324e

we have a data dump
author Jeff Hammel <k0scist@gmail.com>
date Sun, 25 Jun 2017 17:45:19 -0700
parents 49aae0c0293b
children
line wrap: on
line source

#!/usr/bin/env python

"""
test data reading + loading
"""

import os
import unittest
from globalneighbors import schema
from globalneighbors.locations import locations
from globalneighbors.read import read_tsv
from globalneighbors.read import read_cities
from globalneighbors.read import read_city_list


# absolute path of the directory that contains this test module
here = os.path.split(os.path.abspath(__file__))[0]
# the `data` directory that sits next to this module
data = os.path.join(here, 'data')

class TestDataRead(unittest.TestCase):
    """
    Tests for reading the city TSV data.

    Exercises `read_tsv`, `read_city_list`, `read_cities` and `locations`
    against a 10-line sample file and against the full `cities1000.txt`
    dataset.
    """

    # created with
    # head -n 10 cities1000.txt > GlobalNeighbors/tests/data/sample.tsv
    test_tsv = os.path.join(data, 'sample.tsv')
    test_tsv_lines = 10

    # full dataset:  test with caution
    full_tsv = os.path.join(data, 'cities1000.txt')
    full_tsv_lines = 149092

    def test_read_tsv(self):
        """test reading a tsv file chunk"""

        self.assertTrue(os.path.isfile(self.test_tsv),
                        "missing sample file: %s" % self.test_tsv)
        sample = read_tsv(self.test_tsv)

        # compare against the declared constant rather than a literal so
        # that regenerating the sample only requires updating one place
        self.assertEqual(len(sample), self.test_tsv_lines)
        for row in sample:
            self.assertEqual(len(row), len(schema.descriptions))

    def test_full_dataset(self):
        """ensure we can operate on the full dataset"""

        self.assertTrue(os.path.isfile(self.full_tsv),
                        "missing full dataset: %s" % self.full_tsv)
        cities = read_tsv(self.full_tsv)
        self.assertEqual(len(cities), self.full_tsv_lines)
        for row in cities:
            self.assertEqual(len(row), len(schema.descriptions))

        # cast the data into types we want;
        # the result is discarded: this only checks that casting every
        # row completes without raising
        for row in cities:
            schema.cast_row(row, types=schema.types)

    def test_read_unicode(self):
        """ensure we can read the cities as unicode"""

        # `unicode` exists only on Python 2; fall back to `str` so the
        # check does not die with a NameError on Python 3
        try:
            text_type = unicode
        except NameError:
            text_type = str

        cities = read_city_list(self.full_tsv)
        for city in cities:
            for field in schema.unicode_fields:
                self.assertIsInstance(city[field], text_type)

    def test_iterative_locations(self):
        """assert we can read into locations as a generator"""

        with open(self.test_tsv) as f:
            cities = locations(read_cities(f))
        for geonameid, (lat, lon) in cities.items():
            self.assertTrue(-90. <= lat <= 90.,
                            "latitude out of range: %r" % (lat,))
            self.assertTrue(-180. <= lon <= 180.,
                            "longitude out of range: %r" % (lon,))
            # exact type comparison, as the schema declares the type
            self.assertEqual(type(geonameid), schema.types['geonameid'])


# run the test cases in this module when it is executed as a script
if __name__ == '__main__':
    unittest.main()