view globalneighbors/read.py @ 25:991bce6b6881 default tip

[knn] placeholder for planning session
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 Sep 2017 14:35:50 -0700
parents 7e27e874655b
children
line wrap: on
line source

"""
I/O
"""

import csv
import sys
from .schema import cast_row
from .schema import types

# TSV notes for python `csv` library:
# https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072

# Types treated as "a path to open" rather than "an already-open file".
# `basestring` exists only on Python 2; on Python 3 the name lookup raises
# NameError, so fall back to `str` alone to keep the module importable.
try:
    string = (str, basestring)
except NameError:
    string = (str,)


def read_tsv_generator(f):
    """lazily yield each row of tab-separated values from `f`

    `f` is handed straight to `csv.reader`; every yielded row is the
    list of string fields for one line.  Quote characters are not
    interpreted (`csv.QUOTE_NONE`), so they stay in the field text.
    """

    tsv_options = dict(delimiter='\t', quoting=csv.QUOTE_NONE)
    for fields in csv.reader(f, **tsv_options):
        yield fields

def read_tsv(f):
    """read all tab-separated rows from `f` into a list

    `f` may be a path (any of the `string` types), in which case the
    file is opened and closed here, or an object that
    `read_tsv_generator` can consume directly.
    """

    if not isinstance(f, string):
        return list(read_tsv_generator(f))

    # a path was given: consume every row before the file is closed
    with open(f) as handle:
        return list(read_tsv_generator(handle))


def read_cities(f, fields=None, types=types):
    """yield one cast city per TSV row of `f`

    Each row from `read_tsv_generator` is passed through `cast_row`
    with the given `types`.  When `fields` is truthy, only the keys of
    the cast row that are `in fields` are kept.

    If `cast_row` raises AssertionError, the zero-based row index and
    the raw row are written to stderr and the error is re-raised.
    """

    for position, raw in enumerate(read_tsv_generator(f)):
        try:
            city = cast_row(raw, types=types)
        except AssertionError:
            # say which row failed before letting the error propagate
            sys.stderr.write("Error, row {}\n{}\n".format(position, raw))
            raise

        if not fields:
            yield city
            continue

        # restrict the result to the requested fields
        yield dict((name, value) for name, value in city.items()
                   if name in fields)


def read_city_list(f, fields=None, types=types):
    """return every city from `f` as a list

    `f` may be a path (any of the `string` types), in which case the
    file is opened and closed here, or an object that `read_cities`
    can consume directly.  `fields` and `types` are forwarded to
    `read_cities` unchanged.
    """

    if not isinstance(f, string):
        return list(read_cities(f, fields=fields, types=types))

    # a path was given: consume every city before the file is closed
    with open(f) as handle:
        return list(read_cities(handle, fields=fields, types=types))