view globalneighbors/read.py @ 4:8e130b7bfed9

remove unintended boilerplate
author Jeff Hammel <k0scist@gmail.com>
date Sat, 24 Jun 2017 14:48:55 -0700
parents 1b94f3bf97e5
children 7e27e874655b
line wrap: on
line source

"""
I/O
"""

import csv
import sys
from .schema import cast_row
from .schema import types

# TSV notes for python `csv` library:
# https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072

# Types that the readers below treat as a path to open rather than as an
# already-open file.  `basestring` only exists on Python 2; on Python 3 the
# lookup raises NameError, so fall back to `str` alone.
try:
    string = (str, basestring)
except NameError:
    string = (str,)


def read_tsv_generator(f):
    """lazily yield each row of tab-separated values read from `f`"""

    # QUOTE_NONE: quote characters get no special treatment and stay
    # in the field values as ordinary data
    tsv_options = {'delimiter': '\t', 'quoting': csv.QUOTE_NONE}
    for fields in csv.reader(f, **tsv_options):
        yield fields

def read_tsv(f):
    """
    read every tab-separated row from `f` and return them as a list;
    `f` is either a path (opened and closed here) or an already-open
    file / iterable of lines
    """

    if not isinstance(f, string):
        # not a path: drain the row generator into a list
        return list(read_tsv_generator(f))

    # a path: open it and read the rows while the file is still open
    with open(f) as tsv_file:
        return read_tsv(tsv_file)


def read_cities(f, fields=None, types=types):
    """
    read and cast cities into a form we want

    Lazily yields one cast row for each row of tab-separated input.

    f -- open file (or other iterable of lines) of tab-separated values
    fields -- optional collection of keys to keep; when given and
              non-empty, each cast row is reduced to just those keys
    types -- passed straight through to `cast_row`

    An `AssertionError` raised by `cast_row` is re-raised after the
    zero-based index of the offending row has been written to stderr.
    """

    for index, row in enumerate(read_tsv_generator(f)):
        try:
            cast = cast_row(row, types=types)
        except AssertionError:
            # newline-terminate the note so it does not run into the
            # traceback printed for the re-raised exception
            sys.stderr.write("Error, row {}\n".format(index))
            raise
        if fields:
            # presumably `cast` is a dict keyed by field name --
            # confirm against `cast_row` in the schema module
            cast = {key: value for key, value in cast.items()
                    if key in fields}
        yield cast


def read_city_list(f, fields=None, types=types):
    """
    return all cities from `f` as a list; `f` is either a path (opened
    and closed here) or an already-open file / iterable of lines.
    `fields` and `types` are handed on to `read_cities` unchanged
    """

    if not isinstance(f, string):
        # not a path: drain the city generator into a list
        cities = read_cities(f, fields=fields, types=types)
        return list(cities)

    # a path: open it and build the list while the file is still open
    with open(f) as city_file:
        return read_city_list(city_file, fields=fields, types=types)