view lemuriformes/cast.py @ 12:82cd4e0b66cf

csv2sql
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 15:18:00 -0800
parents dbbf5344868c
children
line wrap: on
line source

"""
methods for casting data
"""

import datetime
from collections import OrderedDict


# `string`: tuple of the string types available on the running
# interpreter, suitable as the second argument to `isinstance`
try:
    # probe for the python2-only builtin
    unicode
except NameError:
    # python3: `str` is the only string type
    string = (str,)
else:
    # python2: both `str` and `unicode` count as strings
    string = (str, unicode)

def isstring(f):
    """determines if `f` is an instance of one of the `string` types"""

    result = isinstance(f, string)
    return result

# types we expect to encounter, keyed by name;
# insertion order is preserved: int, then float, then str
types = OrderedDict()
types['int'] = int
types['float'] = float
types['str'] = str


def keyvalue(_string, separator='='):
    """
    split `_string` into [`key`, `value`] on the first `separator`

    Only the first occurrence of `separator` is used, so the value
    may itself contain `separator`.

    Raises `AssertionError` if `separator` does not occur in `_string`.
    """

    # happy path first: split once and hand back the two-item list
    if separator in _string:
        return _string.split(separator, 1)

    message = "Separator '{}' not in '{}'".format(separator, _string)
    raise AssertionError(message)


def string_to_bool(string):
    """
    cast a string to a `bool`, ignoring case

    Returns `True` for 'true', `False` for 'false' and
    `None` for anything else.
    """

    lowered = string.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    # unrecognized value
    return None


def unify(item, codec='utf-8'):
    """
    encode `item` with `codec` if it is a string;  otherwise, return it

    Encoding uses the 'ignore' error handler, so characters that
    `codec` cannot represent are dropped rather than raising.
    Non-string items are returned unchanged.
    """

    if isinstance(item, string):
        return item.encode(codec, 'ignore')
    return item


def datetime_handler(x):
    """
    handler for JSON serialization

    `datetime.datetime` instances are returned in ISO 8601 form.
    Anything else is run through each of the module-level `types`
    in order, and the string form of the first successful cast
    is returned.

    Raises `TypeError` if no cast succeeds.
    """

    # Ref
    # https://stackoverflow.com/questions/35869985/datetime-datetime-is-not-json-serializable
    if isinstance(x, datetime.datetime):
        return x.isoformat()

    # go through the normal types for type casting
    # (and hope nothing "weird" happens)
    for _type in types.values():
        try:
            return str(_type(x))
        except (TypeError, ValueError):
            # `int()` and `float()` raise TypeError (not ValueError) for
            # objects they cannot interpret at all;  catching both lets
            # the remaining candidates be tried instead of aborting on
            # the first one
            continue

    # worst case scenario
    raise TypeError("Unknown type: {x}".format(x=x))


def isiterable(obj):
    """
    determines if `obj` is iterable

    `obj` counts as iterable when `iter(obj)` does not raise `TypeError`.
    """

    try:
        iter(obj)
    except TypeError:
        return False
    return True


def iterable(obj):
    """
    make an iterable out of `obj`

    An `obj` that is already iterable is returned as is;
    anything else is wrapped in a one-item tuple.
    """

    if isiterable(obj):
        return obj
    return (obj,)


def infer(strings, types=types.values()):
    """
    infer the type of a bunch of strings

    Each candidate in `types` is tried in order;  the first one that
    converts every item of `strings` without raising `ValueError`
    is returned.  An empty `strings` vacuously fits the first candidate.

    Returns `None` if no candidate fits.
    """

    # materialize once:  a one-shot iterator would otherwise be exhausted
    # by the first candidate, making every later candidate trivially "fit"
    strings = list(strings)

    for _type in types:
        for s in strings:
            try:
                _type(s)
            except ValueError:
                # this candidate cannot represent `s`;  try the next one
                break
        else:
            # every item converted cleanly
            return _type

    # no candidate fits
    return None


def cast(strings, types=types.values()):
    """
    cast `strings` to `types` based on inference

    The type is chosen by `infer(strings, types=types)` and then
    applied to every item.  Returns a list of the converted items.

    Raises `TypeError` if there are items to convert but no type
    could be inferred for them.
    """

    # materialize once:  `strings` is traversed by `infer` and again
    # below, so a one-shot iterator would come back empty the second time
    strings = list(strings)

    _type = infer(strings, types=types)
    if _type is None and strings:
        # same exception type as calling `None(...)` would give,
        # but with a message that names the actual problem
        raise TypeError("Unable to infer a type for: {}".format(strings))
    return [_type(s) for s in strings]