annotate lemuriformes/cast.py @ 12:82cd4e0b66cf

csv2sql
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 15:18:00 -0800
parents dbbf5344868c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 """
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 methods for casting data
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 import datetime
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 from collections import OrderedDict
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# `string` is the tuple of types treated as "a string" by this module.
# Python 2 has a separate `unicode` builtin; on Python 3 the bare name
# lookup raises `NameError` and only `str` is used.
try:
    unicode  # noqa: F821
except NameError:
    # python3
    string = (str,)
else:
    # python2
    string = (str, unicode)  # noqa: F821
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def isstring(f):
    """tell whether `f` is an instance of one of the `string` types"""
    is_text = isinstance(f, string)
    return is_text
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 # types we expect to encounter
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# types we expect to encounter, keyed by their name;
# ordered, since the candidates are tried in this sequence
types = OrderedDict((_type.__name__, _type)
                    for _type in (int, float, str))
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def keyvalue(_string, separator='='):
    """
    split `_string` into [`key`, `value`] at the first `separator`

    Raises `AssertionError` if `separator` does not occur in `_string`.
    """

    if separator in _string:
        # split once only: any later separator stays part of the value
        return _string.split(separator, 1)

    message = "Separator '{}' not in '{}'".format(separator, _string)
    raise AssertionError(message)
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
34
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
35
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def string_to_bool(string):
    """
    cast a string to a `bool`, ignoring case

    'true' gives `True`, 'false' gives `False`;
    anything else gives `None`.
    """
    lowered = string.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    return None
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
40
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
41
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def unify(item, codec='utf-8'):
    """
    encode `item` with `codec` if it is a string; otherwise, return it

    Characters that cannot be encoded are dropped (error handler 'ignore').
    """
    # NOTE(review): the name suggests the result should be unicode text,
    # but `.encode()` yields the *encoded* form -- confirm what callers expect

    if isinstance(item, string):
        return item.encode(codec, 'ignore')
    return item
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
48
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
49
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def datetime_handler(x):
    """
    handler for JSON serialization

    `datetime.datetime` instances are rendered with `isoformat()`.
    Anything else is cast with each of the module-level `types` in turn
    and the string form of the first successful cast is returned.

    Raises `TypeError` if none of the casts succeeds.
    """

    # Ref
    # https://stackoverflow.com/questions/35869985/datetime-datetime-is-not-json-serializable
    if isinstance(x, datetime.datetime):
        return x.isoformat()

    # go through the normal types for type casting
    # (and hope nothing "weird" happens)
    # NOTE(review): `int` is tried first, so a non-integral number-like
    # object (e.g. `decimal.Decimal('3.7')`) is rendered truncated ('3');
    # confirm whether that is acceptable
    for _type in types.values():
        try:
            return str(_type(x))
        except (TypeError, ValueError, OverflowError):
            # `int()`/`float()` signal an unsupported object with TypeError
            # and an infinite value with OverflowError, not only ValueError;
            # all of them just mean "try the next type"
            continue

    # worst case scenario
    raise TypeError("Unknown type: {x}".format(x=x))
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
68
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
69
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def isiterable(obj):
    """report whether `obj` can be iterated, i.e. whether `iter(obj)` works"""

    try:
        iter(obj)
    except TypeError:
        # `iter` rejects objects that do not support iteration
        return False
    else:
        return True
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
78
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
79
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def iterable(obj):
    """
    give back `obj` itself if it is already iterable;
    otherwise wrap it in a one-element tuple
    """
    if isiterable(obj):
        return obj
    return (obj,)
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
83
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
84
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def infer(strings, types=types.values()):
    """
    infer the type of a bunch of strings

    Each candidate in `types` is tried in order; the first one that
    converts every item of `strings` without raising `ValueError`
    is returned.  If `strings` is empty the first candidate is returned.
    Returns `None` if no candidate fits.
    """
    # materialize once: a one-shot iterator (e.g. a generator) would be
    # exhausted while testing the first candidate, and every later
    # candidate would then "fit" vacuously on an empty sequence
    strings = list(strings)

    for _type in types:
        for s in strings:
            try:
                _type(s)
            except ValueError:
                # this candidate cannot represent `s`; try the next one
                break
        else:
            # no `break`: every item converted cleanly
            return _type
    return None
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
97
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
98
dbbf5344868c add utlities for casting data
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def cast(strings, types=types.values()):
    """
    cast `strings` to `types` based on inference

    The common type is determined with `infer` and a list with every
    item of `strings` converted to that type is returned.

    Raises `TypeError` if no type could be inferred for non-empty input.
    """

    # materialize once: otherwise a one-shot iterator is exhausted by
    # the inference and nothing is left to convert afterwards
    strings = list(strings)

    _type = infer(strings, types=types)
    if _type is None and strings:
        # same exception type as calling `None(...)` would give,
        # but with a message that names the actual problem
        raise TypeError("No type in `types` fits all of the given strings")
    return [_type(s) for s in strings]