annotate lemuriformes/serialize.py @ 18:56596902e9ae default tip

add some setup + tests
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 17:57:03 -0800
parents 4793f99b73e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 serialization
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 """
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 import argparse
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 import csv
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 import json
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 import sys
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 from StringIO import StringIO
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12 from .cast import isstring
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13 from .cast import unify
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def dictlist2csv(list_of_dicts, header=None, fp=None):
    """
    convert a `list_of_dicts` to CSV

    `header` is the sequence of column names to write, in order.
    If it is not given, the keys of the first dict are used.

    `fp` should be a file-like object or a path to a file
    that will be overwritten.  If `fp` is not provided, a
    string will be returned; otherwise nothing is returned.

    An empty `list_of_dicts` writes nothing (not even a header);
    in that case an empty string is returned when no `fp` is given.

    Unicode errors raised while writing a row are reported on
    stderr (the offending row and the error) and then re-raised.
    """

    if isstring(fp):
        # `fp` is a path: open it and recurse with the file object.
        # The arguments must be passed by keyword; positionally the
        # file object would be taken as `header` and never written to.
        with open(fp, 'w') as _fp:
            return dictlist2csv(list_of_dicts, header=header, fp=_fp)

    # with no destination, accumulate in memory and return the text
    return_string = fp is None
    if return_string:
        fp = StringIO()

    # nothing to serialize: honor the "returns a string" contract
    if not list_of_dicts:
        return '' if return_string else None

    # get the header
    header = header or list(list_of_dicts[0].keys())

    # instantiate a writer
    writer = csv.DictWriter(fp, fieldnames=header)
    writer.writeheader()
    for row in list_of_dicts:
        row = {key: unify(value)
               for key, value in row.items()}
        try:
            writer.writerow(row)
        except (UnicodeDecodeError, UnicodeEncodeError) as e:
            # diagnostics go to stderr so that they cannot end up
            # inside CSV output that is being written to stdout
            sys.stderr.write('{}\n{}\n'.format(row, e))
            raise

    if return_string:
        return fp.getvalue()
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
54
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
55
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def dump_json(_json):
    """serialize `_json` to a JSON string, indented by 2 with sorted keys"""
    formatting = {'indent': 2, 'sort_keys': True}
    return json.dumps(_json, **formatting)
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
59
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
60
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def append(filename, item):
    """add `item`, formatted as one line, to the end of `filename`"""

    # mode 'a' creates the file if needed and never truncates it
    with open(filename, 'a') as handle:
        line = '{}\n'.format(item)
        handle.write(line)
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
66
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
67
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """
    CLI: convert a JSON file containing a list of flat dicts to CSV

    `args` is the list of command line arguments (without the
    program name).  Exits via `parser.error` (status 2) if the
    input JSON is not a list.
    """

    # parse command line
    description = "convert a list of dicts in JSON format to CSV"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('input',
                        type=argparse.FileType('r'),
                        help="path to file containing a list of flat dicts")
    parser.add_argument('-o', '--output', dest='output',
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="file to write the CSV to [DEFAULT: stdout]")
    options = parser.parse_args(args)

    # parse input
    data = json.loads(options.input.read())
    if not isinstance(data, list):
        # explicit check rather than `assert`, which is stripped under -O
        parser.error("input must be a JSON list of dicts")

    # write output
    # `fp` must be given by keyword: the second positional
    # parameter of `dictlist2csv` is `header`, not the destination
    dictlist2csv(data, fp=options.output)
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
88
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
89
4793f99b73e0 [lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# script entry point: run the CLI only when executed directly
if __name__ == '__main__':
    main()