Mercurial > hg > Lemuriformes
annotate lemuriformes/serialize.py @ 18:56596902e9ae default tip
add some setup + tests
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:57:03 -0800 |
parents | 4793f99b73e0 |
children |
rev | line source |
---|---|
17
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 """ |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 serialization |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 """ |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 import argparse |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 import csv |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 import json |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 import sys |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 from StringIO import StringIO |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 from .cast import isstring |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 from .cast import unify |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
15 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
def dictlist2csv(list_of_dicts, header=None, fp=None):
    """
    convert a `list_of_dicts` to CSV

    `header` is the sequence of column names to write, in order.
    If it is not provided (or empty), the keys of the first dict
    in `list_of_dicts` are used.

    `fp` should be a file-like object or a path to a file
    that will be overwritten.  If `fp` is not provided, a
    string will be returned; otherwise `None` is returned.

    If `list_of_dicts` is empty nothing is written (not even a
    header row); the empty string is returned when a string was
    requested.

    Unicode errors raised while writing a row are re-raised after
    the offending row and the error are reported on stderr.
    """

    if isstring(fp):
        with open(fp, 'w') as _fp:
            # pass by keyword: positionally, `_fp` would be bound to
            # `header` and nothing would be written to the file
            return dictlist2csv(list_of_dicts, header=header, fp=_fp)

    # without a destination, accumulate in memory and return the text
    return_string = fp is None
    if return_string:
        fp = StringIO()

    # no rows: nothing to write, but still honor the string-return contract
    if not list_of_dicts:
        return fp.getvalue() if return_string else None

    # get the header
    header = header or list(list_of_dicts[0].keys())

    # instantiate a writer
    writer = csv.DictWriter(fp, fieldnames=header)
    writer.writeheader()
    for row in list_of_dicts:
        # every value is passed through `unify` (from `.cast`) before writing
        row = {key: unify(value)
               for key, value in row.items()}
        try:
            writer.writerow(row)
        except (UnicodeDecodeError, UnicodeEncodeError) as e:
            # report on stderr: stdout may be the CSV destination itself
            sys.stderr.write('{!r}\n'.format(row))
            sys.stderr.write('{!r}\n'.format(e))
            raise

    if return_string:
        return fp.getvalue()
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
54 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
55 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
def dump_json(_json):
    """
    general purpose JSON front-end

    Returns `_json` serialized as a string with 2-space indentation
    and keys sorted.
    """
    # explicit separators: with `indent` set, older Pythons default to
    # ', ' as the item separator, leaving trailing whitespace on each line
    return json.dumps(_json,
                      indent=2,
                      sort_keys=True,
                      separators=(',', ': '))
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
59 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
60 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
def append(filename, item):
    """
    add `item`, formatted as a single line, to the end of `filename`

    The file is opened in append mode, so existing content is kept.
    """

    with open(filename, 'a') as handle:
        line = '{}\n'.format(item)
        handle.write(line)
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
66 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
67 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
def main(args=None):
    """
    CLI: convert a JSON file holding a list of flat dicts to CSV

    `args` is the list of command line arguments; when `None`,
    argparse reads `sys.argv[1:]` at call time.

    Exits through `parser.error` if the input's top-level JSON value
    is not a list.
    """

    # parse command line
    description = "convert a list of dicts in JSON format to CSV"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('input',
                        type=argparse.FileType('r'),
                        help="path to file containing a list of flat dicts")
    parser.add_argument('-o', '--output', dest='output',
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="file to write the CSV to [DEFAULT: stdout]")
    options = parser.parse_args(args)

    try:
        # parse input
        data = json.loads(options.input.read())
        # explicit check rather than `assert`, which is stripped under -O
        if not isinstance(data, list):
            parser.error("input must be a JSON list of dicts")

        # write output; `fp` must be passed by keyword, since the second
        # positional parameter of `dictlist2csv` is `header`
        dictlist2csv(data, fp=options.output)
    finally:
        # close what argparse opened, but leave the standard streams alone
        if options.input is not sys.stdin:
            options.input.close()
        if options.output is not sys.stdout:
            options.output.close()
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
88 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
89 |
4793f99b73e0
[lemuriformes] utility functions
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
# run the command line interface when executed as a script
if __name__ == '__main__':
    main()