annotate lemuriformes/json2csv.py @ 15:0d1b8bb1d97b

SQL + data related functionality
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 17:16:52 -0800
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 convert JSON list of hashes to CSV
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 """
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 # Note: we could use https://docs.python.org/2/library/csv.html#csv.DictWriter
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 # but we're being careful here since we actually want this data structure in code,
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 # not just for de/serialization
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 import argparse
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12 import csv
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13 import json
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14 import sys
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15 from .cast import unify
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16 from .cli import ConfigurationParser
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def flatten_list_of_dicts(list_of_dicts, header=None):
    """
    flattens a list of dicts into a list of lists.

    `list_of_dicts`: sequence of dicts; every dict must have exactly the
                     same set of keys as the first one.
    `header`: optional ordered sequence of keys to extract. It must be a
              subset of the dicts' keys. When omitted (or empty), all keys
              of the first dict are used, in that dict's iteration order.

    Returns (header, list_of_lists), where each inner list holds the values
    of one dict in `header` order. For empty input the result is
    (header-or-empty-list, []) so callers can always unpack two values.

    Raises AssertionError if `header` names a key not present in the data
    or if the dicts do not all share the same keys.
    """

    if not list_of_dicts:
        # keep the documented 2-tuple shape even when there is no data
        return (list(header) if header else [], [])

    # sanity
    keys = list(list_of_dicts[0].keys())  # concrete list, not a dict view
    keyset = set(keys)
    if header:
        missing = set(header).difference(keyset)
        if missing:
            raise AssertionError(
                "header contains elements not seen in the set: {}".format(
                    ', '.join(sorted(str(name) for name in missing))))
    for item in list_of_dicts:
        # ensure each item has the same keys
        if keyset != set(item.keys()):
            raise AssertionError(
                "Keys not consistent! {} != {}".format(sorted(keys),
                                                       sorted(item.keys())))

    if not header:
        header = keys  # to sort?

    # flatten it!
    retval = [[item[key] for key in header] for item in list_of_dicts]

    return (header, retval)
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
49
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
50
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """
    CLI: read a JSON list of hashes and write it out as CSV.

    `args`: list of command line arguments; defaults to the process
            arguments as captured when this module was imported.

    The input and output files opened by argparse are closed before
    returning; the process-wide stdin/stdout streams are left open.
    """

    # parse command line
    parser = ConfigurationParser(description=__doc__)
    parser.add_argument('json', type=argparse.FileType('r'),
                        help="JSON file of list of hashes")
    parser.add_argument('-H', '--header', dest='header', nargs='+',
                        help="use these fields for header")
    parser.add_argument('-o', '--output', dest='output',
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="path to output, or stdout by default")
    options = parser.parse_args(args)

    try:
        # read
        data = json.load(options.json)

        # flatten
        if data:
            header, flattened = flatten_list_of_dicts(data,
                                                      header=options.header)
        else:
            # nothing to flatten: emit at most the requested header row
            header, flattened = options.header or [], []

        # write
        writer = csv.writer(options.output)
        if header or flattened:
            writer.writerow(header)
        for row in flattened:
            # unify() comes from .cast and is applied to every cell
            writer.writerow([unify(v) for v in row])
    finally:
        # close what argparse opened, but never the shared std streams
        if options.json is not sys.stdin:
            options.json.close()
        if options.output is sys.stdout:
            options.output.flush()
        else:
            options.output.close()
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
76
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# Run the CLI only when executed as a script, not when imported.
# NOTE(review): this module uses relative imports (.cast, .cli), so it
# presumably has to be launched as part of its package (e.g. `python -m`)
# -- confirm how it is invoked.
if __name__ == '__main__':
    main()