Mercurial > hg > Lemuriformes
comparison lemuriformes/json2csv.py @ 15:0d1b8bb1d97b
SQL + data related functionality
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:16:52 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
14:756dbd3e391e | 15:0d1b8bb1d97b |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 convert JSON list of hashes to CSV | |
5 """ | |
6 # Note: we could use https://docs.python.org/2/library/csv.html#csv.DictWriter | |
7 # but we're being careful here since we actually want this data structure in code, | |
8 # not just for de/serialization | |
9 | |
10 | |
11 import argparse | |
12 import csv | |
13 import json | |
14 import sys | |
15 from .cast import unify | |
16 from .cli import ConfigurationParser | |
17 | |
18 | |
def flatten_list_of_dicts(list_of_dicts, header=None):
    """
    flattens a list of dicts into a list of lists.

    `list_of_dicts`: sequence of dicts that must all have the same keys
    `header`: optional sequence of keys selecting (and ordering) the
              columns; defaults to the keys of the first dict

    Returns (header, list_of_lists); for empty input the list of lists
    is empty and the header is the one passed in (or an empty list).

    Raises AssertionError if `header` names a key missing from the
    first dict, or if the dicts do not all share the same keys.
    """

    if not list_of_dicts:
        # keep the documented 2-tuple shape so callers can always unpack
        return (header or [], [])

    # sanity
    first = list_of_dicts[0]
    keys = set(first)
    if header:
        missing = set(header).difference(keys)
        if missing:
            raise AssertionError("header contains elements not seen in the set: {}".format(', '.join(sorted(str(key) for key in missing))))
    for item in list_of_dicts:
        # ensure each item has the same keys
        if keys != set(item):
            raise AssertionError("Keys not consistent! {} != {}".format(sorted(keys),
                                                                        sorted(item.keys())))

    if not header:
        # materialize as a real list (a dict keys view on Python 3 is not one);
        # order follows the first dict's own key order
        header = list(first)

    # flatten it!
    retval = [[item[key] for key in header] for item in list_of_dicts]

    return (header, retval)
49 | |
50 | |
def main(args=sys.argv[1:]):
    """command line entry point: read a JSON list of hashes, write CSV"""

    # build the command line interface and parse `args`
    cli = ConfigurationParser(description=__doc__)
    cli.add_argument('json', type=argparse.FileType('r'),
                     help="JSON file of list of hashes")
    cli.add_argument('-H', '--header', dest='header', nargs='+',
                     help="use these fields for header")
    cli.add_argument('-o', '--output', dest='output',
                     type=argparse.FileType('w'), default=sys.stdout,
                     help="path to output, or stdout by default")
    options = cli.parse_args(args)

    # load the records and turn them into a header plus rows
    records = json.load(options.json)
    header, rows = flatten_list_of_dicts(records, header=options.header)

    # emit the header line, then every row with each cell passed through `unify`
    out = csv.writer(options.output)
    out.writerow(header)
    out.writerows([unify(cell) for cell in row] for row in rows)
76 | |
# run the CLI only when executed as a script, not when imported
if __name__ == '__main__':
    main()