Mercurial > hg > Lemuriformes
annotate lemuriformes/json2csv.py @ 15:0d1b8bb1d97b
SQL + data related functionality
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:16:52 -0800 |
parents | |
children |
rev | line source |
---|---|
15
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 """ |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 convert JSON list of hashes to CSV |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 """ |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 # Note: we could use https://docs.python.org/2/library/csv.html#csv.DictWriter |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 # but we're being careful here since we actually want this data structure in code, |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 # not just for de/serialization |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 import argparse |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 import csv |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 import json |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 import sys |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
15 from .cast import unify |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
16 from .cli import ConfigurationParser |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
17 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
def flatten_list_of_dicts(list_of_dicts, header=None):
    """
    flattens a list of dicts into a list of lists.

    `list_of_dicts`: sequence of dicts which must all have the same keys
    `header`: optional ordered selection of those keys to use as columns;
              when omitted, the keys of the first dict are used, in that
              dict's iteration order

    Returns (header, list_of_lists), with one inner list per input dict
    holding that dict's values in `header` order.  Empty input returns
    the given header (or an empty list) and no rows, so the result can
    always be unpacked as a 2-tuple.

    Raises AssertionError if `header` names a key that the first dict
    does not have, or if any dict's keys differ from the first dict's.
    """

    if not list_of_dicts:
        # keep the documented 2-tuple shape even when there are no rows
        return (list(header) if header else [], [])

    # sanity
    keys = list(list_of_dicts[0].keys())
    key_set = set(keys)
    if header:
        unknown = set(header).difference(key_set)
        if unknown:
            # name the offending columns; str() guards against non-string names
            raise AssertionError("header contains elements not seen in the set: {}".format(
                ', '.join(sorted(str(name) for name in unknown))))
    for item in list_of_dicts:
        # ensure each item has the same keys
        if key_set != set(item.keys()):
            raise AssertionError("Keys not consistent! {} != {}".format(sorted(keys),
                                                                        sorted(item.keys())))

    if not header:
        # a real list (not a dict view) in the first dict's key order
        header = keys

    # flatten it!
    retval = [[item[key] for key in header] for item in list_of_dicts]

    return (header, retval)
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
49 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
50 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
def main(args=None):
    """
    CLI: read a JSON list of hashes and write it out as CSV.

    `args`: command line arguments without the program name;
            defaults to sys.argv[1:], looked up when main() is called

    The header row is written first, followed by one row per hash; each
    value is passed through `unify` before being handed to the CSV writer.
    """

    if args is None:
        # resolve at call time rather than freezing sys.argv at import time
        args = sys.argv[1:]

    # parse command line
    parser = ConfigurationParser(description=__doc__)
    parser.add_argument('json', type=argparse.FileType('r'),
                        help="JSON file of list of hashes")
    parser.add_argument('-H', '--header', dest='header', nargs='+',
                        help="use these fields for header")
    parser.add_argument('-o', '--output', dest='output',
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="path to output, or stdout by default")
    options = parser.parse_args(args)

    try:
        # read
        try:
            data = json.load(options.json)
        finally:
            # argparse.FileType maps '-' to sys.stdin; never close that
            if options.json is not sys.stdin:
                options.json.close()

        # flatten
        header, flattened = flatten_list_of_dicts(data, header=options.header)

        # write
        writer = csv.writer(options.output)
        writer.writerow(header)
        for row in flattened:
            writer.writerow([unify(v) for v in row])
    finally:
        # flush and release a real output file, but leave stdout open
        if options.output is not sys.stdout:
            options.output.close()
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
76 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
# run the command line interface when executed as a script
if __name__ == "__main__":
    main()