diff lemuriformes/json2csv.py @ 15:0d1b8bb1d97b

SQL + data related functionality
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 17:16:52 -0800
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lemuriformes/json2csv.py	Sun Dec 10 17:16:52 2017 -0800
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+"""
+convert JSON list of hashes to CSV
+"""
+# Note: we could use https://docs.python.org/2/library/csv.html#csv.DictWriter
+# but we're being careful here since we actually want this data structure in code,
+# not just for de/serialization
+
+
+import argparse
+import csv
+import json
+import sys
+from .cast import unify
+from .cli import ConfigurationParser
+
+
+def flatten_list_of_dicts(list_of_dicts, header=None):
+    """
+    flattens a list of dicts into a list of lists.
+
+    Every dict must have exactly the same set of keys as the first one.
+
+    Arguments:
+    - list_of_dicts: indexable sequence of dicts sharing one set of keys
+    - header: optional sequence of keys selecting (and ordering) the
+      columns; when omitted or empty, the keys of the first dict are
+      used, in that dict's own key order
+
+    Returns (header, list_of_lists); each inner list holds the values of
+    one dict in header order.  For empty input the result is the given
+    header (or an empty list) together with an empty list of rows.
+
+    Raises AssertionError if `header` names a key that the first dict
+    lacks, or if the dicts do not all have the same keys.
+    """
+
+    if not list_of_dicts:
+        # keep the documented 2-tuple shape so callers can always unpack
+        return (header or [], [])
+
+    # sanity
+    keys = list(list_of_dicts[0].keys())
+    expected = set(keys)  # built once; compared against every item below
+    if header:
+        unknown = set(header).difference(expected)
+        if unknown:
+            # name the offending keys, sorted so the message is stable
+            raise AssertionError("header contains elements not seen in the set: {}".format(', '.join(sorted(str(key) for key in unknown))))
+    for item in list_of_dicts:
+        # ensure each item has the same keys
+        if expected != set(item.keys()):
+            raise AssertionError("Keys not consistent! {} != {}".format(sorted(keys),
+                                                                        sorted(item.keys())))
+
+    if not header:
+        header = keys  # first dict's key order, unsorted
+
+    # flatten it!
+    retval = [[item[key] for key in header] for item in list_of_dicts]
+
+    return (header, retval)
+
+
+def main(args=sys.argv[1:]):
+    """
+    CLI: read a JSON list of hashes and write it out as CSV.
+
+    Arguments:
+    - args: command line arguments; defaults to the process arguments
+      captured when this module was imported
+
+    The header row is written first, followed by one row per input
+    item, each value passed through `unify` before writing.
+    """
+
+    # parse command line
+    parser = ConfigurationParser(description=__doc__)
+    parser.add_argument('json', type=argparse.FileType('r'),
+                        help="JSON file of list of hashes")
+    parser.add_argument('-H', '--header', dest='header', nargs='+',
+                        help="use these fields for header")
+    parser.add_argument('-o', '--output', dest='output',
+                        type=argparse.FileType('w'), default=sys.stdout,
+                        help="path to output, or stdout by default")
+    options = parser.parse_args(args)
+
+    try:
+        # read
+        data = json.load(options.json)
+
+        # flatten
+        result = flatten_list_of_dicts(data, header=options.header)
+        if result:
+            header, flattened = result
+        else:
+            # an empty input list can come back as an empty result
+            # rather than a (header, rows) pair; don't unpack it blindly
+            header, flattened = options.header or [], []
+
+        # write
+        writer = csv.writer(options.output)
+        writer.writerow(header)
+        for row in flattened:
+            writer.writerow([unify(v) for v in row])
+    finally:
+        # close the files argparse opened for us, but leave the
+        # process-wide standard streams alone
+        for stream in (options.json, options.output):
+            if stream is not sys.stdin and stream is not sys.stdout:
+                stream.close()
+
+# run the command line interface only when executed as a script
+if __name__ == '__main__':
+    main()