Mercurial > hg > Lemuriformes
diff lemuriformes/json2csv.py @ 15:0d1b8bb1d97b
SQL + data related functionality
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:16:52 -0800 |
parents | |
children |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/lemuriformes/json2csv.py Sun Dec 10 17:16:52 2017 -0800
# @@ -0,0 +1,78 @@
#!/usr/bin/env python

"""
convert JSON list of hashes to CSV
"""
# Note: we could use https://docs.python.org/2/library/csv.html#csv.DictWriter
# but we're being careful here since we actually want this data structure in code,
# not just for de/serialization


import argparse
import csv
import json
import sys
from .cast import unify
from .cli import ConfigurationParser


def flatten_list_of_dicts(list_of_dicts, header=None):
    """
    flattens a list of dicts into a list of lists.

    `list_of_dicts`: sequence of dicts; every dict must have exactly the
                     same set of keys as the first one
    `header`: optional sequence of keys that selects (and orders) the
              columns; when empty or None, the keys of the first dict
              are used, in that dict's iteration order

    Returns (header, list_of_lists).  For empty input the result is
    (header or [], []) so callers can always unpack two values.

    Raises AssertionError if `header` names a key that is not present
    in the data, or if the dicts do not all share the same keys.
    """

    if not list_of_dicts:
        # keep the documented 2-tuple shape even when there is no data
        return (header or [], [])

    # sanity
    keys = list(list_of_dicts[0].keys())
    keyset = set(keys)
    if header:
        unknown = set(header).difference(keyset)
        if unknown:
            # join (not format) so the message actually names the fields
            raise AssertionError("header contains elements not seen in the set: {}".format(', '.join(sorted(str(key) for key in unknown))))
    for item in list_of_dicts:
        # ensure each item has the same keys
        if keyset != set(item.keys()):
            raise AssertionError("Keys not consistent! {} != {}".format(sorted(keys),
                                                                        sorted(item.keys())))

    if not header:
        # a real list, not a live view onto the first dict
        header = keys  # to sort?

    # flatten it!
    retval = [[item[key] for key in header] for item in list_of_dicts]

    return (header, retval)


def main(args=sys.argv[1:]):
    """
    CLI: read a JSON list of hashes and write it out as CSV.

    `args`: command line arguments, without the program name
    """

    # parse command line
    parser = ConfigurationParser(description=__doc__)
    parser.add_argument('json', type=argparse.FileType('r'),
                        help="JSON file of list of hashes")
    parser.add_argument('-H', '--header', dest='header', nargs='+',
                        help="use these fields for header")
    parser.add_argument('-o', '--output', dest='output',
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="path to output, or stdout by default")
    options = parser.parse_args(args)

    try:
        # read
        data = json.load(options.json)

        # flatten
        header, flattened = flatten_list_of_dicts(data, header=options.header)

        # write
        writer = csv.writer(options.output)
        if header:
            # no header row for empty input given without --header
            writer.writerow(header)
        for row in flattened:
            # each cell is passed through the project's `unify` helper
            writer.writerow([unify(v) for v in row])
    finally:
        # argparse.FileType opened these; close them, but leave the
        # process-wide standard streams alone
        if options.json is not sys.stdin:
            options.json.close()
        if options.output is not sys.stdout:
            options.output.close()


if __name__ == '__main__':
    main()