Mercurial > hg > Lemuriformes
diff lemuriformes/serialize.py @ 17:4793f99b73e0
[lemuriformes] utility functions
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:42:52 -0800 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python

"""
serialization
"""

import argparse
import csv
import json
import sys

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

from .cast import isstring
from .cast import unify


def dictlist2csv(list_of_dicts, header=None, fp=None):
    """
    convert a `list_of_dicts` to CSV

    `header` is the sequence of column names to write; if it is
    not given (or empty), the keys of the first dict are used.

    `fp` should be a file-like object or a path to a file
    that will be overwritten.  If `fp` is not provided, a
    string will be returned; otherwise `None` is returned.

    If `list_of_dicts` is empty nothing is written (not even a
    header) and an empty string is returned when a string result
    was requested.
    """

    if isstring(fp):
        # `fp` is a path: open it and recurse with the file object.
        # Arguments are passed by keyword so the file object cannot
        # land in the `header` slot.
        with open(fp, 'w') as _fp:
            return dictlist2csv(list_of_dicts, header=header, fp=_fp)

    return_string = fp is None
    if return_string:
        fp = StringIO()

    # nothing to serialize: honor the documented string return
    if not list_of_dicts:
        return '' if return_string else None

    # get the header
    header = header or list(list_of_dicts[0].keys())

    # instantiate a writer
    writer = csv.DictWriter(fp, fieldnames=header)
    writer.writeheader()
    for row in list_of_dicts:
        # NOTE(review): `unify` comes from `.cast`; presumably it
        # normalizes each value to a writable string type -- confirm.
        row = {key: unify(value)
               for key, value in row.items()}
        try:
            writer.writerow(row)
        except (UnicodeDecodeError, UnicodeEncodeError) as e:
            # show the offending row before propagating the error
            print(row)
            print(e)
            raise

    if return_string:
        return fp.getvalue()


def dump_json(_json):
    """
    general purpose JSON front-end

    Returns `_json` serialized as a string with a 2-space indent
    and keys sorted.
    """
    return json.dumps(_json, indent=2, sort_keys=True)


def append(filename, item):
    """
    append line-`item` to `filename`

    `item` is formatted with `str.format` and a newline is added.
    """

    with open(filename, 'a') as f:
        f.write('{}\n'.format(item))


def main(args=sys.argv[1:]):
    """
    CLI: convert a JSON file holding a list of flat dicts to CSV

    `args` is the list of command line arguments (without the
    program name).  Exits via `parser.error` if the JSON document
    is not a list.
    """

    # parse command line
    description = "convert a list of dicts in JSON format to CSV"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('input',
                        type=argparse.FileType('r'),
                        help="path to file containing a list of flat dicts")
    parser.add_argument('-o', '--output', dest='output',
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="file to write the CSV to [DEFAULT: stdout]")
    options = parser.parse_args(args)

    # parse input
    data = json.load(options.input)
    if not isinstance(data, list):
        # explicit check instead of `assert`, which is removed under -O
        parser.error("input must contain a JSON list of dicts")

    # write output; `fp` must be passed by keyword since the second
    # positional parameter of `dictlist2csv` is `header`
    dictlist2csv(data, fp=options.output)


if __name__ == '__main__':
    main()