diff lemuriformes/serialize.py @ 17:4793f99b73e0

[lemuriformes] utility functions
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 17:42:52 -0800
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lemuriformes/serialize.py	Sun Dec 10 17:42:52 2017 -0800
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+"""
+serialization
+"""
+
+import argparse
+import csv
+import json
+import sys
+from StringIO import StringIO
+from .cast import isstring
+from .cast import unify
+
+
+def dictlist2csv(list_of_dicts, header=None, fp=None):
+    """
+    convert a `list_of_dicts` to CSV
+
+    `fp` should be a file-like object or  a path to a file
+    that will be overwritten.  If `fp` is not provided, a
+    string will be returned
+    """
+
+    if isstring(fp):
+        with open(fp, 'w') as _fp:
+            return dictlist2csv(list_of_dicts, _fp)
+
+    return_string = False
+    if fp is None:
+        return_string = True
+        fp = StringIO()
+
+    # get the header
+    if not list_of_dicts:
+        return   # XXX what about return_string?  Good or bad?
+    header = header or list_of_dicts[0].keys()
+
+    # instantiate a writer
+    writer = csv.DictWriter(fp, fieldnames=header)
+    writer.writeheader()
+    for row in list_of_dicts:
+        row = {key: unify(value)
+               for key, value in row.items()}
+        try:
+            writer.writerow(row)
+        except (UnicodeDecodeError, UnicodeEncodeError) as e:
+            print (row )
+            print(e)
+            raise
+
+    if return_string:
+        return fp.getvalue()
+
+
+def dump_json(_json):
+    """general purpose JSON front-end"""
+    return json.dumps(_json, indent=2, sort_keys=True)
+
+
+def append(filename, item):
+    """append line-`item` to `filename`"""
+
+    with open(filename, 'a') as f:
+        f.write('{}\n'.format(item))
+
+
+def main(args=sys.argv[1:]):
+    """CLI"""
+
+    # parse command line
+    description = "convert a list of dicts in JSON format to CSV"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument('input',
+                        type=argparse.FileType('r'),
+                        help="path to file containing a list of flat dicts")
+    parser.add_argument('-o', '--output', dest='output',
+                        type=argparse.FileType('w'), default=sys.stdout,
+                        help="file to write the CSV to [DEFAULT: stdout]")
+    options = parser.parse_args(args)
+
+    # parse input
+    data = json.loads(options.input.read())
+    assert type(data) == list
+
+    # write output
+    dictlist2csv(data, options.output)
+
+
+# run the command line interface when executed as a script
+if __name__ == '__main__':
+    main()