Mercurial > hg > numerics
changeset 179:f63194f81f7d
stubbing table splitting
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Tue, 09 Aug 2016 14:03:33 -0700 |
parents | 30d820087ae8 |
children | 69543d62ae7a |
files | numerics/chunk.py numerics/split_table.py numerics/table.py setup.py |
diffstat | 4 files changed, 220 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/numerics/chunk.py Tue Aug 09 14:03:33 2016 -0700 @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +# imports +import argparse +import sys + +def chunk(n, *args): + """ + split `args` into `n` parts + """ + + args =list(args) + step = len(args)/int(n) + retval = [] + for i in range(n-1): + retval.append(args[i*step:(i+1)*step]) + retval.append(args[(n-1)*step:]) + return retval + + +def main(args=sys.argv[1:]): + """CLI""" + + # parse command line + parser = argparse.ArgumentParser(description=chunk.__doc__) + parser.add_argument('args', nargs='*') + parser.add_argument('-n', dest='n', + type=int, default=1, + help="number of parts to split into [DEFAULT: %(default)s]") + options = parser.parse_args(args) + + # split + chunks = chunk(options.n, *options.args) + + # output + for _chunk in chunks: + print _chunk + +if __name__ == '__main__': + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/numerics/split_table.py Tue Aug 09 14:03:33 2016 -0700 @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +""" +split a CSV file with a header into parts +""" + +# imports +import chunk +import sys +import table + +class SplitTableParser(table.TableParser): + + def add_arguments(self): + self.add_argument('input', type=argparse.FileType('r'), + help="input CSV file") + self.add_argument('-o', '--output', dest='output', + type=argparse.FileType('w'), default=sys.stdout, + help="output file to write to, or stdout by default") + self.add_argument('-c', '--column', dest='columns', nargs='+', + help="column names to output") + self.add_argument('-v', '--verbose', dest='verbose', + action='store_true', default=False, + help="be verbose") + + +def main(args=sys.argv[1:]): + """CLI""" + + # parse command line + parser = SplitTableParser(description=__doc__) + options = parser.parse_args(args) + + # read CSV + data = parser.read_table() + + print 'hi' + +if __name__ == '__main__': + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/numerics/table.py Tue Aug 09 14:03:33 2016 -0700 @@ -0,0 +1,138 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +parse header-based CSV +""" + +# imports +import argparse +import csv +import json +import os +import sys +import time + +string = (str, unicode) + + +def duplicates(*values): + """return all duplicates in `values`""" + + counts = {value: values.count(value) + for value in set(values)} + retval = [] + for value in values: + if counts[value] > 1 and value not in retval: + retval.append(value) + return retval + + +def read_table(fp, verbose=False): + """read table with header and return list of dictionaries""" + + if isinstance(fp, string): + with open(fp, 'r') as _fp: + return read_table(_fp, verbose) + + # read CSV + data = [row for row in csv.reader(fp)] + + # check data + columns = set([len(row) for row in data]) + if len(columns) != 1: + raise AssertionError("Expected: a constant number of columns, instead got: {}".format(', '.join([str(column) + for column in sorted(columns)]))) + columns = columns.pop() + if verbose: + print "{} columns".format(columns) + data = [[item.strip() for item in row] + for row in data] + + # xform to JSON-format structure + header = data.pop(0) + if verbose: + print "Header:\n{header}".format(header=json.dumps(header, indent=1)) + duplicate_fields = duplicates(*header) + if duplicate_fields: + raise AssertionError("Duplicate header fields found: {duplicates}".format(duplicates=', '.join(duplicate_fields))) + return [dict(zip(header, row)) + for row in data] + + +class TableParser(argparse.ArgumentParser): + """CLI option parser""" + + def __init__(self, **kwargs): + kwargs.setdefault('formatter_class', argparse.RawTextHelpFormatter) + kwargs.setdefault('description', __doc__) + argparse.ArgumentParser.__init__(self, **kwargs) + self.add_arguments() + self.options = None + + def add_arguments(self): + self.add_argument('input', type=argparse.FileType('r'), + help="input CSV file") + self.add_argument('-o', '--output', dest='output', + type=argparse.FileType('w'), default=sys.stdout, + help="output file to write to, or stdout by default") + self.add_argument('-c', '--column', dest='columns', nargs='+', + help="column names to output") + self.add_argument('--format', dest='format', + choices=('json', 'csv'), default='json', + help="output in this format") + self.add_argument('-v', '--verbose', dest='verbose', + action='store_true', default=False, + help="be verbose") + + def parse_args(self, *args, **kw): + options = argparse.ArgumentParser.parse_args(self, *args, **kw) + self.validate(options) + self.options = options + return options + + def validate(self, options): + """validate options""" + + def read_table(self): + + assert self.options + + data = read_table(self.options.input, + verbose=self.options.verbose) + + if self.options.columns: + missing = [column + for column in self.options.columns + if column not in header] + if missing: + self.error("Columns not found in header: {0}".format(", ".join(missing))) + header = options.columns + data = [dict(zip(header, + [row[column] for column in header])) + for row in data] + return data + +def main(args=sys.argv[1:]): + """CLI""" + + # parse command line options + parser = TableParser() + options = parser.parse_args(args) + + # read table + data = parser.read_table() + + # output to JSON + if options.verbose: + print ("Output {format}:".format(format=options.format)) + if options.format == 'json': + options.output.write(json.dumps(data, indent=1)) + elif options.format == 'csv': + writer = csv.writer(options.output) + for row in data: + writer.writerow([row[column] for column in header]) + + +if __name__ == '__main__': + main()
--- a/setup.py Thu Mar 10 13:08:12 2016 -0800 +++ b/setup.py Tue Aug 09 14:03:33 2016 -0700 @@ -34,7 +34,9 @@ plot = numerics.plot:main read-csv = numerics.read:main smooth = numerics.smooth:main + split-table = numerics.split_table:main sum = numerics.sum:main + table2json = numerics.table:main types = numerics.convert:main """ # TODO: