Mercurial > hg > Lemuriformes
diff lemuriformes/uniques.py @ 17:4793f99b73e0
[lemuriformes] utility functions
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:42:52 -0800 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
count uniques in each CSV file column
"""

import argparse
import csv
import sys
from collections import OrderedDict
from .columns import read_columns


def uniques(fp):
    """Return the set of distinct values found in each column of `fp`.

    `fp` is handed straight to `read_columns` together with
    `type=OrderedDict`; every value of the mapping that comes back is then
    replaced, in place, by `set(value)`.

    NOTE(review): presumably `read_columns` returns an ordered mapping of
    column header -> list of cell values -- confirm against
    `columns.read_columns`.

    :param fp: CSV input, in whatever form `read_columns` accepts (the CLI
               passes an open text file).
    :returns: the mapping produced by `read_columns`, with each value
              converted to a `set`.
    """

    # read columns
    columns = read_columns(fp, type=OrderedDict)

    # convert to sets
    # `.items()` works on both Python 2 and 3 (`.iteritems()` is Python 2
    # only); the pairs are snapshotted with `list` so the mapping is never
    # reassigned while a live view of it is being iterated.
    for key, values in list(columns.items()):
        columns[key] = set(values)

    return columns


def main(args=sys.argv[1:]):
    """CLI: write one `column,unique-count` CSV row per column to stdout.

    :param args: command line arguments, excluding the program name;
                 defaults to `sys.argv[1:]` as captured at import time.
    """

    # parse command line
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input',
                        type=argparse.FileType('r'),
                        help="input CSV file with headers")
    options = parser.parse_args(args)

    # determine sets
    columns = uniques(options.input)

    # output uniques
    writer = csv.writer(sys.stdout)
    # `.items()` rather than the Python 2-only `.iteritems()`
    for key, values in columns.items():
        writer.writerow([key, len(values)])


if __name__ == '__main__':
    main()