Mercurial > hg > Lemuriformes
comparison lemuriformes/uniques.py @ 17:4793f99b73e0
[lemuriformes] utility functions
| author | Jeff Hammel <k0scist@gmail.com> | 
|---|---|
| date | Sun, 10 Dec 2017 17:42:52 -0800 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 16:9b1bb9eee962 | 17:4793f99b73e0 | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 """ | |
| 5 count uniques in each CSV file column | |
| 6 """ | |
| 7 | |
| 8 import argparse | |
| 9 import csv | |
| 10 import sys | |
| 11 from collections import OrderedDict | |
| 12 from .columns import read_columns | |
| 13 | |
| 14 | |
def uniques(fp):
    """Return the set of distinct values in each column of CSV file `fp`.

    `fp` is passed straight to `read_columns` together with
    `type=OrderedDict`.  Every value of the mapping that comes back is
    replaced, in place, by a `set` of its items, and that same mapping
    is returned.  Callers wanting counts take `len()` of each set.
    """

    # read columns
    # NOTE(review): presumably keyed by column header in file order --
    # confirm against `read_columns`
    columns = read_columns(fp, type=OrderedDict)

    # Convert to sets.  `dict.iteritems()` exists only on Python 2, so
    # iterate over a snapshot of `.items()`, which works on Python 2 and 3
    # alike; only existing keys are reassigned, so the mapping never
    # changes size.
    for key, value in list(columns.items()):
        columns[key] = set(value)

    return columns
| 26 | |
| 27 | |
def main(args=sys.argv[1:]):
    """CLI: write one `column,number-of-unique-values` CSV row per column.

    `args` is the list of command-line arguments to parse; the default
    is `sys.argv[1:]` as evaluated when this function is defined.
    Rows are written to standard output.
    """

    # parse command line
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input',
                        type=argparse.FileType('r'),
                        help="input CSV file with headers")
    options = parser.parse_args(args)

    # determine sets; argparse opened the input file, so release it once
    # `uniques` has materialized every column (stdin is left open for
    # the caller)
    try:
        columns = uniques(options.input)
    finally:
        if options.input is not sys.stdin:
            options.input.close()

    # output uniques; `.items()` works on Python 2 and 3, whereas
    # `.iteritems()` exists only on Python 2
    writer = csv.writer(sys.stdout)
    for key, value in columns.items():
        writer.writerow([key, len(value)])
| 45 | |
| 46 | |
# run the CLI only when executed as a script, not when imported
if __name__ == "__main__":
    main()
