comparison lemuriformes/uniques.py @ 17:4793f99b73e0

[lemuriformes] utility functions
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 17:42:52 -0800
parents
children
comparison
equal deleted inserted replaced
16:9b1bb9eee962 17:4793f99b73e0
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 count uniques in each CSV file column
6 """
7
8 import argparse
9 import csv
10 import sys
11 from collections import OrderedDict
12 from .columns import read_columns
13
14
def uniques(fp):
    """Get the set of unique values for each column in CSV file `fp`.

    `fp` is handed to `read_columns` together with `type=OrderedDict`.
    Every value of the mapping that comes back is replaced, in place,
    by a `set` of that value's items, and the same mapping is returned.
    Callers wanting the unique *count* of a column take `len()` of its set.
    """

    # read columns
    # NOTE(review): presumably keyed by column header, in file order --
    # confirm against `read_columns`.
    columns = read_columns(fp, type=OrderedDict)

    # convert to sets; iterate over a snapshot of the keys instead of
    # `iteritems()`, which does not exist on Python 3 mappings
    for key in list(columns):
        columns[key] = set(columns[key])

    return columns
26
27
def main(args=sys.argv[1:]):
    """CLI: write one `key,count` CSV row to stdout per column of the input.

    `args` is the list of command-line arguments to parse. The default is
    the slice of `sys.argv` taken when this function was defined.
    The single positional argument `input` is opened for reading by
    argparse and passed to `uniques`.
    """

    # parse command line
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input',
                        type=argparse.FileType('r'),
                        help="input CSV file with headers")
    options = parser.parse_args(args)

    # determine sets
    columns = uniques(options.input)

    # output uniques; `items()` is used because `iteritems()` is
    # unavailable on Python 3 mappings
    writer = csv.writer(sys.stdout)
    for key, value in columns.items():
        writer.writerow([key, len(value)])
45
46
# run the command-line interface when this file is executed as a script
if __name__ == "__main__":
    main()