Mercurial > hg > Lemuriformes
comparison lemuriformes/uniques.py @ 17:4793f99b73e0
[lemuriformes] utility functions
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:42:52 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
16:9b1bb9eee962 | 17:4793f99b73e0 |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 """ | |
5 count uniques in each CSV file column | |
6 """ | |
7 | |
8 import argparse | |
9 import csv | |
10 import sys | |
11 from collections import OrderedDict | |
12 from .columns import read_columns | |
13 | |
14 | |
def uniques(fp):
    """
    get the unique values for each column in CSV file `fp`

    `fp` is passed unchanged to `read_columns`, which is asked for an
    `OrderedDict`-typed result.  Every value of that mapping is then
    replaced by a `set` built from it, and the mapping is returned.

    Note that sets (not counts) are returned; take `len()` of each
    value to get the number of uniques for that column.
    """

    # read columns
    # (presumably maps column header -> column values; see `.columns`)
    columns = read_columns(fp, type=OrderedDict)

    # convert to sets
    # `.items()` works on Python 2 and 3 (`.iteritems()` is Python 2 only);
    # iterate over a snapshot since the mapping is reassigned in the loop
    for key, value in list(columns.items()):
        columns[key] = set(value)

    return columns
26 | |
27 | |
def main(args=sys.argv[1:]):
    """
    CLI: write one `column,number of uniques` CSV row per column to stdout

    `args` is the list of command line arguments to parse; it must name
    a single input CSV file (opened for reading by argparse).
    """

    # parse command line
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input',
                        type=argparse.FileType('r'),
                        help="input CSV file with headers")
    options = parser.parse_args(args)

    # determine sets
    try:
        columns = uniques(options.input)
    finally:
        # argparse.FileType opened the file for us; release it once read,
        # but leave stdin alone (FileType maps '-' to sys.stdin)
        if options.input is not sys.stdin:
            options.input.close()

    # output uniques
    # `.items()` works on Python 2 and 3 (`.iteritems()` is Python 2 only)
    writer = csv.writer(sys.stdout)
    for key, value in columns.items():
        writer.writerow([key, len(value)])
45 | |
46 | |
47 if __name__ == '__main__': | |
48 main() |