Mercurial > hg > config
comparison python/count.py @ 740:25622fb5906d
example code for counting + duplicates
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Wed, 27 May 2015 15:55:29 -0700 |
parents | |
children | 8275fa887f2b |
comparison
equal
deleted
inserted
replaced
738:6833137f039c | 740:25622fb5906d |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 """ | |
5 counting and duplication | |
6 """ | |
7 | |
8 # imports | |
9 import argparse | |
10 import sys | |
11 from collections import OrderedDict | |
12 | |
13 # module globals | |
14 __all__ = ['main', 'CountParser'] | |
15 | |
16 | |
def count(*items):
    """Tally how many times each (hashable) item occurs.

    Returns an OrderedDict mapping every distinct item to its number of
    occurrences; keys appear in the order the items were first seen.
    """
    tally = OrderedDict()
    for element in items:
        if element in tally:
            tally[element] += 1
        else:
            # first sighting: this also fixes the key's position
            tally[element] = 1
    return tally
23 | |
def duplicates(*items):
    """Return the set of items that occur more than once in `items`."""
    occurrences = count(*items)
    return {item for item, seen in occurrences.items() if seen > 1}
28 | |
class CountParser(argparse.ArgumentParser):
    """Argument parser for the counting/duplicates command line.

    Accepts an optional positional ``input`` file (stdin when omitted) and
    a ``--duplicates`` flag.  The namespace produced by the most recent
    ``parse_args`` call is kept on ``self.options`` (None until then).
    """

    def __init__(self, **kwargs):
        # caller-supplied keywords win; otherwise use raw-text help
        # formatting and the module docstring as the description
        fallbacks = (
            ('formatter_class', argparse.RawTextHelpFormatter),
            ('description', __doc__),
        )
        for keyword, fallback in fallbacks:
            kwargs.setdefault(keyword, fallback)
        argparse.ArgumentParser.__init__(self, **kwargs)

        # optional positional: where to read the items from
        self.add_argument(
            'input',
            nargs='?',
            type=argparse.FileType('r'),
            default=sys.stdin,
            help="file to read items from, or stdin by default",
        )

        # switch from printing counts to printing duplicates
        self.add_argument(
            '--duplicates',
            dest='duplicates',
            action='store_true',
            default=False,
            help="print (sorted) duplicates, not counts",
        )

        # filled in by parse_args
        self.options = None

    def parse_args(self, *args, **kw):
        """Parse the arguments, validate them, store and return the result."""
        parsed = argparse.ArgumentParser.parse_args(self, *args, **kw)
        self.validate(parsed)
        self.options = parsed
        return parsed

    def validate(self, options):
        """Validate parsed options (called by parse_args; currently a no-op)."""
52 | |
def main(args=sys.argv[1:]):
    """Command-line entry point: count whitespace-separated items.

    Parses `args` with CountParser, reads the whole input (a file, or
    stdin by default) and splits it on whitespace.  With ``--duplicates``
    the items occurring more than once are printed sorted, one per line;
    otherwise every distinct item is printed as ``item:count``.

    Note: the default for `args` is evaluated once, when this function is
    defined, so it reflects ``sys.argv`` as it was at import time.
    """

    # parse command line options
    parser = CountParser()
    options = parser.parse_args(args)

    # read the items; argparse.FileType opened the file for us and never
    # closes it, so close it here -- but leave the process-wide stdin alone
    try:
        # split() without arguments already ignores leading/trailing whitespace
        items = options.input.read().split()
    finally:
        if options.input is not sys.stdin:
            options.input.close()

    if options.duplicates:
        print('\n'.join(sorted(duplicates(*items))))
    else:
        # print the counts
        for key, value in count(*items).items():
            print('{}:{}'.format(key, value))
69 | |
# run the command-line interface only when executed as a script,
# not when this module is imported
if __name__ == '__main__':
    main()