comparison python/count.py @ 740:25622fb5906d

example code for counting + duplicates
author Jeff Hammel <k0scist@gmail.com>
date Wed, 27 May 2015 15:55:29 -0700
parents
children 8275fa887f2b
comparison
equal deleted inserted replaced
738:6833137f039c 740:25622fb5906d
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 counting and duplication
6 """
7
8 # imports
9 import argparse
10 import sys
11 from collections import OrderedDict
12
13 # module globals
14 __all__ = ['main', 'CountParser']
15
16
def count(*items):
    """Tally how many times each (hashable) item occurs.

    Returns an ``OrderedDict`` mapping every distinct item to its number
    of occurrences; keys are kept in order of first appearance.
    """
    tally = OrderedDict()
    for entry in items:
        if entry in tally:
            tally[entry] += 1
        else:
            # first sighting: this fixes the item's position in the result
            tally[entry] = 1
    return tally
23
def duplicates(*items):
    """Return the set of items that appear more than once."""
    tallies = count(*items)
    return {item for item in tallies if tallies[item] > 1}
28
class CountParser(argparse.ArgumentParser):
    """Argument parser for the counting / duplicates command line."""

    def __init__(self, **kwargs):
        """Build the parser; `kwargs` are passed on to ArgumentParser."""
        # only fill in the formatter and description when the caller
        # has not chosen their own
        if 'formatter_class' not in kwargs:
            kwargs['formatter_class'] = argparse.RawTextHelpFormatter
        if 'description' not in kwargs:
            kwargs['description'] = __doc__
        super(CountParser, self).__init__(**kwargs)

        # optional positional: the file to read, stdin when omitted
        self.add_argument(
            'input',
            nargs='?',
            type=argparse.FileType('r'),
            default=sys.stdin,
            help="file to read items from, or stdin by default",
        )
        # flag: report duplicates rather than per-item counts
        self.add_argument(
            '--duplicates',
            dest='duplicates',
            action='store_true',
            default=False,
            help="print (sorted) duplicates, not counts",
        )

        # most recently parsed (and validated) options; None until then
        self.options = None

    def parse_args(self, *args, **kw):
        """Parse the arguments, validate them, remember and return them."""
        parsed = super(CountParser, self).parse_args(*args, **kw)
        self.validate(parsed)
        # stored only after validation has run
        self.options = parsed
        return parsed

    def validate(self, options):
        """validate options"""
52
def main(args=None):
    """Command line entry point.

    Reads whitespace-separated items from the input file (stdin by
    default) and prints either one `item:count` line per distinct item,
    in order of first appearance, or -- with `--duplicates` -- the
    sorted items that occur more than once, one per line.

    args -- list of command line argument strings; when None, the
            value of sys.argv[1:] at call time is used
    """

    # resolve the default at call time: a `sys.argv[1:]` default in the
    # signature would be captured once, when the module is imported
    if args is None:
        args = sys.argv[1:]

    # parse command line options
    parser = CountParser()
    options = parser.parse_args(args)

    # read the items; split() with no arguments already discards
    # leading and trailing whitespace
    try:
        items = options.input.read().split()
    finally:
        # close a file argparse opened for us, but leave stdin alone
        if options.input is not sys.stdin:
            options.input.close()

    if options.duplicates:
        print('\n'.join(sorted(duplicates(*items))))
    else:
        # get the counts
        for key, value in count(*items).items():
            print('{}:{}'.format(key, value))
69
# run the CLI only when executed as a script, not when imported
if __name__ == "__main__":
    main()