annotate python/count.py @ 740:25622fb5906d

example code for counting + duplicates
author Jeff Hammel <k0scist@gmail.com>
date Wed, 27 May 2015 15:55:29 -0700
parents
children 8275fa887f2b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
740
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 """
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 counting and duplication
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 """
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 # imports
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 import argparse
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 import sys
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 from collections import OrderedDict
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13 # module globals
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14 __all__ = ['main', 'CountParser']
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def count(*items):
    """Count the occurrences of each (hashable) item.

    Each positional argument is one item.  Returns an ``OrderedDict``
    mapping every distinct item to the number of times it was passed;
    keys appear in the order in which each item was first seen.
    """
    counts = OrderedDict()
    for item in items:
        # .get() supplies 0 the first time an item is encountered
        counts[item] = counts.get(item, 0) + 1
    return counts
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def duplicates(*items):
    """Return the set of items that were passed more than once.

    Each positional argument is one (hashable) item; occurrences are
    tallied with ``count``.
    """
    # set comprehension avoids building a throwaway list first
    return {item for item, occurrences in count(*items).items()
            if occurrences > 1}
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
28
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
class CountParser(argparse.ArgumentParser):
    """CLI option parser for the counting / duplicates script.

    Defines one optional positional ``input`` (a readable file, stdin when
    omitted) and a ``--duplicates`` flag.  The namespace produced by the
    most recent ``parse_args`` call is kept on ``self.options``.
    """

    def __init__(self, **kwargs):
        """Build the parser; ``kwargs`` are passed to ``ArgumentParser``.

        ``formatter_class`` and ``description`` receive defaults only when
        the caller did not supply them.
        """
        kwargs.setdefault('formatter_class', argparse.RawTextHelpFormatter)
        kwargs.setdefault('description', __doc__)  # module docstring
        super(CountParser, self).__init__(**kwargs)
        self.add_argument('input', nargs='?',
                          type=argparse.FileType('r'), default=sys.stdin,
                          help="file to read items from, or stdin by default")
        # store_true already implies default=False and dest='duplicates'
        self.add_argument('--duplicates', action='store_true',
                          help="print (sorted) duplicates, not counts")
        # populated by parse_args()
        self.options = None

    def parse_args(self, *args, **kw):
        """Parse arguments, validate them, and remember the result.

        Returns the parsed namespace, which is also stored on
        ``self.options`` once ``validate`` has run.
        """
        options = super(CountParser, self).parse_args(*args, **kw)
        self.validate(options)
        self.options = options
        return options

    def validate(self, options):
        """Validate parsed options.

        Currently performs no checks; it is called by ``parse_args``
        before the options are stored.
        """
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
52
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=None):
    """Command-line entry point.

    ``args`` is the list of command-line arguments; when ``None`` the
    current ``sys.argv[1:]`` is used (resolved at call time rather than
    frozen when the module is imported).

    Reads whitespace-separated items from the input and prints either the
    sorted duplicates, one per line, or an ``item:count`` line for each
    distinct item.
    """
    if args is None:
        args = sys.argv[1:]

    # parse command line options
    parser = CountParser()
    options = parser.parse_args(args)

    # read the whitespace-separated items
    try:
        # split() with no separator already ignores leading/trailing blanks
        items = options.input.read().split()
    finally:
        # close files we opened, but leave the process's stdin alone
        if options.input is not sys.stdin:
            options.input.close()

    if options.duplicates:
        print('\n'.join(sorted(duplicates(*items))))
    else:
        # get the counts
        for key, value in count(*items).items():
            print('{}:{}'.format(key, value))
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
69
25622fb5906d example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# run the CLI only when executed as a script, not when imported
if __name__ == '__main__':
    main()