Mercurial > hg > config
comparison python/count.py @ 740:25622fb5906d
example code for counting + duplicates
| author | Jeff Hammel <k0scist@gmail.com> | 
|---|---|
| date | Wed, 27 May 2015 15:55:29 -0700 | 
| parents | |
| children | 8275fa887f2b | 
   comparison
  equal
  deleted
  inserted
  replaced
| 738:6833137f039c | 740:25622fb5906d | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 """ | |
| 5 counting and duplication | |
| 6 """ | |
| 7 | |
| 8 # imports | |
| 9 import argparse | |
| 10 import sys | |
| 11 from collections import OrderedDict | |
| 12 | |
| 13 # module globals | |
| 14 __all__ = ['main', 'CountParser'] | |
| 15 | |
| 16 | |
| 17 def count(*items): | |
| 18 """count the occurrence of each (hashable) item""" | |
| 19 counts = OrderedDict() | |
| 20 for item in items: | |
| 21 counts[item] = counts.get(item, 0) + 1 | |
| 22 return counts | |
| 23 | |
| 24 def duplicates(*items): | |
| 25 """returns set of duplicate items""" | |
| 26 return set([key for key, value in count(*items).items() | |
| 27 if value > 1]) | |
| 28 | |
| 29 class CountParser(argparse.ArgumentParser): | |
| 30 """CLI option parser""" | |
| 31 | |
| 32 def __init__(self, **kwargs): | |
| 33 kwargs.setdefault('formatter_class', argparse.RawTextHelpFormatter) | |
| 34 kwargs.setdefault('description', __doc__) | |
| 35 argparse.ArgumentParser.__init__(self, **kwargs) | |
| 36 self.add_argument('input', nargs='?', | |
| 37 type=argparse.FileType('r'), default=sys.stdin, | |
| 38 help="file to read items from, or stdin by default") | |
| 39 self.add_argument('--duplicates', dest='duplicates', | |
| 40 action='store_true', default=False, | |
| 41 help="print (sorted) duplicates, not counts") | |
| 42 self.options = None | |
| 43 | |
| 44 def parse_args(self, *args, **kw): | |
| 45 options = argparse.ArgumentParser.parse_args(self, *args, **kw) | |
| 46 self.validate(options) | |
| 47 self.options = options | |
| 48 return options | |
| 49 | |
| 50 def validate(self, options): | |
| 51 """validate options""" | |
| 52 | |
| 53 def main(args=sys.argv[1:]): | |
| 54 """CLI""" | |
| 55 | |
| 56 # parse command line options | |
| 57 parser = CountParser() | |
| 58 options = parser.parse_args(args) | |
| 59 | |
| 60 # read a thing | |
| 61 items = options.input.read().strip().split() | |
| 62 | |
| 63 if options.duplicates: | |
| 64 print ('\n'.join(sorted(duplicates(*items)))) | |
| 65 else: | |
| 66 # get the counts | |
| 67 for key, value in count(*items).items(): | |
| 68 print ('{}:{}'.format(key, value)) | |
| 69 | |
| 70 if __name__ == '__main__': | |
| 71 main() | 
