changeset 740:25622fb5906d

example code for counting + duplicates
author Jeff Hammel <k0scist@gmail.com>
date Wed, 27 May 2015 15:55:29 -0700
parents 6833137f039c
children a2d199008a83
files python/count.py
diffstat 1 files changed, 71 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/count.py	Wed May 27 15:55:29 2015 -0700
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+counting and duplication
+"""
+
+# imports
+import argparse
+import sys
+from collections import OrderedDict
+
+# names exported by a star-import (count and duplicates are not listed)
+__all__ = ['main', 'CountParser']
+
+
+def count(*items):
+    """return an OrderedDict mapping each (hashable) item to its tally"""
+    tally = OrderedDict.fromkeys(items, 0)  # keys in first-seen order
+    for entry in items:
+        tally[entry] += 1
+    return tally
+
+def duplicates(*items):
+    """return the set of items that appear more than once in `items`"""
+    tallies = count(*items)
+    return set(entry for entry in tallies if tallies[entry] > 1)
+
+class CountParser(argparse.ArgumentParser):
+    """argument parser for the counting CLI"""
+
+    def __init__(self, **kwargs):
+        kwargs.setdefault('description', __doc__)
+        kwargs.setdefault('formatter_class', argparse.RawTextHelpFormatter)
+        super(CountParser, self).__init__(**kwargs)
+        self.options = None  # filled in by parse_args()
+        self.add_argument('input', nargs='?', default=sys.stdin,
+                          type=argparse.FileType('r'),
+                          help="file to read items from, or stdin by default")
+        self.add_argument('--duplicates', action='store_true',
+                          help="print (sorted) duplicates, not counts")
+
+    def parse_args(self, *args, **kw):
+        """parse arguments, run validate() on them and keep the result"""
+        parsed = super(CountParser, self).parse_args(*args, **kw)
+        self.validate(parsed)
+        self.options = parsed
+        return parsed
+
+    def validate(self, options):
+        """hook for validating parsed options; currently a no-op"""
+
+def main(args=None):
+    """CLI entry point; `args` defaults to sys.argv[1:] read at call time"""
+
+    # a None `args` makes argparse read sys.argv[1:] when main() is called,
+    # instead of freezing the argument list when this module is imported
+    options = CountParser().parse_args(args)
+
+    # items are the whitespace-separated tokens of the input
+    items = options.input.read().split()
+
+    if options.duplicates:
+        print ('\n'.join(sorted(duplicates(*items))))
+    else:
+        # one "item:count" line per distinct item, in first-seen order
+        for key, value in count(*items).items():
+            print ('{}:{}'.format(key, value))
+
+if __name__ == '__main__':  # run the CLI only when executed as a script
+    main()