Mercurial > hg > config
changeset 747:a2d199008a83
wtf
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Tue, 30 Jun 2015 15:18:12 -0700 |
parents | 25622fb5906d (diff) eec5b7abff2b (current diff) |
children | b68cd77be145 |
files | bin/tofile.sh python/slice.py |
diffstat | 2 files changed, 72 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
counting and duplication
"""

# imports
import argparse
import sys
from collections import OrderedDict

# module globals
__all__ = ['main', 'CountParser']


def count(*items):
    """count the occurrence of each (hashable) item

    Returns an OrderedDict mapping each distinct item to the number of
    times it was passed, keyed in order of first appearance.
    """
    counts = OrderedDict()
    for item in items:
        counts[item] = counts.get(item, 0) + 1
    return counts


def duplicates(*items):
    """returns set of duplicate items (those passed more than once)"""
    return {key for key, value in count(*items).items() if value > 1}


class CountParser(argparse.ArgumentParser):
    """CLI option parser"""

    def __init__(self, **kwargs):
        """set up the parser; `kwargs` are passed to ArgumentParser"""
        kwargs.setdefault('formatter_class', argparse.RawTextHelpFormatter)
        # the module docstring doubles as the --help description
        kwargs.setdefault('description', __doc__)
        argparse.ArgumentParser.__init__(self, **kwargs)
        self.add_argument('input', nargs='?',
                          type=argparse.FileType('r'), default=sys.stdin,
                          help="file to read items from, or stdin by default")
        self.add_argument('--duplicates', dest='duplicates',
                          action='store_true', default=False,
                          help="print (sorted) duplicates, not counts")
        # populated with the parsed namespace by `parse_args`
        self.options = None

    def parse_args(self, *args, **kw):
        """parse, validate, and remember the options; returns them"""
        options = argparse.ArgumentParser.parse_args(self, *args, **kw)
        self.validate(options)
        self.options = options
        return options

    def validate(self, options):
        """validate options

        Currently performs no checks; called by `parse_args` before
        the options are stored.
        """


def main(args=None):
    """CLI

    args -- list of command line arguments; when None (the default)
            argparse reads sys.argv[1:] at call time, rather than a
            snapshot taken when this module was imported
    """

    # parse command line options
    parser = CountParser()
    options = parser.parse_args(args)

    # read whitespace-separated items; split() with no arguments already
    # ignores leading and trailing whitespace
    try:
        items = options.input.read().split()
    finally:
        # close a file we opened, but leave stdin alone
        if options.input is not sys.stdin:
            options.input.close()

    if options.duplicates:
        print('\n'.join(sorted(duplicates(*items))))
    else:
        # get the counts
        for key, value in count(*items).items():
            print('{}:{}'.format(key, value))


if __name__ == '__main__':
    main()