Mercurial > hg > config
annotate python/count.py @ 925:a92db57f62f8 default tip
add lxml
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Mon, 20 Jan 2025 09:20:00 -0800 |
parents | 8275fa887f2b |
children |
rev | line source |
---|---|
740
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 """ |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 counting and duplication |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 """ |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 # imports |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 import argparse |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 import sys |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 from collections import OrderedDict |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 # module globals |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 __all__ = ['main', 'CountParser'] |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
15 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
16 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
17 def count(*items): |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 """count the occurrence of each (hashable) item""" |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
19 counts = OrderedDict() |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
20 for item in items: |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 counts[item] = counts.get(item, 0) + 1 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
22 return counts |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
23 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
24 def duplicates(*items): |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
25 """returns set of duplicate items""" |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
26 return set([key for key, value in count(*items).items() |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
27 if value > 1]) |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
28 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
29 class CountParser(argparse.ArgumentParser): |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
30 """CLI option parser""" |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
31 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
32 def __init__(self, **kwargs): |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
33 kwargs.setdefault('formatter_class', argparse.RawTextHelpFormatter) |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
34 kwargs.setdefault('description', __doc__) |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
35 argparse.ArgumentParser.__init__(self, **kwargs) |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
36 self.add_argument('input', nargs='?', |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
37 type=argparse.FileType('r'), default=sys.stdin, |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
38 help="file to read items from, or stdin by default") |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
39 self.add_argument('--duplicates', dest='duplicates', |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
40 action='store_true', default=False, |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
41 help="print (sorted) duplicates, not counts") |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
42 self.options = None |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
43 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
44 def parse_args(self, *args, **kw): |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
45 options = argparse.ArgumentParser.parse_args(self, *args, **kw) |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
46 self.validate(options) |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
47 self.options = options |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
48 return options |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
49 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
50 def validate(self, options): |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
51 """validate options""" |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
52 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
53 def main(args=sys.argv[1:]): |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
54 """CLI""" |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
55 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
56 # parse command line options |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
57 parser = CountParser() |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
58 options = parser.parse_args(args) |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
59 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
60 # read a thing |
800 | 61 try: |
62 items = options.input.read().strip().split() | |
63 except KeyboardInterrupt: | |
64 # probably trying to read stdin interactively | |
65 # revert! revert! revert! | |
66 return | |
740
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
67 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
68 if options.duplicates: |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
69 print ('\n'.join(sorted(duplicates(*items)))) |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
70 else: |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
71 # get the counts |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
72 for key, value in count(*items).items(): |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
73 print ('{}:{}'.format(key, value)) |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
74 |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
75 if __name__ == '__main__': |
25622fb5906d
example code for counting + duplicates
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
76 main() |