annotate numerics/histogram.py @ 76:7faa0112ef9f

much of CLI
author Jeff Hammel <k0scist@gmail.com>
date Sun, 01 Mar 2015 08:51:45 -0800
parents 2337abc4ebfe
children dcfce20597a6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 """
68
07362c531a7e stub histogram tests
Jeff Hammel <k0scist@gmail.com>
parents: 65
diff changeset
5 Histograms
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 Unicode is awesome; see http://www.alanwood.net/unicode/block_elements.html
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 blocks = """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21 # imports
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
22 import argparse
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23 import os
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24 import sys
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25 import time
75
2337abc4ebfe stubbing transposition
Jeff Hammel <k0scist@gmail.com>
parents: 74
diff changeset
26 from .data import transpose
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
27 from .read import CSVParser
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
28 from collections import OrderedDict
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
29
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
30 # module globals
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
31 __all__ = ['Histogram', 'HistogramParser', 'main']
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
32
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
class Histogram(object):
    """Histogram that sorts values into contiguous bins.

    The bin edges are sorted and every adjacent pair ``(vmin, vmax)``
    becomes one half-open bin ``[vmin, vmax)``.  The last bin is also
    closed on the right, so a value equal to the largest edge is counted.
    Values outside the overall range are silently ignored.
    """

    def __init__(self, bins):
        """
        bins -- iterable of at least two bin edges, in any order

        Raises ValueError if fewer than two edges are given.
        """
        # Build the intervals from the *sorted* edges; using the raw
        # argument would yield inverted intervals for unsorted input.
        self.bins = sorted(bins)
        if len(self.bins) < 2:
            raise ValueError("at least two bin edges are required")
        # (vmin, vmax) -> list of the values that fell into that bin,
        # kept in ascending bin order
        self.data = OrderedDict(
            (edges, []) for edges in zip(self.bins[:-1], self.bins[1:]))

    def add(self, *values):
        """add values to the histogram"""
        for value in values:
            for (vmin, vmax), members in self.data.items():
                if vmin <= value < vmax:
                    members.append(value)
                    break
            else:
                # No half-open bin matched.  After the loop, vmax and
                # members still refer to the last bin, which is closed on
                # the right: handle the rightmost endpoint.
                if value == vmax:
                    members.append(value)

    def __iadd__(self, value):
        """support ``histogram += value``; adds a single value"""
        self.add(value)
        return self

    def __call__(self, *values):
        """
        add values to the histogram and return
        OrderedDict of counts
        """
        self.add(*values)
        return OrderedDict((edges, len(members))
                           for edges, members in self.data.items())

    def keys(self):
        """return the (vmin, vmax) bin intervals in ascending order"""
        return self.data.keys()

    def max(self):
        """return max length"""
        return max(len(members) for members in self.data.values())
630cde28928a accessor
Jeff Hammel <k0scist@gmail.com>
parents: 73
diff changeset
74
630cde28928a accessor
Jeff Hammel <k0scist@gmail.com>
parents: 73
diff changeset
75
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
class HistogramParser(CSVParser):
    """command-line argument parser for the histogram program"""

    def __init__(self, **kwargs):
        # Supply defaults only for keys the caller left out; `__doc__`
        # here resolves to the module-level docstring.
        fallbacks = (
            ('formatter_class', argparse.RawTextHelpFormatter),
            ('description', __doc__),
        )
        for key, fallback in fallbacks:
            kwargs.setdefault(key, fallback)
        CSVParser.__init__(self, **kwargs)

        # histogram-specific options
        self.add_argument('-n', '--bins', dest='n_bins', type=int,
                          help="number of bins")
        bounds = (
            ('min', "minimum value; else taken from data"),
            ('max', "maximum value, else taken from data"),
        )
        for bound, description in bounds:
            self.add_argument('--' + bound, dest=bound, type=float,
                              help=description)

        # initialised to None here; nothing else in this class sets it
        self.options = None
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
90
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
91
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """command line entry point"""

    # build the parser and consume the command-line arguments
    cli = HistogramParser()
    options = cli.parse_args(args)

    # load the input rows; report through the parser when there are none
    # NOTE(review): presumably error() exits as argparse's does -- confirm
    rows = cli.read()
    if not rows:
        cli.error("No data given")

    # pivot the rows into columns
    columns = transpose(rows)
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
106
76
7faa0112ef9f much of CLI
Jeff Hammel <k0scist@gmail.com>
parents: 75
diff changeset
107
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# run the CLI only when this file is executed as a script, not on import
if __name__ == '__main__':
    main()