annotate numerics/histogram.py @ 91:bbe8f3e9615d

fix bug
author Jeff Hammel <k0scist@gmail.com>
date Mon, 02 Mar 2015 15:59:17 -0800
parents 6ca75832116d
children 5b25e0be78aa
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 """
68
07362c531a7e stub histogram tests
Jeff Hammel <k0scist@gmail.com>
parents: 65
diff changeset
5 Histograms
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 Unicode is awesome; see http://www.alanwood.net/unicode/block_elements.html
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 blocks = """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21 # imports
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
22 import argparse
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23 import os
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24 import sys
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25 import time
75
2337abc4ebfe stubbing transposition
Jeff Hammel <k0scist@gmail.com>
parents: 74
diff changeset
26 from .data import transpose
87
8448c62a4917 almost to output; frange done
Jeff Hammel <k0scist@gmail.com>
parents: 83
diff changeset
27 from .generate import frange
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
28 from .read import CSVParser
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
29 from collections import OrderedDict
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
30
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
31 # module globals
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
32 __all__ = ['Histogram', 'HistogramParser', 'main']
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
33
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
class Histogram(object):
    """Histogram that files values into contiguous bins.

    The bin edges are sorted, and every pair of neighbouring edges
    ``(vmin, vmax)`` becomes one bin.  A bin holds the values with
    ``vmin <= value < vmax``; the last bin additionally accepts a value
    equal to its upper edge.  Values outside the overall range are
    silently ignored.
    """

    def __init__(self, bins):
        """Build empty bins from the iterable of edges `bins`.

        At least two edges are required.
        """
        # Sort (and thereby materialize) the edges exactly once and use
        # that sorted list for everything below, so unsorted input and
        # one-shot iterators both give well-formed intervals.
        self.bins = sorted(bins)
        # kept as an assert so callers see the same AssertionError as before
        assert len(self.bins) > 1, "at least two bin edges are required"
        # (vmin, vmax) -> list of the values that fell into that bin,
        # ordered from the lowest bin to the highest
        self.data = OrderedDict(
            (edges, []) for edges in zip(self.bins[:-1], self.bins[1:]))

    def add(self, *values):
        """add values to the histogram"""
        rightmost = (self.bins[-2], self.bins[-1])
        for value in values:
            for vmin, vmax in self.data:
                if vmin <= value < vmax:
                    self.data[(vmin, vmax)].append(value)
                    break
            else:
                # no half-open bin matched: the only value still accepted
                # is the rightmost endpoint, which goes into the last bin
                if value == rightmost[1]:
                    self.data[rightmost].append(value)

    def __iadd__(self, value):
        """Support ``histogram += value`` for a single value."""
        self.add(value)
        return self

    def __call__(self, *values):
        """
        add values to the histogram and return
        OrderedDict of counts
        """
        self.add(*values)
        return OrderedDict((edges, len(members))
                           for edges, members in self.data.items())

    def keys(self):
        """Return the ``(vmin, vmax)`` bin keys, lowest bin first."""
        return self.data.keys()

    def max(self):
        """return max length"""
        return max(len(members) for members in self.data.values())
630cde28928a accessor
Jeff Hammel <k0scist@gmail.com>
parents: 73
diff changeset
75
630cde28928a accessor
Jeff Hammel <k0scist@gmail.com>
parents: 73
diff changeset
76
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
class HistogramParser(CSVParser):
    """Command-line option parser for the histogram tool.

    Extends `CSVParser` with the number of bins and optional lower and
    upper bounds for the binned range.
    """

    def __init__(self, **kwargs):
        """Fill in default keyword arguments, then register the options."""
        # only applied when the caller did not pass these keys
        fallbacks = (('formatter_class', argparse.RawTextHelpFormatter),
                     ('description', __doc__))
        for key, fallback in fallbacks:
            kwargs.setdefault(key, fallback)
        CSVParser.__init__(self, **kwargs)

        self.add_argument('-n', '--bins', dest='n_bins', type=int,
                          help="number of bins")

        # the two float-valued range limits share the same shape
        limits = (('--min', 'min', "minimum value; else taken from data"),
                  ('--max', 'max', "maximum value, else taken from data"))
        for flag, dest, text in limits:
            self.add_argument(flag, dest=dest, type=float, help=text)

        self.options = None
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
91
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
92
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """Command-line entry point: bin every column of the input data.

    `args` is the list of command-line arguments to parse.  The default
    is the value ``sys.argv[1:]`` had when this module was imported.

    One `Histogram` is built per data column over a shared set of bin
    edges, and the bin edges plus per-column counts are collected as
    rows.  Writing those rows out is not implemented yet, so nothing is
    printed or returned.
    """

    # parse command line options
    parser = HistogramParser()
    options = parser.parse_args(args)

    # read data
    # NOTE(review): presumably reads the rows selected by the parsed
    # options -- confirm against CSVParser.read
    data = parser.read()
    if not data:
        parser.error("No data given")

    # transpose to columns and cast to float
    columns = [[float(value) for value in column]
               for column in transpose(data)]

    # find min, max if not provided
    if options.min is None:
        options.min = min(min(column) for column in columns)
    if options.max is None:
        options.max = max(max(column) for column in columns)

    if not options.n_bins:
        # find number of bins, if not specified
        # We'll use a guess of 2 items per bin, on average.
        # Floor division keeps this an int on Python 3 as well, and a
        # single-row input still gets one bin instead of zero.
        options.n_bins = max(len(columns[0]) // 2, 1)

    # make some bins; materialized once because the same edges are
    # handed to every histogram below
    bins = list(frange(options.min, options.max, options.n_bins))

    # make some histograms
    histograms = []
    for column in columns:
        histogram = Histogram(bins)
        histogram.add(*column)
        histograms.append(histogram)

    # record delimiters for output: first the lower edges, then the
    # upper edges, then one row of counts per data column.
    # list() is required because zip() and dict.values() are lazy
    # on Python 3 and have no append().
    output = list(zip(*histograms[0].keys()))
    for histogram in histograms:
        output.append(list(histogram().values()))

    # output
    writer = None  # TODO
83
01b8009facc8 stub: generation
Jeff Hammel <k0scist@gmail.com>
parents: 82
diff changeset
140
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
141 if __name__ == '__main__':
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
142 main()