annotate numerics/histogram.py @ 92:5b25e0be78aa

wip
author Jeff Hammel <k0scist@gmail.com>
date Mon, 02 Mar 2015 16:08:39 -0800
parents bbe8f3e9615d
children f6d885adb3d7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 """
68
07362c531a7e stub histogram tests
Jeff Hammel <k0scist@gmail.com>
parents: 65
diff changeset
5 Histograms
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 Unicode is awesome; see http://www.alanwood.net/unicode/block_elements.html
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 blocks = """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21 # imports
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
22 import argparse
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23 import os
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24 import sys
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25 import time
75
2337abc4ebfe stubbing transposition
Jeff Hammel <k0scist@gmail.com>
parents: 74
diff changeset
26 from .data import transpose
87
8448c62a4917 almost to output; frange done
Jeff Hammel <k0scist@gmail.com>
parents: 83
diff changeset
27 from .generate import frange
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
28 from .read import CSVParser
92
Jeff Hammel <k0scist@gmail.com>
parents: 91
diff changeset
29 from .write import CSVWriter
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
30 from collections import OrderedDict
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
31
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
32 # module globals
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
33 __all__ = ['Histogram', 'HistogramParser', 'main']
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
34
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
class Histogram(object):
    """Histogram over contiguous bins defined by a sequence of edges.

    Each pair of adjacent (sorted) edges ``(vmin, vmax)`` forms one bin.
    Bins are half-open, ``vmin <= value < vmax``, except that a value
    equal to the rightmost edge is counted in the last bin.  Values
    outside the overall range are silently ignored.
    """

    def __init__(self, bins):
        """Create empty bins from the edge values in ``bins``.

        :param bins: iterable of at least two edge values; it is sorted
                     here, so any order (and any iterable, including a
                     generator) is accepted.
        :raises AssertionError: if fewer than two edges are given.
        """
        self.bins = sorted(bins)
        # Raised explicitly (rather than via ``assert``) so the check
        # survives ``python -O``; the exception type is kept for callers.
        if len(self.bins) < 2:
            raise AssertionError("a histogram needs at least two bin edges")
        # Build the bins from the *sorted* edges so every (vmin, vmax)
        # pair is well-ordered and matches ``self.bins``.
        self.data = OrderedDict([(edges, [])
                                 for edges in zip(self.bins[:-1],
                                                  self.bins[1:])])

    def add(self, *values):
        """Add values to the histogram.

        Each value is appended to the first bin whose half-open interval
        contains it.  A value equal to the upper edge of the last bin is
        appended to that last bin; any other unmatched value is dropped.
        """
        for value in values:
            for key in self.data:
                vmin, vmax = key
                if vmin <= value < vmax:
                    self.data[key].append(value)
                    break
            else:
                # No half-open bin matched; ``key``/``vmax`` still refer
                # to the last bin, so handle the rightmost endpoint.
                if value == vmax:
                    self.data[key].append(value)

    def __iadd__(self, value):
        """Support ``histogram += value`` for a single value."""
        self.add(value)
        return self

    def __call__(self, *values):
        """Add values to the histogram and return the counts.

        :returns: OrderedDict mapping each ``(vmin, vmax)`` bin to the
                  number of values currently stored in it.
        """
        self.add(*values)
        return OrderedDict([(edges, len(members))
                            for edges, members in self.data.items()])

    def keys(self):
        """Return the ``(vmin, vmax)`` bin keys in ascending order."""
        return self.data.keys()

    def max(self):
        """Return the number of values in the fullest bin."""
        return max(len(members) for members in self.data.values())
630cde28928a accessor
Jeff Hammel <k0scist@gmail.com>
parents: 73
diff changeset
76
630cde28928a accessor
Jeff Hammel <k0scist@gmail.com>
parents: 73
diff changeset
77
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
class HistogramParser(CSVParser):
    """Command-line option parser for the histogram tool."""

    def __init__(self, **kwargs):
        """Set up the parser; caller-supplied keyword arguments win
        over the defaults filled in here."""
        # Defaults: keep help text formatting verbatim and describe the
        # program with this module's docstring.
        fallback = (('formatter_class', argparse.RawTextHelpFormatter),
                    ('description', __doc__))
        for name, default in fallback:
            kwargs.setdefault(name, default)
        CSVParser.__init__(self, **kwargs)

        self.add_argument('-n', '--bins', dest='n_bins', type=int,
                          help="number of bins")
        # Optional explicit range limits; both are parsed as floats.
        limits = (('min', "minimum value; else taken from data"),
                  ('max', "maximum value, else taken from data"))
        for bound, description in limits:
            self.add_argument('--' + bound, dest=bound, type=float,
                              help=description)
        self.options = None
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
92
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
93
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """Command-line entry point.

    Reads data through ``HistogramParser``, builds one ``Histogram`` per
    data column over a shared set of bins, and assembles the output
    columns (bin lower edges, bin upper edges, then one column of counts
    per histogram).  Writing the output is not implemented yet.

    :param args: command-line arguments, without the program name.
    """

    # parse command line options
    parser = HistogramParser()
    options = parser.parse_args(args)

    # read data
    # NOTE(review): ``parser.read`` comes from CSVParser (defined in
    # ``.read``); presumably it returns the parsed rows -- confirm.
    data = parser.read()
    if not data:
        parser.error("No data given")

    # transpose to columns and cast every value to float
    columns = [[float(value) for value in column]
               for column in transpose(data)]

    # find min, max if not provided
    if options.min is None:
        options.min = min(min(column) for column in columns)
    if options.max is None:
        options.max = max(max(column) for column in columns)

    if not options.n_bins:
        # find number of bins, if not specified
        # We'll use a guess of 2 items per bin, on average.
        # Floor division keeps the count an int on Python 3 as well, and
        # the lower bound of 1 avoids asking for zero bins when there is
        # only a single row of data.
        options.n_bins = max(len(columns[0]) // 2, 1)

    # make some bins
    bins = frange(options.min, options.max, options.n_bins)

    # make some histograms
    histograms = []
    for column in columns:
        histogram = Histogram(bins)
        histogram.add(*column)
        histograms.append(histogram)

    # record delimiters for output: the first two columns are the lower
    # and upper bin edges.  ``zip`` returns an iterator on Python 3, so
    # materialize it as a list before appending the count columns.
    columns = list(zip(*histograms[0].keys()))
    for histogram in histograms:
        # calling the histogram with no values just returns its counts
        columns.append(list(histogram().values()))

    # output
    writer = None  # TODO


if __name__ == '__main__':
    main()