annotate numerics/histogram.py @ 164:c16940bd2cee

this works
author Jeff Hammel <k0scist@gmail.com>
date Fri, 15 May 2015 16:59:09 -0700
parents 19f3d071bb73
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 """
68
07362c531a7e stub histogram tests
Jeff Hammel <k0scist@gmail.com>
parents: 65
diff changeset
5 Histograms
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 """
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 # imports
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 import argparse
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 import os
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 import sys
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12 import time
75
2337abc4ebfe stubbing transposition
Jeff Hammel <k0scist@gmail.com>
parents: 74
diff changeset
13 from .data import transpose
87
8448c62a4917 almost to output; frange done
Jeff Hammel <k0scist@gmail.com>
parents: 83
diff changeset
14 from .generate import frange
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15 from .read import CSVParser
92
Jeff Hammel <k0scist@gmail.com>
parents: 91
diff changeset
16 from .write import CSVWriter
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
17 from collections import OrderedDict
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 # module globals
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
20 __all__ = ['Histogram', 'HistogramParser', 'main']
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
21
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
class Histogram(object):
    """Histogram over a fixed set of contiguous bins.

    Each bin is the interval between two adjacent edges and is keyed in
    ``self.data`` by its ``(lower, upper)`` tuple; the value stored for a
    bin is the list of items that were added to it.  Bins are half-open,
    ``lower <= value < upper``, except the last bin which also accepts a
    value equal to its upper edge.
    """

    def __init__(self, bins):
        """
        bins -- iterable of at least two bin edges; sorted before use

        Raises AssertionError if fewer than two edges are given.
        """
        self.bins = sorted(bins)
        if len(self.bins) < 2:
            # raised explicitly (same exception type as the former `assert`)
            # so the check is not stripped when running under `python -O`
            raise AssertionError("at least two bin edges are required")

        # build the intervals from the *sorted* edges so every bin
        # satisfies lower <= upper regardless of the input order
        self.data = OrderedDict((edges, [])
                                for edges in zip(self.bins[:-1],
                                                 self.bins[1:]))

    def add(self, *values):
        """add values to the histogram

        Values that fall outside every bin are ignored.
        """
        # the last bin is closed on the right: it also takes its upper edge
        last_bin = next(reversed(self.data))
        for value in values:
            for lower, upper in self.data:
                if lower <= value < upper:
                    self.data[(lower, upper)].append(value)
                    break
            else:
                # no half-open bin matched; handle rightmost endpoint
                if value == last_bin[1]:
                    self.data[last_bin].append(value)

    def __iadd__(self, value):
        """``histogram += value`` adds a single value in place"""
        self.add(value)
        return self

    def __call__(self, *values):
        """
        add values to the histogram and return
        OrderedDict of counts, keyed by ``(lower, upper)`` bin
        """
        self.add(*values)
        return OrderedDict((edges, len(items))
                           for edges, items in self.data.items())

    def keys(self):
        """return the ``(lower, upper)`` bin keys, in ascending order"""
        return self.data.keys()

    def max(self):
        """return the number of items in the fullest bin"""
        return max(len(items) for items in self.data.values())
630cde28928a accessor
Jeff Hammel <k0scist@gmail.com>
parents: 73
diff changeset
63
630cde28928a accessor
Jeff Hammel <k0scist@gmail.com>
parents: 73
diff changeset
64
64
719029ee5e7b stubbing
Jeff Hammel <k0scist@gmail.com>
parents: 63
diff changeset
class HistogramParser(CSVParser):
    """Command-line option parser for the histogram tool.

    Extends ``CSVParser`` with the number of bins (``-n``/``--bins``) and
    optional ``--min``/``--max`` bounds for the binned range.
    """

    def __init__(self, **kwargs):
        """
        kwargs -- forwarded to ``CSVParser.__init__``; ``formatter_class``
                  and ``description`` are filled in only when not supplied
        """
        defaults = (
            ('formatter_class', argparse.RawTextHelpFormatter),
            ('description', __doc__),  # module docstring
        )
        for name, fallback in defaults:
            kwargs.setdefault(name, fallback)
        CSVParser.__init__(self, **kwargs)

        # histogram-specific options
        self.add_argument(
            '-n', '--bins',
            dest='n_bins',
            type=int,
            help="number of bins",
        )
        self.add_argument(
            '--min',
            dest='min',
            type=float,
            help="minimum value; else taken from data",
        )
        self.add_argument(
            '--max',
            dest='max',
            type=float,
            help="maximum value, else taken from data",
        )

        # NOTE(review): presumably filled in by CSVParser once arguments
        # are parsed -- confirm against numerics/read.py
        self.options = None
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
79
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
80
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """CLI: read tabular data and write one histogram per input column.

    args -- command line arguments (defaults to ``sys.argv[1:]``)

    All columns share the same bins.  Each output row holds a bin's lower
    edge, its upper edge, and then the count for every input column.
    """

    # parse command line options
    parser = HistogramParser()
    options = parser.parse_args(args)

    # read data
    data = parser.read()
    if not data:
        parser.error("No data given")

    # transpose to columns and cast every cell to float
    columns = [[float(value) for value in column]
               for column in transpose(data)]

    # find min, max if not provided
    if options.min is None:
        options.min = min(min(column) for column in columns)
    if options.max is None:
        options.max = max(max(column) for column in columns)

    if not options.n_bins:
        # find number of bins, if not specified
        # We'll use a guess of 2 items per bin, on average.
        # Floor division keeps the count an int on Python 3 as well, and
        # the lower bound of 1 avoids asking for zero bins on 1-row input.
        options.n_bins = max(1, len(columns[0]) // 2)

    # make some bins
    # NOTE(review): assumes frange returns a reusable sequence of bin
    # edges -- confirm against numerics/generate.py
    bins = frange(options.min, options.max, options.n_bins)

    # make some histograms
    histograms = []
    for column in columns:
        histogram = Histogram(bins)
        histogram.add(*column)
        histograms.append(histogram)

    # record bin delimiters for output: the (lower, upper) keys unzip into
    # one column of lower edges and one of upper edges.  zip() is an
    # iterator on Python 3, so materialise it before appending.
    output_columns = list(zip(*histograms[0].keys()))
    for histogram in histograms:
        # calling a histogram with no values just returns its counts
        output_columns.append(list(histogram().values()))

    # transpose back to rows
    rows = transpose(output_columns)

    # output
    # NOTE(review): options.output is presumably defined by CSVParser
    writer = CSVWriter(options.output)
    writer.write(rows)
83
01b8009facc8 stub: generation
Jeff Hammel <k0scist@gmail.com>
parents: 82
diff changeset
132
63
0df8bcb6d521 stubbing: unicode histograms
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# run the CLI only when executed as a script, not when imported
if __name__ == '__main__':
    main()