Mercurial > hg > numerics
annotate numerics/histogram.py @ 92:5b25e0be78aa
wip
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Mon, 02 Mar 2015 16:08:39 -0800 |
parents | bbe8f3e9615d |
children | f6d885adb3d7 |
rev | line source |
---|---|
63 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 | |
4 """ | |
68 | 5 Histograms |
63 | 6 |
7 Unicode is awesome; see http://www.alanwood.net/unicode/block_elements.html | |
8 """ | |
9 | |
10 blocks = """ | |
11 █ | |
12 ▉ | |
13 ▊ | |
14 ▋ | |
15 ▌ | |
16 ▍ | |
17 ▎ | |
18 ▏ | |
19 """ | |
20 | |
21 # imports | |
22 import argparse | |
23 import os | |
24 import sys | |
25 import time | |
75 | 26 from .data import transpose |
87
8448c62a4917
almost to output; frange done
Jeff Hammel <k0scist@gmail.com>
parents:
83
diff
changeset
|
27 from .generate import frange |
63 | 28 from .read import CSVParser |
92 | 29 from .write import CSVWriter |
64 | 30 from collections import OrderedDict |
63 | 31 |
32 # module globals | |
64 | 33 __all__ = ['Histogram', 'HistogramParser', 'main'] |
34 | |
class Histogram(object):
    """
    Histogram over a fixed set of bin edges.

    Each pair of consecutive (sorted) edges ``(low, high)`` is one bin,
    holding the values with ``low <= value < high``.  The last bin also
    accepts a value equal to its upper edge.  The raw values are kept per
    bin in ``self.data``, an OrderedDict keyed by ``(low, high)``.
    """

    def __init__(self, bins):
        """
        bins -- iterable of at least two bin edges, in any order

        Raises ValueError when fewer than two edges are given.
        """
        # Build the bins from the *sorted* edges: unordered input would
        # otherwise produce inverted intervals that can never match.
        self.bins = sorted(bins)
        if len(self.bins) < 2:
            raise ValueError("Histogram needs at least two bin edges")
        self.data = OrderedDict([(edges, [])
                                 for edges in zip(self.bins[:-1],
                                                  self.bins[1:])])

    def add(self, *values):
        """
        add values to the histogram

        A value that falls outside every bin is silently ignored.
        """
        for value in values:
            for (vmin, vmax), members in self.data.items():
                if vmin <= value < vmax:
                    members.append(value)
                    break
            else:
                # No half-open bin matched.  After the loop the names
                # still refer to the last bin, whose upper edge is
                # treated as inclusive (rightmost endpoint).
                if value == vmax:
                    members.append(value)

    def __iadd__(self, value):
        """support ``histogram += value`` for a single value"""
        self.add(value)
        return self

    def __call__(self, *values):
        """
        add values to the histogram and return
        an OrderedDict mapping each bin to its count
        """
        self.add(*values)
        return OrderedDict([(edges, len(members))
                            for edges, members in self.data.items()])

    def keys(self):
        """return the ``(low, high)`` bin keys in ascending order"""
        return self.data.keys()

    def max(self):
        """return the number of values in the fullest bin"""
        return max(len(members) for members in self.data.values())
77 | |
class HistogramParser(CSVParser):
    """command line option parser for the histogram program"""

    def __init__(self, **kwargs):
        """
        Set up the parser.

        Keyword arguments are passed on to CSVParser; the help formatter
        and the description are defaulted only when the caller did not
        supply them.
        """
        fallbacks = (
            ('formatter_class', argparse.RawTextHelpFormatter),
            ('description', __doc__),  # resolves to the module docstring
        )
        for key, fallback in fallbacks:
            kwargs.setdefault(key, fallback)
        CSVParser.__init__(self, **kwargs)

        # histogram-specific options
        self.add_argument('-n', '--bins', dest='n_bins', type=int,
                          help="number of bins")
        bounds = (
            ('min', "minimum value; else taken from data"),
            ('max', "maximum value, else taken from data"),
        )
        for bound, description in bounds:
            self.add_argument('--' + bound, dest=bound, type=float,
                              help=description)

        self.options = None
92 | |
93 | |
def main(args=sys.argv[1:]):
    """
    CLI entry point.

    Parses the command line, reads the data through the parser, converts
    every column to floats and builds one Histogram per column over a
    shared set of bins.  The final output step is not implemented yet.

    args -- list of command line arguments (without the program name)
    """

    # parse command line options
    parser = HistogramParser()
    options = parser.parse_args(args)

    # read data
    data = parser.read()
    if not data:
        parser.error("No data given")

    # transpose to columns and cast to float
    columns = [[float(value) for value in column]
               for column in transpose(data)]

    # find min, max if not provided
    if options.min is None:
        options.min = min(min(column) for column in columns)
    if options.max is None:
        options.max = max(max(column) for column in columns)

    if not options.n_bins:
        # find number of bins, if not specified
        # We'll use a guess of 2 items per bin, on average.
        # Floor division keeps this an integer on Python 3 as well.
        # NOTE(review): a single data row gives 0 bins here -- confirm
        # how frange handles that.
        options.n_bins = len(columns[0]) // 2

    # make some bins
    bins = frange(options.min, options.max, options.n_bins)

    # make some histograms
    histograms = []
    for column in columns:
        histogram = Histogram(bins)
        histogram.add(*column)
        histograms.append(histogram)

    # record delimiters for output: first the lower and upper bin edges,
    # then one column of counts per histogram.
    # zip() and dict.values() are lazy on Python 3, so materialise them
    # as lists before appending.
    columns = list(zip(*histograms[0].keys()))
    for histogram in histograms:
        columns.append(list(histogram().values()))

    # output
    writer = None  # TODO


if __name__ == '__main__':
    main()