Mercurial > hg > numerics
annotate numerics/histogram.py @ 90:6ca75832116d
wip
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Mon, 02 Mar 2015 15:58:02 -0800 |
parents | ae8b90fed06f |
children | bbe8f3e9615d |
rev | line source |
---|---|
63 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 | |
4 """ | |
68 | 5 Histograms |
63 | 6 |
7 Unicode is awesome; see http://www.alanwood.net/unicode/block_elements.html | |
8 """ | |
9 | |
# Unicode block-element glyphs, one per line, ordered from the full block
# (U+2588) down to the left one-eighth block (U+258F).
# NOTE(review): not referenced elsewhere in the visible code; presumably
# intended for rendering bars with sub-character resolution -- confirm.
blocks = """
█
▉
▊
▋
▌
▍
▎
▏
"""
20 | |
21 # imports | |
22 import argparse | |
23 import os | |
24 import sys | |
25 import time | |
75 | 26 from .data import transpose |
87
8448c62a4917
almost to output; frange done
Jeff Hammel <k0scist@gmail.com>
parents:
83
diff
changeset
|
27 from .generate import frange |
63 | 28 from .read import CSVParser |
64 | 29 from collections import OrderedDict |
63 | 30 |
31 # module globals | |
64 | 32 __all__ = ['Histogram', 'HistogramParser', 'main'] |
33 | |
class Histogram(object):
    """
    histogram: sorts values into half-open intervals [lower, upper)

    `bins` is any iterable of at least two bin edges; it is sorted on
    construction and each pair of adjacent edges forms one interval.
    The last interval is closed on the right so that a value equal to the
    largest edge is still counted.  Values outside the range of the edges
    are silently ignored.

    Attributes:
    - bins : sorted list of bin edges
    - data : OrderedDict mapping (lower, upper) -> list of values that
             fell into that interval, in ascending interval order
    """

    def __init__(self, bins):
        # materialize and sort once, then use the sorted edges everywhere;
        # this also allows `bins` to be a generator or other iterator
        self.bins = sorted(bins)
        if len(self.bins) < 2:
            # explicit exception rather than `assert`, which is stripped
            # when python runs with -O
            raise ValueError("Histogram requires at least two bin edges")
        self.data = OrderedDict([(interval, [])
                                 for interval in zip(self.bins[:-1],
                                                     self.bins[1:])])

    def add(self, *values):
        """add values to the histogram; out-of-range values are dropped"""
        # the last interval is the only one closed on the right
        last_interval = next(reversed(self.data))
        upper_bound = last_interval[1]
        for value in values:
            for (vmin, vmax), members in self.data.items():
                if vmin <= value < vmax:
                    members.append(value)
                    break
            else:
                # no half-open interval matched
                if value == upper_bound:
                    # handle rightmost endpoint
                    self.data[last_interval].append(value)

    def __iadd__(self, value):
        """`histogram += value` adds a single value in place"""
        self.add(value)
        return self

    def __call__(self, *values):
        """
        add values to the histogram and return
        OrderedDict of counts
        """
        self.add(*values)
        return OrderedDict([(interval, len(members))
                            for interval, members in self.data.items()])

    def keys(self):
        """return the (lower, upper) intervals, in ascending order"""
        return self.data.keys()

    def max(self):
        """return max length, i.e. the count of the fullest interval"""
        return max(len(members) for members in self.data.values())
75 | |
76 | |
class HistogramParser(CSVParser):
    """
    command-line option parser for the histogram CLI

    Extends CSVParser with options for the number of bins and for the
    lower and upper bounds of the histogram range.
    """

    def __init__(self, **kwargs):
        # caller-supplied keyword arguments win; otherwise fall back to
        # raw-text help formatting and this module's docstring
        fallbacks = (('formatter_class', argparse.RawTextHelpFormatter),
                     ('description', __doc__))
        for name, fallback in fallbacks:
            kwargs.setdefault(name, fallback)
        CSVParser.__init__(self, **kwargs)

        # histogram-specific options
        self.add_argument('-n', '--bins',
                          dest='n_bins',
                          type=int,
                          help="number of bins")
        self.add_argument('--min',
                          dest='min',
                          type=float,
                          help="minimum value; else taken from data")
        self.add_argument('--max',
                          dest='max',
                          type=float,
                          help="maximum value, else taken from data")

        # NOTE(review): presumably populated by CSVParser when arguments
        # are parsed -- confirm against the base class
        self.options = None
91 | |
92 | |
def main(args=sys.argv[1:]):
    """
    CLI: read columns of numbers and bin each column into a histogram

    args -- command line arguments, excluding the program name

    The histogram range defaults to the smallest and largest value over
    all columns and the bin count to roughly two items per bin.
    Output is not implemented yet (see the TODO at the end).
    """

    # parse command line options
    parser = HistogramParser()
    options = parser.parse_args(args)

    # read data
    data = parser.read()
    if not data:
        parser.error("No data given")

    # transpose to columns and cast to float
    columns = [[float(value) for value in column]
               for column in transpose(data)]

    # find min, max if not provided
    if options.min is None:
        options.min = min(min(column) for column in columns)
    if options.max is None:
        options.max = max(max(column) for column in columns)

    if not options.n_bins:
        # find number of bins, if not specified
        # We'll use a guess of 2 items per bin, on average;
        # floor division keeps the count an integer on python 3 and the
        # lower bound of 1 avoids asking for zero bins on a single row
        options.n_bins = max(1, len(columns[0]) // 2)

    # make some bins; materialized as a list because the same edges are
    # reused for every column and frange may return a one-shot iterator
    bins = list(frange(options.min, options.max, options.n_bins))

    # make some histograms
    histograms = []
    for column in columns:
        histogram = Histogram(bins)
        histogram.add(*column)
        histograms.append(histogram)

    # record delimiters for output: first the lower and upper edges of
    # the bins, then one column of per-bin counts for each histogram
    output_columns = list(zip(*histograms[0].keys()))
    for histogram in histograms:
        # Histogram defines no `values` method; calling the histogram
        # with no arguments returns its OrderedDict of counts.
        # NOTE(review): assumes counts (not the raw binned values) are
        # what should be written -- confirm once output is implemented
        output_columns.append(list(histogram().values()))

    # TODO: output -- `output_columns` is not written anywhere yet
83 | 140 |
# run the command line interface when executed as a script
if __name__ == "__main__":
    main()