Mercurial > hg > numerics
annotate numerics/histogram.py @ 138:488cb433576c
add d3 from http://d3js.org/d3.v3.min.js
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 21 Mar 2015 14:54:38 -0700 |
parents | 19f3d071bb73 |
children |
rev | line source |
---|---|
63 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 | |
4 """ | |
68 | 5 Histograms |
63 | 6 """ |
7 | |
8 # imports | |
9 import argparse | |
10 import os | |
11 import sys | |
12 import time | |
75 | 13 from .data import transpose |
87
8448c62a4917
almost to output; frange done
Jeff Hammel <k0scist@gmail.com>
parents:
83
diff
changeset
|
14 from .generate import frange |
63 | 15 from .read import CSVParser |
92 | 16 from .write import CSVWriter |
64 | 17 from collections import OrderedDict |
63 | 18 |
19 # module globals | |
64 | 20 __all__ = ['Histogram', 'HistogramParser', 'main'] |
21 | |
class Histogram(object):
    """
    histogram over a fixed set of bin edges

    Each pair of consecutive (sorted) edges ``(lower, upper)`` is one bin.
    ``self.data`` is an ``OrderedDict`` keyed by that pair, in ascending
    order, whose values are the lists of values added to the bin.
    """

    def __init__(self, bins):
        """
        bins -- iterable of at least two bin edges, in any order

        Raises ValueError if fewer than two edges are given.
        """
        # Sort once and derive the bins from the *sorted* edges, so that
        # unsorted input (or a one-shot iterator) still yields valid,
        # ascending (lower, upper) pairs.
        self.bins = sorted(bins)
        if len(self.bins) < 2:
            # an explicit raise, since `assert` is stripped under `python -O`
            raise ValueError("at least two bin edges are required")
        self.data = OrderedDict((edges, [])
                                for edges in zip(self.bins[:-1],
                                                 self.bins[1:]))

    def add(self, *values):
        """
        add values to the histogram

        Bins are half-open, ``lower <= value < upper``, except for the
        last bin, which also accepts a value equal to its upper edge.
        Values that fall in no bin are silently ignored.
        """
        for value in values:
            for (vmin, vmax), members in self.data.items():
                if vmin <= value < vmax:
                    members.append(value)
                    break
            else:
                # No half-open bin matched; the loop variables now refer
                # to the last bin, so handle the rightmost endpoint.
                if value == vmax:
                    members.append(value)

    def __iadd__(self, value):
        """``histogram += value`` adds a single value in place"""
        self.add(value)
        return self

    def __call__(self, *values):
        """
        add values to the histogram and return
        OrderedDict of counts, keyed by ``(lower, upper)`` bin
        """
        self.add(*values)
        return OrderedDict((edges, len(members))
                           for edges, members in self.data.items())

    def keys(self):
        """return the ``(lower, upper)`` bins, in ascending order"""
        return self.data.keys()

    def max(self):
        """return max length, i.e. the count of the fullest bin"""
        return max(len(members) for members in self.data.values())
63 | |
64 | |
class HistogramParser(CSVParser):
    """command line option parser for the histogram program"""

    def __init__(self, **kwargs):
        """
        Accept the same keyword arguments as ``CSVParser``; a raw-text help
        formatter and the module docstring as description are used unless
        the caller overrides them.
        """
        fallbacks = (
            ('formatter_class', argparse.RawTextHelpFormatter),
            ('description', __doc__),
        )
        for keyword, fallback in fallbacks:
            kwargs.setdefault(keyword, fallback)
        CSVParser.__init__(self, **kwargs)

        # histogram-specific options
        self.add_argument(
            '-n', '--bins',
            dest='n_bins',
            type=int,
            help="number of bins")
        self.add_argument(
            '--min',
            dest='min',
            type=float,
            help="minimum value; else taken from data")
        self.add_argument(
            '--max',
            dest='max',
            type=float,
            help="maximum value, else taken from data")

        # no options parsed yet
        self.options = None
79 | |
80 | |
def main(args=sys.argv[1:]):
    """
    CLI: read columns of numbers, histogram each column over a common
    set of bins and write the bin edges and per-column counts as rows.

    args -- command line arguments (defaults to ``sys.argv[1:]``)
    """

    # parse command line options
    parser = HistogramParser()
    options = parser.parse_args(args)

    # read data
    data = parser.read()
    if not data:
        parser.error("No data given")

    # transpose to columns
    columns = transpose(data)

    # cast to float
    columns = [[float(value) for value in column]
               for column in columns]

    # find min, max if not provided
    if options.min is None:
        options.min = min(min(column) for column in columns)
    if options.max is None:
        options.max = max(max(column) for column in columns)

    if not options.n_bins:
        # find number of bins, if not specified
        # We'll use a guess of 2 items per bin, on average.
        # Floor division keeps this an int on Python 3 as well; clamp to
        # one so that a single-row input does not ask for zero bins.
        options.n_bins = max(1, len(columns[0]) // 2)

    # make some bins; materialize them once so the same edges can be
    # reused for every column even if `frange` returns a one-shot iterator
    bins = list(frange(options.min, options.max, options.n_bins))

    # make some histograms
    histograms = []
    for column in columns:
        histogram = Histogram(bins)
        histogram.add(*column)
        histograms.append(histogram)

    # record delimiters for output: one column of lower bin edges and one
    # of upper bin edges (`zip` is an iterator on Python 3, hence `list`)
    columns = list(zip(*histograms[0].keys()))
    for histogram in histograms:
        # calling a histogram with no values just returns its counts
        columns.append(list(histogram().values()))

    # transpose back to rows
    rows = transpose(columns)

    # output
    # NOTE(review): `options.output` is presumably defined by CSVParser
    writer = CSVWriter(options.output)
    writer.write(rows)
83 | 132 |
# run the command line interface when executed as a script
if __name__ == "__main__":
    main()