63
|
1 #!/usr/bin/env python
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 """
|
68
|
5 Histograms
|
63
|
6
|
|
7 Unicode is awesome; see http://www.alanwood.net/unicode/block_elements.html
|
|
8 """
|
|
9
|
|
10 blocks = """
|
|
11 █
|
|
12 ▉
|
|
13 ▊
|
|
14 ▋
|
|
15 ▌
|
|
16 ▍
|
|
17 ▎
|
|
18 ▏
|
|
19 """
|
|
20
|
|
21 # imports
|
|
22 import argparse
|
|
23 import os
|
|
24 import sys
|
|
25 import time
|
75
|
26 from .data import transpose
|
63
|
27 from .read import CSVParser
|
64
|
28 from collections import OrderedDict
|
63
|
29
|
|
30 # module globals
|
64
|
31 __all__ = ['Histogram', 'HistogramParser', 'main']
|
|
32
|
|
class Histogram(object):
    """histogram: collects values into contiguous bins given by their edges"""

    def __init__(self, bins):
        """
        bins -- iterable of at least two bin edges, in any order;
                consecutive pairs of the sorted edges become the
                (min, max) keys of `self.data`
        """
        self.bins = sorted(bins)
        assert len(self.bins) > 1, "at least two bin edges are required"
        # build the keys from the *sorted* edges so that the bins are
        # contiguous and ascending regardless of the order given
        self.data = OrderedDict([(edges, [])
                                 for edges in zip(self.bins[:-1],
                                                  self.bins[1:])])

    def add(self, *values):
        """
        add values to the histogram

        Every bin is half-open, [min, max), except the last one, which
        also contains its upper edge.  Values that fall in no bin are
        silently ignored.
        """
        # the last key is the rightmost bin since the keys are ascending
        rightmost = next(reversed(self.data))
        for value in values:
            for vmin, vmax in self.data:
                if vmin <= value < vmax:
                    self.data[(vmin, vmax)].append(value)
                    break
            else:
                if value == rightmost[1]:
                    # handle rightmost endpoint
                    self.data[rightmost].append(value)

    def __iadd__(self, value):
        """`histogram += value` adds a single value in place"""
        self.add(value)
        return self

    def __call__(self, *values):
        """
        add values to the histogram and return
        OrderedDict of counts
        """
        self.add(*values)
        return OrderedDict([(edges, len(members))
                            for edges, members in self.data.items()])

    def keys(self):
        """return the (min, max) bin keys in ascending order"""
        return self.data.keys()

    def max(self):
        """return max length"""
        return max(len(members) for members in self.data.values())
|
|
74
|
|
75
|
64
|
class HistogramParser(CSVParser):
    """command line option parser for histograms"""

    def __init__(self, **kwargs):
        # keyword arguments given by the caller win over these defaults
        defaults = (('formatter_class', argparse.RawTextHelpFormatter),
                    ('description', __doc__))
        for key, fallback in defaults:
            kwargs.setdefault(key, fallback)
        CSVParser.__init__(self, **kwargs)

        # number of bins
        self.add_argument('-n', '--bins', dest='n_bins', type=int,
                          help="number of bins")

        # optional bounds of the value range
        bounds = (('min', "minimum value; else taken from data"),
                  ('max', "maximum value, else taken from data"))
        for bound, description in bounds:
            self.add_argument('--' + bound, dest=bound, type=float,
                              help=description)

        self.options = None
|
|
90
|
|
91
|
|
def main(args=sys.argv[1:]):
    """
    CLI: parse the options, read the data and settle the value range

    args -- command line arguments; defaults to `sys.argv[1:]` as
            evaluated when this function is defined
    """

    # parse command line options
    parser = HistogramParser()
    options = parser.parse_args(args)

    # read data
    data = parser.read()
    if not data:
        parser.error("No data given")

    # transpose to columns
    columns = transpose(data)

    # find min, max if not provided
    if options.min is None:
        options.min = min(min(column) for column in columns)
    if options.max is None:
        # the maximum belongs in `options.max`; writing it to
        # `options.min` would clobber the minimum and leave `max` unset
        options.max = max(max(column) for column in columns)
|
76
|
112
|
63
|
113 if __name__ == '__main__':
|
|
114 main()
|