Mercurial > hg > numerics
annotate numerics/read.py @ 164:c16940bd2cee
this works
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Fri, 15 May 2015 16:59:09 -0700 |
parents | 8b120c7f0cf9 |
children |
rev | line source |
---|---|
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 |
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 """ |
103
067aa27050a3
limping along towards bar charts
Jeff Hammel <k0scist@gmail.com>
parents:
101
diff
changeset
|
5 read CSV |
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 """ |
103
067aa27050a3
limping along towards bar charts
Jeff Hammel <k0scist@gmail.com>
parents:
101
diff
changeset
|
7 # TODO: support other formats |
067aa27050a3
limping along towards bar charts
Jeff Hammel <k0scist@gmail.com>
parents:
101
diff
changeset
|
8 |
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 |
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 # imports |
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 import argparse |
24 | 12 import csv |
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 import os |
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 import sys |
107
19a5c2fb52bb
add transpose functionality
Jeff Hammel <k0scist@gmail.com>
parents:
105
diff
changeset
|
15 from .data import transpose |
80 | 16 from .write import CSVWriter |
43
bcf9ec537bda
read from stdin if no files specified
Jeff Hammel <k0scist@gmail.com>
parents:
30
diff
changeset
|
17 |
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 # module globals |
103
067aa27050a3
limping along towards bar charts
Jeff Hammel <k0scist@gmail.com>
parents:
101
diff
changeset
|
19 __all__ = ['CSVSchema', 'read_csv', 'CSVParser', 'main'] |
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
# Types treated as "a path to open" rather than an already-open source.
# `unicode` only exists on Python 2; on Python 3 the bare name raises
# NameError at import time, so fall back to `str` alone there.
try:
    string = (str, unicode)
except NameError:
    string = (str,)
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 |
43
bcf9ec537bda
read from stdin if no files specified
Jeff Hammel <k0scist@gmail.com>
parents:
30
diff
changeset
|
22 |
class CSVSchema(object):
    """Read CSV rows into dicts keyed by a fixed list of column names."""

    def __init__(self, columns):
        # column names, paired positionally with the fields of each row
        self.columns = columns

    def read(self, f):
        """Return a list of dicts, one per CSV row of `f`.

        `f` is either a path (an instance of the module-level `string`
        types), which is opened and read, or any source accepted by
        `csv.reader`. `zip` stops at the shorter of `self.columns` and
        the row, so surplus fields or surplus names are dropped.
        """
        if not isinstance(f, string):
            return [dict(zip(self.columns, fields))
                    for fields in csv.reader(f)]

        # a path was given: open it and parse the resulting file object
        with open(f) as handle:
            return self.read(handle)

    __call__ = read
42 | |
43 | |
def aggregate_columns(directory, schema):
    """Merge the columns of several CSV files in `directory` into rows.

    `schema` maps a file name (joined onto `directory`) to the list of
    column names for that file.  Every file is read with `CSVSchema`, all
    files must yield the same number of rows, and the i-th rows of all
    files are merged into a single dict.  A column name that appears in
    more than one file must carry the same value in each of them.

    Returns a list of merged row dicts.

    Raises AssertionError if a file is missing, if the files do not all
    have one common row count (this includes an empty `schema`), or if a
    shared column has conflicting values.  The checks are raised
    explicitly instead of using `assert` statements so that they are not
    stripped when Python runs with `-O`.
    """

    # check for missing files
    missing = [path for path in schema
               if not os.path.exists(os.path.join(directory, path))]
    if missing:
        raise AssertionError("Missing files: {}".format(', '.join(missing)))

    # read records
    records = {filename: CSVSchema(columns).read(os.path.join(directory, filename))
               for filename, columns in schema.items()}

    # check lengths
    lengths = set(len(value) for value in records.values())
    if len(lengths) != 1:
        raise AssertionError("Differing lengths found for files")

    # build new rows
    retval = []
    for row in zip(*records.values()):
        new_row = {}
        for record in row:
            for key, value in record.items():
                # a key already seen in this row must agree with the new value
                if new_row.get(key, value) != value:
                    raise AssertionError("{} != {}".format(new_row.get(key), value))
                new_row[key] = value
        retval.append(new_row)

    return retval
72 | |
73 | |
def read_csv(*fp):
    """Read a series of CSV sources and return all rows in one list.

    Each argument is either a path (an instance of the module-level
    `string` types), which is opened for reading, or any source accepted
    by `csv.reader`.  Rows are lists of fields, collected in argument
    order.
    """
    rows = []
    for source in fp:
        if not isinstance(source, string):
            rows.extend(csv.reader(source))
        else:
            # a path: open it and recurse on the file object
            with open(source) as handle:
                rows.extend(read_csv(handle))
    return rows
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
89 |
43
bcf9ec537bda
read from stdin if no files specified
Jeff Hammel <k0scist@gmail.com>
parents:
30
diff
changeset
|
90 |
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
class CSVParser(argparse.ArgumentParser):
    """CLI option parser that also reads, post-processes and writes CSV.

    After `parse_args` the validated namespace is kept on `self.options`;
    `read`, `columns` and `write` all rely on it being set.
    """

    def __init__(self, **kwargs):
        """Register the CSV command-line arguments.

        Keyword arguments are passed on to `argparse.ArgumentParser`;
        `description` defaults to the module docstring and
        `formatter_class` to `argparse.RawTextHelpFormatter`.
        """
        kwargs.setdefault('description', __doc__)
        kwargs.setdefault('formatter_class', argparse.RawTextHelpFormatter)
        argparse.ArgumentParser.__init__(self, **kwargs)
        self.add_argument('input', nargs='*',
                          help="CSV input files to read, or read from stdin")
        self.add_argument('-+', '--add', dest='added_columns', nargs='+',
                          help="append this column")
        self.add_argument('-c', '--col', '--columns', dest='columns',
                          nargs='+', type=int,
                          help="column numbers to output, starting with 0")
        self.add_argument('-o', '--output', dest='output',
                          type=argparse.FileType('a'), default=sys.stdout,
                          help='output destination, or stdout')
        self.add_argument('--index', dest='index',
                          action='store_true', default=False,
                          help="prepend each row with numeric index")
        # set by parse_args()
        self.options = None

    def parse_args(self, *args, **kw):
        """Parse, validate and remember the options; return the namespace."""
        options = argparse.ArgumentParser.parse_args(self, *args, **kw)
        self.validate(options)
        self.options = options
        return options

    def validate(self, options):
        """validate options"""
        # with no input files given, read from stdin
        options.input = options.input or [sys.stdin]

    def read(self):
        """Read all inputs with `read_csv` and return the processed rows."""
        return self._process(read_csv(*self.options.input))

    def _process(self, data):
        """Apply the --add, --columns and --index options to `data`.

        `data` is a list of row lists.  Rows are extended and indexed in
        place; column filtering builds new row lists.  Returns the
        resulting list of rows.
        """
        options = self.options

        if options.added_columns:
            # add columns: every row gets the same extra values appended
            for row in data:
                row.extend(options.added_columns)

        if options.columns:
            # filter by column, in the order the column numbers were given
            data = [[row[column] for column in options.columns]
                    for row in data]

        if options.index:
            # prepend numeric index
            for index, row in enumerate(data):
                row.insert(0, index)

        # return processed data
        return data

    def columns(self):
        """Return `transpose(self.read())`: columns instead of rows."""
        return transpose(self.read())

    def write(self, data):
        """write data to specified CSV destination"""
        # TODO: support more formats
        CSVWriter(self.options.output).write(data)
45
ef915968d104
put this in the parser so that i can use this in convert
Jeff Hammel <k0scist@gmail.com>
parents:
43
diff
changeset
|
154 |
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
def main(args=sys.argv[1:]):
    """Command-line entry point: read CSV, optionally transpose, write it.

    `args` defaults to the command-line arguments captured when this
    function was defined.
    """

    # build the parser and extend it with the transpose switch
    cli = CSVParser()
    cli.add_argument('--transpose', dest='transpose',
                     action='store_true', default=False,
                     help="transpose columns and rows")
    parsed = cli.parse_args(args)

    # read CSV: columns when transposing, rows otherwise
    load = cli.columns if parsed.transpose else cli.read

    # write CSV
    cli.write(load())
11 | 173 |
5
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
174 if __name__ == '__main__': |
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
175 main() |
d5447d401c44
serializaion; pandas probably does this
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
176 |