annotate numerics/cat_columns.py @ 172:dc0a620a0368

add another example
author Jeff Hammel <k0scist@gmail.com>
date Fri, 03 Jul 2015 11:11:01 -0700
parents 18c0820bfe12
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
62
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 """
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 output columns of various CSV files
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 Example::
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 > cat-columns foo.csv:0 bar.csv:1,2,3 fleem.csv
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 This will generate CSV output with the first column from `foo.csv`,
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12 the 2nd, 3rd, and 4th columns from `bar.csv`,
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13 and all columns from `fleem.csv`.
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14 """
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16 # imports
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17 import argparse
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18 import csv
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 import os
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20 import sys
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21 import time
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
22 from collections import OrderedDict
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23 from .read import read_csv
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25 # module globals
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
26 __all__ = ['cat_columns', 'CatColumnParser', 'main']
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
27
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def cat_columns(csv_files):
    """
    Concatenate selected columns of several CSV files side by side.

    csv_files -- an iterable of 2-tuples of `path`, columns;
                 `columns` is a sequence of 0-based column indices to
                 keep (in that order), or None to keep every column

    Returns a list of rows, each a list of strings.  Row N of the result
    is row N of every file's selected columns, joined left to right in
    the order the files were given.  Files with fewer rows than the
    longest one, and cells missing from short rows, are filled with
    empty strings so that columns stay aligned.
    """

    tables = []  # one (selected_rows, width) pair per input file
    for path, columns in csv_files:
        # newline='' lets the csv module handle line endings itself
        with open(path, newline='') as f:
            rows = list(csv.reader(f))

        if columns is None:
            # keep everything: the file is as wide as its widest row
            columns = range(max((len(row) for row in rows), default=0))
        else:
            columns = list(columns)

        selected = [[row[index] if -len(row) <= index < len(row) else ''
                     for index in columns]
                    for row in rows]
        tables.append((selected, len(columns)))

    height = max((len(selected) for selected, _ in tables), default=0)
    result = []
    for number in range(height):
        combined = []
        for selected, width in tables:
            if number < len(selected):
                combined.extend(selected[number])
            else:
                # this file ran out of rows; keep later columns aligned
                combined.extend([''] * width)
        result.append(combined)
    return result
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
34
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
class CatColumnParser(argparse.ArgumentParser):
    """Command-line argument parser for the column concatenation tool"""

    def __init__(self, **kwargs):
        # only fill in values the caller did not supply
        defaults = (('formatter_class', argparse.RawTextHelpFormatter),
                    ('description', __doc__))
        for key, value in defaults:
            kwargs.setdefault(key, value)
        argparse.ArgumentParser.__init__(self, **kwargs)

        # positional: one or more `path` or `path:columns` specifications
        self.add_argument(
            'csv', nargs='+',
            help="path to CSV files and columns to output, delimited by ':' and comma-separated")
        # optional: destination file, opened in append mode
        self.add_argument(
            '-o', '--output', dest='output',
            type=argparse.FileType('a'), default=sys.stdout,
            help="where to output to, or stdout by default")

        # the most recently parsed options; set by `parse_args`
        self.options = None

    def parse_args(self, *args, **kw):
        """parse the arguments, validate them, remember and return them"""
        parsed = argparse.ArgumentParser.parse_args(self, *args, **kw)
        self.validate(parsed)
        self.options = parsed
        return parsed

    def validate(self, options):
        """hook for checking parsed options; currently performs no checks"""
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
56
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """
    CLI: parse the arguments, concatenate the columns, write CSV

    args -- command line arguments, without the program name

    Each positional argument is either `path` (all columns) or
    `path:c1,c2,...` (0-based column indices).  Exits through
    `parser.error` if a column specification is not a comma-separated
    list of integers or if any of the files does not exist.
    """

    # parse command line options
    parser = CatColumnParser()
    options = parser.parse_args(args)

    # get the data;
    # a list rather than a mapping keyed by path, so that the same file
    # may be given more than once with different columns
    csv_files = []
    missing = []
    for item in options.csv:
        path, columns = item, None
        if ':' in item:
            # split on the last ':' so earlier colons stay in the path
            path, spec = item.rsplit(':', 1)
            spec = spec.strip()
            if spec:
                try:
                    columns = [int(column) for column in spec.split(',')]
                except ValueError:
                    parser.error(
                        "Invalid column specification: '{}'".format(item))
        if not os.path.exists(path):
            missing.append(path)
            continue
        csv_files.append((path, columns))
    if missing:
        parser.error("File(s) not found:\n{}".format('\n'.join(missing)))

    # concatenate the rows
    data = cat_columns(csv_files)

    # output it; a None result is treated as "nothing to write"
    writer = csv.writer(options.output)
    writer.writerows(data or [])
    options.output.flush()
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
91
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# run the command line interface when executed as a script
if __name__ == '__main__':
    main()
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
94
18c0820bfe12 stub concatenating columns
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
95