62
|
1 #!/usr/bin/env python
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 """
|
|
5 output columns of various CSV files
|
|
6
|
|
7 Example::
|
|
8
|
|
9 > cat-columns foo.csv:0 bar.csv:1,2,3 fleem.csv
|
|
10
|
|
11 This will generate CSV output with the first column from `foo.csv`,
|
|
12 the 2nd, 3rd, and 4th columns from `bar.csv`,
|
|
13 and all columns from `fleem.csv`.
|
|
14 """
|
|
15
|
|
16 # imports
|
|
17 import argparse
|
|
18 import csv
|
|
19 import os
|
|
20 import sys
|
|
21 import time
|
|
22 from collections import OrderedDict
|
|
23 from .read import read_csv
|
|
24
|
|
25 # module globals
|
|
26 __all__ = ['cat_columns', 'CatColumnParser', 'main']
|
|
27
|
|
def cat_columns(csv_files):
    """
    concatenate selected columns of several CSV files side by side

    csv_files -- an iterable of 2-tuples of `path`, columns
                 where `columns` is a sequence of 0-based column indices
                 (negative indices count from the end of each row)
                 or None to take every column of that file

    Returns a list of rows, each a list of strings.  Output row N is the
    selected cells of row N of every file, in the order the files were given.
    A cell that does not exist (short row, or a file with fewer rows than
    the longest one) is emitted as an empty string so the columns stay aligned.
    """

    # one (selected rows, width) pair per input file;
    # the width is needed to pad files that run out of rows early
    tables = []
    for path, columns in csv_files:
        # newline='' is what the csv module asks for so that newlines
        # embedded in quoted fields are handled by the reader itself
        with open(path, newline='') as f:
            rows = list(csv.reader(f))

        if columns is None:
            # all columns: pad ragged rows out to the widest row in the file
            width = max((len(row) for row in rows), default=0)
            selected = [row + [''] * (width - len(row)) for row in rows]
        else:
            width = len(columns)
            selected = [[row[index] if -len(row) <= index < len(row) else ''
                         for index in columns]
                        for row in rows]
        tables.append((selected, width))

    # stitch the per-file tables together, row by row
    length = max((len(selected) for selected, _ in tables), default=0)
    result = []
    for number in range(length):
        combined = []
        for selected, width in tables:
            if number < len(selected):
                combined.extend(selected[number])
            else:
                combined.extend([''] * width)
        result.append(combined)
    return result
|
|
34
|
|
class CatColumnParser(argparse.ArgumentParser):
    """argparse-based parser for the cat-columns command line"""

    def __init__(self, **kwargs):
        # only fill in values the caller did not supply
        fallbacks = (
            ('formatter_class', argparse.RawTextHelpFormatter),
            ('description', __doc__),
        )
        for key, value in fallbacks:
            kwargs.setdefault(key, value)
        super(CatColumnParser, self).__init__(**kwargs)

        # positional: one or more `path` or `path:columns` items
        self.add_argument(
            'csv', nargs='+',
            help="path to CSV files and columns to output, delimited by ':' and comma-separated")
        # optional: output destination, opened in append mode
        self.add_argument(
            '-o', '--output', dest='output',
            type=argparse.FileType('a'), default=sys.stdout,
            help="where to output to, or stdout by default")

        # set to the parsed namespace by `parse_args`
        self.options = None

    def parse_args(self, *args, **kw):
        """parse the arguments, run `validate` on them, remember and return them"""
        parsed = super(CatColumnParser, self).parse_args(*args, **kw)
        self.validate(parsed)
        self.options = parsed
        return parsed

    def validate(self, options):
        """validate options (currently performs no checks)"""
|
|
56
|
|
def main(args=sys.argv[1:]):
    """
    CLI: parse the arguments, gather the requested columns, write them as CSV

    args -- command line arguments (defaults to `sys.argv[1:]`);
            each positional item is either `path` (all columns)
            or `path:col[,col...]` with integer column indices

    Exits through `parser.error` when a column specification is not a
    comma-separated list of integers or when any named file does not exist.
    """

    # parse command line options
    parser = CatColumnParser()
    options = parser.parse_args(args)

    # get the data
    # a list of (path, columns) pairs rather than a dict keyed by path,
    # so that the same file may be named more than once
    csv_files = []
    missing = []
    for item in options.csv:
        path, columns = item, None
        if ':' in item:
            # split on the last ':' only; everything before it is the path
            path, spec = item.rsplit(':', 1)
            spec = spec.strip()
            if spec:
                try:
                    columns = [int(column) for column in spec.split(',')]
                except ValueError:
                    # report bad input as a usage error, not a traceback
                    parser.error("Invalid column specification: {}".format(item))
        if not os.path.exists(path):
            missing.append(path)
        csv_files.append((path, columns))
    if missing:
        parser.error("File(s) not found:\n{}".format('\n'.join(missing)))

    # concatenate the rows
    # NOTE(review): relies on `cat_columns` returning an iterable of rows
    data = cat_columns(csv_files)

    # output it
    writer = csv.writer(options.output)
    writer.writerows(data)
    options.output.flush()
|
|
91
|
|
# run the command line interface when executed as a script
if __name__ == '__main__':
    main()
|
|
94
|
|
95
|