Mercurial > hg > Lemuriformes
annotate lemuriformes/issubset.py @ 18:56596902e9ae default tip
add some setup + tests
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:57:03 -0800 |
parents | 0d1b8bb1d97b |
children |
rev | line source |
---|---|
15
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 """ |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 determine if one CSV column is a subset of another. |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 If it is a subset, output nothing and exit 0. |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 If it is not a subset, output all elements that are |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 in the asserted subset but not the total set |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 and exits 1. |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 Exits 2 on error. |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 """ |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 import os |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 import sys |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
15 from collections import OrderedDict |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
16 from .cli import ConfigurationParser |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
17 from .uniques import uniques |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
def main(args=None):
    """Check from the command line that one CSV column is a subset of another.

    Args:
        args: command-line arguments: two positionals of two values each,
            ``subset PATH COLUMN`` followed by ``total PATH COLUMN``.
            Defaults to ``sys.argv[1:]``, resolved when the function is
            called rather than when the module is imported.

    Returns:
        None when every value in the asserted subset column also appears
        in the total column (nothing is printed).

    Raises:
        SystemExit: with status 1, after printing the sorted missing values
            one per line, when the asserted subset is not a subset.
            Invalid input (missing file, unknown column) is reported via
            ``parser.error``; presumably that exits with status 2 as in
            argparse -- confirm against ``ConfigurationParser``.
    """

    # Resolve the default at call time: a `sys.argv[1:]` default would be
    # captured once at import and ignore later changes to `sys.argv`.
    if args is None:
        args = sys.argv[1:]

    sets = OrderedDict([('subset', "the `PATH` to the CSV and `COLUMN` of the asserted subset"),
                        ('total', "the `PATH` to the CSV and `COLUMN` of the total set")])

    # parse command line
    parser = ConfigurationParser(description=__doc__)
    for key, description in sets.items():
        parser.add_argument(key, nargs=2,
                            help=description)
    options = parser.parse_args(args)

    # sanity
    filenames = set()
    for key in sets:
        # - ensure the values are lists (so the path can be replaced in place)
        value = list(getattr(options, key))
        # - make files absolute; this also collapses different spellings
        #   of the same file so it is only read once below
        value[0] = os.path.realpath(value[0])
        setattr(options, key, value)
        # - ensure files exist
        filename = value[0]
        if not os.path.isfile(filename):
            parser.error("Not a file: {}".format(filename))
        filenames.add(filename)

    # read the files
    # NOTE(review): `uniques` is used here as returning a mapping of
    # column name -> set-like of values -- confirm against its definition
    columns = {filename: uniques(filename)
               for filename in filenames}

    # assert that the columns are in the files they have been ascribed to
    for key in sets:
        filename, column = getattr(options, key)
        if column not in columns[filename]:
            parser.error("Column '{}' not found in file '{}'".format(column, filename))

    # calculate the difference: values asserted to be in the subset
    # that are absent from the total set
    subset_file, subset_column = options.subset
    total_file, total_column = options.total
    difference = columns[subset_file][subset_column].difference(
        columns[total_file][total_column])
    if not difference:
        return
    print("\n".join(str(item) for item in sorted(difference)))
    sys.exit(1)
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
63 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
64 |
0d1b8bb1d97b
SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()