annotate lemuriformes/issubset.py @ 15:0d1b8bb1d97b

SQL + data related functionality
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 17:16:52 -0800
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 """
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 determine if one CSV column is a subsset of another.
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 If it is a subset, output nothing and exit 0.
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 If it is not a subset, output all elements that are
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 in the asserted subset but not the total set
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 and exits 1.
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 Exits 2 on error.
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 """
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13 import os
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14 import sys
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
15 from collections import OrderedDict
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
16 from .cli import ConfigurationParser
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
17 from .uniques import uniques
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
18
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19 def main(args=sys.argv[1:]):
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
20 """CLI"""
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
21
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
22 sets = OrderedDict([('subset', "the `PATH` to the CSV and `COLUMN` of the asserted subset"),
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
23 ('total', "the `PATH` to the CSV and `COLUMN` of the total set")])
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
24
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
25 # parse command line
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
26 parser = ConfigurationParser(description=__doc__)
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
27 for key, description in sets.items():
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
28 parser.add_argument(key, nargs=2,
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
29 help=description)
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
30 options = parser.parse_args(args)
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
31
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
32 # sanity
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
33 filenames = set()
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
34 for key in sets.keys():
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
35 # - ensure the values are listw
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
36 setattr(options, key, list(getattr(options, key)))
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
37 value = getattr(options, key)
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
38 # - make files absolute
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
39 value[0] = os.path.realpath(value[0])
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
40 # - ensure files exist
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
41 filename = value[0]
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
42 if not os.path.isfile(filename):
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
43 parser.error("Not a file: {}".format(filename))
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
44 filenames.add(filename)
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
45
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
46 # read the files
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
47 columns = {filename: uniques(filename)
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
48 for filename in filenames}
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
49
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
50 # assert that the columns are in the files they have been ascribed to
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
51 for key in sets.keys():
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
52 filename, column = getattr(options, key)
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
53 if column not in columns[filename]:
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
54 parser.error("Column '{}' not found in file '{}'".format(column, filename))
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
55
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
56 # calculate the difference
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
57 difference = columns[options.subset[0]][options.subset[1]].difference(
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
58 columns[options.total[0]][options.total[1]])
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
59 if not difference:
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
60 return
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
61 print ("\n".join([str(i) for i in sorted(difference)]))
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
62 sys.exit(1)
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
63
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
64
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
65 if __name__ == '__main__':
0d1b8bb1d97b SQL + data related functionality
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
66 main()