comparison lemuriformes/issubset.py @ 15:0d1b8bb1d97b

SQL + data related functionality
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 17:16:52 -0800
parents
children
comparison
equal deleted inserted replaced
14:756dbd3e391e 15:0d1b8bb1d97b
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 determine if one CSV column is a subset of another.
6 If it is a subset, output nothing and exit 0.
7 If it is not a subset, output all elements that are
8 in the asserted subset but not the total set
9 and exit 1.
10 Exits 2 on error.
11 """
12
13 import os
14 import sys
15 from collections import OrderedDict
16 from .cli import ConfigurationParser
17 from .uniques import uniques
18
def main(args=None):
    """
    CLI: check that one CSV column is a subset of another.

    Takes two positional arguments, `subset` and `total`, each given as
    a `PATH COLUMN` pair.  Every element found in the subset column but
    missing from the total column is printed, one per line, in sorted order.

    :param args: command-line arguments, without the program name;
                 defaults to `sys.argv[1:]`, read when `main` is called.
    :returns: `None` when the asserted subset really is a subset.
    :raises SystemExit: with code 1 when it is not a subset.  Usage
                        problems (missing file, unknown column) are
                        reported through `parser.error`; the module
                        docstring states that errors exit with 2.
    """

    # Resolve the default at call time: a `sys.argv[1:]` default in the
    # signature is evaluated once at import and is shared between calls.
    if args is None:
        args = sys.argv[1:]

    sets = OrderedDict([('subset', "the `PATH` to the CSV and `COLUMN` of the asserted subset"),
                        ('total', "the `PATH` to the CSV and `COLUMN` of the total set")])

    # parse command line
    parser = ConfigurationParser(description=__doc__)
    for key, description in sets.items():
        parser.add_argument(key, nargs=2,
                            help=description)
    options = parser.parse_args(args)

    # sanity
    filenames = set()
    for key in sets:
        # - ensure the values are lists
        setattr(options, key, list(getattr(options, key)))
        value = getattr(options, key)
        # - make files absolute, so that the same file given under two
        #   different spellings is only read once
        value[0] = os.path.realpath(value[0])
        # - ensure files exist
        filename = value[0]
        if not os.path.isfile(filename):
            parser.error("Not a file: {}".format(filename))
        filenames.add(filename)

    # read the files
    # NOTE(review): `uniques` is presumably a mapping of column name to
    # the set of unique values in that column -- confirm in `.uniques`
    columns = {filename: uniques(filename)
               for filename in filenames}

    # assert that the columns are in the files they have been ascribed to
    for key in sets:
        filename, column = getattr(options, key)
        if column not in columns[filename]:
            parser.error("Column '{}' not found in file '{}'".format(column, filename))

    # calculate the difference
    subset_file, subset_column = options.subset
    total_file, total_column = options.total
    difference = columns[subset_file][subset_column].difference(
        columns[total_file][total_column])
    if not difference:
        return

    # not a subset: report the offending elements and signal failure
    print("\n".join(str(i) for i in sorted(difference)))
    sys.exit(1)
63
64
# run the CLI only when this module is executed, not when it is imported
if __name__ == "__main__":
    main()