Mercurial > hg > Lemuriformes
comparison lemuriformes/issubset.py @ 15:0d1b8bb1d97b
SQL + data related functionality
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:16:52 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
14:756dbd3e391e | 15:0d1b8bb1d97b |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 """ | |
5 determine if one CSV column is a subset of another. | |
6 If it is a subset, output nothing and exit 0. | |
7 If it is not a subset, output all elements that are | |
8 in the asserted subset but not the total set | |
9 and exit 1. | |
10 Exits 2 on error. | |
11 """ | |
12 | |
13 import os | |
14 import sys | |
15 from collections import OrderedDict | |
16 from .cli import ConfigurationParser | |
17 from .uniques import uniques | |
18 | |
19 def main(args=sys.argv[1:]): | |
20 """CLI""" | |
21 | |
22 sets = OrderedDict([('subset', "the `PATH` to the CSV and `COLUMN` of the asserted subset"), | |
23 ('total', "the `PATH` to the CSV and `COLUMN` of the total set")]) | |
24 | |
25 # parse command line | |
26 parser = ConfigurationParser(description=__doc__) | |
27 for key, description in sets.items(): | |
28 parser.add_argument(key, nargs=2, | |
29 help=description) | |
30 options = parser.parse_args(args) | |
31 | |
32 # sanity | |
33 filenames = set() | |
34 for key in sets.keys(): | |
35 # - ensure the values are lists | |
36 setattr(options, key, list(getattr(options, key))) | |
37 value = getattr(options, key) | |
38 # - make files absolute | |
39 value[0] = os.path.realpath(value[0]) | |
40 # - ensure files exist | |
41 filename = value[0] | |
42 if not os.path.isfile(filename): | |
43 parser.error("Not a file: {}".format(filename)) | |
44 filenames.add(filename) | |
45 | |
46 # read the files | |
47 columns = {filename: uniques(filename) | |
48 for filename in filenames} | |
49 | |
50 # assert that the columns are in the files they have been ascribed to | |
51 for key in sets.keys(): | |
52 filename, column = getattr(options, key) | |
53 if column not in columns[filename]: | |
54 parser.error("Column '{}' not found in file '{}'".format(column, filename)) | |
55 | |
56 # calculate the difference | |
57 difference = columns[options.subset[0]][options.subset[1]].difference( | |
58 columns[options.total[0]][options.total[1]]) | |
59 if not difference: | |
60 return | |
61 print ("\n".join([str(i) for i in sorted(difference)])) | |
62 sys.exit(1) | |
63 | |
64 | |
65 if __name__ == '__main__': | |
66 main() |