Mercurial > hg > Lemuriformes
view lemuriformes/issubset.py @ 15:0d1b8bb1d97b
SQL + data related functionality
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 10 Dec 2017 17:16:52 -0800 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
determine if one CSV column is a subset of another.
If it is a subset, output nothing and exit 0.
If it is not a subset, output all elements that are
in the asserted subset but not the total set
and exits 1.
Exits 2 on error.
"""

# NOTE: the module docstring above is user-facing: it is passed to the
# argument parser as its description and therefore shown in `--help`.

import os
import sys
from collections import OrderedDict
from .cli import ConfigurationParser
from .uniques import uniques


def main(args=sys.argv[1:]):
    """
    Command line entry point.

    Takes two positional arguments, `subset` and `total`, each made of a
    CSV `PATH` and a `COLUMN` name, and reports every element of the
    `subset` column that is missing from the `total` column.

    args -- command line arguments, excluding the program name

    Returns `None` without printing anything when the asserted subset
    really is a subset.  Otherwise prints the missing elements, sorted and
    one per line, and raises `SystemExit(1)`.  Invalid input (a path that
    is not a file, or a column missing from its file) is reported through
    `parser.error`; the module docstring documents exit status 2 for that
    case (presumably the standard argparse behaviour -- confirm against
    `ConfigurationParser`).
    """

    # positional arguments, in the order they appear on the command line
    sets = OrderedDict([('subset',
                         "the `PATH` to the CSV and `COLUMN` of the asserted subset"),
                        ('total',
                         "the `PATH` to the CSV and `COLUMN` of the total set")])

    # parse command line
    parser = ConfigurationParser(description=__doc__)
    for key, description in sets.items():
        parser.add_argument(key, nargs=2, help=description)
    options = parser.parse_args(args)

    # sanity
    filenames = set()
    for key in sets:
        path, column = getattr(options, key)

        # - make files absolute, so that one file given under two different
        #   spellings is only read once
        path = os.path.realpath(path)

        # - ensure the values are lists, holding the resolved path
        setattr(options, key, [path, column])

        # - ensure files exist
        if not os.path.isfile(path):
            parser.error("Not a file: {}".format(path))
        filenames.add(path)

    # read the files
    # (`uniques` is assumed to return a mapping of column name to a set of
    # that column's distinct values -- it is indexed by column and its
    # values are used as sets below; verify against `uniques`)
    columns = {filename: uniques(filename)
               for filename in filenames}

    # assert that the columns are in the files they have been ascribed to
    for key in sets:
        filename, column = getattr(options, key)
        if column not in columns[filename]:
            parser.error("Column '{}' not found in file '{}'".format(column, filename))

    # calculate the difference
    subset_file, subset_column = options.subset
    total_file, total_column = options.total
    difference = columns[subset_file][subset_column].difference(
        columns[total_file][total_column])
    if not difference:
        return

    # not a subset: report the offending elements and signal failure
    print("\n".join(str(i) for i in sorted(difference)))
    sys.exit(1)


if __name__ == '__main__':
    main()