view lemuriformes/uniques.py @ 17:4793f99b73e0

[lemuriformes] utility functions
author Jeff Hammel <k0scist@gmail.com>
date Sun, 10 Dec 2017 17:42:52 -0800
parents
children
line wrap: on
line source

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
count uniques in each CSV file column
"""

import argparse
import csv
import sys
from collections import OrderedDict
from .columns import read_columns


def uniques(fp):
    """
    get unique values for columns in CSV file `fp`

    `fp` -- passed straight through to `read_columns`
            (presumably an open CSV file with a header row;
            confirm against `read_columns`)

    Returns the mapping built by `read_columns(fp, type=OrderedDict)`
    with each value replaced by the `set` of its items;
    use `len()` on a value to get that column's unique count.
    """

    # read columns
    columns = read_columns(fp, type=OrderedDict)

    # convert to sets;
    # `items()` is available on both Python 2 and Python 3 mappings
    # (`iteritems()` is Python 2 only), and iterating over a snapshot
    # keeps rebinding the values safe regardless of mapping type
    for key, value in list(columns.items()):
        columns[key] = set(value)

    return columns


def main(args=sys.argv[1:]):
    """
    CLI: write one `column,unique count` CSV row to stdout
    for each column of the input file

    `args` -- command line arguments to parse; the default is the
              process arguments as they were when this function
              was defined
    """

    # parse command line
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input',
                        type=argparse.FileType('r'),
                        help="input CSV file with headers")
    options = parser.parse_args(args)

    # determine sets;
    # argparse opened the input for us, so release it once it is read
    # (`-` maps to sys.stdin, which is left open for the caller)
    try:
        columns = uniques(options.input)
    finally:
        if options.input is not sys.stdin:
            options.input.close()

    # output uniques;
    # `items()` works on Python 2 and 3, `iteritems()` only on Python 2
    writer = csv.writer(sys.stdout)
    for key, value in columns.items():
        writer.writerow([key, len(value)])


# run the CLI only when executed as a script, not when imported
if __name__ == '__main__':
    main()