# HG changeset patch # User Jeff Hammel # Date 1512957423 28800 # Node ID 56596902e9ae9e2cf167a2a90dd6c68d84b1e972 # Parent 4793f99b73e009d1f4aaa81ab7a826136bfd0499 add some setup + tests diff -r 4793f99b73e0 -r 56596902e9ae setup.py --- a/setup.py Sun Dec 10 17:42:52 2017 -0800 +++ b/setup.py Sun Dec 10 17:57:03 2017 -0800 @@ -25,6 +25,15 @@ [console_scripts] csv2sql = orion.csv2sql:main csv2sqlite = orion.csv2sqlite:main + dedupe = orion.deduplicate:main + dictlist2csv = orion.serialize:main + freeport = orion.port:main + issubset = orion.issubset:main + json2csv = orion.json2csv:main + sql-count = orion.count:main + table-size = orion.table_size:main + table2csv = orion.table2csv:main + uniques = orion.uniques:main """ kw['install_requires'] = dependencies except ImportError: diff -r 4793f99b73e0 -r 56596902e9ae tests/test_cast.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_cast.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +""" +data casting functions +""" + +import unittest +from lemuriformes import cast + +class TestCast(unittest.TestCase): + + def test_isiterable(self): + assert not cast.isiterable(1) + assert cast.isiterable(range(5)) + + def test_make_iterable(self): + iterval = cast.iterable(1) + assert cast.isiterable(iterval) + assert list(iterval) == [1] + assert cast.iterable([1,2,3]) == [1,2,3] + + def test_inference(self): + ints = ['1', '2', '3'] + floats = ['1', '2', '3.5'] + strings = ['1', '2', "I'm a kitty!"] + + assert cast.infer(ints) == int + assert cast.infer(floats) == float + assert cast.infer(strings) == str + + def test_casting(self): + ints = ['1', '2', '3'] + floats = ['1', '2', '3.5'] + strings = ['1', '2', "I'm a kitty!"] + + assert cast.cast(ints) == [1, 2, 3] + assert cast.cast(floats) == [1., 2., 3.5] + assert cast.cast(strings) == strings + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_chunk.py --- /dev/null Thu Jan 01 00:00:00 
1970 +0000 +++ b/tests/test_chunk.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,30 @@ +#!/usr/bin/env python + +""" +test data chunking +""" + +import os +import unittest +from lemuriformes.chunk import chunk + +class TestChunking(unittest.TestCase): + + def test_range(self): + """test basic chunking with range function""" + + data = list(range(64)) + chunked = list(chunk(data, 10)) + assert len(chunked) == 7 + sizes = set([len(c) for c in + chunked[:-1]]) + assert len(sizes) == 1 + assert sizes.pop() == 10 + reconstructed = [] + for _chunk in chunked: + reconstructed.extend(_chunk) + assert reconstructed == list(range(64)) + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_columns.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_columns.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +import csv +import os +import shutil +import tempfile +import unittest +from collections import OrderedDict +from orion.columns import read_columns +from orion.transpose import transpose +from StringIO import StringIO + + +class TestColumns(unittest.TestCase): + + # fake data + header = ["Animal", "Baby", "Group"] + data = [("Fox", "Kit", "Leash"), + ("Whale", "Calf", "Pod"), + ("Cuttlefish", "Hatchling", "Bob"), + ("Crow", "Hatchling", "Murder")] + + def create_data(self, fp): + """write simple data to a test file and return its path""" + + writer = csv.writer(fp) + writer.writerow(self.header) + writer.writerows(self.data) + fp.flush() + + def fake_data_assertions(self, columns): + """test the columns created from our fake data""" + + assert len(columns) == len(self.header) + assert set(columns.keys()) == set(self.header) + transposed = transpose(self.data) + assert set([len(row) for row in transposed]) == set([len(self.data)]) + for key, column in zip(self.header, transposed): + assert len(columns[key]) == len(self.data) + assert columns[key] == column + + def test_string(self): + """basic 
test of columns interface""" + + # write fake data + fileobj = StringIO() + self.create_data(fileobj) + fileobj.seek(0) + + # read into columns + columns = read_columns(fileobj) + + # test what we have + self.fake_data_assertions(columns) + + def test_file(self): + """write to a real file and use that""" + + tmpdir = tempfile.mkdtemp() + try: + # write the data + path = os.path.join(tmpdir, 'tmp.csv') + with open(path, 'w') as f: + self.create_data(f) + assert os.path.exists(path) + + # read into columns + columns = read_columns(path) + + # test what we have + self.fake_data_assertions(columns) + finally: + shutil.rmtree(tmpdir, ignore_errors=True) + + def test_ordered_dict(self): + """ensure we can make use of an OrderedDict""" + + # write fake data + fileobj = StringIO() + self.create_data(fileobj) + fileobj.seek(0) + + # read into columns + columns = read_columns(fileobj, type=OrderedDict) + + # test what we have + self.fake_data_assertions(columns) + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_deduplicate.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_deduplicate.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +""" +test deduplication +""" + +import os +import unittest +from lemuriformes.deduplicate import deduplicate + + +class TestDeduplicate(unittest.TestCase): + + def test_deduplication(self): + """basic deduplication smoketest""" + + data = [{'a': 'B', 'c': 1, 'd': 'd'}, + {'a': 'a', 'c': 2, 'd': 'e'}, + {'a': 'F', 'c': 1, 'd': 'G'}, # duplicate! + {'a': 'H', 'c': 3, 'd': 'I'}, + {'a': 'b', 'c': 4, 'd': 'J'}, # duplicate! 
+ {'a': 'K', 'c': 5, 'd': 'L'}, + ] + + deduped = deduplicate(data) + expected = [{'a': 'B', 'c': 1, 'd': 'd'}, + {'a': 'a', 'c': 2, 'd': 'e'}, + {'a': 'H', 'c': 3, 'd': 'I'}, + {'a': 'K', 'c': 5, 'd': 'L'}, + ] + assert expected == deduped + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_json2csv.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_json2csv.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +""" +test JSON to CSV conversion +""" + +import unittest +from lemuriformes import json2csv + + +class TestJSON2CSV(unittest.TestCase): + + def test_flatten(self): + data = [{'a': 'B', 'c': 1, 'd': 'd'}, + {'a': 'a', 'c': 2, 'd': 'e'}, + {'a': 'H', 'c': 3, 'd': 'I'}, + {'a': 'K', 'c': 5, 'd': 'L'}, + ] + header = ['a', 'c', 'd'] + expected = [['B', 1, 'd'], + ['a', 2, 'e'], + ['H', 3, 'I'], + ['K', 5, 'L'], + ] + + received_header, flattened = json2csv.flatten_list_of_dicts(data, header=header) + assert flattened == expected + assert received_header == header + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_logging.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_logging.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,59 @@ +#!/usr/bin/env python + +""" +unit-type tests for logging +""" + +import json +import os +import tempfile +import unittest +from lemuriformes.log import ElasticLogger, read_logfile + + +class TestLogging(unittest.TestCase): + """test various types of logging supported""" + + def test_log_line(self): + """test logging a single JSON line""" + + message = "I was here" + logfile = tempfile.mktemp() + + assert not os.path.exists(logfile) + logger = ElasticLogger(logfile) + logger(message) + + assert os.path.exists(logfile) + with open(logfile) as f: + data = f.read() + assert json.loads(data)['message'] == message + os.remove(logfile) + + def test_metadata(self): + """test setting global + local data 
on a logger""" + + message = "I was here" + logfile = tempfile.mktemp() + + assert not os.path.exists(logfile) + logger = ElasticLogger(logfile, source='truth') + logger(message, me="k0s") + logger("Hi") + logger(message, me="SomeoneElse") + + assert os.path.exists(logfile) + log_data = read_logfile(logfile) + os.remove(logfile) + assert len(log_data) == 3 + assert all([line.get('source') == 'truth' + for line in log_data]) + assert log_data[0]['message'] == message + assert log_data[-1]['message'] == message + assert log_data[0]['me'] == 'k0s' + assert log_data[-1]['me'] == 'SomeoneElse' + + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_path.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_path.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,18 @@ +import os +from lemuriformes.path import TempDir + + +def test_tempdir_contextmanager(): + """ + ensure our temporary directory contextmanager works + and does what we want it to + """ + + tmpdir = None + with TempDir() as td: + tmpdir = td + assert os.path.exists(tmpdir) + assert os.path.isdir(tmpdir) + assert not os.path.exists(tmpdir) + assert not os.path.isdir(tmpdir) + diff -r 4793f99b73e0 -r 56596902e9ae tests/test_transpose.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_transpose.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +""" +test array transposition +""" + +import os +import unittest +from lemuriformes.transpose import transpose + +class TestTranspose(unittest.TestCase): + + def test_basic(self): + """transpose a basic array""" + + array = [[1,2,3], + [4,5,6], + [7,8,9]] + expected = [[1,4,7], + [2,5,8], + [3,6,9]] + transposition = transpose(array) + assert transposition == expected + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_uniques.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_uniques.py Sun Dec 10 17:57:03 2017 -0800 @@ 
-0,0 +1,69 @@ +#!/usr/bin/env python + +""" +test CSV file uniques counter +""" + +import csv +import os +import shutil +import tempfile +import unittest +from lemuriformes.uniques import uniques +from StringIO import StringIO + + +class TestUniques(unittest.TestCase): + + header = ['a', 'b', 'c'] + dataset = [[1,2,3], + [1,1,2], + [1,2,4]] + + def write_dataset(self, fp): + + writer = csv.writer(fp) + writer.writerow(self.header) + writer.writerows(self.dataset) + fp.flush() + + def validate(self, uniq): + """validate `uniq` for object dataset""" + + # count them + counts = {key: len(value) + for key, value in uniq.items()} + assert counts['a'] == 1 + assert counts['b'] == 2 + assert counts['c'] == 3 + + + def test_file(self): + """test reading uniques from a file""" + + tmpdir = tempfile.mkdtemp() + try: + # write test data + dst = os.path.join(tmpdir, 'test.csv') + with open(dst, 'w') as fp: + self.write_dataset(fp) + + # determine uniques + self.validate(uniques(dst)) + + finally: + shutil.rmtree(tmpdir, ignore_errors=True) + + def test_buffer(self): + """test using a StringIO buffer object""" + + # write test data to buffer + buffer = StringIO() + self.write_dataset(buffer) + buffer.seek(0) + + # determine uniques + self.validate(uniques(buffer)) + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_waiter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_waiter.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +""" +test bash waiting function +""" + +import os +import shutil +import tempfile +import unittest +from lemuriformes import waiter + +string = (str, unicode) + + +class TestWaiter(unittest.TestCase): + + def test_render(self): + commands = ['echo {}'.format(item) + for item in ('hello', 'world')] + hello = str(waiter.BashWaiter(*commands)) + assert isinstance(hello, string) + + +if __name__ == '__main__': + unittest.main()