# HG changeset patch # User Jeff Hammel # Date 1512957423 28800 # Node ID 56596902e9ae9e2cf167a2a90dd6c68d84b1e972 # Parent 4793f99b73e009d1f4aaa81ab7a826136bfd0499 add some setup + tests diff -r 4793f99b73e0 -r 56596902e9ae setup.py --- a/setup.py Sun Dec 10 17:42:52 2017 -0800 +++ b/setup.py Sun Dec 10 17:57:03 2017 -0800 @@ -25,6 +25,15 @@ [console_scripts] csv2sql = orion.csv2sql:main csv2sqlite = orion.csv2sqlite:main + dedupe = orion.deduplicate:main + dictlist2csv = orion.serialize:main + freeport = orion.port:main + issubset = orion.issubset:main + json2csv = orion.json2csv:main + sql-count = orion.count:main + table-size = orion.table_size:main + table2csv = orion.table2csv:main + uniques = orion.uniques:main """ kw['install_requires'] = dependencies except ImportError: diff -r 4793f99b73e0 -r 56596902e9ae tests/test_cast.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_cast.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +""" +data casting functions +""" + +import unittest +from lemuriformes import cast + +class TestCast(unittest.TestCase): + + def test_isiterable(self): + assert not cast.isiterable(1) + assert cast.isiterable(range(5)) + + def test_make_iterable(self): + iterval = cast.iterable(1) + assert cast.isiterable(iterval) + assert list(iterval) == [1] + assert cast.iterable([1,2,3]) == [1,2,3] + + def test_inference(self): + ints = ['1', '2', '3'] + floats = ['1', '2', '3.5'] + strings = ['1', '2', "I'm a kitty!"] + + assert cast.infer(ints) == int + assert cast.infer(floats) == float + assert cast.infer(strings) == str + + def test_casting(self): + ints = ['1', '2', '3'] + floats = ['1', '2', '3.5'] + strings = ['1', '2', "I'm a kitty!"] + + assert cast.cast(ints) == [1, 2, 3] + assert cast.cast(floats) == [1., 2., 3.5] + assert cast.cast(strings) == strings + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_chunk.py --- /dev/null Thu Jan 01 00:00:00 
1970 +0000 +++ b/tests/test_chunk.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,30 @@ +#!/usr/bin/env python + +""" +test data chunking +""" + +import os +import unittest +from lemuriformes.chunk import chunk + +class TestChunking(unittest.TestCase): + + def test_range(self): + """test basic chunking with range function""" + + data = list(range(64)) + chunked = list(chunk(data, 10)) + assert len(chunked) == 7 + sizes = set([len(c) for c in + chunked[:-1]]) + assert len(sizes) == 1 + assert sizes.pop() == 10 + reconstructed = [] + for _chunk in chunked: + reconstructed.extend(_chunk) + assert reconstructed == list(range(64)) + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_columns.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_columns.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +import csv +import os +import shutil +import tempfile +import unittest +from collections import OrderedDict +from orion.columns import read_columns +from orion.transpose import transpose +from StringIO import StringIO + + +class TestColumns(unittest.TestCase): + + # fake data + header = ["Animal", "Baby", "Group"] + data = [("Fox", "Kit", "Leash"), + ("Whale", "Calf", "Pod"), + ("Cuttlefish", "Hatchling", "Bob"), + ("Crow", "Hatchling", "Murder")] + + def create_data(self, fp): + """write simple data to a test file and return its path""" + + writer = csv.writer(fp) + writer.writerow(self.header) + writer.writerows(self.data) + fp.flush() + + def fake_data_assertions(self, columns): + """test the columns created from our fake data""" + + assert len(columns) == len(self.header) + assert set(columns.keys()) == set(self.header) + transposed = transpose(self.data) + assert set([len(row) for row in transposed]) == set([len(self.data)]) + for key, column in zip(self.header, transposed): + assert len(columns[key]) == len(self.data) + assert columns[key] == column + + def test_string(self): + """basic 
test of columns interface""" + + # write fake data + fileobj = StringIO() + self.create_data(fileobj) + fileobj.seek(0) + + # read into columns + columns = read_columns(fileobj) + + # test what we have + self.fake_data_assertions(columns) + + def test_file(self): + """write to a real file and use that""" + + tmpdir = tempfile.mkdtemp() + try: + # write the data + path = os.path.join(tmpdir, 'tmp.csv') + with open(path, 'w') as f: + self.create_data(f) + assert os.path.exists(path) + + # read into columns + columns = read_columns(path) + + # test what we have + self.fake_data_assertions(columns) + finally: + shutil.rmtree(tmpdir, ignore_errors=True) + + def test_ordered_dict(self): + """ensure we can make use of an OrderedDict""" + + # write fake data + fileobj = StringIO() + self.create_data(fileobj) + fileobj.seek(0) + + # read into columns + columns = read_columns(fileobj, type=OrderedDict) + + # test what we have + self.fake_data_assertions(columns) + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_deduplicate.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_deduplicate.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +""" +test deduplication +""" + +import os +import unittest +from lemuriformes.deduplicate import deduplicate + + +class TestDeduplicate(unittest.TestCase): + + def test_deduplication(self): + """basic deduplication smoketest""" + + data = [{'a': 'B', 'c': 1, 'd': 'd'}, + {'a': 'a', 'c': 2, 'd': 'e'}, + {'a': 'F', 'c': 1, 'd': 'G'}, # duplicate! + {'a': 'H', 'c': 3, 'd': 'I'}, + {'a': 'b', 'c': 4, 'd': 'J'}, # duplicate! 
+ {'a': 'K', 'c': 5, 'd': 'L'}, + ] + + deduped = deduplicate(data) + expected = [{'a': 'B', 'c': 1, 'd': 'd'}, + {'a': 'a', 'c': 2, 'd': 'e'}, + {'a': 'H', 'c': 3, 'd': 'I'}, + {'a': 'K', 'c': 5, 'd': 'L'}, + ] + assert expected == deduped + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_json2csv.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_json2csv.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +""" +test JSON to CSV conversion +""" + +import unittest +from lemuriformes import json2csv + + +class TestJSON2CSV(unittest.TestCase): + + def test_flatten(self): + data = [{'a': 'B', 'c': 1, 'd': 'd'}, + {'a': 'a', 'c': 2, 'd': 'e'}, + {'a': 'H', 'c': 3, 'd': 'I'}, + {'a': 'K', 'c': 5, 'd': 'L'}, + ] + header = ['a', 'c', 'd'] + expected = [['B', 1, 'd'], + ['a', 2, 'e'], + ['H', 3, 'I'], + ['K', 5, 'L'], + ] + + received_header, flattened = json2csv.flatten_list_of_dicts(data, header=header) + assert flattened == expected + assert received_header == header + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_logging.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_logging.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,59 @@ +#!/usr/bin/env python + +""" +unit-type tests for logging +""" + +import json +import os +import tempfile +import unittest +from lemuriformes.log import ElasticLogger, read_logfile + + +class TestLogging(unittest.TestCase): + """test various types of logging supported""" + + def test_log_line(self): + """test logging a single JSON line""" + + message = "I was here" + logfile = tempfile.mktemp() + + assert not os.path.exists(logfile) + logger = ElasticLogger(logfile) + logger(message) + + assert os.path.exists(logfile) + with open(logfile) as f: + data = f.read() + assert json.loads(data)['message'] == message + os.remove(logfile) + + def test_metadata(self): + """test setting global + local data 
on a logger""" + + message = "I was here" + logfile = tempfile.mktemp() + + assert not os.path.exists(logfile) + logger = ElasticLogger(logfile, source='truth') + logger(message, me="k0s") + logger("Hi") + logger(message, me="SomeoneElse") + + assert os.path.exists(logfile) + log_data = read_logfile(logfile) + os.remove(logfile) + assert len(log_data) == 3 + assert all([line.get('source') == 'truth' + for line in log_data]) + assert log_data[0]['message'] == message + assert log_data[-1]['message'] == message + assert log_data[0]['me'] == 'k0s' + assert log_data[-1]['me'] == 'SomeoneElse' + + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_path.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_path.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,18 @@ +import os +from lemuriformes.path import TempDir + + +def test_tempdir_contextmanager(): + """ + ensure our temporary directory contextmanager works + and does what we want it to + """ + + tmpdir = None + with TempDir() as td: + tmpdir = td + assert os.path.exists(tmpdir) + assert os.path.isdir(tmpdir) + assert not os.path.exists(tmpdir) + assert not os.path.isdir(tmpdir) + diff -r 4793f99b73e0 -r 56596902e9ae tests/test_transpose.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_transpose.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +""" +test array transposition +""" + +import os +import unittest +from lemuriformes.transpose import transpose + +class TestTranspose(unittest.TestCase): + + def test_basic(self): + """transpose a basic array""" + + array = [[1,2,3], + [4,5,6], + [7,8,9]] + expected = [[1,4,7], + [2,5,8], + [3,6,9]] + transposition = transpose(array) + assert transposition == expected + + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_uniques.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_uniques.py Sun Dec 10 17:57:03 2017 -0800 @@ 
-0,0 +1,69 @@ +#!/usr/bin/env python + +""" +test CSV file uniques counter +""" + +import csv +import os +import shutil +import tempfile +import unittest +from lemuriformes.uniques import uniques +from StringIO import StringIO + + +class TestUniques(unittest.TestCase): + + header = ['a', 'b', 'c'] + dataset = [[1,2,3], + [1,1,2], + [1,2,4]] + + def write_dataset(self, fp): + + writer = csv.writer(fp) + writer.writerow(self.header) + writer.writerows(self.dataset) + fp.flush() + + def validate(self, uniq): + """validate `uniq` for object dataset""" + + # count them + counts = {key: len(value) + for key, value in uniq.items()} + assert counts['a'] == 1 + assert counts['b'] == 2 + assert counts['c'] == 3 + + + def test_file(self): + """test reading uniques from a file""" + + tmpdir = tempfile.mkdtemp() + try: + # write test data + dst = os.path.join(tmpdir, 'test.csv') + with open(dst, 'w') as fp: + self.write_dataset(fp) + + # determine uniques + self.validate(uniques(dst)) + + finally: + shutil.rmtree(tmpdir, ignore_errors=True) + + def test_buffer(self): + """test using a StringIO buffer object""" + + # write test data to buffer + buffer = StringIO() + self.write_dataset(buffer) + buffer.seek(0) + + # determine uniques + self.validate(uniques(buffer)) + +if __name__ == '__main__': + unittest.main() diff -r 4793f99b73e0 -r 56596902e9ae tests/test_waiter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_waiter.py Sun Dec 10 17:57:03 2017 -0800 @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +""" +test bash waiting function +""" + +import os +import shutil +import tempfile +import unittest +from lemuriformes import waiter + +string = (str, unicode) + + +class TestWaiter(unittest.TestCase): + + def test_render(self): + commands = ['echo {}'.format(item) + for item in ('hello', 'world')] + hello = str(waiter.BashWaiter(*commands)) + assert isinstance(hello, string) + + +if __name__ == '__main__': + unittest.main()