changeset 45:ccbdc00d4f0a

stub
author Jeff Hammel <k0scist@gmail.com>
date Tue, 12 May 2015 21:21:04 -0700
parents 8addd6e12b29
children 7e63ca061b6c
files textshaper/split.py
diffstat 1 files changed, 38 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/textshaper/split.py	Tue May 12 21:21:04 2015 -0700
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+"""
+split paragraphs, sentences, etc
+"""
+
+# imports
+import argparse
+import sys
+
+def split_paragraphs(text):
+    """Return the list of paragraphs in the string `text`.
+
+    Paragraphs are separated by blank lines; the stripped lines of each are
+    joined by single spaces and empty paragraphs are dropped."""
+    marked = [line.strip() or '\n' for line in text.strip().splitlines()]
+    # blank lines are now '\n' markers: join everything, then split on them
+    return [p.strip() for p in ' '.join(marked).split('\n') if p.strip()]
+
+def main(args=sys.argv[1:]):
+    """Command-line entry point (stub): read the input, normalize whitespace.
+
+    `args` is the argument list to parse; its default is `sys.argv[1:]` as
+    evaluated when the function is defined.  The optional positional `file`
+    argument falls back to standard input.  Nothing is printed or returned.
+    """
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument('file', nargs='?', default=sys.stdin,
+                     type=argparse.FileType('r'))
+    source = cli.parse_args(args).file
+
+    # collapse every run of whitespace (newlines included) to a single space
+    text = ' '.join(source.read().split())
+    # NOTE(review): unused so far; presumably sentence delimiters -- confirm
+    punctuation = ('.',)
+
+if __name__ == '__main__':
+    main()  # run the CLI only when this file is executed as a script