#!/usr/bin/env python

# Provenance (from the HG changeset patch that introduced this file):
#   User:    Jeff Hammel
#   Date:    Tue May 12 21:21:04 2015 -0700
#   Node ID: ccbdc00d4f0a83dac53aba4c0d2ff575b0fae09d
#   Parent:  8addd6e12b29a105b8a483ab05194354653b77cb
#   Message: stub
#   Path:    textshaper/split.py

"""
split paragraphs, sentences, etc
"""

# imports
import argparse
import sys


def split_paragraphs(text):
    """Split *text* into paragraphs separated by blank lines.

    Each line is stripped of surrounding whitespace; a line that is empty
    after stripping ends the current paragraph.  The lines of a paragraph
    are joined with a single space.

    Args:
        text: the string to split.

    Returns:
        A list of paragraph strings, in order of appearance.  Runs of
        consecutive blank lines never produce empty paragraphs, and empty
        or whitespace-only input yields an empty list.
    """
    paragraphs = []
    current = []  # non-blank lines of the paragraph being accumulated
    for line in text.strip().splitlines():
        line = line.strip()
        if line:
            current.append(line)
        elif current:
            # A blank line closes the paragraph; the `elif current` guard
            # makes repeated blank lines collapse into one separator.
            paragraphs.append(' '.join(current))
            current = []
    if current:
        # Flush the final paragraph (input rarely ends with a blank line).
        paragraphs.append(' '.join(current))
    return paragraphs


def main(args=sys.argv[1:]):
    """CLI

    Args:
        args: command line arguments, without the program name.  An optional
            positional ``file`` names the input; standard input is read when
            it is omitted.

    This is still a stub: the input is read and whitespace-normalized, but
    nothing is printed yet.
    """

    # parse command line arguments
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
    options = parser.parse_args(args)

    try:
        text = options.file.read().strip()
    finally:
        # Close files opened by argparse, but leave stdin usable for callers
        # that invoke main() programmatically.
        if options.file is not sys.stdin:
            options.file.close()

    # Collapse every run of whitespace (newlines included) to a single space.
    text = ' '.join(text.split())

    # TODO: paragraph output is not wired up.  split_paragraphs() relies on
    # blank lines, which the whitespace collapse above removes, so it would
    # have to run on the raw text *before* that normalization.

    # presumably the sentence-terminating characters for a future sentence
    # splitter; currently unused -- TODO confirm
    punctuation = ('.',)


if __name__ == '__main__':
    main()