Mercurial > hg > TextShaper
changeset 45:ccbdc00d4f0a
stub
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Tue, 12 May 2015 21:21:04 -0700 |
parents | 8addd6e12b29 |
children | 7e63ca061b6c |
files | textshaper/split.py |
diffstat | 1 files changed, 38 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# reconstructed from changeset diff: /dev/null -> b/textshaper/split.py (@@ -0,0 +1,38 @@)

"""
split paragraphs, sentences, etc
"""

# imports
import argparse
import sys


def split_paragraphs(text):
    """Split `text` into paragraphs separated by blank lines.

    Each line is stripped of surrounding whitespace; consecutive non-blank
    lines are joined with a single space to form one paragraph. Lines that
    are empty after stripping act as paragraph separators, and any number
    of consecutive separators counts as one.

    Returns a list of paragraph strings; an empty or whitespace-only
    `text` yields an empty list.
    """

    paragraphs = []
    current = []  # stripped, non-blank lines of the paragraph being built
    for line in text.strip().splitlines():
        line = line.strip()
        if line:
            current.append(line)
        elif current:
            # blank line closes the paragraph in progress; extra blank
            # lines are ignored because `current` is already empty
            paragraphs.append(' '.join(current))
            current = []
    if current:
        # flush the final paragraph (input rarely ends with a blank line
        # after the outer strip())
        paragraphs.append(' '.join(current))
    return paragraphs


def main(args=sys.argv[1:]):
    """CLI"""

    # parse command line arguments
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('file', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin)
    options = parser.parse_args(args)

    # read the input and collapse every run of whitespace to one space
    text = options.file.read().strip()
    text = ' '.join(text.split())

    # TODO: this is still a stub and produces no output.  Note that the
    # whitespace collapse above removes the blank lines that
    # split_paragraphs() relies on, so paragraph splitting must be applied
    # to the raw text *before* normalisation, e.g.:
    #     for paragraph in split_paragraphs(raw_text):
    #         print(paragraph)

    # NOTE(review): presumably the sentence terminators for a future
    # sentence splitter -- not used anywhere yet.
    punctuation = ('.',)


if __name__ == '__main__':
    main()