# HG changeset patch # User Jeff Hammel # Date 1431827633 25200 # Node ID 7e63ca061b6c0270a059cabd2cd0117912f94b4b # Parent ccbdc00d4f0a83dac53aba4c0d2ff575b0fae09d start findall function diff -r ccbdc00d4f0a -r 7e63ca061b6c textshaper/split.py --- a/textshaper/split.py Tue May 12 21:21:04 2015 -0700 +++ b/textshaper/split.py Sat May 16 18:53:53 2015 -0700 @@ -6,8 +6,25 @@ # imports import argparse +import re +import string import sys + +def findall(sub, _string): + """find all occurrences of `sub` in _string""" + + retval = [] + index = 0 + while True: + try: + index = _string.index(sub, index) + retval.append(index) + index += 1 + except ValueError: + return retval + + def split_paragraphs(text): lines = [line.strip() for line in text.strip().splitlines()] @@ -25,12 +42,14 @@ parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin) options = parser.parse_args(args) + # preprocess text text = options.file.read().strip() text = ' '.join(text.split()) # paragraphs = split_paragraphs(text) - punctuation = ('.',) + ends = '.?!' + for end in ends: # for paragraph in paragraphs: # print (paragraph)