Mercurial > hg > TextShaper
diff textshaper/split.py @ 46:7e63ca061b6c
start findall function
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sat, 16 May 2015 18:53:53 -0700 |
parents | ccbdc00d4f0a |
children | 03ce88daa98d |
line wrap: on
line diff
--- a/textshaper/split.py Tue May 12 21:21:04 2015 -0700
+++ b/textshaper/split.py Sat May 16 18:53:53 2015 -0700
@@ -6,8 +6,25 @@
 
 # imports
 import argparse
+import re
+import string
 import sys
 
+
+def findall(sub, _string):
+    """find all occurances of `sub` in _string"""
+
+    retval = []
+    index = 0
+    while True:
+        try:
+            index = _string.index(sub, index)
+            retval.append(index)
+            index += 1
+        except ValueError:
+            return retval
+
+
 def split_paragraphs(text):
 
     lines = [line.strip() for line in text.strip().splitlines()]
@@ -25,12 +42,14 @@
     parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
     options = parser.parse_args(args)
 
+    # preprocess text
     text = options.file.read().strip()
     text = ' '.join(text.split())
     # paragraphs = split_paragraphs(text)
 
 
-    punctuation = ('.',)
+    ends = '.?!'
+    for end in ends:
 
     # for paragraph in paragraphs:
     #     print (paragraph)