changeset 46:7e63ca061b6c

start findall function
author Jeff Hammel <k0scist@gmail.com>
date Sat, 16 May 2015 18:53:53 -0700
parents ccbdc00d4f0a
children 6c7ca72777af
files textshaper/split.py
diffstat 1 files changed, 20 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/textshaper/split.py	Tue May 12 21:21:04 2015 -0700
+++ b/textshaper/split.py	Sat May 16 18:53:53 2015 -0700
@@ -6,8 +6,25 @@
 
 # imports
 import argparse
+import re
+import string
 import sys
 
+
+def findall(sub, _string):
+    """find all occurrences of `sub` in `_string`"""
+
+    retval = []
+    index = 0
+    while True:
+        try:
+            index = _string.index(sub, index)
+            retval.append(index)
+            index += 1
+        except ValueError:
+            return retval
+
+
 def split_paragraphs(text):
 
     lines = [line.strip() for line in text.strip().splitlines()]
@@ -25,12 +42,14 @@
     parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
     options = parser.parse_args(args)
 
+    # preprocess text
     text = options.file.read().strip()
     text = ' '.join(text.split())
 #    paragraphs = split_paragraphs(text)
 
-    punctuation = ('.',)
+     ends = '.?!'
 
+             for end in ends:
 #    for paragraph in paragraphs:
 #        print (paragraph)