comparison textshaper/split.py @ 46:7e63ca061b6c

start findall function
author Jeff Hammel <k0scist@gmail.com>
date Sat, 16 May 2015 18:53:53 -0700
parents ccbdc00d4f0a
children 03ce88daa98d
comparison
equal deleted inserted replaced
45:ccbdc00d4f0a 46:7e63ca061b6c
4 split paragraphs, sentences, etc 4 split paragraphs, sentences, etc
5 """ 5 """
6 6
7 # imports 7 # imports
8 import argparse 8 import argparse
9 import re
10 import string
9 import sys 11 import sys
12
13
def findall(sub, _string):
    """Find all occurrences of `sub` in `_string`.

    Returns a list of the indices at which `sub` is found, in ascending
    order; the list is empty when `sub` does not occur.  The search
    resumes one position past each match, so overlapping occurrences
    are reported (e.g. 'aa' in 'aaaa' gives [0, 1, 2]).
    """

    retval = []
    index = 0
    while True:
        # keep the `try` limited to the one call that can raise
        try:
            index = _string.index(sub, index)
        except ValueError:
            # `index` raises ValueError once there are no further matches
            return retval
        retval.append(index)
        # step by one (not by len(sub)) so overlapping matches are found
        index += 1
26
10 27
11 def split_paragraphs(text): 28 def split_paragraphs(text):
12 29
13 lines = [line.strip() for line in text.strip().splitlines()] 30 lines = [line.strip() for line in text.strip().splitlines()]
14 lines = [line if line else '\n' 31 lines = [line if line else '\n'
23 # parse command line arguments 40 # parse command line arguments
24 parser = argparse.ArgumentParser(description=__doc__) 41 parser = argparse.ArgumentParser(description=__doc__)
25 parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin) 42 parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
26 options = parser.parse_args(args) 43 options = parser.parse_args(args)
27 44
45 # preprocess text
28 text = options.file.read().strip() 46 text = options.file.read().strip()
29 text = ' '.join(text.split()) 47 text = ' '.join(text.split())
30 # paragraphs = split_paragraphs(text) 48 # paragraphs = split_paragraphs(text)
31 49
32 punctuation = ('.',) 50 ends = '.?!'
33 51
52 for end in ends:
34 # for paragraph in paragraphs: 53 # for paragraph in paragraphs:
35 # print (paragraph) 54 # print (paragraph)
36 55
37 if __name__ == '__main__': 56 if __name__ == '__main__':
38 main() 57 main()