comparison textshaper/split.py @ 51:c3b69728f291

finding indices now works
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 May 2015 08:33:23 -0700
parents 1284c99a94fa
children 8d8c1ac0e8e1
comparison
legend: equal | deleted | inserted | replaced
50:1284c99a94fa 51:c3b69728f291
22 retval.append(index) 22 retval.append(index)
23 index += len(sub) 23 index += len(sub)
24 except ValueError: 24 except ValueError:
25 return retval 25 return retval
26 26
27 def findindices(_string, values): 27 def indices(text, values):
28 """ 28 """
29 returns ordered list of 2-tuples: 29 returns ordered list of 2-tuples:
30 (index, value) 30 (index, value)
31 """ 31 """
32 locations = {value: findall(text, value) for value in values}
33 indices = []
34 for key, values in locations.items():
35 indices.extend([(value, key) for value in values])
36 return sorted(indices, key=lambda x: x[0])
32 37
33 def split_sentences(text, ends='.?!'): 38 def split_sentences(text, ends='.?!'):
34 """split a text into sentences""" 39 """split a text into sentences"""
35 40
36 def split_paragraphs(text): 41 def split_paragraphs(text):
56 # paragraphs = split_paragraphs(text) 61 # paragraphs = split_paragraphs(text)
57 62
58 ends = '.?!' 63 ends = '.?!'
59 64
60 # find all ending punctuation 65 # find all ending punctuation
61 indices = {end: findall(text, end) for end in ends} 66
62 67
63 68
64 if __name__ == '__main__': 69 if __name__ == '__main__':
65 main() 70 main()