Mercurial > hg > TextShaper
comparison textshaper/split.py @ 51:c3b69728f291
finding indices now works
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 17 May 2015 08:33:23 -0700 |
parents | 1284c99a94fa |
children | 8d8c1ac0e8e1 |
comparison
equal
deleted
inserted
replaced
50:1284c99a94fa | 51:c3b69728f291 |
---|---|
22 retval.append(index) | 22 retval.append(index) |
23 index += len(sub) | 23 index += len(sub) |
24 except ValueError: | 24 except ValueError: |
25 return retval | 25 return retval |
26 | 26 |
27 def findindices(_string, values): | 27 def indices(text, values): |
28 """ | 28 """ |
29 returns ordered list of 2-tuples: | 29 returns ordered list of 2-tuples: |
30 (index, value) | 30 (index, value) |
31 """ | 31 """ |
| | 32 locations = {value: findall(text, value) for value in values} |
| | 33 indices = [] |
| | 34 for key, values in locations.items(): |
| | 35 indices.extend([(value, key) for value in values]) |
| | 36 return sorted(indices, key=lambda x: x[0]) |
32 | 37 |
33 def split_sentences(text, ends='.?!'): | 38 def split_sentences(text, ends='.?!'): |
34 """split a text into sentences""" | 39 """split a text into sentences""" |
35 | 40 |
36 def split_paragraphs(text): | 41 def split_paragraphs(text): |
56 # paragraphs = split_paragraphs(text) | 61 # paragraphs = split_paragraphs(text) |
57 | 62 |
58 ends = '.?!' | 63 ends = '.?!' |
59 | 64 |
60 # find all ending punctuation | 65 # find all ending punctuation |
61 indices = {end: findall(text, end) for end in ends} | 66 |
62 | 67 |
63 | 68 |
64 if __name__ == '__main__': | 69 if __name__ == '__main__': |
65 main() | 70 main() |