Mercurial > hg > TextShaper
diff textshaper/split.py @ 52:8d8c1ac0e8e1
add a test text and wire some things up
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 17 May 2015 08:48:56 -0700 |
parents | c3b69728f291 |
children | 1d755747e67a |
line wrap: on
line diff
--- a/textshaper/split.py Sun May 17 08:33:23 2015 -0700
+++ b/textshaper/split.py Sun May 17 08:48:56 2015 -0700
@@ -38,6 +38,20 @@
 
 def split_sentences(text, ends='.?!'):
     """split a text into sentences"""
+    text = text.strip()
+    sentences = []
+    _indices = indices(text, ends)
+
+    begin = 0
+    for index, value in _indices:
+        sentence = text[begin:index]
+        sentence += value
+        sentence.strip()
+        begin = index
+        if sentence:
+            sentences.append(sentence)
+    import pdb; pdb.set_trace()
+
 
 def split_paragraphs(text):
     lines = [line.strip() for line in text.strip().splitlines()]
@@ -60,11 +74,13 @@
     text = ' '.join(text.split())
     # paragraphs = split_paragraphs(text)
 
+    # find all sentences
     ends = '.?!'
+    sentences = split_sentences(text, ends)
 
-    # find all ending punctuation
-
-
+    # display
+    for sentence in sentences:
+        print (sentence)
 
 if __name__ == '__main__':
     main()