# HG changeset patch # User Jeff Hammel # Date 1431877736 25200 # Node ID 8d8c1ac0e8e1f6765c7b3a4bf578bcf7e4a02c6f # Parent c3b69728f29135e06907af3e6e801e4ef201f16d add a test text and wire some things up diff -r c3b69728f291 -r 8d8c1ac0e8e1 tests/test.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test.txt Sun May 17 08:48:56 2015 -0700 @@ -0,0 +1,1 @@ +The fog of an October evening occluded the arrival of a carriage to the township of Bronswick so that none could discern through the mists that the townhouse at 18 Merriwether Lane was again to be occupied, save only those neighbors across the way that might have the vantage. The girl of the house, a Miss Anne Danubar, was staring into the gray streets when the clod of horse hooves broke pace and rested. From the carriage stepped two men, an elderly gentleman and a younger, who unlocked the gate to the yard which had never stood unlocked in times remembered. The driver aided them with their modest luggage and soon they were inside, the carriage leaving. \ No newline at end of file diff -r c3b69728f291 -r 8d8c1ac0e8e1 textshaper/split.py --- a/textshaper/split.py Sun May 17 08:33:23 2015 -0700 +++ b/textshaper/split.py Sun May 17 08:48:56 2015 -0700 @@ -38,6 +38,20 @@ def split_sentences(text, ends='.?!'): """split a text into sentences""" + text = text.strip() + sentences = [] + _indices = indices(text, ends) + + begin = 0 + for index, value in _indices: + sentence = text[begin:index] + sentence += value + sentence.strip() + begin = index + if sentence: + sentences.append(sentence) + import pdb; pdb.set_trace() + def split_paragraphs(text): lines = [line.strip() for line in text.strip().splitlines()] @@ -60,11 +74,13 @@ text = ' '.join(text.split()) # paragraphs = split_paragraphs(text) + # find all sentences ends = '.?!' + sentences = split_sentences(text, ends) - # find all ending punctuation - - + # display + for sentence in sentences: + print (sentence) if __name__ == '__main__': main()