comparison textshaper/split.py @ 52:8d8c1ac0e8e1

add a test text and wire some things up
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 May 2015 08:48:56 -0700
parents c3b69728f291
children 1d755747e67a
comparison
equal deleted inserted replaced
51:c3b69728f291 52:8d8c1ac0e8e1
36 return sorted(indices, key=lambda x: x[0]) 36 return sorted(indices, key=lambda x: x[0])
37 37
38 def split_sentences(text, ends='.?!'): 38 def split_sentences(text, ends='.?!'):
39 """split a text into sentences""" 39 """split a text into sentences"""
40 40
41 text = text.strip()
42 sentences = []
43 _indices = indices(text, ends)
44
45 begin = 0
46 for index, value in _indices:
47 sentence = text[begin:index]
48 sentence += value
49 sentence.strip()
50 begin = index
51 if sentence:
52 sentences.append(sentence)
53 import pdb; pdb.set_trace()
54
41 def split_paragraphs(text): 55 def split_paragraphs(text):
42 56
43 lines = [line.strip() for line in text.strip().splitlines()] 57 lines = [line.strip() for line in text.strip().splitlines()]
44 lines = [line if line else '\n' 58 lines = [line if line else '\n'
45 for line in lines] 59 for line in lines]
58 # preprocess text 72 # preprocess text
59 text = options.file.read().strip() 73 text = options.file.read().strip()
60 text = ' '.join(text.split()) 74 text = ' '.join(text.split())
61 # paragraphs = split_paragraphs(text) 75 # paragraphs = split_paragraphs(text)
62 76
77 # find all sentences
63 ends = '.?!' 78 ends = '.?!'
79 sentences = split_sentences(text, ends)
64 80
65 # find all ending punctuation 81 # display
66 82 for sentence in sentences:
67 83 print (sentence)
68 84
69 if __name__ == '__main__': 85 if __name__ == '__main__':
70 main() 86 main()