# HG changeset patch # User Jeff Hammel # Date 1431876803 25200 # Node ID c3b69728f29135e06907af3e6e801e4ef201f16d # Parent 1284c99a94faa21205719359b82f7cefcdc0aa2a finding indices now works diff -r 1284c99a94fa -r c3b69728f291 tests/test_split.py --- a/tests/test_split.py Sat May 16 21:02:07 2015 -0700 +++ b/tests/test_split.py Sun May 17 08:33:23 2015 -0700 @@ -10,10 +10,11 @@ from textshaper import split + class SplitUnitTest(unittest.TestCase): def test_findall(self): - """test basic""" + """test finding all substrings""" # 012345678901 string = 'a cat, a bat' @@ -24,6 +25,18 @@ self.assertEqual(split.findall(string, 't'), [4, 11]) + def test_indices(self): + """test finding ordered indices""" + string = 'a cat, a bat' + indices = split.indices(string, ('a', 't')) + self.assertEqual(indices, + [(0, 'a'), + (3, 'a'), + (4, 't'), + (7, 'a'), + (10, 'a'), + (11, 't')]) + if __name__ == '__main__': unittest.main() diff -r 1284c99a94fa -r c3b69728f291 textshaper/split.py --- a/textshaper/split.py Sat May 16 21:02:07 2015 -0700 +++ b/textshaper/split.py Sun May 17 08:33:23 2015 -0700 @@ -24,11 +24,16 @@ except ValueError: return retval -def findindices(_string, values): +def indices(text, values): """ returns ordered list of 2-tuples: (index, value) """ + locations = {value: findall(text, value) for value in values} + indices = [] + for key, values in locations.items(): + indices.extend([(value, key) for value in values]) + return sorted(indices, key=lambda x: x[0]) def split_sentences(text, ends='.?!'): """split a text into sentences""" @@ -58,7 +63,7 @@ ends = '.?!' # find all ending punctuation - indices = {end: findall(text, end) for end in ends} + if __name__ == '__main__':