changeset 51:c3b69728f291

finding indices now works
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 May 2015 08:33:23 -0700 (2015-05-17)
parents 1284c99a94fa
children 8d8c1ac0e8e1
files tests/test_split.py textshaper/split.py
diffstat 2 files changed, 21 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/tests/test_split.py	Sat May 16 21:02:07 2015 -0700
+++ b/tests/test_split.py	Sun May 17 08:33:23 2015 -0700
@@ -10,10 +10,11 @@
 from textshaper import split
 
 
+
 class SplitUnitTest(unittest.TestCase):
 
     def test_findall(self):
-        """test basic"""
+        """test finding all substrings"""
 
                 # 012345678901
         string = 'a cat, a bat'
@@ -24,6 +25,18 @@
         self.assertEqual(split.findall(string, 't'),
                          [4, 11])
 
+    def test_indices(self):
+        """test finding ordered indices"""
+        string = 'a cat, a bat'
+        indices = split.indices(string, ('a', 't'))
+        self.assertEqual(indices,
+                         [(0, 'a'),
+                          (3, 'a'),
+                          (4, 't'),
+                          (7, 'a'),
+                          (10, 'a'),
+                          (11, 't')])
+
 
 if __name__ == '__main__':
     unittest.main()
--- a/textshaper/split.py	Sat May 16 21:02:07 2015 -0700
+++ b/textshaper/split.py	Sun May 17 08:33:23 2015 -0700
@@ -24,11 +24,16 @@
         except ValueError:
             return retval
 
-def findindices(_string, values):
+def indices(text, values):
     """
     returns ordered list of 2-tuples:
     (index, value)
     """
+    locations = {value: findall(text, value) for value in values}
+    indices = []
+    for key, values in locations.items():
+        indices.extend([(value, key) for value in values])
+    return sorted(indices, key=lambda x: x[0])
 
 def split_sentences(text, ends='.?!'):
     """split a text into sentences"""
@@ -58,7 +63,7 @@
     ends = '.?!'
 
     # find all ending punctuation
-    indices = {end: findall(text, end) for end in ends}
+
 
 
 if __name__ == '__main__':