changeset 55:4e2190495d50

this basically works
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 May 2015 17:14:47 -0700
parents 1d755747e67a
children 4576ccc3be76
files textshaper/split.py
diffstat 1 files changed, 14 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/textshaper/split.py	Sun May 17 09:11:30 2015 -0700
+++ b/textshaper/split.py	Sun May 17 17:14:47 2015 -0700
@@ -71,7 +71,8 @@
 def words(text):
     """return the alphanumeric words in a sentence"""
     words = text.strip().split()
-    return [word for word in words]
+    return [word for word in words
+            if set(word).intersection(string.letters)]
 
 def main(args=sys.argv[1:]):
     """CLI"""
@@ -82,6 +83,9 @@
     parser.add_argument('-n', '--number', dest='number',
                         action='store_true', default=False,
                         help="number the sentences (CSV)")
+    parser.add_argument('-c', '--count', dest='count',
+                        action='store_true', default=False,
+                        help="count the words in each sentence (CSV)")
     parser.add_argument('-o', '--output', dest='output',
                         type=argparse.FileType('w'), default=sys.stdout,
                         help="file to output to, or stdout by default")
@@ -98,7 +102,16 @@
 
     # display
     if options.number:
+        if options.count:
+            raise NotImplementedError('TODO') # -> record TODO items
         writer = csv.writer(options.output)
+        for index, sentence in enumerate(sentences, 1):
+            writer.writerow([index, sentence])
+    elif options.count:
+        writer = csv.writer(options.output)
+        for sentence in sentences:
+            n_words = len(words(sentence))
+            writer.writerow([n_words, sentence])
     else:
         for sentence in sentences:
             options.output.write(sentence + '\n')