Mercurial > hg > TextShaper

diff textshaper/split.py @ 55:4e2190495d50
this basically works
author: Jeff Hammel <k0scist@gmail.com>
date: Sun, 17 May 2015 17:14:47 -0700
parents: 1d755747e67a
--- a/textshaper/split.py	Sun May 17 09:11:30 2015 -0700
+++ b/textshaper/split.py	Sun May 17 17:14:47 2015 -0700
@@ -71,7 +71,8 @@
 def words(text):
     """return the alphanumeric words in a sentence"""
     words = text.strip().split()
-    return [word for word in words]
+    return [word for word in words
+            if set(word).intersection(string.letters)]
 
 def main(args=sys.argv[1:]):
     """CLI"""
@@ -82,6 +83,9 @@
     parser.add_argument('-n', '--number', dest='number',
                         action='store_true', default=False,
                         help="number the sentences (CSV)")
+    parser.add_argument('-c', '--count', dest='count',
+                        action='store_true', default=False,
+                        help="count the words in each sentence (CSV)")
     parser.add_argument('-o', '--output', dest='output',
                         type=argparse.FileType('w'), default=sys.stdout,
                         help="file to output to, or stdout by default")
@@ -98,7 +102,16 @@
 
     # display
     if options.number:
+        if options.count:
+            raise NotImplementedError('TODO') # -> record TODO items
         writer = csv.writer(options.output)
+        for index, sentence in enumerate(sentences, 1):
+            writer.writerow([index, sentence])
+    elif options.count:
+        writer = csv.writer(options.output)
+        for sentence in sentences:
+            n_words = len(words(sentence))
+            writer.writerow([n_words, sentence])
     else:
         for sentence in sentences:
             options.output.write(sentence + '\n')
author	Jeff Hammel <k0scist@gmail.com>
date	Sun, 17 May 2015 17:14:47 -0700
parents	1d755747e67a
children