# HG changeset patch # User Jeff Hammel # Date 1431908087 25200 # Node ID 4e2190495d5024d575e1bd48bce99f0ec64963d1 # Parent 1d755747e67a26d12c8521196a8ca8923bbff719 this basically works diff -r 1d755747e67a -r 4e2190495d50 textshaper/split.py --- a/textshaper/split.py Sun May 17 09:11:30 2015 -0700 +++ b/textshaper/split.py Sun May 17 17:14:47 2015 -0700 @@ -71,7 +71,8 @@ def words(text): """return the alphanumeric words in a sentence""" words = text.strip().split() - return [word for word in words] + return [word for word in words + if set(word).intersection(string.letters)] def main(args=sys.argv[1:]): """CLI""" @@ -82,6 +83,9 @@ parser.add_argument('-n', '--number', dest='number', action='store_true', default=False, help="number the sentences (CSV)") + parser.add_argument('-c', '--count', dest='count', + action='store_true', default=False, + help="count the words in each sentence (CSV)") parser.add_argument('-o', '--output', dest='output', type=argparse.FileType('w'), default=sys.stdout, help="file to output to, or stdout by default") @@ -98,7 +102,16 @@ # display if options.number: + if options.count: + raise NotImplementedError('TODO') # -> record TODO items writer = csv.writer(options.output) + for index, sentence in enumerate(sentences, 1): + writer.writerow([index, sentence]) + elif options.count: + writer = csv.writer(options.output) + for sentence in sentences: + n_words = len(words(sentence)) + writer.writerow([n_words, sentence]) else: for sentence in sentences: options.output.write(sentence + '\n')