TextShaper: textshaper/split.py comparison

this basically works

comparison

Legend: equal, deleted, inserted, replaced

-:1d755747e67a
+:4e2190495d50
 return paragraphs
 def words(text):
 """return the alphanumeric words in a sentence"""
 words = text.strip().split()
-return [word for word in words]
+return [word for word in words
+if set(word).intersection(string.letters)]
 def main(args=sys.argv[1:]):
 """CLI"""
 # parse command line arguments
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
 parser.add_argument('-n', '--number', dest='number',
 action='store_true', default=False,
 help="number the sentences (CSV)")
+parser.add_argument('-c', '--count', dest='count',
+action='store_true', default=False,
+help="count the words in each sentence (CSV)")
 parser.add_argument('-o', '--output', dest='output',
 type=argparse.FileType('w'), default=sys.stdout,
 help="file to output to, or stdout by default")
 options = parser.parse_args(args)
 ends = '.?!'
 sentences = split_sentences(text, ends)
 # display
 if options.number:
+if options.count:
+raise NotImplementedError('TODO') # -> record TODO items
 writer = csv.writer(options.output)
+for index, sentence in enumerate(sentences, 1):
+writer.writerow([index, sentence])
+elif options.count:
+writer = csv.writer(options.output)
+for sentence in sentences:
+n_words = len(words(sentence))
+writer.writerow([n_words, sentence])
 else:
 for sentence in sentences:
 options.output.write(sentence + '\n')
 if __name__ == '__main__':

Mercurial > hg > TextShaper