comparison textshaper/split.py @ 55:4e2190495d50

this basically works
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 May 2015 17:14:47 -0700
parents 1d755747e67a
children
comparison
equal deleted inserted replaced
54:1d755747e67a 55:4e2190495d50
69 return paragraphs 69 return paragraphs
70 70
71 def words(text): 71 def words(text):
72 """return the alphanumeric words in a sentence""" 72 """return the alphanumeric words in a sentence"""
73 words = text.strip().split() 73 words = text.strip().split()
74 return [word for word in words] 74 return [word for word in words
75 if set(word).intersection(string.letters)]
75 76
76 def main(args=sys.argv[1:]): 77 def main(args=sys.argv[1:]):
77 """CLI""" 78 """CLI"""
78 79
79 # parse command line arguments 80 # parse command line arguments
80 parser = argparse.ArgumentParser(description=__doc__) 81 parser = argparse.ArgumentParser(description=__doc__)
81 parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin) 82 parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
82 parser.add_argument('-n', '--number', dest='number', 83 parser.add_argument('-n', '--number', dest='number',
83 action='store_true', default=False, 84 action='store_true', default=False,
84 help="number the sentences (CSV)") 85 help="number the sentences (CSV)")
86 parser.add_argument('-c', '--count', dest='count',
87 action='store_true', default=False,
88 help="count the words in each sentence (CSV)")
85 parser.add_argument('-o', '--output', dest='output', 89 parser.add_argument('-o', '--output', dest='output',
86 type=argparse.FileType('w'), default=sys.stdout, 90 type=argparse.FileType('w'), default=sys.stdout,
87 help="file to output to, or stdout by default") 91 help="file to output to, or stdout by default")
88 options = parser.parse_args(args) 92 options = parser.parse_args(args)
89 93
96 ends = '.?!' 100 ends = '.?!'
97 sentences = split_sentences(text, ends) 101 sentences = split_sentences(text, ends)
98 102
99 # display 103 # display
100 if options.number: 104 if options.number:
105 if options.count:
106 raise NotImplementedError('TODO') # -> record TODO items
101 writer = csv.writer(options.output) 107 writer = csv.writer(options.output)
108 for index, sentence in enumerate(sentences, 1):
109 writer.writerow([index, sentence])
110 elif options.count:
111 writer = csv.writer(options.output)
112 for sentence in sentences:
113 n_words = len(words(sentence))
114 writer.writerow([n_words, sentence])
102 else: 115 else:
103 for sentence in sentences: 116 for sentence in sentences:
104 options.output.write(sentence + '\n') 117 options.output.write(sentence + '\n')
105 118
106 if __name__ == '__main__': 119 if __name__ == '__main__':