Mercurial > hg > TextShaper
comparison textshaper/split.py @ 45:ccbdc00d4f0a
stub
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Tue, 12 May 2015 21:21:04 -0700 |
parents | |
children | 7e63ca061b6c |
comparison
equal
deleted
inserted
replaced
44:8addd6e12b29 | 45:ccbdc00d4f0a |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 split paragraphs, sentences, etc | |
5 """ | |
6 | |
7 # imports | |
8 import argparse | |
9 import sys | |
10 | |
def split_paragraphs(text):
    """Split *text* into paragraphs separated by blank lines.

    Each line is stripped of surrounding whitespace; a line that is empty
    after stripping counts as a paragraph separator.  The non-blank lines
    of one paragraph are joined with a single space.

    :param text: the string to split
    :returns: list of paragraph strings, in order of appearance; an empty
              list if ``text`` contains no non-whitespace characters
    """

    paragraphs = []
    current = []  # non-blank lines of the paragraph being collected
    for line in text.strip().splitlines():
        line = line.strip()
        if line:
            current.append(line)
        elif current:
            # blank line closes the paragraph; the `elif current` guard
            # makes runs of blank lines collapse into one separator
            paragraphs.append(' '.join(current))
            current = []
    if current:
        # the final paragraph has no trailing blank line to close it
        paragraphs.append(' '.join(current))
    return paragraphs
19 | |
def main(args=sys.argv[1:]):
    """CLI

    Read text from the file named on the command line (or from stdin when
    no file is given) and collapse every run of whitespace into a single
    space.

    :param args: command line arguments, excluding the program name
    """

    # parse command line arguments
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
    options = parser.parse_args(args)

    # read the input, closing any file argparse opened for us;
    # stdin is left open since this function does not own it
    source = options.file
    try:
        text = source.read().strip()
    finally:
        if source is not sys.stdin:
            source.close()

    # normalize whitespace: newlines and repeated blanks become one space
    text = ' '.join(text.split())

    # TODO: nothing is output yet; splitting the normalized text into
    # paragraphs/sentences (e.g. on '.') and printing them is not wired in
36 | |
# script entry point: run the CLI only when executed directly, not on import
if __name__ == '__main__':
    main()