45
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 split paragraphs, sentences, etc
|
|
5 """
|
|
6
|
|
7 # imports
|
|
8 import argparse
|
|
9 import sys
|
|
10
|
|
def split_paragraphs(text):
    """Split *text* into paragraphs separated by blank lines.

    Lines that are empty or contain only whitespace act as paragraph
    separators.  The remaining lines of a paragraph are joined with single
    spaces, and runs of whitespace inside a line are collapsed.

    Args:
        text: the text to split.

    Returns:
        A list of paragraph strings in order of appearance; the list is
        empty when *text* contains no non-whitespace characters.
    """
    paragraphs = []
    words = []  # words of the paragraph currently being collected
    for line in text.splitlines():
        line_words = line.split()
        if line_words:
            words.extend(line_words)
        elif words:
            # Blank line: close the current paragraph.  The `elif` guard
            # keeps consecutive blank lines from emitting empty paragraphs.
            paragraphs.append(' '.join(words))
            words = []
    if words:
        # The last paragraph is not followed by a blank line.
        paragraphs.append(' '.join(words))
    return paragraphs
|
|
19
|
|
def main(args=None):
    """Command-line entry point.

    Reads text from the file named on the command line (standard input
    when no file is given) and collapses every run of whitespace in it
    into a single space.

    Args:
        args: list of command-line arguments to parse.  ``None`` (the
            default) lets argparse read ``sys.argv[1:]`` at call time
            rather than capturing it once when the module is imported.
    """

    # parse command line arguments
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('file', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin)
    options = parser.parse_args(args)

    # Read everything, then release the handle argparse opened.  Standard
    # input is left open because this function does not own it.
    try:
        text = options.file.read().strip()
    finally:
        if options.file is not sys.stdin:
            options.file.close()

    # normalize whitespace: newlines and repeated blanks become one space
    text = ' '.join(text.split())

    # TODO: `text` is not used yet; splitting it into sentences or
    # paragraphs and printing the result is still unimplemented.
|
|
36
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if '__main__' == __name__:
    main()
|