annotate textshaper/split.py @ 45:ccbdc00d4f0a

stub
author Jeff Hammel <k0scist@gmail.com>
date Tue, 12 May 2015 21:21:04 -0700
parents
children 7e63ca061b6c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
45
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 split paragraphs, sentences, etc
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 """
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 # imports
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 import argparse
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 import sys
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def split_paragraphs(text):
    """Split *text* into paragraphs separated by blank lines.

    Each line is stripped of surrounding whitespace; consecutive non-blank
    lines are joined with a single space to form one paragraph.  A line
    that is empty after stripping ends the current paragraph.  Runs of
    several blank lines count as a single separator, so no empty
    paragraphs are produced.

    :param text: the string to split
    :returns: list of paragraph strings, in order of appearance; an empty
              list if ``text`` contains no non-blank lines
    """
    paragraphs = []
    current = []  # stripped lines of the paragraph being accumulated
    for line in text.splitlines():
        line = line.strip()
        if line:
            current.append(line)
        elif current:
            # blank line: close the paragraph in progress (if any)
            paragraphs.append(' '.join(current))
            current = []
    if current:
        # text did not end with a blank line; flush the last paragraph
        paragraphs.append(' '.join(current))
    return paragraphs
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
19
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=None):
    """CLI

    Read text from the file named on the command line (or from stdin when
    no file is given) and collapse every run of whitespace into a single
    space.

    :param args: list of command line arguments; defaults to
                 ``sys.argv[1:]``, looked up at call time
    """
    # resolve the default at call time rather than at definition time,
    # so changes to sys.argv made after import are honored
    if args is None:
        args = sys.argv[1:]

    # parse command line arguments
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
    options = parser.parse_args(args)

    # read the input, closing the file afterwards unless it is stdin
    try:
        text = options.file.read().strip()
    finally:
        if options.file is not sys.stdin:
            options.file.close()

    # normalize all whitespace (including newlines) to single spaces
    text = ' '.join(text.split())

    # TODO: splitting into paragraphs/sentences (e.g. on '.') and printing
    # the result is not implemented yet; this stub produces no output.
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
36
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# run the command line interface when executed as a script
if __name__ == '__main__':
    main()