annotate textshaper/url2txt.py @ 51:c3b69728f291

finding indices now works
author Jeff Hammel <k0scist@gmail.com>
date Sun, 17 May 2015 08:33:23 -0700
parents 986f8a20c234
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
21
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 """
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 get the human-form of the name of the final path segment in a url:
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 xclip -o | sed 's/_//' | sed 's/.html//'
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7 """
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8
38
56fa70e2e239 STUB: textshaper/url2txt.py
Jeff Hammel <k0scist@gmail.com>
parents: 21
diff changeset
import argparse
import sys

try:
    import urlparse  # Python 2
except ImportError:
    import urllib.parse as urlparse  # Python 3: same API under a new name
21
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12
39
986f8a20c234 STUB: textshaper/url2txt.py
Jeff Hammel <k0scist@gmail.com>
parents: 38
diff changeset
def url2txt(url, strip_extension=True, replacements=(('_', ' '),)):
    """gets the text equivalent of a URL

    The text is taken from the last segment of the URL's path (trailing
    slashes are ignored).  If the URL has no path, the hostname is used
    instead; if it has no hostname either, the empty string is used.

    Arguments:
    - url : the URL to convert
    - strip_extension : if true, drop everything from the first '.' of
      the path segment onwards (never applied to a hostname)
    - replacements : iterable of (item, replacement) pairs, applied in
      order to the resulting text

    Returns the converted text as a string.
    """

    # parse the url
    parsed = urlparse.urlparse(url)

    # process the path, if available
    path = parsed.path.rstrip('/')
    if path:
        text = path.split('/')[-1]
        if strip_extension:
            # strip the extension, if desired;
            # note this cuts at the *first* dot: 'a.tar.gz' -> 'a'
            text = text.split('.', 1)[0]
    else:
        # otherwise go with the hostname;
        # hostname is None for URLs without a network location
        # (e.g. '' or '/'), so fall back to '' rather than crash below
        text = parsed.hostname or ''

    # replace desired items
    for item, replacement in replacements:
        text = text.replace(item, replacement)

    return text
21
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
35
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
36
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """command-line entry point: print the text form of each URL given"""

    # build the argument parser; the module docstring is the description
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('urls', metavar='url', nargs='+',
                     help="URLs to convert")
    parsed_args = cli.parse_args(args)

    # emit one converted line per URL, in the order given
    for target in parsed_args.urls:
        converted = url2txt(target)
        print(converted)
21
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
49
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
50
e6f680d25d63 migrate url2txt
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# run the command-line interface only when executed as a script,
# not when this module is imported
if __name__ == '__main__':
    main()