Mercurial > hg > TextShaper
comparison textshaper/url2txt.py @ 39:986f8a20c234
STUB: textshaper/url2txt.py
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Thu, 03 Jul 2014 13:46:30 -0700 |
parents | 56fa70e2e239 |
children |
comparison
equal
deleted
inserted
replaced
38:56fa70e2e239 | 39:986f8a20c234 |
---|---|
6 xclip -o | sed 's/_//' | sed 's/.html//' | 6 xclip -o | sed 's/_//' | sed 's/.html//' |
7 """ | 7 """ |
8 | 8 |
9 import argparse | 9 import argparse |
10 import sys | 10 import sys |
11 import urlparse | |
11 | 12 |
12 def url2txt(url): | 13 def url2txt(url, strip_extension=True, replacements=(('_', ' '),)): |
13 """gets the text equivalent of a URL""" | 14 """gets the text equivalent of a URL""" |
14 url = url.rstrip('/') | 15 |
15 if '/' in url: | 16 # parse the url |
16 url = url.rsplit('/')[-1] | 17 parsed = urlparse.urlparse(url) |
17 if '.' in url: | 18 |
18 url = url.split('.', 1)[0] | 19 # process the path, if available |
19 url = url.replace('_', ' ') | 20 path = parsed.path.rstrip('/') |
20 return url | 21 if path: |
 | 22 text = path.split('/')[-1] |
 | 23 if strip_extension: |
 | 24 # strip the extension, if desired |
 | 25 text = text.split('.', 1)[0] |
 | 26 else: |
 | 27 # otherwise go with the hostname |
 | 28 text = parsed.hostname |
 | 29 |
 | 30 # replace desired items |
 | 31 for item, replacement in replacements: |
 | 32 text = text.replace(item, replacement) |
 | 33 |
 | 34 return text |
21 | 35 |
22 | 36 |
23 def main(args=sys.argv[1:]): | 37 def main(args=sys.argv[1:]): |
24 """CLI""" | 38 """CLI""" |
25 | 39 |
26 # parse command line | 40 # parse command line |
27 parser = argparse.ArgumentParser(description=__doc__) | 41 parser = argparse.ArgumentParser(description=__doc__) |
28 parser.add_option('urls', metavar='url', nargs='+', | 42 parser.add_argument('urls', metavar='url', nargs='+', |
29 help="URLs to convert") | 43 help="URLs to convert") |
30 options = parser.parse_args(args) | 44 options = parser.parse_args(args) |
31 | 45 |
32 # convert urls | 46 # convert urls |
33 for url in options.urls: | 47 for url in options.urls: |
34 print (url2txt(url)) | 48 print (url2txt(url)) |