comparison textshaper/url2txt.py @ 39:986f8a20c234

STUB: textshaper/url2txt.py
author Jeff Hammel <k0scist@gmail.com>
date Thu, 03 Jul 2014 13:46:30 -0700
parents 56fa70e2e239
children
comparison
equal deleted inserted replaced
38:56fa70e2e239 39:986f8a20c234
6 xclip -o | sed 's/_//' | sed 's/.html//' 6 xclip -o | sed 's/_//' | sed 's/.html//'
7 """ 7 """
8 8
9 import argparse 9 import argparse
10 import sys 10 import sys
11 import urlparse
11 12
12 def url2txt(url): 13 def url2txt(url, strip_extension=True, replacements=(('_', ' '),)):
13 """gets the text equivalent of a URL""" 14 """gets the text equivalent of a URL"""
14 url = url.rstrip('/') 15
15 if '/' in url: 16 # parse the url
16 url = url.rsplit('/')[-1] 17 parsed = urlparse.urlparse(url)
17 if '.' in url: 18
18 url = url.split('.', 1)[0] 19 # process the path, if available
19 url = url.replace('_', ' ') 20 path = parsed.path.rstrip('/')
20 return url 21 if path:
22 text = path.split('/')[-1]
23 if strip_extension:
24 # strip the extension, if desired
25 text = text.split('.', 1)[0]
26 else:
27 # otherwise go with the hostname
28 text = parsed.hostname
29
30 # replace desired items
31 for item, replacement in replacements:
32 text = text.replace(item, replacement)
33
34 return text
21 35
22 36
23 def main(args=sys.argv[1:]): 37 def main(args=sys.argv[1:]):
24 """CLI""" 38 """CLI"""
25 39
26 # parse command line 40 # parse command line
27 parser = argparse.ArgumentParser(description=__doc__) 41 parser = argparse.ArgumentParser(description=__doc__)
28 parser.add_option('urls', metavar='url', nargs='+', 42 parser.add_argument('urls', metavar='url', nargs='+',
29 help="URLs to convert") 43 help="URLs to convert")
30 options = parser.parse_args(args) 44 options = parser.parse_args(args)
31 45
32 # convert urls 46 # convert urls
33 for url in options.urls: 47 for url in options.urls:
34 print (url2txt(url)) 48 print (url2txt(url))