Mercurial > hg > TextShaper
changeset 39:986f8a20c234
STUB: textshaper/url2txt.py
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Thu, 03 Jul 2014 13:46:30 -0700 |
parents | 56fa70e2e239 |
children | e1832eeae084 |
files | textshaper/url2txt.py |
diffstat | 1 files changed, 24 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/textshaper/url2txt.py Thu Jul 03 13:23:19 2014 -0700
+++ b/textshaper/url2txt.py Thu Jul 03 13:46:30 2014 -0700
@@ -8,16 +8,30 @@
 import argparse
 import sys
+import urlparse
 
 
-def url2txt(url):
+def url2txt(url, strip_extension=True, replacements=(('_', ' '),)):
     """gets the text equivalent of a URL"""
-    url = url.rstrip('/')
-    if '/' in url:
-        url = url.rsplit('/')[-1]
-    if '.' in url:
-        url = url.split('.', 1)[0]
-    url = url.replace('_', ' ')
-    return url
+
+    # parse the url
+    parsed = urlparse.urlparse(url)
+
+    # process the path, if available
+    path = parsed.path.rstrip('/')
+    if path:
+        text = path.split('/')[-1]
+        if strip_extension:
+            # strip the extension, if desired
+            text = text.split('.', 1)[0]
+    else:
+        # otherwise go with the hostname
+        text = parsed.hostname
+
+    # replace desired items
+    for item, replacement in replacements:
+        text = text.replace(item, replacement)
+
+    return text
 
 
 def main(args=sys.argv[1:]):
@@ -25,8 +39,8 @@
 
     # parse command line
     parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_option('urls', metavar='url', nargs='+',
-                      help="URLs to convert")
+    parser.add_argument('urls', metavar='url', nargs='+',
+                        help="URLs to convert")
     options = parser.parse_args(args)
 
     # convert urls
```