# HG changeset patch
# User Jeff Hammel
# Date 1404420390 25200
# Node ID 986f8a20c2344c436612988fdd7c348f7cdf7255
# Parent 56fa70e2e2399d69c01a4e28a4317a237b9262f5
STUB: textshaper/url2txt.py

diff -r 56fa70e2e239 -r 986f8a20c234 textshaper/url2txt.py
--- a/textshaper/url2txt.py Thu Jul 03 13:23:19 2014 -0700
+++ b/textshaper/url2txt.py Thu Jul 03 13:46:30 2014 -0700
@@ -8,16 +8,30 @@
 
 import argparse
 import sys
+import urlparse
 
-def url2txt(url):
+def url2txt(url, strip_extension=True, replacements=(('_', ' '),)):
     """gets the text equivalent of a URL"""
-    url = url.rstrip('/')
-    if '/' in url:
-        url = url.rsplit('/')[-1]
-    if '.' in url:
-        url = url.split('.', 1)[0]
-    url = url.replace('_', ' ')
-    return url
+
+    # parse the url
+    parsed = urlparse.urlparse(url)
+
+    # process the path, if available
+    path = parsed.path.rstrip('/')
+    if path:
+        text = path.split('/')[-1]
+        if strip_extension:
+            # strip the extension, if desired
+            text = text.split('.', 1)[0]
+    else:
+        # otherwise go with the hostname
+        text = parsed.hostname
+
+    # replace desired items
+    for item, replacement in replacements:
+        text = text.replace(item, replacement)
+
+    return text
 
 
 def main(args=sys.argv[1:]):
@@ -25,8 +39,8 @@
 
     # parse command line
     parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_option('urls', metavar='url', nargs='+',
-                      help="URLs to convert")
+    parser.add_argument('urls', metavar='url', nargs='+',
+                        help="URLs to convert")
     options = parser.parse_args(args)
 
     # convert urls