Mercurial > hg > smartopen
changeset 12:bb995bdf82e2
annoyingly work around user agents again; I hate robot protection sometimes
author | Jeff Hammel <jhammel@mozilla.com> |
---|---|
date | Mon, 27 Dec 2010 11:48:18 -0800 |
parents | ba9058605c5a |
children | f11ce7b1a349 |
files | setup.py smartopen/handlers.py |
diffstat | 2 files changed, 10 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/setup.py Wed Dec 22 17:22:04 2010 -0800 +++ b/setup.py Mon Dec 27 11:48:18 2010 -0800 @@ -6,7 +6,7 @@ except: description = '' -version = '0.1.5' +version = '0.1.6' setup(name='smartopen', version=version,
--- a/smartopen/handlers.py Wed Dec 22 17:22:04 2010 -0800 +++ b/smartopen/handlers.py Mon Dec 27 11:48:18 2010 -0800 @@ -22,10 +22,17 @@ def exists(self, URL): """does a URL exist?""" + # need a phony user agent so wikipedia won't know we're a bot + headers = {} + headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4' + + request = urllib2.Request(URL, None, headers) try: - urllib.urlopen(URL) + f = urllib2.urlopen(request).read() return True - except IOError: + except urllib2.HTTPError, e: + return False + except urllib2.URLError, e: return False class URL(Location):