# HG changeset patch # User Jeff Hammel # Date 1293479298 28800 # Node ID bb995bdf82e26e3d86b3931ac2f265c0c30911af # Parent ba9058605c5a358db3d7d24e71eb55bc6eac8923 annoyingly work around user agents again; i hate robot protection sometimes diff -r ba9058605c5a -r bb995bdf82e2 setup.py --- a/setup.py Wed Dec 22 17:22:04 2010 -0800 +++ b/setup.py Mon Dec 27 11:48:18 2010 -0800 @@ -6,7 +6,7 @@ except: description = '' -version = '0.1.5' +version = '0.1.6' setup(name='smartopen', version=version, diff -r ba9058605c5a -r bb995bdf82e2 smartopen/handlers.py --- a/smartopen/handlers.py Wed Dec 22 17:22:04 2010 -0800 +++ b/smartopen/handlers.py Mon Dec 27 11:48:18 2010 -0800 @@ -22,10 +22,17 @@ def exists(self, URL): """does a URL exist?""" + # need a phony user agent so wikipedia won't know we're a bot + headers = {} + headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4' + + request = urllib2.Request(URL, None, headers) try: - urllib.urlopen(URL) + f = urllib2.urlopen(request).read() return True - except IOError: + except urllib2.HTTPError, e: + return False + except urllib2.URLError, e: return False class URL(Location):