changeset 12:bb995bdf82e2

annoyingly work around user-agent checks again; I hate robot protection sometimes
author Jeff Hammel <jhammel@mozilla.com>
date Mon, 27 Dec 2010 11:48:18 -0800
parents ba9058605c5a
children f11ce7b1a349
files setup.py smartopen/handlers.py
diffstat 2 files changed, 10 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/setup.py	Wed Dec 22 17:22:04 2010 -0800
+++ b/setup.py	Mon Dec 27 11:48:18 2010 -0800
@@ -6,7 +6,7 @@
 except:
     description = ''
 
-version = '0.1.5'
+version = '0.1.6'
 
 setup(name='smartopen',
       version=version,
--- a/smartopen/handlers.py	Wed Dec 22 17:22:04 2010 -0800
+++ b/smartopen/handlers.py	Mon Dec 27 11:48:18 2010 -0800
@@ -22,10 +22,17 @@
 
     def exists(self, URL):
         """does a URL exist?"""
+        # need a phony user agent so wikipedia won't know we're a bot
+        headers = {}
+        headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4'
+
+        request = urllib2.Request(URL, None, headers)
         try:
-            urllib.urlopen(URL)
+            f = urllib2.urlopen(request).read()
             return True
-        except IOError:
+        except urllib2.HTTPError, e:
+            return False
+        except urllib2.URLError, e:
             return False
 
 class URL(Location):