changeset 12:bb995bdf82e2

annoyingly work around user agents again; I hate robot protection sometimes
author Jeff Hammel <jhammel@mozilla.com>
date Mon, 27 Dec 2010 11:48:18 -0800
parents ba9058605c5a
children f11ce7b1a349
files setup.py smartopen/handlers.py
diffstat 2 files changed, 10 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/setup.py
+++ b/setup.py
@@ -1,17 +1,17 @@
 from setuptools import setup, find_packages
 import sys, os
 
 try:
     description = file('README.txt').read()
 except:
     description = ''
 
-version = '0.1.5'
+version = '0.1.6'
 
 setup(name='smartopen',
       version=version,
       description="open text in a browser contextually",
       long_description=description,
       classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
       keywords='',
       author='Jeff Hammel',
--- a/smartopen/handlers.py
+++ b/smartopen/handlers.py
@@ -17,20 +17,27 @@ class Location(object):
     def process(self, query):
         return query
 
     def test(self, query):
         return True
 
     def exists(self, URL):
         """does a URL exist?"""
+        # need a phony user agent so wikipedia won't know we're a bot
+        headers = {}
+        headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4'
+
+        request = urllib2.Request(URL, None, headers)
         try:
-            urllib.urlopen(URL)
+            f = urllib2.urlopen(request).read()
             return True
-        except IOError:
+        except urllib2.HTTPError, e:
+            return False
+        except urllib2.URLError, e:
             return False
 
 class URL(Location):
     """a straight URL"""
 
     def process(self, query):
         if '://' in query:
             return query