Mercurial > hg > urlmatch

--- a/setup.py	Thu Jun 16 17:59:29 2011 -0700
+++ b/setup.py	Fri Jun 17 10:39:37 2011 -0700
@@ -6,15 +6,15 @@
 setup(name='urlmatch',
       version=version,
       description="match urls systematically",
-      long_description="""\
-""",
+      long_description='',
       classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
       keywords='url',
       author='Jeff Hammel',
       author_email='jhammel@mozilla.com',
       url='http://k0s.org/mozilla/hg/urlmatch',
       license='MPL',
-      packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
+      py_modules=['urlmatch'],
+      packages=[],
       include_package_data=True,
       zip_safe=False,
       install_requires=[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/urlmatch.py	Fri Jun 17 10:39:37 2011 -0700
@@ -0,0 +1,96 @@
+import urlparse
+
+class UrlMatcher(object):
+
+    def __init__(self, *urls):
+        """Register `urls`; `self.order` is the field comparison order."""
+        self.order = ('domain', 'scheme', 'path')
+        self.urls = {}
+        for url in urls:
+            self.add(url)
+
+    def decompose(self, url):
+        """Break `url` into a dict of 'domain', 'path' and 'scheme'."""
+        # break it down
+        (scheme, netloc, path, query, fragment) = urlparse.urlsplit(url)
+        urldict = {}
+
+        # domain labels (NOTE(review): any ':port' in netloc is not stripped)
+        netloc = netloc.split('.')
+        if len(netloc) == 1:
+            urldict['domain'] = netloc
+        else:
+            # treat the last label as a TLD: www.example.com -> [example, com, www]
+            urldict['domain'] = [netloc[-2], netloc[-1]] + list(reversed(netloc[0:-2]))
+
+        # path: segments between slashes; [] when the path is empty or '/'
+        path = path.strip('/').split('/')
+        if path == ['']:
+            path = []
+        urldict['path'] = path
+
+        # scheme, stored as the plain string urlsplit returned
+        urldict['scheme'] = scheme
+
+        # could do others (query and fragment are parsed but not stored)
+
+        return urldict
+
+    def add(self, url):  # register `url` under its decomposed form
+        if url not in self.urls:  # already-registered urls are left as they are
+            self.urls[url] = self.decompose(url)
+
+    def diff(self, url1, url2):
+        """Compare two urls (strings or decomposed dicts); not implemented yet."""
+        # decompose the urls if necessary
+        if isinstance(url1, basestring):
+            url1 = self.decompose(url1)
+        if isinstance(url2, basestring):
+            url2 = self.decompose(url2)
+
+        # TODO: finish
+        raise NotImplementedError
+
+    def match(self, url):
+        """Return the set of stored urls matching `url`, or [] if none do."""
+        if '://' not in url:
+            # give a bogus scheme for urlparse. boo!
+            urldict = self.decompose('bogus://' + url)
+            urldict.pop('scheme')
+        else:
+            urldict = self.decompose(url)
+
+        urls = set(self.urls.keys())
+        for field in self.order:
+            value = urldict.get(field)
+            if not value:
+                # don't match trivial fields
+                continue
+            length = len(value)
+            # iterate over a snapshot: `urls` shrinks inside the loop
+            for key in list(urls):
+                compare_value = self.urls[key].get(field)
+                if not compare_value:
+                    urls.discard(key)
+                    continue
+                # scheme is a string and must be equal, not merely a prefix
+                if isinstance(value, basestring) and value != compare_value:
+                    urls.discard(key)
+                    continue
+                # list fields (domain, path) match when `value` is a prefix;
+                # a shorter stored list can never equal `value` once sliced
+                if compare_value[:length] != value:
+                    urls.discard(key)
+            if not urls:
+                return []
+        return urls
+
+if __name__ == '__main__':  # ad-hoc demo: print the matches for a few queries
+    matcher = UrlMatcher('http://www.example.com/foo/bar/fleem')
+    matcher.add('http://www.example.com/foo/blah')
+    matcher.add('https://www.example.com/foo/')
+    matcher.add('https://www.example.net/foo/')
+    print matcher.match('example.com/foo/bar')  # query without a scheme
+    print matcher.match('http://example.com/foo')  # query with a scheme
+    print matcher.match('example.com')  # query without scheme or path
+    print matcher.match('example')  # query with a single domain label
--- a/urlmatch/__init__.py	Thu Jun 16 17:59:29 2011 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-import urlparse
-
-class UrlMatcher(object):
-
-    def __init__(self, *urls):
-        match_order=('domain', 'scheme', 'path')
-        self.order = match_order
-        self.urls = {}
-        for url in urls:
-            self.add(url)
-
-    def decompose(self, url):
-
-        # break it down
-        (scheme, netloc, path, query, fragment) = urlparse.urlsplit(url)
-        urldict = {}
-
-        # domain
-        netloc = netloc.split('.')
-        if len(netloc) == 1:
-            urldict['domain'] = netloc
-        else:
-            # assert a TLD
-            urldict['domain'] = [netloc[-2], netloc[-1]] + list(reversed(netloc[0:-2]))
-
-        # path
-        path = path.strip('/').split('/')
-        if path == ['']:
-            path = []
-        urldict['path'] = path
-
-        # scheme
-        urldict['scheme'] = scheme
-
-        # could do others
-
-        return urldict
-
-    def add(self, url):
-        if url not in self.urls:
-            self.urls[url] = self.decompose(url)
-
-    def diff(self, url1, url2):
-
-        # decompose the urls if necessary
-        if isinstance(url1, basestring):
-            url1 = self.decompose(url)
-        if isinstance(url2, basestring):
-            url2 = self.decompose(url)
-
-        # TODO: finish
-        raise NotImplementedError
-
-    def match(self, url):
-        if '://' not in url:
-            # give a bogus scheme for urlparse. boo!
-            urldict = self.decompose('bogus://' + url)
-            urldict.pop('scheme')
-        else:
-            urldict = self.decompose(url)
-
-        order = self.order
-        urls = set(self.urls.keys())
-        for field in order:
-            value = urldict.get(field)
-            if not value:
-                # don't match trivial fields
-                continue
-            length = len(value)
-            deleted = set()
-            for key in list(urls)[:]:
-                compare_value = self.urls[key].get(field)
-                if not compare_value:
-                    urls.discard(key)
-                    continue
-                if isinstance(value, basestring) and value != compare_value:
-                    urls.discard(key)
-                    continue
-                if len(compare_value) < length:
-                    urls.discard(key)
-                    continue
-                if compare_value[:len(value)] != value:
-                    urls.discard(key)
-            if not urls:
-                return []
-        return urls
-
-if __name__ == '__main__':
-    matcher = UrlMatcher('http://www.example.com/foo/bar/fleem')
-    matcher.add('http://www.example.com/foo/blah')
-    matcher.add('https://www.example.com/foo/')
-    matcher.add('https://www.example.net/foo/')
-    print matcher.match('example.com/foo/bar')
-    print matcher.match('http://example.com/foo')
-    print matcher.match('example.com')
-    print matcher.match('example')