annotate urlmatch.py @ 7:ef0553c4bbcd

more stubbing
author Jeff Hammel <jhammel@mozilla.com>
date Mon, 27 Jun 2011 11:07:38 -0700
parents 0cd69fa6751c
children b02420253bfd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
23be092e6099 make this executable
Jeff Hammel <jhammel@mozilla.com>
parents: 3
diff changeset
1 #!/usr/bin/env python
23be092e6099 make this executable
Jeff Hammel <jhammel@mozilla.com>
parents: 3
diff changeset
2
0
8bd0c3b2163e create urlmatch package
Jeff Hammel <jhammel@mozilla.com>
parents:
diff changeset
3 import urlparse
8bd0c3b2163e create urlmatch package
Jeff Hammel <jhammel@mozilla.com>
parents:
diff changeset
4
8bd0c3b2163e create urlmatch package
Jeff Hammel <jhammel@mozilla.com>
parents:
diff changeset
class UrlMatcher(object):
    """Match URLs against a registry of known URLs.

    Every registered URL is stored in decomposed form (see `decompose`).
    `match` then compares a query against the registry field by field, in
    the order given by `self.order`.
    """

    def __init__(self, *urls):
        """Create a matcher and register each URL string in `urls`."""
        # Fields are compared in this order by `diff` and `match`.
        self.order = ('domain', 'scheme', 'path')
        # Maps the original URL string to its decomposed dict.
        self.urls = {}
        for url in urls:
            self.add(url)

    def decompose(self, url):
        """Break a URL string into a dict of comparable parts.

        Returns a dict with three keys:

        - 'domain': list of host labels, ordered registered name first,
          then TLD, then the remaining sub-domain labels from right to
          left, e.g. 'www.example.com' -> ['example', 'com', 'www'].
          A netloc without any dot yields a single-element list.
        - 'path': list of path segments; [] for an empty or '/' path.
        - 'scheme': the scheme string as returned by urlsplit.

        Query and fragment are parsed but not recorded.
        """
        scheme, netloc, path, _query, _fragment = urlparse.urlsplit(url)

        # domain: the whole netloc is split on dots, so a port or userinfo
        # (if present) stays attached to its label.
        labels = netloc.split('.')
        if len(labels) == 1:
            domain = labels
        else:
            # assume the last label is a TLD
            domain = labels[-2:] + labels[:-2][::-1]

        # path
        segments = path.strip('/').split('/')
        if segments == ['']:
            segments = []

        # could do others
        return {'domain': domain, 'path': segments, 'scheme': scheme}

    def add(self, url):
        """Register `url` (a string) unless it is already registered."""
        if url not in self.urls:
            self.urls[url] = self.decompose(url)

    def diff(self, url1, url2):
        """Report the first field, in `self.order`, where two URLs differ.

        `url1` and `url2` may each be a URL string or an already
        decomposed dict.

        Returns None when no field differs.  When a field is present in
        only one of the two dicts, returns ``{field: (v1, v2)}`` if either
        value is a string, otherwise ``{field: [(first1, first2)]}`` built
        from the first element of each list (None for the missing side).

        Raises NotImplementedError when both URLs carry the field but
        with different values; that comparison is still a stub.
        """
        # decompose the urls if necessary
        if isinstance(url1, basestring):
            url1 = self.decompose(url1)
        if isinstance(url2, basestring):
            url2 = self.decompose(url2)

        # TODO: finish
        for field in self.order:
            in_first = field in url1
            in_second = field in url2

            if in_first and in_second:
                if url1[field] == url2[field]:
                    continue
                # Differing values are not handled yet, neither for
                # string fields nor for list fields.
                raise NotImplementedError

            if not in_first and not in_second:
                continue

            # The field exists on exactly one side.
            value1 = url1.get(field)
            value2 = url2.get(field)
            if isinstance(value1, basestring) or isinstance(value2, basestring):
                return {field: (value1, value2)}
            # List-valued field: report only the leading elements.  An
            # empty list is reported as None rather than raising IndexError.
            head1 = value1[0] if value1 else None
            head2 = value2[0] if value2 else None
            return {field: [(head1, head2)]}

        return None

    @staticmethod
    def _field_matches(value, compare_value):
        """Return True if a registered field value satisfies the query value.

        A string `value` must equal `compare_value`; a list `value` must be
        a prefix of `compare_value`.  An empty or missing `compare_value`
        never matches.
        """
        if not compare_value:
            return False
        if isinstance(value, basestring):
            return value == compare_value
        # A registered value shorter than the query cannot have it as a
        # prefix, so the slice comparison also covers the length check.
        return compare_value[:len(value)] == value

    def match(self, url):
        """Return the registered URLs that `url` matches.

        `url` may omit the scheme ('example.com/foo'); the scheme is then
        not used for filtering.  Empty fields of the query are skipped.
        A registered URL survives a field when `_field_matches` accepts
        its value for that field.

        Returns a set of the matching registered URL strings.  As soon as
        filtering on a field leaves no candidate, an empty *list* is
        returned instead (historical behaviour, kept for callers).
        """
        if '://' not in url:
            # give a bogus scheme for urlparse. boo!
            urldict = self.decompose('bogus://' + url)
            urldict.pop('scheme')
        else:
            urldict = self.decompose(url)

        urls = set(self.urls)
        for field in self.order:
            value = urldict.get(field)
            if not value:
                # don't match trivial fields
                continue
            # Iterate over a snapshot because candidates are discarded
            # from the set while looping.
            for key in list(urls):
                if not self._field_matches(value, self.urls[key].get(field)):
                    urls.discard(key)
            if not urls:
                return []
        return urls
8bd0c3b2163e create urlmatch package
Jeff Hammel <jhammel@mozilla.com>
parents:
diff changeset
110
8bd0c3b2163e create urlmatch package
Jeff Hammel <jhammel@mozilla.com>
parents:
diff changeset
if __name__ == '__main__':
    # Small demonstration: register a few URLs, then print what each
    # query matches.  The constructor adds its arguments in order, so
    # this registers the same URLs as separate add() calls would.
    matcher = UrlMatcher(
        'http://www.example.com/foo/bar/fleem',
        'http://www.example.com/foo/blah',
        'https://www.example.com/foo/',
        'https://www.example.net/foo/',
    )
    for query in ('example.com/foo/bar',
                  'http://example.com/foo',
                  'example.com',
                  'example'):
        # Parenthesised single-argument form prints identically under
        # the Python 2 print statement.
        print(matcher.match(query))