Mercurial > hg > urlmatch
annotate urlmatch.py @ 7:ef0553c4bbcd
more stubbing
author | Jeff Hammel <jhammel@mozilla.com> |
---|---|
date | Mon, 27 Jun 2011 11:07:38 -0700 |
parents | 0cd69fa6751c |
children | b02420253bfd |
rev | line source |
---|---|
4 | 1 #!/usr/bin/env python |
2 | |
try:
    import urlparse  # Python 2
except ImportError:
    import urllib.parse as urlparse  # Python 3: provides the same urlsplit()
4 | |
class UrlMatcher(object):
    """Match URLs against a registered collection.

    Every registered URL is decomposed into a ``domain`` list, a ``scheme``
    string and a ``path`` list (see :meth:`decompose`).  :meth:`match` then
    filters the registered URLs field by field, in ``self.order``.
    """

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # string checks below work on either interpreter.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, *urls):
        """Register each of ``urls``.

        ``self.order`` is the sequence of field names compared by
        :meth:`diff` and :meth:`match`; ``self.urls`` maps each registered
        URL string to its decomposed form.
        """
        self.order = ('domain', 'scheme', 'path')
        self.urls = {}
        for url in urls:
            self.add(url)

    def decompose(self, url):
        """Split ``url`` into a dict with ``domain``, ``path`` and ``scheme``.

        * ``domain``: the dot-separated labels of the netloc.  With two or
          more labels the last two come first, followed by the remaining
          labels in reverse order, e.g. ``www.example.com`` becomes
          ``['example', 'com', 'www']``.  This assumes the final label is
          the TLD.  An empty netloc gives ``[]``.
        * ``path``: the ``/``-separated segments of the path with leading
          and trailing slashes removed; ``[]`` when the path is empty.
        * ``scheme``: the scheme string as returned by ``urlsplit``.
        """
        scheme, netloc, path = urlparse.urlsplit(url)[:3]

        # An empty netloc would otherwise split to [''], which is truthy and
        # so could never be skipped as a trivial field by match(); normalise
        # it the same way the path is normalised below.
        labels = netloc.split('.') if netloc else []
        if len(labels) > 1:
            domain = labels[-2:] + labels[:-2][::-1]
        else:
            domain = labels

        segments = path.strip('/').split('/')
        if segments == ['']:
            segments = []

        # other urlsplit components (query, fragment) are not used yet
        return {'domain': domain, 'path': segments, 'scheme': scheme}

    def add(self, url):
        """Register ``url`` unless it is already present."""
        if url not in self.urls:
            self.urls[url] = self.decompose(url)

    def diff(self, url1, url2):
        """Report the first field, in ``self.order``, on which two URLs differ.

        Each argument may be a URL string or an already decomposed dict.

        Returns ``None`` when no field differs.  When a field is present in
        only one of the two dicts, returns ``{field: (v1, v2)}`` if either
        value is a string, otherwise ``{field: [(v1[0], v2[0])]}`` built from
        the first element of each list (``None`` standing in for the missing
        side).

        Raises ``NotImplementedError`` when a field is present in both but
        the values differ; that comparison is still a stub.
        """
        # decompose the urls if necessary
        if isinstance(url1, self._string_types):
            url1 = self.decompose(url1)
        if isinstance(url2, self._string_types):
            url2 = self.decompose(url2)

        # TODO: finish
        for field in self.order:
            in_first = field in url1
            in_second = field in url2

            if in_first and in_second:
                if url1[field] == url2[field]:
                    continue
                # differing values are not compared element-wise yet
                raise NotImplementedError

            if not in_first and not in_second:
                continue

            # the field is present on exactly one side
            value1 = url1.get(field)
            value2 = url2.get(field)
            if (isinstance(value1, self._string_types)
                    or isinstance(value2, self._string_types)):
                return {field: (value1, value2)}
            if value1 is not None:
                value1 = value1[0]
            if value2 is not None:
                value2 = value2[0]
            return {field: [(value1, value2)]}

        return None

    def match(self, url):
        """Return the registered URLs that ``url`` is a prefix of, per field.

        ``url`` may omit the scheme (no ``'://'``), in which case the scheme
        is not compared.  Fields of ``url`` that are empty are skipped.  For
        every other field a registered URL is kept only if its value is
        non-empty and starts with the value from ``url`` (string fields must
        be equal).

        Returns a set of registered URL strings, or an empty list as soon as
        no candidate remains after filtering on a field.
        """
        if '://' not in url:
            # give a bogus scheme so urlsplit recognises the netloc, then
            # drop it again so the scheme is not used for matching
            urldict = self.decompose('bogus://' + url)
            urldict.pop('scheme')
        else:
            urldict = self.decompose(url)

        candidates = set(self.urls)
        for field in self.order:
            value = urldict.get(field)
            if not value:
                # don't match trivial fields
                continue
            is_string = isinstance(value, self._string_types)
            width = len(value)
            # iterate over a snapshot because candidates shrinks in the loop
            for key in list(candidates):
                stored = self.urls[key].get(field)
                if (not stored
                        or (is_string and value != stored)
                        or len(stored) < width
                        or stored[:width] != value):
                    candidates.discard(key)
            if not candidates:
                return []
        return candidates
110 | |
if __name__ == '__main__':
    # Small demonstration: register a few URLs, then print the matches for
    # queries of decreasing specificity.
    matcher = UrlMatcher('http://www.example.com/foo/bar/fleem')
    matcher.add('http://www.example.com/foo/blah')
    matcher.add('https://www.example.com/foo/')
    matcher.add('https://www.example.net/foo/')
    # print(x) with a single argument behaves the same on Python 2 and 3
    print(matcher.match('example.com/foo/bar'))
    print(matcher.match('http://example.com/foo'))
    print(matcher.match('example.com'))
    print(matcher.match('example'))