Mercurial > hg > toolbox
annotate toolbox/search.py @ 22:17e3a9b6b4e2 default tip
string not tuple
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 02 Jun 2024 15:58:09 -0700 |
parents | 87990e313a91 |
children |
rev | line source |
---|---|
0
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 import os |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 import shutil |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 import tempfile |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 from time import sleep |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 from whoosh import fields |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 from whoosh import index |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 from whoosh.query import And |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 from whoosh.query import Or |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 from whoosh.query import Term |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 from whoosh.qparser import QueryParser |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 from whoosh.index import LockError |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 |
b0942f44413f
import from git://github.com/mozilla/toolbox.git
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
class WhooshSearch(object):
    """Full-text search over named documents, backed by a Whoosh index.

    Every document has a unique, stored ``name`` and a ``description``.
    Additional keyword fields are added to the index the first time they
    are passed to :meth:`update`.
    """

    # how long to keep retrying for the index write lock, and how long to
    # sleep between attempts (both in seconds)
    lock_timeout = 3.
    lock_poll_interval = 0.2

    def __init__(self, whoosh_index=None):
        """
        - whoosh_index : whoosh index directory; when None, a temporary
          directory is created and removed again when the object is deleted
        """
        # set before anything that may raise, so __del__ is always safe
        self.tempdir = False
        self.schema = fields.Schema(name=fields.ID(unique=True, stored=True),
                                    description=fields.TEXT)
        self.keywords = set()
        if whoosh_index is None:
            whoosh_index = tempfile.mkdtemp()
            self.index = whoosh_index
            self.tempdir = True
        if not os.path.exists(whoosh_index):
            os.makedirs(whoosh_index)
        self.index = whoosh_index
        self.ix = index.create_in(self.index, self.schema)

    def _writer(self):
        """Return an index writer, retrying while the index is locked.

        Retries every ``lock_poll_interval`` seconds until ``lock_timeout``
        seconds have been spent waiting, then re-raises the last LockError.
        """
        waited = 0.
        last_error = None
        while waited < self.lock_timeout:
            try:
                return self.ix.writer()
            except LockError as error:
                # keep a reference: `error` is unbound once the except
                # block ends on Python 3
                last_error = error
                waited += self.lock_poll_interval
                sleep(self.lock_poll_interval)
        if last_error is None:
            # non-positive timeout: make a single attempt without retrying
            return self.ix.writer()
        raise last_error

    def update(self, name, description, **kw):
        """Add or replace the document called `name`.

        - name : unique name of the document
        - description : text of the document
        - kw : keyword fields; each value is either a string or an iterable
          of strings, which is joined with spaces

        Raises LockError if the index write lock cannot be obtained in time.
        """
        writer = self._writer()
        new_keywords = []
        values = {}
        try:
            for key, value in kw.items():
                if key not in self.keywords and key not in new_keywords:
                    writer.add_field(key, fields.KEYWORD)
                    new_keywords.append(key)
                if not isinstance(value, str):
                    value = ' '.join(value)
                values[key] = str(value)
            writer.update_document(name=name, description=description,
                                   **values)
        except Exception:
            # abandon the pending changes so the write lock is not left held
            writer.cancel()
            raise
        writer.commit()
        # only remember fields once they really are part of the index
        self.keywords.update(new_keywords)

    def delete(self, name):
        """Delete the document of a given name.

        Raises LockError if the index write lock cannot be obtained in time.
        """
        writer = self._writer()
        try:
            writer.delete_by_term('name', name)
        except Exception:
            # abandon the pending changes so the write lock is not left held
            writer.cancel()
            raise
        writer.commit()

    def __call__(self, query):
        """Search the index and return the names of the matching documents.

        - query : query string, parsed against the description field
        """
        query_parser = QueryParser("description", schema=self.ix.schema)
        parsed = query_parser.parse(query)

        # New code: too permissive
        # Every whitespace-separated word that is not a boolean operator has
        # to match the description, the name or one of the keyword fields.
        # The parsed query is an alternative in each of those groups, so any
        # document matching the parsed query matches the extended one too.
        excluded = set(['AND', 'OR', 'NOT'])
        terms = [word for word in query.split() if word not in excluded]
        extended = And([Or([parsed,
                            Term('description', term),
                            Term('name', term)] +
                           [Term(field, term) for field in self.keywords])
                        for term in terms])

        # perform the search; the hits are read inside the `with` block
        # because the searcher is closed on leaving it
        with self.ix.searcher() as searcher:
            return [hit['name']
                    for hit in searcher.search(extended, limit=None)]

    def __del__(self):
        """Remove the index directory if it was created as a temporary one."""
        # the attributes may be missing if __init__ failed part-way through
        if not getattr(self, 'tempdir', False):
            return
        index_dir = getattr(self, 'index', None)
        if index_dir is not None:
            # delete the temporary directory, if present
            shutil.rmtree(index_dir, ignore_errors=True)