pdnew/search.py

359 lines
9.3 KiB
Python

# third-party
import flask
import peewee
# internals
from application import app
import rendering
import form
import markdown
import commenting
# Names of the model attributes that search tokens are matched against.
# FieldToken also uses this list to decide whether a `<field>:` prefix is valid.
search_fields = ['name', 'title', 'teaser', 'text']
class Token:
    """Base class for one parsed unit of a search string.

    Attributes:
        string: the raw text of the token.
        negate: True when the token should exclude matches instead of
            selecting them.
    """

    def __init__(self, string, negate=False):
        self.string, self.negate = string, negate

    def __repr__(self):
        # Negated tokens are rendered with a "NOT " marker before the text.
        marker = "NOT " if self.negate else ""
        return f"<{self.__class__.__name__} {marker}'{self.string}'>"

    def where(self, stmt, model):
        """Apply this token's condition to `stmt`; subclasses must override.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Token subclass MUST implement .where!")
class BareToken(Token):
    """A plain search word that is matched against every search field."""

    def where(self, stmt, model):
        """Return `stmt` with this token's condition added via `.where()`.

        One `field ** '%<string>%'` expression is built per entry in
        `search_fields` (presumably peewee's case-insensitive LIKE; confirm
        that the model attributes are peewee fields). A regular token ORs
        the per-field expressions together; a negated token inverts each
        one and ANDs them, so no field may match.
        """
        pattern = f'%{self.string}%'
        combined = None
        for column in (getattr(model, name) for name in search_fields):
            clause = column ** pattern
            if self.negate:
                clause = ~clause
            if combined is None:
                combined = clause
            elif self.negate:
                combined = combined & clause
            else:
                combined = combined | clause
        return stmt.where(combined)
class FieldToken(Token):
    """A `<field>:<value>` token that restricts matching to a single field.

    Attributes:
        field: the text before the first ':'.
        value: the text after the first ':'; wrapped in a QuoteToken when it
            both starts and ends with a double quote.
    """

    def __init__(self, string, negate=False):
        super().__init__(string, negate=negate)
        # Unpacking raises ValueError when `string` contains no ':'.
        field_name, raw_value = string.split(':', 1)
        self.field = field_name
        is_quoted = raw_value.startswith('"') and raw_value.endswith('"')
        self.value = QuoteToken(raw_value) if is_quoted else raw_value
        # Only names listed in search_fields are searchable.
        self._valid = field_name in search_fields

    def __repr__(self):
        marker = "NOT " if self.negate else ""
        return f"<{self.__class__.__name__} {marker}{self.field} == {self.value!r}>"

    def where(self, stmt, model):
        """Return `stmt` filtered by this token.

        `stmt` is returned unchanged when the field name is not one of
        `search_fields`.
        """
        if not self._valid:
            return stmt
        column = getattr(model, self.field)
        # A quoted value is matched by its unquoted content.
        needle = self.value.value if isinstance(self.value, QuoteToken) else self.value
        condition = column ** f'%{needle}%'
        return stmt.where(~condition if self.negate else condition)
class QuoteToken(Token):
    """A quoted phrase that is matched as a whole against every search field.

    Attributes:
        value: `string` without its first and last character (the quotes).
    """

    def __init__(self, string, negate=False):
        super().__init__(string, negate=negate)
        self.value = string[1:-1]

    def __repr__(self):
        # Same rendering as the base class, spelled out here for readability.
        marker = "NOT " if self.negate else ""
        return f"<{self.__class__.__name__} {marker}'{self.string}'>"

    def where(self, stmt, model):
        """Return `stmt` with this phrase's condition added via `.where()`.

        Builds one `field ** '%<value>%'` expression per entry in
        `search_fields`. They are ORed for a regular token; for a negated
        token each is inverted and they are ANDed.
        """
        like = f'%{self.value}%'
        clauses = []
        for name in search_fields:
            clause = getattr(model, name) ** like
            clauses.append(~clause if self.negate else clause)
        # With no search fields at all the original passes None through.
        merged = clauses[0] if clauses else None
        for clause in clauses[1:]:
            merged = (merged & clause) if self.negate else (merged | clause)
        return stmt.where(merged)
def tokenize(search_string):
    """Split a raw search string into a list of Token objects.

    Syntax handled:

    * `word`             -> BareToken
    * `"some phrase"`    -> QuoteToken
    * `field:word`       -> FieldToken
    * `field:"a phrase"` -> FieldToken (with a quoted value)
    * any of the above prefixed with `-` -> the same token, negated

    Only the space character separates tokens. Empty tokens (repeated
    spaces, a lone `-`) are dropped. An opening quote without a closing
    quote is not treated as a phrase; the string is then split on spaces
    like any other text.

    Text glued directly to an opening quote that is neither `-` nor a
    `field:` prefix (e.g. `foo"bar baz"`) is tokenized on its own and the
    quoted phrase becomes a separate, non-negated QuoteToken.

    Args:
        search_string: the user-supplied search string.

    Returns:
        A list of Token instances in the order they appear in the string;
        an empty list when the string contains no tokens.
    """
    quote_start = search_string.find('"')
    quote_end = search_string.find('"', quote_start + 1) if quote_start != -1 else -1

    if quote_end != -1:
        token_end = quote_end + 1
        # Start of the space-delimited word the opening quote belongs to.
        # rfind yields -1 when there is no earlier space, which maps to 0;
        # a space directly before the quote maps to quote_start itself.
        token_start = search_string.rfind(' ', 0, quote_start) + 1
        prefix = search_string[token_start:quote_start]
        quoted = search_string[quote_start:token_end]

        negate = prefix.startswith('-')
        if negate:
            prefix = prefix[1:]

        if not prefix:
            quote_token = QuoteToken(quoted, negate=negate)
        elif prefix.endswith(':'):
            # Decide "field token" from what precedes the quote, so a colon
            # inside the quoted phrase itself does not matter.
            quote_token = FieldToken(prefix + quoted, negate=negate)
        else:
            # Unrelated text glued to the quote: leave it in the part before
            # the phrase so it is tokenized separately.
            token_start = quote_start
            quote_token = QuoteToken(quoted, negate=False)

        tokens = tokenize(search_string[:token_start])  # everything before the phrase
        tokens.append(quote_token)
        tokens.extend(tokenize(search_string[token_end:]))  # everything after the phrase
        return tokens

    tokens = []
    for part in search_string.split(' '):
        negate = part.startswith('-')
        token_string = part[1:] if negate else part
        if not token_string:  # ignore empty tokens
            continue
        token_class = FieldToken if ':' in token_string else BareToken
        tokens.append(token_class(token_string, negate=negate))
    return tokens
class SearchMiniForm(form.Form):
    """Compact search form with a single text field and a submit button.

    The form's action is the URL of the `minisearch` endpoint.
    """

    def __init__(self, **kwargs):
        kwargs.update(action=flask.url_for('minisearch'))
        super().__init__(**kwargs)
        self['search_string'] = form.Text()
        self.buttons['submit'] = form.Button(label='Search')

    def process(self, submit):
        """Redirect to the `search` endpoint for the entered search string.

        NOTE(review): presumably invoked by form.Form.handle() on a valid
        submission; confirm against the form module.
        """
        target = flask.url_for('search', search_string=self['search_string'].value)
        return flask.redirect(target)
class SearchForm(form.Form):
    """Full search form shown on the search page.

    Consists of one text field, pre-filled with the current search string and
    accompanied by a Markdown help text on the search syntax, plus a submit
    button. The form's action is the URL of the `search` endpoint.
    """

    def __init__(self, search_string=None, **kwargs):
        """Build the form.

        Args:
            search_string: initial value of the search text field.
            **kwargs: passed on to form.Form; 'action' is always overridden.
        """
        kwargs['action'] = flask.url_for('search')
        super().__init__(**kwargs)
        # Named help_text so the builtin help() is not shadowed.
        help_text = """
### Advanced search features ###
#### Quoted phrases ####
**`"<search phrase>"`**
Match `<search phrase>` verbatim.
**Examples**:
* `"many wow"`
* `"such search"`
#### Field matching ####
**`<field>:<search term>`**
Match `<search term>` only for `<field>`.
Valid fields are as follows:
* `name`: name (used in URLs)
* `title`: title
* `teaser`: teaser text
* `text`: full view text
**Examples**:
* `teaser:jorts`
* `title:"not enough jorts"`
#### Negation ####
All search tokens can be prefixed with a minus (`-`) to exclude matches.
**Examples**:
* `-jortsn't`
* `-title:"too many jorts"`
"""
        self['search_string'] = form.Text(value=search_string, help=help_text)
        self.buttons['submit'] = form.Button(label='Search')

    def process(self, submit):
        """Redirect to the `search` endpoint for the entered search string.

        NOTE(review): presumably invoked by form.Form.handle() on a valid
        submission; confirm against the form module.
        """
        return flask.redirect(flask.url_for('search', search_string=self['search_string'].value))
class SearchPage(rendering.Renderable):
    """Renderable search page: the search form plus a listing of results.

    Attributes:
        title: page title, always "Search".
        form: SearchForm pre-filled with the current search string.
        results: rendering.Listing over the matching items.
    """

    def __init__(self, search_string=None, offset=0, **kwargs):
        """Run the search (if any) and prepare form and result listing.

        Args:
            search_string: raw search string; when empty or None no query
                is built and the listing is empty.
            offset: passed through to rendering.Listing.
            **kwargs: passed on to rendering.Renderable.
        """
        super().__init__(**kwargs)
        self.title = "Search"
        self.form = SearchForm(search_string)
        descendants = commenting.Commentable.__class_descendants__
        results = []
        if search_string:
            search_tokens = tokenize(search_string)
            stmt = None
            for name, commentable in descendants.items():
                if commentable in app.models_abstract:
                    continue
                select = commentable.select(
                    peewee.SQL(f'\'{name}\' AS "type"'),  # so we know what class to instantiate
                    commentable.name,  # ditto, but identifying the exact row
                    commentable.created
                ).where(commentable.published == True)  # noqa: E712 - builds a query expression
                for token in search_tokens:
                    select = token.where(select, commentable)
                # Combine the per-model selects into one statement.
                stmt = select if stmt is None else stmt.union_all(select)
            # stmt is still None when there is no concrete Commentable model;
            # keep the empty result list instead of failing on None.order_by.
            if stmt is not None:
                stmt = stmt.order_by(stmt.c.created.desc())
                results = stmt.dicts()

        def item_constructor(item):
            # Map a result row back to its model class and load the item.
            commentable = descendants[item['type']]
            return commentable.load(item['name'])

        self.results = rendering.Listing(
            results,
            endpoint='search',
            endpoint_params={'search_string': search_string},
            offset=offset,
            item_constructor=item_constructor,
        )
@app.block('search')
def block_search():
    """Return a new SearchMiniForm for the block registered as 'search'."""
    mini_form = SearchMiniForm()
    return mini_form
@app.route('/minisearch/', methods=['POST'])
def minisearch():
    """POST endpoint for the mini search form; returns the result of its handle()."""
    return SearchMiniForm().handle()
@app.route('/search/', methods=['GET', 'POST'])
@app.route('/search/<string:search_string>/', methods=['GET', 'POST'])
@app.route('/search/<string:search_string>/<int:offset>/', methods=['GET', 'POST'])
@rendering.page()
def search(search_string=None, offset=0):
    """Search endpoint: build the search page, handling form posts first.

    On POST the page's form is handled; a truthy result (a redirect, going
    by the form's process method) is returned as is. In every other case the
    SearchPage itself is returned.
    """
    page = SearchPage(search_string, offset=offset)
    if flask.request.method != 'POST':
        return page
    response = page.form.handle()
    return response if response else page