2024-05-09 19:43:43 +00:00
|
|
|
# third-party
|
|
|
|
import flask
|
2024-05-10 18:20:47 +00:00
|
|
|
import peewee
|
2024-05-09 19:43:43 +00:00
|
|
|
|
|
|
|
# internals
|
|
|
|
from application import app
|
|
|
|
|
|
|
|
import rendering
|
|
|
|
import form
|
2024-05-14 04:15:20 +00:00
|
|
|
import markdown
|
2024-06-03 15:50:44 +00:00
|
|
|
import commenting
|
2024-05-09 19:43:43 +00:00
|
|
|
|
2024-05-10 18:20:47 +00:00
|
|
|
# Names of the model attributes that free-text and "field:value" searches
# are allowed to look at.
search_fields = ['name', 'title', 'teaser', 'text']
|
|
|
|
|
2024-05-09 19:43:43 +00:00
|
|
|
class Token:
    """Base class for one parsed unit of a search string.

    Attributes:
        string: the token text exactly as handed to the constructor.
        negate: whether the token is negated (rendered as ``NOT`` in the repr).
    """

    def __init__(self, string, negate=False):
        self.string = string
        self.negate = negate

    def __repr__(self):
        # Same two output shapes as before; only the "NOT " marker varies.
        marker = "NOT " if self.negate else ""
        return f"<{self.__class__.__name__} {marker}'{self.string}'>"

    def where(self, stmt, model):
        """Apply this token to ``stmt`` for ``model``; subclasses must override.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Token subclass MUST implement .where!")
|
|
|
|
|
2024-05-09 19:43:43 +00:00
|
|
|
class BareToken(Token):
    """A plain search word that is matched against every field in ``search_fields``."""

    def where(self, stmt, model):
        """Return ``stmt`` restricted by this token.

        One ``field ** '%<string>%'`` clause is built per entry of
        ``search_fields`` (presumably peewee's case-insensitive LIKE; confirm
        against the model's field types). A normal token ORs the clauses
        together; a negated token inverts each clause and ANDs them, so no
        field may contain the string.
        """
        pattern = f'%{self.string}%'
        combined = None

        for column_name in search_fields:
            clause = getattr(model, column_name) ** pattern
            if self.negate:
                clause = ~clause

            if combined is None:
                combined = clause
            elif self.negate:
                combined = (combined & clause)
            else:
                combined = (combined | clause)

        return stmt.where(combined)
|
2024-05-09 19:43:43 +00:00
|
|
|
|
|
|
|
class FieldToken(Token):
    """A ``field:value`` token that restricts the match to a single field.

    Attributes:
        field: the text before the first ``:``.
        value: the text after the first ``:``; replaced by a ``QuoteToken``
            when it is wrapped in double quotes.
        _valid: True when ``field`` is one of ``search_fields``.
    """

    def __init__(self, string, negate=False):
        """Split ``string`` at its first ``:`` into field and value.

        Raises:
            ValueError: if ``string`` contains no ``:`` (the unpacking fails).
        """
        super().__init__(string, negate=negate)
        (self.field, self.value) = string.split(':', 1)

        self._valid = self.field in search_fields

        # Require at least two characters: a lone '"' both starts and ends
        # with a quote, and wrapping it would leave an empty phrase whose
        # '%%' pattern matches everything.
        if (
            len(self.value) >= 2
            and self.value.startswith('"')
            and self.value.endswith('"')
        ):
            self.value = QuoteToken(self.value)

    def __repr__(self):
        marker = "NOT " if self.negate else ""
        return f"<{self.__class__.__name__} {marker}{self.field} == {repr(self.value)}>"

    def where(self, stmt, model):
        """Return ``stmt`` restricted to rows whose ``field`` contains the value.

        A token naming a field outside ``search_fields`` is ignored and
        ``stmt`` is returned unchanged. A negated token inverts the clause.
        """
        if not self._valid:
            return stmt

        field = getattr(model, self.field)

        # Quoted values carry the unquoted phrase on the QuoteToken.
        if isinstance(self.value, QuoteToken):
            needle = self.value.value
        else:
            needle = self.value

        expression = field ** f'%{needle}%'
        if self.negate:
            expression = ~expression

        return stmt.where(expression)
|
|
|
|
|
2024-05-09 19:43:43 +00:00
|
|
|
class QuoteToken(Token):
    """A quoted phrase, matched as one unit against every field in ``search_fields``.

    Attributes:
        value: ``string`` with its first and last character removed
            (the surrounding quotes, when the caller passed a quoted phrase).
    """

    def __init__(self, string, negate=False):
        super().__init__(string, negate=negate)
        self.value = string[1:-1]

    def where(self, stmt, model):
        """Return ``stmt`` restricted by this phrase.

        Builds one ``field ** '%<value>%'`` clause per entry of
        ``search_fields``. Clauses are OR-ed for a normal token; for a negated
        token each clause is inverted and the results are AND-ed.
        """
        like_pattern = f'%{self.value}%'
        condition = None

        for attr_name in search_fields:
            piece = getattr(model, attr_name) ** like_pattern
            if self.negate:
                piece = ~piece

            if condition is None:
                condition = piece
            elif self.negate:
                condition = (condition & piece)
            else:
                condition = (condition | piece)

        return stmt.where(condition)
|
|
|
|
|
2024-05-09 19:43:43 +00:00
|
|
|
def tokenize(search_string):
    """Split ``search_string`` into a list of search tokens.

    The first complete ``"..."`` pair is located, and the space-delimited
    token containing it (including any ``-`` or ``field:`` prefix glued to
    the opening quote) becomes a ``QuoteToken`` or ``FieldToken``. The text
    before and after that token is tokenized recursively. Text without a
    complete quote pair is split on single spaces into ``BareToken`` /
    ``FieldToken`` instances; empty parts are dropped. A leading ``-`` on any
    token marks it as negated.

    Args:
        search_string: the raw query text.

    Returns:
        A new list of tokens in query order; empty for an empty query.
    """
    quote_start = search_string.find('"')
    if quote_start != -1:
        quote_end = search_string.find('"', quote_start + 1)

        if quote_end != -1:  # found a closing quote as well
            # The token starts right after the last space preceding the
            # opening quote, or at 0 when there is none. rfind() also
            # inspects index 0, which the previous hand-written backward scan
            # skipped, so a leading space is no longer swallowed into the
            # token.
            # TODO: a non-'-', non-'field:' prefix glued to the quote should
            # probably be split off into its own token.
            token_start = search_string.rfind(' ', 0, quote_start) + 1
            token_end = quote_end + 1
            quote_token = search_string[token_start:token_end]

            # tokenize everything before the quote
            tokens = tokenize(search_string[:token_start])

            negate = quote_token.startswith('-')
            if negate:
                quote_token = quote_token[1:]

            # Only a colon *before* the opening quote makes this a field
            # token; a colon inside the quoted phrase is part of the phrase.
            prefix = quote_token[:quote_token.index('"')]
            if ':' in prefix:
                tokens.append(FieldToken(quote_token, negate=negate))
            else:
                tokens.append(QuoteToken(quote_token, negate=negate))

            # tokenize everything after the quote
            tokens.extend(tokenize(search_string[token_end:]))
            return tokens

    # No complete quote pair: plain space-separated tokens.
    tokens = []
    for part in search_string.split(' '):
        negate = part.startswith('-')
        token_string = part[1:] if negate else part

        if not token_string:  # ignore empty tokens
            continue

        if ':' in token_string:
            tokens.append(FieldToken(token_string, negate=negate))
        else:
            tokens.append(BareToken(token_string, negate=negate))

    return tokens
|
|
|
|
|
2024-06-29 03:36:26 +00:00
|
|
|
class SearchMiniForm(form.Form):
    """Compact search form that submits to the 'minisearch' endpoint."""

    def __init__(self, **kwargs):
        # Any caller-supplied 'action' is overridden.
        kwargs.update(action=flask.url_for('minisearch'))
        super().__init__(**kwargs)

        self['search_string'] = form.Text()
        self.buttons['submit'] = form.Button(label='Search')

    def process(self, submit):
        """Redirect to the 'search' endpoint with the entered search string."""
        query = self['search_string'].value
        target = flask.url_for('search', search_string=query)
        return flask.redirect(target)
|
|
|
|
|
2024-05-09 19:43:43 +00:00
|
|
|
class SearchForm(form.Form):
    """Full search form with a help text describing the advanced query syntax."""

    def __init__(self, search_string=None, **kwargs):
        """Create the form, pre-filled with ``search_string``.

        Args:
            search_string: initial value of the text field (may be None).
            **kwargs: forwarded to ``form.Form``; 'action' is always
                overridden with the URL of the 'search' endpoint.
        """
        kwargs['action'] = flask.url_for('search')
        super().__init__(**kwargs)

        # The help text is Markdown. It is kept flush-left on purpose: any
        # leading whitespace would become part of the string.
        help_text = """
### Advanced search features ###

#### Quoted phrases ####

**`"<search phrase>"`**

Match `<search phrase>` verbatim.

**Examples**:
* `"many wow"`
* `"such search"`

#### Field matching ####

**`<field>:<search term>`**

Match `<search term>` only for `<field>`.

Valid fields are as follows:
* `name`: name (used in URLs)
* `title`: title
* `teaser`: teaser text
* `text`: full view text

**Examples**:
* `teaser:jorts`
* `title:"not enough jorts"`

#### Negation ####

All search tokens can be prefixed with a minus (`-`) to exclude matches.

**Examples**:
* `-jortsn't`
* `-title:"too many jorts"`
"""

        self['search_string'] = form.Text(value=search_string, help=help_text)
        self.buttons['submit'] = form.Button(label='Search')

    def process(self, submit):
        """Redirect to the 'search' endpoint with the entered search string."""
        return flask.redirect(flask.url_for('search', search_string=self['search_string'].value))
|
|
|
|
|
|
|
|
class SearchPage(rendering.Renderable):
    """Search page: holds the search form and a listing of matching items.

    Attributes:
        title: page title, always "Search".
        form: a ``SearchForm`` pre-filled with the current search string.
        results: a ``rendering.Listing`` over the matches (over an empty list
            when there is no search string or nothing to query).
    """

    def __init__(self, search_string=None, offset=0, **kwargs):
        """Build the form and, if a search string is given, the result query.

        Args:
            search_string: raw query text; falsy means "no search yet".
            offset: forwarded to ``rendering.Listing``.
            **kwargs: forwarded to ``rendering.Renderable``.
        """
        super().__init__(**kwargs)

        self.title = "Search"
        self.form = SearchForm(search_string)

        results = []

        if search_string:
            search_tokens = tokenize(search_string)
            stmt = None

            for name, commentable in commenting.Commentable.__class_descendants__.items():
                if commentable in app.models_abstract:
                    continue

                select = commentable.select(
                    peewee.SQL(f'\'{name}\' AS "type"'),  # so we know what class to instantiate
                    commentable.name,  # ditto, but identifying the exact row
                    commentable.created,
                ).where(commentable.published == True)  # noqa: E712 -- builds a query expression

                # Every token narrows the per-model select further.
                for token in search_tokens:
                    select = token.where(select, commentable)

                # Combine the per-model selects into one compound statement.
                stmt = select if stmt is None else stmt.union_all(select)

            # stmt stays None when every descendant was skipped (or there are
            # none); keep the empty result list instead of failing on
            # None.order_by.
            if stmt is not None:
                stmt = stmt.order_by(stmt.c.created.desc())
                results = stmt.dicts()

        def item_constructor(item):
            # Map a result row back to its class via the "type" column, then
            # load the instance by name.
            commentable = commenting.Commentable.__class_descendants__[item['type']]
            return commentable.load(item['name'])

        self.results = rendering.Listing(
            results,
            endpoint='search',
            endpoint_params={'search_string': search_string},
            offset=offset,
            item_constructor=item_constructor,
        )
|
2024-05-09 19:43:43 +00:00
|
|
|
|
2024-05-13 03:57:53 +00:00
|
|
|
@app.block('search')
def block_search():
    """Provide a fresh ``SearchMiniForm`` for the 'search' block."""
    mini_form = SearchMiniForm()
    return mini_form
|
|
|
|
|
|
|
|
@app.route('/minisearch/', methods=['POST'])
def minisearch():
    """POST target of the mini search form; returns whatever the form's ``handle()`` returns."""
    return SearchMiniForm().handle()
|
2024-05-13 03:57:53 +00:00
|
|
|
|
2024-05-09 19:43:43 +00:00
|
|
|
@app.route('/search/', methods=['GET', 'POST'])
@app.route('/search/<string:search_string>/', methods=['GET', 'POST'])
@app.route('/search/<string:search_string>/<int:offset>/', methods=['GET', 'POST'])
@rendering.page()
def search(search_string=None, offset=0):
    """Serve the search page.

    On POST the page's form is handled first; a truthy result from
    ``handle()`` is returned in place of the page. Otherwise the
    ``SearchPage`` itself is returned.
    """
    page = SearchPage(search_string, offset=offset)

    if flask.request.method != 'POST':
        return page

    response = page.form.handle()
    return response if response else page
|