"""Site search: query tokenizer, search forms, result page and routes."""

# third-party
import flask
import peewee

# internals
from application import app
import rendering
import form
import markdown
import commenting


# Model fields that bare/quoted tokens are matched against, and the only
# field names accepted in `field:value` tokens.
search_fields = [
    'name',
    'title',
    'teaser',
    'text',
]


# Markdown help text shown next to the full search form.
# NOTE(review): the `<phrase>`, `<field>` and `<value>` placeholders were
# reconstructed; the text previously contained empty code spans here.
SEARCH_HELP = """
### Advanced search features ###

#### Quoted phrases ####

**`"<phrase>"`**

Match `<phrase>` verbatim.

**Examples**:

* `"many wow"`
* `"such search"`

#### Field matching ####

**`<field>:<value>`**

Match `<value>` only for `<field>`. Valid fields are as follows:

* `name`: name (used in URLs)
* `title`: title
* `teaser`: teaser text
* `text`: full view text

**Examples**:

* `teaser:jorts`
* `title:"not enough jorts"`

#### Negation ####

All search tokens can be prefixed with a minus (`-`) to exclude matches.

**Examples**:

* `-jortsn't`
* `-title:"too many jorts"`
"""


class Token:
    """Base class for one parsed unit of a search string.

    Attributes:
        string: The raw token text (without a leading ``-``).
        negate: True when the token should exclude matches.
    """

    def __init__(self, string, negate=False):
        self.string = string
        self.negate = negate

    def __repr__(self):
        if self.negate:
            return f"<{self.__class__.__name__} NOT '{self.string}'>"
        return f"<{self.__class__.__name__} '{self.string}'>"

    def where(self, stmt, model):
        """Return `stmt` narrowed by this token for `model`.

        Subclasses must override this.
        """
        raise NotImplementedError("Token subclass MUST implement .where!")

    def _where_any_field(self, stmt, model, needle):
        """Filter `stmt` by `needle` as a substring of the search fields.

        A positive token matches when ANY field contains `needle` (OR);
        a negated token matches only when NO field contains it (AND of
        the negated per-field expressions).
        """
        pattern = f'%{needle}%'
        expression = None
        for field_name in search_fields:
            current = getattr(model, field_name) ** pattern
            if self.negate:
                current = ~current

            if expression is None:
                expression = current
            elif self.negate:
                expression = expression & current
            else:
                expression = expression | current
        return stmt.where(expression)


class BareToken(Token):
    """A plain word, matched against every field in `search_fields`."""

    def where(self, stmt, model):
        return self._where_any_field(stmt, model, self.string)


class FieldToken(Token):
    """A `field:value` token, matched against a single field.

    `value` is either a plain string or, when it is wrapped in double
    quotes, a `QuoteToken`. Tokens naming a field that is not listed in
    `search_fields` are kept but flagged invalid and do not filter.

    Raises:
        ValueError: If `string` contains no ``:``.
    """

    def __init__(self, string, negate=False):
        super().__init__(string, negate=negate)
        (self.field, self.value) = string.split(':', 1)
        self._valid = self.field in search_fields

        # The length check keeps a lone '"' from being read as an empty
        # quoted phrase, which would match every row.
        if (len(self.value) >= 2
                and self.value.startswith('"')
                and self.value.endswith('"')):
            self.value = QuoteToken(self.value)

    def __repr__(self):
        if self.negate:
            return f"<{self.__class__.__name__} NOT {self.field} == {self.value.__repr__()}>"
        return f"<{self.__class__.__name__} {self.field} == {self.value.__repr__()}>"

    def where(self, stmt, model):
        """Filter `stmt` on this token's field; no-op for invalid fields."""
        if not self._valid:
            return stmt

        field = getattr(model, self.field)
        if isinstance(self.value, QuoteToken):
            needle = self.value.value
        else:
            needle = self.value

        expression = field ** f'%{needle}%'
        if self.negate:
            expression = ~expression
        return stmt.where(expression)


class QuoteToken(Token):
    """A double-quoted phrase, matched against every search field.

    `string` includes the surrounding quotes; `value` is the text
    between them.
    """

    def __init__(self, string, negate=False):
        super().__init__(string, negate=negate)
        self.value = string[1:-1]

    def where(self, stmt, model):
        return self._where_any_field(stmt, model, self.value)


def tokenize(search_string):
    """Split a search string into a list of `Token` objects.

    Tokens are separated by single spaces. The first double-quoted
    phrase (if it is closed) is cut out as one token and the text before
    and after it is tokenized recursively. Text glued to the opening
    quote is handled as follows:

    * ``-"a b"``        -> negated `QuoteToken`
    * ``field:"a b"``   -> `FieldToken` with a quoted value
    * ``-field:"a b"``  -> negated `FieldToken`
    * anything else (``foo"a b"``) is split off as its own token and the
      phrase becomes a plain `QuoteToken`.

    An unclosed quote is treated as an ordinary character.

    Args:
        search_string: The raw query string.

    Returns:
        A list of tokens in the order they appear; empty for an empty
        or all-space string.
    """
    quote_start = search_string.find('"')
    quote_end = -1
    if quote_start != -1:
        quote_end = search_string.find('"', quote_start + 1)

    if quote_end != -1:
        token_end = quote_end + 1
        quoted = search_string[quote_start:token_end]

        # Text between the previous space (or string start) and the quote.
        # rfind() returns -1 when there is no space, giving a start of 0.
        word_start = search_string.rfind(' ', 0, quote_start) + 1
        prefix = search_string[word_start:quote_start]
        negate = prefix.startswith('-')
        qualifier = prefix[1:] if negate else prefix

        if qualifier.endswith(':'):
            # field token that has a quoted value
            token_start = word_start
            token = FieldToken(qualifier + quoted, negate=negate)
        elif qualifier == '':
            token_start = word_start
            token = QuoteToken(quoted, negate=negate)
        else:
            # Unrelated word glued to the quote: leave it (and any '-'
            # belonging to it) in the "before" part.
            token_start = quote_start
            token = QuoteToken(quoted)

        tokens = tokenize(search_string[:token_start])
        tokens.append(token)
        tokens.extend(tokenize(search_string[token_end:]))
        return tokens

    tokens = []
    for part in search_string.split(' '):
        negate = part.startswith('-')
        token_string = part[1:] if negate else part

        if not token_string:
            # ignore empty tokens (repeated spaces, a lone '-')
            continue

        if ':' in token_string:
            tokens.append(FieldToken(token_string, negate=negate))
        else:
            tokens.append(BareToken(token_string, negate=negate))
    return tokens


class SearchMiniForm(form.Form):
    """Compact search form; posts to the `minisearch` endpoint."""

    def __init__(self, **kwargs):
        kwargs['action'] = flask.url_for('minisearch')
        super().__init__(**kwargs)
        self['search_string'] = form.Text()
        self.buttons['submit'] = form.Button(label='Search')

    def process(self, submit):
        """Redirect to the search page for the entered string."""
        # url_for() drops arguments that are None, so an empty field
        # leads to the plain search page.
        search_string = self['search_string'].value or None
        return flask.redirect(flask.url_for('search', search_string=search_string))


class SearchForm(form.Form):
    """Full search form with syntax help; posts to the `search` endpoint."""

    def __init__(self, search_string=None, **kwargs):
        kwargs['action'] = flask.url_for('search')
        super().__init__(**kwargs)
        self['search_string'] = form.Text(value=search_string, help=SEARCH_HELP)
        self.buttons['submit'] = form.Button(label='Search')

    def process(self, submit):
        """Redirect to the search page for the entered string."""
        # url_for() drops arguments that are None, so an empty field
        # leads to the plain search page.
        search_string = self['search_string'].value or None
        return flask.redirect(flask.url_for('search', search_string=search_string))


class SearchPage(rendering.Renderable):
    """Search page: the search form plus a listing of matching items.

    For every non-abstract `Commentable` descendant a SELECT of
    published rows is built, narrowed by each search token, and the
    per-model queries are combined with UNION ALL, newest first.
    """

    def __init__(self, search_string=None, offset=0, **kwargs):
        super().__init__(**kwargs)
        self.title = "Search"
        self.form = SearchForm(search_string)

        results = []
        if search_string:
            search_tokens = tokenize(search_string)
            stmt = None

            for name, commentable in commenting.Commentable.__class_descendants__.items():
                if commentable in app.models_abstract:
                    continue

                # `name` is a key of the internal class registry, not
                # user input, so interpolating it into SQL is safe here.
                select = commentable.select(
                    peewee.SQL(f'\'{name}\' AS "type"'),  # so we know what class to instantiate
                    commentable.name,  # ditto, but identifying the exact row
                    commentable.created
                ).where(commentable.published == True)  # noqa: E712 (peewee expression)

                for token in search_tokens:
                    select = token.where(select, commentable)

                if stmt is None:
                    stmt = select
                else:
                    stmt = stmt.union_all(select)

            # stmt stays None when there is no concrete model to search.
            if stmt is not None:
                stmt = stmt.order_by(stmt.c.created.desc())
                results = stmt.dicts()

        def item_constructor(item):
            """Load the model instance a result row refers to."""
            commentable = commenting.Commentable.__class_descendants__[
                item['type']
            ]
            return commentable.load(item['name'])

        self.results = rendering.Listing(
            results,
            endpoint='search',
            endpoint_params={'search_string': search_string},
            offset=offset,
            item_constructor=item_constructor,
        )


@app.block('search')
def block_search():
    """Provide the mini search form for the 'search' block."""
    return SearchMiniForm()


@app.route('/minisearch/', methods=['POST'])
def minisearch():
    """Handle a submission of the mini search form."""
    f = SearchMiniForm()
    return f.handle()


# NOTE(review): the URL variables were reconstructed from the view's
# parameters; confirm that `offset` should use the `int` converter.
@app.route('/search/', methods=['GET', 'POST'])
@app.route('/search/<search_string>/', methods=['GET', 'POST'])
@app.route('/search/<search_string>/<int:offset>/', methods=['GET', 'POST'])
@rendering.page()
def search(search_string=None, offset=0):
    """Show the search page; on POST, follow the form's redirect if any."""
    page = SearchPage(search_string, offset=offset)

    if flask.request.method == 'POST':
        redirect = page.form.handle()
        if redirect:
            return redirect

    return page