aboutsummaryrefslogtreecommitdiff
path: root/app
diff options
context:
space:
mode:
author: Max Magorsch <max@magorsch.de> 2019-10-18 00:50:51 +0200
committer: Max Magorsch <max@magorsch.de> 2019-10-18 00:50:51 +0200
commit: a5a8ff8c763ae3d25c15a90190403fbd854f3f48 (patch)
tree: a9dbbac408ea5b7e59f1ff372f730f3910f3fa7b /app
parent: Add a couple of initial integration tests (diff)
download: packages-5-a5a8ff8c763ae3d25c15a90190403fbd854f3f48.tar.gz
packages-5-a5a8ff8c763ae3d25c15a90190403fbd854f3f48.tar.bz2
packages-5-a5a8ff8c763ae3d25c15a90190403fbd854f3f48.zip
Add a query parser for advanced search queries
Add a query parser using parslet which is able to parse advanced queries. This makes it possible to run queries like "+maintainers.emails:haskell@gentoo.org" or "+maintainers.emails:haskell@gentoo.org -category:dev-haskell" to find, respectively, all packages that are maintained by the haskell herd and all packages that are maintained by the haskell herd but are not part of dev-haskell. Documentation on how to use the advanced queries is included in the website and can be viewed by clicking on the question mark next to the search bar on the landing page. Signed-off-by: Max Magorsch <max@magorsch.de>
Diffstat (limited to 'app')
-rw-r--r-- app/repositories/package_repository.rb 26
-rw-r--r-- app/repositories/query_parser/search_query_parser.rb 145
-rw-r--r-- app/views/index/index.html.erb 187
3 files changed, 340 insertions, 18 deletions
diff --git a/app/repositories/package_repository.rb b/app/repositories/package_repository.rb
index dd516e9..2a2e891 100644
--- a/app/repositories/package_repository.rb
+++ b/app/repositories/package_repository.rb
@@ -1,5 +1,6 @@
require 'forwardable'
require 'singleton'
+require_relative './query_parser/search_query_parser'
class PackageRepository < BaseRepository
include Singleton
@@ -62,16 +63,7 @@ class PackageRepository < BaseRepository
end
def suggest(q)
- PackageRepository.search(
- size: 20,
- query: {
- wildcard: {
- name_sort: {
- wildcard: q.downcase + '*'
- }
- }
- }
- )
+ search(build_query(q, 20, 0))
end
# Tries to resolve a query atom to one or more packages
@@ -114,22 +106,20 @@ class PackageRepository < BaseRepository
def default_search(q, offset)
return [] if q.nil? || q.empty?
- part1, part2 = q.split('/', 2)
+ search(build_query(q, default_search_size, offset))
- if part2.nil?
- search(build_query(part1, nil, default_search_size, offset))
- else
- search(build_query(part2, part1, default_search_size, offset))
- end
end
- def build_query(q, category, size, offset)
+ def build_query(q, size, offset)
+ parser = Object.const_get("SearchQueryParser::QueryParser").new
+ transformer = Object.const_get("SearchQueryParser::QueryTransformer").new
+
{
size: size,
from: offset,
query: {
function_score: {
- query: { bool: bool_query_parts(q, category) },
+ query: { bool: transformer.apply(parser.parse(q)).to_elasticsearch },
functions: scoring_functions
}
}
diff --git a/app/repositories/query_parser/search_query_parser.rb b/app/repositories/query_parser/search_query_parser.rb
new file mode 100644
index 0000000..f3e67c6
--- /dev/null
+++ b/app/repositories/query_parser/search_query_parser.rb
@@ -0,0 +1,145 @@
+require 'parslet'
+
+module SearchQueryParser
+
+ class QueryParser < Parslet::Parser
+ rule(:term) { match('[^\s"]').repeat(1).as(:term) }
+ rule(:quote) { str('"') }
+ rule(:operator) { (str('+') | str('-')).as(:operator) }
+
+ rule(:fieldname) { match('[^\s:"]').repeat(1).as(:fieldname) }
+ rule(:field) { (fieldname >> str(':')).as(:field) }
+
+ rule(:phrase) do
+ (quote >> (term >> space.maybe).repeat >> quote).as(:phrase)
+ end
+ rule(:clause) { (operator.maybe >> field.maybe >> (phrase | term)).as(:clause) }
+ rule(:space) { match('\s').repeat(1) }
+ rule(:query) { (clause >> space.maybe).repeat.as(:query) }
+ root(:query)
+ end
+
+ class QueryTransformer < Parslet::Transform
+ rule(:clause => subtree(:clause)) do
+ if clause[:term]
+ TermClause.new(clause[:operator]&.to_s, clause[:field], clause[:term].to_s)
+ elsif clause[:phrase]
+ phrase = clause[:phrase].map { |p| p[:term].to_s }.join(" ")
+ PhraseClause.new(clause[:operator]&.to_s, clause[:field], phrase)
+ else
+ raise "Unexpected clause type: '#{clause}'"
+ end
+ end
+ rule(:query => sequence(:clauses)) { Query.new(clauses) }
+ end
+
+ class Operator
+ def self.symbol(str)
+ case str
+ when '+'
+ :must
+ when '-'
+ :must_not
+ when nil
+ :should
+ else
+ raise "Unknown operator: #{str}"
+ end
+ end
+ end
+
+ class TermClause
+ attr_accessor :operator, :field, :term
+
+ def initialize(operator, field, term)
+ self.operator = Operator.symbol(operator)
+ self.field = field
+ self.term = term
+ end
+ end
+
+ class PhraseClause
+ attr_accessor :operator, :field, :phrase
+
+ def initialize(operator, field, phrase)
+ self.operator = Operator.symbol(operator)
+ self.field = field
+ self.phrase = phrase
+ end
+ end
+
+ class Query
+ attr_accessor :should_clauses, :must_not_clauses, :must_clauses
+
+ def initialize(clauses)
+ grouped = clauses.chunk { |c| c.operator }.to_h
+ self.should_clauses = grouped.fetch(:should, [])
+ self.must_not_clauses = grouped.fetch(:must_not, [])
+ self.must_clauses = grouped.fetch(:must, [])
+ end
+
+ def to_elasticsearch
+ query = { }
+
+ if should_clauses.any?
+ query[:should] = should_clauses.map do |clause|
+ clause_to_query(clause)
+ end
+ end
+
+ if must_clauses.any?
+ query[:must] = must_clauses.map do |clause|
+ clause_to_query(clause)
+ end
+ end
+
+ if must_not_clauses.any?
+ query[:must_not] = must_not_clauses.map do |clause|
+ clause_to_query(clause)
+ end
+ end
+
+ query
+ end
+
+ def clause_to_query(clause)
+ case clause
+ when TermClause
+ match(clause.field, clause.term)
+ when PhraseClause
+ match_phrase(clause.field, clause.phrase)
+ else
+ raise "Unknown clause type: #{clause}"
+ end
+ end
+
+ def match(field, term)
+ if field
+ {
+ :match => {
+ field[:fieldname].to_s.to_sym => {
+ :query => term
+ }
+ }
+ }
+ else
+ {
+ :multi_match => {
+ :query => term,
+ :fields => ["atom^3", "name^2"]
+ }
+ }
+ end
+ end
+
+ def match_phrase(field, phrase)
+ {
+ :match_phrase => {
+ field ? field[:fieldname].to_s.to_sym : :name => {
+ :query => phrase
+ }
+ }
+ }
+ end
+ end
+end
diff --git a/app/views/index/index.html.erb b/app/views/index/index.html.erb
index af86c9e..e54eb74 100644
--- a/app/views/index/index.html.erb
+++ b/app/views/index/index.html.erb
@@ -8,6 +8,11 @@
<input id="q" name="q" type="search" autocomplete="off" placeholder="<%= t :find_packages %>" aria-label="<%= t :find_packages %>" autofocus>
</span>
<span class="typeahead-button">
+ <button type="button" onclick="$('#searchHelp').modal('show')" title="Help" aria-label="<%= "Help" %>">
+ <span class="fa fa-question" style="font-size: 15px;"></span><span class="sr-only"><%= "Help" %></span>
+ </button>
+ </span>
+ <span class="typeahead-button">
<button type="submit" title="<%= t :find %>" aria-label="<%= t :find %>">
<span class="typeahead-search-icon"></span><span class="sr-only"><%= t :find %></span>
</button>
@@ -50,4 +55,186 @@
</div>
<% end %>
+<div class="modal fade" id="searchHelp" tabindex="-1" role="dialog" aria-labelledby="searchHelpTitle">
+ <div class="modal-dialog modal-lg" role="document">
+ <div class="modal-content">
+ <div class="modal-header">
+ <button type="button" class="close" data-dismiss="modal" aria-label="Close"><span aria-hidden="true">&times;</span></button>
+ <h3 class="modal-title" id="searchHelpTitle"><span class="fa fa-info" style="font-size: 15px;"></span>&nbsp; Package Search Syntax</h3>
+ </div>
+ <div class="modal-body">
+ The search can be used to find Gentoo packages. You can search in two different ways:
+ <ul >
+ <li>Keywords</li>
+ <li>Field/Value pairs</li>
+ </ul>
+ <h4 style="margin-top:35px;">Keywords</h4>
+ You can use simple keywords to find packages. For instance, if you search for <i>gentoo-sources</i> you will find the package <i>sys-kernel/gentoo-sources</i>.
+ When searching for keywords, the
+ <ul >
+ <li><b>name</b></li>
+ <li> and the <b>atom</b></li>
+ </ul>
+ fields are searched for the keyword.
+ <h4 style="margin-top:35px;">Field/Value pairs</h4>
+ If, however, you would like to run advanced queries, you can use field/value pairs combined with operators. The possible fields and operators are summarized in the following tables.
+ <table class="table">
+ <thead>
+ <tr>
+ <th scope="col">Field</th>
+ <th scope="col">Description</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <th scope="row">atom</th>
+ <td>The unique identifier of a package <br> <i style="padding-left:2em">e.g. sys-kernel/gentoo-sources</i></td>
+ </tr>
+ <tr>
+ <th scope="row">category</th>
+ <td>The category of a package <br> <i style="padding-left:2em">e.g. sys-kernel</i></td>
+ </tr>
+ <tr>
+ <th scope="row">name</th>
+ <td>The name of a package <br> <i style="padding-left:2em">e.g. gentoo-sources</i></td>
+ </tr>
+ <tr>
+ <th scope="row">description</th>
+ <td>The description of a package <br> <i style="padding-left:2em">e.g. A tiling window manager</i> </td>
+ </tr>
+ <tr>
+ <th scope="row">longdescription</th>
+ <td>The full description of a package <br> <i style="padding-left:2em">e.g. xmonad is a tiling window manager for [...]</i></td>
+ </tr>
+ <tr>
+ <th scope="row">homepage</th>
+ <td>The homepage of a package <br> <i style="padding-left:2em">e.g. http://xmonad.org</i></td>
+ </tr>
+ <tr>
+ <th scope="row">license</th>
+ <td>The license of a package <br> <i style="padding-left:2em">e.g. BSD</i></td>
+ </tr>
+ <tr>
+ <th scope="row">Maintainers</th>
+ <td></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">maintainers.name</th>
+ <td>The name of the maintainer <br> <i style="padding-left:2em">e.g. Gentoo Haskell</i></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">maintainers.description</th>
+ <td>The description of the maintainers</td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">maintainers.type</th>
+ <td>The type of maintainer <br> <i style="padding-left:2em">e.g. project</i></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">maintainers.restrict</th>
+ <td></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">maintainers.email</th>
+ <td>The email of the maintainer <br> <i style="padding-left:2em">e.g. haskell@gentoo.org</i></td>
+ </tr>
+ <tr>
+ <th scope="row">Useflag</th>
+ <td></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:1em">global</th>
+ <td></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">useflags.global.name</th>
+ <td>The name of the global useflag <br> <i style="padding-left:2em">e.g. hscolour</i></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">useflags.global.description</th>
+ <td>The description of the global useflag <br> <i style="padding-left:2em">e.g. Include coloured haskell sources to [...]</i></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:1em">local</th>
+ <td></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">useflags.local.name</th>
+ <td>The name of the local useflag</td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">useflags.local.description</th>
+ <td>The description of the local useflag</td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:1em">use_expand</th>
+ <td></td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">useflags.use_expand.name</th>
+ <td>The name of the use_expand</td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">useflags.use_expand.description</th>
+ <td>The description of the use_expand</td>
+ </tr>
+ <tr>
+ <th scope="row" style="padding-left:2em">useflags.use_expand.use_expand_prefix</th>
+ <td>The use_expand prefix <br> <i style="padding-left:2em">e.g. python_targets</i></td>
+ </tr>
+ <tr>
+ <th scope="row">metadata_hash</th>
+ <td>The hash of the metadata <br> <i style="padding-left:2em">e.g. 5cd76e098f966b4edcd1848866dd9099</i></td>
+ </tr>
+ </tbody>
+ </table>
+ The following operators can be used to combine multiple field/value pairs:
+ <table class="table">
+ <thead>
+ <tr>
+ <th scope="col">Operator</th>
+ <th scope="col">Description</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <th scope="row"></th>
+ <td>The term <b>should</b> appear (default)</td>
+ </tr>
+ <tr>
+ <th scope="row">+</th>
+ <td>The term <b>must</b> appear</td>
+ </tr>
+ <tr>
+ <th scope="row">-</th>
+ <td>The term <b>must not</b> appear</td>
+ </tr>
+ <tr>
+ <th scope="row">"..."</th>
+ <td>Can be used to <b>group</b> phrases <br> <i style="padding-left:2em">e.g. +description:"window manager"</i></td>
+ </tr>
+ </tbody>
+ </table>
+
+ <h4>Examples</h4>
+
+ <ul>
+ <li>Find all packages named git: <br><code style="margin-left:2em">+name:git</code></li>
+ <li>Find all packages in the category sys-kernel: <br><code style="margin-left:2em">+category:sys-kernel</code></li>
+ <li>Find all packages with a BSD license: <br><code style="margin-left:2em">+license:BSD</code></li>
+ <li>Find all packages that have neither a BSD license nor an MIT license: <br><code style="margin-left:2em">-license:BSD -license:MIT</code></li>
+ <li>Find all packages maintained by the Haskell Team: <br><code style="margin-left:2em">+maintainers.email:haskell@gentoo.org</code></li>
+ <li>Find all packages maintained by the Haskell Team but that aren't in the 'dev-haskell' category: <br><code style="margin-left:2em">+maintainers.email:haskell@gentoo.org -category:dev-haskell</code></li>
+ <li>Find all packages whose description contains 'window manager': <br><code style="margin-left:2em">+description:"window manager"</code></li>
+ <li>Find all packages that contain the use_expand 'python_targets': <br><code style="margin-left:2em">+useflags.use_expand.use_expand_prefix:python_targets</code></li>
+ </ul>
+
+ </div>
+ <div class="modal-footer">
+ <button type="button" class="btn btn-primary" data-dismiss="modal">Close</button>
+ </div>
+ </div>
+ </div>
+</div>
+
<%= javascript_include_tag 'index/typeahead.js' %>