diff options
author | Robin H. Johnson <robbat2@gentoo.org> | 2008-10-10 21:35:53 +0000 |
---|---|---|
committer | Robin H. Johnson <robbat2@gentoo.org> | 2008-10-10 21:35:53 +0000 |
commit | 8aba693dbd288e16c94924559e9b2d6ccdf6e3dc (patch) | |
tree | 46f59aee52bb12a85a6a06e21f77c91dfcfbb767 | |
parent | New URL for alias expansion. (diff) | |
download | rbot-gentoo-8aba693dbd288e16c94924559e9b2d6ccdf6e3dc.tar.gz rbot-gentoo-8aba693dbd288e16c94924559e9b2d6ccdf6e3dc.tar.bz2 rbot-gentoo-8aba693dbd288e16c94924559e9b2d6ccdf6e3dc.zip |
Variant of searching borrowed from jeeves and based on the normal rbot google.
-rw-r--r-- | gentoo-search.rb | 85 |
1 files changed, 85 insertions, 0 deletions
#-- vim:sw=2:et:ft=ruby
#
# gentoo-search.rb (added as a new file by this commit: mode 100644, blob f711e59)
#
# rbot plugin: a message of the form "? <words>" triggers a Google WAP search
# restricted to gentoo.org, and the hits are sent back as a reply.

GOOGLE_SEARCH = "http://www.google.com/search?oe=UTF-8&q="
GOOGLE_WAP_SEARCH = "http://www.google.com/wml/search?hl=en&q="
# Captures, in order: the accesskey digit, the escaped target URL, the link text.
GOOGLE_WAP_LINK = /<a accesskey="(\d)" href=".*?u=(.*?)">(.*?)<\/a>/im
# NOTE(review): this text was recovered from an HTML rendering; the blank
# between the <td> tags (and after <p> below) may originally have been an
# entity such as &nbsp; -- verify against the repository before relying on it.
GOOGLE_CALC_RESULT = %r{<img src=/images/calc_img\.gif(?: width=40 height=30 alt="")?></td><td> </td><td nowrap>(?:<h2 class=r>)?<font size=\+1><b>(.+)</b>(?:</h2>)?</td>}
GOOGLE_DEF_RESULT = %r{<p> (Web definitions for .*?)<br/>(.*?)<br/>(.*?)\s-\s+<a href}

class GentooSearchPlugin < Plugin
  # Inspects a message and, when its text has the form "? <words>", runs a
  # gentoo.org search for <words>. Any other message is ignored.
  #
  # m:: the incoming message; must respond to +message+ and +reply+
  def listen(m)
    #return if m.address?
    return if m.message !~ /^\? (.+)$/i
    #m.reply "doing search for #{search}"
    gentoo_search(m, :words => $1)
  end

  # Runs #google with the search restricted to gentoo.org.
  # Note that params is modified in place (its :site entry is overwritten).
  def gentoo_search(m, params)
    params[:site] = 'gentoo.org'
    google(m, params)
  end

  # Queries the Google WAP search page and replies to +m+ with the hits.
  #
  # Recognised keys in params:
  # :words::    the search terms (converted with to_s)
  # :site::     optional; when set, the query is prefixed with "site:<value>+"
  # :filter::   optional string or pattern removed from every result title
  # :hits::     number of results to show; defaults to config 'google.hits'
  # :firstpar:: passed on to Utils.get_first_pars; defaults to config
  #             'google.first_par'
  #
  # Replies with an error text when the page cannot be fetched and with a
  # "no results" text when the page contains no result links.
  def google(m, params)
    what = params[:words].to_s
    searchfor = CGI.escape(what)
    # This method is also called by other methods to restrict searching to some sites
    site = params[:site] ? "site:#{params[:site]}+" : ""
    # It is also possible to choose a filter to remove constant parts from the titles
    # e.g.: "Wikipedia, the free encyclopedia" when doing Wikipedia searches
    filter = params[:filter] || ""

    url = GOOGLE_WAP_SEARCH + site + searchfor

    hits = params[:hits] || @bot.config['google.hits']
    first_pars = params[:firstpar] || @bot.config['google.first_par']

    single = (hits == 1 && first_pars == 1)

    # A failed fetch (exception) and an empty fetch (nil/false) are reported
    # to the user in the same way.
    wml = begin
      @bot.httputil.get(url)
    rescue StandardError
      nil
    end
    unless wml
      m.reply "error googling for #{what}"
      return
    end

    results = wml.scan(GOOGLE_WAP_LINK)
    if results.empty?
      m.reply "no results found for #{what}"
      return
    end

    # A lone hit is always shown in the compact single-result form.
    single ||= (results.length == 1)
    urls = []
    results = results[0...hits].map { |n, escaped_url, raw_title|
      t = Utils.decode_html_entities(raw_title.gsub(filter, '').strip)
      u = unescape_url(escaped_url)
      urls.push(u)
      single ? u : "#{n}. #{Bold}#{t}#{Bold}: #{u}"
    }.join(" | ")

    # If we return a single, full result, change the output to a more compact representation
    if single
      m.reply("Result for %s: %s -- %s" % [what, results, Utils.get_first_pars(urls, first_pars)], :overlong => :truncate)
      return
    end

    m.reply "Results for #{what}: #{results}", :split_at => /\s+\|\s+/

    return unless first_pars > 0

    # Presumably sends the opening paragraph(s) of the linked pages to m;
    # Utils.get_first_pars is defined outside this file -- confirm there.
    Utils.get_first_pars urls, first_pars, :message => m
  end

  private

  # Decodes %XX escapes in str. URI.unescape was removed in Ruby 3.0, so the
  # parser object it used to delegate to is called directly when available;
  # interpreters that predate URI::DEFAULT_PARSER still get URI.unescape.
  def unescape_url(str)
    if defined?(URI::DEFAULT_PARSER)
      URI::DEFAULT_PARSER.unescape(str)
    else
      URI.unescape(str)
    end
  end
end

plugin = GentooSearchPlugin.new

#plugin.map "? *words", :action => 'gentoo_search', :threaded => true