summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Robin H. Johnson <robbat2@gentoo.org> 2008-03-20 17:41:03 -0700
committer Robin H. Johnson <robbat2@gentoo.org> 2008-03-21 20:13:06 -0700
commit 85766ce726d1971d8c49d351007a03be6010f101 (patch)
tree f4ac06fad14d445a46b5283c4906f0be5e274b0e
parent Add index creation script. (diff)
download distindex-85766ce726d1971d8c49d351007a03be6010f101.tar.gz
distindex-85766ce726d1971d8c49d351007a03be6010f101.tar.bz2
distindex-85766ce726d1971d8c49d351007a03be6010f101.zip
Add tools to query and dump the index.
-rw-r--r-- index-dumper.pl 28
-rw-r--r-- index-query.pl 44
2 files changed, 72 insertions, 0 deletions
diff --git a/index-dumper.pl b/index-dumper.pl
new file mode 100644
index 0000000..542ec02
--- /dev/null
+++ b/index-dumper.pl
@@ -0,0 +1,28 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+# Lucene stuff by Robin H. Johnson <robbat2@gentoo.org>
+
+use Lucene;
+use Data::Dumper;
+
+# Open the index stored under ./data (second argument 0: presumably the
+# "create" flag, i.e. do not start a fresh index -- confirm in the bindings).
+my $store = Lucene::Store::FSDirectory->getDirectory("data", 0);
+my $reader = Lucene::Index::IndexReader->open($store);
+
+# NOTE(review): numDocs() is used as the upper bound for document ids; if the
+# index ever has deletions, ids may have gaps -- confirm it is never pruned.
+my $num_docs = $reader->numDocs();
+
+# The Perl bindings lack $doc->fields, so a single field has to be requested
+# by name (e.g. $doc->get('md5')) or, as done here, the whole document is
+# dumped through its toString representation, one document per line.
+for my $doc_id (0 .. $num_docs - 1) {
+    my $doc = $reader->document($doc_id);
+    print $doc->toString() . "\n";
+}
+
+$reader->close;
+undef $reader;
diff --git a/index-query.pl b/index-query.pl
new file mode 100644
index 0000000..8e44c25
--- /dev/null
+++ b/index-query.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+# Lucene stuff by Robin H. Johnson <robbat2@gentoo.org>
+
+use Lucene;
+use Data::Dumper;
+
+# Set up the search stack: analyzer, the on-disk index in ./data, a searcher
+# on that index, and a query parser constructed with the field "filename".
+my $analyzer = Lucene::Analysis::Standard::StandardAnalyzer->new();
+my $store = Lucene::Store::FSDirectory->getDirectory("data", 0);
+my $searcher = Lucene::Search::IndexSearcher->new($store);
+my $parser = Lucene::QueryParser->new("filename", $analyzer);
+
+# Range queries are currently evaluated as string comparisons, not numeric
+# ones, so something like "size:[0 TO 9000]" does not behave as expected.
+# Syntax: http://lucene.apache.org/java/docs/queryparsersyntax.html
+my $query = $parser->parse("distfile:akode* AND filename:m* AND isdistfile:0");
+my $hits = $searcher->search($query);
+
+# Print "path md5 size" for every hit, in the order the searcher returned them.
+my $num_hits = $hits->length();
+for my $hit (0 .. $num_hits - 1) {
+    my $doc = $hits->doc($hit);
+    my $path = $doc->get("path");
+    my $size = $doc->get("size");
+    my $md5 = $doc->get("md5");
+    printf "%s %s %d\n", $path, $md5, $size;
+}
+
+# Release the Lucene objects explicitly and close the searcher; the store is
+# dropped last, after the searcher that was built on top of it.
+undef $hits;
+undef $query;
+undef $parser;
+undef $analyzer;
+$searcher->close();
+undef $searcher;
+undef $store;
+
+
+