use strict;
use warnings;
use Groonga;
use Groonga::Constants;
use Groonga::PatriciaTrie;
package Groonga::PatriciaTrie {

    # tag_keys($text, $callback)
    #
    # Rebuilds $text with every span reported by $self->scan replaced by the
    # value returned from $callback.
    #
    # $callback is invoked once per reported hit as
    #     $callback->($record, $word, $record_id)
    # and its result is inserted in place of the matched span; an undefined
    # result is treated as the empty string.
    #
    # Text between hits, and whatever follows the last hit, is copied through
    # unchanged. Returns the rebuilt string.
    sub tag_keys {
        my ($self, $text, $callback) = @_;

        my $tagged = '';
        my $cursor = 0;    # offset in $text of the first character not yet emitted

        my $on_hit = sub {
            my ($record, $word, $offset, $length, $record_id) = @_;

            # Copy the untouched text lying between the previous hit and this one.
            $tagged .= substr($text, $cursor, $offset - $cursor)
                if $offset > $cursor;

            my $replacement = $callback->($record, $word, $record_id) // '';
            $tagged .= $replacement;

            # Resume right after the span that was just replaced.
            $cursor = $offset + $length;
        };

        $self->scan($text, $on_hit);

        # Whatever remains after the last hit is emitted verbatim.
        $tagged .= substr($text, $cursor);

        return $tagged;
    }
}
# Driver script: builds a small vocabulary from the system word list, loads it
# into a Groonga patricia trie, runs tag_keys over the joined vocabulary and
# compares the result with the expected line stored after __DATA__.

# Walking the lowercased word list in reverse order, keep the first two words
# seen for each initial character.
my $dict_path = '/usr/share/dict/words';
open my $fh, '<', $dict_path
    or die "Cannot open $dict_path: $!";
my %count;
my @words = grep {
    my ($first) = $_ =~ /^(.)/;
    $count{$first}++ < 2 ? 1 : 0;
} reverse map { chomp; lc $_ } <$fh>;
close $fh;

# The text to scan is every selected word, comma-separated.
my $text = join ', ', @words;

# Start from a fresh database file. unlink() removes it directly instead of
# spawning a shell to run rm, and lets a failed removal be reported.
my $path = 'groonga_tag_keys.db';
if (-f $path) {
    unlink $path
        or die "Cannot remove existing $path: $!";
}

my $pat = Groonga::PatriciaTrie->new;
if (! $pat->open($path)) {
    $pat->create($path, 1024, 1024, GRN_OBJ_KEY_VAR_SIZE | GRN_OBJ_KEY_NORMALIZE)
        or die 'Groonga::PatriciaTrie create error';
}

# Register every selected word as a key with an empty value.
for my $word (@words) {
    $pat->add($word, '');
}

# Rewrite the text, replacing each hit with the callback's return value.
my $html = $pat->tag_keys($text, sub {
    my ($record, $word, $record_id) = @_;
    # NOTE(review): the empty prefix/suffix look like placeholders for
    # opening/closing markup around the record -- confirm the intended wrapper.
    '' . $record . '';
});
print "$html\n";

# The expected output is the first line following the __DATA__ marker.
my $expected = <DATA>;
die "Missing expected output after __DATA__\n" unless defined $expected;
chomp $expected;
print( ($html eq $expected) ? "ok\n" : "error\n" );
__DATA__
zyzzogeton, zyzomys, yvonne, yuzluk, xystus, xystum, wyver, wyve, vyingly, vying, uzbek, uzbeg, tzutuhil, tzotzil, szopelka, szlachta, ryukyu, rytina, qurti, quotum, pyxis, pyxis, ozotype, ozostomia, nyxis, nystagmus, myzostomous, myzostomidan, lyxose, lytta, kyurinish, kyurin, jynx, jynx, izzard, iztle, hystrix, hystricomorphous, gyve, gytling, fyrd, fylfot, ezra, ezekiel, dzungar, dzeren, czechoslovakian, czechoslovak, byzantinize, byzantinism, azymous, azymite