DTA-CAB/CAB/Analyzer/EqRW/JsonCDB.pm
## -*- Mode: CPerl -*-
##
## File: DTA::CAB::Analyzer::EqRW::JsonCDB.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: dictionary-based equivalence-class expander, phonetic variant
package DTA::CAB::Analyzer::EqRW::JsonCDB;
use DTA::CAB::Analyzer ':child';
use DTA::CAB::Analyzer::Dict::JsonCDB;
use strict;
##==============================================================================
## Globals
##==============================================================================
our @ISA = qw(DTA::CAB::Analyzer::Dict::JsonCDB);
##==============================================================================
## Constructors etc.
##==============================================================================
## $obj = CLASS_OR_OBJ->new(%args)
## + object structure: see DTA::CAB::Analyzer::Dict::JsonCDB
sub new {
my $that = shift;
return $that->SUPER::new(
##-- options
label => 'eqrw',
eqIdWeight => 0,
#allowRegex => '(?:^[[:alpha:]\-\x{ac}]*[[:alpha:]]+$)|(?:^[[:alpha:]]+[[:alpha:]\-\x{ac}]+$)',
allowRegex => '[[:alpha:]]',
##
analyzeCode => join("\n",
'return if (defined($_->{$lab})); ##-- avoid re-analysis',
'$tmp=undef; ##-- re-initialize temporary used by _am_fst_uniq',
'$_->{$lab}=['._am_fst_usort((_am_id_fst('$_', '$dic->{eqIdWeight}')
.', map {defined($val=$tied->FETCH($_)) ? @{$jxs->decode($val)} : qw()}'
.join(',', _am_xtext, _am_xlit, _am_rw)
),
'$tmp'
).'];',
),
##-- user args
@_
);
}
## $prefix = $dict->analyzePre()
sub analyzePre {
my $dic = shift;
return $dic->SUPER::analyzePre(@_)."\n".'my $tied=tied(%$dhash);'."\n".'my ($tmp);';
}
##-- DEBUG
sub analyzeCode_DEBUG {
my $anl = shift;
my $jxs=$anl->jsonxs;
my $dic=$anl;
my $lab=$dic->{label};
my $dhash=$dic->dictHash;
my ($key,$val,@keys,@vals,%vals);
my $tied=tied(%$dhash);
my ($tmp);
return sub {
return if (defined($_->{$lab})); ##-- avoid re-analysis
if (!defined($val=$tied->FETCH(($_->{lts} && @{$_->{lts}} ? $_->{lts}[0]{hi} : $_->{text})))) {
$_->{$lab} = [{hi=>($_->{xlit} ? $_->{xlit}{latin1Text} : $_->{text}),w=>$dic->{eqIdWeight}}]; ##== _am_id_fst
return;
}
$tmp = undef;
$val = $jxs->decode($val);
@$val = (
sort {
($a->{w}||0) <=> ($b->{w}||0) || ($a->{hi}||"") cmp ($b->{hi}||"")
} (
map {
$tmp && $tmp->{hi} eq $_->{hi} ? qw() : ($tmp=$_)
}
sort {
($a->{hi}||"") cmp ($b->{hi}||"") || ($a->{w}||0) <=> ($b->{w}||0)
}
{hi=>($_->{xlit} ? $_->{xlit}{latin1Text} : $_->{text}),w=>$dic->{eqIdWeight}}, ##== _am_id_fst
@$val
) ##== _am_fst_uniq
) ##== _am_fst_sort
;
$_->{$lab} = $val;
};
}
1; ##-- be happy
__END__
##========================================================================
## POD DOCUMENTATION, auto-generated by podextract.perl, edited
##========================================================================
## NAME
=pod
=head1 NAME
DTA::CAB::Analyzer::EqRW::JsonCDB - Json-valued CDB dictionary-based phonetic equivalence expander
=cut
##========================================================================
## SYNOPSIS
=pod
=head1 SYNOPSIS
use DTA::CAB::Analyzer::EqRW::JsonCDB;
##========================================================================
## Constructors etc.
$eqp = DTA::CAB::Analyzer::EqRW::JsonCDB->new(%args);
=cut
##========================================================================
## DESCRIPTION
=pod
=head1 DESCRIPTION
Json-valued CDB dictionary-based phonetic equivalence-class expander.
Composite analyzers should also include an 'lts' phonetic analyzer.
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::Dict::JsonCDB: Globals
=pod
=head2 Globals
=over 4
=item Variable: @ISA
DTA::CAB::Analyzer::EqRW::JsonCDB inherits from
L<DTA::CAB::Analyzer::Dict::JsonCDB>.
=back
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::Dict::JsonCDB: Constructors etc.
=pod
=head2 Constructors etc.
=over 4
=item new
$eqc = CLASS_OR_OBJ->new(%args);
Constructor. Sets the following default options:
label => 'eqpho',
eqIdWeight => 0,
#allowRegex => '(?:^[[:alpha:]\-\x{ac}]*[[:alpha:]]+$)|(?:^[[:alpha:]]+[[:alpha:]\-\x{ac}]+$)',
#allowRegex => '(?:^[[:alpha:]\-\x{ac}]*[[:alpha:]]+$)|(?:^[[:alpha:]]+[[:alpha:]\-\x{ac}]+$)',
allowRegex => '(?:[[:alpha:]])',
analyzeCode => ... ##-- see the source
=back
=cut
##========================================================================
## END POD DOCUMENTATION, auto-generated by podextract.perl
##======================================================================
## Footer
##======================================================================
=pod
=head1 AUTHOR
Bryan Jurish E<lt>moocow@cpan.orgE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2011-2019 by Bryan Jurish
This package is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.24.1 or,
at your option, any later version of Perl 5 you may have available.
=cut