Group
Extension

DTA-CAB/CAB/Analyzer/EqRW/JsonCDB.pm

## -*- Mode: CPerl -*-
##
## File: DTA::CAB::Analyzer::EqRW::JsonCDB.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: dictionary-based equivalence-class expander, phonetic variant

package DTA::CAB::Analyzer::EqRW::JsonCDB;
use DTA::CAB::Analyzer ':child';
use DTA::CAB::Analyzer::Dict::JsonCDB;
use strict;

##==============================================================================
## Globals
##==============================================================================

our @ISA = qw(DTA::CAB::Analyzer::Dict::JsonCDB);

##==============================================================================
## Constructors etc.
##==============================================================================

## $obj = CLASS_OR_OBJ->new(%args)
##  + object structure: see DTA::CAB::Analyzer::Dict::JsonCDB
sub new {
  my $that = shift;
  return $that->SUPER::new(
			   ##-- options
			   label       => 'eqrw',
			   eqIdWeight  => 0,
			   #allowRegex  => '(?:^[[:alpha:]\-\x{ac}]*[[:alpha:]]+$)|(?:^[[:alpha:]]+[[:alpha:]\-\x{ac}]+$)',
			   allowRegex  => '[[:alpha:]]',
			   ##
			   analyzeCode => join("\n",
					       'return if (defined($_->{$lab})); ##-- avoid re-analysis',
					       '$tmp=undef; ##-- re-initialize temporary used by _am_fst_uniq',
					       '$_->{$lab}=['._am_fst_usort((_am_id_fst('$_', '$dic->{eqIdWeight}')
									     .', map {defined($val=$tied->FETCH($_)) ? @{$jxs->decode($val)} : qw()}'
									     .join(',', _am_xtext, _am_xlit, _am_rw)
									    ),
									    '$tmp'
									   ).'];',
					      ),

			   ##-- user args
			   @_
			  );
}

## $prefix = $dict->analyzePre()
sub analyzePre {
  my $dic = shift;
  return $dic->SUPER::analyzePre(@_)."\n".'my $tied=tied(%$dhash);'."\n".'my ($tmp);';
}

##-- DEBUG
sub analyzeCode_DEBUG {
  my $anl = shift;

  my $jxs=$anl->jsonxs;
  my $dic=$anl;
  my $lab=$dic->{label};
  my $dhash=$dic->dictHash;
  my ($key,$val,@keys,@vals,%vals);
  my $tied=tied(%$dhash);
  my ($tmp);
  return sub {
    return if (defined($_->{$lab})); ##-- avoid re-analysis
    if (!defined($val=$tied->FETCH(($_->{lts} && @{$_->{lts}} ? $_->{lts}[0]{hi} : $_->{text})))) {
      $_->{$lab} = [{hi=>($_->{xlit} ? $_->{xlit}{latin1Text} : $_->{text}),w=>$dic->{eqIdWeight}}]; ##== _am_id_fst
      return;
    }
    $tmp  = undef;
    $val  = $jxs->decode($val);
    @$val = (
	     sort {
	       ($a->{w}||0) <=> ($b->{w}||0) || ($a->{hi}||"") cmp ($b->{hi}||"")
	     } (
		map {
		  $tmp && $tmp->{hi} eq $_->{hi} ? qw() : ($tmp=$_)
		}
		sort {
		  ($a->{hi}||"") cmp ($b->{hi}||"") || ($a->{w}||0) <=> ($b->{w}||0)
		}
		{hi=>($_->{xlit} ? $_->{xlit}{latin1Text} : $_->{text}),w=>$dic->{eqIdWeight}}, ##== _am_id_fst
		@$val
	       ) ##== _am_fst_uniq
	    ) ##== _am_fst_sort
      ;
    $_->{$lab} = $val;
  };
}


1; ##-- be happy

__END__
##========================================================================
## POD DOCUMENTATION, auto-generated by podextract.perl, edited

##========================================================================
## NAME
=pod

=head1 NAME

DTA::CAB::Analyzer::EqRW::JsonCDB - Json-valued CDB dictionary-based phonetic equivalence expander

=cut

##========================================================================
## SYNOPSIS
=pod

=head1 SYNOPSIS

 use DTA::CAB::Analyzer::EqRW::JsonCDB;
 
 ##========================================================================
 ## Constructors etc.
 
 $eqp = DTA::CAB::Analyzer::EqRW::JsonCDB->new(%args);
 

=cut

##========================================================================
## DESCRIPTION
=pod

=head1 DESCRIPTION

Json-valued CDB dictionary-based phonetic equivalence-class expander.
Composite analyzers should also include an 'lts' phonetic analyzer.

=cut

##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::Dict::JsonCDB: Globals
=pod

=head2 Globals

=over 4

=item Variable: @ISA

DTA::CAB::Analyzer::EqRW::JsonCDB inherits from
L<DTA::CAB::Analyzer::Dict::JsonCDB>.

=back

=cut

##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::Dict::JsonCDB: Constructors etc.
=pod

=head2 Constructors etc.

=over 4

=item new

 $eqc = CLASS_OR_OBJ->new(%args);

Constructor.  Sets the following default options:

 label       => 'eqpho',
 eqIdWeight  => 0,
 #allowRegex  => '(?:^[[:alpha:]\-\x{ac}]*[[:alpha:]]+$)|(?:^[[:alpha:]]+[[:alpha:]\-\x{ac}]+$)',
 #allowRegex  => '(?:^[[:alpha:]\-\x{ac}]*[[:alpha:]]+$)|(?:^[[:alpha:]]+[[:alpha:]\-\x{ac}]+$)',
 allowRegex  => '(?:[[:alpha:]])',
 analyzeCode => ... ##-- see the source

=back

=cut


##========================================================================
## END POD DOCUMENTATION, auto-generated by podextract.perl

##======================================================================
## Footer
##======================================================================

=pod

=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2011-2019 by Bryan Jurish

This package is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.24.1 or,
at your option, any later version of Perl 5 you may have available.

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.