Group
Extension

DTA-CAB/CAB/Analyzer/Dict/Json.pm

## -*- Mode: CPerl -*-
##
## File: DTA::CAB::Analyzer::Dict::Json.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: generic analysis dictionary API using JSON values

package DTA::CAB::Analyzer::Dict::Json;
use DTA::CAB::Analyzer ':child';
use DTA::CAB::Analyzer::Dict;
use JSON::XS;
use IO::File;
use Carp;
use Encode qw(encode decode);

use strict;

##==============================================================================
## Globals
##==============================================================================

our @ISA = qw(DTA::CAB::Analyzer::Dict);

our $CODE_DEFAULT =
  ('return if (!defined($val=$dhash->{'._am_xlit('$_').'}));'
   .' $val=$jxs->decode($val);'
   .' @$_{keys %$val}=values %$val;');

##==============================================================================
## Constructors etc.
##==============================================================================

## $obj = CLASS_OR_OBJ->new(%args)
##  + object structure:
##    (
##     ##-- Filename Options
##     dictFile => $filename,    ##-- filename (default=undef): should be TT-dict with JSON-encoded hash values
##
##     ##-- Analysis Output
##     label          => $lab,   ##-- analyzer label
##     analyzeCode    => $code,  ##-- pseudo-accessor to perform actual analysis for token ($_); see DTA::CAB::Analyzer::Dict for details
##
##     ##-- Analysis Options
##     encoding       => $enc,   ##-- encoding of db file (default='UTF-8'): clobbers $dba{encoding} ; uses DB filters
##
##     ##-- Analysis objects
##     dbf => $dbf,              ##-- underlying Lingua::TT::DBFile object (default=undef)
##     dba => \%dba,             ##-- args for Lingua::TT::DBFile->new()
##     #={
##     #  mode  => $mode,        ##-- default: 0644
##     #  dbflags => $flags,     ##-- default: O_RDONLY
##     #  type    => $type,      ##-- one of 'HASH', 'BTREE', 'RECNO', 'GUESS' (default: 'GUESS')
##     #  dbinfo  => \%dbinfo,   ##-- default: "DB_File::${type}INFO"->new();
##     #  dbopts  => \%opts,     ##-- db options (e.g. cachesize,bval,...) -- defaults to none (uses DB_File defaults)
##     # }
##    )
sub new {
  my $that = shift;
  my $dic = $that->SUPER::new(
			      ##-- filenames
			      dictFile => undef,

			      ##-- options
			      encoding => 'UTF-8',

			      ##-- analysis output
			      label => 'dict_json',
			      analyzeCode => $CODE_DEFAULT,

			      ##-- JSON parser (segfaults: see jsonxs() method, below)
			      #jxs => __PACKAGE__->jsonxs,

			      ##-- user args
			      @_
			     );
  return $dic;
}



##==============================================================================
## Methods: Embedded API
##==============================================================================

##--------------------------------------------------------------
## Methods: I/O: Input: all

## $bool = $dic->ensureLoaded()
##  + ensures analyzer data is loaded from default files
sub ensureLoaded {
  my $dic = shift;
  my $rc  = 1;
  if ( defined($dic->{dictFile}) && !$dic->dictOk ) {
    $dic->info("loading dictionary file '$dic->{dictFile}'");
    $rc &&= $dic->{ttd}->loadFile($dic->{dictFile}, encoding=>$dic->{encoding});

    ##-- json with utf8(1) wants utf8-encoded octets, so we munge them here
    foreach (values %{$dic->{ttd}{dict}}) {
      #$_ = Encode::encode_utf8($_) if (utf8::is_utf8($_));
      utf8::encode($_) if (utf8::is_utf8($_));
    }
  }
  return $rc;
}

## $dic = $dic->decodeDictValues()
##  + runs $jxs->decode() on all dict values (for in-memory dicts)
sub decodeDictValues {
  my $dic = shift;
  return $dic if (!$dic->{ttd} || !$dic->{ttd}{dict});

  my $dict = $dic->{ttd}{dict};
  my $jxs  = $dic->jsonxs;
  foreach (keys %$dict) {
    $dict->{$_} = $jxs->decode($dict->{$_}) if (!ref($dict->{$_}));
  }
  return $dic;
}


##==============================================================================
## Methods: Persistence
##==============================================================================

##======================================================================
## Methods: Persistence: Perl

## @keys = $class_or_obj->noSaveKeys()
##  + returns list of keys not to be saved
sub noSaveKeys {
  my $that = shift;
  return ($that->SUPER::noSaveKeys, qw(jxs));
}

## @keys = $class_or_obj->noSaveBinKeys()
##  + returns list of keys not to be saved in binary mode
sub noSaveBinKeys {
  my $that = shift;
  return ($that->SUPER::noSaveBinKeys, qw(jxs));
}


##==============================================================================
## Methods: Analysis
##==============================================================================

##------------------------------------------------------------------------
## Methods: Analysis: v1.x: API

## $doc = $anl->analyzeTypes($doc,\%types,\%opts)
##  + perform type-wise analysis of all (text) types in $doc->{types}
##  + INHERITED from Dict

##------------------------------------------------------------------------
## Methods: Analysis: Utils

## $jxs = $dict->jsonxs()
## $jxs = $CLASS->jsonxs()
##  + returns underlying JSON::XS object or creates appropriate new object
##  + DISABLED (b/c of segfaults): caches if not already defined for object call
##--
## Program received signal SIGSEGV, Segmentation fault.
## 0xb7a03584 in XS_JSON__XS_DESTROY () from /usr/lib/perl5/auto/JSON/XS/XS.so
## (gdb) bt
## #0  0xb7a03584 in XS_JSON__XS_DESTROY () from /usr/lib/perl5/auto/JSON/XS/XS.so
## #1  0x080d5d7b in Perl_pp_entersub ()
## #2  0x08078bb8 in Perl_call_sv ()
## #3  0x080e8090 in Perl_sv_clear ()
## #4  0x080e87da in Perl_sv_free2 ()
## #5  0x080dd8d9 in ?? ()
## #6  0x080dd939 in Perl_sv_clean_objs ()
## #7  0x0807dcaf in perl_destruct ()
## #8  0x080642a5 in main ()
##--
sub jsonxs {
  return $_[0]{jxs} if (ref($_[0]) && defined($_[0]{jxs}));
  my $jxs = JSON::XS->new->utf8(1)->relaxed(1)->shrink(1)->allow_blessed(1)->convert_blessed(1);
  #return $_[0]{jxs} = $jxs if (ref($_[0]));
  return $jxs;
}

## $prefix = $dict->analyzePre()
sub analyzePre {
  my $dic = shift;
  return 'my $jxs=$anl->jsonxs; '.$dic->SUPER::analyzePre();
}

## $coderef = $dict->analyzeCode()
## $coderef = $dict->analyzeCode($code)
##  + inherited



1; ##-- be happy

__END__


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.