Group
Extension

Lingua-TT/TT/Dict/JSON.pm

## -*- Mode: CPerl -*-
## File: Lingua::TT::Dict::JSON.pm
## Author: Bryan Jurish <TT/IO.pm>
## Descript: TT Utils: dictionary: JSON

package Lingua::TT::Dict::JSON;
use Lingua::TT::Dict;
use Lingua::TT::IO;
use JSON::XS;
use Carp;
use strict;

##==============================================================================
## Globals & Constants

our @ISA = qw(Lingua::TT::Dict);

##==============================================================================
## Constructors etc.

## $dict = CLASS_OR_OBJECT->new(%opts)
## + %opts, %$dict:
##    dict => \%key2val,  ##-- dict data; values are refs (encoded/decoded via JSON)
sub new {
  my $that = shift;
  return $that->SUPER::new(@_);
}

##==============================================================================
## Methods: Access and Manipulation

## $jxs = $obj->jsonxs()
sub jsonxs {
  return $_[0]{jxs} if (ref($_[0]) && defined($_[0]{jxs}));
  return $_[0]{jxs} = JSON::XS->new->utf8(0)->allow_nonref;
}

##==============================================================================
## Methods: merge

## $dict = $dict->merge($dict2, %opts)
##  + include $dict2 entries in $dict, destructively alters $dict
##  + %opts:
##     append => $bool,  ##-- if true, $dict2 values are appended (dict clobber) to $dict1 values
sub merge {
  my ($d1,$d2,%opts) = @_;
  if (!$opts{append}) {
    @{$d1->{dict}}{CORE::keys %{$d2->{dict}}} = CORE::values %{$d2->{dict}}; ##-- clobber
  } else {
    my $h1 = $d1->{dict};
    my $h2 = $d2->{dict};
    my $jxs = $d1->jsonxs;
    my ($key,$val1,$val2);
    while (($key,$val2)=each %$h2) {
      if (!defined($val1=$h1->{$key})) {
	$h1->{$key} = $val2;
      }
      elsif (ref($val1) eq 'HASH' && ref($val2) eq 'HASH') {
	@$val1{keys %$val2} = values %$val2;
      }
      elsif (ref($val1) eq 'ARRAY' && ref($val2) eq 'ARRAY') {
	  push(@$val1, @$val2);
      }
      else {
	warn(ref($d1)."::merge(): cannot merge values $val1, $val2 for key '$key'");
	$h1->{$key} = $val2;
	next;
      }
    }
  }
  return $d1;
}

##==============================================================================
## Methods: Apply

## \&apply = $dict->applySub(%opts)
##   + returns a CODE-ref for applying dictionary analysis to a single item
##   + returned sub is called without arguments
##     - data line to be analyzed (chomped) is in $_
##     - output for current data line should be stored in $_
sub applySub {
  my ($dict,%opts) = @_;
  my $jxs           = $dict->jsonxs;
  my $dh            = $dict->{dict};
  #my $include_empty = $opts{allow_empty};
  my ($text,$a_in,$a_dict);
  return sub {
    ($text,$a_in) = split(/\t/,$_,2);
    $a_in   = $jxs->decode($a_in) if (defined($a_in));
    $a_dict = $dh->{$text};
    if (!defined($a_dict)) {
      ##-- +in, -dict
      ;
    }
    elsif (!defined($a_in)) {
      ##-- -in, +dict
      $a_in = $a_dict;
    }
    elsif (ref($a_in) eq 'HASH' && ref($a_dict) eq 'HASH') {
      ##-- +in, +dict: HASH
      @$a_in{keys %$a_dict} = values %$a_dict;
    }
    elsif (ref($a_in) eq 'ARRAY' && ref($a_dict) eq 'ARRAY') {
      ##-- +in, +dict: ARRAY
      push(@$a_in, @$a_dict);
    }
    else {
      ##-- +in, +dict: OTHER
      warn(ref($dict)."::applySub(): cannot merge values $a_in, $a_dict for key '$text'");
      $a_in = $a_dict;
    }
    $_ = join("\t", $text, (defined($a_in) ? $jxs->encode($a_in) : qw()))."\n";
  };
}

## $bool = $dict->apply($infh,$outfh,%opts)
##  + apply dict to filehandle $fh
##  + %opts:
##     allow_empty => $bool,  ##-- include empty analyses? (default=0)

##==============================================================================
## Methods: I/O

##--------------------------------------------------------------
## Methods: I/O: generic

## $bool = $dict->setFhLayers($fh,%opts)
sub setFhLayers {
  binmode($_[1],':utf8');
}

##--------------------------------------------------------------
## Methods: I/O: Native

## $bool = $dict->saveNativeFh($fh,%opts)
## + saves to filehandle
## + %opts: (none)
sub saveNativeFh {
  my ($dict,$fh,%opts) = @_;
  binmode($fh,":utf8");
  my $jxs = $dict->jsonxs();
  my ($key,$val);
  while (($key,$val)=each(%{$dict->{dict}})) {
    $fh->print($key, "\t", $jxs->encode($val), "\n");
  }
  return $dict;
}

## $bool = $dict->loadNativeFh($fh)
## + loads from handle
## + %opts
##    encoding => $enc,  ##-- sets $fh :encoding flag if defined; default: none
##    append   => $bool, ##-- if true, multiple entries for a single key will be appended (and maybe promoted to ARRAY)
##    merge    => $bool, ##-- if true, multiple HASH-entries for a single key will be merged
sub loadNativeFh {
  my ($dict,$fh,%opts) = @_;
  binmode($fh,":utf8");
  $dict   = $dict->new() if (!ref($dict));
  my $dh  = $dict->{dict};
  my $jxs = $dict->jsonxs;
  my $merge = $opts{merge};
  my ($line,$key,$val);
  if ($opts{append} || $opts{merge}) {
    ##-- append mode
    my ($oldval);
    while (defined($line=<$fh>)) {
      chomp($line);
      next if ($line =~ /^\s*$/ || $line =~ /^%%/);
      ($key,$val) = split(/\t/,$line,2);
      next if (!defined($val)); ##-- don't store keys for undef values (but do for empty string)
      $val = $jxs->decode($val);
      if (!defined($oldval=$dh->{$key})) {
	##-- new key
	$dh->{$key} = $val;
      }
      elsif ($merge && (ref($oldval)//'') eq 'HASH' && (ref($val)//'') eq 'HASH') {
	##-- merge multiple HASH values
	@$oldval{keys %$val} = values %$val;
      }
      else {
	##-- append / promote to ARRAY values
	$oldval = $dh->{$key} = [$oldval] if (!UNIVERSAL::isa($oldval,'ARRAY'));
	push(@$oldval, UNIVERSAL::isa($val,'ARRAY') ? @$val : $val);
      }
    }
  } else {
    ##-- clobber mode (default)
    while (defined($line=<$fh>)) {
      chomp($line);
      next if ($line =~ /^\s*$/ || $line =~ /^%%/);
      ($key,$val) = split(/\t/,$line,2);
      next if (!defined($val)); ##-- don't store keys for undef values (but do for empty string)
      $dh->{$key} = $jxs->decode($val);
    }
  }
  return $dict;
}

##--------------------------------------------------------------
## Methods: I/O: Bin

## ($serialized_string,\@other_refs) = STORABLE_freeze($obj, $cloning_flag)

## $obj = STORABLE_thaw($obj, $cloning_flag, $serialized_string, @other_refs)

##==============================================================================
## Footer
1;

__END__


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.