Group
Extension

Bio-Gonzales/lib/Bio/Gonzales/Util/Cerial.pm

package Bio::Gonzales::Util::Cerial;

use warnings;
use strict;
use Carp;

use v5.11;

use Exporter 'import';

use Bio::Gonzales::Util::File qw/open_on_demand/;
use Bio::Gonzales::Util qw/deep_value flatten/;

use Try::Tiny;
use YAML::XS;
use Cpanel::JSON::XS;
use Data::Dumper;
use Storable qw/nstore_fd fd_retrieve/;

our $VERSION = '0.090'; # VERSION

our %EXPORT_TAGS = (
  'all' => [
    qw(
      ndjson_iterate ndjson_hash ndjson_slurp ndjson_spew
      ndjson_freeze ndjson_thaw
      ythaw yfreeze yslurp yspew
      jthaw jfreeze jslurp jspew
      stoslurp stospew
    )
  ],
  std => [
    qw(
      ythaw yfreeze yslurp yspew
      jthaw jfreeze jslurp jspew
      stoslurp stospew
    )
  ]
);
our @EXPORT_OK = (@{ $EXPORT_TAGS{'all'} });
our @EXPORT    = (@{ $EXPORT_TAGS{'std'} });

BEGIN {
  *yfreeze = \&YAML::XS::Dump;
  *ythaw   = \&YAML::XS::Load;
  *jthaw   = \&Cpanel::JSON::XS::decode_json;
}

our $JSON = Cpanel::JSON::XS->new->indent(1)->utf8->allow_nonref;

sub jfreeze {
  my $r;
  my @d = @_;
  try {
    $r = $JSON->encode(@d);
  } catch {
    confess Dumper \@d;
  };

}

sub _spew {
  my $dest = shift;
  my $data = shift;

  my ($fh, $was_open) = open_on_demand($dest, '>');
  binmode $fh, ':utf8' unless (ref $fh eq 'IO::Zlib');
  local $/ = "\n";

  print $fh $data;
  $fh->close unless $was_open;
}

sub _slurp {
  my $src = shift;
  my $c   = shift;
  my ($fh, $was_open) = open_on_demand($src, '<');

  my $io_layer = $c->{io_layer};
  $io_layer = ':utf8' if (!$io_layer && $c->{utf8_layer});

  binmode $fh, $c->{io_layer} if ($io_layer);

  my $data = do { local $/; <$fh> };

  $fh->close unless $was_open;
  return $data;
}

sub yslurp { return ythaw(_slurp(shift, shift)) }
sub jslurp { return jthaw(_slurp(shift, shift)) }
sub yspew  { my $file = shift; _spew($file, yfreeze($_[0])) }
sub jspew  { my $file = shift; _spew($file, jfreeze($_[0])) }

sub stospew {
  my $dest = shift;
  my $data = shift;

  my ($fh, $was_open) = open_on_demand($dest, '>');
  nstore_fd($data, $fh);
  $fh->close unless ($was_open);
}

sub stoslurp {
  my $src = shift;
  my ($fh, $was_open) = open_on_demand($src, '<');
  my $data = fd_retrieve($fh);
  $fh->close unless $was_open;
  return $data;
}

sub ndjson_freeze {
  my $entries = shift;
  return unless (@$entries);
  state $js = Cpanel::JSON::XS->new->utf8->allow_nonref;
  return join("\n", (map { $js->encode($_) } @$entries)) . "\n";
}

sub ndjson_thaw {
  my $data = shift;

  return unless ($data);
  my @entries = split /\n/, $data;

  return unless (@entries);

  state $js = Cpanel::JSON::XS->new->utf8->allow_nonref;

  return [ map { $js->decode($_) } @entries ];
}

sub ndjson_hash {
  my $keys  = shift;
  my $files = shift;
  my $cfg   = shift;

  my %res;
  my $it = ndjson_iterate($files);
  while (my $elem = $it->()) {
    my $val = deep_value($elem, $keys);
    if ($cfg->{uniq}) {
      die $val . " already exists" if ($res{$val});
      $res{$val} = $elem;
    } else {
      $res{$val} //= [];
      push @{ $res{$val} }, $elem;
    }
  }
  return \%res;
}

sub ndjson_slurp {
  my $it = ndjson_iterate(@_);

  my @res;
  while (defined(my $elem = $it->())) {
    push @res, $elem;
  }
  return \@res;
}

sub ndjson_spew {
  my ($dest, $elems, $c) = @_;

  my $js = Cpanel::JSON::XS->new->utf8->allow_nonref;
  $js = $js->canonical(1) if ($c->{canonical});
  my ($fh, $fh_was_open) = open_on_demand($dest, '>');

  try {
    for my $elem (@$elems) {
      say $fh $js->encode($elem);
    }
  } catch {
    confess "could not spew: $_";
  };

  $fh->close unless ($fh_was_open);
  return;
}

sub ndjson_iterate {
  my @srcs = flatten(@_);

  my $i = 0;
  my ($fh, $fh_was_open) = open_on_demand($srcs[$i], '<');

  return sub {
    while ($i < @srcs) {
      while (my $record = <$fh>) {
        next if (!$record || $record =~ /^\s*$/);
        my $data;
        try {
          $data = decode_json($record);
        } catch {
          warn "caught error: $_ JSON string >$record<";
        };
        confess "no valid data in record" unless ($data);

        return $data;
      }

      $fh->close unless ($fh_was_open);
      if (++$i >= @srcs) {
        # return if next file does not exist
        return;
      }

      # open next file
      ($fh, $fh_was_open) = open_on_demand($srcs[$i], '<');
    }
    return;
  };

}

__END__

=head1 NAME

Bio::Gonzales::Util::Cerial - convenience functions for yaml and json IO

=head1 SYNOPSIS

    use Bio::Gonzales::Util::Cerial;

    # YAML IO
    my $yaml_string = yfreeze(\%data);
    my $data = ythaw($yaml_string);

    yspew($filename, \%data);
    my $data = yslurp($filename);

    # JSON IO
    my $json_string = jfreeze(\%data);
    my $data = jthaw($json_string);

    jspew($filename, \%data);
    my $data = jslurp($filename);


=head1 DESCRIPTION

=head1 SUBROUTINES

=over 4
    
=item B<< $yaml_string = yfreeze(\%data) >>

Serialize data structure as yaml string

=item B<< $data = ythaw($yaml_string) >>

UNserialize data structure from yaml string

=item B<< yspew($filename, \%data) >>

Serialize data structure as yaml string to a file

=item B<< my $data = yslurp($filename) >>

UNserialize data structure from yaml file

=item B<< my $json_string = jfreeze(\%data) >>

Serialize data structure as json string

=item B<< my $data = jthaw($json_string) >>

UNserialize data structure from json string

=item B<< jspew($filename, \%data) >>

Serialize data structure as json string to a file

=item B<< my $data = jslurp($filename) >>

UNserialize data structure from json file

=back

=head1 EXPORT

The following functions are exported by default

    ythaw
    yfreeze
    yslurp
    yspew

    jthaw
    jfreeze
    jslurp
    jspew

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.