Group
Extension

Chess-PGN-Extract/lib/Chess/PGN/Extract/Stream.pm

package Chess::PGN::Extract::Stream;
use 5.008001;
use strict;
use warnings;

use base 'Exporter::Tiny';
our @EXPORT = qw| pgn_file read_game read_games |;

use Carp       qw| croak |;
use File::Temp qw| tempdir tempfile |;
use Chess::PGN::Extract 'read_games' => { -prefix => '_' };
use IO::Handle;

sub new {
  my ( $class, $pgn_file ) = @_;

  croak ("'new' requires a PGN file name")
    unless defined $pgn_file;

  my $self = {};
  $self->{pgn_file} = $pgn_file;
  open my $pgn_handle, '<', $pgn_file
    or croak ("Cannot open PGN file: \"$pgn_file\"");
  $self->{pgn_handle} = $pgn_handle;

  bless $self => $class;
}

sub pgn_file { $_[0]->{pgn_file} }

sub read_game {
  ( $_[0]->read_games (1) )[0];
}

sub read_games {
  my $self = shift;
  my ($limit) = @_;

  my $handle = $self->{pgn_handle};
  return if $handle->eof;

  unless ( defined $limit ) {
    return _read_all ($handle);
  }

  # Force integer
  $limit = int $limit;

  if    ( $limit  < 0 ) { _read_all ($handle) }
  elsif ( $limit == 0 ) { return }
  else {
    my ( $game, @games );
    while ( $limit-- and $game = _get_one_game_string ($handle) ) {
      push @games, $game;
    }
    return _read_pgn_string ( join '', @games );
  }
}

{
  # Parser contexts:
  #   $start        - Before parsing tag sections
  #   $expect_tag   - Parsing tag sections has started
  #   $expect_moves - Parsing moves section has started
  my ( $start, $expect_tag, $expect_moves ) = 0 .. 2;

  # Regular expressions to identify which section the given $line is
  my $blank     = qr/^[\s\t]*\n$/;
  my $tag       = qr/^[\s\t]*\[[\s\t]*\w+[\s\t]+\".+\"[\s\t]*\][\s\t]*\n$/;
  my $tag_begin = qr/^[\s\t]*\[/;
  # my $moves = ...;

  # _get_one_game_string ($handle) => $pgn_string
  sub _get_one_game_string {
    my $context = $start;
    _parse_lines ( $_[0], $context, [] );
  }

  # _parse_lines ($handle, $context, $buffer) => $pgn_string
  sub _parse_lines {
    return join '', @{ $_[2] } if $_[0]->eof;

    my $line = $_[0]->getline;

    # Ignore blank lines
    goto \&_parse_lines if $line =~ $blank;

    if ( $_[1] == $start ) {

      if ( $line =~ $tag_begin ) {
        _complete_tag_line ($_[0], $line);
        push @{ $_[2] }, $line;
        $_[1] = $expect_tag;
        goto \&_parse_lines;
      }
      else {
        croak ("PGN parse error: Move section started without any tags");
      }
    }
    elsif ( $_[1] == $expect_tag ) {

      if ( $line =~ $tag_begin ) {
        _complete_tag_line ($_[0], $line);
        push @{ $_[2] }, $line;
        goto \&_parse_lines;
      }
      else {
        push @{ $_[2] }, $line;
        $_[1] = $expect_moves;
        goto \&_parse_lines;
      }
    }
    elsif ( $_[1] == $expect_moves ) {

      if ( $line =~ $tag_begin ) {
        seek $_[0], -length $line, 1;    # go back to the head of $line
        return join '', @{ $_[2] };
      }
      else {
        push @{ $_[2] }, $line;
        goto \&_parse_lines;
      }
    }
    else {
      croak ("PGN parse error: Unknown context");
    }
    croak ("PGN parse error: Unknown parse error");
  }

  # _complete_tag_line ($handle, $partial_tag_line)
  sub _complete_tag_line {
    return if $_[1] =~ $tag;
    if ( $_[0]->eof ) {
      croak ("PGN parse error: Parse finished inside a tag section");
    }
    chomp $_[1];
    $_[1] .= $_[0]->getline;
    goto \&_complete_tag_line;
  }
}

# _read_all ($handle) => @games
sub _read_all {
  my $handle = shift;
  my $all = do { local $/; $handle->getline };
  _read_pgn_string ($all);
}

# _read_pgn_string ($pgn_string) => @games
sub _read_pgn_string {
  my ($pgn_string) = @_;

  my $tmp_dir = tempdir ( $ENV{TMPDIR} . "/chess_pgn_extract_stream_XXXXXXXX",
    CLEANUP => 1 );
  my ( $tmp_handle, $tmp_file ) = tempfile ( DIR => $tmp_dir );
  $tmp_handle->print ($pgn_string);
  $tmp_handle->close;

  return _read_games ($tmp_file);
}

1;
__END__

=encoding utf-8

=head1 NAME

Chess::PGN::Extract::Stream - File stream for reading PGN files

=head1 SYNOPSIS

    my $stream = Chess::PGN::Extract->new ("filename.pgn");
    while ( my $game = $stream->read_game ) {
      # You can read games one by one
    }

    # ... or a chunk of games you want
    my @game = $stream->read_games (10);

=head1 DESCRIPTION

B<Chess::PGN::Extract::Stream> provides a simple class of file stream by which
you can extract chess records one by one or chunk by chunk from Portable Game
Notation (PGN) files.

=head1 ATTRIBUTES AND METHODS

=over

=item B<$class-E<gt>new ($pgn_file)>

Create a stream instance from the C<$pgn_file>.

=item B<$self-E<gt>pgn_file>

PGN file name from which the stream reads games.

=item B<$self-E<gt>read_game ()>

Read a game from the stream.

=item B<$self-E<gt>read_games ($limit)>

Read a number of games at once and return an C<ARRAY> of them. If C<$limit> is a
positive number, it reads games until the number of them reaches the C<$limit>.
If C<$limit> is C<undef> or negative, it slurps the PGN file and returns all the
games contained.

=back

=head1 SEE ALSO

L<Chess::PGN::Extract>

=head1 BUGS

Please report any bugs to
L<https://bitbucket.org/mnacamura/chess-pgn-extract/issues>.

=head1 AUTHOR

Mitsuhiro Nakamura <m.nacamura@gmail.com>

Many thanks to David J. Barnes for his original development of
L<pgn-extract|http://www.cs.kent.ac.uk/people/staff/djb/pgn-extract/> and
basicer at Bitbucket for
L<his work on JSON enhancement|https://bitbucket.org/basicer/pgn-extract/>.

=head1 LICENSE

Copyright (C) 2014 Mitsuhiro Nakamura.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.