Group
Extension

WWW-GoKGS/lib/WWW/GoKGS/Scraper/GameArchives.pm

package WWW::GoKGS::Scraper::GameArchives;
use strict;
use warnings;
use parent qw/WWW::GoKGS::Scraper/;
use WWW::GoKGS::Scraper::Declare;
use WWW::GoKGS::Scraper::Filters qw/game_result datetime/;

sub base_uri { 'http://www.gokgs.com/gameArchives.jsp' }

sub __build_scraper {
    my $self = shift;

    my $month2num = do {
        my @months = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
        my %month2num; @month2num{ @months } = ( 1..12 );
        sub { $month2num{$_[0]} };
    };

    my %user = (
        name => [ 'TEXT', sub { s/ \[[^\]]+\]$// } ],
        rank => [ 'TEXT', sub { m/ \[([^\]]+)\]$/ ? $1 : undef } ],
        uri  => '@href',
    );

    my $game = scraper {
        process '//td[1]/a', 'sgf_uri' => '@href';
        process '//td[2]/a', 'white[]' => \%user;
        process '//td[3]/a', 'black[]' => \%user;
        process '//td[3]', 'maybe_setup' => 'TEXT';
        process '//td[4]', 'setup' => 'TEXT';
        process '//td[5]', 'start_time' => 'TEXT';
        process '//td[6]', 'type' => 'TEXT';
        process '//td[7]', 'result' => 'TEXT';
        process '//td[8]', 'tag' => 'TEXT';
    };

    my $calendar = scraper {
        process 'td', 'year' => 'TEXT';
        process '//following-sibling::td[not(@colspan)]',
                'month[]' => scraper {
                    process '.', 'month' => [ 'TEXT', $month2num ];
                    process 'a', 'uri' => '@href'; };
    };

    scraper {
        process '//a[@href="tzList.jsp"]', 'time_zone' => 'TEXT';
        process '//table[tr/th/text()="Viewable?"]//following-sibling::tr',
                'games[]' => $game;
        process '//a[contains(@href, ".zip")]', 'zip_uri' => '@href';
        process '//a[contains(@href, ".tar.gz")]', 'tgz_uri' => '@href';
        process '//table[colgroup]//following-sibling::tr',
                'calendar[]' => $calendar;
    };
}

sub scrape {
    my ( $self, @args ) = @_;
    my $result = $self->SUPER::scrape( @args );

    return $result unless $result->{calendar};

    my @calendar;
    for my $calendar ( @{$result->{calendar}} ) {
        for my $month ( @{$calendar->{month}} ) {
            $month->{year} = $calendar->{year};
            push @calendar, $month;
        }
    }

    if ( @calendar == 1 and $calendar[0]{year} == 1970 ) { # KGS's bug
        delete $result->{calendar};
    }
    else {
        $result->{calendar} = \@calendar;
    }

    return $result unless $result->{games};

    for my $game ( @{$result->{games}} ) {
        next if exists $game->{black};

        # <td colspan="2">
        my $users = $game->{white};
        if ( @$users == 1 ) { # Type: Demonstration
            $game->{owner} = $users->[0];
        }
        elsif ( @$users == 3 ) { # Type: Review
            $game->{owner} = $users->[0];
            $game->{white} = [ $users->[1] ];
            $game->{black} = [ $users->[2] ];
        }
        elsif ( @$users == 5 ) { # Type: Rengo Review
            $game->{owner} = $users->[0];
            $game->{white} = [ @{$users}[1,2] ];
            $game->{black} = [ @{$users}[3,4] ];
        }
        else {
            die 'Oops! Something went wrong';
        }

        $game->{tag}        = $game->{result} if exists $game->{result};
        $game->{result}     = $game->{type};
        $game->{type}       = $game->{start_time};
        $game->{start_time} = $game->{setup};
        $game->{setup}      = $game->{maybe_setup};
    }
    continue {
        $game->{start_time} = datetime( $game->{start_time} );
        $game->{result}     = game_result( $game->{result} );
        $game->{setup}      =~ /^(\d+)\x{d7}\d+ (?:H(\d+))?$/;
        $game->{board_size} = $1;
        $game->{handicap}   = $2 if defined $2;

        for my $user (
            $game->{owner} || (),
            @{ $game->{black} || [] },
            @{ $game->{white} || [] },
        ) {
            delete $user->{rank} unless $user->{rank};
        }

        delete $game->{setup};
        delete $game->{maybe_setup};
    }

    $result;
}

1;

__END__

=head1 NAME

WWW::GoKGS::Scraper::GameArchives - KGS Game Archives

=head1 SYNOPSIS

  use WWW::GoKGS::Scraper::GameArchives;
  my $game_archives = WWW::GoKGS::Scraper::GameArchives->new;
  my $result = $game_archives->query( user => 'YourAccount' );

=head1 DESCRIPTION

L<KGS|http://www.gokgs.com/> Game Archives
preserves Go games played by the users. You can search games by filling
in the HTML forms. The search result is provided as an HTML document naturally.

This module provides yet another interface to send a query to the archives,
and also parses the result into a neatly arranged Perl data structure.

This class inherits from L<WWW::GoKGS::Scraper>.

=head2 DISCLAIMER

According to KGS's C<robots.txt>, bots are not allowed to crawl 
the Game Archives:

  User-agent: *
  Disallow: /gameArchives.jsp

Although this module can be used to implement crawlers,
the author doesn't intend to violate their policy.
Use at your own risk.

=head2 CLASS METHODS

=over 4

=item $uri = $class->base_uri

  # => "http://www.gokgs.com/gameArchives.jsp"

=item $URI = $class->build_uri( $k1 => $v1, $k2 => $v2, ... )

=item $URI = $class->build_uri({ $k1 => $v1, $k2 => $v2, ... })

=item $URI = $class->build_uri([ $k1 => $v1, $k2 => $v2, ... ])

Given key-value pairs of query parameters, constructs a L<URI> object
which consists of C<base_uri> and the parameters.

=back

=head2 INSTANCE METHODS

=over 4

=item $UserAgent = $game_archives->user_agent

=item $game_archives->user_agent( LWP::UserAgent->new(...) )

Can be used to get or set an L<LWP::UserAgent> object which is used to
C<GET> the requested resource. Defaults to the C<LWP::UserAgent> object
shared by L<Web::Scraper> users (C<$Web::Scraper::UserAgent>).

=item $HashRef = $game_archives->query( user => 'YourAccount', ... )

Given key-value pairs of query parameters, returns a hash reference
which represnets the result. The hashref is formatted as follows:

  {
      time_zone => 'GMT',
      games => [
          {
              sgf_uri => 'http://files.gokgs.com/.../foo-bar.sgf',
              white => [
                  {
                      name => 'foo',
                      rank => '4k',
                      uri  => 'http://...&user=foo...'
                  }
              ],
              black => [
                  {
                      name => 'bar',
                      rank => '6k',
                      uri  => 'http://...&user=bar...'
                  }
              ],
              board_size => '19',
              handicap => '2',
              start_time => '2013-07-04T05:32',
              type => 'Ranked',
              result => 'W+Resign'
          },
          ...
      ],
      zip_uri => 'http://.../foo-2013-7.zip',    # contains SGF files
      tgz_uri => 'http://.../foo-2013-7.tar.gz', # created in July 2013
      calendar => [
          ...
          {
              year  => 2011,
              month => 7,
              uri   => '/gameArchives.jsp?user=foo&year=2011&month=7'
          },
          ...
      ]
  }

The possible parameters are as follows:

=over 4

=item user (required)

Represents a KGS username.

  my $result = $game_archives->query( user => 'foo' );

=item year, month

Can be used to search games played in the specified month.

  my $result = $game_archives->query(
      user  => 'foo',
      year  => 2013
      month => 7
  );

=item oldAccounts

Can be used to search games played by expired and guest accounts.

  my $result = $game_archives->query(
      user        => 'foo',
      oldAccounts => 'y'
  );

=item tags

Can be used to search games tagged by the specified C<user>.

  my $result = $game_archives->query(
      user => 'foo',
      tags => 't'
  );

=back

=item $HashRef = $game_archives->scrape( $stuff )

The given arguments are passed to L<Web::Scraper>'s C<scrape> method.
C<query> method is just a wrapper of this method. For example,
you can pass URIs included by the return value of C<query> method.

=back

=head1 EXAMPLES

  use JSON::Path qw/jpath_map/; 
  use Time::Piece qw/gmtime/;

  # convert start times for games into Time::Piece objects
  jpath_map { gmtime->strftime('%Y-%m-%dT%H:%MZ', $_) }
      $result, '$.games[*].start_time';

=head1 HISTORY

L<WWW::KGS::GameArchives> was renamed to C<WWW::GoKGS::Scraper::GameArchives>
(this module).
The return value of the C<scrape> method (C<$result>) was modified as follows:

  - Remove $result->{summary}
  - Rename $game->{kifu_uri} to $game->{sgf_uri}
  - Rename $game->{editor} to $game->{owner}
  - Remove $game->{setup}
  - Add $game->{board_size}
  - Add $game->{handicap}
  - $user->{name} does not end with a rank string such as "[2k]"
  - Add $user->{rank}
  - Rename $user->{link} to $user->{uri}

where C<$game> denotes an element of C<< $result->{games} >>
and  C<$user> denotes C<< $game->{owner} >>, an element of
C<< $game->{white} >> or an element of C<< $game->{black} >> respectively.

=head1 SEE ALSO

L<WWW::GoKGS>

=head1 AUTHOR

Ryo Anazawa (anazawa@cpan.org)

=head1 LICENSE

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.