Group
Extension

FilmAffinity-UserRating/lib/FilmAffinity/Movie.pm

package FilmAffinity::Movie;

use strict;
use warnings;

use JSON;
use Encode;
use Readonly;
use Scalar::Util qw(looks_like_number);
use Text::Trim;
use LWP::RobotUA;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;

use Moose;
use MooseX::Privacy;

use FilmAffinity::Utils qw/buildRobot/;

=head1 NAME

FilmAffinity::Movie - Perl interface to FilmAffinity

=head1 VERSION

Version 1.01

=cut

our $VERSION = 1.01;

=head1 SYNOPSIS

Retrieve information about a filmaffinity movie

    use FilmAffinity::Movie;

    my $movie = FilmAffinity::Movie->new(
      id    => $movieID,
      delay => $delay || 5,
    );

    $movie->parse();

Via the command-line program filmaffinity-get-movie-info.pl

=head1 DESCRIPTION

=head2 Overview

FilmAffinity::Movie is a Perl interface to FilmAffinity. You can use
this module to retrieve information about a movie.

=head2 Constructor

=over 4

=item new()

Object's constructor. You should pass as parameter the movieID

    my $movie = FilmAffinity::Movie->new( id => '932476' );

=back

=head2 Options

=over 4

=item delay

Set the minimum delay between requests to the server, in seconds.

    my $parser = FilmAffinity::Movie->new(
      userID => '932476',
      delay  => 20,
    );

By default, the delay is 5 seconds

=back

=head1 ACCESSORS

=head2 $movie->id

get id

=head2 $movie->title

get title

=head2 $movie->originaltitle

get original title

=head2 $movie->year

get year

=head2 $movie->duration

get running time (in minutes)

=head2 $movie->synopsis

get synopsis

=head2 $movie->website

get url of the movie website

=head2 $movie->country

get country

=head2 $movie->cover

get url of the cover

=head2 $movie->rating

get site rating

=head2 $movie->myrating

get personal rating

=head2 $movie->votes

get number of votes

=head2 $movie->genre

get genres list

=head2 $movie->topic

get topics list

=head2 $movie->cast

get cast list

=head2 $movie->director

get directors list

=head2 $movie->composer

get composers list

=head2 $movie->screenwriter

get screenwriters list

=head2 $movie->cinematographer

get cinematographers list

=head2 $movie->studio

get studios list

=head2 $movie->producer

get producers list

=cut

has id       => ( is => 'ro', isa => 'Int', required => 1, );
has title    => ( is => 'rw', isa => 'Str', );
has originaltitle    => ( is => 'rw', isa => 'Str', );
has year     => ( is => 'rw', isa => 'Int', );
has duration => ( is => 'rw', isa => 'Int', );
has synopsis => ( is => 'rw', isa => 'Str', );
has website  => ( is => 'rw', isa => 'Str', );
has country  => ( is => 'rw', isa => 'Str', );
has cover    => ( is => 'rw', isa => 'Str', );
has rating   => ( is => 'rw', isa => 'Num', );
has votes    => ( is => 'rw', isa => 'Num', );
has myrating => ( is => 'rw', isa => 'Num', );

has genre => ( is => 'rw', isa => 'ArrayRef[Str]', );
has topic => ( is => 'rw', isa => 'ArrayRef[Str]', );

has cast     => ( is => 'rw', isa => 'ArrayRef[Str]', );
has director => ( is => 'rw', isa => 'ArrayRef[Str]', );
has composer => ( is => 'rw', isa => 'ArrayRef[Str]', );

has screenwriter    => ( is => 'rw', isa => 'ArrayRef[Str]', );
has cinematographer => ( is => 'rw', isa => 'ArrayRef[Str]', );

has studio   => ( is => 'rw', isa => 'ArrayRef[Str]', );
has producer => ( is => 'rw',  );


has tree => (
  is      => 'rw',
  isa     => 'HTML::TreeBuilder',
  traits  => [qw/Private/],
);

has ua => (
  is      => 'rw',
  isa     => 'LWP::RobotUA',
  traits  => [qw/Private/],
);

my $MOVIE_URL     = 'http://www.filmaffinity.com/en/film';
my $XPATH_TITLE   = '//h1[@id="main-title"]';
my $XPATH_RATING  = '//div[@id="movie-rat-avg"]';
my $XPATH_VOTE    = '//div[@id="movie-count-rat"]/span';
my $XPATH_COUNTRY = '//span[@id="country-img"]/img/@title';
my $XPATH_COVER   = '//div[@id="movie-main-image-container"]/a/img/@src';

my @JSON_FIELD = qw(
  id title year synopsis website duration cast director composer screenwriter
  cinematographer genre topic studio producer country cover rating votes
  originaltitle myrating
);

my $FIELD = [
  {
    accessor => 'originaltitle',
    faTag    => 'Original title',
  },
  {
    accessor => 'year',
    faTag    => 'Year',
  },
  {
    accessor => 'synopsis',
    faTag    => 'Synopsis / Plot',
  },
  {
    accessor => 'website',
    faTag    => 'Official Site',
  },
  {
    accessor   => 'duration',
    faTag      => 'Running Time',
    cleanerSub => \&p_cleanDuration,
  },
  {
    accessor   => 'cast',
    faTag      => 'Cast',
    cleanerSub => \&p_cleanPerson,
  },
  {
    accessor   => 'director',
    faTag      => 'Director',
    cleanerSub => \&p_cleanPerson,
  },
  {
    accessor   => 'composer',
    faTag      => 'Music',
    cleanerSub => \&p_cleanPerson,
  },
  {
    accessor   => 'screenwriter',
    faTag      => 'Screenwriter',
    cleanerSub => \&p_cleanPerson,
  },
  {
    accessor   => 'cinematographer',
    faTag      => 'Cinematography',
    cleanerSub => \&p_cleanPerson,
  },
  {
    accessor   => 'genre',
    faTag      => 'Genre',
    cleanerSub => \&p_cleanGenre,
  },
  {
    accessor   => 'topic',
    faTag      => 'Genre',
    cleanerSub => \&p_cleanGenre,
  },
  {
    accessor   => 'studio',
    faTag      => 'Production Co.',
    cleanerSub => \&p_cleanStudio,
  },
  {
    accessor   => 'producer' ,
    faTag      => 'Production Co.',
    cleanerSub => \&p_cleanStudio,
  },
];

Readonly my $SECONDS => 5;

sub BUILD {
  my ($self, $args) = @_;

  $self->tree( HTML::TreeBuilder->new() );
  $self->ua( buildRobot( $args->{delay} || $SECONDS ) );
  return;
}

=head1 SUBROUTINES/METHODS

=head2 $movie->parse()

This method will get the content of the filmaffinity webpage and retrieve
all information about the movie

=cut

sub parse {
  my $self = shift;

  my $content = $self->getContent();
  $self->parsePage($content);
  return;
}


=head2 $movie->getContent()

This method will get the content of the filmaffinity webpage

=cut

sub getContent {
  my ($self) = @_;

  my $url = $self->p_buildUrlMovie($self->id);

  my $response = $self->ua->get($url);
  if ($response->is_success){
    return $response->content();
  }
}

=head2 $movie->parsePage($content)

This method parses a page of filmaffinity that is available as
a single string in memory.

=cut

sub parsePage {
  my ($self, $content) = @_;

  $content = decode('utf-8', $content);
  $self->tree->parse($content);

  foreach my $data (@{$FIELD}){
    $self->p_findField($data);
  }

  $self->p_findRating();
  $self->p_findVotes();
  $self->country( $self->tree->findvalue( $XPATH_COUNTRY ) );
  $self->cover( $self->tree->findvalue( $XPATH_COVER ) );
  $self->title( trim($self->tree->findvalue( $XPATH_TITLE )) );

  $self->tree->delete();
  return;
}



=head2 $movie->toJSON()

This method will export all movie informations in JSON format

=cut

sub toJSON {
  my $self = shift;

  my %data;
  foreach my $field (@JSON_FIELD){
    $data{$field} = $self->$field() if defined $self->$field();
  };

  return to_json(\%data, {pretty => 1});
}

private_method p_findField => sub {
  my ( $self, $data ) = @_;

  my @nodes = $self->tree->findnodes( '//dl[@class="movie-info"]/dt' );
  foreach my $node (@nodes){
    if ( trim( $node->as_text() ) eq $data->{faTag} ){

      my $searched_node = $node->right();

      my $accessor = $data->{accessor};

      my $value = trim( $searched_node->as_text() );

      next if $value eq q{};

      if (defined $data->{cleanerSub}){
        $value = $data->{cleanerSub}($value, $accessor);
      }

      next if not defined $value;

      $self->$accessor( $value );
      last;
    }
  }
};

private_method p_findRating => sub {
  my $self = shift;

  my $rating = $self->tree->findvalue( $XPATH_RATING );

  return if not defined $rating or $rating eq q{};
  $self->rating( trim($rating) );
};

private_method p_findVotes => sub {
  my $self = shift;

  my $votes = $self->tree->findvalue( $XPATH_VOTE );

  return if not defined $votes or $votes eq q{};
  $votes =~ s/\D//gixms;
  $self->votes( trim($votes) );
};

private_method p_buildUrlMovie => sub {
  my ($self, $id) = @_;

  return $MOVIE_URL.$id.'.html';
};

private_method p_removeTextBetweenParenthesis => sub {
  my $content = shift;

  $content =~ s/[(].*[)]//gxms;
  return $content;
};

private_method p_cleanDuration => sub {
  my $value = shift;
  $value =~ s/min[.]//gixms;
  $value = trim($value);

  if ( looks_like_number($value) ){
    return $value;
  }
  return;
};

private_method p_cleanPerson => sub {
  my $value = shift;

  my @persons = split m/,/xms, $value;
  @persons = map {trim (p_removeTextBetweenParenthesis($_) )} @persons;
  return \@persons;
};

private_method p_cleanGenre => sub {
  my ($value, $field) = @_;

  my $pos = $field eq 'genre' ? 0 : 1;
  my @list = split m/[|]/xms, $value;

  if ( defined $list[$pos]){
    my @genres = trim ( split m/[.]/xms, $list[$pos] );
    return \@genres;
  }
  return;
};

private_method p_cleanStudio => sub {
  my ($value, $field) = @_;

  my $pos = $field eq 'studio' ? 0 : 1;
  my @list = split m/[.]\sProducer: /xms, $value;

  my @studio = ();
  if (not defined $list[$pos]){
    return;
  } else {
    @studio = trim ( split m{ / }xms, $list[$pos] );
    return \@studio;
  }
};

=head1 AUTHOR

William Belle, C<< <william.belle at gmail.com> >>

=head1 BUGS AND LIMITATIONS

Please report any bugs or feature requests to C<bug-filmaffinity-userrating at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=FilmAffinity-UserRating>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc FilmAffinity::Movie

You can also look for information at:

=over 4

=item * RT: CPAN's request tracker (report bugs here)

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=FilmAffinity-UserRating>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/FilmAffinity-UserRating>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/FilmAffinity-UserRating>

=item * Search CPAN

L<http://search.cpan.org/dist/FilmAffinity-UserRating/>

=back

=head1 SEE ALSO

L<http://www.filmaffinity.com>

=head1 LICENSE AND COPYRIGHT

Copyright 2013 William Belle.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See http://dev.perl.org/licenses/ for more information.

=cut

1; # End of FilmAffinity::Movie


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.