Yet Another CPAN Grep

WebService-IMDB/lib/WebService/IMDB.pm

# $Id: IMDB.pm 7370 2012-04-09 01:17:33Z chris $

=head1 NAME

WebService::IMDB - OO Perl interface to the Internet Movie Database imdb.com


=head1 SYNOPSIS


    use WebService::IMDB;

    my $ws = WebService::IMDB->new(cache => 1, cache_exp => "12h");

    my $movie = $ws->search(type => "Title", tconst => "tt0114814");

    print $movie->title(), ": \n\n";
    print $movie->synopsis(), "\n\n";

    foreach ( @{$movie->cast_summary()} ) {
        print $_->name()->name(), " : ", $_->char(), "\n";
    }


=head1 LEGAL

The data accessed via this API is provided by IMDB, and is currently supplied
with the following copyright notice.

=over 4

For use only by clients authorized in writing by IMDb.  Authors and users of unauthorized clients accept full legal exposure/liability for their actions.

=back

Anyone using WebService::IMDB must abide by the above requirements.

=cut

package WebService::IMDB;

use strict;
use warnings;

our $VERSION = '0.05';

use base qw(Class::Accessor);

use Cache::FileCache;

use Carp;

use File::Spec::Functions qw(tmpdir);

use HTTP::Request::Common;

use JSON;

use LWP::ConnCache;
use LWP::UserAgent;

use WebService::IMDB::Title;
use WebService::IMDB::Name;

use URI;

__PACKAGE__->mk_accessors(qw(
    _cache
    _cache_exp
    _cache_root
    _cache_obj
    _domain
    _useragent
));


=head1 METHODS

=head2 new(%opts)

Constructor.

%opts can contain:

=over 4

=item cache - Whether to cache responses.  Defaults to true

=item cache_root - The root dir for the cache.  Defaults to tmpdir();

=item cache_exp - How long to cache responses for.  Defaults to "1h"

=item domain - Domain from which to request data.  Defaults to "app.imdb.com"

=back

=cut

sub new {
    my $class = shift;
    my %args = @_;
    my $self = {};

    bless $self, $class;

    $self->_cache_root($args{'cache_root'} || tmpdir());
    $self->_cache_exp($args{'cache_exp'} || "1h");
    $self->_cache(defined $args{'cache'} ? $args{'cache'} : 1);

    $self->_domain($args{'domain'} || "app.imdb.com");

    if ($self->_cache()) {
	$self->_cache_obj( Cache::FileCache->new( {'cache_root' => $self->_cache_root(), 'namespace' => "WebService-IMDB", 'default_expires_in' => $self->_cache_exp()} ) );
    }

    $self->_useragent(LWP::UserAgent->new());
    $self->_useragent()->env_proxy();
    $self->_useragent()->agent($args{'agent'} || "WebService::IMDB/$VERSION");
    $self->_useragent()->conn_cache(LWP::ConnCache->new());
    $self->_useragent()->conn_cache()->total_capacity(3);

    return $self;
}


=head2 search(%args)

%args can contain:

=over 4

=item type - Resource type: "Title", "Name

=item tconst - IMDB tconst e.g. "tt0000001" (Title)

=item nconst - IMDB nconst e.g. "nm0000002" (Name)

=item imdbid - More tolerant version of tconst, nconst e.g. "123", "0000456", "tt0000001", "nm0000002" (Title, Name)

=back

=cut

sub search {
    my $self = shift;
    my $q = { @_ };

    if (!exists $q->{'type'}) {
	croak "TODO: Return generic resultset";
    } elsif ($q->{'type'} eq "Title") {
	delete $q->{'type'};
	return WebService::IMDB::Title->_new($self, $q);
    } elsif ($q->{'type'} eq "Name") {
	delete $q->{'type'};
	return WebService::IMDB::Name->_new($self, $q);
    } else {
	croak "Unknown resource type '" . $q->{'type'} . "'";
    }

}

sub copyright {
    my $self = shift;

    my $request = GET sprintf("http://app.imdb.com/title/tt0033467/maindetails?ts=%d", time()); # Crude, use timestamp in query string to bypass our own caching

    return $self->_response_copyright($request);

}

sub _request_cache_key {
    my $request = shift;
    my $type = shift;

    my $version = "0"; # Use a version number as the first part of the key to avoid collisions should we change the structure of the key later.

    # Using | as field separator as this shouldn't ever appear in a URL (or any of the other fields).
    my $cache_key = $version . "|" . $type . "|" . $request->method() . "|" . $request->uri();
    if ($request->method() eq "POST") {
	$cache_key .= "|" . $request->content();
    }

    return $cache_key;
}

sub _response {
    my $self = shift;
    my $request = shift;
    my $cacheCodes = shift || {'404' => 1}; # Only cache 404 responses by default

    my $cache_key = _request_cache_key($request, "RESPONSE");

    my $response;

    if ($self->_cache()) {
	$response = $self->_cache_obj()->get($cache_key);
    }

    if (!defined $response) {
	$response = $self->_useragent()->request($request);

	if ($self->_cache() && exists $cacheCodes->{$response->code()}) {
	    $self->_cache_obj()->set($cache_key, $response);
	}

    }

    return $response;

}

sub _response_decoded_content {
    my $self = shift;
    my $request = shift;

    my $saveToCache = shift;
    if (!defined $saveToCache) { $saveToCache = 1; }

    my $cache_key = _request_cache_key($request, "DECODED_CONTENT");

    my $content;

    if ($self->_cache()) {
	$content = $self->_cache_obj()->get($cache_key);
    }

    if (!defined $content) {

	my $response = $self->_response($request);

	if($response->code() ne "200") {
	    croak "URL (", $request->uri(), ") Request Failed - Code: ", $response->code(), " Error: ", $response->message(), "\n";
	}

	$content = $response->decoded_content();

	if ($self->_cache() && $saveToCache) {
	    $self->_cache_obj()->set($cache_key, $content);
	}
    }

    return $content;

}

sub _response_decoded_json {
    my $self = shift;
    my $request = shift;

    my $content = $self->_response_decoded_content($request);

    my $json = JSON->new();
    $json->utf8(0);

    my $resp = $json->decode($content);
    # TODO: Honour $resp->{'exp'}, and check $resp->{'copyright'}

    if (exists $resp->{'error'}) {
	croak $resp->{'error'}->{'status'} . " " . $resp->{'error'}->{'code'} . ": " . $resp->{'error'}->{'message'};
    } elsif (exists $resp->{'data'}) {
	return $resp->{'data'};
    } elsif (exists $resp->{'news'}) {
	return $resp->{'news'};
    } else {
	croak "Failed to parse response";
    }

}

sub _response_copyright {
    my $self = shift;
    my $request = shift;

    my $content = $self->_response_decoded_content($request);

    my $json = JSON->new();
    $json->utf8(0);

    my $resp = $json->decode($content);

    return $resp->{'copyright'};

}

1;
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.