Group
Extension

Net-Twitter-Loader/lib/Net/Twitter/Loader.pm

package Net::Twitter::Loader;
use strict;
use warnings;
use JSON qw(decode_json encode_json);
use Try::Tiny;
use Carp;
use Time::HiRes qw(sleep);

our $VERSION = "0.04";

our @CARP_NOT = qw(Try::Tiny Net::Twitter Net::Twitter::Lite Net::Twitter::Lite::WithAPIv1_1);

sub new {
    my ($class, %params) = @_;
    my $self = bless {}, $class;
    croak "backend parameter is mandatory" if not defined $params{backend};
    $self->{backend} = $params{backend};
    $self->_set_param(\%params, 'filepath', undef);
    $self->_set_param(\%params, 'page_max', 10);
    $self->_set_param(\%params, 'page_max_no_since_id', 1);
    $self->_set_param(\%params, 'page_next_delay', 0);
    $self->_set_param(\%params, "logger", undef);
    return $self;
}

sub backend { $_[0]->{backend} }

sub _set_param {
    my ($self, $params_ref, $key, $default) = @_;
    $self->{$key} = defined($params_ref->{$key}) ? $params_ref->{$key} : $default;
}

sub _load_next_since_id_file {
    my ($self) = @_;
    return {} if not defined($self->{filepath});
    open my $file, "<", $self->{filepath} or return undef;
    my $json_text = do { local $/ = undef; <$file> };
    close $file;
    my $since_ids = try {
        decode_json($json_text);
    }catch {
        my $e = shift;
        $self->_log("warn", "failed to decode_json");
        return {};
    };
    $since_ids = {} if not defined $since_ids;
    return $since_ids;
}

sub _log {
    my ($self, $level, $msg) = @_;
    $self->{logger}->($level, $msg) if defined $self->{logger};
}

sub _save_next_since_id_file {
    my ($self, $since_ids) = @_;
    return if not defined($self->{filepath});
    open my $file, ">", $self->{filepath} or die "Cannot open $self->{filepath} for write: $!";
    try {
        print $file encode_json($since_ids);
    }catch {
        my $e = shift;
        $self->_log("error", $e);
    };
    close $file;
}

sub _log_query {
    my ($self, $method, $params) = @_;
    $self->_log("debug", sprintf(
        "%s: method: %s, args: %s", __PACKAGE__, $method,
        join(", ", map {"$_: " . (defined($params->{$_}) ? $params->{$_} : "[undef]")} keys %$params)
    ));
}

sub _normalize_search_result {
    my ($self, $nt_result) = @_;
    if(!ref($nt_result)) {
        confess "Scalar is returned by the backend. Something is wrong.";
    }elsif(ref($nt_result) eq 'ARRAY') {
        return $nt_result;
    }elsif(ref($nt_result) eq 'HASH') {
        if(ref($nt_result->{statuses}) eq 'ARRAY') {    ## REST API v1.1
            return $nt_result->{statuses};
        }elsif(ref($nt_result->{results}) eq 'ARRAY') { ## REST API v1.0
            return $nt_result->{results};
        }
    }
    confess "Unknown type of data returned by the backend. Something is wrong.";
}

sub _load_timeline {
    my ($self, $nt_params, $method, @label_params) = @_;
    my %params = defined($nt_params) ? %$nt_params : ();
    if(not defined $method) {
        $method = (caller(1))[3];
        $method =~ s/^.*:://g;
    }
    my $label = "$method," .
        join(",", map { "$_:" . (defined($params{$_}) ? $params{$_} : "") } @label_params);
    my $since_ids = $self->_load_next_since_id_file();
    my $since_id = $since_ids->{$label};
    $params{since_id} = $since_id if !defined($params{since_id}) && defined($since_id);
    my $page_max = defined($params{since_id}) ? $self->{page_max} : $self->{page_max_no_since_id};
    if($method eq 'public_timeline') {
        $page_max = 1;
    }
    my $max_id = undef;
    my @result = ();
    my $load_count = 0;
    my %loaded_ids = ();
    my $next_since_id;
    while($load_count < $page_max) {
        $params{max_id} = $max_id if defined $max_id;
        $self->_log_query($method, \%params);
        my $loaded;
        try {
            $loaded = $self->_normalize_search_result($self->{backend}->$method({%params}));
        }catch {
            my $e = shift;
            $self->_log("error", $e);
            die $e;
        };
        return undef if not defined $loaded;
        @$loaded = grep { !$loaded_ids{$_->{id}} } @$loaded;
        last if !@$loaded;
        $loaded_ids{$_->{id}} = 1 foreach @$loaded;
        $max_id = $loaded->[-1]{id};
        $next_since_id = $loaded->[0]{id} if not defined $next_since_id;
        push(@result, @$loaded);
        $load_count++;
        sleep($self->{page_next_delay});
    }
    if($load_count == $self->{page_max}) {
        $self->_log("notice", "page has reached the max value of " . $self->{page_max});
    }
    if(defined($next_since_id)) {
        $since_ids = $self->_load_next_since_id_file();
        $since_ids->{$label} = $next_since_id;
        $self->_save_next_since_id_file($since_ids);
    }
    return \@result;
}

sub user_timeline {
    my ($self, $nt_params) = @_;
    return $self->_load_timeline($nt_params, undef, qw(id user_id screen_name));
}

sub public_timeline {
    my ($self, $nt_params) = @_;
    return $self->_load_timeline($nt_params);
}

sub home_timeline {
    my ($self, $nt_params) = @_;
    return $self->_load_timeline($nt_params);
}

sub list_statuses {
    my ($self, $nt_params) = @_;
    return $self->_load_timeline($nt_params, undef, qw(list_id slug owner_screen_name owner_id));
}

sub search {
    my ($self, $nt_params) = @_;
    return $self->_load_timeline($nt_params, undef, qw(q lang locale));
}

sub favorites {
    my ($self, $nt_params) = @_;
    return $self->_load_timeline($nt_params, undef, qw(id user_id screen_name))
}

sub mentions {
    my ($self, $nt_params) = @_;
    return $self->_load_timeline($nt_params);
}

sub retweets_of_me {
    my ($self, $nt_params) = @_;
    return $self->_load_timeline($nt_params);
}


1;
__END__

=pod

=head1 NAME

Net::Twitter::Loader - repeat loading Twitter statuses up to a certain point

=head1 SYNOPSIS

    use Net::Twitter::Loader;
    use Net::Twitter;
    
    my $loader = Net::Twitter::Loader->new(
        backend => Net::Twitter->new(
            traits => [qw(OAuth API::RESTv1_1)],
            consumer_key => "YOUR_CONSUMER_KEY_HERE",
            consumer_secret => "YOUR_CONSUMER_SECRET_HERE",
            access_token => "YOUR_ACCESS_TOKEN_HERE",
            access_token_secret => "YOUR_ACCESS_TOKEN_SECRET_HERE",
            ssl => 1,
    
            #### If you access to somewhere other than twitter.com,
            #### set the apiurl option
            ## apiurl => "http://example.com/api/",
        ),
        filepath => 'next_since_ids.json',
        logger => sub {
            my ($level, $msg) = @_;
            warn "$level: $msg\n";
        },
    );
    
    ## First call to home_timeline
    my $arrayref_of_statuses = $loader->home_timeline();
    
    ## The latest loaded status ID is saved to next_since_ids.json
    
    ## Subsequent calls to home_timeline automatically load
    ## all statuses that have not been loaded yet.
    $arrayref_of_statuses = $loader->home_timeline();
    
    
    ## You can load other timelines as well.
    $arrayref_of_statuses = $loader->user_timeline({screen_name => 'hoge'});
    
    
    foreach my $status (@$arrayref_of_statuses) {
        printf("%s: %s\n", $status->{user}{screen_name}, Encode::encode('utf8', $status->{text}));
    }


=head1 DESCRIPTION

This module is a wrapper for L<Net::Twitter> (or L<Net::Twitter::Lite>) to make it easy
to load a lot of statuses from timelines.

=head1 FEATURES

=over

=item *

It repeats requests to load a timeline that expands over multiple pages.
C<max_id> param for each request is adjusted automatically.

=item *

Optionally it saves the latest status ID to a file.
The file will be read to set C<since_id> param for the next request,
so that it can always load all the unread statuses.

=back

=head1 CLASS METHODS

=head2 $loader = Net::Twitter::Loader->new(%options);

Creates the object with the following C<%options>.

=over

=item backend => OBJECT (mandatory)

Backend L<Net::Twitter> object. L<Net::Twitter::Lite> object can be used, too.

=item filepath => FILEPATH (optional)

File path for saving and loading the next C<since_id>.
If this option is not specified, no file will be created or loaded.

=item page_max => INT (optional, default: 10)

Maximum number of pages this module tries to load when C<since_id> is given.

=item page_max_no_since_id => INT (optional, default: 1)

Maximum number of pages this module tries to load when no C<since_id> is given.

=item page_next_delay => NUMBER (optional, default: 0)

Delay in seconds before loading the next page. Fractional number can be used.

=item logger => CODE (optional)

A code-ref for logging. If specified, it is called to log what this module is doing.

    $logger->($level, $message)

The logger is called with C<$level> and C<$message>.
C<$level> is the log level string (e.g. C<"debug">, C<"error"> ...) and C<$message> is the log message.

If C<logger> is omitted, the log is suppressed.

=back

=head1 OBJECT METHODS

=head2 $status_arrayref = $loader->home_timeline($options_hashref)

=head2 $status_arrayref = $loader->user_timeline($options_hashref)

=head2 $status_arrayref = $loader->list_statuses($options_hashref)

=head2 $status_arrayref = $loader->public_statuses($options_hashref)

=head2 $status_arrayref = $loader->favorites($options_hashref)

=head2 $status_arrayref = $loader->mentions($options_hashref)

=head2 $status_arrayref = $loader->retweets_of_me($options_hashref)

Wrapper methods for corresponding L<Net::Twitter> methods. See L<Net::Twitter> for specification of C<$options_hashref>.

Note that L<Net::Twitter> accepts non-hashref arguments for convenience, but this is not supported by L<Net::Twitter::Loader>.
You always need to give a hash-ref to these methods.

If C<since_id> is given in C<$options_hashref> or it is loaded from the file specified by C<filepath> option,
these wrapper methods repeatedly call L<Net::Twitter>'s corresponding methods to load a complete timeline newer than C<since_id>.
If C<filepath> option is enabled, the latest ID of the loaded status is saved to the file.

The max number of calling the backend L<Net::Twitter> methods is limited to C<page_max> option
if C<since_id> is specified or loaded from the file. The max number is limited to C<page_max_no_since_id> option
if C<since_id> is not specified.

If the operation succeeds, the return value of these methods is an array-ref of unique status objects.

If something is wrong (e.g. network failure), these methods throw an exception.
In this case, the error is logged if C<logger> is specified in the constructor.

=head2 $status_arrayref = $loader->search($options_hashref)

Same as other timeline methods, but note that it returns only the statuses of the search result.

Original Twitter API returns other fields such as C<"search_metadata">, but those are discarded.

=head2 $backend = $loader->backend()

Get the backend L<Net::Twitter> or L<Net::Twitter::Lite> object.

=head1 SEE ALSO

=over

=item *

L<Net::Twitter>

=item *

L<Net::Twitter::Lite>

=back

=head1 REPOSITORY

L<https://github.com/debug-ito/Net-Twitter-Loader>

=head1 BUGS AND FEATURE REQUESTS

Please report bugs and feature requests to my Github issues
L<https://github.com/debug-ito/Net-Twitter-Loader/issues>.

Although I prefer Github, non-Github users can use CPAN RT
L<https://rt.cpan.org/Public/Dist/Display.html?Name=Net-Twitter-Loader>.
Please send email to C<bug-Net-Twitter-Loader at rt.cpan.org> to report bugs
if you do not have CPAN RT account.


=head1 AUTHOR
 
Toshio Ito, C<< <toshioito at cpan.org> >>


=head1 LICENSE AND COPYRIGHT

Copyright 2014 Toshio Ito.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See L<http://dev.perl.org/licenses/> for more information.


=cut



Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.