Group
Extension

WebService-Blekko/lib/WebService/Blekko.pm

#

package WebService::Blekko;

=head1 NAME

WebService::Blekko - access the Blekko JSON APIs

=cut

use strict;
use warnings;
no warnings qw( uninitialized );

use LWP::UserAgent;
use LWP::Protocol;
use HTTP::Request;
use List::Util qw( min );
use Time::HiRes;
use JSON;

use Data::Dumper;

use WebService::Blekko::QueryResultSet;
use WebService::Blekko::Pagestats;
use WebService::Blekko::Result;

our $VERSION = '1.00_07';

my $useragent = __PACKAGE__ . '_' . $VERSION;

=head1 SYNOPSIS

 use WebService::Blekko;

 my $blekko = WebService::Blekko->new( auth => 'webservice-blekko-example', );

 $res = $blekko->query( "obama /date" );

 if ( $res->error ) { ... }

 while ( my $r = $res->next ) {
    print $r->url, $r->title; # etc.
 }

=head1 DESCRIPTION

 This API wraps the Blekko search engine API(s). You can query for results,
 manipulate slashtags, get tool-bar-useful information, and so forth.

 For the Terms and Conditions for using Blekko data, please see

 https://blekko.com/ws/+/terms
 and
 https://blekko.com/ws/+/apiterms

 To get an API Auth key, please contact apiauth@blekko.com

=head1 METHODS

=head2 new( %opts )

Options include

 server => server to talk to, defaults to blekko.com
 auth => api auth key, gotten by contacting apiauth@blekko.com
 source => the name of your program/service
 page_size => number of results to return, default 20, max of 100
 scheme => http, defaults to https if available
 qps => API calls per second, defaults to 1. Do not make this greater than 1 without asking.
 agent => the user-agent to be used by LWP::UserAgent. Defaults to the package name_version.

 cookie_jar_file => cookie jar file to use, see LWP::UserAgent
 cookie_jar => cookie jar object to use, see LWP::UserAgent

Additional options are passed to LWP::UserAgent.

=cut

sub new
{
    my $class = shift;
    my $self = bless {}, $class;

    my %opts = @_;

    my $https_available = LWP::Protocol::implementor( 'https' );

    $self->{server} = delete $opts{server} || 'blekko.com';
    $self->{auth} = delete $opts{auth} || die "Must specify auth in opts";
    $self->{auth} = "&auth=" . urlencode( $self->{auth} );
    $self->{source} = delete $opts{source} || $useragent;
    $self->{source} = "&source=" . urlencode( $self->{source} );
    $self->{page_size} = delete $opts{page_size};
    $self->{scheme} = delete $opts{scheme} || ( $https_available ? 'https' : 'http' );
    $self->{qps} = min( delete $opts{qps} || 1, 1 );
    $self->{last_query} = 0;

    $opts{agent} = $opts{agent} || $useragent;
    my $cjf = delete $opts{cookie_jar_file};
    my $cj = delete $opts{cookie_jar};
    $opts{max_redirect} ||= 0; # don't follow redirects

    # is the scheme valid?
    die "invalid scheme $self->{scheme}" if ! LWP::Protocol::implementor($self->{scheme});

    # remaining opts are for LWP::UserAgent... default timeout is 180 seconds, yuck.
    $self->{ua} = LWP::UserAgent->new( %opts );
    return if ! defined $self->{ua};

    $self->{ua}->cookie_jar( { file => $cjf }, autosave => 1, ) if defined $cjf;
    $self->{ua}->cookie_jar( $cj ) if defined $cj;

    $self->{have_cookie_jar} = 1 if defined $cjf || defined $cj;

    return $self;
}

=head2 query( query_string, %opts )

Queries the server, and returns a WebService::Blekko::QueryResultSet.

Options include page_size (see above) and p, to get the pth page of
results, counting from zero.

=cut

sub query
{
    my ( $self, $q, %opts ) = @_;

    my $template = "%s://%s/ws/?q=%s%s%s%s%s";

    my $ps = '';
    $ps = "&page_size=" . urlencode( $self->{page_size} ) if $self->{page_size};
    $ps = "&page_size=" . urlencode( $opts{page_size} ) if $opts{page_size};
    my $p = '';
    $p = "&p=" . urlencode( $opts{p} ) if $opts{p};

    my $url = sprintf( $template, $self->{scheme}, $self->{server},
                       urlencode( "/json $q" ), $self->{auth}, $self->{source}, $ps, $p );

    my $req = HTTP::Request->new( 'GET', $url );
    $self->query_sleep();
    my $resp = $self->{ua}->request( $req );

    return WebService::Blekko::QueryResultSet->new( $resp->content, $resp->code );
}

=head2 pagestats( url )

Returns information about a webpage, suitable for toolbar use. Returns
a WebService::Blekko::Pagestats object, with methods host_inlinks, host_rank, etc.

=cut

sub pagestats
{
    my ( $self, $url ) = @_;

    if ( $url !~ m,^https?://,i )
    {
        return WebService::Blekko::Pagestats->new( undef, "url must start with http://", 200 );
    }

    my $template = "%s://%s/api/pagestats?url=%s%s%s";
    $url = sprintf( $template, $self->{scheme}, $self->{server}, $url, $self->{auth}, $self->{source} );

    my $req = HTTP::Request->new( 'GET', $url );
    $self->query_sleep();
    my $resp = $self->{ua}->request( $req );

    if ( ! $resp->is_success )
    {
        return WebService::Blekko::Pagestats->new( undef, "http failure, code is ".$resp->code, $resp->code );
    }

    return WebService::Blekko::Pagestats->new( $resp->content, 0, $resp->code );
}

=head2 login( username, password )

Logs into blekko, which is needed before you create/add to/delete slashtags. Requires
a cookie jar file or object to work.

Returns WebService::Blekko::Result, which has methods error, result,
and http_code. Check error before using result.

=cut

sub login
{
    my ( $self, $username, $password ) = @_; # opts

    if ( ! $self->{have_cookie_jar} )
    {
        return WebService::Blekko::Result->new( 0, "No cookie jar configured. Read the WebServer::Blekko docs.", 200 );
    }

    my $template = "https://%s/login?u=%s&p=%s%s%s"; # forced to https
    my $url = sprintf( $template, $self->{server}, $username, $password, $self->{auth}, $self->{source} );

    my $req = HTTP::Request->new( 'GET', $url );
    $self->query_sleep();
    my $resp = $self->{ua}->request( $req );

    if ( ! $resp->is_success )
    {
        return WebService::Blekko::Result->new( '', "http failure, code is ".$resp->code, $resp->code );
    }

    my $answer = my_decode_json( $resp->content ); # XXX does this need an eval?

    if ( defined $answer->{status} && $answer->{status} )
    {
        return WebService::Blekko::Result->new( 1, 0, $resp->code );
    }

    return WebService::Blekko::Result->new( 0, 'Login failed', $resp->code );
}

=head2 logout()

Logs out of blekko. Does not throw an error if you are already logged out.

=cut

sub logout
{
    my ( $self ) = @_;

    my $url = "$self->{scheme}://$self->{server}/logout";

    my $req = HTTP::Request->new( 'GET', $url );

    $self->query_sleep();
    my $resp = $self->{ua}->request( $req );

    # redir is success
    if ( $resp->is_redirect )
    {
        return WebService::Blekko::Result->new( 1, 0, $resp->code );
    }

    return WebService::Blekko::Result->new( 0, 'Logout failed', $resp->code );
}

=head2 user_info()

Returns the username of the currently logged-in user. Useful in
toolbars, where the user logs directly into blekko.

=cut

sub user_info
{
    my ( $self ) = @_;

    my $url = "$self->{scheme}://$self->{server}/api/userinfo";

    my $req = HTTP::Request->new( 'GET', $url );

    $self->query_sleep();
    my $resp = $self->{ua}->request( $req );

    if ( ! $resp->is_success )
    {
        return WebService::Blekko::Result->new( '', "http failure, code is ".$resp->code, $resp->code );
    }

    my $answer = my_decode_json( $resp->content ); # XXX does this need an eval?

    if ( defined $answer->{username} && $answer->{username} )
    {
        return WebService::Blekko::Result->new( $answer->{username}, 0, $resp->code );
    }

    return WebService::Blekko::Result->new( 0, 'Login failed', $resp->code );
}

=head2 create_slashtag( $slashtag, \@urls, $description )

Creates a slashtag.

=cut

sub create_slashtag
{
    my $self = shift;
    return $self->createupdate( "create", @_ );
}

=head2 add_urls( $slashtag, \@urls )

Adds urls to an existing slashtag.

=cut

sub add_urls
{
    my $self = shift;
    return $self->createupdate( "update", @_, undef );
}

sub createupdate
{
    my ( $self, $createupdate, $slashtag, $urls, $desc ) = @_; # XXX opts... at least urls is an array ref...

    if ( defined $urls && ref $urls ne 'ARRAY' )
    {
        return WebService::Blekko::Result->new( 0, "\$urls must be an array ref", 200 );
    }

    my $urls_string = '';
    if ( @$urls )
    {
        @$urls = map { urlencode( $_ ); } @$urls;
        $urls_string = "&urls=" . join( '%0A', @$urls );
    }
    $desc = $desc ? "&desc=" . urlencode( $desc ) : '';

    my $template = "%s://%s/tag/add?name=%s&submit=%s%s%s%s%s";
    my $url = sprintf( $template, $self->{scheme}, $self->{server}, $slashtag, $createupdate,
                       $urls_string, $desc, $self->{auth}, $self->{source} );

    my $req = HTTP::Request->new( 'GET', $url );
    $self->query_sleep();
    my $resp = $self->{ua}->request( $req );

    if ( $resp->is_redirect )
    {
        if ( $createupdate eq "create" )
        {
            return WebService::Blekko::Result->new( 0, "You are not logged in, or tag already exists", 200 );
        }
        else
        {
            return WebService::Blekko::Result->new( 0, "You are not logged in", 200 );
        }
    }

    if ( ! $resp->is_success )
    {
        return WebService::Blekko::Result->new( 0, "http failure, code is ".$resp->code, $resp->code );
    }

    if ( $resp->content ne '' )
    {
        return WebService::Blekko::Result->new( 0, "Error: ".$resp->content, $resp->code );
    }

    return WebService::Blekko::Result->new( 1, 0, $resp->code );
}

=head2 list_urls( $slashtag )

Returns an arrayref of the urls in the slashtag

=cut

sub list_urls
{
    my ( $self, $slashtag ) = @_; # XXX opts

    my $template = "%s://%s/tag/view?name=%s&format=text%s%s";
    my $url = sprintf( $template, $self->{scheme}, $self->{server}, $slashtag, $self->{auth}, $self->{soure} );

    my $req = HTTP::Request->new( 'GET', $url );
    $self->query_sleep();
    my $resp = $self->{ua}->request( $req );

    if ( ! $resp->is_success )
    {
        return WebService::Blekko::Result->new( 0, "http failure, code is ".$resp->code, $resp->code );
    }

    # if error, html is returned, even though we said 'format=text'
    # future proofed by also considering 'Error:' to indicate an error
    if ( substr( $resp->content, 0, 1 ) eq '<' || substr( $resp->content, 0, 6 ) eq 'Error:' )
    {
        return WebService::Blekko::Result->new( 0, "No such slashtag or other error", $resp->code );
    }

    my @answer = split /\n/, $resp->content;

    return WebService::Blekko::Result->new( \@answer, 0, $resp->code );
}

=head2 delete_urls( $slashtag, \@urls )

Deletes urls in a slashtag.

=cut

# XXX also &tags= to delete subtags in a slashtag

sub delete_urls
{
    my ( $self, $slashtag, $urls ) = @_; # XXX opts

    if ( defined $urls && ref $urls ne 'ARRAY' )
    {
        return WebService::Blekko::Result->new( 0, "\$urls must be an array ref", 200 );
    }

    my $urls_string = '';
    if ( @$urls )
    {
        @$urls = map { urlencode( $_ ); } @$urls;
        $urls_string = "&urls=" . join( '%0A', @$urls );
    }

    my $template = "%s://%s/tag/edit?submit=1&type=del&name=%s%s%s%s";
    my $url = sprintf( $template, $self->{scheme}, $self->{server}, $slashtag, $urls_string, $self->{auth}, $self->{source} );

    my $req = HTTP::Request->new( 'GET', $url );
    $self->query_sleep();
    my $resp = $self->{ua}->request( $req );

    if ( ! $resp->is_success )
    {
        return WebService::Blekko::Result->new( 0, "http failure, code is ".$resp->code, $resp->code );
    }

    # this always returns javascript :-/ so key off css
    # XXX future-proof me
    if ( $resp->content =~ /alertMsgError/ )
    {
        return WebService::Blekko::Result->new( 0, "No such slashtag or other error", $resp->code );
    }

    return WebService::Blekko::Result->new( 1, 0, $resp->code );
}

=head2 remove_slashtag( $slashtag )

Removes a slashtag.

=cut

sub remove_slashtag
{
    my ( $self, $slashtag ) = @_; # XXX opts

    my $template = "%s://%s/tag/delete?submit=1&name=%s%s%s";
    my $url = sprintf( $template, $self->{scheme}, $self->{server}, $slashtag, $self->{auth}, $self->{source} );

    my $req = HTTP::Request->new( 'GET', $url );
    $self->query_sleep();
    my $resp = $self->{ua}->request( $req );

    if ( ! $resp->is_success )
    {
        return WebService::Blekko::Result->new( 0, "http failure, code is ".$resp->code, $resp->code );
    }

    if ( $resp->content =~ /alertMsgErr/ )
    {
        return WebService::Blekko::Result->new( 0, "Error", $resp->code );
    }

    if ( $resp->content =~ / has been deleted/ )
    {
        return WebService::Blekko::Result->new( 1, 0, $resp->code );
    }

    return WebService::Blekko::Result->new( 0, "Did not see success", $resp->code );
}

# poor man's request rate limiter

sub query_sleep
{
    my ( $self ) = @_;

    my $now = Time::HiRes::time;
    my $delta = 1. / ( $self->{qps} || 1 );

    if ( $now - $self->{last_query} < $delta )
    {
        my $s = $self->{last_query} + $delta - $now;
        Time::HiRes::sleep( $s );
        $self->{last_query} = Time::HiRes::time;
    }
    else
    {
        $self->{last_query} = $now;
    }
}

# ----------------------------------------------------------------------
# to go away
# ----------------------------------------------------------------------

my %escapes;
for (0..255)
{
    $escapes{chr($_)} = sprintf("%%%02X", $_);
}
$escapes{' '} = '+';

sub urlencode
{
    my $url = shift;

    $url =~ s/([^A-Za-z0-9\-_.!~*\'()])/$escapes{$1}/ge if defined $url;
    return $url;
}

# keep the JSON::Boolean bs down to a minimum -- why isn't this an option in JSON?
# I can't be the only person using JSON for non-roundtrip purposes.
sub my_decode_json
{
    my ( $string ) = @_;

    return if ! defined $string || $string eq '';

    my $ret = decode_json( $string ); # XXX needs eval?

    if ( $ret && ref $ret eq 'HASH' )
    {
        foreach my $k ( keys %$ret )
        {
            my $it = $ret->{$k};
            if ( UNIVERSAL::isa( $it, 'JSON::Boolean' ) )
            {
                $ret->{$k} = 1 if $it eq $JSON::true;
                $ret->{$k} = 0 if $it eq $JSON::false;
            }
        }
    }
    return $ret;
}

=head1 SEE ALSO

 L<WebService::Yahoo::BOSS>, L<Google::Search>

=head1 AUTHOR

"Greg Lindahl", E<lt>greg@blekko.comE<gt>

Thanks to Fred Moyer for commenting on the interfaces.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2011 by blekko, inc.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.12.3 or,
at your option, any later version of Perl 5 you may have available.

=cut

1;



Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.