Yet Another CPAN Grep

Mojolicious-Plugin-BlogSpam/lib/Mojolicious/Plugin/BlogSpam.pm

package Mojolicious::Plugin::BlogSpam;
use Mojo::Base 'Mojolicious::Plugin';
use Mojo::URL;
use Mojo::JSON;
use Mojo::Log;
use Mojo::UserAgent;
use Scalar::Util 'weaken';

our $VERSION = '0.05';

# Todo: - Check for blacklist/whitelist/max words etc. yourself.
#       - Create a route condition for posts.
#         -> $r->post('/comment')->over('blogspam')->to('#');

our @OPTION_ARRAY =
  qw/blacklist exclude whitelist mandatory
     max-links max-size min-size min-words/;
     # 'fail' is special, as it is boolean


# Register plugin
sub register {
  my ($plugin, $mojo, $params) = @_;

  $params ||= {};

  # Load parameter from Config file
  if (my $config_param = $mojo->config('BlogSpam')) {
    $params = { %$config_param, %$params };
  };

  # Set server url of BlogSpam instance
  my $url = Mojo::URL->new(
    delete $params->{url} || 'http://test.blogspam.net/'
  );

  # Set port of BlogSpam instance
  $url->port(delete $params->{port} || '8888');

  # Site name
  my $site = delete $params->{site};

  # Add Log
  my $log;
  if (my $log_path = delete $params->{log}) {
    $log = Mojo::Log->new(
      path  => $log_path,
      level => delete $params->{log_level} || 'info'
    );
  };

  my $app_clone = $mojo;
  weaken $app_clone;

  # Get option defaults
  my (%options, $base_options);
  foreach ('fail', @OPTION_ARRAY) {
    $options{$_} = delete $params->{$_} if $params->{$_};
  };
  $base_options = \%options if %options;


  # Add 'blogspam' helper
  $mojo->helper(
    blogspam => sub {
      my $c = shift;

      # Create new BlogSpam::Comment object
      my $obj = Mojolicious::Plugin::BlogSpam::Comment->new(
	url    => $url->to_string,
	log    => $log,
	site   => $site,
	app    => $app_clone,
	client => __PACKAGE__ . ' v' . $VERSION,
	base_options => $base_options,
	@_
      );

      # Get request headers
      my $headers = $c->req->headers;

      # Set user-agent if not given
      $obj->agent($headers->user_agent) unless $obj->agent;

      # No ip manually given
      unless ($obj->ip) {

	# Get forwarded ip
	if (my $ip = $headers->to_hash->{'X-Forwarded-For'}) {
	  $obj->ip( split(/\s*,\s*/, $ip) );
	}

	# Get host ip
	else {
	  $obj->ip( split(/\s*:\s*/, ($headers->host || '')) );
	};
      };

      # Return blogspam object
      return $obj;
    }
  );
};


# BlogSpam object class
package Mojolicious::Plugin::BlogSpam::Comment;
use Mojo::Base -base;


# Attributes
has [qw/comment ip email link name subject agent/];


# Test comment for spam
sub test_comment {
  my $self = shift;

  # Callback for async
  my $cb = pop if $_[-1] && ref $_[-1] && ref $_[-1] eq 'CODE';

  # No IP or comment text defined
  unless ($self->ip && $self->comment) {
    $self->{app}->log->debug('You have to specify ip and comment');
    return;
  };

  # Create option string
  my $option_string = $self->_options(@_);

  # Check for mandatory parameters
  while ($option_string &&
	   $option_string =~ m/(?:^|,)mandatory=([^,]+?)(?:,|$)/g) {
    return unless $self->{$1};
  };

  # Create option array if set
  my @options = (options => $option_string) if $option_string;

  # Push site to array if set
  push(@options, site => $self->{site}) if $self->{site};

  # Make xml-rpc call
  if ($cb) {

    # Make call non-blocking
    $self->_xml_rpc_call(
      testComment => (
	%{$self->hash},
	@options
      ) => sub {

	# Analyze response
	return $cb->( $self->_handle_test_response( shift ) );
      }
    );

    # Do not use this value
    return -1;
  };

  # Make call blocking
  my $res = $self->_xml_rpc_call(
    testComment => (
      %{$self->hash},
      @options
    )
  );

  # Analyze response
  return $self->_handle_test_response($res);
};


# Classify a comment as spam or ham
sub classify_comment {
  my $self = shift;
  my $train = shift;

  # Callback for async
  my $cb = pop if $_[-1] && ref $_[-1] && ref $_[-1] eq 'CODE';

  # Missing comment and valid train option
  unless ($self->comment && $train && $train =~ /^(?:ok|spam)$/) {
    $self->{app}->log->debug('You have to specify comment and train value');
    return;
  };

  # Create site array if set
  my @site = (site => $self->{site}) if $self->{site};

  # Send xml-rpc call
  if ($cb) {

    # Non-blocking request
    $self->_xml_rpc_call(classifyComment => (
      %{$self->hash},
      train => $train,
      @site,
      sub {
	my $res = shift;
	$cb->($res ? 1 : 0);
      }
    ));

    return;
  };

  # Blocking request
  return 1 if $self->_xml_rpc_call(classifyComment => (
    %{$self->hash},
    train => $train,
    @site
  ));

  return;
};


# Get a list of plugins installed at the BlogSpam instance
sub get_plugins {
  my $self = shift;

  # Callback for async
  my $cb = pop if $_[-1] && ref $_[-1] && ref $_[-1] eq 'CODE';

  # Response of xml-rpc call
  if ($cb) {

    # Non-blocking request
    $self->_xml_rpc_call(
      getPlugins => sub {
	my $res = shift;

	# Analyze response in callback
	return $cb->($self->_handle_plugins_response($res));
      });

    return ();
  };

  # Blocking request
  my $res = $self->_xml_rpc_call('getPlugins');

  # Analyze response
  return $self->_handle_plugins_response($res);
};


# Get statistics of your site from the BlogSpam instance
sub get_stats {
  my $self = shift;

  # Callback for async
  my $cb = pop if $_[-1] && ref $_[-1] && ref $_[-1] eq 'CODE';

  my $site = shift || $self->{site};

  # No site is given
  return unless $site;

  # Send xml-rpc call
  if ($cb) {

    # Send non-blocking request
    my $res = $self->_xml_rpc_call(
      'getStats', $site => sub {
	my $res = shift;
	return $cb->($self->_handle_stats_response($res));
      });

    return;
  };

  # Send blocking request
  my $res = $self->_xml_rpc_call('getStats', $site);
  return $self->_handle_stats_response($res);
};


# Get a hash representation of the comment
sub hash {
  my $self = shift;
  my %hash = %$self;

  # Delete non-comment info
  delete @hash{qw/site app url log client base_options/};

  # Delete empty values
  return { map {$_ => $hash{$_} } grep { $hash{$_} } keys %hash };
};


# Handle test_comment response
sub _handle_test_response {
  my ($self, $res)  = @_;

  # No response
  return -1 unless $res;

  # Get response element
  my $response =
    $res->dom->at('methodResponse > params > param > value > string');

  # Unexpected response format
  return -1 unless $response;

  # Get response tag
  $response = $response->all_text;

  # Response string is malformed
  return -1 unless $response =~ /^(OK|ERROR|SPAM)(?:\:\s*(.+?))?$/;

  # Comment is no spam
  return 1 if $1 eq 'OK';

  # Log is defined
  if (my $log = $self->{log}) {

    # Serialize comment
    my $msg = "[$1]: " . ($2 || '') . ' ' .
      Mojo::JSON->new->encode($self->hash);

    # Log error
    if ($1 eq 'ERROR') {
      $log->error($msg);
    }

    # Log spam
    else {
      $log->info($msg);
    };
  };

  # An error occured
  return -1 if $1 eq 'ERROR';

  # The comment is considered spam
  return 0;
};


# Handle get_plugins response
sub _handle_plugins_response {
  my ($self, $res) = @_;

  # Retrieve result
  my $array =
    $res->dom->at('methodResponse > params > param > value > array > data');

  # No plugins installed
  return () unless $array;

  # Convert data to array
  return @{$array->find('string')->map(sub { $_->text })};
};


# Handle get_stats response
sub _handle_stats_response {
  my ($self, $res) = @_;

  # Get response struct
  my $hash =
    $res->dom->at('methodResponse > params > param > value > struct');

  # No response struct defined
  return {} unless $hash;

  # Convert struct to hash
  return {@{$hash->find('member')->map(
    sub {
      return ($_->at('name')->text, $_->at('value > int')->text);
    })}};
};


# Get options string
sub _options {
  my $self = shift;
  my %options = @_;

  # Create option string
  my @options;
  if (%options || $self->{base_options}) {

    # Get base options from plugin registration
    my $base = $self->{base_options};

    # Check for fail flag
    if (exists $options{fail}) {
      push(@options, 'fail') if $options{fail};
    }

    # Check for fail flag in plugin defaults
    elsif ($base->{fail}) {
      push(@options, 'fail');
    };

    # Check for valid option parameters
    foreach my $n (@Mojolicious::Plugin::BlogSpam::OPTION_ARRAY) {

      # Option flag is not set
      next unless $options{$n} || $base->{$n};

      # Base options
      my $opt = [
	$base->{$n} ? (ref $base->{$n} ? @{$base->{$n}} : $base->{$n}) : ()
      ];

      # Push new options
      push(
	@$opt,
	$options{$n} ? (ref $options{$n} ? @{$options{$_}} : $options{$n}) : ()
      );

      # Option flag is set as an array
      push(@options, "$n=$_") foreach @$opt};
  };

  # return option string
  return join(',', @options) if @options;
  return;
};


# Send xml-rpc call
sub _xml_rpc_call {
  my $self = shift;

  # Callback for async
  my $cb = pop if ref $_[-1] && ref $_[-1] eq 'CODE';

  my ($method_name, $param) = @_;

  # Create user agent
  my $ua = Mojo::UserAgent->new(
    max_redirects => 3,
    name => $self->{client}
  );

  # Start xml document
  my $xml = '<?xml version="1.0"?>' .
    "\n<methodCall><methodName>$method_name</methodName>";

  # Send with params
  if ($param) {
    $xml .= '<params><param><value>';

    # Param is a struct
    if (ref $param) {
      $xml .= '<struct>';

      # Create struct object
      foreach (keys %$param) {
	$xml .= "<member><name>$_</name><value>" .
	        '<string>' . $param->{$_} . '</string>' .
	        "</value></member>\n" if $param->{$_};
      };

      # End struct
      $xml .= '</struct>';
    }

    # Param is a string
    else {
      $xml .= "<string>$param</string>";
    };

    # End parameter list
    $xml .= '</value></param></params>';
  };

  # End method call
  $xml .= '</methodCall>';

  # Post method call to BlogSpam instance
  if ($cb) {

    # Post non-blocking
    $ua->post(
      $self->{url} => {} => $xml => sub {
	my $tx = pop;

	my $res = $tx->success;

	# Connection failure - accept comment
	unless ($res) {
	  $self->_log_error($tx);
	  return;
	};

	# Send response to callback
	$cb->($res);
	$ua = undef;
      });
    Mojo::IOLoop->start unless Mojo::IOLoop->is_running;

    return;
  };

  # Post blocking
  my $tx = $ua->post($self->{url} => {} => $xml);
  my $res = $tx->success;

  # Connection failure - accept comment
  unless ($res) {
    $self->_log_error($tx);
    return;
  };

  # Return response
  return $res;
};


# Log connection_error
sub _log_error {
  my $self = shift;
  my $tx = shift;

  my ($err, $code) = $tx->error;
  $self->{app}->log->warn(
    'Connection error: [' . ($code || '*') . "] $code for " .
      $self->{url}
    );

  return;
};


1;


__END__

=pod

=head1 NAME

Mojolicious::Plugin::BlogSpam - Check your comments using BlogSpam


=head1 SYNOPSIS

  # In Mojolicious
  $app->plugin('BlogSpam');

  # In Mojolicious::Lite
  plugin 'BlogSpam';

  # In Controller
  my $blogspam = $c->blogspam(
    comment => 'I just want to test the system!'
  );

  # Check for spam
  if ($blogspam->test_comment) {
    print "Your comment is no spam!\n";
  };

  # Even non-blocking
  $blogspam->test_comment(sub {
    print "Your comment is no spam!\n" if shift;
  });

  # Train the system
  $blogspam->classify_comment('ok');


=head1 DESCRIPTION

L<Mojolicious::Plugin::BlogSpam> is a plugin
for L<Mojolicious> to test
comments or posts for spam against a
L<BlogSpam|http://blogspam.net/> instance
(see L<Blog::Spam::API> for the codebase).
It supports blocking as well as non-blocking requests.


=head1 METHODS

=head2 C<register>

  # Mojolicious
  $app->plugin(Blogspam => {
    url  => 'blogspam.sojolicio.us',
    port => '8888',
    site => 'http://grimms-abenteuer.de/',
    log  => '/spam.log',
    log_level => 'debug',
    exclude   => 'badip',
    mandatory => [qw/name subject/]
  });

  # Mojolicious::Lite
  plugin 'BlogSpam' => {
    site => 'http://grimms-abenteuer.de/'
  };

  # Or in your config file
  {
    BlogSpam => {
      url => 'blogspam.sojolicio.us',
      site => 'http://grimms-abenteuer.de/',
      port => '8888'
    }
  }

Called when registering the plugin.
Accepts the following optional parameters:

=over 2

=item C<url>

URL of your BlogSpam instance.
Defaults to C<http://test.blogspam.net/>.

=item C<port>

Port of your BlogSpam instance.
Defaults to C<8888>.

=item C<site>

The name of your site to monitor.

=item C<log>

A path to a log file.

=item C<log_level>

The level of logging, based on L<Mojo::Log>.
Spam is logged as C<info>, errors are logged as C<error>.

=back

In addition to these parameters, additional optional parameters
are allowed as defined in the
L<BlogSpam API|http://blogspam.net/api>.
See L</"test_comment"> method below.


=head1 HELPERS

=head2 C<blogspam>

  # In controller:
  my $bs = $c->blogspam(
    comment => 'This is a comment to test the system',
    name => 'Akron'
  );

Returns a new blogspam object, based on the given attributes.


=head1 OBJECT ATTRIBUTES

These attributes are primarily based on
the L<BlogSpam API|http://blogspam.net/api>.

=head2 C<agent>

  $bs->agent('Mozilla/5.0 (X11; Linux x86_64; rv:12.0) ...');
  my $agent = $bs->agent;

The user-agent sending the comment.
Defaults to the user-agent of the request.


=head2 C<comment>

  $bs->comment('This is just a test comment');
  my $comment_text = $bs->comment;

The comment text.


=head2 C<email>

  $bs->email('spammer@sojolicio.us');
  my $email = $bs->email;

The email address of the commenter.


=head2 C<hash>

  my $hash = $bs->hash;

Returns a hash representation of the comment.


=head2 C<ip>

  $bs->ip('192.168.0.1');
  my $ip = $bs->ip;

The ip address of the commenter.
Defaults to the ip address of the request.
Supports C<X-Forwarded-For> proxy information.


=head2 C<link>

  $bs->link('http://grimms-abenteuer.de/');
  my $link = $bs->link;

Homepage link given by the commenter.


=head2 C<name>

  $bs->name('Akron');
  my $name = $bs->name;

Name given by the commenter.


=head2 C<subject>

  $bs->subject('Fun');
  my $subject = $bs->subject;

Subject given by the commenter.


=head1 OBJECT METHODS

These methods are based on the L<BlogSpam API|http://blogspam.net/api>.

=head2 C<test_comment>

  # Blocking
  if ($bs->test_comment(
         mandatory => 'name',
         blacklist => ['192.168.0.1']
      )) {
    print 'Probably ham!';
  } else {
    print 'Spam!';
  };

  # Non-blocking
  $bs->test_comment(
    mandatory => 'name',
    blacklist => ['192.168.0.1'],
    sub {
      my $result = shift;
      print ($result ? 'Probably ham!' : 'Spam!');
    }
  );

Test the comment of the blogspam object for spam or ham.
It's necessary to have a defined comment text and an IP address.
The method returns nothing in case the comment is detected
as spam, C<1> if the comment is detected as ham and C<-1>
if something went horribly, horribly wrong.
Accepts additional option parameters as defined in the
L<BlogSpam API|http://blogspam.net/api>.

=over 2

=item C<blacklist>

Blacklist an IP or an array reference of IPs.
This can be either a literal IP address ("192.168.0.1")
or a CIDR range ("192.168.0.1/8").

=item C<exclude>

Exclude a plugin or an array reference of plugins from testing.
See L</"get_plugins"> for installed plugins of the BlogSpam instance.

=item C<fail>

Boolean flag that will, if set, return every comment as C<spam>.

=item C<mandatory>

Define an attribute (or an array reference of attributes)
of the blogspam object, that should be treated as mandatory
(e.g. "name" or "subject").

=item C<max-links>

The maximum number of links allowed in the comment.
This defaults to 10.

=item C<max-size>

The maximum size of the comment text allowed, given as a
byte expression (e.g. "2k").

=item C<min-size>

The minimum size of the comment text needed, given as a
byte expression (e.g. "2k").

=item C<min-words>

The minimum number of words of the comment text needed.
Defaults to 4.

=item C<whitelist>

Whitelist an IP or an array reference of IPs.
This can be either a literal IP address ("192.168.0.1")
or a CIDR range ("192.168.0.1/8").

=back

For a non-blocking request, append a callback function.
The parameters of the callback are identical to the method's
return values in blocking requests.


=head2 C<classify_comment>

  $bs->classify_comment('ok');
  $bs->classify_comment(ok => sub {
    print 'Done!';
  });


Train the BlogSpam instance based on your
comment definition as C<ok> or C<spam>.
This may help to improve the spam detection.
Expects a defined C<comment> attribute and
a single parameter, either C<ok> or C<spam>.

For a non-blocking request, append a callback function.
The parameters of the callback are identical to the method's
return values in blocking requests.


=head2 C<get_plugins>

  my @plugins = $bs->get_plugins;
  $bs->get_plugins(sub {
    print join ', ', @_;
  });

Requests a list of plugins installed at the BlogSpam instance.

For a non-blocking request, append a callback function.
The parameters of the callback are identical to the method's
return values in blocking requests.

=head2 C<get_stats>

  my $stats = $bs->get_stats;
  my $stats = $bs->get_stats('http://sojolicio.us/');
  $bs->get_stats(sub {
    my $stats = shift;
  });

Requests a hash reference of statistics for your site
regarding the number of comments detected as C<ok> or C<spam>.
If no C<site> attribute is given (whether as a parameter or when
registering the plugin), this will return nothing.

For a non-blocking request, append a callback function.
The parameters of the callback are identical to the method's
return values in blocking requests.


=head1 DEPENDENCIES

L<Mojolicious>.


=head1 SEE ALSO

L<Blog::Spam::API>,
L<http://blogspam.net/>.


=head1 AVAILABILITY

  https://github.com/Akron/Mojolicious-Plugin-BlogSpam


=head1 COPYRIGHT AND LICENSE

Copyright (C) 2012, Nils Diewald.

This program is free software, you can redistribute it
and/or modify it under the same terms as Perl.

The API definition as well as the BlogSpam API code were
written and defined by Steve Kemp.

Be aware that information of your users may be send
to a third party.
This should be noted in your privacy policy if you
use a foreign BlogSpam instance.

=cut