Yet Another CPAN Grep

Mojolicious-Plugin-BlogSpam/lib/Mojolicious/Plugin/BlogSpam.pm

package Mojolicious::Plugin::BlogSpam;
use Mojo::Base 'Mojolicious::Plugin';
use Mojo::URL;
use Mojo::JSON;
use Mojo::Log;
use Mojo::UserAgent;
use Mojo::IOLoop;
use Scalar::Util 'weaken';

our $VERSION = '0.12';

# TODO: X-Forwarded-For in Config per index steuern
# TODO: - Check for blacklist/whitelist/max words etc. yourself.
#       - Create a route condition for posts.
#         -> $r->post('/comment')->over('blogspam')->to('#');

our @OPTION_ARRAY =
  qw/blacklist exclude whitelist mandatory
     max-links max-size min-size min-words/;
     # 'fail' is special, as it is boolean


# Register plugin
sub register {
  my ($plugin, $mojo, $params) = @_;

  $params ||= {};

  # Load parameters from Config file
  if (my $config_param = $mojo->config('BlogSpam')) {
    $params = { %$config_param, %$params };
  };

  # Set server url of BlogSpam instance
  my $url = Mojo::URL->new(
    delete $params->{url} || 'http://test.blogspam.net/'
  );

  # Set port of BlogSpam instance
  $url->port(delete $params->{port} || '8888');

  # Site name
  my $site = delete $params->{site};

  # Add Log
  my $log;
  if (my $log_path = delete $params->{log}) {
    $log = Mojo::Log->new(
      path  => $log_path,
      level => delete $params->{log_level} || 'info'
    );
  };

  my $app_log_clone = $mojo->log;
  weaken $app_log_clone;

  # Get option defaults
  my (%options, $base_options);
  foreach ('fail', @OPTION_ARRAY) {
    $options{$_} = delete $params->{$_} if $params->{$_};
  };
  $base_options = \%options if %options;


  # Add 'blogspam' helper
  $mojo->helper(
    blogspam => sub {
      my $c = shift;

      # Create new BlogSpam::Comment object
      my $obj = Mojolicious::Plugin::BlogSpam::Comment->new(
        url     => $url->to_string,
        log     => $log,
        site    => $site,
        app_log => $app_log_clone,
        client  => __PACKAGE__ . ' v' . $VERSION,
        base_options => $base_options,
        @_
      );

      # Get request headers
      my $headers = $c->req->headers;

      # Set user-agent if not given
      $obj->agent($headers->user_agent) unless $obj->agent;

      # No ip manually given
      unless ($obj->ip) {

        # Get forwarded ip
        if (my $ip = $headers->to_hash->{'X-Forwarded-For'}) {
          $obj->ip( split(/\s*,\s*/, $ip) );
        };

        # Get host ip, because X-Forwarded-For wasn't set
        unless ($obj->ip) {
          $obj->ip( split(/\s*:\s*/, ($headers->host || '')) );
        };
      };

      # Return blogspam object
      return $obj;
    }
  );
};


# BlogSpam object class
package Mojolicious::Plugin::BlogSpam::Comment;
use Mojo::Base -base;


# Attributes
has [qw/comment ip email link name subject agent/];


# Test comment for spam
sub test_comment {
  my $self = shift;

  # Callback for async
  my $cb = pop if $_[-1] && ref $_[-1] && ref $_[-1] eq 'CODE';

  # No IP or comment text defined
  unless ($self->ip && $self->comment) {
    $self->{app_log}->debug('You have to specify ip and comment');
    return;
  };

  # Create option string
  my $option_string = $self->_options(@_);

  # Check for mandatory parameters
  while ($option_string &&
     $option_string =~ m/(?:^|,)mandatory=([^,]+?)(?:,|$)/g) {
    return unless $self->{$1};
  };

  # Create option array if set
  my @options = (options => $option_string) if $option_string;

  # Push site to array if set
  push(@options, site => $self->{site}) if $self->{site};

  # Make xml-rpc call
  if ($cb) {

    # Make call non-blocking
    $self->_xml_rpc_call(
      testComment => (
        %{$self->hash},
        @options
      ) => sub {

        # Analyze response
        return $cb->( $self->_handle_test_response( shift ) );
      }
    );

    # Do not use this value
    return -1;
  };

  # Make call blocking
  my $res = $self->_xml_rpc_call(
    testComment => (
      %{$self->hash},
      @options
    )
  );

  # Analyze response
  return $self->_handle_test_response($res);
};


# Classify a comment as spam or ham
sub classify_comment {
  my $self = shift;
  my $train = lc shift;

  # Callback for async
  my $cb = pop if $_[-1] && ref $_[-1] && ref $_[-1] eq 'CODE';

  # Missing comment and valid train option
  unless ($self->comment && $train && $train =~ /^(?:ok|spam)$/) {
    $self->{app_log}->debug('You have to specify comment and train value');
    return;
  };

  # Create site array if set
  my @site = (site => $self->{site}) if $self->{site};

  # Send xml-rpc call
  if ($cb) {

    # Non-blocking request
    $self->_xml_rpc_call(classifyComment => (
      %{$self->hash},
      train => $train,
      @site,
      sub {
        my $res = shift;
        $cb->($res ? 1 : 0);
      }
    ));

    return;
  };

  # Blocking request
  return 1 if $self->_xml_rpc_call(classifyComment => (
    %{$self->hash},
    train => $train,
    @site
  ));

  return;
};


# Get a list of plugins installed at the BlogSpam instance
sub get_plugins {
  my $self = shift;

  # Callback for async
  my $cb = pop if $_[-1] && ref $_[-1] && ref $_[-1] eq 'CODE';

  # Response of xml-rpc call
  if ($cb) {

    # Non-blocking request
    $self->_xml_rpc_call(
      getPlugins => sub {
        my $res = shift;

        # Analyze response in callback
        return $cb->($self->_handle_plugins_response($res));
      });

    return ();
  };

  # Blocking request
  my $res = $self->_xml_rpc_call('getPlugins');

  # Analyze response
  return $self->_handle_plugins_response($res);
};


# Get statistics of your site from the BlogSpam instance
sub get_stats {
  my $self = shift;

  # Callback for async
  my $cb = pop if $_[-1] && ref $_[-1] && ref $_[-1] eq 'CODE';

  my $site = shift || $self->{site};

  # No site is given
  return unless $site;

  # Send xml-rpc call
  if ($cb) {

    # Send non-blocking request
    my $res = $self->_xml_rpc_call(
      'getStats', $site => sub {
        my $res = shift;
        return $cb->($self->_handle_stats_response($res));
      });

    return;
  };

  # Send blocking request
  my $res = $self->_xml_rpc_call('getStats', $site);
  return $self->_handle_stats_response($res);
};


# Get a hash representation of the comment
sub hash {
  my $self = shift;
  my %hash = %$self;

  # Delete non-comment info
  delete @hash{qw/site app_log url log client base_options/};

  # Delete empty values
  return { map {$_ => $hash{$_} } grep { $hash{$_} } keys %hash };
};


# Handle test_comment response
sub _handle_test_response {
  my ($self, $res)  = @_;

  # No response
  return -1 unless $res;

  # Get response element
  my $response =
    $res->dom->at('methodResponse > params > param > value > string');

  # Unexpected response format
  return -1 unless $response;

  # Get response tag
  $response = $response->all_text;

  # Response string is malformed
  return -1 unless $response =~ /^(OK|ERROR|SPAM)(?:\:\s*(.+?))?$/;

  # Comment is no spam
  return 1 if $1 eq 'OK';

  # Log is defined
  if (my $log = $self->{log}) {

    # Serialize comment
    my $msg = "[$1]: " . ($2 || '') . ' ' .
      Mojo::JSON->new->encode($self->hash);

    # Log error
    if ($1 eq 'ERROR') {
      $log->error($msg);
    }

    # Log spam
    else {
      $log->info($msg);
    };
  };

  # An error occured
  return -1 if $1 eq 'ERROR';

  # The comment is considered spam
  return 0;
};


# Handle get_plugins response
sub _handle_plugins_response {
  my ($self, $res) = @_;

  # Retrieve result
  my $array =
    $res->dom->at('methodResponse > params > param > value > array > data');

  # No plugins installed
  return () unless $array;

  # Convert data to array
  return @{$array->find('string')->map(sub { $_->text })};
};


# Handle get_stats response
sub _handle_stats_response {
  my ($self, $res) = @_;

  # Get response struct
  my $hash =
    $res->dom->at('methodResponse > params > param > value > struct');

  # No response struct defined
  return +{} unless $hash;

  # Convert struct to hash
  return {@{$hash->find('member')->map(
    sub {
      return ($_->at('name')->text, $_->at('value > int')->text);
    })}};
};


# Get options string
sub _options {
  my $self = shift;
  my %options = @_;

  # Create option string
  my @options;
  if (%options || $self->{base_options}) {

    # Get base options from plugin registration
    my $base = $self->{base_options};

    # Check for fail flag
    if (exists $options{fail}) {
      push(@options, 'fail') if $options{fail};
    }

    # Check for fail flag in plugin defaults
    elsif ($base->{fail}) {
      push(@options, 'fail');
    };

    # Check for valid option parameters
    foreach my $n (@Mojolicious::Plugin::BlogSpam::OPTION_ARRAY) {

      # Option flag is not set
      next unless $options{$n} || $base->{$n};

      # Base options
      my $opt = [
        $base->{$n} ? (ref $base->{$n} ? @{$base->{$n}} : $base->{$n}) : ()
      ];

      # Push new options
      push(
        @$opt,
        $options{$n} ? (ref $options{$n} ? @{$options{$_}} : $options{$n}) : ()
      );

      # Option flag is set as an array
      push(@options, "$n=$_") foreach @$opt};
  };

  # return option string
  return join(',', @options) if @options;

  return;
};


# Send xml-rpc call
sub _xml_rpc_call {
  my $self = shift;

  # Callback for async
  my $cb = pop if ref $_[-1] && ref $_[-1] eq 'CODE';

  my ($method_name, $param) = @_;

  # Create user agent
  my $ua = Mojo::UserAgent->new(
    max_redirects => 3,
    name => $self->{client}
  );

  # Start xml document
  my $xml = '<?xml version="1.0"?>' .
    "\n<methodCall><methodName>$method_name</methodName>";

  # Send with params
  if ($param) {
    $xml .= '<params><param><value>';

    # Param is a struct
    if (ref $param) {
      $xml .= '<struct>';

      # Create struct object
      foreach (keys %$param) {
        $xml .= "<member><name>$_</name><value>" .
          '<string>' . $param->{$_} . '</string>' .
          "</value></member>\n" if $param->{$_};
      };

      # End struct
      $xml .= '</struct>';
    }

    # Param is a string
    else {
      $xml .= "<string>$param</string>";
    };

    # End parameter list
    $xml .= '</value></param></params>';
  };

  # End method call
  $xml .= '</methodCall>';

  # Post method call to BlogSpam instance
  if ($cb) {

    # Create delay object
    my $delay = Mojo::IOLoop->delay(
      sub {
        my $tx = pop;

        my $res = $tx->success;

        # Connection failure - accept comment
        unless ($res) {
          # Maybe there needs something to be weakened
          $self->_log_error($tx);
          return;
        };

        # Send response to callback
        $cb->($res);
      }
    );

    # Post non-blocking
    $ua->post($self->{url} => +{} => $xml => $delay->begin);

    # Start IOLoop if not started already
    $delay->wait unless Mojo::IOLoop->is_running;

    return;
  };

  # Post blocking
  my $tx = $ua->post($self->{url} => +{} => $xml);
  my $res = $tx->success;

  # Connection failure - accept comment
  unless ($res) {
    $self->_log_error($tx);
    return;
  };

  # Return response
  return $res;
};


# Log connection_error
sub _log_error {
  my ($self, $tx) = @_;

  my ($err, $code) = $tx->error;
  $code ||= '*';

  $self->{app_log}->warn(
    "Connection error: [$code] $err for " .
      $self->{url}
    );

  return;
};


1;


__END__

=pod

=head1 NAME

Mojolicious::Plugin::BlogSpam - Check your Comments using BlogSpam


=head1 SYNOPSIS

  # In Mojolicious
  $app->plugin('BlogSpam');

  # In Mojolicious::Lite
  plugin 'BlogSpam';

  # In Controller
  my $blogspam = $c->blogspam(
    comment => 'I just want to test the system!'
  );

  # Check for spam
  if ($blogspam->test_comment) {
    print "Your comment is no spam!\n";
  };

  # Even non-blocking
  $blogspam->test_comment(sub {
    print "Your comment is no spam!\n" if shift;
  });

  # Train the system
  $blogspam->classify_comment('ok');


=head1 DESCRIPTION

L<Mojolicious::Plugin::BlogSpam> is a plugin
for L<Mojolicious> to test
comments or posts for spam against a
L<BlogSpam|http://blogspam.net/> instance
(see L<Blog::Spam::API> for the codebase).
It supports blocking as well as non-blocking requests.


=head1 METHODS

L<Mojolicious::Plugin::BlogSpam> inherits all methods
from L<Mojolicious::Plugin> and implements the following new ones.

=head2 register

  # Mojolicious
  $app->plugin(Blogspam => {
    url  => 'blogspam.sojolicious.example',
    port => '8888',
    site => 'http://grimms-abenteuer.de/',
    log  => '/spam.log',
    log_level => 'debug',
    exclude   => 'badip',
    mandatory => [qw/name subject/]
  });

  # Mojolicious::Lite
  plugin 'BlogSpam' => {
    site => 'http://grimms-abenteuer.de/'
  };

  # Or in your config file
  {
    BlogSpam => {
      url => 'blogspam.sojolicious.example',
      site => 'http://grimms-abenteuer.de/',
      port => '8888'
    }
  }

Called when registering the plugin.
Accepts the following optional parameters:

=over 2

=item C<url>

URL of your BlogSpam instance.
Defaults to C<http://test.blogspam.net/>.

=item C<port>

Port of your BlogSpam instance.
Defaults to C<8888>.

=item C<site>

The name of your site to monitor.

=item C<log>

A path to a log file.

=item C<log_level>

The level of logging, based on L<Mojo::Log>.
Spam is logged as C<info>, errors are logged as C<error>.

=back

In addition to these parameters, additional optional parameters
are allowed as defined in the
L<BlogSpam API|http://blogspam.net/api>.
See L</"test_comment"> method below.


=head1 HELPERS

=head2 blogspam

  # In controller:
  my $bs = $c->blogspam(
    comment => 'This is a comment to test the system',
    name => 'Akron'
  );

Returns a new blogspam object, based on the given attributes.


=head1 OBJECT ATTRIBUTES

These attributes are primarily based on
the L<BlogSpam API|http://blogspam.net/api>.

=head2 agent

  $bs->agent('Mozilla/5.0 (X11; Linux x86_64; rv:12.0) ...');
  my $agent = $bs->agent;

The user-agent sending the comment.
Defaults to the user-agent of the request.


=head2 comment

  $bs->comment('This is just a test comment');
  my $comment_text = $bs->comment;

The comment text.


=head2 email

  $bs->email('spammer@sojolicious.example');
  my $email = $bs->email;

The email address of the commenter.


=head2 hash

  my $hash = $bs->hash;

Returns a hash representation of the comment.


=head2 ip

  $bs->ip('192.168.0.1');
  my $ip = $bs->ip;

The ip address of the commenter.
Defaults to the ip address of the request.
Supports C<X-Forwarded-For> proxy information.


=head2 link

  $bs->link('http://grimms-abenteuer.de/');
  my $link = $bs->link;

Homepage link given by the commenter.


=head2 name

  $bs->name('Akron');
  my $name = $bs->name;

Name given by the commenter.


=head2 subject

  $bs->subject('Fun');
  my $subject = $bs->subject;

Subject given by the commenter.


=head1 OBJECT METHODS

These methods are based on the L<BlogSpam API|http://blogspam.net/api>.

=head2 test_comment

  # Blocking
  if ($bs->test_comment(
         mandatory => 'name',
         blacklist => ['192.168.0.1']
      )) {
    print 'Probably ham!';
  } else {
    print 'Spam!';
  };

  # Non-blocking
  $bs->test_comment(
    mandatory => 'name',
    blacklist => ['192.168.0.1'],
    sub {
      my $result = shift;
      print ($result ? 'Probably ham!' : 'Spam!');
    }
  );

Test the comment of the blogspam object for spam or ham.
It's necessary to have a defined comment text and an IP address.
The method returns nothing in case the comment is detected
as spam, C<1> if the comment is detected as ham and C<-1>
if something went horribly, horribly wrong.
Accepts additional option parameters as defined in the
L<BlogSpam API|http://blogspam.net/api>.

=over 2

=item C<blacklist>

Blacklist an IP or an array reference of IPs.
This can be either a literal IP address ("192.168.0.1")
or a CIDR range ("192.168.0.1/8").

=item C<exclude>

Exclude a plugin or an array reference of plugins from testing.
See L</"get_plugins"> for installed plugins of the BlogSpam instance.

=item C<fail>

Boolean flag that will, if set, return every comment as C<spam>.

=item C<mandatory>

Define an attribute (or an array reference of attributes)
of the blogspam object, that should be treated as mandatory
(e.g. "name" or "subject").

=item C<max-links>

The maximum number of links allowed in the comment.
This defaults to 10.

=item C<max-size>

The maximum size of the comment text allowed, given as a
byte expression (e.g. "2k").

=item C<min-size>

The minimum size of the comment text needed, given as a
byte expression (e.g. "2k").

=item C<min-words>

The minimum number of words of the comment text needed.
Defaults to 4.

=item C<whitelist>

Whitelist an IP or an array reference of IPs.
This can be either a literal IP address ("192.168.0.1")
or a CIDR range ("192.168.0.1/8").

=back

For a non-blocking request, append a callback function.
The parameters of the callback are identical to the method's
return values in blocking requests.


=head2 classify_comment

  $bs->classify_comment('ok');
  $bs->classify_comment(ok => sub {
    print 'Done!';
  });


Train the BlogSpam instance based on your
comment definition as C<ok> or C<spam>.
This may help to improve the spam detection.
Expects a defined C<comment> attribute and
a single parameter, either C<ok> or C<spam>.

For a non-blocking request, append a callback function.
The parameters of the callback are identical to the method's
return values in blocking requests.


=head2 get_plugins

  my @plugins = $bs->get_plugins;
  $bs->get_plugins(sub {
    print join ', ', @_;
  });

Requests a list of plugins installed at the BlogSpam instance.

For a non-blocking request, append a callback function.
The parameters of the callback are identical to the method's
return values in blocking requests.

=head2 get_stats

  my $stats = $bs->get_stats;
  my $stats = $bs->get_stats('http://sojolicious.example/');
  $bs->get_stats(sub {
    my $stats = shift;
    ...
  });

Requests a hash reference of statistics for your site
regarding the number of comments detected as C<ok> or C<spam>.
If no C<site> attribute is given (whether as a parameter or when
registering the plugin), this will return nothing.

For a non-blocking request, append a callback function.
The parameters of the callback are identical to the method's
return values in blocking requests.


=head1 DEPENDENCIES

L<Mojolicious>.


=head1 SEE ALSO

L<Blog::Spam::API>,
L<http://blogspam.net/>.


=head1 AVAILABILITY

  https://github.com/Akron/Mojolicious-Plugin-BlogSpam


=head1 PRIVACY NOTE

Be aware that information of your users may be send
to a third party.
This may need to be noted in your privacy policy if you
use a foreign BlogSpam instance, especially if you
are located in the European Union.


=head1 COPYRIGHT AND LICENSE

Copyright (C) 2012-2021, L<Nils Diewald|https://www.nils-diewald.de/>.

This program is free software, you can redistribute it
and/or modify it under the terms of the Artistic License version 2.0.

The API definition as well as the BlogSpam API code were
written and defined by Steve Kemp.

=cut