Group
Extension

App-Twitch/lib/App/Twitch.pm

package App::Twitch;
BEGIN {
  $App::Twitch::AUTHORITY = 'cpan:GETTY';
}
BEGIN {
  $App::Twitch::VERSION = '0.904';
}
# ABSTRACT: Your personal Twitter b...... lalalala

# Speed up complete POE Kernel
sub POE::Kernel::USE_SIGCHLD () { 1 }
use MooseX::POE;

with qw(
	MooseX::Getopt
	MooseX::SimpleConfig
	MooseX::LogDispatch
	MooseX::Daemonize
);

use POE qw(
	Component::Client::HTTP
	Component::Client::Keepalive
	Component::FeedAggregator
	Component::WWW::Shorten
);

use HTTP::Request;
use Text::Trim;
use URI;
use POSIX;
use IO::All;
use String::Truncate qw(elide);
use utf8;
use Text::Keywords;
use Text::Keywords::Container;
use Text::Keywords::List;
use Text::Tweet;
use HTML::ExtractContent;
use Carp qw( croak );
use YAML qw( DumpFile );

# could be ... ah forget it :-P
use Net::Twitter;

our $VERSION ||= '0.0development';

before run => sub {
	my $package = __PACKAGE__;
	print <<"END_OF_INTRO";
 _            _ _       _
| |_ _      _(_) |_ ___| |___
|  _\\ \\ /\\ / / | __/ __| '_  \\
| |_ \\ V  V /| | || (__| | | | $package $VERSION
 \\__| \\_/\\_/ |_|\\__\\___|_| |_| Usage with --help

More information under http://search.cpan.org/perldoc/twitch

Commands:

  start   Start the twitch
  stop    Stop the running twitch
  restart Restart the twitch, or start it up if not running
  status  Give current status of the twitch

END_OF_INTRO
};

after start => sub {
	my $self = shift;
	return unless $self->is_daemon;
	# Required, elsewhere your POE goes nuts
	POE::Kernel->has_forked if !$self->foreground;
	POE::Kernel->run;
};

has '+pidbase' => (
	default => sub { shift->tmpdir },
	documentation => 'Directory for the pid file (default: tmpdir)',
);

has '+pidfile' => (
	documentation => 'Filename for the pidfile (default: basedir/progname.pid)',
);

has '+progname' => (
	default => sub { 'twitch' },
	documentation => 'Name for the application, like configfile name base and so on (default: twitch)',
);

has '+foreground' => (
	documentation => 'Run on the console and don\'t detach into background (default: off)',
);

has '+configfile' => (
	default => sub { 'twitch.yml' },
	documentation => 'Configuration file used for all those settings (default: twitch.yml)',
);

has 'configdir' => (
	is => 'ro',
	isa => 'Str',
	default => sub { getcwd },
	documentation => 'Directory for the keyword files and the feed file if given (default: current directory)',
);

has log_dispatch_conf => (
	is => 'ro',
	isa => 'HashRef',
	lazy => 1,
	required => 1,
	traits => [ 'NoGetopt' ],
	default => sub {
		my $self = shift;
		return {} if $self->no_logging;
		my $format = '[%d] [%p] %m';
		my $minlevel = $self->debug ? 'debug' : 'info';
		if ($self->foreground || !$self->logfile) {
			return {
				class     => 'Log::Dispatch::Screen',
				min_level => $minlevel,
				stderr    => 1,
				format    => $format,
				newline		=> 1,
			}
		} else {
			return {
				class		=> 'Log::Dispatch::File',
				min_level	=> $minlevel,
				filename	=> $self->configdir.'/'.$self->logfile,
				mode		=> '>>',
				format		=> $format,
				newline		=> 1,
			}
		}
	},
);

has consumer_key => (
	isa => 'Str',
	is => 'ro',
	required => 1,
	documentation => 'Consumer Key of your Twitter application',
);

has consumer_secret => (
	isa => 'Str',
	is => 'ro',
	required => 1,
	documentation => 'Consumer Secret of your Twitter application',
);

has access_token => (
	isa => 'Str',
	is => 'ro',
	required => 1,
	documentation => 'Access Token of the Twitter user for the application',
);

has access_token_secret => (
	isa => 'Str',
	is => 'ro',
	required => 1,
	documentation => 'Access Token Secret of the Twitter user for the application',
);

has feeds => (
	traits  => [ 'Array' ],
	is      => 'ro',
	isa     => 'ArrayRef[Str]',
	default => sub {
		my $self = shift;
		my @lines;
		if ($self->feeds_file) {
			@lines = grep {
				$_ = trim($_);
				/^http:\/\//
			} io($self->configdir.'/'.$self->feeds_file)->slurp;
		}
		return \@lines;
	},
	#documentation => 'Feeds (must be given via config file as array [TODO])',
	handles => {
		feeds_shift => 'shift',
		feeds_count => 'count',
	},
);

has feeds_file => (
	isa => 'Str',
	is => 'ro',
	default => sub { 'feeds.txt' },
	documentation => 'File with the list of Feeds (one per line, default: feeds.txt)',
);

has feed_delay => (
	isa => 'Int',
	is => 'ro',
	required => 1,
	default => sub { 600 },
	documentation => 'How often every feed should be checked in seconds (default: 600)',
);

has hashtags_at_end => (
	is => 'ro',
	isa => 'Bool',
	required => 1,
	default => sub { 0 },
	documentation => 'Put all hashtag keywords after the URL (default: 0)',
);

has dryrun => (
	is => 'ro',
	isa => 'Bool',
	required => 1,
	default => sub { 0 },
	documentation => 'Do not actually generate tweets, but do all other steps (default: 0)',
);

has dryrun_url => (
	is => 'ro',
	isa => 'Str',
	required => 1,
	default => sub { 'http://xrl.us/DrYRuN' },
	documentation => 'ShortenURL used for the dryrun debugging informations (default: http://xrl.us/DrYRuN)',
);

has tweet_everything => (
	is => 'ro',
	isa => 'Bool',
	required => 1,
	default => sub { 0 },
	documentation => 'Do not require a trigger keyword in the RSS for a tweet (default: 0)',
);

has triggercontainer => (
	traits  => [ 'Array' ],
	is      => 'ro',
	isa     => 'ArrayRef',
	default => sub {[]},
	documentation => 'Give list of triggering keyword files (comma seperated list of filenames)',
);

has container => (
	traits  => [ 'Array' ],
	is      => 'ro',
	isa     => 'ArrayRef',
	default => sub {[]},
	documentation => 'Give list of keyword files (comma seperated list of filenames)',
);

has blockercontainer => (
	traits  => [ 'Array' ],
	is      => 'ro',
	isa     => 'ArrayRef',
	default => sub {[]},
	documentation => 'Give list of keyword files, which block tweeting that entry (comma seperated list of filenames)',
);

has shorten_type => (
	isa => 'Str',
	is => 'ro',
	required => 1,
	default => sub { 'Metamark' },
	documentation => 'Which shorten service to be used, see WWW::Shorten (default: Metamark)',
);

has shorten_params => (
	isa => 'ArrayRef',
	is => 'ro',
	required => 1,
	default => sub {[]},
	documentation => 'Parameter used for the WWW::Shorten call, see WWW::Shorten (default: none)',
);

has tmpdir => (
	isa => 'Str',
	is => 'ro',
	required => 1,
	default => sub { getcwd },
	documentation => 'Temp directory for the application (default: current directory)',
);

has debug => (
	isa => 'Bool',
	is => 'ro',
	default => sub { 0 },
	documentation => 'Write debugging into logfile (default: 0)',
);

has no_logging => (
	isa => 'Bool',
	is => 'ro',
	default => sub { 0 },
	documentation => 'Do not log on screen or file (default: 0)',
);

has logfile => (
	isa => 'Str',
	is => 'ro',
	default => sub { shift->progname.'.log' },
	documentation => 'Name of the logfile in the configuration directory (default: twitch.log)',
);

has ignore_first => (
	isa => 'Str',
	is => 'ro',
	required => 1,
	default => sub { 1 },
	documentation => 'When no cache file exist, ignore the first incoming feed news (default: 1)',
);

has http_agent => (
	isa => 'Str',
	is => 'ro',
	default => sub { __PACKAGE__.'/'.$VERSION },
	documentation => 'HTTP-agent to be used for the HTTP request to fetch the content (default: '.__PACKAGE__.'/'.$VERSION.')',
);

has [ '+no_double_fork', '+ignore_zombies', '+dont_close_all_files', '+stop_timeout' ] => (
	documentation => 'Please see MooseX::Daemonize documentation',
);

#--------------------------------------------------------

has '+basedir' => (
	default => sub { shift->tmpdir },
	traits => [ 'NoGetopt' ],
);

has [ '+logger', '+use_logger_singleton', '+help_flag' ] => (
	traits => [ 'NoGetopt' ],
);

sub _generate_containers {
	my ( $self, $array, $params ) = @_;
	my @containers;
	$params = {} if !$params;
	for (@{$array}) {
		$self->logger->debug($self->logger_prefix.'Preparing Keywords::Container for blockercontainer');
		my @lists;
		for (split(',',$_)) {
			$self->logger->debug($self->logger_prefix.'Preparing Keywords::List '.$_);
			my @lines = grep { $_ = trim($_); } io($self->configdir.'/'.$_)->slurp;
			push @lists, Text::Keywords::List->new(
				keywords => \@lines,
			);
		}
		push @containers, Text::Keywords::Container->new(
			lists => \@lists,
			params => $params,
		);
	}
	return @containers;
}

has _containers => (
	traits  => [ 'NoGetopt', 'Array' ],
	is      => 'ro',
	isa     => 'ArrayRef[Text::Keywords::Container]',
	lazy    => 1,
	default => sub {
		my ( $self ) = @_;
		$self->logger->debug($self->logger_prefix.'Generating all Keywords::Container');
		my @containers;
		push @containers, $self->_generate_containers($self->blockercontainer, { blocker => 1 });
		push @containers, $self->_generate_containers($self->triggercontainer, { trigger => 1 });
		push @containers, $self->_generate_containers($self->container);
		return \@containers;
	},
);

has _max_feeds_count => (
	traits => [ 'NoGetopt' ],
	isa => 'Int',
	is => 'ro',
	default => sub { shift->feeds_count },
);

has _feedaggregator => (
	traits  => [ 'NoGetopt' ],
	isa => 'POE::Component::FeedAggregator',
	is => 'ro',
	lazy => 1,
	default => sub {
		my $self = shift;
		$self->logger->debug($self->logger_prefix.'Starting POE::Component::FeedAggregator');
		POE::Component::FeedAggregator->new(
			tmpdir     => $self->tmpdir,
			logger     => $self->logger,
			http_agent => $self->http_agent,
		);
	},
);

has _twitter => (
	traits  => [ 'NoGetopt' ],
	isa => 'Net::Twitter',
	is => 'ro',
	lazy => 1,
	default => sub {
		my $self = shift;
		$self->logger->debug($self->logger_prefix.'Starting Net::Twitter');
		Net::Twitter->new(
			traits   => [qw/ API::REST API::Search OAuth /],
			consumer_key		=> $self->consumer_key,
			consumer_secret		=> $self->consumer_secret,
			access_token		=> $self->access_token,
			access_token_secret	=> $self->access_token_secret,
		),
	},
);

has _session => (
	is => 'rw',
	isa => 'POE::Session',
	traits => [ 'NoGetopt' ],
);

has _keywords => (
	is => 'ro',
	isa => 'Text::Keywords',
	traits => [ 'NoGetopt' ],
	lazy => 1,
	default => sub {
		Text::Keywords->new(
			containers => shift->_containers,
		)
	},
);

has _tweet => (
	is => 'ro',
	isa => 'Text::Tweet',
	traits => [ 'NoGetopt' ],
	lazy => 1,
	default => sub {
		Text::Tweet->new()
	},
);

has _http_alias => (
	is => 'rw',
	isa => 'Str',
	traits => [ 'NoGetopt' ],
	default => sub { 'http' },
);

has _keepalive => (
	isa => 'POE::Component::Client::Keepalive',
	is => 'ro',
	traits => [ 'NoGetopt' ],
	lazy => 1,
	default => sub {
		my ( $self ) = @_;
		$self->logger->debug($self->logger_prefix.'Startup POE::Component::Client::Keepalive');
		POE::Component::Client::Keepalive->new(
			keep_alive    => 20, # seconds to keep connections alive
			max_open      => 100, # max concurrent connections - total
			max_per_host  => 100, # max concurrent connections - per host
			timeout       => 10, # max time (seconds) to establish a new connection
		)
	},
);

has _shorten => (
	isa => 'POE::Component::WWW::Shorten',
	is => 'ro',
	traits => [ 'NoGetopt' ],
	lazy => 1,
	default => sub {
		my ( $self ) = @_;
		$self->logger->debug($self->logger_prefix.'Startup '.$self->shorten_type.' Shorten Service...');
		return POE::Component::WWW::Shorten->spawn(
			alias => $self->_shorten_alias,
			type => $self->shorten_type,
			params => $self->shorten_params,
		);
	},
);

has _shorten_alias => (
	is => 'rw',
	isa => 'Str',
	traits => [ 'NoGetopt' ],
	default => sub { 'shorten' },
);

has _entry_count => (
	traits  => [ 'Counter', 'NoGetopt' ],
	is      => 'ro',
	isa     => 'Num',
	default => 0,
	handles => {
		_entry_count_inc => 'inc',
	},
);

sub running_config { shift->_running_config(@_) }

has _running_config => (
	is => 'rw',
	isa => 'HashRef',
	traits => [ 'NoGetopt' ],
	lazy => 1,
	default => sub {
		my $self = shift;
		my %attributes;
		for ($self->meta->get_all_attributes) {
			if (!$_->does('MooseX::Getopt::Meta::Attribute::Trait::NoGetopt')) {
				my $value = $_->get_value($self);
				$value = $value->file if (blessed $value and $value->isa('MooseX::Daemonize::Pid::File'));
				$value = $value->stringify if (blessed $value and ( $value->isa('Path::Class::Dir') or $value->isa('Path::Class::File') ));
				$attributes{$_->name} = $value;
			}
		}
		$attributes{INC} = \@INC;
		$attributes{PACKAGE} = __PACKAGE__;
		$attributes{getcwd} = getcwd;
		$attributes{getpid} = getpid;
		$attributes{started_time} = time;
		return \%attributes;
	},
);

sub set_process_name {
	my ( $self ) = @_;
	$0 = __PACKAGE__.' '.$VERSION.' using '.( getcwd ).'/'.$self->configfile;
}

sub run {
	my ( $self ) = @_;
	POE::Kernel->run;
	if (!blessed $self) {
		$self = $self->new_with_options;
	}
	my ( $cmd ) = @{$self->extra_argv};
	$cmd = 'start' if !$cmd;

	print "Using configfile: ".( getcwd ).'/'.$self->configfile."\n";
	print "Status: ".$self->status_message."\n" if ($cmd ne 'status' && $self->status);
	print "\n";

	die __PACKAGE__." already running...\n" if ($cmd eq 'start' && $self->status);
	die __PACKAGE__." not running...\n" if ($cmd eq 'stop' && !$self->status);

	if ($cmd eq 'start') {
		print "Starting up ".__PACKAGE__."...\n";
		$self->start;
	};
	if ($cmd eq 'status') {
		print "Status: ".__PACKAGE__." is ".( $self->status ? '' : 'not ')."running...\n";
		exit $self->status ? 0 : 1;
	}
	if ($cmd eq 'restart') {
		if ($self->status) {
			print __PACKAGE__." not running, starting up...\n";
		} else {
			print __PACKAGE__." restarting...\n";
		}
		$self->restart;
	}
	if ($cmd eq 'stop') {
		print "Stopping ".__PACKAGE__."...\n";
		$self->stop;
	};
	
	exit $self->exit_code;
}

sub START {
	my ( $self, $session ) = @_[ OBJECT, SESSION ];
	$self->set_process_name;
	$self->logger->info($self->logger_prefix.'Starting up App::Twitch '.$App::Twitch::VERSION.'... ');
	$self->logger->debug($self->logger_prefix.'Assigning POE::Session');
	$self->_session($session);
	$self->_containers;
	$self->_twitter if !$self->dryrun;
	$self->_tweet;
	$self->_keywords;
	$self->_feedaggregator;
	$self->_shorten if !$self->dryrun;
	$self->logger->info($self->logger_prefix.'Startup HTTP Service...');
	POE::Component::Client::HTTP->spawn(
		Agent				=> $self->http_agent,
		Alias				=> $self->_http_alias,
		Timeout				=> 30,
		ConnectionManager	=> $self->_keepalive,
		FollowRedirects		=> 5,
	);
	$self->_max_feeds_count;
	my $running_config_dumpfile = $self->tmpdir.'/'.$self->configfile;
	$running_config_dumpfile =~ s/\.yml/\.running_config\.yml/;
	DumpFile($running_config_dumpfile,$self->running_config);
	chmod 0600, $running_config_dumpfile;
	$self->yield('add_feed');
}

event add_feed => sub {
	my ( $self, $kernel ) = @_[ OBJECT, KERNEL ];
	my $feed_url = $self->feeds_shift;
	$self->logger->info($self->logger_prefix.'Adding feed: '.$feed_url);
	eval {
		my $feed = {
			url				=> $feed_url,
			delay			=> $self->feed_delay,
			max_headlines	=> 100,
			ignore_first	=> $self->ignore_first,
		};
		$self->_feedaggregator->add_feed($feed);
	};
	$self->logger->error($self->logger_prefix.'ERROR ['.$feed_url.']: '.$@) if $@;
	my $delay = floor( $self->feed_delay / $self->_max_feeds_count );
	$kernel->delay('add_feed',$delay) if $self->feeds_count;
};

event new_feed_entry => sub {
	my ( $self, $feed, $entry ) = @_[ OBJECT, ARG0..$#_ ];
	$self->_entry_count_inc;
	my $url = $entry->link;
	$url =~ s/ //g;
	my $event = {
		entry => $entry,
		url => $url,
		run_id => $self->_entry_count,
	};
	$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} New feed entry: '.$url);
	POE::Kernel->post(
		$self->_http_alias,
		'request',
		'new_content',
		HTTP::Request->new(GET => $url),
		$event,
	);
};

use Encode;
require Encode::Detect;

event new_content => sub {
	my ( $self, $request_packet, $response_packet ) = @_[ OBJECT, ARG0..$#_ ];
	my $event = $request_packet->[1];
	my $response = $response_packet->[0];
	eval {
		if ($response->code == 200) {
			my $extractor = HTML::ExtractContent->new;
			my $content = $response->decoded_content;
			my $title = $event->{entry}->title;
			if (!utf8::is_utf8($content)) {
				$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} No utf8, trying recode content');
				$content = decode("Detect", $content);
			}
			if (utf8::is_utf8($content)) {
				$extractor->extract($content);
				my $extracted_text = $extractor->as_text;
				$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} Extracted content with '.length($extracted_text).' chars');
				$event->{content} = $extracted_text;
				my @keywords = $self->_keywords->from($title, $extracted_text);
				if ($self->debug && @keywords) {
					my @keywords_text;
					push @keywords_text, $_->found for (@keywords);
					$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} Keywords found: '.join(", ",@keywords_text));
				}
				if ( $keywords[0] && $keywords[0]->container->params->{blocker} ) {
					$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} Blocker found, ignoring entry');
				} elsif ( $self->tweet_everything || ( $keywords[0] && $keywords[0]->container->params->{trigger} ) ) {
					$event->{keywords} = \@keywords;
					$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} Trigger keyword found in: '.$title) if (!$self->tweet_everything);
					if ($self->dryrun) {
						$self->yield('new_shortened',{
							short => $self->dryrun_url,
							_twitch_event => $event,
						});
					} else {
						$self->_shorten->shorten({
							url => $event->{url},
							event => 'new_shortened',
							_twitch_event => $event,
						});
					}
				} else {
					$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} Yeah... what i care... doing nothing with it');
				}
			} else {
				$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} Is no UTF8');
			}
		} else {
			$self->logger->error($self->logger_prefix.'{'.$event->{run_id}.'} Wrong HTTP Code '.$response->code);
		}
	};
	$self->logger->error($self->logger_prefix.'{'.$event->{run_id}.'} ERROR [content handling]: '.$@) if $@;
};

event new_shortened => sub {
	my ( $self, $returned ) = @_[ OBJECT, ARG0..$#_ ];
	my $event = $returned->{_twitch_event};
	eval {
		my $title = $event->{entry}->title;
		my $content = $event->{content};
		my $url = $event->{url};
		my @keywords = @{$event->{keywords}};
		if ($returned->{short}) {
			$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} Received ShortURL');
			my $short = $returned->{short};
			my @keywords_text;
			for (@keywords) {
				push @keywords_text, $_->found;
			}
			$event->{tweet} = $self->_tweet->make(\@keywords_text,$title,\$short);
			$self->twitter_update($event);
		} else {
			$self->logger->error($self->logger_prefix.'{'.$event->{run_id}.'} Failing generation of ShortURL');
		}
	};
	$self->logger->error($self->logger_prefix.'{'.$event->{run_id}.'} ERROR [finalize and tweeting]: '.$@) if $@;
};

sub twitter_update {
	my ( $self, $event ) = @_;
	my $tweet = $event->{tweet};
	$self->logger->info($self->logger_prefix.'{'.$event->{run_id}.'} Twitter update: '.$tweet);
	if (!$self->dryrun) {
		eval {
			$self->_twitter->update({ status => $tweet });
		};
		if ($@) {
			$self->logger->error('('.$event->{run_id}.') ERROR [twitter]: '.$@);
			return 0;
		}
	} else {
		$self->logger->debug($self->logger_prefix.'{'.$event->{run_id}.'} dryrun set, not really twittering it!');
	}
	return 1;
}

sub logger_prefix {
	my $self = shift;
	__PACKAGE__.' ('.$self->get_session_id.') ';
}

__PACKAGE__->meta->make_immutable;

1;



=pod

=head1 NAME

App::Twitch - Your personal Twitter b...... lalalala

=head1 VERSION

version 0.904

=head1 DESCRIPTION

More documentation coming soon....

=head1 SEE ALSO

=over 4

=item *

L<Net::Twitter>

=item *

L<POE::Component::FeedAggregator>

=item *

L<MooseX::POE>

=back

=head1 AUTHOR

Torsten Raudssus <torsten@raudssus.de> L<http://www.raudssus.de/>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2010 by L<Raudssus Social Software|http://www.raudssus.de/>.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut


__END__



Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.