Group
Extension

Net-YASA/lib/Net/YASA.pm

package Net::YASA;

use warnings;
use strict;
use utf8;

use Encode qw/encode decode/;
use LWP::UserAgent;

=head1 NAME

Net::YASA - Interface to YASA (Yet Another Suffix Array)

=head1 VERSION

Version 0.03

=cut

our $VERSION = '0.03';
our $AUTOLOAD;
our %ok_field;

for my $attr ( qw(content minfreq minlength ) ) { $ok_field{$attr}++; }

sub AUTOLOAD {
    my $self = shift;
    my $attr = $AUTOLOAD;
    $attr =~ s/.*:://;
    return if $attr eq 'DESTROY';

    if ($ok_field{$attr}) {
	$self->{lc $attr} = shift if @_;
	return $self->{lc $attr};
    } else {
	my $superior = "SUPER::$attr";
	$self->$superior(@_);
    }
}

use constant YASA_WEB_URL => 'http://yasa.newzilla.org/run/';
=head1 SYNOPSIS

This module will submit content to the YASA WebService to return
a list of terms and corresponding frequencies.

    use Net::YASA;

    my $foo = Net::YASA->new();
    my $termset = $foo->extract(<some_of_utf8_words>);
    print 'TermSet 1: ', $$termset[0], "\n";
    print 'TermSet 2: ', $$termset[1], "\n";
    ...

=head1 EXPORT

A list of functions that can be exported.  You can delete this section
if you don't export anything, such as for a purely object-oriented module.

=head1 METHODS

=head2 new

=cut

sub new {
    my $class = shift;
    my $self = {
	_ua => undef,
	minlength => 1,
	minfreq => 2,
	output => 'xml',
	_content => undef
    };
    if(@_) {
	my %arg = @_;

	foreach (keys %arg) {
	    $self->{lc($_)} = $arg{$_};
	}
    }
    $self->{_ua} = LWP::UserAgent->new;
    $self->{_ua}->timeout(30);
    $self->{_ua}->agent('CPAN::Net::YASA');
    bless($self, $class);
    return($self);
}

=head2 extract

=cut

sub extract {
    my ($self, $content) = @_;
    die 'No content specified' unless $content ne "";
    my $ua = $self->{_ua};
    my $response = $ua->post(
	YASA_WEB_URL.$self->{output}."/",
	{   
	    'min' => $self->minlength,
	    'freq' => $self->minfreq,
	    'content' => encode("utf8",$content), 
	}
    );
    die "Error in extracting data from YASA!\n" unless $response->is_success();
    if ($self->{output} eq "json" and eval {
	    require JSON::Any;
	    1;
	}) {
	my $result = $response->content();
        my $j = JSON::Any->new;

	my $data = $j->decode($result);
	return $data;
    } 
    else {
	my $xml = decode("utf8",$response->content());
	my @results = ();
	while ($xml =~ m#<Term>([^<]*)</Term><Freq>(\d+)</Freq>#g) {
	    push @results, $1."\t".$2;
	}
        return \@results;
    }
}

=head1 AUTHOR

Cheng-Lung Sung, C<< <clsung at FreeBSD.org> >>

=head1 BUGS

Please report any bugs or feature requests to
C<bug-net-yasa at rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Net-YASA>.
I will be notified, and then you'll automatically be notified of progress on
your bug as I make changes.

=head1 SEE ALSO

YASA (Yet Another Suffix Array) web site: L<http://yasa.newzilla.org>

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc Net::YASA

You can also look for information at:

=over 4

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/Net-YASA>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/Net-YASA>

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Net-YASA>

=item * Search CPAN

L<http://search.cpan.org/dist/Net-YASA>

=back

=head1 ACKNOWLEDGEMENTS

=head1 COPYRIGHT & LICENSE

Copyright 2007-2009 Cheng-Lung Sung, all rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

1; # End of Net::YASA


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.