## DTA-CAB/CAB/Datum.pm
## -*- Mode: CPerl -*-
##
## File: DTA::CAB::Datum.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: generic API for data (tokens,sentences,documents,...) passed to/from DTA::CAB::Analyzer
package DTA::CAB::Datum;
use DTA::CAB::Logger;
use Exporter;
use Carp;
use strict;
##==============================================================================
## Globals
##==============================================================================
##-- base classes: Exporter (supplies import() for the symbols below) and DTA::CAB::Logger
our @ISA = qw(Exporter DTA::CAB::Logger);
##-- conversion functions exported into the caller's namespace by default
our @EXPORT = qw(toToken toSentence toDocument);
##-- the same functions may also be requested explicitly by name (copy of @EXPORT)
our @EXPORT_OK = @EXPORT;
##-- export tag ':all' selects everything listed in @EXPORT_OK
our %EXPORT_TAGS = (all=>\@EXPORT_OK);
##==============================================================================
## Constructors etc.
##==============================================================================
## $dat = $CLASS_OR_OBJECT->new(%args)
## + %$dat, %args:
## _attrs => \%attrs, ##-- scalar attributes ($key=>$val,...), e.g. for xml pass-through
## _dtrs => \@dtrs, ##-- structural daughters (DTA::CAB::Datum objects), e.g. for xml pass-through
sub new {
  ##-- $that   : invocant, either a class name or an existing object
  ##-- @fields : remaining arguments, used verbatim as the key=>value contents of the new hash
  my ($that, @fields) = @_;
  ##-- an object invocant contributes only its class; none of its contents are copied
  my $class = ref($that) || $that;
  return bless { @fields }, $class;
}
##==============================================================================
## Utilities
##==============================================================================
## $tok = CLASS::toToken($tok)
## $tok = CLASS::toToken($text)
## + creates a new token object or returns its argument
sub toToken {
  ##-- Coerce the first argument to a token.
  ##-- NOTE: the string and HASH-ref cases return plain, unblessed HASH-refs; an earlier
  ##--       variant (formerly kept here as commented-out code) blessed them into DTA::CAB::Token.
  my $arg = $_[0];

  ##-- token objects pass through untouched
  if (UNIVERSAL::isa($arg,'DTA::CAB::Token')) {
    return $arg;
  }

  ##-- non-reference argument: treat it as the token text and wrap it in a new HASH-ref
  unless (ref($arg)) {
    return { text => $arg };
  }

  ##-- plain HASH-ref which already has a 'text' key: returned as-is
  if (ref($arg) eq 'HASH' and exists $arg->{text}) {
    return $arg;
  }

  ##-- default: hand all arguments to the DTA::CAB::Token constructor
  return DTA::CAB::Token->new(@_);
}
## $sent = CLASS::toSentence($sent)
## $sent = CLASS::toSentence(\@tokens)
## + creates a new sentence object or returns its argument
sub toSentence {
  ##-- Coerce the first argument to a sentence.
  ##-- NOTE: the ARRAY and HASH-ref cases return plain, unblessed HASH-refs; an earlier
  ##--       variant (formerly kept here as commented-out code) blessed them into DTA::CAB::Sentence.
  my $arg = $_[0];

  ##-- sentence objects pass through untouched
  if (UNIVERSAL::isa($arg,'DTA::CAB::Sentence')) {
    return $arg;
  }

  ##-- array(-based) argument: treat it as the token list and wrap it in a new HASH-ref
  if (UNIVERSAL::isa($arg,'ARRAY')) {
    return { tokens => $arg };
  }

  ##-- plain HASH-ref which already has a 'tokens' key: returned as-is
  if (ref($arg) eq 'HASH' and exists $arg->{tokens}) {
    return $arg;
  }

  ##-- default: hand all arguments to the DTA::CAB::Sentence constructor
  return DTA::CAB::Sentence->new(@_);
}
## $doc = CLASS::toDocument($doc)
## $doc = CLASS::toDocument(\@sents)
## + creates a new document object or returns its argument
sub toDocument {
  ##-- Coerce the first argument to a DTA::CAB::Document object.
  my $arg = $_[0];

  ##-- document objects pass through untouched
  if (UNIVERSAL::isa($arg,'DTA::CAB::Document')) {
    return $arg;
  }

  ##-- array(-based) argument: becomes the 'body' of a newly blessed document
  if (UNIVERSAL::isa($arg,'ARRAY')) {
    return bless({ body => $arg }, 'DTA::CAB::Document');
  }

  ##-- plain HASH-ref which already has a 'body' key: blessed in place (the caller's hash itself is re-classed)
  if (ref($arg) eq 'HASH' and exists $arg->{body}) {
    return bless($arg, 'DTA::CAB::Document');
  }

  ##-- default: hand all arguments to the DTA::CAB::Document constructor
  return DTA::CAB::Document->new(@_);
}
## $thingy = $obj->TO_JSON()
## + annoying wrapper for JSON::XS
sub TO_JSON {
  my ($self) = @_;
  ##-- shallow copy: top-level keys are duplicated into a fresh, unblessed hash;
  ##-- nested references are shared with the original object
  my %plain = %$self;
  return \%plain;
}
1; ##-- be happy
__END__
##========================================================================
## POD DOCUMENTATION, auto-generated by podextract.perl, & edited
##========================================================================
## NAME
=pod
=head1 NAME
DTA::CAB::Datum - generic API for data (tokens,sentences,documents,...) passed to/from DTA::CAB::Analyzer
=cut
##========================================================================
## SYNOPSIS
=pod
=head1 SYNOPSIS
use DTA::CAB::Datum;
$tok = CLASS::toToken($tok);
$sent = CLASS::toSentence($sent);
$doc = CLASS::toDocument($doc);
=cut
##========================================================================
## DESCRIPTION
=pod
=head1 DESCRIPTION
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Datum: Globals
=pod
=head2 Globals
=over 4
=item Variable: @ISA
DTA::CAB::Datum inherits from
C<Exporter>
and
L<DTA::CAB::Logger|DTA::CAB::Logger>.
=item Variable: @EXPORT
By default, the functions L</toToken>(), L</toSentence>(), and L</toDocument>() are exported.
=back
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Datum: Constructors etc.
=pod
=head2 Constructors etc.
=over 4
=item toToken
$tok = CLASS::toToken($tok);
$tok = CLASS::toToken($text);
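Returns its argument if it is already a L<DTA::CAB::Token|DTA::CAB::Token>
object or a HASH-ref with a C<text> key (the latter is returned unblessed).
A plain string C<$text> is wrapped as the unblessed HASH-ref C<< {text=>$text} >>.
Otherwise, creates a new L<DTA::CAB::Token|DTA::CAB::Token> object from its arguments.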
=item toSentence
$sent = CLASS::toSentence($sent);
$sent = CLASS::toSentence(\@tokens);
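Returns its argument if it is already a L<DTA::CAB::Sentence|DTA::CAB::Sentence>
object or a HASH-ref with a C<tokens> key (the latter is returned unblessed).
An ARRAY-ref C<\@tokens> is wrapped as the unblessed HASH-ref C<< {tokens=>\@tokens} >>.
Otherwise, creates a new L<DTA::CAB::Sentence|DTA::CAB::Sentence> object from its arguments.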
=item toDocument
$doc = CLASS::toDocument($doc);
$doc = CLASS::toDocument(\@sents);
Creates a new L<DTA::CAB::Document|DTA::CAB::Document>
object or returns its argument (if already such an object).
=back
=cut
##========================================================================
## END POD DOCUMENTATION, auto-generated by podextract.perl
##======================================================================
## Footer
##======================================================================
=pod
=head1 AUTHOR
Bryan Jurish E<lt>moocow@cpan.orgE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2009-2019 by Bryan Jurish
This package is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.24.1 or,
at your option, any later version of Perl 5 you may have available.
=cut
=cut