DTA-CAB/CAB/Format/CONLLU.pm
## -*- Mode: CPerl -*-
##
## File: DTA::CAB::Format::CONLLU.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: Datum parser/formatter: CONLL-U format
## + see https://universaldependencies.org/format.html
##
## Format fields:
## ID: Word index, integer starting at 1 for each new sentence;
## FORM: Word form or punctuation symbol.
## LEMMA: Lemma or stem of word form.
## UPOS: Universal part-of-speech tag.
## XPOS: Language-specific part-of-speech tag; underscore if not available.
## FEATS: List of morphological features from the universal feature inventory or underscore if not available.
## HEAD: Head of the current word, which is either a value of ID or zero (0).
## DEPREL: Universal dependency relation to the HEAD (root iff HEAD = 0) or a defined language-specific subtype of one.
## DEPS: Enhanced dependency graph in the form of a list of head-deprel pairs.
## MISC: Any other annotation, split by '|'
##
## Local format conventions for MISC ("MISC1|...|MISCn")
## + MISC fields "MISC$i" is of the form 'ATTR=VALUE' are handled specially for the following ATTRs:
## id=TOKID # sets $tok->{id}
## loc=OFFSET LENGTH # sets $tok->{loc}
## xlit=XTEXT # sets $tok->{xlit}{latin1Text}; also honors CONLL-U "Translit=XTEXT"
## norm=NORM # sets $tok->{moot}{word}
## details=DETAILS # sets $tok->{moot}{details}{details}
## json=JSON # clobbers %$tok with JSON a la Format::TJ
## + VALUEs of specially handled attributes containing literal '%' or '|'
## should have these 2 characters (and only these 2 characters) URI-escaped ('%25', '%7C' respectively)
package DTA::CAB::Format::CONLLU;
use DTA::CAB::Format;
use DTA::CAB::Format::TJ;
use DTA::CAB::Datum ':all';
use IO::File;
use Carp;
use strict;
##==============================================================================
## Globals
##==============================================================================
our @ISA = qw(DTA::CAB::Format::TJ);
BEGIN {
DTA::CAB::Format->registerFormat(name=>__PACKAGE__, filenameRegex=>qr/\.(?i:conllu|conll[_-]u|cab[\.-]connlu|cab[\.-]conll[\.-]u)$/);
DTA::CAB::Format->registerFormat(name=>__PACKAGE__, short=>$_)
foreach (qw(conllu conll-u cab-conllu cab-conll-u));
}
##==============================================================================
## Constructors etc.
##==============================================================================
## $fmt = CLASS_OR_OBJ->new(%args)
## + object structure: assumed HASH
## {
## ##-- Input
## doc => $doc, ##-- INHERITED: buffered input document
## cuMiscIn => $bool, ##-- NEW: parse special MISC attrs (default=true)
##
## ##-- Output
## outbuf => $stringBuffer, ##-- INHERITED: buffered output
## level => $formatLevel, ##-- OVERRIDE: <0:omit-misc ; 0:default:include-misc,exclude-json, >=1:include-json, >=2:canonical-json
## tagset => $tagset, ##-- auto-convert XPOS->UPOS for $tagset (known values: 'stts' (default))
##
## ##-- Common (INHERITED from Format::TT)
## raw => $bool, ##-- INHERITED: attempt to load/save raw data
## fh => $fh, ##-- INHERITED: IO::Handle for read/write
## utf8 => $bool, ##-- INHERITED: read/write utf8?
## tloc => $attr, ##-- INHERITED: if non-empty, parseTokenizerString() sets $w->{$attr}="$off $len"; default=0
## #defaultFieldName => $name, ##-- INHERITED: default name for unnamed misc-fields; parsed into @{$tok->{other}{$name}}; default=''
## }
sub new {
my $that = shift;
my $fmt = $that->SUPER::new(
##-- input
doc => undef,
cuMiscIn=>1,
##-- output
#level => 0,
tagset => 'stts',
##-- common
utf8 => 1,
#defaultFieldName => '',
#tloc => undef,
##-- user args
@_
);
return $fmt;
}
##==============================================================================
## Methods: Persistence
##==============================================================================
## @keys = $class_or_obj->noSaveKeys()
## + returns list of keys not to be saved
## + default just returns empty list
## + INHERITED from Format::TJ
##==============================================================================
## Methods: I/O: Generic
##==============================================================================
## $jxs = $fmt->jsonxs()
## + INHERITED from Format::TJ
## $str = unescapeConllu($str)
## + un-escapes CONLLU value strings using URI-escape sequences ('%7C' => '|', '%25'=>'%')
sub unescapeConllu {
my $str = shift;
$str =~ s/%7C/\|/gi;
$str =~ s/%25/\%/gi;
return $str;
}
## $str = escapeConllu($str)
## + escapes CONLLU value strings using URI-escape sequences ('|'=>'%7C', '%'=>'%25')
sub escapeConllu {
my $str = shift;
$str =~ s/\%/%25/gi;
$str =~ s/\|/%7C/gi;
return $str;
}
##--------------------------------------------------------------
## Methods & Data: tagset conversions
## %XPOS2UPOS => ($tagset => $CODE_OR_HASHREF, ...)
our %XPOS2UPOS =
(
##-- xpos2upos:stts: see http://universaldependencies.org/tagset-conversion/de-stts-uposf.html
stts => {
'$(' => 'PUNCT',
'$,' => 'PUNCT',
'$.' => 'PUNCT',
'ADJA' => 'ADJ',
'ADJD' => 'ADJ',
'ADV' => 'ADV',
'APPO' => 'ADP',
'APPR' => 'ADP',
'APPRART' => 'ADP',
'APZR' => 'ADP',
'ART' => 'DET',
'CARD' => 'NUM',
'FM' => 'X',
'ITJ' => 'INTJ',
'KOKOM' => 'CCONJ',
'KON' => 'CCONJ',
'KOUI' => 'SCONJ',
'KOUS' => 'SCONJ',
'NE' => 'PROPN',
'NN' => 'NOUN',
'PAV' => 'ADV',
'PDAT' => 'DET',
'PDS' => 'PRON',
'PIAT' => 'DET',
'PIDAT' => 'DET',
'PIS' => 'PRON',
'PPER' => 'PRON',
'PPOSAT' => 'DET',
'PPOSS' => 'PRON',
'PRELAT' => 'DET',
'PRELS' => 'PRON',
'PRF' => 'PRON',
'PTKA' => 'PART',
'PTKANT' => 'PART',
'PTKNEG' => 'PART',
'PTKVZ' => 'ADP',
'PTKZU' => 'PART',
'PWAT' => 'DET',
'PWAV' => 'ADV',
'PWS' => 'PRON',
'TRUNC' => 'X',
'VAFIN' => 'AUX',
'VAIMP' => 'AUX',
'VAINF' => 'AUX',
'VAPP' => 'AUX',
'VMFIN' => 'VERB',
'VMINF' => 'VERB',
'VMPP' => 'VERB',
'VVFIN' => 'VERB',
'VVIMP' => 'VERB',
'VVINF' => 'VERB',
'VVIZU' => 'VERB',
'VVPP' => 'VERB',
'XY' => 'X',
},
);
##==============================================================================
## Methods: I/O: Block-wise
## + mostly INHERITED from Format::TT
##==============================================================================
##--------------------------------------------------------------
## Methods: I/O: Block-wise: Generic
## %blockOpts = $CLASS_OR_OBJECT->blockDefaults()
## + returns default block options as for blockOptions()
## + INHERITED default just returns as for $CLASS_OR_OBJECT->blockOptions('128k@w')
##--------------------------------------------------------------
## Methods: I/O: Block-wise: Input
## \%head = blockScanHead(\$buf,$io,\%opts)
## + gets header offset, length from (mmaped) \$buf
## + %opts are as for blockScan()
## + OVERRIDE scans for CONLL-U "# newdoc" comment
sub blockScanHead {
my ($fmt,$bufr,$io,$opts) = @_;
return [0,$+[0]] if ($$bufr =~ m(\A\n*+(?:#\s*newdoc\b.*\n++)?));
return [0,0];
}
## \%head = blockScanFoot(\$buf,$io,\%opts)
## + gets footer offset, length from (mmaped) \$buf
## + %opts are as for blockScan()
## + override INHERITED from Format::TT returns empty
## \@blocks = $fmt->blockScanBody(\$buf,\%opts)
## + scans $filename for block boundaries according to \%opts
## + INHERITED from Format::TT
##--------------------------------------------------------------
## Methods: I/O: Block-wise: Output
## $blk = $fmt->blockStore(\$odata,$blk,$bopt)
## + store output buffer \$buf in $blk->{odata}
## + additionally store keys qw(ofmt ohead odata ofoot) relative to $blk->{odata}
## + override truncates trailing newlines according to $blk->{eos} before calling inherited method
## + INHERITED from Format::TT
## $fmt_or_undef = $fmt->blockAppend($block,$filename)
## + append a block $block to a file $filename
## + $block is a HASH-ref as returned by blockScan()
## + INHERITED from DTA::CAB::Format
##==============================================================================
## Methods: Input
##==============================================================================
##--------------------------------------------------------------
## Methods: Input: Input selection
## $fmt = $fmt->fromFh($filename_or_handle)
## + new override calls Format::fromFh
sub fromFh {
#return $_[0]->fromFh_str(@_[1..$#_]);
my $fmt = shift;
$fmt->DTA::CAB::Format::fromFh(@_)
or $fmt->logconfess("fromFh(): inherited Format::fromFh() failed: $!");
return $fmt->parseConlluFh($_[0]);
}
## $fmt = $fmt->fromString(\$string)
## + select input from string $string
## + INHERITED from Format::TJ
##--------------------------------------------------------------
## Methods: Input: Local
## $fmt = $fmt->parseConlluFH($fh)
## + guts for fromFh(): parse handle $fh into local document buffer.
sub parseConlluFh {
my ($fmt,$fh) = @_;
$fmt->setLayers($fh);
my $jxs = $fmt->jsonxs();
##-- ye olde loope
my (%sa,%doca);
my $toks = [];
my @body = qw();
my ($tok,%cu,@misc,$json);
while (defined($_=<$fh>)) {
if (/^\#\s*newdoc\s+id\s*=\s*(.*)$/) {
##-- conllu comment: document attribute: doc id (-> "id", "base")
$doca{'id'} = $doca{'base'} = $1;
}
elsif ($_ =~ /^#\s*sent_id\s*=\s*(.*)$/) {
##-- connlu comment: sentence attribute: xml:id (-> "sent_id", "id")
$sa{sent_id} = $sa{'id'} = $1;
}
elsif ($_ =~ /^#\s*text\s*=\s*(.*)$/) {
##-- connl-u comment: sentence attribute: raw text (-> "stxt", "text")
$sa{'stxt'} = $sa{'text'} = $1;
}
elsif ($_ =~ /^#\s*\$TJ:DOC=(.+)$/) {
##-- tj directive: document attributes
$json = defined($1) && $1 ? $jxs->decode($1) : {};
@doca{keys %$json} = values %$json;
}
elsif ($_ =~ /^#\s*\$TJ:SENT=(.+)$/) {
##-- tj directive: setence attributes
$json = defined($1) && $1 ? $jxs->decode($1) : {};
@sa{keys %$json} = values %$json;
}
elsif ($_ =~ /^# (?:xml\:)?base=(.*)$/) {
##-- (tt-compat) special comment: document attribute: xml:base
$doca{'base'} = $1;
}
elsif ($_ =~ /^# Sentence (.*)$/) {
##-- (tt-compat) special comment: sentence attribute: xml:id
$sa{'id'} = $1;
}
elsif ($_ =~ /^\#(.*)$/) {
##-- generic conllu- comment line: add to '_cmts' attribute of current sentence
push(@{$sa{_cmts}},$1);
}
elsif ($_ =~ /^$/) {
##-- empty line: EOS
if (%sa || @$toks) {
push(@body,{%sa,tokens=>$toks});
$toks = [];
%sa = qw();
}
}
else {
##-- vanilla token
chomp;
@cu{qw(id form lemma upos xpos feats head deprel deps misc)}
= map { ($_//'_') eq '_' ? undef : $_ } split(/\t/,$_,10);
$tok = { text=>$cu{form} };
##-- parse: feats
## + example: 'Case=Acc,Dat|Number=Sing' --> {'Case'=>'Acc,Dat', 'Number'=>'Sing'}
$cu{feats} = { map {split('=',$_,2)} split('|',$cu{feats}) }
if ($cu{feats});
##-- parse: deps (extended dependency graph)
## + example: '0:root|2:conj|4:conj' --> [[0,'root'],[2,'conj'],[4,'conj']]
$cu{deps} = [ map {[split(':',$_,2)]} split('|',$cu{deps}) ]
if ($cu{deps});
##-- parse: misc
if ($cu{misc} && ($fmt->{cuMiscIn}//1)) {
@misc = qw();
foreach (split(/\|/,$cu{misc})) {
if (m/^loc=(?:off=)?([0-9]+) (?:len=)?([0-9]+)$/) {
##-- misc: loc=OFFSET LENGTH (sets $tok->{loc})
$tok->{loc} = { off=>$1, len=>$2 };
}
elsif (m/(?:xml\:?)?id=(.*)$/) {
##-- misc: id=TOKID # sets $tok->{id}
$tok->{id} = unescapeConllu($1);
}
elsif (s/^(?:xlit|[Ll]?[Tt]ranslit)=//) {
##-- misc: xlit=XTEXT (sets $tok->{xlit}{latin1Text}; also honors CONLL-U "Translit=XTEXT")
if (m/^(?:isLatin1|l1)=([01]) (?:isLatinExt|lx)=([01]) (?:latin1Text|l1s)=(.*)$/) {
$tok->{xlit} = { isLatin1=>($1||0), isLatinExt=>($2||0), latin1Text=>unescapeConllu($3) };
} else {
$tok->{xlit} = { isLatin1=>'', isLatinExt=>'', latin1Text=>unescapeConllu($_) };
}
}
elsif (m/^norm=(.*)$/) {
## misc: norm=NORM (sets $tok->{moot}{word})
$tok->{moot}{word} = unescapeConllu($1);
}
elsif (m/^details=(\S*)?(?:\s\@\s(\S+))?\s(?:\~\s)?(.*?)(?: <([0-9\.\+\-eE]+)>)?$/) {
## misc: details=DETAILS (sets $tok->{moot}{details}
$tok->{moot}{details} = {
(($1//'') ne '' ? (tag=>unescapeConllu($1)) : qw()),
(($2//'') ne '' ? (lemma=>unescapeConllu($2)) : qw()),
details=>unescapeConllu($3),
prob=>$4,
};
}
elsif (m/^json=(.+)$/) {
## misc: json=JSON (clobbers %$tok with JSON a la Format::TJ)
$json = $jxs->decode(unescapeConllu($1));
@$tok{keys %$json} = values %$json;
}
else {
## misc: extra attribute, add to $cu{misc}
push(@misc, $_);
}
}
$cu{misc} = [@misc];
}
elsif ($cu{misc}) {
##-- MISC: don't parse special attributes, just split
$cu{misc} = [split(/\|/,$cu{misc})];
}
##-- store token
$tok->{conllu} = {%cu};
$tok->{moot}{tag} //= ($cu{xpos} // $cu{upos});
$tok->{moot}{lemma} //= $cu{lemma};
push(@$toks, $tok)
}
}
push(@body, {%sa,tokens=>$toks}) if (%sa || @$toks); ##-- handle missing EOS at EOF
##-- construct & buffer output document
$fmt->{doc} = bless({%doca,body=>\@body}, 'DTA::CAB::Document');
return $fmt;
}
##--------------------------------------------------------------
## Methods: Input: Generic API
## $doc = $fmt->parseDocument()
## + INHERITED from Format::TJ
##==============================================================================
## Methods: Output
##==============================================================================
##--------------------------------------------------------------
## Methods: Output: Generic
## $type = $fmt->mimeType()
## + INHERITED default returns text/plain
## $ext = $fmt->defaultExtension()
## + returns default filename extension for this format
sub defaultExtension { return '.conllu'; }
##--------------------------------------------------------------
## Methods: Output: Generic API
## $fmt = $fmt->putToken($tok)
## $fmt = $fmt->putToken($tok,$conllu_id)
## + honors $fmt->{level} : <0:omit-misc ; 0:default:include-misc,exclude-json, >=1:include-json, >=2:canonical-json
sub putToken {
my ($fmt,$tok,$id) = @_;
##-- conllu fields ($id,$text,$lemma,$upos,$xpos,$feats,$head,$deprel,$deps) ... everything but MISC
my %cu = %{$tok->{conllu} // {}};
$cu{id} //= $id // '_';
$cu{form} //= $tok->{text};
$cu{xpos} //= $tok->{pos} // ($tok->{moot} ? $tok->{moot}{tag} : undef);
$cu{lemma} //= $tok->{lemma} // ($tok->{moot} ? $tok->{moot}{lemma} : undef);
##-- implicit xpos->upos conversion
if (!$cu{upos} && $fmt->{tagset} && defined(my $x2u=$XPOS2UPOS{$fmt->{tagset}})) {
if (UNIVERSAL::isa($x2u,'HASH')) {
$cu{upos} = $x2u->{$cu{xpos}//''} // 'X';
}
elsif (UNIVERSAL::isa($x2u,'CODE')) {
$cu{upos} = $x2u->($cu{xpos}) // 'X';
}
else {
confess(__PACKAGE__, "::putToken(): PoS-translation table must be a HASH- or CODE-ref");
}
}
##-- special MISC ATTRS
my @misc = qw();
if (($fmt->{level}//0) >= 0) {
##-- include special misc ATTRS
push(@misc, "id=$tok->{id}") if ($tok->{id});
push(@misc, "loc=$tok->{loc}{off} $tok->{loc}{len}") if ($tok->{loc});
push(@misc, "Translit=$tok->{xlit}{latin1Text}") if ($tok->{xlit});
if ($tok->{moot}) {
push(@misc, "norm=$tok->{moot}{word}")
if (defined($tok->{moot}{word}));
push(@misc, "details=$tok->{moot}{details}{details} <".($tok->{moot}{details}{prob}||$tok->{moot}{details}{cost}||0).">")
if (defined($tok->{moot}{details}{details}));
}
if (($fmt->{level}//0) >= 1) {
##-- include misc/json
push(@misc, "json=".$fmt->jsonxs->encode($tok));
}
$_ = escapeConllu($_) foreach (@misc)
}
$fmt->{fh}->print
(
##-- comments
($tok->{_cmts} ? join('', map {"#$_\n"} map {split(/\n/,$_)} @{$tok->{_cmts}}) : ''),
##
join("\t",
##-- conllu fixed fields ($id,$text,$lemma,$upos,$xpos,$feats,$head,$deprel,$deps) ... everything but MISC
map { ($_//'') eq '' ? '_' : $_ }
@cu{qw(id form lemma upos xpos)},
(UNIVERSAL::isa($cu{feats},'HASH')
? join('|',map {"$_:$cu{feats}{$_}"} sort keys %{$cu{feats}})
: $cu{feats}),
@cu{qw(head deprel)},
(UNIVERSAL::isa($cu{deps},'ARRAY')
? join('|',map {"$_->[0]:$_->[1]"} @{$cu{deps}})
: $cu{deps}),
##-- conllu MISC (may be empty, depending on $fmt->{level})
join('|', @{$cu{misc}//[]}, @misc),
),
"\n",
);
return $fmt;
}
## $fmt = $fmt->putSentence($sent)
## + concatenates formatted tokens, adding sentence-id comment if available
sub putSentence {
#my ($fmt,$sent) = @_;
my $sh = {(map {$_ eq 'tokens' ? qw() : ($_=>$_[1]{$_})} keys %{$_[1]})};
$_[0]{fh}->print(join('', map {"#$_\n"} map {split(/\n/,$_)} @{$_[1]{_cmts}})) if ($_[1]{_cmts});
$_[0]{fh}->print("# sent_id = ", ($_[1]{id}//''), "\n");
$_[0]{fh}->print("# text = $_[1]{stxt}\n") if (defined($_[1]{stxt}));
$_[0]{fh}->print('# $TJ:SENT=', $_[0]->jsonxs->encode($sh), "\n") if (%$sh && ($_[0]{level}//0) >= 1);
my $i = 0;
$_[0]->putToken($_,++$i) foreach (@{toSentence($_[1])->{tokens}});
$_[0]{fh}->print("\n");
return $_[0];
}
## $fmt = $fmt->putDocument($doc)
## + concatenates formatted sentences, adding document 'xmlbase' comment if available
our %TJ_BAD_DOC_KEYS = %DTA::CAB::Format::TJ::TJ_BAD_DOC_KEYS;
sub putDocument {
#my ($fmt,$doc) = @_;
my $dh = { (map {($_=>$_[1]{$_})} grep {!exists($TJ_BAD_DOC_KEYS{$_})} keys %{$_[1]}) };
$_[0]{fh}->print('# $TJ:DOC=', $_[0]->jsonxs->encode($dh), "\n") if (%$dh && ($_[0]{level}//0) >= 1);
$_[0]->putSentence($_) foreach (@{toDocument($_[1])->{body}});
return $_[0];
}
## $fmt = $fmt->putData($data)
## + puts raw data (uses forceDocument())
## + OVERRIDE uses Format::TT implementation
sub putData {
return $_[0]->DTA::CAB::Format::TT->putData($_[1]);
}
1; ##-- be happy
__END__
##========================================================================
## POD DOCUMENTATION, auto-generated by podextract.perl
##========================================================================
## NAME
=pod
=head1 NAME
DTA::CAB::Format::CONLLU - Datum parser: CONLL-U format
=cut
##========================================================================
## SYNOPSIS
=pod
=head1 SYNOPSIS
use DTA::CAB::Format::CONLLU;
##========================================================================
## Constructors etc.
$fmt = DTA::CAB::Format::CONLLU->new(%args);
##========================================================================
## Methods: I/O: Input
\%head = blockScanHead(\$buf,$io,\%opts);
$fmt = $fmt->fromFh($filename_or_handle);
##========================================================================
## Methods: Output
$ext = $fmt->defaultExtension();
$fmt = $fmt->putToken($tok);
$fmt = $fmt->putSentence($sent);
$fmt = $fmt->putData($data);
##========================================================================
## Methods: Low-Level
$str = unescapeConllu($str);
$str = escapeConllu($str);
=cut
##========================================================================
## DESCRIPTION
=pod
=head1 DESCRIPTION
DTA::CAB::Format::CONLLU is a CAB datum parser+formatter conforming to the
CONLL-U format conventions; see L<https://universaldependencies.org/format.html> for details.
=head2 Format fields
ID: Word index, integer starting at 1 for each new sentence;
FORM: Word form or punctuation symbol.
LEMMA: Lemma or stem of word form.
UPOS: Universal part-of-speech tag.
XPOS: Language-specific part-of-speech tag; underscore if not available.
FEATS: List of morphological features from the universal feature inventory or underscore if not available.
HEAD: Head of the current word, which is either a value of ID or zero (0).
DEPREL: Universal dependency relation to the HEAD (root iff HEAD = 0) or a defined language-specific subtype of one.
DEPS: Enhanced dependency graph in the form of a list of head-deprel pairs.
MISC: Any other annotation, split by '|'
=head2 Local format conventions for C<MISC> field
By the CONLL-U conventions, the final token field C<MISC> is separated
by vertical bars (C<MISC ::= "MISC1|...|MISCn">). This module treats
C<MISC$i> elements of the form C<ATTR=VALUE> specially for the following
C<ATTR>s:
id=TOKID # sets $tok->{id}
loc=OFFSET LENGTH # sets $tok->{loc}
xlit=XTEXT # sets $tok->{xlit}{latin1Text}; also honors CONLL-U "Translit=XTEXT"
norm=NORM # sets $tok->{moot}{word}
details=DETAILS # sets $tok->{moot}{details}{details}
json=JSON # clobbers %$tok with JSON a la Format::TJ
C<VALUE>s of specially handled attributes containing literal C<%> or C<|>
should have these 2 characters (and only these 2 characters) URI-escaped (to C<%25>, and C<%7C> respectively).
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Format::CONLLU: Globals
=pod
=head2 Globals
=over 4
=item Variable: @ISA
DTA::CAB::Format::CONLLU
inherits from
L<DTA::CAB::Format::TJ|DTA::CAB::Format::TJ>.
=item Variable: %XPOS2UPOS
Global tag translation table from language-specific PoS-tagset to UD PoS-tagset
(C<XPOS E<gt> UPOS>) used for output. Keys are language-specific tagsets, values are HASH-
or CODE-refs for tagset translation.
%XPOS2UPOS => ($tagset =E<gt> $CODE_OR_HASHREF, ...)
$upos = $XPOS2UPOS{$tagset}->{$xpos}; ##-- HASH-ref
$upos = $XPOS2UPOS{$tagset}->($xpos); ##-- CODE-ref
=back
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Format::CONLLU: Constructors etc.
=pod
=head2 Constructors etc.
=over 4
=item new
$fmt = CLASS_OR_OBJ->new(%args);
object structure: assumed HASH
{
##-- Input
doc => $doc, ##-- INHERITED: buffered input document
cuMiscIn => $bool, ##-- NEW: parse special MISC attrs (default=true)
##-- Output
outbuf => $stringBuffer, ##-- INHERITED: buffered output
level => $formatLevel, ##-- OVERRIDE: <0:omit-misc ; 0:default:include-misc,exclude-json, >=1:include-json, >=2:canonical-json
tagset => $tagset, ##-- auto-convert XPOS->UPOS for $tagset (known values: 'stts' (default))
##-- Common (INHERITED from Format::TT)
raw => $bool, ##-- INHERITED: attempt to load/save raw data
fh => $fh, ##-- INHERITED: IO::Handle for read/write
utf8 => $bool, ##-- INHERITED: read/write utf8?
tloc => $attr, ##-- INHERITED: if non-empty, parseTokenizerString() sets $w->{$attr}="$off $len"; default=0
#defaultFieldName => $name, ##-- INHERITED: default name for unnamed misc-fields; parsed into @{$tok->{other}{$name}}; default=''
}
=back
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Format::CONLLU: Methods: I/O: Block-wise: Input
=pod
=head2 Methods: Input
=over 4
=item blockScanHead
\%head = blockScanHead(\$buf,$io,\%opts);
gets header offset, length from (mmaped) \$buf.
%opts are as for blockScan().
OVERRIDE scans for CONLL-U C<"# newdoc"> comment.
=item fromFh
$fmt = $fmt->fromFh($filename_or_handle);
new override calls L<DTA::CAB::Format::fromFh()|DTA::CAB::Format/fromFh>.
=item parseConlluFh
guts for L<fromFh()|/fromFh> method: parse handle $fh into local document buffer.
=back
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Format::CONLLU: Methods: Output: Generic
=pod
=head2 Methods: Output
=over 4
=item defaultExtension
$ext = $fmt->defaultExtension();
returns default filename extension for this format (C<.conllu>).
=item putToken
$fmt = $fmt->putToken($tok);
$fmt = $fmt->putToken($tok,$conllu_id);
honors $fmt-E<gt>{level} : E<lt>0:omit-misc ; 0:default:include-misc,exclude-json, E<gt>=1:include-json, E<gt>=2:canonical-json
=item putSentence
$fmt = $fmt->putSentence($sent);
concatenates formatted tokens, adding sentence-id comment if available
=item putDocument
concatenates formatted sentences, adding document C<# $TJ:DOC> comment comment if appropriate.
=item putData
$fmt = $fmt->putData($data);
puts raw data (uses forceDocument());
OVERRIDE uses L<DTA::CAB::Format::TT|DTA::CAB::Format::TT> implementation.
=back
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Format::CONLLU: Methods: I/O: Generic
=pod
=head2 Methods: Low-Level
=over 4
=item unescapeConllu
$str = unescapeConllu($str);
un-escapes CONLLU value strings using URI-escape sequences (C<'%7C' =E<gt> '|'>, C<'%25'=E<gt>'%'>)
=item escapeConllu
$str = escapeConllu($str);
escapes CONLLU value strings using URI-escape sequences (C<'|'=E<gt>'%7C'>, C<'%'=E<gt>'%25'>)
=back
=cut
##========================================================================
## END POD DOCUMENTATION, auto-generated by podextract.perl
##========================================================================
## EXAMPLE
##========================================================================
=pod
=head1 EXAMPLES
=head2 Basic Example
An example file in the format accepted/generated by this module with the default options
(C<level =E<gt> 0, tagset =E<gt> 'stts'>) is:
# sent_id = s1
1 EJn eine DET ART _ _ _ _ Translit=Ejn|norm=Ein|details=eine[_ARTINDEF][sg][acc][neut] <2.5>
2 zamer zahm ADJ ADJA _ _ _ _ Translit=zamer|norm=zahmer|details=zahm[_ADJA][none][pos][pl][gen]\*[strong] <0>
3 Elephant Elefant NOUN NN _ _ _ _ Translit=Elephant|norm=Elefant|details=Elefant[_NN][k_l_t][masc][sg][nom] <0>
4 gillt gelten VERB VVFIN _ _ _ _ Translit=gillt|norm=gilt|details=gelt~en[_VVFIN][third][sg][pres][ind] <0>
5 ohngefähr ohngefähr ADV ADV _ _ _ _ Translit=ohngefähr|norm=ohngefähr|details=ohngefähr[_ADV] <0>
6 zweyhundert zweihundert NUM CARD _ _ _ _ Translit=zweyhundert|norm=zweihundert|details=zwei/Z#hundert[_CARD][num ] <0>
7 Thaler Taler NOUN NN _ _ _ _ Translit=Thaler|norm=Taler|details=Taler[_NN][k_g_artef][masc][pl][nom_acc_gen] <0>
8 . . PUNCT $. _ _ _ _ Translit=.|norm=.|details=$. <0>
# sent_id = s2
1 Ceterum ceterum X FM.la _ _ _ _ Translit=Ceterum|norm=Ceterum|details=* <0>
2 censeo censeo X FM.la _ _ _ _ Translit=censeo|norm=censeo|details=* <0>
3 Carthaginem carthaginem X FM.la _ _ _ _ Translit=Carthaginem|norm=Carthaginem|details=* <0>
4 esse esse X FM.la _ _ _ _ Translit=esse|norm=esse|details=* <0>
5 delendam delendam X FM.la _ _ _ _ Translit=delendam|norm=delendam|details=* <0>
6 . . PUNCT $. _ _ _ _ Translit=.|norm=.|details=$. <0>
=head2 Terse Example
An example file in the terse format generated by this module with the options (C<level =E<gt> -1, tagset =E<gt> 'none'>) is:
# sent_id = s1
1 EJn eine _ ART _ _ _ _ _
2 zamer zahm _ ADJA _ _ _ _ _
3 Elephant Elefant _ NN _ _ _ _ _
4 gillt gelten _ VVFIN _ _ _ _ _
5 ohngefähr ohngefähr _ ADV _ _ _ _ _
6 zweyhundert zweihundert _ CARD _ _ _ _ _
7 Thaler Taler _ NN _ _ _ _ _
8 . . _ $. _ _ _ _ _
# sent_id = s2
1 Ceterum ceterum _ FM.la _ _ _ _ _
2 censeo censeo _ FM.la _ _ _ _ _
3 Carthaginem carthaginem _ FM.la _ _ _ _ _
4 esse esse _ FM.la _ _ _ _ _
5 delendam delendam _ FM.la _ _ _ _ _
6 . . _ $. _ _ _ _ _
=head2 Verbose Example
An example file in the verbose format generated by this module with the options (C<level =E<gt> 2, tagset =E<gt> 'stts'>) including
a full C<TJ>-style dump in the C<json> attribute of the C<MISC> field is:
# sent_id = s1
# $TJ:SENT={"lang":"de"}
1 EJn eine DET ART _ _ _ _ Translit=Ejn|norm=Ein|details=eine[_ARTINDEF][sg][acc][neut] <2.5>|json={"dmoot":{"analyses":[{"details":"Ein","prob":0,"tag":"Ein"}],"morph":[{"hi":"ein~en[_VVIMP][sg]","w":2},{"hi":"eine[_ARTINDEF][sg][nom][masc]","w":2.5},{"hi":"eine[_ARTINDEF][sg][nom][neut]","w":2.5},{"hi":"eine[_ARTINDEF][sg][acc][neut]","w":2.5},{"hi":"ein[_ADV]","w":2.5},{"hi":"ein[_CARD][num]","w":2.5},{"hi":"ein[_PTKVZ]","w":2.5}],"tag":"Ein"},"errid":"72751","exlex":"Ein","f":407,"lts":[{"hi":"\\?ejn","w":0}],"moot":{"analyses":[{"details":"ein[_ADV]","lemma":"ein","prob":2.5,"tag":"ADV"},{"details":"ein[_CARD][num]","lemma":"ein","prob":2.5,"tag":"CARD"},{"details":"ein[_PTKVZ]","lemma":"ein","prob":2.5,"tag":"PTKVZ"},{"details":"eine[_ARTINDEF][sg][acc][neut]","lemma":"eine","prob":2.5,"tag":"ART"},{"details":"eine[_ARTINDEF][sg][nom][masc]","lemma":"eine","prob":2.5,"tag":"ART"},{"details":"eine[_ARTINDEF][sg][nom][neut]","lemma":"eine","prob":2.5,"tag":"ART"},{"details":"ein~en[_VVIMP][sg]","lemma":"einen","prob":2,"tag":"VVIMP"}],"details":{"details":"eine[_ARTINDEF][sg][acc][neut]","lemma":"eine","prob":2.5,"tag":"ART"},"lemma":"eine","tag":"ART","word":"Ein"},"msafe":0,"rw":[],"text":"EJn","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Ejn"}}
2 zamer zahm ADJ ADJA _ _ _ _ Translit=zamer|norm=zahmer|details=zahm[_ADJA][none][pos][pl][gen]\*[strong] <0>|json={"dmoot":{"analyses":[{"details":"zahmer","prob":0.129596281051636,"tag":"zahmer"},{"details":"zamer","prob":1.248,"tag":"zamer"}],"morph":[{"hi":"zahm[_ADJA][none][pos][sg][nom][masc][strong_mixed]","w":0},{"hi":"zahm[_ADJA][none][pos][sg][dat_gen][fem][strong]","w":0},{"hi":"zahm[_ADJA][none][pos][pl][gen]\\*[strong]","w":0},{"hi":"zahm[_ADJC][none][comp]","w":0}],"tag":"zahmer"},"eqphox":[{"hi":"zahmer","w":0.237610012292862}],"f":1,"lts":[{"hi":"tsame6","w":0}],"moot":{"analyses":[{"details":"zahm[_ADJA][none][pos][pl][gen]\\*[strong]","lemma":"zahm","prob":0,"tag":"ADJA"},{"details":"zahm[_ADJA][none][pos][sg][dat_gen][fem][strong]","lemma":"zahm","prob":0,"tag":"ADJA"},{"details":"zahm[_ADJA][none][pos][sg][nom][masc][strong_mixed]","lemma":"zahm","prob":0,"tag":"ADJA"},{"details":"zahm[_ADJC][none][comp]","lemma":"zahm","prob":0,"tag":"ADJD"}],"details":{"details":"zahm[_ADJA][none][pos][pl][gen]\\*[strong]","lemma":"zahm","prob":0,"tag":"ADJA"},"lemma":"zahm","tag":"ADJA","word":"zahmer"},"msafe":0,"rw":[{"hi":"zahmer","w":15.7981405258179}],"text":"zamer","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"zamer"}}
3 Elephant Elefant NOUN NN _ _ _ _ Translit=Elephant|norm=Elefant|details=Elefant[_NN][k_l_t][masc][sg][nom] <0>|json={"dmoot":{"analyses":[{"details":"Elefant","prob":0,"tag":"Elefant"}],"morph":[{"hi":"Elefant[_NN][k_l_t][masc][sg][nom]","w":0}],"tag":"Elefant"},"errid":"84974","exlex":"Elefant","f":303,"lang":["de"],"lts":[{"hi":"\\?elefant","w":0}],"moot":{"analyses":[{"details":"Elefant[_NN][k_l_t][masc][sg][nom]","lemma":"Elefant","prob":0,"tag":"NN"}],"details":{"details":"Elefant[_NN][k_l_t][masc][sg][nom]","lemma":"Elefant","prob":0,"tag":"NN"},"lemma":"Elefant","tag":"NN","word":"Elefant"},"morph":[{"hi":"Elephant[_NN][k_l_t][masc][sg][nom]","w":0},{"hi":"elephant[_FM][en]","w":2.5}],"msafe":1,"rw":[],"text":"Elephant","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Elephant"}}
4 gillt gelten VERB VVFIN _ _ _ _ Translit=gillt|norm=gilt|details=gelt~en[_VVFIN][third][sg][pres][ind] <0>|json={"dmoot":{"analyses":[{"details":"gilt","prob":0.135864566802979,"tag":"gilt"},{"details":"gillt","prob":1.248,"tag":"gillt"},{"details":"Gild","prob":1.35002433472872,"tag":"Gild"}],"morph":[{"hi":"gelt~en[_VVFIN][third][sg][pres][ind]","w":0},{"hi":"gelt~en[_VVIMP][sg]","w":0}],"tag":"gilt"},"eqphox":[{"hi":"gilt","w":0.0521488003432751},{"hi":"Gild","w":0.298937886953354}],"f":5,"lts":[{"hi":"gilt","w":0}],"moot":{"analyses":[{"details":"gelt~en[_VVFIN][third][sg][pres][ind]","lemma":"gelten","prob":0,"tag":"VVFIN"},{"details":"gelt~en[_VVIMP][sg]","lemma":"gelten","prob":0,"tag":"VVIMP"}],"details":{"details":"gelt~en[_VVFIN][third][sg][pres][ind]","lemma":"gelten","prob":0,"tag":"VVFIN"},"lemma":"gelten","tag":"VVFIN","word":"gilt"},"msafe":0,"rw":[{"hi":"gilt","w":18.9322834014893}],"text":"gillt","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"gillt"}}
5 ohngefähr ohngefähr ADV ADV _ _ _ _ Translit=ohngefähr|norm=ohngefähr|details=ohngefähr[_ADV] <0>|json={"dmoot":{"analyses":[{"details":"ohngefähr","prob":0,"tag":"ohngefähr"}],"morph":[{"hi":"ohngefähr[_ADV]","w":0}],"tag":"ohngefähr"},"lang":["de"],"lts":[{"hi":"\\?oNefe6","w":0}],"moot":{"analyses":[{"details":"ohngefähr[_ADV]","lemma":"ohngefähr","prob":0,"tag":"ADV"}],"details":{"details":"ohngefähr[_ADV]","lemma":"ohngefähr","prob":0,"tag":"ADV"},"lemma":"ohngefähr","tag":"ADV","word":"ohngefähr"},"morph":[{"hi":"ohngefähr[_ADV]","w":0}],"msafe":1,"text":"ohngefähr","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"ohngefähr"}}
6 zweyhundert zweihundert NUM CARD _ _ _ _ Translit=zweyhundert|norm=zweihundert|details=zwei/Z#hundert[_CARD][num] <0>|json={"dmoot":{"analyses":[{"details":"zweihundert","prob":0,"tag":"zweihundert"}],"morph":[{"hi":"zwei/Z#hundert[_CARD][num]","w":0}],"tag":"zweihundert"},"errid":"ec","exlex":"zweihundert","f":397,"lts":[{"hi":"tsvaihunde6t","w":0}],"moot":{"analyses":[{"details":"zwei/Z#hundert[_CARD][num]","lemma":"zweihundert","prob":0,"tag":"CARD"}],"details":{"details":"zwei/Z#hundert[_CARD][num]","lemma":"zweihundert","prob":0,"tag":"CARD"},"lemma":"zweihundert","tag":"CARD","word":"zweihundert"},"msafe":0,"rw":[],"text":"zweyhundert","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"zweyhundert"}}
7 Thaler Taler NOUN NN _ _ _ _ Translit=Thaler|norm=Taler|details=Taler[_NN][k_g_artef][masc][pl][nom_acc_gen] <0>|json={"dmoot":{"analyses":[{"details":"Taler","prob":0,"tag":"Taler"}],"morph":[{"hi":"Taler[_NN][k_g_artef][masc][sg][nom_acc_dat]","w":0},{"hi":"Taler[_NN][k_g_artef][masc][pl][nom_acc_gen]","w":0}],"tag":"Taler"},"errid":"57836","exlex":"Taler","f":4078,"lts":[{"hi":"tale6","w":0}],"moot":{"analyses":[{"details":"Taler[_NN][k_g_artef][masc][pl][nom_acc_gen]","lemma":"Taler","prob":0,"tag":"NN"},{"details":"Taler[_NN][k_g_artef][masc][sg][nom_acc_dat]","lemma":"taler","prob":0,"tag":"NN"}],"details":{"details":"Taler[_NN][k_g_artef][masc][pl][nom_acc_gen]","lemma":"Taler","prob":0,"tag":"NN"},"lemma":"Taler","tag":"NN","word":"Taler"},"morph":[{"hi":"Thaler[_NE][lastname][none][k_l_h_m_namti_fam][sg][nom_acc_dat]","w":0},{"hi":"Thale/GN~er[_NN][k_l_h_m_eig_sozk_bev_geo][masc][sg][nom_acc_dat]","w":5},{"hi":"Thale/GN~er[_NN][k_l_h_m_eig_sozk_bev_geo][masc][pl][nom_acc_gen]","w":5},{"hi":"Thal/GN~er[_NN][k_l_h_m_eig_sozk_bev_geo][masc][sg][nom_acc_dat]","w":5},{"hi":"Thal/GN~er[_NN][k_l_h_m_eig_sozk_bev_geo][masc][pl][nom_acc_gen]","w":5}],"msafe":0,"rw":[],"text":"Thaler","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Thaler"}}
8 . . PUNCT $. _ _ _ _ Translit=.|norm=.|details=$. <0>|json={"dmoot":{"analyses":[{"details":".","prob":0,"tag":"."}],"morph":[{"hi":"$.","w":0}],"tag":"."},"errid":"ec","exlex":".","f":5318438,"lts":[{"hi":"","w":0}],"moot":{"analyses":[{"details":"$.","lemma":".","prob":0,"tag":"$."}],"details":{"details":"$.","lemma":".","prob":0,"tag":"$."},"lemma":".","tag":"$.","word":"."},"msafe":1,"text":".","toka":["$."],"tokpp":["$."],"xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"."}}
# sent_id = s2
# $TJ:SENT={"lang":"la"}
1 Ceterum ceterum X FM.la _ _ _ _ Translit=Ceterum|norm=Ceterum|details=* <0>|json={"dmoot":{"analyses":[{"details":"Ceterum","prob":0,"tag":"Ceterum"}],"morph":[{"hi":"[_FM][lat]","w":0}],"tag":"Ceterum"},"f":11,"lang":["la"],"lts":[{"hi":"kete6um","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"ceterum","prob":0,"tag":"FM"}],"details":{"details":"*","lemma":"ceterum","prob":0,"tag":"FM.la"},"lemma":"ceterum","tag":"FM.la","word":"Ceterum"},"msafe":1,"text":"Ceterum","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Ceterum"}}
2 censeo censeo X FM.la _ _ _ _ Translit=censeo|norm=censeo|details=* <0>|json={"dmoot":{"analyses":[{"details":"censeo","prob":0,"tag":"censeo"}],"morph":[{"hi":"[_FM][lat]","w":0}],"tag":"censeo"},"f":9,"lang":["la"],"lts":[{"hi":"kenzeo","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"censeo","prob":0,"tag":"FM"}],"details":{"details":"*","lemma":"censeo","prob":0,"tag":"FM.la"},"lemma":"censeo","tag":"FM.la","word":"censeo"},"msafe":1,"text":"censeo","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"censeo"}}
3 Carthaginem carthaginem X FM.la _ _ _ _ Translit=Carthaginem|norm=Carthaginem|details=* <0>|json={"dmoot":{"analyses":[{"details":"Carthaginem","prob":0,"tag":"Carthaginem"}],"morph":[{"hi":"[_FM][lat]","w":0}],"tag":"Carthaginem"},"f":6,"lang":["la"],"lts":[{"hi":"ka6taginem","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"carthaginem","prob":0,"tag":"FM"}],"details":{"details":"*","lemma":"carthaginem","prob":0,"tag":"FM.la"},"lemma":"carthaginem","tag":"FM.la","word":"Carthaginem"},"msafe":1,"text":"Carthaginem","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Carthaginem"}}
4 esse esse X FM.la _ _ _ _ Translit=esse|norm=esse|details=* <0>|json={"dmoot":{"analyses":[{"details":"esse","prob":0,"tag":"esse"}],"morph":[{"hi":"ess~en[_VVFIN][first][sg][pres][ind]","w":0},{"hi":"ess~en[_VVFIN][first][sg][pres][subjI]","w":0},{"hi":"ess~en[_VVFIN][third][sg][pres][subjI]","w":0},{"hi":"[_FM][lat]","w":0}],"tag":"esse"},"errid":"71075","exlex":"esse","f":1046,"lang":["de","la"],"lts":[{"hi":"\\?ese","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"esse","prob":0,"tag":"FM"},{"details":"ess~en[_VVFIN][first][sg][pres][ind]","lemma":"essen","prob":0,"tag":"VVFIN"},{"details":"ess~en[_VVFIN][first][sg][pres][subjI]","lemma":"essen","prob":0,"tag":"VVFIN"},{"details":"ess~en[_VVFIN][third][sg][pres][subjI]","lemma":"essen","prob":0,"tag":"VVFIN"}],"details":{"details":"*","lemma":"esse","prob":0,"tag":"FM.la"},"lemma":"esse","tag":"FM.la","word":"esse"},"morph":[{"hi":"ess~en[_VVFIN][first][sg][pres][ind]","w":0},{"hi":"ess~en[_VVFIN][first][sg][pres][subjI]","w":0},{"hi":"ess~en[_VVFIN][third][sg][pres][subjI]","w":0}],"msafe":1,"text":"esse","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"esse"}}
5 delendam delendam X FM.la _ _ _ _ Translit=delendam|norm=delendam|details=* <0>|json={"dmoot":{"analyses":[{"details":"delendam","prob":0,"tag":"delendam"}],"morph":[{"hi":"[_FM][lat]","w":0}],"tag":"delendam"},"f":2,"lang":["la"],"lts":[{"hi":"delendam","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"delendam","prob":0,"tag":"FM"}],"details":{"details":"*","lemma":"delendam","prob":0,"tag":"FM.la"},"lemma":"delendam","tag":"FM.la","word":"delendam"},"msafe":1,"text":"delendam","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"delendam"}}
6 . . PUNCT $. _ _ _ _ Translit=.|norm=.|details=$. <0>|json={"dmoot":{"analyses":[{"details":".","prob":0,"tag":"."}],"morph":[{"hi":"$.","w":0}],"tag":"."},"errid":"ec","exlex":".","f":5318438,"lts":[{"hi":"","w":0}],"moot":{"analyses":[{"details":"$.","lemma":".","prob":0,"tag":"$."}],"details":{"details":"$.","lemma":".","prob":0,"tag":"$."},"lemma":".","tag":"$.","word":"."},"msafe":1,"text":".","toka":["$."],"tokpp":["$."],"xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"."}}
=cut
##======================================================================
## Footer
##======================================================================
=pod
=head1 AUTHOR
Bryan Jurish E<lt>jurish@bbaw.deE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2020 by Bryan Jurish
This package is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.20.2 or,
at your option, any later version of Perl 5 you may have available.
=cut