##
## File: DTA::CAB::Format::JSON.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: Datum parser|formatter: JSON code (generic)
package DTA::CAB::Format::JSON;
use DTA::CAB::Format;
use DT
A::CAB::Datum ':all';
use IO::File;
use JSON::XS;
use Carp;
use strict;
##==============================================================================
## Globals
##=================================
sterFormat(name=>__PACKAGE__, short=>'json-xs', filenameRegex=>qr/\.(?i:json(?:[\.\-\_]xs)?)$/);
DTA::CAB::Format->registerFormat(name=>__PACKAGE__, short=>'json');
}
##============================
::CAB::Analyzer::Dict::Json.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: generic analysis dictionary API using JSON values
package DTA::CAB::Analyzer::Dict::Json;
use DTA::CAB::Analyz
er ':child';
use DTA::CAB::Analyzer::Dict;
use JSON::XS;
use IO::File;
use Carp;
use Encode qw(encode decode);
use strict;
##=========================================================================
me Options
## dictFile => $filename, ##-- filename (default=undef): should be TT-dict with JSON-encoded hash values
##
## ##-- Analysis Output
## label => $lab, ##-- analyzer
fault='/xmlrpc')
##
## format => $formatName, ##-- default query I/O format (default='json')
## #encoding => $encoding, ##-- query encoding (always utf8)
## cacheGet => $bo
rpcpath => '/xmlrpc',
##
cacheGet=>1,
cacheSet=>1,
##
format => 'json',
##
##-- low-level stuff
ua => undef,
uargs => {},
##
##-- u
rpc')
format => $fmtName, ##-- DTA::CAB::Format short name for transfer (default='json')
cacheGet => $bool, ##-- allow cached response from server? (default=1)
ca
]
Simple machine-readable "vertical" text format based on the L<TT|/TT>
format but using L<JSON|http://json.org/> to encode sentence- and token-level attributes
rather than an explicit attribute labe
"C<%%$TJ:DOC=>I<JSON>", where I<JSON> is a JSON object representing
auxiliary document attributes.
Sentence-attribute lines are analogous comments of the form
"C<%%$TJ:SENT=>I<JSON>".
Token lines consist of the token surface text,
followed by a TAB character,
followed by a JSON object representing the internal token structure.
Useful for further script-based processing.
=cut
##==========
g DTA::CAB::Analyzer::Dict::JsonDB
=item L<DTA::CAB::Analyzer::Cache::Static::BDB|DTA::CAB::Analyzer::Cache::Static::BDB>
Static cache using DTA::CAB::Analyzer::Dict::JsonDB
=item L<DTA::CAB::Analy
AB::Analyzer::Dict::Json|DTA::CAB::Analyzer::Dict::Json>
generic analysis dictionary API using JSON values
=item L<DTA::CAB::Analyzer::Dict::JsonCDB|DTA::CAB::Analyzer::Dict::JsonCDB>
generic analy
sis dictionary API using JSON values
=item L<DTA::CAB::Analyzer::Dict::JsonDB|DTA::CAB::Analyzer::Dict::JsonDB>
generic analysis dictionary API using JSON values
=item L<DTA::CAB::Analyzer::DmootSu
turn DTA::CAB::Document->new(@_); ##-- default
}
## $thingy = $obj->TO_JSON()
##  + wrapper hook for JSON::XS
##  + returns a new, unblessed HASH-ref holding a shallow copy of %$obj
##    (nested references are shared with the object, not cloned)
sub TO_JSON {
  my ($obj) = @_;
  my %plain = %$obj;   ##-- copy top-level key/value pairs only
  return \%plain;
}
1; ##-- be happy
__END__
##===============
e DTA::CAB::Analyzer::Common;
use DTA::CAB::Analyzer::Dict::Json;
use DTA::CAB::Analyzer::Dict::JsonDB;
use DTA::CAB::Analyzer::Dict::JsonCDB;
use DTA::CAB::Analyzer::TextPhonetic;
use DTA::CAB::Ana
mats for
L<document data|/"Data Model">,
including
L<"CSV"|DTA::CAB::Format::CSV>,
L<"JSON"|DTA::CAB::Format::JSON>,
L<"Raw"|DTA::CAB::Format::Raw>,
L<"Text"|DTA::CAB::Format::Text>,
L<"TT"|DTA::CAB::
tDocument($_[1]); }
## $fmt = $fmt->putData($data)
##  + output hook for arbitrary raw data (e.g. for YAML, JSON, XmlPerl)
##  + this implementation only reports via logconfess() that the method
##    is not implemented; $data is ignored here
sub putData {
  my ($fmt) = @_;   ##-- leave @_ untouched
  return $fmt->logconfess("putData() not implemented!");
}
1; ##-- be happy
_
/format.html>
format, with optional special handling for additional C<MISC> fields, including
C<json=JSON> for embedding L<DTA::CAB::Format::TJ|DTA::CAB::Format::TJ> CAB-token structure.
Registered as
mat::JSON|DTA::CAB::Format::JSON>
Abstract datum parser|formatter for JSON I/O.
Transparently wraps one of the
L<DTA::CAB::Format::JSON::XS|DTA::CAB::Format::JSON::XS>
or
L<DTA::CAB::Format::JSON::Sy
=>'csv', class=>'DTA::CAB::Format::CSV', label=>'CSV'},
{key=>'json', class=>'DTA::CAB::Format::JSON', label=>'JSON', level=>1},
{key=>'perl', class=>'DTA::CAB::Format::Perl', lab
Doc() if (!$qdoc);
$qopts = $wr->parseQueryOpts() if (!$qopts);
#my $fmt = DTA::CAB::Format::JSON->new;
my $fmt = DTA::CAB::Format::YAML->new;
my $qstr = $fmt->putDocument($qdoc)->toString
::Text1; ##-- test v1.x
use DTA::CAB::Format::TT;
use DTA::CAB::Format::TJ; ##-- tt-like with json-encoded token data
use DTA::CAB::Format::ExpandList; ##-- flat tt-like expansion list, for DDC
us
emmaList; ##-- flat tt-like lemma-list, for DDC
use DTA::CAB::Format::YAML;
use DTA::CAB::Format::JSON;
use DTA::CAB::Format::XmlCommon;
use DTA::CAB::Format::XmlNative; ##-- load first to avoid clobb
:CAB::Analyzer::Dict::JsonDB.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: generic analysis dictionary API using JSON values
package DTA::CAB::Analyzer::Dict::JsonDB;
use DTA::CAB::Ana
lyzer ':child';
use DTA::CAB::Analyzer::Dict::Json;
use DTA::CAB::Analyzer::Dict::BDB;
use IO::File;
use Carp;
use Encode qw(encode decode);
use strict;
##============================================
==========================
our @ISA = qw(DTA::CAB::Analyzer::Dict::BDB DTA::CAB::Analyzer::Dict::Json);
##==============================================================================
## Constructo
CAB::Analyzer::Dict::JsonCDB.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: generic analysis dictionary API using JSON values
package DTA::CAB::Analyzer::Dict::JsonCDB;
use DTA::CAB::An
alyzer ':child';
use DTA::CAB::Analyzer::Dict::Json;
use DTA::CAB::Analyzer::Dict::CDB;
use IO::File;
use Carp;
use utf8;
use strict;
##===============================================================
==========================
our @ISA = qw(DTA::CAB::Analyzer::Dict::CDB DTA::CAB::Analyzer::Dict::Json);
##==============================================================================
## Constructo
Analyzer::Dict::Json;
use Carp;
use strict;
our @ISA = qw(DTA::CAB::Analyzer::Dict::Json);
## $obj = CLASS_OR_OBJ->new(%args)
## + object structure: see DTA::CAB::Analyzer::Dict::Json
sub new {
my
ew simple language-guesser object, which inherits
from L<DTA::CAB::Analyzer::Dict::Json|DTA::CAB::Analyzer::Dict::Json>.
Known options in %args:
##-- analysis selection
label => 'lang', ##-- a
alyzer::EqPho::JsonCDB.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: dictionary-based equivalence-class expander, phonetic variant
package DTA::CAB::Analyzer::EqPho::JsonCDB;
use DTA::
CAB::Analyzer ':child';
use DTA::CAB::Analyzer::Dict::JsonCDB;
use strict;
##==============================================================================
## Globals
##==============================
================================================
our @ISA = qw(DTA::CAB::Analyzer::Dict::JsonCDB);
##==============================================================================
## Constructors et
nalyzer::EqRW::JsonCDB.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: dictionary-based equivalence-class expander, rewrite variant
package DTA::CAB::Analyzer::EqRW::JsonCDB;
use DTA::C
AB::Analyzer ':child';
use DTA::CAB::Analyzer::Dict::JsonCDB;
use strict;
##==============================================================================
## Globals
##===============================
===============================================
our @ISA = qw(DTA::CAB::Analyzer::Dict::JsonCDB);
##==============================================================================
## Constructors etc
-------------------------------
## Methods: Output: Generic API
## + these methods just dump raw json
## + you're pretty much restricted to dumping a single document here
## $fmt = $fmt->putAnythin
EGIN {
DTA::CAB::Format->registerFormat(name=>__PACKAGE__, filenameRegex=>qr/\.(?i:tj|tjson|cab\-tj|cab\-tjson)$/);
}
##=============================================================================
=========================
## $jxs = $fmt->jsonxs()
sub jsonxs {
require JSON::XS;
return $_[0]{jxs} if (defined($_[0]{jxs}));
return $_[0]{jxs} = JSON::XS->new->utf8(0)->relaxed(1)->canonical(a
@_;
$fmt->setLayers($fh);
my $jxs = $fmt->jsonxs();
##-- ye olde loope
my (%sa,%doca);
my $toks = [];
my @body = qw();
my ($tok,$text,$json);
while (defined($_=<$fh>)) {
if ($_ =~
d}
## details=DETAILS # sets $tok->{moot}{details}{details}
## json=JSON # clobbers %$tok with JSON a la Format::TJ
## + VALUEs of specially handled attributes containing lit
ormatLevel, ##-- OVERRIDE: <0:omit-misc ; 0:default:include-misc,exclude-json, >=1:include-json, >=2:canonical-json
## tagset => $tagset, ##-- auto-convert XPOS->UPOS for $tagse
##==============================================================================
## $jxs = $fmt->jsonxs()
## + INHERITED from Format::TJ
## $str = unescapeConllu($str)
## + un-escapes CONLLU value
use DTA::CAB::Analyzer ':child';
use DTA::CAB::Analyzer::Dict::JsonCDB;
use Carp;
use strict;
our @ISA = qw(DTA::CAB::Analyzer::Dict::JsonCDB);
##-----------------------------------------------------
= CLASS_OR_OBJ->new(%args)
## + object structure: see DTA::CAB::Analyzer::Dict::JsonCDB, DTA::CAB::Analyzer::Dict::Json
sub new {
my $that = shift;
my $dic = $that->SUPER::new(
##-- over
for sub-keys...");
my $tied = $dic->{dbf}{tied};
my %jkeys = qw();
my $jxs = $dic->jsonxs;
my ($t,$js,$j);
for ($t=$tied->FIRSTKEY; defined($t); $t=$tied->NEXTKEY($t)) {
$