use v5.28;
use warnings;
use FindBin;
use lib $FindBin::Bin. "/../lib";
use JSON;
use File::Slurp qw(read_file);
use Encode qw(encode);
use Getopt::Long qw< GetOptions >;
use Mojo::DOM;
use NewsExtr
;
use NewsExtractor::GenericExtractor;
# Parse command-line options into %opts. The only option declared here is
# `--file`, which takes a string value.
my %opts;

# GetOptions returns false when it detects a parsing error (unknown option,
# missing option value). It has already warned on STDERR at that point, so
# stop instead of silently continuing with partially parsed options.
GetOptions(
    \%opts,
    "file=s",
) or die "Usage: $0 --file FILE\n";

# JSON encoder used for the script's output: pretty-printed, keys sorted
# (canonical), UTF-8 encoded. allow_blessed/convert_blessed are enabled so
# blessed objects in the data do not make encode() fail; with convert_blessed
# an object's TO_JSON method is used to serialise it.
my $json = JSON->new->pretty->canonical->utf8->allow_blessed->convert_blessed;
if ($opts{file}) {
my $ht
jo::DOM->new($html);
my $x = NewsExtractor::GenericExtractor->new( dom => $dom );
print $json->encode({
file => $opts{file},
extracted => {
site_name => $x->site_n
> 'ro', isa => Text1K );
# Read-only `journalist` attribute, constrained by the Text1K type.
# `predicate => 1` asks the OO framework to generate a predicate method
# (presumably `has_journalist` under Moo/Moose naming -- confirm, as the
# enclosing package and its imports are not visible from here).
has journalist => ( predicate => 1, is => 'ro', isa => Text1K );
sub TO_JSON {
my ($self) = @_;
return {
headline => $self->headline,
article_body =>
sExtractor::JSONLDExtractor;
use Moo;
extends 'NewsExtractor::TXExtractor';
use Mojo::Transaction::HTTP;
use Types::Standard qw( InstanceOf HashRef ArrayRef );
use Mojo::JSON qw(from_json);
use Impor
my ($self) = @_;
my $el = $self->dom->at('script[type="application/ld+json"]') or return {};
my $x = from_json( $el->text );
if (HashRef->check($x)) {
return $x;
}
if (