Group
Extension

Matches 3

NewsExtractor ( G/GU/GUGOD/NewsExtractor-v0.45.0.tar.gz, GUGOD, 2022; MetaCPAN )
NewsExtractor/dev-bin/try-htmlextract.pl ( view source; MetaCPAN )
use v5.28;
use warnings;
use FindBin;
use lib $FindBin::Bin. "/../lib";

use JSON;
use File::Slurp qw(read_file);
use Encode qw(encode);
use Getopt::Long qw< GetOptions >;
use Mojo::DOM;

use NewsExtr
;
use NewsExtractor::GenericExtractor;

my %opts;
GetOptions(
    \%opts,
    "file=s",
);
my $json = JSON->new->pretty->canonical->utf8->allow_blessed->convert_blessed;

if ($opts{file}) {
    my $ht
jo::DOM->new($html);
    my $x = NewsExtractor::GenericExtractor->new( dom => $dom );

    print $json->encode({
        file => $opts{file},
        extracted => {
            site_name => $x->site_n
NewsExtractor ( G/GU/GUGOD/NewsExtractor-v0.45.0.tar.gz, GUGOD, 2022; MetaCPAN )
NewsExtractor/lib/NewsExtractor/Article.pm ( view source; MetaCPAN )
> 'ro', isa => Text1K );
has journalist => ( predicate => 1, is => 'ro', isa => Text1K );

sub TO_JSON {
    my ($self) = @_;
    return {
        headline => $self->headline,
        article_body => 
NewsExtractor ( G/GU/GUGOD/NewsExtractor-v0.45.0.tar.gz, GUGOD, 2022; MetaCPAN )
NewsExtractor/lib/NewsExtractor/JSONLDExtractor.pm ( view source; MetaCPAN )
sExtractor::JSONLDExtractor;
use Moo;
extends 'NewsExtractor::TXExtractor';

use Mojo::Transaction::HTTP;
use Types::Standard qw( InstanceOf HashRef ArrayRef );
use Mojo::JSON qw(from_json);
use Impor
    my ($self) = @_;
    my $el = $self->dom->at('script[type="application/ld+json"]') or return {};
    my $x = from_json( $el->text );
    if (HashRef->check($x)) {
        return $x;
    }
    if (

Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find any problems, please report them on GitHub.