RDF-LDF/lib/RDF/LDF.pm
package RDF::LDF;
use strict;
use warnings;
use feature qw(state);
use utf8;
use Moo;
use Data::Compare;
use RDF::NS;
use RDF::Trine;
use RDF::Query;
use URI::Escape;
use LWP::UserAgent;
use HTTP::Request::Common;
use Log::Any ();
use Cache::LRU;
use Clone qw(clone);
use JSON;
use URI::Template;
use RDF::LDF::Error;
our $VERSION = '0.25';
has url => (
is => 'ro' ,
required => 1
);
has sn => (
is => 'ro' ,
lazy => 1,
builder => sub {
RDF::NS->new->REVERSE;
}
);
has lru => (
is => 'ro' ,
lazy => 1,
builder => sub {
Cache::LRU->new( size => 100 );
}
);
has log => (
is => 'ro',
lazy => 1,
builder => sub {
Log::Any->get_logger(category => ref(shift));
}
);
# Public method
sub is_fragment_server {
my $self = shift;
my $federated = ref($self->url) ? $self->url : [ $self->url ];
for my $part (@$federated) {
return 0 unless $self->get_query_pattern($part);
}
return 1;
}
# Public method
# Optimized method to find all bindings matching a pattern
# See:
# Verborgh, Ruben, et al. Querying Datasets on the Web with High Availability. ISWC2014
# http://linkeddatafragments.org/publications/iswc2014.pdf
sub get_pattern {
my ($self,$bgp,$context,%args) = @_;
unless (defined $bgp) {
RDF::LDF::Error->throw(text => "can't execute get_pattern for an empty pattern");
}
my (@triples) = ($bgp->isa('RDF::Trine::Statement') or $bgp->isa('RDF::Query::Algebra::Filter'))
? $bgp
: $bgp->triples;
unless (@triples) {
RDF::LDF::Error->throw(text => "can't execute get_pattern for an empty pattern");
}
my @vars = $bgp->referenced_variables;
my @bgps = map { $self->_parse_triple_pattern($_)} @triples;
my $sub = sub {
state $it = $self->_find_variable_bindings(\@bgps);
my $b = $it->();
return undef unless $b;
my $binding = RDF::Trine::VariableBindings->new({});
for my $key (keys %$b) {
my $val = $b->{$key};
$key =~ s{^\?}{};
$binding->set($key => $val);
}
$binding;
};
RDF::Trine::Iterator::Bindings->new($sub,\@vars);
}
sub _find_variable_bindings {
my $self = shift;
my $bgps = shift;
my $bindings = shift // {};
my $iterator = sub {
state $it;
state $results = sub {};
my $ret;
# Loop over all variabe bindings with multiple matches
while (!defined($ret = $results->())) {
unless (defined $it) {
# Find the an binding iterator for the best pattern from $bgpgs
($it,$bgps) = $self->_find_variable_bindings_($bgps);
return undef unless $it;
}
# Update all the other bgps with the current binding..
my $this_binding = $it->();
return undef unless $this_binding;
$bindings = { %$bindings , %$this_binding };
return $bindings unless @$bgps;
# Apply all the bindings to the rest of the bgps;
my $bgps_prime = $self->_apply_binding($this_binding,$bgps);
$results = $self->_find_variable_bindings($bgps_prime,$bindings);
}
$ret;
};
$iterator;
}
# Given an array ref of patterns return the variable bindings for the
# pattern with the least number of triples.
#
# my ($iterator, $rest) = $self->_find_variable_bindings([ {pattern} , {pattern} , ... ]);
#
# where:
#
# $iterator - Iterator for variable bindings for the winnnig pattern, or undef when no
# patterns are provided or we get zero results
#
# $rest - An array ref of patterns not containing the best pattern
sub _find_variable_bindings_ {
my ($self,$bgps) = @_;
return (undef, undef) unless _is_array_ref($bgps) && @$bgps > 0;
my ($pattern,$rest) = $self->_find_best_pattern($bgps);
return (undef,undef) unless defined $pattern;
my $it = $self->get_statements($pattern);
# Build a mapping of variable bindings to Triple nodes. E.g.
# {
# '?s' => 'subject' ,
# '?p' => 'predicate' ,
# '?o' => 'object' ,
#}
my %pattern_var_map = map { $pattern->{$_} =~ /^\?/ ? ($pattern->{$_} , $_) : () } keys %$pattern;
my $num_of_bindings = keys %pattern_var_map;
my $sub = sub {
my $triple = $it->();
return undef unless defined $triple;
my %var_map = %pattern_var_map;
for (keys %var_map) {
my $method = $var_map{$_};
$var_map{$_} = $triple->$method;
}
return {%var_map};
};
return ($sub,$rest);
}
sub _apply_binding {
my ($self,$binding,$bgps) = @_;
return unless _is_array_ref($bgps) && @$bgps > 0;
my $copy = clone $bgps;
my @new = ();
for my $pattern (@$copy) {
for (qw(subject predicate object)) {
my $val = $pattern->{$_};
if (defined($val) && $binding->{$val}) {
my $str_val = $self->_node_as_string($binding->{$val});
$pattern->{$_} = $str_val
}
}
push @new, $pattern;
}
return \@new;
}
# Create a pattern which binds to the graph pattern
#
# Usage:
#
# my $triples = [
# { subject => ... , predicate => ... , object => ... } , #tp1
# { subject => ... , predicate => ... , object => ... } , #tp2
# ...
# { subject => ... , predicate => ... , object => ... } , #tpN
# ];
#
# my ($pattern, $rest) = $self->_find_best_pattern($triples);
#
# $pattern => Pattern in $triples which least amount of results
# $rest => All patterns in $triples except $pattern
#
sub _find_best_pattern {
my ($self,$triples) = @_;
return undef unless @$triples > 0;
# If we only have one tripple pattern, the use it to create the bind
if (@$triples == 1) {
return $triples->[0] , [];
}
my $best_pattern = undef;
my $best_count = undef;
for my $pattern (@$triples) {
my $count = $self->_total_triples($pattern) // 0;
if ($count == 0) {
$best_pattern = undef;
$best_count = 0;
last;
}
elsif (!defined $best_count || $count < $best_count) {
$best_count = $count;
$best_pattern = $pattern;
}
}
return (undef,$triples) unless defined $best_pattern;
my @rest_triples = map { Data::Compare::Compare($_,$best_pattern) ? () : ($_) } @$triples;
return ($best_pattern, \@rest_triples);
}
# Find the total number of triples available for a pattern
#
# Usage:
#
# my $count = $self->_total_triples(
# { subject => ... , predicate => ... , object => ...}
# );
# Where
# $count is a number
sub _total_triples {
my ($self,$pattern) = @_;
# Retrieve one...
my $iterator = $self->get_statements($pattern);
return 0 unless $iterator;
my ($model,$info) = $iterator->();
$info->{hydra_totalItems};
}
sub _node_as_string {
my $self = shift;
my $node = shift;
if (_is_blessed($node) && $node->isa('RDF::Trine::Node')) {
if ($node->isa('RDF::Trine::Node::Variable')) {
return $node->as_string; # ?foo
} elsif ($node->isa('RDF::Trine::Node::Literal')) {
return $node->as_string; # includes quotes and any language or datatype
} else {
return $node->value; # the raw IRI or blank node identifier value, without other syntax
}
}
return '';
}
# For an BGP triple create a fragment pattern
sub _parse_triple_pattern {
my ($self,$triple) = @_;
my $subject = $self->_node_as_string($triple->subject);
my $predicate = $self->_node_as_string($triple->predicate);
my $object = $self->_node_as_string($triple->object);
my $hash = {
subject => $subject ,
predicate => $predicate,
object => $object
};
return $hash;
}
# Dynamic find out which triple patterns need to be used to query the fragment server
# Returns a hash:
# {
# rdf_subject => <name_of_subject_variable> ,
# rdf_predicate => <name_of_predicate_variable> ,
# rdf_object => <name_of_object_variable>
# hydra_template => <endpoint_for_tripple_pattern>
# }
sub get_query_pattern {
my ($self,$url) = @_;
my $fragment = $self->get_model_and_info($url);
return undef unless defined $fragment;
my $info = $fragment->{info};
my $pattern;
return undef unless _is_hash_ref($info);
return undef unless $info->{hydra_template};
for (keys %$info) {
next unless _is_hash_ref($info->{$_}) && $info->{$_}->{hydra_property};
my $property = join "_" , $self->sn->qname($info->{$_}->{hydra_property});
my $variable = $info->{$_}->{hydra_variable};
$pattern->{$property} = $variable;
}
return undef unless $pattern->{rdf_subject};
return undef unless $pattern->{rdf_predicate};
return undef unless $pattern->{rdf_object};
$pattern->{hydra_template} = $info->{hydra_template};
$pattern;
}
#----------------------------------------------------------------------------------
# Public method
sub get_statements {
my ($self,@triple) = @_;
my ($subject,$predicate,$object);
if (@triple == 3) {
($subject,$predicate,$object) = @triple;
}
elsif (_is_hash_ref($triple[0])) {
$subject = $triple[0]->{subject};
$predicate = $triple[0]->{predicate};
$object = $triple[0]->{object};
}
$subject = $subject->value if (_is_blessed($subject) && $subject->isa('RDF::Trine::Node') and not $subject->is_variable);
$predicate = $predicate->value if (_is_blessed($predicate) && $predicate->isa('RDF::Trine::Node') and not $predicate->is_variable);
if (_is_blessed($object) && $object->isa('RDF::Trine::Node') and not $object->is_variable) {
$object = ($object->isa('RDF::Trine::Node::Literal')) ? $object->as_string : $object->value;
}
# Do a federated search over all the URLs provided
my $parts = ref($self->url) ? $self->url : [ $self->url ];
my @federated;
for my $part (@$parts) {
my $pattern = $self->get_query_pattern($part);
return undef unless defined $pattern;
my %params;
$params{ $pattern->{rdf_subject} } = $subject if _is_string($subject);
$params{ $pattern->{rdf_predicate} } = $predicate if _is_string($predicate);
$params{ $pattern->{rdf_object} } = $object if _is_string($object);
my $template = URI::Template->new($pattern->{hydra_template});
push @federated , $template->process(%params)->as_string;
}
my $sub = sub {
state $model;
state $info;
state $iterator;
state $url = shift(@federated);
my $triple;
do {
unless (defined $model) {
# When no more result pages are available switch
# to the next federated url...
return unless defined($url) || defined($url = pop(@federated));
my $fragment = $self->get_model_and_info($url);
return unless defined $fragment->{model};
$model = $fragment->{model};
$info = $fragment->{info};
$url = $info->{hydra_next};
$iterator = $model->get_statements;
}
$triple = $iterator->next;
unless ($iterator->peek) {
$model = undef;
}
}
while (!defined $triple && defined($url = pop(@federated)));
wantarray ? ($triple,$info) : $triple;
};
$sub;
}
# Fetch a fragment page and extract the metadata
sub get_model_and_info {
my ($self,$url) = @_;
if (my $cache = $self->lru->get($url)) {
return $cache;
}
my $model = $self->get_fragment($url);
my $info = {};
if (defined $model) {
$info = $self->_model_metadata($model,$url, clean => 1);
}
my $fragment = { model => $model , info => $info };
$self->lru->set($url => $fragment);
$fragment;
}
# Fetch a result page from fragment server
sub get_fragment {
my ($self,$url) = @_;
return undef unless $url;
$self->log->info("fetching: $url");
my $model = RDF::Trine::Model->temporary_model;
# JSON support in RDF::Trine isn't JSON-LD
# Set the accept header quality parameter at a minimum for this format
my $ua = clone(RDF::Trine->default_useragent);
$ua->agent("RDF:::LDF/$RDF::LDF::VERSION " . $ua->_agent);
$ua->default_header('Accept','text/turtle;q=1.0,application/turtle;q=1.0,application/x-turtle;q=1.0,application/rdf+xml;q=0.9,text/x-nquads;q=0.9,application/json;q=0.1,application/x-rdf+json;q=0.1');
eval {
# Need to explicitly set the useragent to keep the accept headers
RDF::Trine::Parser->parse_url_into_model($url, $model, useragent => $ua);
};
if ($@) {
$self->log->error("failed to parse input");
}
return $model;
}
# Create a hash with fragment metadata from a RDF::Trine::Model
# parameters:
# $model - RDF::Trine::Model
# $this_uri - result page URL
# %opts
# clean => 1 - remove the metadata from the model
sub _model_metadata {
my ($self,$model,$this_uri,%opts) = @_;
my $info = {};
$self->_build_metadata($model, {
subject => RDF::Trine::Node::Resource->new($this_uri)
} , $info);
if ($opts{clean}) {
$model->remove_statements(
RDF::Trine::Node::Resource->new($this_uri),
undef,
undef
);
$model->remove_statements(
undef,
undef,
RDF::Trine::Node::Resource->new($this_uri)
);
}
for my $predicate (
'http://www.w3.org/ns/hydra/core#variable' ,
'http://www.w3.org/ns/hydra/core#property' ,
'http://www.w3.org/ns/hydra/core#mapping' ,
'http://www.w3.org/ns/hydra/core#template' ,
'http://www.w3.org/ns/hydra/core#member' ,
'http://www.w3.org/ns/hydra/core#variableRepresentation' ,
) {
$self->_build_metadata($model, {
predicate => RDF::Trine::Node::Resource->new($predicate)
}, $info);
if ($opts{clean}) {
$model->remove_statements(
undef,
RDF::Trine::Node::Resource->new($predicate) ,
undef);
}
}
my $source = $info->{dct_source}->[0] if _is_array_ref($info->{dct_source});
if ($source) {
$self->_build_metadata($model, {
subject => RDF::Trine::Node::Resource->new($source)
}, $info);
if ($opts{clean}) {
$model->remove_statements(
RDF::Trine::Node::Resource->new($source),
undef,
undef
);
$model->remove_statements(
undef,
undef,
RDF::Trine::Node::Resource->new($source)
);
}
}
$info;
}
# Helper method for _parse_model
sub _build_metadata {
my ($self, $model, $triple, $info) = @_;
my $iterator = $model->get_statements(
$triple->{subject},
$triple->{predicate},
$triple->{object}
);
while (my $triple = $iterator->next) {
my $subject = $triple->subject->as_string;
my $predicate = $triple->predicate->uri_value;
my $object = $triple->object->value;
my $qname = join "_" , $self->sn->qname($predicate);
if ($qname =~ /^(hydra_variable|hydra_property)$/) {
my $id= $triple->subject->value;
$info->{"_$id"}->{$qname} = $object;
}
elsif ($qname eq 'hydra_mapping') {
my $id= $triple->subject->value;
push @{$info->{"_$id"}->{$qname}} , $object;
}
elsif ($qname =~ /^(void|hydra)_/) {
$info->{$qname} = $object;
}
else {
push @{$info->{$qname}} , $object;
}
}
$info;
}
sub _is_array_ref {
return ref($_[0]) eq 'ARRAY';
}
sub _is_hash_ref {
return ref($_[0]) eq 'HASH';
}
sub _is_blessed {
return ref($_[0]) =~ /\S/;
}
sub _is_string {
return defined($_[0]) && !ref($_[0]) && ref(\$_[0]) ne 'GLOB' && length($_[0]) > 0;
}
1;
__END__
=head1 NAME
RDF::LDF - Linked Data Fragments client
=begin markdown
# STATUS
[](https://travis-ci.org/phochste/RDF-LDF)
[](https://coveralls.io/r/phochste/RDF-LDF)
[](http://cpants.cpanauthors.org/dist/RDF-LDF)
=end markdown
=head1 SYNOPSIS
use RDF::Trine::Store::LDF;
use RDF::Trine::Store;
# To use a HTTP cache:
use LWP::UserAgent::CHICaching;
my $cache = CHI->new( driver => 'Memory', global => 1 );
my $ua = LWP::UserAgent::CHICaching->new(cache => $cache);
RDF::Trine->default_useragent($ua);
my $store = RDF::Trine::Store->new_with_config({
storetype => 'LDF',
url => $url
});
my $it = $store->get_statements();
while (my $st = $it->next) {
# $st is a RDF::Trine::Statement
print "$st\n";
}
# Or the low level modules themselves
use RDF::LDF;
my $client = RDF::LDF->new(url => 'http://fragments.dbpedia.org/2014/en');
my $iterator = $client->get_statements($subject, $predicate, $object);
while (my $statement = $iterator->()) {
# $model is a RDF::Trine::Statement
}
=head1 DESCRIPTION
RDF::LDF implements a basic L<Linked Data Fragment|http://linkeddatafragments.org/> client.
This a low level module to implement the Linked Data Fragment protocol. You probably want to
use L<RDF::Trine::Store::LDF>.
=head1 CONFIGURATION
=over
=item url
URL to retrieve RDF from.
Experimental: more than one URL can be provided for federated search over many LDF endpoints.
my $store = RDF::Trine::Store->new_with_config({
storetype => 'LDF',
url => [ $url1, $url2, $url3 ]
});
# or
my $client = RDF::LDF->new(url => [ $url1, $url2, $url3 ]);
=back
=head1 METHODS
=over
=item get_statements( $subject, $predicate, $object )
Return an iterator for every RDF::Trine::Statement served by the LDF server.
=item get_pattern( $bgp );
Returns a stream object of all bindings matching the specified graph pattern.
=back
=head1 CONTRIBUTORS
Patrick Hochstenbach, C<< patrick.hochstenbach at ugent.be >>
Gregory Todd Williams, C<< greg@evilfunhouse.com >>
Jacob Voss, C<< voss@gbv.de >>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2015 by Patrick Hochstenbach.
This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself.
=encoding utf8
=cut