Yet Another CPAN Grep

Bio-BioVeL/lib/Bio/BioVeL/Service/NeXMLMerger.pm

package Bio::BioVeL::Service::NeXMLMerger;
use strict;
use warnings;
use Scalar::Util 'looks_like_number';
use Bio::Phylo::Factory;
use Bio::BioVeL::Service;
use Bio::BioVeL::Service::NeXMLMerger::DataReader;
use Bio::BioVeL::Service::NeXMLMerger::TreeReader;
use Bio::BioVeL::Service::NeXMLMerger::MetaReader;
use Bio::BioVeL::Service::NeXMLMerger::CharSetReader;
use Bio::Phylo::Util::CONSTANT ':objecttypes';
use base 'Bio::BioVeL::Service';

# XXX this is a bogus namespace for properties we encounter in metadata
my $ns  = 'http://biovel.eu/terms#';

# creates Bio::Phylo objects
my $fac = Bio::Phylo::Factory->new;

# maps between the metadata field that identifies the subject of the 
# annotation (e.g. a taxon) and Bio::Phylo's type constant. this is
# used by $project->get_items($const) to fetch the objects that are
# candidates for annotation.
my %typemap = (
	'TaxonID'     => _TAXON_,
	'NodeID'      => _NODE_,
	'TreeID'      => _TREE_,
	'AlignmentID' => _MATRIX_,
	'SiteID'      => _CHARACTER_,
	'CharacterID' => _CHARACTER_,
	'MatrixID'    => _MATRIX_,
);

=head1 NAME

Bio::BioVeL::Service::NeXMLMerger - merges phylogenetic data and metadata into NeXML

=head1 SYNOPSIS

 use Bio::BioVeL::Service::NeXMLMerger;

 # arguments can either be passed in from the command line argument array or as 
 # HTTP request parameters, e.g. from $QUERY_STRING
 @ARGV = (
 	'-data'          => $data,  # a location, either local or as URL
 	'-trees'         => $tree,  # a location, either local or as URL
 	'-meta'          => $meta,  # a location, either local or as URL
 	'-charsets'      => $sets,  # a location, either local or as URL
 	'-dataformat'    => 'nexus',
 	'-treeformat'    => 'newick',
 	'-metaformat'    => 'json',
 	'-charsetformat' => 'nexus',
 );

 my $merger = Bio::BioVeL::Service::NeXMLMerger->new;
 my $nexml = $merger->response_body;

=head1 DESCRIPTION

This package merges phylogenetic data and metadata to produce a NeXML document. Although
it can be used inside scripts that receive command line arguments, it is intended to be
used as a RESTful web service that clients can be written against, e.g. in 
L<http://taverna.org.uk> for inclusion in L<http://biovel.eu> workflows.

=head1 METHODS

=over

=item new

The constructor typically receives no arguments.

=cut

sub new {
	my $self = shift->SUPER::new(
	
		# these parameters are turned into object properties
		# whose values are magically filled in. after object
		# construction the object can access these properties,
		# e.g. as $self->dataformat
		'parameters' => [
			'dataformat',
			'datatype',
			'data',
			'treeformat',
			'trees',
			'metaformat',
			'meta',
			'charsetformat',
			'charsets',			
		],
		@_,
	);	
	return $self;
}

=item response_header

Returns the MIME-type HTTP header. Note: at present this isn't really used, it needs
refactoring to play nice with the way mod_perl constructs response headers. This would
probably be done by only returning the MIME-type itself, which is then included in the
header by the superclass.

=cut

sub response_header { "Content-type: application/xml\n\n" }

# given a project object, instantiates a metadata reader, figures out what type of
# objects is to be annotated, and attaches the annotations as biovel:* CURIEs.

sub _attach_metadata {
	my ( $self, $project ) = @_;
	my $log = $self->logger;
	
	# parse metadata, if any
	if ( my $f = $self->metaformat ) {
		$log->info("instantiating a $f metadata reader");
		my $r = Bio::BioVeL::Service::NeXMLMerger::MetaReader->new($f);
		
		# read the metadata
		my $location = $self->meta;
		$log->info("going to read metadata from $location");
		my @meta = $r->read_meta( $self->get_handle($location) );
		
		# attach metadata to taxa
		$project->set_namespaces( 'biovel' => $ns );
		for my $m ( @meta ) {
			for my $key ( keys %typemap ) {
			
				# the annotation hash should contain TaxonID or NodeID, or ...
				if ( my $id = delete $m->{$key} ) {
					my $type = $typemap{$key};
					$log->info("object $key => $id has type constant $type");

					# fetch all the objects of that type					
					my @objects = @{ $project->get_items($type) };
					$log->info("found ".scalar(@objects)." with constant type $type");
					my $obj; # the one we want
					
					# pick the one by its 1-based (!!!!!) index
					if ( looks_like_number $id ) {
						$obj = $objects[ $id - 1 ];
					}					
					# grep the one with the provided name
					else {
						no warnings 'uninitialized';
						($obj) = grep { $_->get_name eq $id } @objects;
					}
					
					$log->info("going to annotate object $obj");
					for my $predicate ( keys %{ $m } ) {
						$obj->add_meta(
							$fac->create_meta( '-triple' => { 
								"biovel:$predicate" => $m->{$predicate} 
							} )
						);
					}
				}
			}
		}
	}	
}

# given a project object, instantiates a character set reader and attaches the
# character sets to the FIRST character matrix in the project. XXX: this means
# there will be trouble if the project contains multiple matrices that each
# need character sets attached to them.

sub _attach_charsets {
	my ( $self, $project ) = @_;
	my $log = $self->logger;

	# parse charsets, if any
	if ( my $f = $self->charsetformat ) {
		$log->info("instantiating a $f charset reader");
		my $r = Bio::BioVeL::Service::NeXMLMerger::CharSetReader->new($f);
		
		# read the character sets
		my $location = $self->charsets;
		$log->info("going to read charsets from $location");		
		my %sets = $r->read_charsets( $self->get_handle($location) );
		
		# pre-process the focal character block
		my ($matrix) = @{ $project->get_items(_MATRIX_) };
		my $characters = $matrix->get_characters;
		my @sets = @{ $characters->get_sets };
		$characters->remove_set($_) for @sets;
		$log->info("done pre-processing characters object $characters");
		
		# attach the sets
		for my $set_name ( keys %sets ) {
			my $set_obj = $fac->create_set( '-name' => $set_name );
			$characters->add_set($set_obj);
			$log->info("attached set object $set_obj to $characters");
			
			# iterate over coordinate ranges
			for my $range ( @{ $sets{$set_name} } ) {
				$log->debug("processing range $range");
			
				# convert to 0-based indices
				my $start = $range->{'start'} - 1;
				my $end   = $range->{'end'} ? $range->{'end'} - 1 : $start;
				my $phase = $range->{'phase'} || 1;				
				COORD: for ( my $i = $start; $i <= $end; $i += $phase ) {
					if ( my $char = $characters->get_by_index($i) ) {
						$characters->add_to_set($char,$set_obj);
					}
					else {
						$log->warn("charset $set_name coordinate $i is out of range");
						last COORD;
					}
				}
			}
		}
	}
}

=item response_body

Generates the NeXML response body by reading the input data and metadata objects and
folding them into a single L<Bio::Phylo::Project> object that is serialized to NeXML.

=cut

sub response_body {
	my $self    = shift;
	my $log     = $self->logger;	
	my $project = $fac->create_project;
	my $taxa    = $fac->create_taxa;	
	my ( @taxa, @matrices, $forest );
	
	# parse character data reader, if any
	if ( my $f = $self->dataformat ) {
		$log->info("instantiating a $f data reader");
		my $r = Bio::BioVeL::Service::NeXMLMerger::DataReader->new($f);
		
		# read the data
		my $location = $self->data;
		$log->info("going to read data from $location");
		@matrices = $r->read_data( $self->get_handle($location) );
		
		# create taxa blocks, add to project
		push @taxa, $_->make_taxa for @matrices;
		$project->insert($_) for @matrices;
	}
	
	# parse tree data, if any
	if ( my $f = $self->treeformat ) {
		$log->info("instantiating a $f tree reader");
		my $r = Bio::BioVeL::Service::NeXMLMerger::TreeReader->new($f);
		
		# read the trees
		my $location = $self->trees;
		$log->info("going to read trees from $location");
		my @trees = $r->read_trees( $self->get_handle($location) );
		
		# merge into forest, create corresponding taxa block, add to project
		$forest = $fac->create_forest;
		$forest->insert($_) for @trees;
		push @taxa, $forest->make_taxa;
		$project->insert($forest);
	}	
	my $merged = $taxa->merge_by_name(@taxa);
	$_->set_taxa($merged) for @matrices;
	$forest->set_taxa($merged) if $forest;
	$project->insert($merged);
	
	# attach the metadata
	$self->_attach_metadata($project);
		
	# attach the character sets
	$self->_attach_charsets($project);
	
	return $project->to_xml( '-compact' => 1 );
}

=back

=cut

1;
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.