Bio-Palantir/lib/Bio/Palantir/Parser/Root.pm
package Bio::Palantir::Parser::Root;
# ABSTRACT: BiosynML DTD-derived internal class
$Bio::Palantir::Parser::Root::VERSION = '0.211420';
use Moose;
use namespace::autoclean;
# AUTOGENERATED CODE! DO NOT MODIFY THIS FILE!
use XML::Bare qw(forcearray);
use POSIX;
use aliased 'Bio::Palantir::Parser::Cluster';
use aliased 'Bio::Palantir::Parser::Gene';
use aliased 'Bio::Palantir::Parser::Domain';
use aliased 'Bio::Palantir::Parser::Motif';
use aliased 'Bio::Palantir::Roles::Modulable::Module';
# private attributes
has '_root' => (
is => 'ro',
isa => 'HashRef',
required => 1,
);
has 'module_delineation' => (
is => 'ro',
isa => 'Str',
);
# public array(s) of composed objects
has 'clusters' => (
traits => ['Array'],
is => 'ro',
isa => 'ArrayRef[Bio::Palantir::Parser::Cluster]',
writer => '_set_clusters',
init_arg => undef,
handles => {
count_clusters => 'count',
all_clusters => 'elements',
get_cluster => 'get',
next_cluster => 'shift',
},
);
## no critic (ProhibitUnusedPrivateSubroutines)
## use critic
has 'genes' => (
traits => ['Array'],
is => 'ro',
isa => 'ArrayRef[Bio::Palantir::Parser::Gene]',
writer => '_set_genes',
handles => {
count_genes => 'count',
all_genes => 'elements',
get_gene => 'get',
next_gene => 'shift',
},
);
## no critic (ProhibitUnusedPrivateSubroutines)
## use critic
has 'domains' => (
traits => ['Array'],
is => 'ro',
isa => 'ArrayRef[Bio::Palantir::Parser::Domain]',
writer => '_set_domains',
handles => {
count_domains => 'count',
all_domains => 'elements',
get_domain => 'get',
next_domain => 'shift',
},
);
## no critic (ProhibitUnusedPrivateSubroutines)
## use critic
has 'motifs' => (
traits => ['Array'],
is => 'ro',
isa => 'ArrayRef[Bio::Palantir::Parser::Motif]',
writer => '_set_motifs',
handles => {
count_motifs => 'count',
all_motifs => 'elements',
get_motif => 'get',
next_motif => 'shift',
},
);
## no critic (ProhibitUnusedPrivateSubroutines)
## use critic
# public composed object(s)
# public deep methods
# public methods
# public aliases
sub BUILD {
my $self = shift;
my @motifs = map { Motif->new( _root => $_ ) } @{
forcearray $self->_root->{'motiflist'}{'motif'}
};
$self->_set_motifs( \@motifs );
my %motifs_in;
push @{ $motifs_in{ $_->_root->{'domainID'}->{'value'} } }, $_ for @motifs;
my @domains;
for my $domain ( @{ forcearray $self->_root->{'domainlist'}{'domain'} }) {
push @domains, Domain->new(
_root => $domain,
motifs => $motifs_in{ $domain->{id}{value} } // [],
);
}
$self->_set_domains( \@domains );
my %domains_in;
push @{
$domains_in{ $_->_root->{'location'}->{'gene'}->{'geneid'}->{'value'} }
}, $_ for @domains
;
my %orphan_motifs_in;
push @{
$orphan_motifs_in{ $_->_root->{'geneID'}->{'value'} }
}, $_ for @{ $motifs_in{'0'} }
;
my @genes;
for my $gene ( @{ forcearray $self->_root->{'genelist'}->{'gene'} }) {
my $gene_object = Gene->new(
_root => $gene,
domains => $domains_in{ $gene->{id}{value} } // [],
orphan_motifs => $orphan_motifs_in{ $gene->{id}{value} } // [],
);
# fill monomer attribute
my @monomers = $gene_object->monomers;
if (@monomers) {
for my $domain ($gene_object->all_domains) {
if ($domain->function =~ m/^A$ | ^A-OX$ | ^AT/xms) {
$domain->_set_monomer( shift @monomers );
}
}
}
push @genes, $gene_object;
}
$self->_set_genes( \@genes );
my @modules;
if ($self->_root->{modulelist}) {
@modules = _extract_antismash_modules($self->_root, @genes);
}
my $cluster_rank = 1;
# fix for antismash5: cluster order is not preserved during json2xml conversion
my $cluster_list;
unless (
grep { $_->{genecluster}->{sequence}->{value} }
@{ forcearray $self->_root->{'model'} })
{
# filter clusters by coordinates order for antiSMASH 5 (and 3)
$cluster_list
= [ sort { $a->{genecluster}->{region}->{begin}->{value}
<=> $b->{genecluster}->{region}->{begin}->{value} }
@{ forcearray $self->_root->{'model'} } ]
;
}
else {
# do not filter clusters as antismash 4 reboots coordinates per contig
$cluster_list = [ @{ forcearray $self->_root->{'model'} } ];
}
my @clusters;
for my $cluster (@$cluster_list) {
my $begin = $cluster->{'genecluster'}->{'region'}->{'begin'}->{'value'};
my $end = $cluster->{'genecluster'}->{'region'}->{'end' }->{'value'};
# fix for antiSMASH 4: keep coordinates in strand + order
my $strand = $begin < $end ? '+' : '-';
if ($strand eq '-') {
my $temp_begin = $begin;
$begin = $end;
$end = $temp_begin;
}
# second fix for antiSMASH 4: handle the coordinates reset for each contig (use of the sequence value which is only exploited in antiSMASH 4)
my @cluster_genes;
if ($cluster->{'genecluster'}->{'sequence'}->{'value'}) {
my $cluster_seqlist
= $cluster->{'genecluster'}->{'sequence'}->{'value'};
# filter on the sequence value and the cluster coordinates
@cluster_genes = grep { $_->genomic_dna_begin < $end
&& $_->genomic_dna_end > $begin }
grep { $_->_root->{'sequence'}{'value'}
eq $cluster_seqlist } @genes
;
}
# this information is not extracted in antiSMASH 5 (but coordinates are continuous)
else {
@cluster_genes = grep { $_->genomic_dna_begin < $end
&& $_->genomic_dna_end > $begin } @genes;
}
@cluster_genes
= sort { $a->genomic_dna_begin <=> $b->genomic_dna_begin }
@cluster_genes
;
my $gene_rank = 1;
my $domain_rank = 1;
for my $gene (@cluster_genes) {
$_->_set_rank($domain_rank++)
for sort { $a->begin <=> $b->begin } $gene->all_domains;
$gene->_set_rank($gene_rank++);
}
# fix antiSMASH 5.1 and its module delineation
if ($self->_root->{modulelist}) { # Add new module feature from version 5.1
# TODO see how synchronize domain rank in @genes and @modules
my @cluster_modules =
sort {$a->genomic_prot_begin <=> $b->genomic_prot_begin }
grep { $_->genomic_dna_begin < $end
&& $_->genomic_dna_end > $begin }
@modules;
;
my $mrank = 1;
$_->_set_rank($mrank++) for @cluster_modules;
push @clusters, Cluster->new(
module_delineation => $self->module_delineation,
rank => $cluster_rank,
_root => $cluster->{'genecluster'},
genes => \@cluster_genes,
genomic_dna_begin => $begin,
genomic_dna_end => $end,
genomic_prot_begin => ceil($begin / 3),
genomic_prot_end => floor($end / 3),
modules => \@cluster_modules,
);
}
else {
push @clusters, Cluster->new(
module_delineation => $self->module_delineation,
rank => $cluster_rank,
_root => $cluster->{'genecluster'},
genes => \@cluster_genes,
genomic_dna_begin => $begin,
genomic_dna_end => $end,
genomic_prot_begin => ceil($begin / 3),
genomic_prot_end => floor($end / 3),
);
}
$cluster_rank++;
}
# enables module cutting mode
$_->_set_cutting_mode( $self->module_delineation ) for @clusters;
$self->_set_clusters( \@clusters );
return;
}
sub _extract_antismash_modules {
my ($root, @report_genes) = @_;
my @report_modules;
for my $module ( @{ forcearray $root->{'modulelist'}{'module'} }) {
next
if $module->{complete}{value} eq 'false';
my @mgenes
= grep { $module->{prot_start}{value} < $_->genomic_prot_end
&& $module->{prot_end}{value} > $_->genomic_prot_begin }
sort { $a->genomic_prot_begin <=> $b->genomic_prot_end }
@report_genes
;
my $gene_ids = [map { $_->uui } @mgenes]; # ArrayRef Module attribute
my $mdomains;
for my $gene (@mgenes) {
push @{ $mdomains },
sort { $a->begin <=> $b->begin }
grep { ($_->begin + $gene->genomic_prot_begin - 1) # -1 for beginning domain pos 1 in gene pos 1
< $module->{prot_end}{value}
&& ($_->end + $gene->genomic_prot_begin - 1)
> $module->{prot_start}{value} }
$gene->all_domains
;
}
my $module_sequence = join '', map { $_->protein_sequence }
@{ $mdomains };
# my $module_sequence = substr(
# $module->{prot_begin} - 1,
# $module->{prot_end} - $module->{prot_begin} + 1,
# $mgene->protein_sequence
# );
#
my $size
= $module->{prot_end}{value} - $module->{prot_start}{value} + 1;
push @report_modules, Module->new(
rank => $module->{id}{value},
genomic_prot_begin => $module->{prot_start}{value},
genomic_prot_end => $module->{prot_end}{value},
genomic_prot_coordinates => [
$module->{prot_start}{value},
$module->{prot_end}{value},
],
protein_sequence => $module_sequence,
gene_uuis => $gene_ids,
domains => $mdomains,
size => $size,
);
}
return(@report_modules);
}
__PACKAGE__->meta->make_immutable;
1;
__END__
=pod
=head1 NAME
Bio::Palantir::Parser::Root - BiosynML DTD-derived internal class
=head1 VERSION
version 0.211420
=head1 SYNOPSIS
# TODO
=head1 DESCRIPTION
# TODO
=head1 ATTRIBUTES
=head2 clusters
ArrayRef of L<Bio::Palantir::Parser::Cluster>
=head2 genes
ArrayRef of L<Bio::Palantir::Parser::Gene>
=head2 domains
ArrayRef of L<Bio::Palantir::Parser::Domain>
=head2 motifs
ArrayRef of L<Bio::Palantir::Parser::Motif>
=head1 METHODS
=head2 count_clusters
Returns the number of Clusters of the Root.
# $root is a Bio::Palantir::Parser::Root
my $count = $root->count_clusters;
This method does not accept any arguments.
=head2 all_clusters
Returns all the Clusters of the Root (not an array reference).
# $root is a Bio::Palantir::Parser::Root
my @clusters = $root->all_clusters;
This method does not accept any arguments.
=head2 get_cluster
Returns one Cluster of the Root by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Cluster does not exist, this method will return C<undef>.
# $root is a Bio::Palantir::Parser::Root
my $cluster = $root->get_cluster($index);
croak "Cluster $index not found!" unless defined $cluster;
This method accepts just one argument (and not an array slice).
=head2 next_cluster
Shifts the first Cluster of the array off and returns it, shortening the
array by 1 and moving everything down. If there are no more Clusters in
the array, returns C<undef>.
# $root is a Bio::Palantir::Parser::Root
while (my $cluster = $root->next_cluster) {
# process $cluster
# ...
}
This method does not accept any arguments.
=head2 count_genes
Returns the number of Genes of the Root.
# $root is a Bio::Palantir::Parser::Root
my $count = $root->count_genes;
This method does not accept any arguments.
=head2 all_genes
Returns all the Genes of the Root (not an array reference).
# $root is a Bio::Palantir::Parser::Root
my @genes = $root->all_genes;
This method does not accept any arguments.
=head2 get_gene
Returns one Gene of the Root by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Gene does not exist, this method will return C<undef>.
# $root is a Bio::Palantir::Parser::Root
my $gene = $root->get_gene($index);
croak "Gene $index not found!" unless defined $gene;
This method accepts just one argument (and not an array slice).
=head2 next_gene
Shifts the first Gene of the array off and returns it, shortening the
array by 1 and moving everything down. If there are no more Genes in
the array, returns C<undef>.
# $root is a Bio::Palantir::Parser::Root
while (my $gene = $root->next_gene) {
# process $gene
# ...
}
This method does not accept any arguments.
=head2 count_domains
Returns the number of Domains of the Root.
# $root is a Bio::Palantir::Parser::Root
my $count = $root->count_domains;
This method does not accept any arguments.
=head2 all_domains
Returns all the Domains of the Root (not an array reference).
# $root is a Bio::Palantir::Parser::Root
my @domains = $root->all_domains;
This method does not accept any arguments.
=head2 get_domain
Returns one Domain of the Root by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Domain does not exist, this method will return C<undef>.
# $root is a Bio::Palantir::Parser::Root
my $domain = $root->get_domain($index);
croak "Domain $index not found!" unless defined $domain;
This method accepts just one argument (and not an array slice).
=head2 next_domain
Shifts the first Domain of the array off and returns it, shortening the
array by 1 and moving everything down. If there are no more Domains in
the array, returns C<undef>.
# $root is a Bio::Palantir::Parser::Root
while (my $domain = $root->next_domain) {
# process $domain
# ...
}
This method does not accept any arguments.
=head2 count_motifs
Returns the number of Motifs of the Root.
# $root is a Bio::Palantir::Parser::Root
my $count = $root->count_motifs;
This method does not accept any arguments.
=head2 all_motifs
Returns all the Motifs of the Root (not an array reference).
# $root is a Bio::Palantir::Parser::Root
my @motifs = $root->all_motifs;
This method does not accept any arguments.
=head2 get_motif
Returns one Motif of the Root by its index. You can also use
negative index numbers, just as with Perl's core array handling. If the
specified Motif does not exist, this method will return C<undef>.
# $root is a Bio::Palantir::Parser::Root
my $motif = $root->get_motif($index);
croak "Motif $index not found!" unless defined $motif;
This method accepts just one argument (and not an array slice).
=head2 next_motif
Shifts the first Motif of the array off and returns it, shortening the
array by 1 and moving everything down. If there are no more Motifs in
the array, returns C<undef>.
# $root is a Bio::Palantir::Parser::Root
while (my $motif = $root->next_motif) {
# process $motif
# ...
}
This method does not accept any arguments.
=head1 AUTHOR
Loic MEUNIER <lmeunier@uliege.be>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2019 by University of Liege / Unit of Eukaryotic Phylogenomics / Loic MEUNIER and Denis BAURAIN.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut