Group
Extension

Bio-Palantir/bin/explore_bgc_domains.pl

#!/usr/bin/env perl
# PODNAME: explore_bgc_domains.pl
# ABSTRACT: Reports all detected domain signatures for a NRPS/PKS gene cluster
# CONTRIBUTOR: Denis BAURAIN <denis.baurain@uliege.be>

use Modern::Perl '2011';
use autodie;

use Getopt::Euclid qw(:vars);
use JSON::Create qw(create_json);

use aliased 'Bio::Palantir::Explorer::ClusterFasta';

my (%json_for, $i);

my $cluster = ClusterFasta->new( file => $ARGV_fasta_file );
my @lines;

GENE:
for my $gene (sort { $a->rank <=> $b->rank } $cluster->all_genes) {
        
    $json_for{'no-taxonomy'}{Clusters}{++$i}{GenesPlus}{$gene->rank} = {
#                    uui =>  $gene->uui,
       rank => $gene->rank,
       name => $gene->name, 
       size => $gene->size, 
       coordinates => (join '-', @{ $gene->coordinates }), 
       begin => $gene->gene_begin, 
       end => $gene->gene_end, 
    };

    DOMAIN:
    for my $domain (sort { $a->rank <=> $b->rank } $gene->all_domains) {

        my $subtype = $domain->subtype // 'NULL';
        my $subtype_evalue = $domain->subtype_evalue // 'NULL';
        my $subtype_score  = $domain->subtype_score // 'NULL';

        $json_for{'no-taxonomy'}{Clusters}{$i}{GenesPlus}{$gene->rank}{'Do'
        . 'mainsPlus'}{$domain->rank} = {
            rank              => $domain->rank, 
            function          => $domain->symbol, 
            size              => $domain->size, 
            coordinates       => (join '-', @{ $domain->coordinates }), 
            begin             => $domain->begin,
            end               => $domain->end, 
            evalue            => $domain->evalue,
            bit_score         => $domain->score,
            subtype           => $subtype,
            subtype_evalue    => $subtype_evalue,
            subtype_bit_score => $subtype_score,
        };
    
        push @lines, [
            'gene' . $gene->rank, $gene->name, 
            (join '-', @{  $gene->coordinates }), 
            'domain' . $domain->rank, $domain->symbol, 
            (join '-', @{ $domain->coordinates }), $domain->size,
            $domain->evalue, $domain->score, $subtype,
            $subtype_evalue, $subtype_score,
        ];
    }
}

open my $out, '>', $ARGV_outfile . '.json';
say {$out} create_json(\%json_for);

open $out, '>', $ARGV_outfile . '.tsv';
say {$out} join "\t", qw(gene_rank gene_name gene_coordinates domain_rank 
    domain_function domain_coordinates domain size evalue bit_score
    subtype subtype_evalue subtype_bit_score)
;

say {$out} join "\t", @{ $_ } for @lines;

__END__

=pod

=head1 NAME

explore_bgc_domains.pl - Reports all detected domain signatures for a NRPS/PKS gene cluster

=head1 VERSION

version 0.211420

=head1 NAME

explore_bgc_domains.pl - Reports all detected domain signatures for a NRPS/PKS
gene cluster without any predefined consensus architecture. The domain
predictions are reported in TSV and JSON formats.

=head1 USAGE

	$0 [options] --fasta-file [=] <infile>

=head1 REQUIRED ARGUMENTS

=over

=item --fasta[-file] [=] <infile>

Absolute path to a fasta file.

=back

=head1 OPTIONAL ARGUMENTS

=over

=item --outfile <filename>

Output filename to generate TSV and JSON files [default: exploratory_domains].

=for Euclid: filename.type: writable
    filename.default: 'exploratory_domains'

=item --version

=item --usage

=item --help

=item --man

print the usual program information

=back

=head1 AUTHOR

Loic MEUNIER <lmeunier@uliege.be>

=head1 CONTRIBUTOR

=for stopwords Denis BAURAIN

Denis BAURAIN <denis.baurain@uliege.be>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2019 by University of Liege / Unit of Eukaryotic Phylogenomics / Loic MEUNIER and Denis BAURAIN.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.