Group
Extension

Bio-MUST-Drivers/lib/Bio/MUST/Drivers/Blast/Database/Temporary.pm

package Bio::MUST::Drivers::Blast::Database::Temporary;
# ABSTRACT: Internal class for BLAST driver
$Bio::MUST::Drivers::Blast::Database::Temporary::VERSION = '0.252830';
use Moose;
use namespace::autoclean;

use autodie;
use feature qw(say);

use Smart::Comments '###';

use Carp;
use IPC::System::Simple qw(system);
use Module::Runtime qw(use_module);
use Path::Class qw(file);

extends 'Bio::MUST::Core::Ali::Temporary';

with 'Bio::MUST::Drivers::Roles::Blastable';


## no critic (ProhibitUnusedPrivateSubroutines)

# overload Ali::Temporary default builder
# Note: an id prefix longer than 3 letters is needed to avoid casing issues
# when building BLAST databases (e.g., seq1234 is left untouched but seq12345
# becomes SEQ12345, which prevents restore_ids to work properly).
sub _build_args {
    return { clean => 1, degap => 1, id_prefix => 'temp' };
}

## use critic

# overload equivalent attribute in plain Database
sub remote {
    return 0;
}

sub BUILD {
    my $self = shift;

    # provision executable
    my $app = use_module('Bio::MUST::Provision::Blast')->new;
       $app->meet();

    my $in = $self->filename;
    my $dbtype = $self->type;

    # TODO: modify all drivers to print the native errors for easy debugging
    # create makeblastdb command
    # -parse_seqids now required for blastdbcmd to work (side effects?)
    my $pgm = file($ENV{BMD_BLAST_BINDIR}, 'makeblastdb');
    my $cmd = "$pgm -in $in -dbtype $dbtype -parse_seqids"
        . ' > /dev/null 2> /dev/null';
    #### $cmd

    # try to robustly execute makeblastdb
    my $ret_code = system( [ 0, 127 ], $cmd);
    if ($ret_code == 127) {
        # TODO: do something to abort construction
        carp "[BMD] Warning: cannot execute $pgm command; returning!";
        return;
    }

    return;
}

sub DEMOLISH {
    my $self = shift;

    # updated with ChatGPT in Sept 2024
    # The following is valid for BLAST 2.16.0:
    # Core Nucleotide Database Files:
    # - .ndb – Nucleotide database file: Contains the actual nucleotide sequences.
    # - .nhr – Nucleotide header file: Stores sequence descriptions and identifiers (like the protein .phr file).
    # - .nin – Nucleotide index file: Contains indexing information for fast lookup (analogous to the .pin file for proteins).
    # - .nsq – Nucleotide sequence file: Holds the nucleotide sequences in a compact format for efficient searching (similar to the .psq file for proteins).
    # Partitioning and Multi-threading Support Files (Nucleotide Version):
    # - .nog – Group partition index file: Manages partitioned groups of nucleotide sequences.
    # - .not – Partition offset table: Contains offsets for different partitions within the nucleotide database.
    # - .ntf – Partition table file: The reference table for nucleotide database partitions.
    # - .nto – Partition offset file: Holds the positions of nucleotide partitions.
    # Metadata and Optimization Files:
    # - .njs – Nucleotide JSON schema file: Similar to the .pjs file in protein databases, it contains metadata in JSON format about the nucleotide database.
    # - .nos – Nucleotide offset file: Similar to .pos, it stores sequence offsets for optimized retrieval.

    # unlink temp files
    my @suffices = map { ( $self->type eq 'prot' ? 'p' : 'n' ) . $_ }
        qw(db hr in sq og ot tf to js os);
    my $basename = $self->filename;
    #### $basename
    file($_)->remove for map { "$basename.$_" } @suffices;

    return;
}

__PACKAGE__->meta->make_immutable;
1;

__END__

=pod

=head1 NAME

Bio::MUST::Drivers::Blast::Database::Temporary - Internal class for BLAST driver

=head1 VERSION

version 0.252830

=head1 SYNOPSIS

    # TODO

=head1 DESCRIPTION

    # TODO

=head1 AUTHOR

Denis BAURAIN <denis.baurain@uliege.be>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.