Group
Extension

App-Egaz/lib/App/Egaz/Common.pm

package App::Egaz::Common;
use strict;
use warnings;
use autodie;

use 5.018001;

use Bio::Phylo qw();
use Bio::Phylo::IO qw();
use Carp qw();
use File::Find::Rule;
use File::ShareDir qw();
use IO::Zlib;
use IPC::Cmd qw();
use JSON qw();
use List::Util qw();
use List::MoreUtils qw();
use MCE;
use Number::Format qw();
use Path::Tiny qw();
use Set::Scalar;
use String::Similarity;
use Template;
use YAML::Syck qw();

use App::Fasops::Common;

sub resolve_file {
    my $original = shift;
    my @paths    = @_;

    my $file = $original;
    if ( !-e $file ) {
        $file = Path::Tiny::path($file)->basename();

        if ( !-e $file ) {
            for my $p (@paths) {
                $file = Path::Tiny::path($p)->child($file);
                last if -e $file;
            }
        }
    }

    if ( -e $file ) {
        return $file->stringify;
    }
    else {
        Carp::confess "Can't resole file for [$original]\n";
    }
}

sub round {
    my $float = shift;

    return int( $float + $float / abs( $float * 2 || 1 ) );
}

# Return a list of 1-based overlapping ranges
sub overlap_ranges {
    my $start   = shift;
    my $end     = shift;
    my $chunk   = shift;
    my $overlap = shift;

    my @ranges;
    for ( my $i = $start - 1; $i < $end; $i += $chunk ) {
        my $j = $i + $chunk + $overlap;
        $j = $end if ( $j > $end );
        push @ranges, [ ( $i + 1 ), $j ];
    }
    return \@ranges;
}

sub run_sparsemem {
    my Path::Tiny $result = shift;
    my $query             = shift;
    my $genome            = shift;
    my $length = shift || 20;

    # mummer
    # -b    compute forward and reverse complement matches
    # -F    force 4 column output format regardless of the number of reference sequence inputs
    # -n    match only the characters a, c, g, or t
    #
    # sparsemem only
    # -k    sampled suffix positions (one by default)
    my $template;
    my $exe;
    if ( IPC::Cmd::can_run('sparsemem') ) {
        $exe      = 'sparsemem';
        $template = "%s -maxmatch -F -l %d -b -n -k 4 -threads 4 %s %s > %s";
    }
    else {
        $exe      = 'mummer';
        $template = "%s -maxmatch -F -l %d -b -n %s %s > %s";
    }

    my $cmd = sprintf $template, $exe, $length, $genome, $query, $result->stringify;
    system $cmd;
}

sub get_size {
    my $file = shift;

    tie my %length_of, "Tie::IxHash";

    if ( IPC::Cmd::can_run('faops') ) {
        my $cmd = sprintf "faops size %s", $file;
        my @lines = grep {defined} split /\n/, `$cmd`;

        for (@lines) {
            my ( $key, $value ) = split /\t/;
            $length_of{$key} = $value;
        }
    }
    else {
        my $seq_of    = App::Fasops::Common::read_fasta($file);
        my @seq_names = keys %{$seq_of};

        for my $name (@seq_names) {
            $length_of{$name} = length $seq_of->{$name};
        }
    }

    return \%length_of;
}

#----------------------------#
# parsing tree
#----------------------------#
sub ladder {
    my $file        = shift;
    my $target_name = shift;

    my $tree = Bio::Phylo::IO::parse( -format => 'newick', -file => $file )->first;

    my $target = $tree->get_by_name($target_name);

    my @ladder = ( [$target_name], );
    my $set = Set::Scalar->new;
    $set->insert($target_name);

    my $start = $target;
    while (1) {
        my $parent = $start->get_parent;
        last unless $parent;
        my @nodes
            = grep { !$set->has( $_->get_name ) } @{ $parent->get_terminals };
        $set->insert( $_->get_name ) for @nodes;
        my $distance_of = {};
        for my $node (@nodes) {
            $distance_of->{ $node->get_name } = $node->calc_patristic_distance($target);
        }
        my @sorted = map { $_->[0] }
            sort { $a->[1] <=> $b->[1] }
            map { [ $_, $distance_of->{$_} ] }
            keys %{$distance_of};
        push @ladder, [@sorted];

        $start = $parent;
    }

    return \@ladder;
}

sub exec_cmd {
    my $cmd = shift;
    my $opt = shift;

    if ( defined $opt and ref $opt eq "HASH" and $opt->{verbose} ) {
        print STDERR "CMD: ", $cmd, "\n";
    }

    system $cmd;
}

1;



Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.