Group
Extension

Geanfammer/Bin/convert_sso_to_msp.pl

#!/usr/bin/perl
# Last Update by /gn0/jong/Perl/update_subroutines.pl: Thu Jul 10 14:33:16 BST 1997
#_________________________________________________________________________
# Title     : convert_sso_to_msp.pl
# Usage     : convert_sso_to_msp.pl <1bia_1-63.sso>  or  <xxxxxx.clu>
# Function  : converts FASTA or SSEARCH -m 10  file output format to
#             matching sequence pair (MSP format)
#             WARN: This will concatenate all the output into ONE msp file
#               unless you set 'e' option at the prompt for each
#               xxx.msso to xxx.msp
#
# Example   : convert_sso_to_msp.pl steve_clufile.clu
# Keywords  : convert_sso_to_msp, convert_ssso_to_msp, convert_fsso_to_msp
# Options   :
#   e  for each msp file is made for each input xxxxxx.sso file
#   s  for single file out format (one big concatenated file will be made)
#   u= for upper E value limit (above this are discarded)
#   l= for lower E value limit (below this are discarded)
#   r  for adding ranges
#   $single_out_opt         =  s by s
#   $verbose                =  v by v -v     # to show some info
#   $uppercase_seq_name     =  U by U -U
#   $upper_expect_limit     =    by u=
#   $lower_expect_limit     =    by l=
#   $single_file_name       =    by s=
#   $make_each_msp          =  e by e -e
#   $over_write             =  o by o
#   $add_range              =  r by r -r
#   $add_range2             = r2 by r2 R
#   $new_format             =  n by n -n
#   $write_each_msp_to_disk =  w by w -w  # for writing each msp files to save memory
#   $eval_threshold=  by t=
#
# Returns   : Writes a file automatically
# Author    : Sarah Teichmann, Jong Park, sat@mrc-lmb.cam.ac.uk,jong@salt2.med.harvard.edu
# Version   : 2.0
#--------------------------------------------------------------
$|=1;
my (@sub_dir_heads, $s, $i, $j, $k, $clu);
local $eval_threshold=0.001;
my @sso_files=@{&parse_arguments(1)}; ## SSO files are (xx.ssso, xx.fsso, xx.sso)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#  Default parameter setting
#__________________________________
unless($upper_expect_limit=~/\S/){   $upper_expect_limit=0.081; }
unless($lower_expect_limit=~/\S/){   $lower_expect_limit=0; }
if($eval_threshold){ $upper_expect_limit=$eval_threshold; }

print "\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ";
print "\n# Input files are                 opt: @sso_files ";
print "\n# \$add_range                      opt: $add_range";
print "\n# \$upper_expect_limit             opt: $upper_expect_limit";
print "\n# \$uppercase_seq_name             opt: $uppercase_seq_name";
print "\n# \$single_out_opt                 opt: $single_out_opt";
print "\n# \$verbose                        opt: $verbose";
print "\n# \$over_write                     opt: $over_write";
print "\n# \$write_each_msp_to_disk         opt: $write_each_msp_to_disk";
print "\n#_______________________________________________________________\n";

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# sso_files has either xxxxx.sso  or xxxxx.gz
#______________________________________________________
for($i=0; $i < @sso_files; $i++){
   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # remove .clu file and save it in $clu
   #________________________________________
   if($sso_files[$i]=~/\.s?clus?/){	   $clu=$sso_files[$i];
      splice(@sso_files, $i, 1);	   $i--;
   }
}


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#     If clu file is given, let's open it
#_______________________________________________________________
if(defined($clu)){
    &convert_clu_to_msp(\$clu);
}else{
    print "\n# No clu file is given. I open individual fasta or ssearch output\n";
    if($make_each_msp=~/e/){
	 print "\n# You put \"e\" option for each file conversion\n";
	 for($i=0; $i< @sso_files; $i ++){
             &convert_sso_to_msp($sso_files[$i],
                 $single_out_opt,
                 "s=$single_file_name",
		 $add_range, $add_range2,
		 "u=$upper_expect_limit",
                 "t=$eval_threshold",
	         "l=$lower_expect_limit",
	         $new_format);
         }
    }else{
	 print "\n# $0 : You did not put e opt, so I will concatenate all the sso contents to one MSP\n";
	 &convert_sso_to_msp(@sso_files,
	    $single_out_opt, "s=$single_file_name",
	    $add_range,
	    $add_range2,
	    "u=$upper_expect_limit",
	    "l=$lower_expect_limit",
            "t=$eval_threshold",
	    $new_format);
    }
}


print "\n# Sarah! Please remember that $0 used default E value of \"$upper_expect_limit\" for upper limit";
print "\n#        Also,  remember that $0 used default E value of \"$lower_expect_limit\" for lower limit\n";


#___________________________________________________________________
# Title     : convert_sso_to_msp
# Usage     : &convert_sso_to_msp(@ARGV, $single_out_opt);
# Function  : This takes sso file(s) and produces MSP file. It
#             concatenate sso file contents when more than one
#             sso file is given.
# Example   : &convert_sso_to_msp(@ARGV, 'OUT.msp', $single_out_opt);
# Warning   : This capitalize all the input file names when
#              producing xxxxx.msp. xxxxx.sso -> XXXX.sso
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#             v  for showing the MSP result to screen
#             s  for making single MSP file for each sso file
#                    as well as big MSP file which has all sso
#             u= for upper expectation value limit
#             l= for lower expect val limit
#             s= for single file name input eg. "s=xxxxx.msp"
#             n  for new format (msp2 format)
#             r  for adding range
#             r2 for adding ranges in all sequence names
#   $eval_threshold =  by e=
#
# Returns   : the file names created (xxxx.msp, yyyy.msp,,,,)
# Argument  :
# Category  :
# Version   : 2.7
#--------------------------------------------------------------
sub convert_sso_to_msp{
   #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
   my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
   my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
   my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
   my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
   my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
   if($debug==1){print "\n\t\@hash=\"@hash\"
   \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
   \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
   #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my ($upper_expect_limit, $lower_expect_limit)=(50, 0);
   my (%sso, @sso, @SSO, $big_out_msp1,  @final_out, $big_out_msp2,
       $create_sso, $single_out_opt, $add_range, $add_range2, $big_out_msp,
       $Evalue_thresh, $new_format, $Score_thresh, $margin, $single_file_name);
   if($vars{'u'}=~/(\.?\d+)/){ $upper_expect_limit = $vars{'u'} };
   if($vars{'l'}=~/(\.?\d+)/){ $lower_expect_limit = $vars{'l'} };
   if($vars{'t'}=~/(\d+)/){ $Score_thresh  = $vars{'t'} };
   if($vars{'e'}=~/(\.?\d+)/){ $upper_expect_limit = $vars{'e'} };
   if($vars{'m'}=~/(\d+)/){ $margin  = $vars{'m'} };
   if($vars{'s'}=~/\S/){ $single_file_name  = $vars{'s'} };
   if($char_opt=~/r2/){  $add_range='r'; $add_range2='r2' }
   if($char_opt=~/r/){   $add_range = 'r' }
   if($char_opt=~/c/){   $create_sso = 'c' }
   if($char_opt=~/s/){   $single_out_opt='s' }
   if($char_opt=~/n/){   $new_format='n' }
   print "\n# (i) File given to convert_sso_to_msp is \"@file\", Normally xxx.sso file names\n";

   if($single_file_name=~/\S/){
       $big_out_msp=$single_file_name;
   }else{
       for($i=0; $i < @file; $i++){
          if($file[$i]=~/\.msp$/){ ## when output file name is given
              $big_out_msp=$file[$i];
              splice(@file, $i, 1);
              $i--;
          }elsif($file[$i]=~/^(\d+\-\d+)([_\d]*)\.[mfs]?sso/){  ## creates xxxx.msp file name from xxxx.sso
              $big_out_msp1="\U$1"."$2"."\.msp";
              $big_out_msp2="\U$1".".msp";
          }elsif($file[$i]=~/^(\S+)\.[mfs]?sso$/){
              $big_out_msp1="\U$1"."\.msp";
              $big_out_msp2="\U$1"."_all".".msp";
              print "\n# convert_sso_to_msp: File matched  xxxx.sso  format \n";
          }elsif($file[$i]=~/^(\S+)\.out$/){
              $big_out_msp1="\U$1"."\.msp";
              $big_out_msp2="\U$1"."_all".".msp";
              print "\n# convert_sso_to_msp: File matched  xxxx.out  format \n";
          }elsif($file[$i]=~/^(\S+)\.p[rot\,]*\.ts\.gz/){
              $big_out_msp1="\U$1".".msp";
              $big_out_msp2="\U$1"."_all".".msp";
          }elsif($file[$i]=~/^(\S+)\.ts\.gz/){
              $big_out_msp1="\U$1".".msp";
              $big_out_msp2="\U$1"."_all".".msp";
          }elsif($file[$i]=~/^(\S+)\.out\.gz/ or $file[$i]=~/^(\S+)\.[mfs]?sso\.gz/){
              $big_out_msp1="\U$1".".msp";
              $big_out_msp2="\U$1"."_all".".msp";
          }
       }
   }
   if(defined($big_out_msp)){
       $big_out_msp1=$big_out_msp2=$big_out_msp;
       print "\n# \$big_out_msp is defined as \'$big_out_msp\'\n";
   }else{
       print "\n# convert_sso_to_msp: You did not define the big MSP file out format, so $big_out_msp1 \n";
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``~~~~
   #  (1) When File was given(normal situ) to this sub routine
   #_____________________________________________________________
   if(@file == 1){   ## ONE single file input??
	  print "# one file @file is given, OUT will be: $big_out_msp1 \n";
          @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	          "u=$upper_expect_limit",
		  "l=$lower_expect_limit",
		  "m=$margin", $over_write,
		  $new_format,
		  "s=$big_out_msp")};
          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # Write MSP and finish off
          #______________________________________________
	  push(@final_out, &write_msp_files(@sso, $big_out_msp1,
	        $single_out_opt, $add_range) );

   }elsif(@file > 1){ ## MOre than 1 file input??
       @sso=@{&open_sso_files(@file, $add_range, $add_range2,
             "l=$lower_expect_limit",
             "u=$upper_expect_limit",
             "m=$margin", $over_write,
             $new_format)};
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Write MSP and finish off (multiple @file input)
       #______________________________________________
       push(@final_out, @{&write_msp_files(@sso, $big_out_msp2,
             $single_out_opt, $add_range)} ); ## concatenates all the hash ref to one
   }

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   #  (2) When NO File but ARRAY is given
   #      Here, you can have SSO files created
   #__________________________________________
   elsif(@array >=1){
       print "\n# In convert_sso_to_msp, \@array is given rather than \@file";
       @sso=@{&open_sso_files(@array, "u=$upper_expect_limit", $add_range2,
                  "l=$lower_expect_limit", $add_range, $create_sso,
                  "m=$margin", $new_format, $over_write)};
       push(@final_out, @{&write_msp_files(@sso, $big_out_msp,
                                               $single_out_opt, $add_range)} );
   }
   return(\@final_out);
}



#________________________________________________________________________________
# Title     : clu_to_sso_to_msp
# Usage     : &clu_to_sso_to_msp(\$clu);
# Function  : reads in a big single linkage cluster file(or normal cluster file)
#              and creates a big msp file which contains all the entries in the
#              cluster file (usually with the extension of sclu or clu)
#             This normally reads in xxxx.mso, xxxx.sso like files, but if the
#              corresponding  xxx.msp file already exists, it concatenates them to
#              make a bigger one.
# Example   :
# Keywords  : clu_2_sso_2_msp, cluster_to_msp, cluster_to_sso_to_msp
#              convert_clu_to_sso_to_msp
# Options   : USE, convert_clu_to_sso_to_msp, this is obsolute now
# Category  :
# Version   : 1.7
#--------------------------------------------------------------------------------
sub clu_to_sso_to_msp{
    my($i, $j, $k, $s, $u, $p, $m, $n, $y, @possible_extensions, @list,
	@final_files, @U_L_case, $file, @file, @written_msp_files);

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Opening cluster file (xx.clu)
    # %clus looks like this:  2-507     YGR041W YLR353W
    #                         3-308     YDR222W YDR346C YLR225C
    #                         2-184     YCL066W YCR040W
    #______________________________________________________________
    my $clu=${$_[0]} || $_[0];
    if($verbose){
        print "\n# clu_to_sso_to_msp : \"$clu\" is given
                        and I am processing it with clu_to_sso_to_msp\n" if defined $clu;
    }
    my %clus=%{&open_clu_files(\$clu)};
    my @keys= keys %clus;
    my $num_of_cluster=@keys=@{&sort_by_cluster_size(\@keys)};

    print "# $0: clu_to_sso_to_msp: No. of cluster=$num_of_cluster after open_clu_files \n" if $verbose;

    &show_array(\@keys) if $verbose;
    &show_hash(\%clus) if $verbose;
    @possible_extensions=('msp', 'sso', 'msso', 'msso.gz','fsso', 'ssso', 'fso', 'out', 'prot.sso', 'prot.ts');
    @U_L_case=('\U', '\L');

    for($i=0; $i< @keys; $i++){
          my (@list, @final_files, $clus_name, $big_out_msp, @msp_hashes);
          $clus_name=$keys[$i];
          unless($single_file_name=~/\S/){
                          $big_out_msp="$clus_name\_cluster\.msp"; #<<<----- final output name
          }else{
                          $big_out_msp=$single_file_name;
          }
          push(@written_msp_files, $big_out_msp); ## This is the output of this sub

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          #  If $clus_name.msp is already there, skip
          #_____________________________________________
          if( (-s $big_out_msp) > 100  and !$over_write ){
                print "\n# clu_to_sso_to_msp : $big_out_msp MSP file already exists, skipping\n";
                print "#    Use  \$over_write option \'o\' to start all over again or \n";
                print "#    delete clustering files like XX-XX_cluster.clu to go on\n";
                next ;
          }
          $num_of_seq_member=@list=split(/ +/, $clus{$keys[$i]}); # @list has (HIU001, HI002, HI333, MJ111, etc)
          print "# $0: clu_to_sso_to_msp: No. of seq member=$num_of_seq_member after split \n" if $verbose;

          FOR0: for($j=0; $j < @list; $j++){
               my($sub_dir_head, $file_name_low, $file_name_up, $file_name_prot_low, @sub_dir_heads,
                     $file_name_prot_up, $file_name_low_gz, $file_name_up_gz,
                     $file_name_prot_low_gz, $file_name_prot_up_gz);

               $each_seq_name=$list[$j];
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Here I take chars from the sequ names, as dirs have fragments of chars
               #_______________________________________________________________________________
               for($s=1; $s <=2 ; $s++){  ## here, number 2 indicates, I check single or 2 char sub dir names
                     $sub_dir_head= substr($list[$j], 0, $s);
                     push(@sub_dir_heads, "\L$sub_dir_head") if (-d "\L$sub_dir_head" );
                     push(@sub_dir_heads, "\U$sub_dir_head") if (-d "\U$sub_dir_head" );
               }
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Checking all the possible subdirectories to crop all the sso files
               #_______________________________________________________________________________
               FOR1: for($p=0; $p < @sub_dir_heads; $p++){
                     $subd=$sub_dir_heads[$p];
                     FOR2 : for($e=0; $e < @possible_extensions; $e++){
                           $ext=$possible_extensions[$e];
                           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                           #  This makes all the possible lower upper case names
                           #______________________________________________________
                           for( $u=0; $u < @U_L_case; $u++){
                                  if($U_L_case[$u]=~/U/){  $each_seq_name="\U$each_seq_name";
                                  }else{                   $each_seq_name="\L$each_seq_name"; }

                                  if(-s "$each_seq_name\.$ext"){   push(@final_files, "$each_seq_name\.$ext" ) ; next FOR0 }
                                  elsif(-s "$each_seq_name\.$ext\.gz"){ push(@final_files, "$each_seq_name\.$ext\.gz" ) ; next FOR0 }
                                  else{
                                       $file_wanted="\.\/$subd\/$each_seq_name\.$ext";
                                       if(-s $file_wanted){
                                            push( @final_files, $file_wanted); next FOR0 }
                                       elsif(-s "$file_wanted\.gz"){
                                            push( @final_files, "$file_wanted\.gz");
                                            next FOR0
                                       }
                                  }
                           }
                     } # FOR2
               } # FOR1

          } # FOR0

          print "\n# @final_files \n=============> $big_out_msp  \n\n" if $verbose;

          if(@final_files < 1){
               print "\n# clu_to_sso_to_msp :LINE no.: ", __LINE__, " ERROR: \@final_files is empty. Serious error\n";
               print "\n If you have sub dir which have more than 2 chars as names, you may increase the default 2 to 3 in the above\n";
               next;
          }
          # $write_each_msp_to_disk='w';

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           #  Check if small msp files have already made in previous steps
           #________________________________________________________________
           if($final_files[0]=~/\.msp *$/){ ##  concatenate msp into big_msp
                 open(BIG_MSP_FILE, ">$big_out_msp");
                 for($y=0; $y< @final_files; $y++){
                      open(SINGLE_MSP, "$final_files[$y]");
                      while(<SINGLE_MSP>){
                           print BIG_MSP_FILE $_;
                      }
                 }
                 close(BIG_MSP_FILE);
                 close(SINGLE_MSP);
                 push(@written_msp_files, $big_out_msp);

           }else{
                if($write_each_msp_to_disk){
                    print "\# $0 : going to run open_sso_files with $write_each_msp_to_disk opt\n";
                    $big_out_msp=${&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                                            "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_msp, $over_write)};
                    if(-s $big_out_msp > 200){  print "\n# $0: SUCCESS to create $big_out_msp :) :) :-) :-) ?\n"; }
                }else{
                    print "\n# clu_to_sso_to_msp: I am running open_sso_files. \n";
                    @msp_hashes=@{&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                                            "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_msp, $over_write)};

                    &write_msp_files(@msp_hashes, $big_out_msp); ## concatenates all the hash ref to one
                }
          }
    }
    return(\@written_msp_files);
}# end of



#________________________________________________________________________
# Title     : remove_dup_in_array
# Usage     : @out2 = @{&remove_dup_in_array(\@input1, \@input2,,,,)};
#             @out1 = &remove_dup_in_array(\@input1 );
# Function  : removes duplicate entries in an array. You can sort the
#             result if you wish by 's' opt. Otherwise, result will keep
#             the original order
# Example   : (1,1,1,1,3,3,3,3,4,4,4,3,3,4,4);  --> (1,3,4);
# Warning   :
# Keywords  : merge array elements, remove_repeting_elements,
#             remove_same_array_elements
# Options   :
#   s for sorting the array output
# Returns   : one or more references.
# Argument  : one or more refs for arrays or one array.
# Version   : 1.4
#--------------------------------------------------------------------
sub remove_dup_in_array{
  my($i, $sort_opt, @out_ref, @nondup,%duplicate, @orig, @out_ref);
  my @in=@_;
  for($i=0; $i<@in; $i++){
	 if($in[$i] eq 's'){
		$sort_opt=1;  splice(@in, $i, 1); $i--;
	 }elsif( ref($in[$i]) eq 'SCALAR'  and  ${$in[$i]} eq 's' ){
		$sort_opt=1;  splice(@in, $i, 1); $i--;
	 }
  }
  for($i=0; $i<@in; $i++){
	  undef(%duplicate);
	  if(ref($in[$i]) eq 'ARRAY'){    @orig = @{$in[$i]};    }
	  else{ @orig=@in }
	  @nondup = grep { ! $duplicate{$_}++ } @orig;    ## NOTE -> $_
	  if($sort_opt==1){ @nondup= sort @nondup }
	  push(@out_ref, \@nondup);
  }
  if(@out_ref ==1){ return($out_ref[0]);}
  elsif(@out_ref >1){  return(@out_ref);}
}



#__________________________________________________________________________
# Title     : sort_by_cluster_size
# Usage     : @out=@{&sort_by_cluster_size(\@input_line_array)};
# Function  : it sorts by the 1st digit before '-'  as in 2-183_cluster, 2-140_cluster,
#               etc.
# Example   :
# Keywords  : sort_by_columns, sort_by_text_columns, sort_by_column_numerically
#             sort_by_pattern
# Options   :
# Version   : 1.2
#----------------------------------------------------------------------------
sub sort_by_cluster_size{
   my (@in, @M, $col);
   if(@_ < 1  ){ print "\n# FATAL: sort_by_cluster_size needs 1 argument\n"; exit }
   if(ref $_[0] eq 'ARRAY'){ 	  @in = @{$_[0]};      }else{ 	  @in = @_;    }
   $col=0;
   @in= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { [$_, ($_=~/^(\S+)\-/)[$col] ] } @in;
   return(\@in);
}




#________________________________________________________________________
# Title     : read_dir_names_only
# Usage     : @all_dirs_list = @{&read_dir_names_only(\$absolute_path_dir_name, ....)};
# Function  : read any dir names and and then put in array.
# Example   :
# Warning   : This does not report '.', '..'
#             Only file names are reported. Compare with &read_any_dir
# Class     :
# Keywords  :
# Options   :
# Package   :
# Reference :
# Returns   : one ref. of array.
# Tips      :
# Argument  : takes one or more scaler references. ('.', \$path, $path, ... )
# Todo      :
# Author    : A Biomatic
# Version   : 3.1
# Used in   :
# Enclosed  :
#--------------------------------------------------------------------
sub read_dir_names_only{
  my($in_dir, $i,$k, @possible_dirs,
	  @final_files, $full_dir, $pwd, $path,@read_files);
  $pwd=`pwd`; chomp($pwd); $full_dir=1;
  for($k=0; $k < @_; $k++){
	 if   ( ($_[$k] eq '.') || !(defined($_[$k]))){  $in_dir=$pwd;  }
	 elsif(!(ref($_[$k]))){   $in_dir=$_[$k];   }
	 elsif(ref($_[$k])){      $in_dir =${$_[$k]};    }
	 if($in_dir =~ /^([\w\-\.]+)$/){  $in_dir="$pwd\/$in_dir"; $full_dir = 0; }
	 else{ $full_dir =1; }
	 ##########  Main READING PART ##########
	 opendir(DIR1,"$in_dir");
	 @read_files = readdir(DIR1);
	 for($i=0; $i < @read_files; $i ++){
		$read_files[$i]="$in_dir\/$read_files[$i]";
		if( ($read_files[$i] !~ /\/\.\.?$/) && ( -d $read_files[$i]) ){
		  $read_files[$i]=~s/\.\///; ## removing ./ in front of dirs (in bash)
		  push(@final_files, "$read_files[$i]");
		}
	 }
  }
  return([sort @final_files]);
}


#________________________________________________________________________
# Title     : default_help
# Usage     : &default_help2;  usually with 'parse_arguments' sub.
# Function  : Prints usage information and others when invoked. You need to have
#             sections like this explanation box in your perl code. When invoked,
#             default_help routine reads the running perl code (SELF READING) and
#             displays what you have typed in this box.
#             After one entry names like # Function :, the following lines without
#             entry name (like this very line) are attached to the previous entry.
#             In this example, to # Function : entry.
# Example   : &default_help2; &default_help2(\$arg_num_limit);   &default_help2( '3' );
#             1 scalar digit for the minimum number of arg (optional),
#             or its ref. If this defined, it will produce exit the program
#             telling the minimum arguments.
# Warning   : this uses format and references
# Keywords  :
# Options   :
# Returns   : formated information
# Argument  :
# Version   : 3.3
#--------------------------------------------------------------------
sub default_help{
  my($i, $perl_dir, $arg_num_limit, $head ,$arg_num_limit,
	  @entries, @entries_I_want_write );
  my($logname)=getlogin();
  my($pwd)=`pwd`;
  my($date)=`date`;
  chomp($date,$pwd);
  my($not_provided)="--- not provided ---\n";
  my($file_to_read) = $0;

  for($i=0; $i < @_; $i ++){
	  if((ref($_[$i]) eq 'SCALAR')&&(${$_[$i]} =~ /^\d$/)){
		  $arg_num_limit = ${$_[$i]};  }
	  elsif( (!(ref($_[$i]))) && ($_[$i] =~ /^\d$/)){
		  $arg_num_limit = $_[$i];     }
  }
  my %entries = %{&read_head_box(\$file_to_read )};
  if($option_tb_found ==1){
	 @option_tb=@{&read_option_table(\$file_to_read)};
  }

  @entries = keys %entries;
  foreach $help_item (@entries){
	  ${$help_item}= $not_provided if( ${$help_item}=~/^[\W]*$/  and  !defined(${$help_item}) );
  }
  #""""""""""""""""""""""""""""""""""""""""
  #########  Writing the format <<<<<<<<<<<
  #""""""""""""""""""""""""""""""""""""""""
  $~ =HEADER_HELP;
  write;   ## <<--  $~ is the selection operator
  $~ =DEFAULT_HELP_FORM;

  @entries_I_want_write=sort keys %entries;

  for( @entries_I_want_write ){  write  }

  print chr(7);  print "_"x72,"\n\n";

  if(@ARGV < $arg_num_limit){ print "\* $0 fataly needs $arg_num_limit arguments\n\n" }

  if(  $option_tb_found == 1){
	 #########  Printing the OPTION table contents <<<<<<<<<<<<
	 print "  Press \"Return\" key to see what options $logname\'s \n\n    \"$0\" take... \n";
		 $key_press=getc();
	 print @option_tb, "\n"x2 if(@option_tb > 0);
  }
format HEADER_HELP  =
_____________________________________________________________________
		  __  __      ______     __          _____
		 /\ \/\ \    /\  ___\   /\ \        /\  _ `\
		 \ \ \_\ \   \ \ \__/   \ \ \       \ \ \L\ \
		  \ \  _  \   \ \  _\    \ \ \       \ \ ,__/
		   \ \ \ \ \   \ \ \/___  \ \ \_____  \ \ \/
		    \ \_\ \_\   \ \_____\  \ \______\  \ \_\
		     \/_/\/_/    \/_____/   \/______/   \/_/ V 3.1`
_____________________________________________________________________
.
format DEFAULT_HELP_FORM =
 @<<<<<<<<<: @*
 $_        $entries{$_}
.
}

#________________________________________________________________________
# Title     : set_debug_option
# Usage     : &set_debug_option;
# Function  : If you put '#' or  '##' at the prompt of any program which uses
#             this sub you will get verbose printouts for the program if the program
#             has a lot of comments.
# Example   : set_debug_option #    <-- at prompt.
# Warning   :
# Keywords  :
# Options   : #   for 1st level of verbose printouts
#             ##  for even more verbose printouts
# $debug  becomes 1 by '#'  or '_'
# $debug2 becomes 1 by '##'  or '__'
#
# Returns   :  $debug
# Argument  :
# Version   : 1.8
#             generalized debug var is added for more verbose printouts.
#--------------------------------------------------------------------
sub set_debug_option{
  my($j, $i, $level );
  unless( defined($debug) ){
	 for($j=0; $j < @ARGV; $j ++){
		 if( $ARGV[$j] =~/^(_+)$|^(#+)$/){ # in bash, '#' is a special var, so use '_'
			 print __LINE__," >>>>>>> Debug option is set by $1 <<<<<<<<<\n";
			 $debug=1;
				  print chr(7);
			 print __LINE__," \$debug  is set to ", $debug, "\n";
			 splice(@ARGV,$j,1); $j-- ;
			 $level = length($1)+1;
			 for($i=0; $i < $level; $i++){
				 ${"debug$i"}=1;
				 print __LINE__," \$debug${i} is set to ", ${"debug$i"}, "\n";
			 }
		 }
	 }
  }
}
#________________________________________________________________________
# Title     : show_array
# Usage     : &show_array(\@input_array);
# Function  : for debugging purpose. Shows any array elem line by line.
# Example   : Output:      item1
#             Output:      item2
#             Output:      item3
# Warning   : can handle scalar ref, too.
# Keywords  :
# Options   : -h  for horizontal display of elements
#             c   for compact (do not put new line between array chunk)
#             s   for putting new line between arrays
# Returns   :
# Argument  :
# Version   : 2.4
#--------------------------------------------------------------------
sub show_array{
  my($k, $i, $t,  @in2, $in, $space, $show_horizontally, $compact);
  my(@in)=@_;

  ## This is to get the option of 'horizontal' to make horizontal output.
  for($t=0; $t < @in ; $t++){
	 if($in[$t] =~/\-?[hH][orizontal]*$/){   ### No ref.
		 $show_horizontally = "h";
		 splice(@in, $t, 1);  $t--;
	 }elsif(${in[$t]} =~/-?[hH][orizontal]*$/){  ### ref.
		 $show_horizontally = "h";
		 splice(@in, $t, 1);  $t--;
	 }elsif(${in[$t]} =~/^s$/i){  ### ref.
		 $space = "s";
		 $compact='';
		 splice(@in, $t, 1);  $t--;
	 }elsif(${in[$t]} =~/^c$/i){  ### ref.
		 $compact = "c";
		 $space='';
		 splice(@in, $t, 1);  $t--;
	 }
  }

  for($k=0; $k < @in; $k++){
	 if(ref($in[$k]) eq 'ARRAY'){
		 &show_array(@{$in[$k]}, "$show_horizontally", "$compact", "$space" );
	 }elsif(ref($in[$k]) eq 'SCALAR'){
		 if($show_horizontally eq "h"){
			 print ${$in[$k]}, ",  ";
		 }elsif(  $show_horizontally ne "h"){
			 print ${$in[$k]}, "\n";
		 }
	 }elsif( !ref($in[$k]) ){
		 if($show_horizontally eq 'h'){
			 print  $in[$k] , ",  ";
		 }elsif(  $show_horizontally ne "h"){
			 print  $in[$k] , "\n";
		 }
	 }
  }
  if($compact !~/^c$/i){
	print "\n"; #### This is necessary to distinguish different arrays.
  }
}
#________________________________________________________________________
# Title     : parse_arguments
# Usage     : &parse_arguments; or  (file1, file2)=@{&parse_arguments};
# Function  : Parse and assign any types of arguments on prompt in UNIX to
#             the various variables inside of the running program.
#             This is more visual than getopt and easier.
#             just change the option table_example below for your own variable
#             setttings. This program reads itself and parse the arguments
#             according to the setting you made in this subroutine or
#             option table in anywhere in the program.
# Example   : &parse_arguments(1);
#             @files=@{&parse_arguments(1)};
# Warning   : HASH and ARRAY mustn't be like = (1, 2,3) or (1,2 ,3)
# Keywords  :
# Options   : '0'  to specify that there is no argument to sub, use
#              &parse_arguments(0);
#             parse_arguments itself does not have any specific option.
#             '#' at prompt will make a var  $debug set to 1. This is to
#              print out all the print lines to make debugging easier.
#             'e=xxxx' for filtering input files by extension xxxx
#
# Returns   : Filenames in a reference of array
#             and input files in an array (file1, file2)=@{&parse_arguments};
# Argument  : uses @ARGV
# Version   : 1.7
#--------------------------------------------------------------------
sub parse_arguments{
  my( $c, $d, $f, $arg_num, $option_table_seen, $n, $option_filtered,
		$option_table_example, $input_line, @input_files,
		$extension);
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #   Checks if there were arguments
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( @ARGV < 1 ){ #<-- If Argument is not given at prompt
	  for(@_){
		 if($_ eq '0'){
			 last;
		 }else{
			 print "\n \"$0\" requires at least one Argument, suiciding.\n\n";
			 print chr(7); #<-- This is beeping
			 print "  To get help type \"$0  h\"\n\n\n ";
			 exit;
		 }
	  }
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  #  Checking some input options like 'e=txt' for extension filtering
  #_____________________________________________________________________
  for($i=0; $i< @_; $i++){
	  if($_[$i]=~/e=(\S+)/){
		 $extension=$1;
	  }
  }

  #""""""""""""""""""""""""""""""""""""""""""""""""""
  #   Some DEFAULT $debug variables for debugging purposes
  #""""""""""""""""""""""""""""""""""""""""""""""""""
  &set_debug_option;
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #  If there is only one prompt arg. and is [-]*[hH][elp]*, it calls
  #   &default_help and exits
  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( ( @ARGV == 1 ) && ($ARGV[0] =~ /^[\-]*[hH\?][elp ]*$/) ){
		&default_help;
		exit;
  }
  for($f=0; $f < @ARGV; $f++){
	 if( $ARGV[$f] =~ /\w+[\-\.\w]+$/ and -f $ARGV[$f] ){
		 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 # When extension is defined, filter files by it
		 #____________________________________________
		 if($extension){
			 if($ARGV[$f]=~/\S\.$extension/){
				 push(@input_files, $ARGV[$f] );
			 }else{ next }
		 }else{
			 push(@input_files, $ARGV[$f] );
			 next;
		 }
	 }
  }

  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  #     Reading the running program script
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""
  &assign_options_to_variables;
  if($HELP == 1){ &default_help }
  return(\@input_files);
}

#________________________________________________________________________
# Title     : handle_arguments
# Usage     : Just put the whole box delimited by the two '###..' lines below
#             to inside of your subroutines. It will call 'handle_arguments'
#             subroutine and parse all the given input arguments.
#             To use, claim the arguments, just use the variable in the box.
#             For example, if you had passed 2 file names for files existing
#             in your PWD(or if the string looks like this: xxxx.ext),
#             you can claim them by $file[0], $file[1] in
#             your subroutine.
# Function  : Sorts input arguments going into subroutines and returns default
#             arrays of references for various types (file, dir, hash, array,,,,)
#             If you give (\@out, @file), it will put @out into @array as a ref
#             and also the contents of @out will be dereferenced and put to
#             raw_string regardless what is in it).
#
# Example   : 'handle_arguments(\@array, $string, \%hash, 8, 'any_string')
# Warning   :
# Keywords  : handling arguments, parsing arguments,
# Options   :
# Returns   : Following GLOBAL variables
#
#             $num_opt,    @num_opt     @file          @dir
#             $char_opt,   @char_opt    %vars          @array,
#             @hash        @string,     @raw_string    @range,
#
#             $num_opt has 10,20
#             @num_opt has (10, 20)
#             @file has  xxxx.ext
#             @dir has  dir  or /my/dir
#             $char_opt has 'A,B'
#             @char_opt has (A, B)
#             @array has  (\@ar1, \@ar2)
#             @hash has (\%hash1, \%hash2)
#             @string  ('sdfasf', 'dfsf')
#             @raw_string (file.ext, dir_name, 'strings',,)
#             @range has values like  10-20
#             %vars deals with x=2, y=3 stuff.
#
# Argument  : any type, any amount
# Version   : 4.8
#--------------------------------------------------------------------
sub handle_arguments{
	my($c, $d, $e, $f, $i, $j, $k, $l, $s, $t, $x, $y, $z, $char_opt, $dir, @hash,
		$file, $in_dir, $num_opt, @char_opt, @dir, @file, @string, @file_dir, @k,
		@num_opt, @raw_string,@string, @array, %vars, @range, @temp, $temp,
		@char_options);

  &set_debug_option;
  if(@_<1){ print chr(7),"\n This is handle_arguments. No args Passed, Error?\n"}
  elsif( (@_ ==1)&& (ref($_[0]) eq 'ARRAY') ){ # when there is only 1 argument
	  push(@array, $_[0]);
	  push(@k, $_[0]);
  }elsif( (@_==1)&&( !ref($_[0]) ) ){
	  if(-f $_[0]){ push(@file, $_[0]);   push(@string, $_[0]) }
	  elsif(-d $_[0]){ push(@dir, $_[0]); push(@string, $_[0]) }
	  elsif($_[0]=~/^\d+$/){ push(@num_opt, $_[0]); $num_opt.=$_[0] }
	  elsif($_[0]=~/^\w+$/){ push(@string, $_[0]); }
  }elsif(@_ >=1){ @k = @_ }

  #####______Start of  general argument handling______######
  for($k=0; $k < @k ;$k++){
	  if( !ref($k[$k]) ){
		  if($k[$k]=~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){  push(@char_opt, $1); $char_opt .= "$1\,";
		  }elsif($k[$k]=~ /^\-([a-zA-Z]+)$/){          ## When multiple option is given,
			  @char_options = split(/\,|/, $1);  push(@char_opt, @char_options);
			  $char_opt .= join("\,", @char_options); ## '-' should be used. eg. '-HEGI'
		  }elsif($k[$k]=~ /^(\w+)\=(\S* *)$/){  $vars{$1}=$2;  $vars .= "$1\,";
		  }elsif($k[$k]=~ /^(\-?\d+)$/){ push(@num_opt, $1);  $num_opt .= "$1\,";
		  }elsif($k[$k]=~ /^\d+\.?\d*\-\d+\.?\d*$/){  push(@range,  $k[$k] );
		  }elsif(-f $k[$k]){                          push(@file,   $k[$k] );
		  }elsif(-d $k[$k]){                          push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /\/[\w\d\.\-]+[\/].+[\/]$/){push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /^\/[\w\d\.\-]+[\/]*$/){    push(@dir,    $k[$k] );
		  }elsif($k[$k]=~ /^[\/\w\d\-\.]+\.\w+$/){    push(@file,   $k[$k] );
		  }elsif($k[$k]=~ /\S\/[\/\w\d\-\.]+\.\w+$/){ push(@file,   $k[$k] );
		  }elsif($k[$k]=~/^\w+[\/\\\w\d\.\-]+$/){     push(@string, $k[$k] );
		        # string does not have space, but includes '\', '/', '.'
		  }else{                                      push(@raw_string, $k[$k] );  }

	  }elsif( ref($k[$k]) ){
		  if( ref($k[$k]) eq "SCALAR"){
			 if(${$k[$k]} =~ /^[\-]?([a-zA-Z]\d*) {0,5}$/){ push(@char_opt, $1); $char_opt  .= "$1\,";
				}elsif(${$k[$k]}=~ /^\-([a-zA-Z]+)$/){ push(@char_opt, @char_options);
					$char_opt  .= join("\,", @char_options);  ## as an option string.
				}elsif(${$k[$k]}=~ /^(\w+)\=(\S* *)$/){  $vars{$1}=$2;  $vars .= "$1\,";
				}elsif(${$k[$k]}=~ /^(\-?\d+)$/){ $num_opt .= "$1\,";  push(@num_opt, $1);
			    }elsif(${$k[$k]}=~ /^\d+\.?\d*\-\d+\.?\d*$/){    push(@range,  $k[$k] );
				}elsif(-f ${$k[$k]}){                            push(@file,   ${$k[$k]} );
				}elsif(-d ${$k[$k]}){                            push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~ /\/[\/\w\d\.\-]+[\/].+[\/]/){ push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~/^\/[\/\w\d\.\-]+[\/]*$/){     push(@dir,    ${$k[$k]} );
				}elsif(${$k[$k]}=~ /^[\/\w\d\-\.]+\.\w+$/){      push(@file,   ${$k[$k]} );
				}elsif(${$k[$k]}=~/^\w+[\w\d\.\-]+$/){           push(@string, ${$k[$k]} );
				}else{                                           push(@raw_string, ${$k[$k]}); }
		  }elsif(ref($k[$k]) eq "ARRAY"){ my @temp_arr = @{$k[$k]}; push(@array, $k[$k]);
			for ($i=0; $i<@temp_arr; $i++){
			   if(-f $temp_arr[$i]){                            push(@file, $temp_arr[$i]);
			   }elsif($temp_arr[$i]=~/^\d+\.?\d*\-\d+\.?\d*$/){ push(@range,$temp_arr[$i] );
			   }elsif(-d $temp_arr[$i]){                        push(@dir , $temp_arr[$i]);
			   }elsif($temp_arr[$i]=~/\/[\/\w\d\.\-]+[\/].+[\/]/){ push(@dir, $temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^\/[\/\w\d\.\-]+[\/]*$/){ push(@dir, $temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^[\/\w\d\-\.]+\.\w+$/){   push(@file,$temp_arr[$i] );
																push(@string,$temp_arr[$i] );
			   }elsif($temp_arr[$i]=~/^\w+[\w\d\.\-]+$/){       push(@string,$temp_arr[$i]);
			   }else{                                           push(@raw_string, $temp_arr[$i]); }
			 }
		  }elsif(ref($k[$k]) eq "HASH"){                             push(@hash,   $k[$k] ); }
	  }
  }
  @raw_string=(@raw_string, @string);
  @file = @{&remove_dup_in_arrayH(\@file)};
  #-----------------------------------------------------
	 sub remove_dup_in_arrayH{  my($i, @nondup, @out_ref, %duplicate, @orig, @out_ref);
		for($i=0; $i<@_; $i++){  undef(%duplicate);
	       if(ref($_[$i]) eq 'ARRAY'){    @orig = @{$_[$i]};    }
		   @nondup = grep { ! $duplicate{$_}++ } @orig; push(@out_ref, \@nondup);  }
		if(@out_ref ==1){ return($out_ref[0]);}
		elsif(@out_ref >1){  return(@out_ref);}
	 }
  #-----------------------------------------------------
  return(\@hash, \@array, \@string, \@dir, \@file, \@num_opt,
			\@char_opt, \$num_opt, \$char_opt, \@raw_string, \%vars, \@range );
}

#________________________________________________________________________
# Title     : show_hash
# Usage     : &show_hash(\@input_array);
# Function  : for debugging purpose. Shows any array elem line by line.
#             the line is 60 elements long (uses recursion)
# Example   : Output:      item1
#             Output:      item2
#             Output:      item3
# Warning   : There is a global variable:  $show_hash_option
#             It tries to detect any given sting which is joined by ','
# Keywords  :
# Options   : -s or -S or s or S for spaced output. Eg)
#             seq1       1 1 1 1 1 1 1 1 1 1 1 1
#
#             instead of
#             seq1       111111111111
#
#             -h or -H or h or H for horizontal line of '---------...'
#
# Returns   :
# Argument  :
# Version   : 1.7
#--------------------------------------------------------------------
sub show_hash{
  my($k, $i, $t, @in2, $in, $LEN, %TEM ); ## You should not put $show_hash_option
  my(@in)=@_;                     ## and $horizontal_line  in my !!!
  my($KL)=2; # default keys string length;
  my($VL)=80; # default values string length;
  my($GAP)=2;  # default space between keys and values
  my($horizontal_line, $show_hash_optionXX, $Hash_counter, @line);

  ## This is to get the option of 'space' to make spaced output.
  for($t=0; $t < @in; $t++){
	 if($in[$t] =~/^[-]+[sS][pace]*$/){
		 $show_hash_optionXX = 1;
		 splice(@in, $t, 1);
	 }elsif(${in[$t]} =~/^[-]+[sS][pace]*$/){
		 $show_hash_optionXX = 1;
		 splice(@in, $t, 1);
	 }elsif($in[$t] =~/^[-]+[hH][rR]*$/){
		 $horizontal_line = 1;
		 splice(@in, $t, 1);
	 }elsif(${in[$t]} =~/^[-]+[hH][rR]*$/){
		 $horizontal_line = 1;
		 splice(@in, $t, 1);
	 }
  }

  ######## Main loop #################
  if($horizontal_line ==1){  ## This puts the delimiter '--------------(  )'
	  $Hash_counter ++;
	  print "\n","-"x78,"(${Hash_counter}th hash)", "\n";
  }

  for($k=0; $k < @in; $k++){
	 if(ref($in[$k]) eq 'ARRAY'){  ### When the hashes were given in array ref.
		 &show_hash(@{$in[$k]}, $show_hash_optionXX, $horizontal_line);
		 print "\n";
	 }
	 elsif(ref($in[$k]) eq 'HASH'){  ### recursion
		 &show_hash(%{$in[$k]});
		 print "\n";
	 }
	 elsif(ref($in[$k+1]) eq 'HASH'){  ### recursion
		 &show_hash(%{$in[$k+1]}); print "\n";
	 }
	 elsif(ref($in[$k]) eq 'SCALAR'){  print ${$_[$k]}, "\n";  }
	 elsif( !ref($in[$k]) ){
		 if( !ref($in[$k+1]) && defined($in[$k+1])  ){
			 if($show_hash_optionXX == 1){  #### space option checking.
				#if($in[$k+1] =~ /\,.+\,/){  #### if the string is joined with ','
				#	 @line = split(/\,/, $_[$k+1]);
				# }else{
				#	 @line = split(//, $_[$k+1]);
				# }
				%TEM = @in;
				$LEN = ${&max_elem_string_array_show_hash(keys %TEM)};
				 if($LEN > $KL){ $KL = $LEN + $GAP +2};
				 printf ("%-${KL}s ", $in[$k]);  $k++;
				 printf ("%-${VL}s\n","@line");
			 }else{                        ### If not option is set, just write
				%TEM = @in;
				$LEN = ${&max_elem_string_array_show_hash( keys %TEM)};
				 if($LEN > $KL){ $KL = $LEN + $GAP +2};
				 printf ("%-${KL}s ", $in[$k]);  $k++; # print $in[$k], "\t";  $k++;
				 printf ("%-${VL}s\n",$in[$k]);        # print $in[$k], "\n";
			 }
		 }
		  #________________________________________________________
		  # Title    : max_elem_string_array_show_hash
		  # Keywords : largest string length of array
		  # Function : gets the largest string length of element of any array of numbers.
		  # Usage    : ($out1, $out2)=@{&max_elem_array(\@array1, \@array2)};
		  #            ($out1)       =${&max_elem_array(\@array1)          };
		  # Argument : numerical arrays
		  # returns  : one or more ref. for scalar numbers.
		  # Version  : 1.1
		  #-------------------------------------------------------
		  sub max_elem_string_array_show_hash{
			 my(@input, $i, $max_elem);
			 @input = @{$_[0]} || @_;
			 for($i=0; $i< @input ; $i++){
					$max_elem = length($input[0]);
					if (length($input[$i]) > $max_elem){
						$max_elem = length($input[$i]);
					}
			 }
			 \$max_elem;
		  }
		  #####################################insert_gaps_in_seq_hash
	 }
  }
}
#________________________________________________________________________
# Title     : read_option_table
# Usage     :
# Function  : Reads the option table made by Jong in any perl script. The
#             option table is a box with separators.
# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   :
# Argument  :
# Version   : 1.0
#--------------------------------------------------------------------
sub read_option_table{
	my($table_found, @option_tb, $head);
	 open(SELF, "${$_[0]}");
	 while(<SELF>){
		if( (/^ *#+/) && ( $table_found== 1) ){
		  push (@option_tb, "$_");
		}elsif( ($table_found != 1)&&(/^ *\#+ *[Oo]ption *[Tt]able */) ){
			$table_found=1; $head="############## Option Table for $logname\'s \"$0\"\n"; ##
			push(@option_tb, $head);
		}
		if( ($table_found==1)&&(/^ *###################+ *$/)){  ### to find the end point of reading
			$table_found =0; last; }
	 }
	 return(\@option_tb);
}

#______________________________________________________________
# Title     : write_msp_files
# Usage     : &write_msp_files(\%in1, \%in2, ['s'], [$filename],,)
# Function  : Writes input which is already in msp file format to
#              files either the name is given or generated
#              If more than one ref of hash is given, this will
#              concatenate all the hashes to one big one to
#              make one file.
#             When NO output xxx.msp file name is given, it creates
#              with the query sequence name.
# Example   :  &write_msp_files(@sso, 's', $out_file);
# Warning   : When NO output xxx.msp file name is given, it creates
#              with the query sequence name.
# Keywords  : write_msp,
# Options   : _  for debugging.
#             #  for debugging.
#             s  for each single file output for each hash input
#      filename  for putting output to the specified filename, should be xxx.msp
#
# Returns   : if 's' option is set, it will make say,
#               HI001.msp HI002.msp HI003.msp  rather than
#
#               HI001HI002HI003.msp
#  eg of one output(single file case)
#
#   1027     0.0     1     154   HI0004     1     154   HI0004
#   40       0.0     84    132   HI0004     63    108   HI0001
#   31       0.0     79    84    HI0004     98    103   HI0003
#
# Category  :
# Version   : 2.8
#--------------------------------------------------------------
sub write_msp_files{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

    my ($out_msp_file, $add_range, @final_out, $msp_file_out,
         @keys, $N, $temp_1, %hash, $query_seq_name, $single_out_opt);

    if($char_opt=~/r/){ $add_range      ='r' };
    if($char_opt=~/s/){ $single_out_opt ='s' };
    if(@file == 1){ $out_msp_file=$file[0]; $single_out_opt='' } # s is for single file output

    if($single_out_opt eq 's'){ #~~~~~~~~~~~` single files output option WITHOUT a given outfilename
         $msp_file_out='default_single_out.msp';
         for($i=0; $i< @hash; $i++){
             my %hash=%{$hash[$i]};
             my @keys =sort keys %hash;

             #------------------ Writing the first line ---------------------------
             for($j=0; $j< @keys; $j++){
                  if($keys[$j]=~/(\S+)_\d+\-\d+/){ $N = $1 }else{ $N = $keys[$j] }
                  if($hash{$keys[$j]}=~/ +$N[\_\d+\-\d+]* +\d+ +\d+ +$N[\_\d+\-\d+]*/){
                       open(MSP_FILE, ">$msp_file_out") ||
                           die "# write_msp_files: I can not create $msp_file_out, check permission\n";
                       chomp( $hash{$keys[$j]} ); ## precaution
                       print MSP_FILE "# (H) $0 write_msp_files: $keys[$j]\n";
                       print MSP_FILE $hash{$keys[$j]}, "\n";
                       splice(@keys, $j, 1);
                       $j--; last;
                  }
             }
             #------------- Writing the rest of the lines ____________________
             for($j=0; $j< @keys; $j++){
                  chomp( $hash{$keys[$j]} );
                  print MSP_FILE $hash{$keys[$j]}, "\n";
             }
             print MSP_FILE "\n";
         }
         if(-s $msp_file_out){
              print "\n# write_msp_files: $msp_file_out is written \n";
         }else{
              print "\n# Error, write_msp_files\n"; exit
         }
         push(@final_out, $msp_file_out);
         close(MSP_FILE);
         return(\@final_out);
    }else{
         #~~~~~~~~~~~~~ DEfault ~~~~~~~~~~~~~~~~~~
         #  When output file name was given!
         #________________________________________
         if(@file==1){
             my($temp_1);
             open(MSP_FILE, ">$out_msp_file") ||  die "# write_msp_files: I can not create $out_msp_file, check permission\n";
             print MSP_FILE "# (H) $0 write_msp_files: @file\n";
             for($i=0; $i< @hash; $i++){
                 my %hash=%{$hash[$i]};
                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # Sorting %hash values by the second column(Evalue)
                 #_______________________________________________________
                 @keys= map {$_->[0]} sort { $a->[1] <=> $b->[1] } map { $hash{$_}=~/^ *\S+[\t ]+(\S+)[\t ]+/ and [$_, $1] } keys %hash;

                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # for Final output
                 #_____________________________
                 push(@final_out, $out_msp_file);

                 #--------- Writing the first line only --------------
                 for($j=0; $j< @keys; $j++){
                      if($keys[$j]=~/(\S+)_\d+\-\d+$/){ $N = $1 }else{ $N = $keys[$j] }

                      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                      # Following is to put the self match on top of the list
                      #________________________________________________________
                      if($hash{$keys[$j]}=~/ +$N[\_\d+\-\d+]* +\d+ +\d+ +$N[\_\d+\-\d+]*/){
                          $temp_1=$keys[0]; $keys[0]=$keys[$j]; $keys[$j]=$temp_1;
                      }
                 }
                 for($j=0; $j< @keys; $j++){
                      chomp($hash{$keys[$j]});
                      print MSP_FILE $hash{$keys[$j]}, "\n";
                 }
                 print MSP_FILE "\n";
             }
             print MSP_FILE "\n";
             close(MSP_FILE);
             if(-s $out_msp_file and $out_msp_file !~/^ *\.msp$/){
                 print "\n# write_msp_files: $out_msp_file is written\n" if(-s $out_msp_file);
             }else{
                 print "\n# write_msp_files: ERROR. Either $out_msp_file is empty or \".msp\" is written\n";
             }
         }else{
             for($i=0; $i< @hash; $i++){
                 my %hash=%{$hash[$i]};
                 my @keys =sort keys %hash;
                 ($query_seq_name)=$hash{$keys[0]}=~/\S+ +\d+ +\d+ +(\S+) +\d+ +\d+ +\S+/;
                 $msp_file_out="$query_seq_name\.msp";
                 open(MSP_FILE, ">$msp_file_out") or die "\n# write_msp_files: Failed to open $msp_file_out\n";
                 print MSP_FILE "# (H) $0 write_msp_files: $query_seq_name\n";

                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # for Final output
                 #_____________________________
                 push(@final_out, $msp_file_out);
                 #~~~~~~~ Writing the first line only ~~~~~~~~~~~~~~~~~~
                 for($j=0; $j< @keys; $j++){
                     if($keys[$j]=~/(\S+)_\d+\-\d+$/){ $N = $1 }else{ $N = $keys[$j] }
                     if($hash{$keys[$j]}=~/ +$N[\_\d+\-\d+]* +\d+ +\d+ +$N[\_\d+\-\d+]*/){
                      $keys[0]=$temp_1; $keys[0]=$keys[$j]; $keys[$j]=$temp_1;
                     }
                 }
                 for($j=0; $j< @keys; $j++){
                     chomp($hash{$keys[$j]});
                     print MSP_FILE $hash{$keys[$j]}, "\n";
                 }
                 print MSP_FILE "\n";
             }
             print MSP_FILE "\n";
             if(-s $out_msp_file and $out_msp_file !~/^ *\.msp$/){
                  print "\n# write_msp_files: $out_msp_file is written\n" if(-s $out_msp_file);
             }else{
                  print "\n# write_msp_files: ERROR. Either $out_msp_file is empty or only \".msp\" is written\n";
             }
             close MSP_FILE;
         }
     }
     if(@final_out ==1){ return( \$final_out[0] ); }else{
         return(\@final_out);
     }
}

#________________________________________________________________________
# Title     : read_head_box
# Usage     : %entries = %{&read_head_box([\$file_to_read, \@BOXED ] )};
# Function  : Reads the introductory header box(the one you see on top of sub routines of
#             Jong's programs.). Make a hash(associative array) to put entries
#             and descriptions of the items. The hash values have new lines '\n' are
#             attached, so that later write_head_box just sorts Title to the top
#             and prints without much calculation.
#             This is similar to read_head_box, but
#             This has one long straight string as value(no \n inside)
#             There are two types of ending line one is Jong's #---------- ...
#             the other is Astrid's  #*************** ...
# Example   : Output is something like
#             ('Title', 'read_head_box', 'Tips', 'Use to parse doc', ...)
# Warning   :
# Keywords  : open_head_box, open_headbox, read_headbox
# Options   : 'b' for remove blank lines. This will remove all the entries
#             with no descriptions
# Returns   : A hash ref.
# Argument  : One or None. If you give an argu. it should be a ref. of an ARRAY
#              or a filename, or ref. of a filename.
#             If no arg is given, it reads SELF, ie. the program itself.
# Version   : 2.7
#--------------------------------------------------------------------
sub read_head_box{
  my($i, $c, $d, $j, $s, $z, @whole_file, $title_found, %Final_out,
	  $variable_string, $TITLE, $title, @keys, $end_found, $line, $entry,
	  $entry_match, $End_line_num, $remove_blank,  $title_entry_null,
	  $end_found, $Enclosed_entry, $Enclosed_var, $blank_counter,
	  $title_entry_exist, $entry_value, $temp_W, $Warning_part
	);

  if(ref($_[0]) eq 'ARRAY'){ ## When array is given
	  @whole_file = @{$_[0]};
  }elsif(-e ${$_[0]}){       ## When filename is given in a ref
	  open(FILE, "${$_[0]}");
	  @whole_file=(<FILE>);
  }elsif(-e $_[0]){          ## When filename is given
	  open(FILE, "$_[0]");
	  @whole_file=(<FILE>);
  }elsif( $_[0] eq 'b'){          ## When filename is given
	  $remove_blank = 1;
  }elsif( ${$_[0]} eq 'b'){          ## When filename is given
	  $remove_blank = 1;
  }else{
	  open(SELF, "$0");
	  @whole_file=(<SELF>);
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  for($i=0; $i<@whole_file; $i++){
	 $whole_file[$i] =~ tr/\t/ {7}/;  ## This is quite important to some parsing!!!
	 #########################################
	 ##  The first and second line of box 1 ##
	 #########################################
	 if( ($whole_file[$i]=~/^#[_\*\~\-\=]{20,}$/)&&    ##  '#______' is discarded
		 ($whole_file[$i+1]=~/ *\# {0,3}([TitlNam]+e) {0,8}: {1,10}([\w\.:]*) *(Copyright.*)/i) ){
		 $TITLE = $1;      $title = "$2\n";   $Final_out{'Warning'}.="$3\n";
		 $entry_match=$TITLE; ## The very first $entry_match is set to 'Title' to prevent null entry
		 if($TITLE =~ /^Title|Name$/i){   #
			  if( ($title=~/^\s+$/)||( $title eq "\n") ){
				  $title_entry_null =1;  $title = '';  }    }
		 $Final_out{$TITLE}=$title;
		 $title_found ++ ;   $i++;  ## << this is essential to prevent reading the same line again.
		 last if $title_found > 1;    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ## The first and second line of box 2, #__________ or #**************
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($whole_file[$i]=~/^#[_\*]{20,}$/)&&
		 ($whole_file[$i+1]=~/^# {1,3}(\w{1,6}\s{0,2}\w+) {0,7}: {1,5}(.*) */i) ){
		 $title_found ++ ;        $i++;
		 $entry_match=$1;       $entry_value=$2;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;  ## Capitalize words
		 $Final_out{$entry_match}.= "$entry_value\n";
		 last if $title_found > 1;  next;   }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ##  'Enclosed' : section. After this, everything is read without discrimination ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($Enclosed_entry == 1)&&($whole_file[$i] =~/^#{1,} {1,}(.*)$/) ){
		 $Final_out{$Enclosed_var}.= "$1\n";    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ##  With proper entry 1 : for  'eg)'
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,12}(eg ?\)) {0,8}(.*)/i)){
		 $entry_match='Example';
		 $Final_out{$entry_match}.= "$2\n";
	 }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ##  With PROPER entry 2 : descriptins like. 'Ussage : ssssssxxjkk  kj'
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,2}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)] {0,6}(.*) */i)){
		 $entry_match=$1;       $entry_value=$2;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
		 $Final_out{$entry_match}.= "$entry_value\n";
		 if($entry_match=~/^(Enclosed?)$/i){
			  $Enclosed_entry = 1;  $Enclosed_var=$1;        }    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #  With proper entry 3 : descriptins like. 'Ussage :', But blank description ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&($title_found==1)&&
		 ($whole_file[$i]=~ /^# {1,2}(\w{1,4}\s{0,2}\w{1,7}) {0,8}[:\)]( {0,})$/i)){
		 $entry_match=$1;       $entry_value=$2;
		 $entry_match =~ s#^\S#(($tmp = $&) =~ tr/[a-z]/[A-Z]/,$tmp)#e;
		 $Final_out{$entry_match}.= " $entry_value\n";
		 if($entry_match=~/^(Enclosed?)$/i){
			  $Enclosed_entry = 1;  $Enclosed_var=$1;      }    }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #  $option variable matching                ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1) && ($title_found==1) &&
		 ($whole_file[$i]=~ /^# {1,15}([\$\@]\w+ +[\w=\>]+ +\S+ \w+ \S+ *.*)/ )){
		 $Final_out{$entry_match} .= "$1\n";  }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###  all space line matching                 ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&  ##<----- If blank line is matched. Take the line
		 ($title_found==1)&&($whole_file[$i]=~/^# {0,}$/) ){
		 $blank_counter++;
		 if($blank_counter > 2){ $blank_counter--; }
		 else{ $Final_out{$entry_match}.= " \n";  }     }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###  Anything after 3 space to 12 positions  ##
	 ###  To match 'examples' etc. INC. ':'       ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&
		 ($title_found==1)&&($whole_file[$i]=~/^#( {2,12})(.+)/) ){
		 $Final_out{$entry_match}.= "$1$2\n"; $blank_counter=0; }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###  Anything after 1 space to 11 positions  ##
	 ###  To match 'examples' etc. EXC. ':'       ##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($end_found != 1)&&
		 ($title_found==1)&&($whole_file[$i]=~/^# {1,12}([^:.]+)/) ){
		 $Final_out{$entry_match}.= "$1\n"; $blank_counter=0;}

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 ###-------End of the read_box reading--------##
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( ($title_found==1)&&
		 ($whole_file[$i]=~ /^#[\~\=\*\-]{15,}/)){  ## to match '#-----..' or '#******..'(Astrid's)
		 $End_line_num = $i;       $end_found++;
		 last;      }

	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 #  <<<<  Check if there is option table >>>>  #
	 #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	 elsif( (/^#{10,} option table of this program   #{10,}/)&&($end_found >=1) &&($title_found==1)){
		 $option_tb_found++; ### This is a global var.
	 }
  } ## < End of for loop


  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### If title is not there at all     ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  @keys=keys %Final_out;
  for(@keys){
	  if(/^Title$/i){    ## No Entry of Title at all??
		  $TITLE =$&;
		  $title_entry_exist = 1;
		  if($Final_out{$_}=~/^ *$/){   ## if Title => Null or just space
			  $title_entry_null = 1;    }  }  }

  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### When title entry is not there    ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if( $title_entry_exist != 1){
		for($s=$End_line_num+1; $s < $End_line_num+20; $s++){
			if( $whole_file[$s] =~ /^sub {1,5}([\w\.]+) {0,6}\{/){
				$Final_out{'Title'} = "$1\n";   last;       }
			elsif( $whole_file[$s] =~/^#________________________+/){
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{'Title'} = "$0";     last;
			}else{
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{'Title'} = "$0";
			}
		}
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ### When title is blank              ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  elsif($title_entry_null ==1){  ## It looks for 'sub xxxxx{ ' line to get title
		### $End_line_num is the last line read.
		for($s = $End_line_num+1; $s < $End_line_num+20; $s++){
			if( $whole_file[$s] =~ /^sub {1,5}(\w+\.*\w*) {0,7}{/){
				$Final_out{$TITLE} = "$1\n";    last;     }
			elsif( $whole_file[$s] =~/^#________________________+/){
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{$TITLE} = "$0";     last;
			}else{
				#######################################
				## Uses running file name as titile  ##
				#######################################
				$Final_out{$TITLE} = "$0";
			}
		}
  }
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  ## Error handling, if no head box is found   ####
  #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  if($title_found < 1){ print "\nFatal: No headbox found by read_head_box2 sub.\n";  }
  \%Final_out;
}               ##<<--- ENd of the sub read_head_box


#______________________________________________________________
# Title     : open_clu_files
# Usage     : %clus=%{&open_clu_files(\$input)};
# Function  :
# Example   : Clu file eg)
#
#  Cluster 7360103
#    1  1 SLL1058         7-255       2   Origin: 3   736   Sub:3
#    1  1 MJ0422          17-283      2   Origin: 3   736   Sub:3
#    1  1 HI1308          3-245       2   Origin: 3   736   Sub:3
#
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
#              This automatically converts lower to upper letters
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
#             b  for to get just names ($simple_clu_reading)
#             r  for adding ranges in the names
# Returns   : a ref of hash of $clus{"$clus_size\-$id"}.=$m."\n";
#             Actual content:
#             3-133 => 'HI00111 HI00222 MG1233 '
# Argument  :
# Version   : 1.9
#--------------------------------------------------------------
sub open_clu_files{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   my($simple_clu_reading, $possible_range, $add_ranges,
	  $id, $name_range, %clus, $found, $upper_case_seq_name);
   my $file=$file[0];
   if($char_opt=~/b/){ $simple_clu_reading= 'b' };
   if($char_opt=~/U/){ $upper_case_seq_name='U'; };

   my $clus_size=1;
   open(CLU, "$file");
   while(<CLU>){
	  if($simple_clu_reading=~/\S/){ ## to get just names
		  if(/^ *\d+ +\d+ +\d+ +\d+ +\d+/){  ## To skip the very first summary columns
			 next;
		  }elsif(/^ *#/ ){ next;
		  }elsif(/^ *\d+ +\d+ +(\S+) +(\S+)/){
			 $seq_name=$1;
			 $possible_range=$2;
			 if($2=~/\d+\-\d+/ and $char_opt=~/r/){
				$name_range="$seq_name\_$possible_range";
				$clus{$name_range} = $name_range;
			 }else{
			    $clus{$seq_name}=$seq_name;
			 }
		  }
	  }else{
		  if(/^ *\d+ +\d+ +\d+ +\d+ +\d+/){  ## To skip the very first summary columns
			 next;
		  }elsif(/^ *#/ ){
			 next;
		  }elsif(/^ *Cluster +size +(\d+)/i ){
			 $clus_size=$1;
			 $found=1;
		  }elsif(/^ *Cluster +([_\d]+) *size:? *(\d+)/i){  # to match 'Cluster 14313'  or  'Cluster 234_1234_1'
			 $id  =$1;
			 $found=1;
			 $clus_size=$2; # if defined($2);
		  }elsif(/^ *Cluster +[number]* *([\w]+)/i){  # to match 'Cluster 14313'  or  'Cluster 234_1234_1'
			 $id  = $1;
			 $found=1;
		  }elsif(($found==1)&&(/^ *\S* *\S* *(\S+)\.prot\,? *.*/)){ ## this is to correct MP genome names
			 $m=$1;
             if($upper_case_seq_name){
                 $clus{"$clus_size\-$id"}.="\U$m ";
             }else{
                 $clus{"$clus_size\-$id"}.="\U$m ";
             }
		  }elsif(($found==1)&&(/^ *(\d+) *\d* *(\S{2,32}) *(\S*)/)){          # general clu match
			 $clus_size=$1 unless ($clus_size);
			 $m=$2;
			 $possible_range=$3;
			 if($2=~/\d+\-\d+/ and $char_opt=~/r/){
				$name_range="$m\_$possible_range";
                if($upper_case_seq_name){
                    $clus{"$clus_size\-$id"}.="\U$name_range ";
                }else{  $clus{"$clus_size\-$id"}.="$name_range "; }
			 }else{
                if($upper_case_seq_name){
                    $clus{"$clus_size\-$id"}.="\U$m ";
                }else{  $clus{"$clus_size\-$id"}.="$m ";  }
			 }
		  }
	  }
   }
   return(\%clus);
}

#________________________________________________________________________
# Title     : assign_options_to_variables
# Usage     : &assign_options_to_variables(\$input_line);
# Function  : Assigns the values set in head box to the variables used in
#             the programs according to the values given at prompt.
#             This produces global values.
#             When numbers are given at prompt, they go to @num_opt
#              global variable. %vars global option will be made
#
# Example   : When you want to set 'a' char to a variable called '$dummy' in
#             the program, you put a head box commented line
#             '#  $dummy    becomes  a  by  -a '
#             Then, the parse_arguments and this sub routine will read the head
#             box and assigns 'a' to $dummy IF you put an argument of '-a' in
#             the prompt.
# Warning   : This is a global vars generator!!!
# Keywords  :
# Options   : '#' at prompt will make a var  $debug set to 1. This is to
#              print out all the print lines to make debugging easier.
# Returns   : Some globaly used variables according to prompt options.
#             @num_opt,
#
# Argument  : None.
# Version   : 2.8
#--------------------------------------------------------------------
sub assign_options_to_variables{
    my($i, %vars, $j, $op, $z, $n, $symb, $value, $var, %val, @val, $ARG_REG,
           $option_table_example, @input_options, $first_border_and_title, $sym, @arg);

    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    #      Defining small variables for option table reading
    #________________________________________________________________________
    my($g)='gets';                my($if)='if';
    my($is)='is';                 my(@input_files);
    my($o)='or';   my(@arguments) = sort @ARGV;

    #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    #  Assigning global arguments(@num_opt, %vars) variables
    #_______________________________________________________________
    for($i=0; $i< @arguments; $i++){
         print "\n# \"$arguments[$i]\"";
         if(($arguments[$i]=~/^(\-?\d+[\.\d+]?)$/)&&   ### it mustn't be a file
             ( !(-f $arguments[$i]) ) ){                ### getting NUM opt
             push(@num_opt, $1);
         }elsif( $arguments[$i]=~/^(\S+)=(\S+)$/){
             $vars{$1}=$2;
             print "\n# (1) assign_options_to_variables: Your variable option for \"$1\" is \"$2\"\n";
         }elsif( $arguments[$i]=~/^(\S+)=[\'\"]([ \S]+)[\'\"]$/){ # just in case Shell fails
             $vars{$1}="$2";
         }elsif( $arguments[$i]=~/^(\S+)=([ \S]+)$/){
             $vars{$1}="$2";
         }
    }


    #""""""""""""""""""""""""""""""""""""""""""""""""""
    #   Some DEFAULT $debug variables for debugging purposes
    #__________________________________________________________
    &set_debug_option;

    #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    #   The main processing of self
    #_______________________________________________________________
    open(SELF, "$0");    ## opens the program you ran to get the options table.
    while(<SELF>){

       if( $first_border_and_title > 5 ){  ## This is to make it read only the first headbox.
            last;                            #  $first_border_and_title is an incremental counter.
       }elsif( /^ *#[_\*\-]{15,}$/ and /^ *# *[Tt][itle]*[ :]*/ ){
            $first_border_and_title++;
            print __LINE__, "# assign_options_to_variables : Title line found\n" if $debug eq 1;
       }elsif(/^ {0,5}#[\t ]{1,50}([\$\%\@].+)$/){
            $op = $1;  ## $op is for the whole input option line which has $xxxx, @xxx, %xxxx format
            $op =~ s/^(\W\w+.+)(\s+\#.*)$/$1/;  ## This is removing any comments in the line.
            #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
            ## matching the following line input format.
            ## $av_sc_segment     becomes    a  by  a  # To smooth the SC rates. Gets the averages of
            ## $ARG_REG is for arguments regular expression variable.
            ##  This reg. exp. matches = 'a or A or E or e' part
            ##  which represents alternative prompt arguments possibilities. \=$b$g$is$e$set
            #"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # These are optional argument flag like (-a as well as a)
            #__________________________________________________________
            $ARG_REG ='(\S*) *[or=\,]* *(\S*) *[=or\,]* *(\S*) *[=or\,]* *(\S*) *[=or\,]* *(\S*)';

            if(#$op=~/^([\$\@\%])([\w\-]+)[\t ]{0,25}[=|$g|$is]([\t ]+)[bB]y +$ARG_REG/
               #or
               $op=~/^([\$\@\%])([\w\-]+)[\t ]{0,25}[=|$g|$is][\t ]*[\$\@\%]?(\S*) *[bB]y +$ARG_REG/){
                                            ## $sym     $var        becomes          a [$a...]       by       a -a -A
                 my $sym = $1;  #### The symbols like ($, @, %), '$' in the above.
                 my $var = $2;  #### Actual variable name 'var' from $var, 'av_sc_segment' in the above.
                 my $val = $3;  #### The becoming value  first 'a' in the above.
                 my @arg = ($4, $5, $6, $7, $8);  ## The alternative prompt arguments, second 'a' in the above..
                 print "\n $sym $var $val \n" if $debug==1;
                 print "\n \@arg are @arg \n" if $debug==1;

                 #""""""""""""""""""""""""""""""""""""""""""""""""""""
                 #  Going through the PROMPT args.
                 #""""""""""""""""""""""""""""""""""""""""""""""""""""
                 for($z=0; $z < @arguments; $z++){     ## $arguments[$z]  is from @ARGV
                      print "\n# Prompt:  \"$arguments[$z]\"\n" if $debug;
                      if($arguments[$z]=~/^\-\w+$/){
                           $arguments[$z] =~ s/\-//;
                      }
                      for ($i=0; $i < @arg; $i ++ ){
                           if( ("$arg[$i]" eq "$arguments[$z]" )&& ($arg[$i] !~ /\=/)
                               && ($sym eq '$') ){
                               ${"$var"}="$val";
                               if($debug == 1){
                                       print __LINE__," \$${var} is set to \"$1\"\n";
                               }

                           }#'''''''''''''''' $arg = by s=  syntax ~~~~~~~~~~~~~~~~~~~~~~~~~~~
                           elsif( ( $arg[$i] =~ /^(\w+) *\=/ ) &&
                               ( $arguments[$z] =~ /^${1}= *([ \S]+)$/) &&  ## NOTE  '[ \S]+' not \S+
                               ( $sym eq '$') ){
                                 ${"$var"}="$1";
                                 print "\n# I am assigning $1 to $var\n";
                                 if($debug eq 1){ print __LINE__,"\$${var} is set to \"$1\"\n";  }
                           }
                      }
                 }
             }
          }
     }
}



#______________________________________________________________________________________
# Title     : open_sso_files
# Usage     :  @sso=@{&open_sso_files(@file, $add_range, $add_range2, "u=$upper_expect_limit",
#			                            "l=$lower_expect_limit", "m=$margin", $new_format)};
# Function  : This reads the parseable( -m 10 option)
#              and non-parseable form of ssearch program output
#             If you give 5 files, it produces 5 hashes as a ref of array.
#             This understands xxxx.gz files.
#             This reads FASTA -m 10 output, too.
# Example   :
#  717    0         0.343  16    373    EC1260_16-373              74    434    YBL6_YEAST_74-434
#  348    9e-16     0.500  113   233    EC1260_113-233             27    146    YDBG_ECOLI_27-146
#  472    2.9e-08   0.271  13    407    EC1260_13-407              148   567    YHJ9_YEAST_148-567
#  459    1.9e-22   0.260  1     407    EC1260_1-407               65    477    YLQ6_CAEEL_65-477
#  452    4.5e-14   0.275  1     407    EC1260_1-407               103   537    YSCPUT2_103-537
#  1131   0         0.433  1     407    EC1260_1-407               112   519    ZMU43082_112-519
#
#  Input SSO file example)-> below
#
#   >>MG032 ATP-dependent nuclease (addA) {Bacillus subtilis  (666 aa)
#    Z-score: 88.3 expect()  1.9
#   Smith-Waterman score: 77;  27.143% identity in 70 aa overlap
#
#           30        40        50        60        70        80
#   MJ0497 RSAGSKGVDLIAGRKGEVLIFECKTSSKTKFYINKEDIEKLISFSEIFGGKPYLAIKFNG
#                                        : .. ...  . .:.:::. :: : ..:
#   MG032  HDKVRYAFEVKFNIALVLSINKSNVDFDFDFILKTDNFSDIENFNEIFNRKPALQFRFYT
#        200       210       220       230       240       250
#
#           90       100             110       120       130
#   MJ0497 EMLFINPFLLSTNGK------NYVIDERIKAIAIDFYEVIGRGKQLKIDDLI
#          .   ::   :: ::.      : ....... . ::. . :
#   MG032  K---INVHKLSFNGSDSTYIANILLQDQFNLLEIDLNKSIYALDLENAKERFDKEFVQPL
#        260          270       280       290       300       310
#
# Parseable form -m 10 option =========================================
#   >>>MJ0497.fa, 133 aa vs GMG.fa library
#   ; pg_name: Smith-Waterman (PGopt)
#   ; pg_ver: 3.0 June, 1996
#   ; pg_matrix: BL50
#   ; pg_gap-pen: -12 -2
#   >>MG032 ATP-dependent nuclease (addA) {Bacillus subtilis
#   ; sw_score:  77
#   ; sw_z-score: 88.3
#   ; sw_expect    1.9
#   ; sw_ident: 0.271
#   ; sw_overlap: 70
#   >MJ0497 ..
#   ; sq_len: 133
#   ; sq_type: p
#   ; al_start: 58
#   ; al_stop: 121
#   ; al_display_start: 28
#
# Warning   : By default, the SW score comes to the first
#             If expect value is not found, it becomes '0'
#             By default, the offset of seq match with a seq name like seq_30-40
#               will be 30 not 1.
#             It ignores special chars like , : .prot in the name (eg, AADF_FASDF: will be AADF_FASDF)
# Keywords  : open_ssearch_output_files, ssearch_output, ssearch, FASTA,
# Options   : _  for debugging.
#             #  for debugging.
#             u= for upper E value limit
#             l= for lower E value limit
#             r  for attaching ranges to out seq names (eg> HI0001_1-20 as a key)
#             U  for making the matched seqname to upppercase
#             L  for making the matched seqname to lowercase
#             R  for attaching ranges to out seq names for both TARGET and MATCH
#             n  for new format (msp2)
#             a  for getting alignments of the pair
#
# Category  :
# Version   : 4.5
#----------------------------------------------------------------------------
sub open_sso_files{
    #"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
    my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
    my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
    my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
    my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
    my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
    if($debug==1){print "\n\t\@hash=\"@hash\"
    \@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
    \@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
    #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my (@out_refs, @SSO, $create_sso, $parseable, @OUT, @temp_sso_lines,
            %match, $attach_range_in_names, $margin, $uppercase_seq_name,
            $lowercase_seq_name, $target_seq, $new_format, $get_alignment,
            $pvm_version_fasta_out, $original_target_seq, $big_msp_out_file);

    my ($upper_expect_limit, $lower_expect_limit)=(50,0);

    if($char_opt=~/R/){  $attach_range_in_names2=1; };
    if($char_opt=~/r2/){ $attach_range_in_names =1; $attach_range_in_names2=1 };
    if($char_opt=~/r/){  $attach_range_in_names =1; };
    if($char_opt=~/c/){  $create_sso   ='c' };
    if($char_opt=~/n/){  $new_format   ='n' };
    if($char_opt=~/a/){  $get_alignment='a' };
    if($char_opt=~/U[pperPPER]*/){ $uppercase_seq_name='U' };
    if($char_opt=~/L[owerOWER]*/){ $lowercase_seq_name='L' };
    if($vars{'u'}=~/(\.?\d+)/){ $upper_expect_limit = $vars{'u'} };
    if($vars{'l'}=~/(\.?\d+)/){ $lower_expect_limit = $vars{'l'} };
    if($vars{'m'}=~/\d+/){ $margin = $vars{'m'} };
    $attach_range_in_names2=$attach_range_in_names=1;

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # opening file input (can handle .gz  files)
    #_______________________________________________
    if(@file < 1 and @array > 0){
         for($i=0; $i< @array; $i++){
              @sso=@{$array[$i]};
         }
         print "\n# (i) \@sso has ", scalar(@sso), " lines. \n"  if $verbose;
         if(@sso > 3000){ # if @sso is very big, I remove the useless contents
             print "\n# (i) open_sso_files: size of \@sso for $file[$i] exceeds 3000 lines, ", scalar(@sso), " !!! \n";
         }
         push(@OUT, &read_sso_lines(\@sso, $create_sso,
                    "u=$upper_expect_limit",
                    "l=$lower_expect_limit",
                    $attach_range_in_names,
                    $attach_range_in_names2,
                    $new_format, $get_alignment) );
    }
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Opening input FILE!
    #_______________________________________________
    else{
         print "\n# open_sso_files : processing @file \n\n";
         for($i=0; $i< @file; $i++){
              if($file[$i]=~/\S+\.msp *$/){ $big_msp_out_file=$file[$i]; splice (@file, $i, 1); $i--;
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              # Opening zipped file
              #_______________________________________________
              }elsif($file[$i]=~/\S+\.\gz$/ or -B $file[$i]){  ## if file has xxxx.gz extension
                  my (@sso);
                  @sso=`gunzip -c $file[$i]`;
                  if(@sso < 30){  @sso=`zcat $file[$i]`; }      # if zcat fails to produce output use gunzip -c
                  if(@sso > 3000){ # if @sso is very big, I remove the useless contents
                      print "\n# open_sso_files: size of \@sso for $file[$i] exceeds 3000 lines, ", scalar(@sso), " !!! \n";
                  }
                  push(@OUT, &read_sso_lines(\@sso, $create_sso,
                      "u=$upper_expect_limit",
                      "l=$lower_expect_limit",
                      $attach_range_in_names,
                      $attach_range_in_names2,
                      $new_format, $get_alignment) );
              }
              #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
              # Opening plain file(not zipped)
              #_______________________________________________
              elsif($file[$i]=~/\S+\.[fsm]?sso/ or $file[$i]=~/\S+\.out/ or $file[$i]=~/\S+\.fso/){
                  print "\n# (i) openning text File format xxxx.[fms]sso $file[$i]";
                  open(SSO, "$file[$i]") or die "\n# (E) open_sso_files: Failed to open $file[$i]\n";
                  my @sso=<SSO>;
                  if(@sso < 30){  @sso=`zcat $file[$i]`; }      # if zcat fails to produce output use gunzip -c
                  if(@sso > 3000){ # if @sso is very big, I remove the useless contents
                      print "\n# (i) open_sso_files: size of \@sso is for $file[$i] exceeds 3000 lines, ",
                             scalar(@sso), " !!! \n";
                  }
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # Calling read_sso_lines sub
                  #_____________________________________
                  push(@OUT, &read_sso_lines([@sso], $create_sso,
                       "u=$upper_expect_limit",
                       "l=$lower_expect_limit",
                       $attach_range_in_names,
                       $attach_range_in_names2,
                       $new_format, $get_alignment) );
                  close SSO;
              }
         }
    }
    print "\n# \@OUT has ", scalar(@OUT), " elements \n" if $verbose;
    return(\@OUT); # @OUT has refs of hashes  (\%xxx, \%YYY, \%XXX,,,,)
}

#_________________________________________________________________________________
# Title     : read_sso_lines
# Usage     : &read_sso_lines([@sso], $create_sso, $attach_range_in_names,
#                 $attach_range_in_names2, $new_format, $get_alignment) );
# Function  : Main subroutine for open_sso_files. This calls either machine
#              readable or unreadable form parsing subroutine
# Example   :
# Keywords  : read_sso_lines_in_array
# Options   : a c r r2 n
#             u= for upper E value limit
#             l= for lower E value limit
# Category  :
# Version   : 1.4
#----------------------------------------------------------------------------
sub read_sso_lines{
    my (@out_refs, $parseable, @SSO, $create_sso, $i, $j, $k, $attach_range_in_names);
    my ($upper_expect_limit, $lower_expect_limit)=(50,0); ##<<--- DEFAULT

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~```
    # Processing input and options, don't worry!
    #__________________________________________
    for($i=0; $i< @_; $i++){
       if($_[$i]=~/u=(\S+)/){    $upper_expect_limit=$1 }
       elsif(ref($_[$i]) eq 'ARRAY'){ @SSO=@{$_[$i]};   }
       elsif($_[$i]=~/l=(\S+)/){ $lower_expect_limit=$1 }
       elsif($_[$i]=~/^c$/){     $create_sso = 'c' }
       elsif($_[$i]=~/^a$/){     $get_alignment='a'; }
       elsif($_[$i]=~/^r$/){   $attach_range_in_names='r' }
       elsif($_[$i]=~/^r2$/){   $attach_range_in_names2='r2' }
       elsif($_[$i]=~/^n$/){   $new_format='n' }
    }
    print "\n# (i) \$attach_range_in_names2 is $attach_range_in_names2\n" if $attach_range_in_names2;

    #~~~~~~ Checking if sso is a parseable form or not~~~~~~~~~~~~~
    TEMP:for($k=0; $k < @SSO; $k++){
          if($SSO[$k] =~ /\>\>\>/  or $SSO[$k] =~ /^ *\; \S+\:/ ){
                  $parseable++;  if($parseable >= 10){  last TEMP;     }
          }elsif($SSO[$k]=~/^  +\:+/){ $parseable--;
          }elsif($SSO[$k] =~ /^ +1\>\>\>(\S+)/){ $pvm_version_fasta_out=1; $parseable +=10; $original_target_seq=$1; last TEMP;
          }
    }
    if($parseable >= 10){
        @out_refs=@{&read_machine_readable_sso_lines(\@SSO,
                "u=$upper_expect_limit",
                "l=$lower_expect_limit",
                $get_alignment, $create_sso,
                $new_format, $attach_range_in_names,
                $attach_range_in_names2)};
    }else{
        @out_refs=@{&read_machine_unreadable_sso_lines(\@SSO,
                $get_alignment, $create_sso,
                "u=$upper_expect_limit",
                "l=$lower_expect_limit",
                $new_format, $attach_range_in_names,
                $attach_range_in_names2)};
    }
    return(@out_refs);
}




#________________________________________________________________________________
# Title     : read_machine_unreadable_sso_lines
# Usage     : @out_refs=@{&read_machine_unreadable_sso_lines(\@SSO, $get_alignment,
#                           $create_sso, $upper_expect_limit,$new_format, $lower_expect_limit,
#                           $attach_range_in_names, $attach_range_in_names2)};
# Function  :
# Example   : output will look=>
#  ZFH1_DROME 60 d1ad3a_ 446 d1ad3a_ 0.9 71 34 3-37 253-287
#  ZFH1_DROME 60 d1ahdp_ 68 d1ahdp_ 0.00018 100 56 2-58 3-59
#  ZFH1_DROME 60 d1crka2 282 d1crka2 8.4 58 50 5-55 73-123
#  ZFH1_DROME 60 d1dkza_ 215 d1dkza_ 4.9 59 40 1-41 112-152
#  ZFH1_DROME 60 d1ecra_ 305 d1ecra_ 3.2 63 47 9-56 201-248
#
# Keywords  : read_normal_sso_lines
# Options   : a c r r2 n
#             u= for upper E value limit
#             l= for lower E value limit
# Version   : 1.3
#--------------------------------------------------------------------------------
sub read_machine_unreadable_sso_lines{
   my ($upper_expect_limit, $lower_expect_limit)=(50,0);
   my (@SSO, @out_refs, $match_seq, $match_evalue, $alignment_found,
       $match_zscore, $query_seq_len, $space, %match, $entry_found,
       $match_gap_len, $query_seq_name, $i, $j, $sw_score, $match_evalue,
       $sw_ident, $query_seg_start, $query_seg_end, $match_seg_start,
       $match_seg_end, $query_gap_len, $match_gap_len, $overlap,
       $gap_start, $start, $match_range);
   for($i=0; $i< @_; $i++){
       if($_[$i]=~/u=(\S+)/){    $upper_expect_limit=$1 }
       elsif(ref($_[$i]) eq 'ARRAY'){ @SSO=@{$_[$i]};   }
       elsif($_[$i]=~/l=(\S+)/){ $lower_expect_limit=$1 }
       elsif($_[$i]=~/^c$/){     $create_sso = 'c' }
       elsif($_[$i]=~/^a$/){     $get_alignment='a'; }
       elsif($_[$i]=~/^r$/){   $attach_range_in_names='r' }
       elsif($_[$i]=~/^r2$/){   $attach_range_in_names2='r2' }
       elsif($_[$i]=~/^n$/){   $new_format='n' }
   }

   print "\n# (i)read_machine_unreadable_sso_lines : You have put non-parseable format of xxxx.sso\n";
   print "#      Did you set \'M\' option in do_sequence_search? \n";

   for($j=4; $j< @SSO; $j++){
       if($SSO[$j]=~/^ *\S+\: +(\d+) +\w+ *$/){ ## matching 'hbox.fa: 60 aa'
            $query_seq_len=$1;
            print "\n (i) target seq len is  $query_seq_len \n";

       }elsif($query_seq_len and $SSO[$j]=~/^ \>(\w[\w\-\.\/\\]+)/){ ##  matching  >MJ0497
            $query_seq_name=$1;
            $j+=4; ## jumping to skip the stat bars
            print "\n# (i)read_machine_unreadable_sso_lines : Found Query seq=> $query_seq_name ";
            next;
       }

       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Matching the Match sequences
       #_________________________________________________
       if($query_seq_name and $SSO[$j]=~/^ {0,4}\>\> *(\S+) +.+\((\d+) aa\) *$/){ # # matching >>MG032 ATP-d (addA) Bacillus subtilis  (666 aa)
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
            # This is the assignment to OUT hash
            #________________________________________________
            if($alignment_found > 0){ ## If I have already read >>xxxx
                $match{$match_seq}=sprintf("%-5s %-8s %-6s %-4s %-5s %-30s %-4s %-5s %s\n",
                   $sw_score, $match_evalue, $sw_ident, $query_seg_start, $query_seg_end, $query_seq_name,
                   $match_seg_start, $match_seg_end, $match_seq);
            }
            $match_seq=$1;
            $entry_found=1;    $alignment_found=0;
            $query_gap_len=$match_gap_len=$match_seg_start=$match_seg_end=0;
            $query_seg_start=$query_seg_end=$overlap=0;
            undef($space);
            print "\n# (i) read_machine_unreadable_sso_lines : Found MATCHed seq $match_seq\n" if $verbose;
       }elsif($entry_found and $SSO[$j]=~/ *Z\-score: *(\S+) *expect\( *\) +(\S+)/){ ## getting Evalue
            $match_zscore=$1;   # just in case needed
            $match_evalue=$2;
            if($upper_expect_limit < $match_evalue or $lower_expect_limit > $match_evalue){
                $match_seq=$alignment_found=$entry_found=$query_gap_len='';
            }
       }elsif($entry_found and $SSO[$j]=~/Smith\-Waterman +score\: +(\d+)\; *(\S+) *\% *identity *in *(\d+) *aa overlap/i){
            $sw_score=$1;
            $sw_ident=$2;
            $overlap=$3;
       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       # Measuring the gap before position info "^         50        60$"
       #_______________________________________________________________________
       }elsif( $entry_found and !$alignment_found  and $SSO[$j]=~/^( +)(\d+) +\d+/  ){
            $start=$2;                                ## $start is the digit
            $gap_start=length($1)+length($start)-1;
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # $alignment_found is set HERE!
            #_____________________________________
            $alignment_found=1;
            print "\n# $query_seq_name $match_seq \$gap_start is  $gap_start, \$start: $start" if $verbose;
       }elsif($alignment_found==1 and $SSO[$j]=~/^( +)[\.\:]/ ){ ### matching    .: .: : ::     :.:..: :.. .. ..
            $space=length($1);
            $query_seg_start=$space-$gap_start+$start;
            $query_seg_end=$query_seg_start+$overlap;
            $query_range="$query_seg_start-$query_seg_end";
       }elsif($space and $alignment_found==1 and  $SSO[$j]=~/^( +)(\d+)/ ){ ##  !! $alignment_found == 1 !!
            $alignment_found++; ## <<--- $alignment_found++;  should increment!
            $match_gap_start=length($1)+length($2)-1;
            $match_start=$2;
            $match_seg_start=$space-$match_gap_start+$match_start;
            $match_seg_end=$match_seg_start+$overlap;
            $match_range ="$match_seg_start-$match_seg_end";
            #$match{$match_seq}.="$query_range $match_range ";
            #print "\n $query_seq_name $match_seq $match_evalue $overlap $query_range $match_range";
       }
    }# end of for $j
    if( ($create_sso=~/c/) && (@file < 1) ){
       open (SSO2, ">$query_seq_name\.sso");
       print SSO2 @SSO, "\n";
       print "\n# (i) $query_seq_name\.sso  is created";
       close SSO2;
    }
    push(@out_refs, \%match);
    return(\@out_refs);
}# end of for $i


#________________________________________________________________________________
# Title     : read_machine_readable_sso_lines
# Usage     : @out_refs=@{&read_machine_readable_sso_lines(\@SSO, $get_alignment,
#                           $create_sso, $upper_expect_limit,$new_format, $lower_expect_limit,
#                           $attach_range_in_names, $attach_range_in_names2)};
# Function  :
# Example   :
# Keywords  : read_m10_sso_lines read_msso_lines
# Options   : a c r r2 n
#             u= for upper E value limit
#             l= for lower E value limit
# Category  :
# Version   : 1.4
#--------------------------------------------------------------------------------
sub read_machine_readable_sso_lines{
   my ($upper_expect_limit, $lower_expect_limit)=(50,0);
   my (%match, @out_refs, $query_found, $query_sq_stop, $query_sq_statrt, $match_found,
      $match_seq, $match_found2, $i, $j,$match_found3, $overlap, $sw_score,
      $match_sq_stop, $match_seq2, $sw_ident, $name_range, $query_seq,
      $al_display_start, $match_seq_count);
   for($i=0; $i< @_; $i++){
       if($_[$i]=~/u=(\S+)/){    $upper_expect_limit=$1 }
       elsif(ref($_[$i]) eq 'ARRAY'){ @SSO=@{$_[$i]};   }
       elsif($_[$i]=~/l=(\S+)/){ $lower_expect_limit=$1 }
       elsif($_[$i]=~/^c *$/){     $create_sso = 'c'; print "\n# read_machine_readable_sso_lines: \$create_sso is set"; }
       elsif($_[$i]=~/^a *$/){     $get_alignment='a'; }
       elsif($_[$i]=~/^ *r *$/){     $attach_range_in_names='r' }
       elsif($_[$i]=~/^r2 *$/){    $attach_range_in_names2='r2' }
       elsif($_[$i]=~/^n *$/){     $new_format='n' }
   }
   $attach_range_in_names='r';

   print "\n# read_machine_readable_sso_lines : You put PARSEABLE form of sso file";
   for($j=0; $j< @SSO; $j++){
	  if($SSO[$j]=~/\>\>\> *(\S+)\,? +(\d+) +/){  ## >>>  line
               $query_found=1;  $query_seq_leng=$2;  ## Ignoring the $1, as file name can be different from rea seq names
                   $j+=8;
	  }elsif( $query_found==1 and $SSO[$j]=~/\>\>(\w[\w\-\.]+)([\.prot\,\:]*) */ ){ ##
               $match_found=1;
               $match_seq_count++;
               $al_display_start=0;
               if(length($2)>0){  print "\n# read_machine_readable_sso_lines: Seq name has this special char \"$2\". I ignore it"; }
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Changing the CASE according to the option
               #_____________________________________________
               if($uppercase_seq_name eq 'U'){
                       $match_seq="$1"; $match_seq="\U$match_seq";  ## make it uppercase
               }elsif($lowercase_seq_name eq 'L'){
                       $match_seq="$1"; $match_seq="\L$match_seq"; ## make it lowercase
               }else{ $match_seq="$1"; } ## make it uppercase
               next;
	  }elsif($match_found and $SSO[$j]=~/^\; +\w+_expect\:? +(\S+)/){
               #~~~~~~~~~~~~~~~~~~~~~~~
               # Filtering by E val
               #_______________________
               if( $1 > $upper_expect_limit or $1 < $lower_expect_limit ){
                       $match_found=0; next;
               }else{ $expect =$1; }
	  }elsif($match_found and $SSO[$j]=~/^ *\; +sw_score *\: +(\S+)/i){  $sw_score =$1;
	  }elsif($match_found and $SSO[$j]=~/^\; +sw_ident\: +(\S+)/){  $sw_ident =$1;
	  }elsif($match_found and $SSO[$j]=~/^ *\; +sw_overlap\: +(\S+)/){  $overlap=$1;
	  }elsif($match_found and $SSO[$j]=~/^ *\>(\w[\w\-\.]+)([\.prot\,\:]*) *[\d+]*/){
               $match_found2=1;        $match_found=0;
               if( length($2)>0 ){  print "\n# read_machine_readable_sso_lines: Seq name has this special char \"$2\". I ignore it"; }
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Changing the CASE according to the option
               #_____________________________________________
               if($uppercase_seq_name eq 'U'){
                       $match_seq2="$1"; $match_seq2="\U$match_seq2"; ## make it uppercase
               }elsif($lowercase_seq_name eq 'L'){
                       $match_seq2="$1";  $match_seq2="\L$match_seq2"; ## make it lowercase
               }else{ $match_seq2="$1";  }
               $query_seq=$match_seq2;
	  }elsif($match_found2==1 and $SSO[$j]=~/\; +sq_len\: +(\S+)/){
		     $query_sq_len=$1;
	  }elsif($match_found2==1 and $SSO[$j]=~/\; +al_start\: +(\S+)/){
		     $query_sq_start=$1;
	  }elsif($match_found2==1 and $SSO[$j]=~/\; +al_stop\: +(\S+)/){
		     $query_sq_stop=$1;
	  }elsif($SSO[$j]=~/\; +al_display_start/ and $al_display_start < 1){
             $al_display_start ++;
	  #------------------------------------------------------------
	  }elsif($match_found2 and $SSO[$j]=~/\>(\w[\w\-\.]+)([\.prot\,\:]*) *[\d+]*/){
             $match_found3=1; $match_found2=0;
             if(length($2)>0){  print "\n# open_sso_files: Seq name has this special char \"$2\". I ignore it"; }
	  }elsif($match_found3 and $SSO[$j]=~/\; +sq_len\: +(\S+)/){
		     $match_sq_len=$1;
	  }elsif($match_found3 and $SSO[$j]=~/\; +al_start\: +(\d+)/){
		     $match_sq_start=$1;
	  }elsif($match_found3 and $SSO[$j]=~/\; +al_stop\: +(\d+)/){
             $match_sq_stop=$1;
	  }elsif($match_found3 and $SSO[$j]=~/\; +al_display_start/){
             $match_found3=0;          $al_display_start++;
             if($expect=~/^$/){ $expect='0.0'; }
             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # adding the offset for names with ranges
             #__________________________________________________
             if($query_seq=~/^\S+_(\d+)\-(\d+)/){ $query_sq_start +=$1-1; $query_sq_stop +=$1-1;  }

             #~~~~~~~~~~~~~~~~~~~~~~~~~
             # Attaching the ranges  (under NO e option)
             #_________________________

             if($attach_range_in_names){
                  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  # Checks margin opt and adds it
                  #__________________________________
                  if($margin=~/\d+/){
                      if($match_sq_start < $margin){  $match_sq_start=1;
                      }else{          $match_sq_start-=$margin;   }
                      $match_sq_stop += $margin;
                  }
                  $name_range="$match_seq\_$match_sq_start\-$match_sq_stop";

                  #~~~~~~~~ If 'rr' opt is set, put ranges for both target and match seqs ~~~~~~~
                  if($attach_range_in_names2 and $query_seq !~/^\S+_(\d+)\-(\d+)/){
                      $query_seq="$query_seq\_$query_sq_start\-$query_sq_stop";
                  }
                  if($original_target_seq=~/\S+/){ $query_seq=$original_target_seq } # for PVM version out
                  if($new_format=~/n/){  # under NO e option
                      $match{$name_range}=
                         sprintf("%s %s %s %s %s %s %s %s %s\n",
                         $query_seq, $query_sq_start, $query_sq_stop, $sw_score, $expect, $sw_ident,
                         $match_sq_start, $match_sq_stop, $name_range);
                  }else{
                      $match{$name_range}=
                         sprintf("%-5s %-8s %-6s %-4s %-5s %-30s %-4s %-5s %s\n",
                         $sw_score, $expect, $sw_ident, $query_sq_start, $query_sq_stop, $query_seq,
                         $match_sq_start, $match_sq_stop, $name_range);
                  }
             }else{
                 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 # Checks margin opt and adds it
                 #__________________________________
                 if($margin=~/\d+/){
                     if($match_sq_start < $margin){  $match_sq_start=1;
                     }else{                          $match_sq_start-=$margin; }
                     $match_sq_stop += $margin;
                 }
                 if($original_target_seq=~/\S+/){ $query_seq=$original_target_seq } # for PVM version out
                 if($new_format=~/n/){
                     $match{$match_seq}=
                        sprintf("%s %s %s %s %s %s %s %s %s\n",
                        $query_seq, $query_sq_start, $query_sq_stop, $sw_score, $expect, $sw_ident,
                        $match_sq_start, $match_sq_stop, $match_seq);
                 }else{
                    $match{$match_seq}=sprintf("%-5s %-8s %-6s %-4s %-5s %-30s %-4s %-5s %s\n",
                       $sw_score, $expect, $sw_ident, $query_sq_start, $query_sq_stop, $query_seq,
                       $match_sq_start, $match_sq_stop, $match_seq);
                 }
             }
	  }elsif($get_alignment and $al_display_start==1 and $SSO[$j]=~/^([\w\-]+) *$/){
		  ${"match_alignment\_$match_seq_count"}{$match_seq2} .= $1;
	  }elsif($get_alignment and $al_display_start==2 and $SSO[$j]=~/^([\w\-]+) *$/){
		  ${"match_alignment\_$match_seq_count"}{"$match_seq"} .= $1;
	  }elsif($get_alignment and $SSO[$j]=~/^ *\;al_cons\:/){
		  $al_display_start=0;
		  my %temp=%{"match_alignment\_$match_seq_count"};
		  push(@out_refs, \%temp );
		  %{"match_alignment\_$match_seq_count"}=();
	  }
   } ## <-- for($j=0; $j< @SSO; $j++)

   #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   # If create sso option is set, it creates SSO files(array input case)
   #________________________________________________________________________
   if( $create_sso and !$get_alignment){
	   open (SSO2, ">$query_seq\.msso");
	   print SSO2 @SSO, "\n";
	   print "\n# read_machine_readable_sso_lines : $query_seq\.msso file  is created by \"c\" opt ";
	   close SSO2
   }
   unless($get_alignment){
	   push(@out_refs, \%match);
   }
   return(\@out_refs);
}


#________________________________________________________________________
# Title     : get_base_names
# Usage     : $base =${&get_base_names(\$file_name)};
#             :   or @bases = &get_base_names(\@files);  # <-- uses `pwd` for abs directory
# Function  : produces the file base name(eg, "evalign"  out of "evalign.pl" ).
# Example   : $base => 'test'  with 'test.txt' or '/home/dir/of/mine/text.txt'
# Warning   :
# Keywords  : get_base_name{, base_name, file_base_name ,  get_file_base_name
#             get_basename, basename, get_root_name
# Options   :
# Returns   :
# Argument  : handles both ref and non-ref.
# Version   : 1.3
#--------------------------------------------------------------------
sub get_base_names{
	my($x, $pos, $pos1, @out_file, $file_only, $file, @file, $base, @base);
	@file=@{$_[0]} if (ref($_[0]) eq 'ARRAY');
	@file=@_ if !(ref($_[0]) eq 'ARRAY');
	for($x=0; $x < @file; $x ++){
		if( ref($file[$x]) ){
			$file = ${$file[$x]};
			$pos1=rindex($file, "/");
	        $file_only=substr($file, ($pos1+1));
			$pos = rindex($file_only, ".");
	        $base= substr($file_only, 0, $pos);
		}else{
			$file = $file[$x];
			$pos1=rindex($file, "/");
	        $file_only=substr($file, ($pos1+1));
			$pos = rindex($file_only, ".");
	        $base= substr($file_only, 0, $pos);
		}
		push(@base, $base);
	}
	if(@base == 1 ){ \$base[0] }else{ \@base }
}

#________________________________________________________________________________
# Title     : convert_clu_to_msp
# Usage     : @written_msp_files=@{&convert_clu_to_msp(\$single_linkage_file)};
# Function  : reads in a big single linkage cluster file(or normal cluster file)
#              and creates a big msp file which contains all the entries in the
#              cluster file (usually with the extension of sclu or clu)
#             This normally reads in xxxx.mso, xxxx.sso like files, but if the
#              corresponding  xxx.msp file already exists, it concatenates them to
#              make a bigger one.
# Example   :
# Keywords  : clu_2_sso_2_msp, cluster_to_msp, cluster_to_sso_to_msp
#              clu_to_sso_to_msp
# Options   :
# Category  :
# Version   : 2.4
#--------------------------------------------------------------------------------
sub convert_clu_to_msp{
     my($i, $j, $k, $s, $u, $v, $p, $m, $n, $y, @possible_extensions, $single_file_name,
        @seq_names, @final_files, @U_L_case, $file, @file, @name_types,
        @poss_sub_dir_heads, @written_msp_files, $Lean_output, $subdir_char_size,
        $search_file_base, $found_real_subdir_name, $found_search_prog_exention_used,
        $no_of_ext_to_be_checked, $extension_type_found);

     $subdir_char_size=2; # default
     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # Opening cluster file (xx.clu)
     # %clus looks like this:  2-507     YGR041W YLR353W
     #                         3-308     YDR222W YDR346C YLR225C
     #                         2-184     YCL066W YCR040W
     #______________________________________________________________
     my $clu=${$_[0]} || $_[0];
     $Lean_output=${$_[1]} || $_[1];

     if($verbose){
           print "\n# convert_clu_to_msp : \"$clu\" is given
                and I am processing it with clu_to_sso_to_msp\n" if defined $clu;
     }
     my %clus=%{&open_clu_files(\$clu)};
     my @clusters= keys %clus;
     my $num_of_cluster=@clusters=@{&sort_by_cluster_size(\@clusters)};

     print "# (i) $0: convert_clu_to_msp: No. of cluster=$num_of_cluster after open_clu_files \n" if $verbose;

     &show_array(\@clusters) if $verbose;
     &show_hash(\%clus) if $verbose;
     @possible_extensions=('msp', 'msp.gz', 'msso', 'msso.gz','fsso', 'pbla', 'pbla.gz',
                                  'ssso', 'fso', 'out', 'prot.sso', 'prot.ts');
     @U_L_case=('\U', '\L', ' ');  ## !! the ' ' is necessary

     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
     # Making each SINGLE linkage clu to MSP file format to be ready for divclus
     #______________________________________________________________________________
     for($i=0; $i< @clusters; $i++){
         my (@seq_names, @final_files, $clus_name, $big_out_msp, @msp_hashes);
         $clus_name=$clusters[$i];
         unless($single_file_name=~/\S/){
             $big_out_msp="$clus_name\_cluster\.msp"; #<<<----- final output name
         }else{
             $big_out_msp=$single_file_name;
         }
         push(@written_msp_files, $big_out_msp); ## This is the output of this sub

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         #  If $clus_name.msp is already there, skip
         #_____________________________________________
         if( (-s $big_out_msp) > 100  and !$over_write ){
             print "\n# (i) convert_clu_to_msp : $big_out_msp MSP file already exists, skipping\n";
             print "#    Use  \$over_write option \'o\' to start all over again or \n";
             print "#    delete clustering files like XX-XX_cluster.clu to go on\n";
             next ;
         }
         $num_of_seq_member=@seq_names=split(/ +/, $clus{$clusters[$i]}); # @seq_names has (HIU001, HI002, HI333, MJ111, etc)
         print "# $0: convert_clu_to_msp: No. of seq member=$num_of_seq_member after split \n" if $verbose;

         $no_of_ext_to_be_checked=@possible_extensions;
         $extension_type_found=0;

         FOR0: for($j=0; $j < @seq_names; $j++){
               my($sub_dir_head, $file_name_low, $file_name_up, $file_name_prot_low,
                  $file_name_prot_up, $file_name_low_gz, $file_name_up_gz,
                  $file_name_prot_low_gz, $file_name_prot_up_gz);
                  $each_seq_name=$seq_names[$j];
               my @poss_sub_dir_heads=('.'); ## <<<<------- This is critically important, when 'D' opt is not used!

               if($each_seq_name=~/(\S+)_\d+\-\d+$/){
                   $each_seq_name_range=$each_seq_name;
                   $each_seq_name=$1;
                   @name_types=($each_seq_name, $each_seq_name_range);
               }else{
                   @name_types=($each_seq_name);
               }
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Here I take chars from the sequ names, as dirs have fragments of chars
               #_______________________________________________________________________________
               for($s=1; $s <= $subdir_char_size ; $s++){  ## here, number 2 indicates, I check single or 2 char sub dir names
                   $sub_dir_head= substr($seq_names[$j], 0, $s);
                   unshift(@poss_sub_dir_heads, "\L$sub_dir_head") if (-d "\L$sub_dir_head" );
                   unshift(@poss_sub_dir_heads, "\U$sub_dir_head") if (-d "\U$sub_dir_head" );
               }
               #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
               #  Checking all the possible subdirectories to crop all the sso files
               #_______________________________________________________________________________

               FOR1: for($p=0; $p <= @poss_sub_dir_heads; $p++){ ## Default has '.' will make things like '././fam_8_8.pbla.gz'

                    $subd=$poss_sub_dir_heads[$p];               ## Also, the '<=' not '<' cures the same problem.
                    print "# (i) Checking sub dir $subd\n";
                    FOR2 : for($e=$extension_type_found; $e < $no_of_ext_to_be_checked; $e++){
                         $ext=$possible_extensions[$e];
                         print "      (i) \$ext at $subd is  $ext\n";
                         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                         #  This makes all the possible lower upper case names
                         #______________________________________________________
                         for( $u=0; $u < @U_L_case; $u++){
                            for($v=0; $v <@name_types; $v++){
                               $each_seq_name=$name_types[$v];
                               if($U_L_case[$u]=~/U/){  $each_seq_name="\U$each_seq_name";
                               }elsif($U_L_case[$u]=~/L/){ $each_seq_name="\L$each_seq_name";
                               }else{ $each_seq_name=$each_seq_name }

                               print "          (i) $each_seq_name\n";

                               if(-s "$each_seq_name\.$ext"){
                                    push(@final_files, "$each_seq_name\.$ext" ) ;
                                    $extension_type_found=$e; $no_of_ext_to_be_checked=$e+1;
                                    $found_search_prog_exention_used=$ext;
                                    $found_real_subdir_name=$subd; ## This is to report the name of the actual subd found
                                    $found_search_prog_exention_used=$ext;
                                    next FOR0
                               }elsif(-s "$each_seq_name\.$ext\.gz"){
                                    push(@final_files, "$each_seq_name\.$ext\.gz" ) ;
                                    $extension_type_found=$e; $no_of_ext_to_be_checked=$e+1;
                                    $found_search_prog_exention_used=$ext;
                                    $found_real_subdir_name=$subd; ## This is to report the name of the actual subd found
                                    $found_search_prog_exention_used=$ext;
                                    next FOR0
                               }else{
                                    $file_wanted="\.\/$subd\/$each_seq_name\.$ext";
                                    if(-s $file_wanted){
                                        push( @final_files, $file_wanted);
                                        $extension_type_found=$e; $no_of_ext_to_be_checked=$e+1;
                                        print "             (i) $file_wanted is found with $extension_type_found\n";
                                        $found_real_subdir_name=$subd; ## This is to report the name of the actual subd found
                                        $found_search_prog_exention_used=$ext;
                                        last FOR1;
                                    }elsif(-s "$file_wanted\.gz"){
                                        push( @final_files, "$file_wanted\.gz");
                                        $extension_type_found=$e; $no_of_ext_to_be_checked=$e+1;
                                        $found_search_prog_exention_used=$ext;
                                        $found_real_subdir_name=$subd; ## This is to report the name of the actual subd found
                                        next FOR0;
                                    }
                               }
                            }
                         }
                    } # FOR2
               } # FOR1
               print "                   (i) \@final_files are: @final_files \n";
         } # FOR0

         print "\n# @final_files \n=============> $big_out_msp  \n\n";

         if(@final_files < 1){
              print "\n# (E) convert_clu_to_msp :LINE no.: ", __LINE__, " ERROR: \@final_files is empty. Serious error\n";
              print "\n If you have sub dir which have more than 2 chars as names, you may increase the default 2 to 3 in the above\n";
              next;
         }
         $write_each_msp_to_disk='';

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         #  Check if small msp files have already made in previous steps
         #________________________________________________________________
         if($final_files[0]=~/(\S+)\.msp/){ ##  concatenate msp into big_msp
             $search_file_base=$1;
             print "\n# $search_file_base $found_real_subdir_name $found_search_prog_exention_used\n" if $verbose;

             if($final_files[0]=~/\S\.gz$/){
                 print "\n# $final_files[0] is gzipped \n";
                 system("gzip -d  $final_files[0]");
                 $final_files[0]=~s/\.gz//;
             }
             open(BIG_MSP_FILE, ">$big_out_msp");
             for($y=0; $y< @final_files; $y++){
                 open(SINGLE_MSP, "$final_files[$y]");
                 while(<SINGLE_MSP>){
                     print BIG_MSP_FILE $_;
                 }
             }
             close(BIG_MSP_FILE);
             close(SINGLE_MSP);
             push(@written_msp_files, $big_out_msp);

             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
             # Doing something for L option ($Lean_output)
             #___________________________________________________
             if($Lean_output and -d $found_real_subdir_name and $found_search_prog_exention_used){
                 for($y=0; $y< @seq_names; $y++){
                     unlink("$found_real_subdir_name\/$seq_names[$y]\.$found_search_prog_exention_used");
                     unlink("$found_real_subdir_name\/$seq_names[$y]\.$found_search_prog_exention_used\.gz");
                 }
             }elsif($Lean_output){
                 for($y=0; $y< @seq_names; $y++){
                     unlink("$seq_names[$y]\.$found_search_prog_exention_used");
                     unlink("$seq_names[$y]\.$found_search_prog_exention_used\.gz");
                 }
             }
         }else{
             if($write_each_msp_to_disk){
                  print "\# $0 : going to run open_sso_files with $write_each_msp_to_disk opt\n";
                  $big_out_msp=${&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                                          "u=$upper_expect_limit", $new_format, $add_range, $add_range2, $big_out_msp, $over_write)};
                  if(-s $big_out_msp > 200){  print "\n# $0: SUCCESS to create $big_out_msp :) :) :-) :-) ?\n"; }
             }else{
                  print "\n# convert_clu_to_msp: I am running open_sso_files. \n";
                  @msp_hashes=@{&open_sso_files(\@final_files, $uppercase_seq_name, $write_each_msp_to_disk,
                                                 "u=$upper_expect_limit", $new_format, $add_range,
                                                 $add_range2, $big_out_msp, $over_write)};
                  &write_msp_files(@msp_hashes, $big_out_msp); ## concatenates all the hash ref to one
             }
         }
     }## end of  for($i=0; $i< @clusters; $i++){
     return(\@written_msp_files);
}# end of





__END__



Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.