Group
Extension

Bio-Phylo/lib/Bio/Phylo/Forest/TreeRole.pm

package Bio::Phylo::Forest::TreeRole;
use strict;
use warnings;
use Bio::Phylo::Util::MOP;
use base 'Bio::Phylo::Listable';
use Bio::Phylo::Util::Exceptions 'throw';
use Bio::Phylo::Util::CONSTANT qw'/looks_like/ :objecttypes';
use Bio::Phylo::Util::OptionalInterface 'Bio::Tree::TreeI';
use Bio::Phylo::Forest::Node;
use Bio::Phylo::IO 'unparse';
use Bio::Phylo::Factory;
use Scalar::Util 'blessed';
use List::Util qw'sum shuffle';
my $LOADED_WRAPPERS = 0;
{
    my $logger = __PACKAGE__->get_logger;
    my ( $TYPE_CONSTANT, $CONTAINER_CONSTANT ) = ( _TREE_, _FOREST_ );
    my $fac                      = Bio::Phylo::Factory->new;
    my %default_constructor_args = (
        '-listener' => sub {
            my ( $self, $method, @args ) = @_;
            for my $node (@args) {
                if ( $method eq 'insert' ) {
                    $node->set_tree($self);
                }
                elsif ( $method eq 'delete' ) {
                    $node->set_tree();
                }
                elsif ( $method eq '_set_things' ) {
                    $_->set_tree($self) for @{ $node };
                }
            }
        },
    );

=head1 NAME

Bio::Phylo::Forest::TreeRole - Extra behaviours for a phylogenetic tree

=head1 SYNOPSIS

 # some way to get a tree
 use Bio::Phylo::IO;
 my $string = '((A,B),C);';
 my $forest = Bio::Phylo::IO->parse(
    -format => 'newick',
    -string => $string
 );
 my $tree = $forest->first;

 # do something:
 print $tree->calc_imbalance;

 # prints "1"

=head1 DESCRIPTION

The object models a phylogenetic tree, a container of
L<Bio::Phylo::Forest::Node> objects. The tree object
inherits from L<Bio::Phylo::Listable>, so look there
for more methods.

=head1 METHODS

=head2 CONSTRUCTORS

=over

=item new()

Tree constructor.

 Type    : Constructor
 Title   : new
 Usage   : my $tree = Bio::Phylo::Forest::Tree->new;
 Function: Instantiates a Bio::Phylo::Forest::Tree object.
 Returns : A Bio::Phylo::Forest::Tree object.
 Args    : No required arguments.

=cut

    sub new : Constructor {

        # could be child class
        my $class = shift;

        # notify user
        $logger->info("constructor called for '$class'");
        if ( not $LOADED_WRAPPERS ) {
            eval do { local $/; <DATA> };
            $LOADED_WRAPPERS++;
        }

        # go up inheritance tree, eventually get an ID
        my $self = $class->SUPER::new( %default_constructor_args, @_ );
        return $self;
    }

=item new_from_bioperl()

Tree constructor from Bio::Tree::TreeI argument.

 Type    : Constructor
 Title   : new_from_bioperl
 Usage   : my $tree = 
           Bio::Phylo::Forest::Tree->new_from_bioperl(
               $bptree           
           );
 Function: Instantiates a 
           Bio::Phylo::Forest::Tree object.
 Returns : A Bio::Phylo::Forest::Tree object.
 Args    : A tree that implements Bio::Tree::TreeI

=cut

    sub new_from_bioperl {
        my ( $class, $bptree ) = @_;
        my $self;
        if ( blessed $bptree && $bptree->isa('Bio::Tree::TreeI') ) {
            $self = $fac->create_tree;
#            bless $self, $class;
            $self = $self->_recurse( $bptree->get_root_node );

            # copy name
            my $name = $bptree->id;
            $self->set_name($name) if defined $name;

            # copy score
            my $score = $bptree->score;
            $self->set_score($score) if defined $score;
        }
        else {
            throw 'ObjectMismatch' => 'Not a bioperl tree!';
        }
        return $self;
    }

=begin comment

 Type    : Internal method
 Title   : _recurse
 Usage   : $tree->_recurse( $bpnode );
 Function: Traverses a bioperl tree, instantiates a Bio::Phylo::Forest::Node
           object for every Bio::Tree::NodeI object it encounters, copying
           the parent, sibling and child relationships.
 Returns : None (modifies invocant).
 Args    : A Bio::Tree::NodeI object.

=end comment

=cut    

    sub _recurse {
        my ( $self, $bpnode, $parent ) = @_;
        my $node = Bio::Phylo::Forest::Node->new_from_bioperl($bpnode);
        if ($parent) {
            $parent->set_child($node);
        }
        $self->insert($node);
        foreach my $bpchild ( $bpnode->each_Descendent ) {
            $self->_recurse( $bpchild, $node );
        }
        return $self;
    }

=begin comment

 Type    : Internal method
 Title   : _analyze
 Usage   : $tree->_analyze;
 Function: Traverses the tree, creates references to first_daughter,
           last_daughter, next_sister and previous_sister.
 Returns : A Bio::Phylo::Forest::Tree object.
 Args    : none.
 Comments: This method only looks at the parent, so theoretically
           one could mess around with the
           Bio::Phylo::Forest::Node::set_parent(Bio::Phylo::Forest::Node) method and
           subsequently call Bio::Phylo::Forest::Tree::_analyze to overwrite old
           (and wrong) child and sister references with new (and correct) ones.

=end comment

=cut

    sub _analyze {
        my $tree  = $_[0];
        my $nodes = $tree->get_entities;
        foreach ( @{$nodes} ) {
            $_->set_next_sister();
            $_->set_previous_sister();
            $_->set_first_daughter();
            $_->set_last_daughter();
        }
        my ( $first, $next );

        # mmmm... O(N^2)
      NODE: for my $i ( 0 .. $#{$nodes} ) {
            $first = $nodes->[$i];
            for my $j ( ( $i + 1 ) .. $#{$nodes} ) {
                $next = $nodes->[$j];
                my ( $firstp, $nextp ) =
                  ( $first->get_parent, $next->get_parent );
                if ( $firstp && $nextp && $firstp == $nextp ) {
                    if ( !$first->get_next_sister ) {
                        $first->set_next_sister($next);
                    }
                    if ( !$next->get_previous_sister ) {
                        $next->set_previous_sister($first);
                    }
                    next NODE;
                }
            }
        }

        # O(N)
        foreach ( @{$nodes} ) {
            my $p = $_->get_parent;
            if ($p) {
                if ( !$_->get_next_sister ) {
                    $p->set_last_daughter($_);
                    next;
                }
                if ( !$_->get_previous_sister ) {
                    $p->set_first_daughter($_);
                }
            }
        }
        return $tree;
    }

=back 

=head2 QUERIES

=over

=item get_midpoint()

Gets node that divides tree into two distance-balanced partitions.

 Type    : Query
 Title   : get_midpoint
 Usage   : my $midpoint = $tree->get_midpoint;
 Function: Gets node nearest to the middle of the longest path
 Returns : A Bio::Phylo::Forest::Node object.
 Args    : NONE
 Comments: This algorithm was ported from ETE. 
           It assumes the tree has branch lengths.

=cut

    sub get_midpoint {
        my $self     = shift;
        my $root     = $self->get_root;
        my $nA       = $self->get_tallest_tip;
        my $nB       = $nA->get_farthest_node(1);
        $logger->error("no farthest node!") unless $nB; 
        my $A2B_dist = $nA->calc_path_to_root + $nB->calc_path_to_root;
        my $outgroup = $nA;
        my $middist  = $A2B_dist / 2;
        my $cdist    = 0;
        my $current  = $nA;
        while ($current) {

            if ( $cdist > $middist ) {
                last;
            }
            else {
                if ( my $parent = $current->get_parent ) {
                    $cdist += $current->get_branch_length;
                    $current = $parent;
                }
                else {
                    last;
                }
            }
        }
        return $current;
    }

=item get_terminals()

Get terminal nodes.

 Type    : Query
 Title   : get_terminals
 Usage   : my @terminals = @{ $tree->get_terminals };
 Function: Retrieves all terminal nodes in
           the Bio::Phylo::Forest::Tree object.
 Returns : An array reference of 
           Bio::Phylo::Forest::Node objects.
 Args    : NONE
 Comments: If the tree is valid, this method 
           retrieves the same set of nodes as 
           $node->get_terminals($root). However, 
           because there is no recursion it may 
           be faster. Also, the node method by 
           the same name does not see orphans.

=cut

    sub get_terminals {
        my $self = shift;
        my @terminals;
        if ( my $root = $self->get_root ) {
            $root->visit_level_order(
                sub {
                    my $node = shift;
                    if ( $node->is_terminal ) {
                        push @terminals, $node;
                    }
                }
            );
        }
        else {
            $self->visit(
                sub {
                    my $n = shift;
                    if ( $n->is_terminal ) {
                        push @terminals, $n;
                    }
                }
            );
        }
        return \@terminals;
    }

=item get_internals()

Get internal nodes.

 Type    : Query
 Title   : get_internals
 Usage   : my @internals = @{ $tree->get_internals };
 Function: Retrieves all internal nodes 
           in the Bio::Phylo::Forest::Tree object.
 Returns : An array reference of 
           Bio::Phylo::Forest::Node objects.
 Args    : NONE
 Comments: If the tree is valid, this method 
           retrieves the same set of nodes as 
           $node->get_internals($root). However, 
           because there is no recursion it may 
           be faster. Also, the node method by 
           the same name does not see orphans.

=cut

    sub get_internals {
        my $self = shift;
        my @internals = grep { scalar @{ $_->get_children } } @{ $self->get_entities };
        return \@internals;
    }

=item get_cherries()

Get all cherries, i.e. nodes that have two terminal children

 Type    : Query
 Title   : get_cherries
 Usage   : my @cherries = @{ $tree->get_cherries };
 Function: Returns an array ref of cherries
 Returns : ARRAY
 Args    : NONE

=cut

    sub get_cherries {
        my $self = shift;
        my @cherries;
        for my $node ( @{ $self->get_entities } ) {
            my @children = @{ $node->get_children };
            
            # node has to be bifurcating
            if ( scalar(@children) == 2 ) {
                
                # both children need to be tips
                if ( not @{ $children[0]->get_children } and not @{ $children[1]->get_children } ) {
                    push @cherries, $node;
                }
            }
        }        
        return \@cherries;
    }

=item get_all_rootings()

Gets a forest of all rooted versions of the invocant tree.

 Type    : Query
 Title   : get_all_rootings
 Usage   : my $forest = $tree->get_all_rootings;
 Function: Returns an array ref of cherries
 Returns : Bio::Phylo::Forest object
 Args    : NONE
 Comments: This method assumes the invocant tree has a basal trichotomy.
           "Rooted" trees with a basal bifurcation will give strange
           results.

=cut    

    sub get_all_rootings {
        my $self = shift;
        my $forest = $fac->create_forest;
        
        # iterate over all nodes
        my $i = 0;
        $self->visit(sub{
           
            # clone the tree
            my $clone = $self->clone;
            my $node  = $clone->get_by_index($i++);
            my $anc   = $node->get_ancestors;
            
            # create the new root if node isn't already root
            if ( $anc->[0] ) {
                my $nroot = $fac->create_node;
                $nroot->set_child($node);
                $clone->insert($nroot);
                $anc->[0]->delete($node) if $anc->[0];
                
                # flip the nodes on the path to the root
                for my $j ( 0 .. $#{ $anc } ) {
                    $nroot->set_child($anc->[$j]);                
                    $nroot = $anc->[$j];
                }
                $forest->insert($clone);
            }
        });
        return $forest;
    }

    
=item get_root()

Get root node.

 Type    : Query
 Title   : get_root
 Usage   : my $root = $tree->get_root;
 Function: Returns the root node.
 Returns : Bio::Phylo::Forest::Node
 Args    : NONE

=cut

    sub get_root {
        my $self = shift;
        
        # the simplest approach: look for nodes without parents
        my ($root) = grep { ! $_->get_parent } @{ $self->get_entities };
        if ( $root ) {
            return $root;
        }
        
        else {
            my ( %children_of, %node_by_id );
            for my $node ( @{ $self->get_entities } ) {
                $node_by_id{ $node->get_id } = $node;
                if ( my $parent = $node->get_parent ) {
                    my $parent_id = $parent->get_id;
                    $children_of{$parent_id} = [] if not $children_of{$parent_id};
                    push @{ $children_of{$parent_id} }, $node;
                }
                else {
                    return $node;
                }
            }
            for my $parent ( keys %children_of ) {
                if ( not exists $node_by_id{$parent} ) {
                    my @children = @{ $children_of{$parent} };
                    if ( scalar @children > 1 ) {
                        $logger->warn("Tree has multiple roots");
                    }
                    return shift @children;
                }
            }
            return;
        }
    }

=item get_ntax()

Gets number of tips

 Type    : Query
 Title   : get_ntax
 Usage   : my $ntax = $tree->get_ntax;
 Function: Calculates the number of terminal nodes
 Returns : Int
 Args    : NONE

=cut
    
    sub get_ntax { scalar(@{ shift->get_terminals } ) }

=item get_tallest_tip()

Retrieves the node furthest from the root. 

 Type    : Query
 Title   : get_tallest_tip
 Usage   : my $tip = $tree->get_tallest_tip;
 Function: Retrieves the node furthest from the
           root in the current Bio::Phylo::Forest::Tree
           object.
 Returns : Bio::Phylo::Forest::Node
 Args    : NONE
 Comments: If the tree has branch lengths, the tallest tip is
           based on root-to-tip path length, else it is based
           on number of nodes to root

=cut

    sub get_tallest_tip {
        my $self = shift;
        my $criterion;

        # has (at least some) branch lengths
        if ( $self->calc_tree_length ) {
            $criterion = 'calc_path_to_root';
        }
        else {
            $criterion = 'calc_nodes_to_root';
        }
        my $tallest;
        my $height = 0;
        for my $tip ( @{ $self->get_terminals } ) {
            if ( my $path = $tip->$criterion ) {
                if ( $path > $height ) {
                    $tallest = $tip;
                    $height  = $path;
                }
            }
        }
        return $tallest;
    }

=item get_nodes_for_taxa()

Gets node objects for the supplied taxon objects

 Type    : Query
 Title   : get_nodes_for_taxa
 Usage   : my @nodes = @{ $tree->get_nodes_for_taxa(\@taxa) };
 Function: Gets node objects for the supplied taxon objects
 Returns : array ref of Bio::Phylo::Forest::Node objects
 Args    : A reference to an array of Bio::Phylo::Taxa::Taxon objects
           or a Bio::Phylo::Taxa object

=cut

    sub get_nodes_for_taxa {
        my ( $self, $taxa ) = @_;
        my ( $is_taxa, $taxa_objs );
        eval { $is_taxa = looks_like_object $taxa, _TAXA_ };
        if ( $is_taxa and not $@ ) {
            $taxa_objs = $taxa->get_entities;
        }
        else {
            $taxa_objs = $taxa;
        }
        my %ids = map { $_->get_id => 1 } @{$taxa_objs};
        my @nodes;
        for my $node ( @{ $self->get_entities } ) {
            if ( my $taxon = $node->get_taxon ) {
                push @nodes, $node if $ids{ $taxon->get_id };
            }
        }
        return \@nodes;
    }

=item get_mrca()

Get most recent common ancestor of argument nodes.

 Type    : Query
 Title   : get_mrca
 Usage   : my $mrca = $tree->get_mrca(\@nodes);
 Function: Retrieves the most recent 
           common ancestor of \@nodes
 Returns : Bio::Phylo::Forest::Node
 Args    : A reference to an array of 
           Bio::Phylo::Forest::Node objects 
           in $tree.

=cut

    sub get_mrca {
        my ( $tree, $nodes ) = @_;
        if ( not $nodes or not @{$nodes} ) {
            return;
        }
        elsif ( scalar @{$nodes} == 1 ) {
            return $nodes->[0];
        }
        else {
            my $node1 = shift @{$nodes};
            my $node2 = shift @{$nodes};
            my $anc1  = $node1->get_ancestors;
            my $anc2  = $node2->get_ancestors;
            unshift @{$anc1}, $node1;
            unshift @{$anc2}, $node2;
          TRAVERSAL: for my $i ( 0 .. $#{$anc1} ) {
                for my $j ( 0 .. $#{$anc2} ) {
                    if ( $anc1->[$i]->get_id == $anc2->[$j]->get_id ) {
                        unshift @{$nodes}, $anc1->[$i];
                        last TRAVERSAL;
                    }
                }
            }
            return $tree->get_mrca($nodes);
        }
    }

=back

=head2 TESTS

=over

=item is_binary()

Test if tree is bifurcating.

 Type    : Test
 Title   : is_binary
 Usage   : if ( $tree->is_binary ) {
              # do something
           }
 Function: Tests whether the invocant 
           object is bifurcating.
 Returns : BOOLEAN
 Args    : NONE

=cut

    sub is_binary {
        my $self = shift;
        my $return = 1;
        $self->visit(sub{
        	my $count = scalar(@{ shift->get_children });
        	$return = 0 if $count != 0 && $count != 2;
        });
        $return;
    }

=item is_ultrametric()

Test if tree is ultrametric.

 Type    : Test
 Title   : is_ultrametric
 Usage   : if ( $tree->is_ultrametric(0.01) ) {
              # do something
           }
 Function: Tests whether the invocant is 
           ultrametric.
 Returns : BOOLEAN
 Args    : Optional margin between pairwise 
           comparisons (default = 0).
 Comments: The test is done by performing 
           all pairwise comparisons for
           root-to-tip path lengths. Since many 
           programs introduce rounding errors 
           in branch lengths the optional argument is
           available to test TRUE for nearly 
           ultrametric trees. For example, a value 
           of 0.01 indicates that no pairwise
           comparison may differ by more than 1%. 
           Note: behaviour is undefined for 
           negative branch lengths.

=cut

    sub is_ultrametric {
        my $tree = shift;
        my $margin = shift || 0;
        my ( @tips, %path );
        $tree->visit_depth_first(
            '-pre' => sub {
                my $node = shift;
                if ( my $parent = $node->get_parent ) {
                    $path{ $node->get_id } =
                      $path{ $parent->get_id } +
                      ( $node->get_branch_length || 0 );
                }
                else {
                    $path{ $node->get_id } = $node->get_branch_length || 0;
                }
                push @tips, $node if $node->is_terminal;
            }
        );
        for my $i ( 0 .. ( $#tips - 1 ) ) {
            my $id1 = $tips[$i]->get_id;
          PATH: for my $j ( $i + 1 .. $#tips ) {
                my $id2 = $tips[$j]->get_id;
                next PATH unless $path{$id2};
                return 0 if abs( 1 - $path{$id1} / $path{$id2} ) > $margin;
            }
        }
        return 1;
    }

=item is_monophyletic()

Tests if first argument (node array ref) is monophyletic with respect
to second argument.

 Type    : Test
 Title   : is_monophyletic
 Usage   : if ( $tree->is_monophyletic(\@tips, $node) ) {
              # do something
           }
 Function: Tests whether the set of \@tips is
           monophyletic w.r.t. $outgroup.
 Returns : BOOLEAN
 Args    : A reference to a list of nodes, and a node.
 Comments: This method is essentially the
           same as 
           &Bio::Phylo::Forest::Node::is_outgroup_of.

=cut

    sub is_monophyletic {
        my $tree = shift;
        my ( $nodes, $outgroup );
        if ( @_ == 2 ) {
            ( $nodes, $outgroup ) = @_;
        }
        elsif ( @_ == 4 ) {
            my %args = @_;
            $nodes    = $args{'-nodes'};
            $outgroup = $args{'-outgroup'};
        }
        for my $i ( 0 .. $#{$nodes} ) {
            for my $j ( ( $i + 1 ) .. $#{$nodes} ) {
                my $mrca = $nodes->[$i]->get_mrca( $nodes->[$j] );
                return if $mrca->is_ancestor_of($outgroup);
            }
        }
        return 1;
    }

=item is_paraphyletic()

 Type    : Test
 Title   : is_paraphyletic
 Usage   : if ( $tree->is_paraphyletic(\@nodes,$node) ){ }
 Function: Tests whether or not a given set of nodes are paraphyletic
           (representing the full clade) given an outgroup
 Returns : [-1,0,1] , -1 if the group is not monophyletic
                       0 if the group is not paraphyletic
                       1 if the group is paraphyletic
 Args    : Array ref of node objects which are in the tree,
           Outgroup to compare the nodes to

=cut

    sub is_paraphyletic {
        my $tree = shift;
        my ( $nodes, $outgroup );
        if ( @_ == 2 ) {
            ( $nodes, $outgroup ) = @_;
        }
        elsif ( @_ == 4 ) {
            my %args = @_;
            $nodes    = $args{'-nodes'};
            $outgroup = $args{'-outgroup'};
        }
        return -1 if !$tree->is_monophyletic( $nodes, $outgroup );
        my @all  = ( @{$nodes}, $outgroup );
        my $mrca = $tree->get_mrca( \@all );
        my $tips = $mrca->get_terminals;
        return scalar @{$tips} == scalar @all ? 0 : 1;
    }

=item is_clade()

Tests if argument (node array ref) forms a clade.

 Type    : Test
 Title   : is_clade
 Usage   : if ( $tree->is_clade(\@tips) ) {
              # do something
           }
 Function: Tests whether the set of 
           \@tips forms a clade
 Returns : BOOLEAN
 Args    : A reference to an array of Bio::Phylo::Forest::Node objects, or a
           reference to an array of Bio::Phylo::Taxa::Taxon objects, or a
	   Bio::Phylo::Taxa object
 Comments:

=cut

    sub is_clade {
        my ( $tree, $arg ) = @_;
        my ( $is_taxa, $is_node_array, $tips );

        # check if arg is a Taxa object
        eval { $is_taxa = looks_like_object $arg, _TAXA_ };
        if ( $is_taxa and not $@ ) {
            $tips = $tree->get_nodes_for_taxa($arg);
        }

        # check if arg is an array of Taxon object
        eval { $is_node_array = looks_like_object $arg->[0], _TAXON_ };
        if ( $is_node_array and not $@ ) {
            $tips = $tree->get_nodes_for_taxa($arg);
        }
        else {
            $tips = $arg;    # arg is an array of Node objects
        }
        my $mrca;
        for my $i ( 1 .. $#{$tips} ) {
            $mrca ? $mrca = $mrca->get_mrca( $tips->[$i] ) : $mrca =
              $tips->[0]->get_mrca( $tips->[$i] );
        }
        scalar @{ $mrca->get_terminals } == scalar @{$tips} ? return 1 : return;
    }

=item is_cladogram()

Tests if tree is a cladogram (i.e. no branch lengths)

 Type    : Test
 Title   : is_cladogram
 Usage   : if ( $tree->is_cladogram() ) {
              # do something
           }
 Function: Tests whether the tree is a 
           cladogram (i.e. no branch lengths)
 Returns : BOOLEAN
 Args    : NONE
 Comments:

=cut

    sub is_cladogram {
        my $tree = shift;
        for my $node ( @{ $tree->get_entities } ) {
            return 0 if defined $node->get_branch_length;
        }
        return 1;
    }

=back

=head2 CALCULATIONS

=over

=item calc_branch_length_distance()

Calculates the Euclidean branch length distance between two trees. See
Kuhner & Felsenstein (1994). A simulation comparison of phylogeny algorithms
under equal and unequal evolutionary rates. MBE 11(3):459-468.

 Type    : Calculation
 Title   : calc_branch_length_distance
 Usage   : my $distance = 
           $tree1->calc_branch_length_distance($tree2);
 Function: Calculates the Euclidean branch length distance between two trees
 Returns : SCALAR, number
 Args    : NONE

=cut

    #=item calc_robinson_foulds_distance()
    #
    #Calculates the Robinson and Foulds distance between two trees.
    #
    # Type    : Calculation
    # Title   : calc_robinson_foulds_distance
    # Usage   : my $distance =
    #           $tree1->calc_robinson_foulds_distance($tree2);
    # Function: Calculates the Robinson and Foulds distance between two trees
    # Returns : SCALAR, number
    # Args    : NONE
    #
    #=cut
    #
    #	sub calc_robinson_foulds_distance {
    #		my ( $self, $other ) = @_;
    #		my $tuples = $self->_calc_branch_diffs($other);
    #		my $sum = 0;
    #		for my $tuple ( @{ $tuples } ) {
    #			my $diff = $tuple->[0] - $tuple->[1];
    #			$sum += abs $diff;
    #		}
    #		return $sum;
    #	}
    sub calc_branch_length_distance {
        my ( $self, $other ) = @_;
        my $squared = $self->calc_branch_length_score($other);
        return sqrt($squared);
    }

=item calc_branch_length_score()

Calculates the squared Euclidean branch length distance between two trees.

 Type    : Calculation
 Title   : calc_branch_length_score
 Usage   : my $score = 
           $tree1->calc_branch_length_score($tree2);
 Function: Calculates the squared Euclidean branch
           length distance between two trees
 Returns : SCALAR, number
 Args    : A Bio::Phylo::Forest::Tree object,           
           Optional second argument flags that results should be normalized
=cut

    sub calc_branch_length_score {
        my ( $self, $other, $normalize ) = @_;
        my $tuples = $self->_calc_branch_diffs($other);
        my $sum    = 0;
        for my $tuple ( @{$tuples} ) {
            my $diff = ( $tuple->[0] || 0 ) - ( $tuple->[1] || 0 );
            $sum += $diff**2;
        }
        return $normalize ? $sum / scalar(@{$tuples}) : $sum;
    }


=begin comment

Returns an array ref containing array references, with the first element of 
each nested array ref representing the length of the branch subtending a 
particular split on the invocant (or 0), the second element the length of the 
same branch on argument (or 0), the third element a boolean to indicate whether 
the split was present in both trees, and the fourth element a sorted, comma-separated
list of the MD5-hashed names of all tips subtended by that split.

 Type    : Calculation
 Title   : calc_branch_diffs
 Usage   : my $triples = 
           $tree1->calc_branch_diffs($tree2);
 Function: Creates two-dimensional array of equivalent branch lengths
 Returns : Two-dimensional array (triples)
 Args    : NONE

=end comment

=cut

    sub _calc_branch_diffs {
        my ( $self, $other ) = @_;

        # we create an anonymous subroutine which
        # we will apply to $self and $other
        my $length_for_split_creator = sub {

            # so this will be $self and $other
            my $tree = shift;

            # keys will be hashed, comma-separated tip names,
            # values will be branch lengths
            my %length_for_split;

            # this will assemble the comma-separated,
            # hashed tip names
            my %hash_for_node;

            # post-order traversal, so tips are processed first
            $tree->visit_depth_first(
                '-post' => sub {
                    my $node     = shift;
                    my $id       = $node->get_id;
                    my @children = @{ $node->get_children };
                    my $hash;

                    # we only enter into this case AFTER tips
                    # have been processed, so %hash_for_node
                    # values will be assigned for all children
                    if (@children and $node->get_parent) {

                        # these will be growing lists from
                        # tips to root
                        my $unsorted = join ',',
                          map { $hash_for_node{ $_->get_id } } @children;

                        # we need to split, sort and join
                        # so that splits where the subtended,
                        # higher topology is different still
                        # yield the same concatenated hash
                        $hash = join ',', sort { $a cmp $b } split /,/,
                          $unsorted;

                        # coerce to a numeric type
                        $length_for_split{$hash} = $node->get_branch_length || 0;
                    }
                    else {

                        # this is how we ensure that every tip name is a 
                        # single, unique string without unexpected characters
                        # (especially, commas).
                        # Digest::MD5 was in CORE since 5.7
                        require Digest::MD5;
                        $hash = Digest::MD5::md5( $node->get_name );                        
                    }

                    # store for the next recursion
                    $hash_for_node{$id} = $hash;
                }
            );

            # this is the return value for the anonymous sub
            return %length_for_split;
        };

        # here we execute the anonymous sub. twice.
        my %lengths_self  = $length_for_split_creator->($self);
        my %lengths_other = $length_for_split_creator->($other);
        my @tuples;

        # first visit the splits in $self, which will identify
        # those it shares with $other and those missing in $other
        for my $split ( keys %lengths_self ) {
            my $tuple;
            if ( exists $lengths_other{$split} ) {
                $tuple =
                  [ $lengths_self{$split}, $lengths_other{$split} || 0, 1, $split ];
            }
            else {
                $tuple = [ $lengths_self{$split}, 0, 0, $split ];
            }
            push @tuples, $tuple;
        }

        # then check if there are splits in $other but not in $self
        for my $split ( keys %lengths_other ) {
            if ( not exists $lengths_self{$split} ) {
                push @tuples, [ 0, $lengths_other{$split}, 0, $split ];
            }
        }
        return \@tuples;
    }

=item calc_tree_length()

Calculates the sum of all branch lengths.

 Type    : Calculation
 Title   : calc_tree_length
 Usage   : my $tree_length = 
           $tree->calc_tree_length;
 Function: Calculates the sum of all branch 
           lengths (i.e. the tree length).
 Returns : FLOAT
 Args    : NONE

=cut

    sub calc_tree_length {
        my $self = shift;
        my $tl   = 0;
        $self->visit(sub{
        	$tl += shift->get_branch_length || 0;
        });
        return $tl;
    }

=item calc_tree_height()

Calculates the height of the tree.

 Type    : Calculation
 Title   : calc_tree_height
 Usage   : my $tree_height = 
           $tree->calc_tree_height;
 Function: Calculates the height 
           of the tree.
 Returns : FLOAT
 Args    : NONE
 Comments: For ultrametric trees this 
           method returns the height, but 
           this is done by averaging over 
           all root-to-tip path lengths, so 
           for additive trees the result 
           should consequently be interpreted
           differently.

=cut

    sub calc_tree_height {
        my $self = shift;
        my $th   = $self->calc_total_paths / $self->calc_number_of_terminals;
        return $th;
    }

=item calc_number_of_nodes()

Calculates the number of nodes.

 Type    : Calculation
 Title   : calc_number_of_nodes
 Usage   : my $number_of_nodes = 
           $tree->calc_number_of_nodes;
 Function: Calculates the number of 
           nodes (internals AND terminals).
 Returns : INT
 Args    : NONE

=cut

    sub calc_number_of_nodes {
        my $self     = shift;
        my $numnodes = scalar @{ $self->get_entities };
        return $numnodes;
    }

=item calc_number_of_terminals()

Calculates the number of terminal nodes.

 Type    : Calculation
 Title   : calc_number_of_terminals
 Usage   : my $number_of_terminals = 
           $tree->calc_number_of_terminals;
 Function: Calculates the number 
           of terminal nodes.
 Returns : INT
 Args    : NONE

=cut

    sub calc_number_of_terminals {
        my $self    = shift;
        my $numterm = scalar @{ $self->get_terminals };
        return $numterm;
    }

=item calc_number_of_internals()

Calculates the number of internal nodes.

 Type    : Calculation
 Title   : calc_number_of_internals
 Usage   : my $number_of_internals = 
           $tree->calc_number_of_internals;
 Function: Calculates the number 
           of internal nodes.
 Returns : INT
 Args    : NONE

=cut

    sub calc_number_of_internals {
        my $self   = shift;
        my $numint = scalar @{ $self->get_internals };
        return $numint;
    }

=item calc_number_of_cherries()

Calculates the number of cherries, i.e. the number of nodes that subtend
exactly two tips. See for applications of this metric:
L<http://dx.doi.org/10.1016/S0025-5564(99)00060-7>

 Type    : Calculation
 Title   : calc_number_of_cherries
 Usage   : my $number_of_cherries = 
           $tree->calc_number_of_cherries;
 Function: Calculates the number of cherries
 Returns : INT
 Args    : NONE

=cut

    sub calc_number_of_cherries {
        my $self = shift;
        my %cherry;
        for my $tip ( @{ $self->get_terminals } ) {
            if ( my $parent = $tip->get_parent ) {
                if ( $parent->is_preterminal ) {
                    my $children = $parent->get_children;
                    if ( scalar @{$children} == 2 ) {
                        $cherry{ $parent->get_id }++;
                    }
                }
            }
        }
        my @cherry_ids = keys %cherry;
        return scalar @cherry_ids;
    }

=item calc_total_paths()

Calculates the sum of all root-to-tip path lengths.

 Type    : Calculation
 Title   : calc_total_paths
 Usage   : my $total_paths = 
           $tree->calc_total_paths;
 Function: Calculates the sum of all 
           root-to-tip path lengths.
 Returns : FLOAT
 Args    : NONE

=cut

    sub calc_total_paths {
        my $self = shift;
        my $tp   = 0;
        foreach ( @{ $self->get_terminals } ) {
            $tp += $_->calc_path_to_root;
        }
        return $tp;
    }

=item calc_redundancy()

Calculates the amount of shared (redundant) history on the total.

 Type    : Calculation
 Title   : calc_redundancy
 Usage   : my $redundancy = 
           $tree->calc_redundancy;
 Function: Calculates the amount of shared 
           (redundant) history on the total.
 Returns : FLOAT
 Args    : NONE
 Comments: Redundancy is calculated as
 1 / ( treelength - height / ( ntax * height - height ) )

=cut

    sub calc_redundancy {
        my $self = shift;
        my $tl   = $self->calc_tree_length;
        my $th   = $self->calc_tree_height;
        my $ntax = $self->calc_number_of_terminals;
        my $red  = 1 - ( ( $tl - $th ) / ( ( $th * $ntax ) - $th ) );
        return $red;
    }

=item calc_imbalance()

Calculates Colless' coefficient of tree imbalance.

 Type    : Calculation
 Title   : calc_imbalance
 Usage   : my $imbalance = $tree->calc_imbalance;
 Function: Calculates Colless' coefficient 
           of tree imbalance.
 Returns : FLOAT
 Args    : NONE
 Comments: As described in Colless, D.H., 1982. 
           The theory and practice of phylogenetic 
           systematics. Systematic Zoology 31(1): 100-104

=cut

    sub calc_imbalance {
        my $self = shift;
        my %descendants;
        my $n = 0;
        my $sumdiff = 0;
        $self->visit_depth_first(
            '-post' => sub {
                my $node = shift;
                my @children = @{ $node->get_children };
                
                # node is internal, compute n descendants left and right
                if ( @children == 2 ) {
                    my $li = shift @children;
                    my $ri = shift @children;
                    my $li_ndesc = $descendants{$li->get_id};
                    my $ri_ndesc = $descendants{$ri->get_id};
                    $sumdiff += abs($li_ndesc - $ri_ndesc);
                    $descendants{$node->get_id} = $li_ndesc + $ri_ndesc;
                }
                
                # node is terminal, initialize tally of descendants
                elsif ( @children == 0 ) {
                    $n++;
                    $descendants{$node->get_id} = 1;
                }
                
                # node is either a polytomy or an unbranched internal. Can't proceed in either case.
                else {
                    throw 'ObjectMismatch' => "Colless's imbalance only possible for binary trees";
                }
            }
        );
        if ( $n < 3 ) {
        	$logger->error("too few nodes in tree: $n<=2");
        	return undef;
        }
        else {
        	return $sumdiff / ( ($n-1) * ($n-2) / 2 );
        }
    }

=item calc_i2()

Calculates I2 imbalance.

 Type    : Calculation
 Title   : calc_i2
 Usage   : my $ci2 = $tree->calc_i2;
 Function: Calculates I2 imbalance.
 Returns : FLOAT
 Args    : NONE
 Comments:

=cut

    sub calc_i2 {
        my $self = shift;
        my ( $maxic, $sum, $I2 ) = ( 0, 0 );
        if ( !$self->is_binary ) {
            throw 'ObjectMismatch' => 'I2 imbalance only possible for binary trees';
        }
        my $numtips = $self->calc_number_of_terminals;
        $numtips -= 2;
        while ($numtips) {
            $maxic += $numtips;
            $numtips--;
        }
        foreach my $node ( @{ $self->get_internals } ) {
            my ( $fd, $ld, $ftips, $ltips ) =
              ( $node->get_first_daughter, $node->get_last_daughter, 0, 0 );
            if ( $fd->is_internal ) {
                foreach ( @{ $fd->get_descendants } ) {
                    if ( $_->is_terminal ) {
                        $ftips++;
                    }
                    else {
                        next;
                    }
                }
            }
            else {
                $ftips = 1;
            }
            if ( $ld->is_internal ) {
                foreach ( @{ $ld->get_descendants } ) {
                    if ( $_->is_terminal ) {
                        $ltips++;
                    }
                    else {
                        next;
                    }
                }
            }
            else {
                $ltips = 1;
            }
            next unless ( $ftips + $ltips - 2 );
            $sum += abs( $ftips - $ltips ) / abs( $ftips + $ltips - 2 );
        }
        if ( $maxic == 0 ) {
        	$logger->error("too few nodes in tree: $maxic==0");
        	return undef;
        }
        else {
        	$I2 = $sum / $maxic;
        	return $I2;
        }
    }

=item calc_gamma()

Calculates the Pybus & Harvey (2000) gamma statistic.

 Type    : Calculation
 Title   : calc_gamma
 Usage   : my $gamma = $tree->calc_gamma();
 Function: Calculates the Pybus gamma statistic
 Returns : FLOAT
 Args    : NONE
 Comments: As described in Pybus, O.G. and 
           Harvey, P.H., 2000. Testing
           macro-evolutionary models using 
           incomplete molecular phylogenies. 
           Proc. R. Soc. Lond. B 267, 2267-2272

=cut

    # code due to Aki Mimoto
    sub calc_gamma {
        my $self      = shift;
        my $tl        = $self->calc_tree_length;
        my $terminals = $self->get_terminals;
        my $n         = scalar @{$terminals};
        my $height    = $self->calc_tree_height;

      # Calculate the distance of each node to the root
      #        my %soft_refs;
      #        my $root = $self->get_root;
      #        $soft_refs{$root} = 0;
      #        my @nodes = $root;
      #        while (@nodes) {
      #            my $node     = shift @nodes;
      #            my $path_len = $soft_refs{$node} += $node->get_branch_length;
      #            my $children = $node->get_children or next;
      #            for my $child (@$children) {
      #                $soft_refs{$child} = $path_len;
      #            }
      #            push @nodes, @{$children};
      #        }
      # the commented out block is more efficiently implemented like so:
        my %soft_refs =
          map { $_ => $_->calc_path_to_root } @{ $self->get_entities };

        # Then, we know how far each node is from the root. At this point, we
        # can sort through and create the @g array
        my %node_spread =
          map { ( $_ => 1 ) } values %soft_refs;    # remove duplicates
        my @sorted_nodes = sort { $a <=> $b } keys %node_spread;
        my $prev = 0;
        my @g;
        for my $length (@sorted_nodes) {
            push @g, $length - $prev;
            $prev = $length;
        }
        my $sum = 0;
        eval { require Math::BigFloat };
        if ($@) {                                   # BigFloat is not available.
            for ( my $i = 2 ; $i < $n ; $i++ ) {
                for ( my $k = 2 ; $k <= $i ; $k++ ) {
                    $sum += $k * $g[ $k - 1 ];
                }
            }
            my $numerator = ( $sum / ( $n - 2 ) ) - ( $tl / 2 );
            my $denominator = $tl * sqrt( 1 / ( 12 * ( $n - 2 ) ) );
            $self->_store_cache( $numerator / $denominator );
            return $numerator / $denominator;
        }

        # Big Float is available. We'll use it then
        $sum = Math::BigFloat->new(0);
        for ( my $i = 2 ; $i < $n ; $i++ ) {
            for ( my $k = 2 ; $k <= $i ; $k++ ) {
                $sum->badd( $k * $g[ $k - 1 ] );
            }
        }
        $sum->bdiv( $n - 2 );
        $sum->bsub( $tl / 2 );
        my $denominator = Math::BigFloat->new(1);
        $denominator->bdiv( 12 * ( $n - 2 ) );
        $denominator->bsqrt();
        $sum->bdiv( $denominator * $tl );
        
         # R seems to be unhappy about long numbers, so truncating
        $sum->accuracy(10);
        return $sum;
    }

=item calc_fiala_stemminess()

Calculates stemminess measure of Fiala and Sokal (1985).

 Type    : Calculation
 Title   : calc_fiala_stemminess
 Usage   : my $fiala_stemminess = 
           $tree->calc_fiala_stemminess;
 Function: Calculates stemminess measure 
           Fiala and Sokal (1985).
 Returns : FLOAT
 Args    : NONE
 Comments: As described in Fiala, K.L. and 
           R.R. Sokal, 1985. Factors 
           determining the accuracy of 
           cladogram estimation: evaluation 
           using computer simulation. 
           Evolution, 39: 609-622

=cut

    sub calc_fiala_stemminess {
        my $self      = shift;
        my @internals = @{ $self->get_internals };
        my $total     = 0;
        my $nnodes    = ( scalar @internals - 1 );
        foreach my $node (@internals) {
            if ( $node->get_parent ) {
                my $desclengths = $node->get_branch_length;
                my @children    = @{ $node->get_descendants };
                for my $child (@children) {
                    $desclengths += $child->get_branch_length;
                }
                $total += ( $node->get_branch_length / $desclengths );
            }
        }
        if ( $nnodes ) {
        	return $total /= $nnodes;
        }
        else {
        	$logger->error("too few nodes in tree: n-1=$nnodes");
        	return undef;
        }
    }

=item calc_rohlf_stemminess()

Calculates stemminess measure from Rohlf et al. (1990).

 Type    : Calculation
 Title   : calc_rohlf_stemminess
 Usage   : my $rohlf_stemminess = 
           $tree->calc_rohlf_stemminess;
 Function: Calculates stemminess measure 
           from Rohlf et al. (1990).
 Returns : FLOAT
 Args    : NONE
 Comments: As described in Rohlf, F.J., 
           W.S. Chang, R.R. Sokal, J. Kim, 
           1990. Accuracy of estimated 
           phylogenies: effects of tree 
           topology and evolutionary model. 
           Evolution, 44(6): 1671-1684

=cut

    sub calc_rohlf_stemminess {

        # invocant is a tree
        my $self = shift;
        throw ObjectMismatch => "This algorithm isn't generalized to
			deal with multifurcations" if $self->calc_resolution < 1;
        throw ObjectMismatch => "This algorithm requires branch lengths"
          unless $self->calc_tree_length;

        # all internal nodes in the tree
        my @internals = @{ $self->get_internals };

        # all terminal nodes in the tree
        my @terminals = @{ $self->get_terminals };

        # this will become the sum of all STni
        my $total = 0;

        # 1/(t-2), by which we multiply total
        my $one_over_t_minus_two = 1 / ( scalar @terminals - 2 );

        # iterate over all nodes, as per equation (1)
        for my $node (@internals) {

            # only process nodes that aren't the root
            if ( my $parent = $node->get_parent ) {

                # Wj->i is defined as "the length of the edge
                # (in time units) between HTU i (a hypothetical
                # taxonomic unit, i.e. an internal node) and
                # its ancestor j"
                my $Wj_i = $node->get_branch_length;

                # hj is defined as "the 'height' of HTU j (the
                # time of its origin, a known quantity since we
                # know the true tree in these simulations)".
                my $hj = $parent->calc_path_to_root;
                if ( !$hj ) {
                    next;
                }

                # as per equation (2) in Rohlf et al. (1990)
                $total += ( $Wj_i / $hj );
            }
        }

        # multiply by 1/(t-2) as per equation (1)
        return $one_over_t_minus_two * $total;
    }

=item calc_resolution()

Calculates tree resolution.

 Type    : Calculation
 Title   : calc_resolution
 Usage   : my $resolution = 
           $tree->calc_resolution;
 Function: Calculates the number 
           of internal nodes over the
           total number of internal nodes 
           on a fully bifurcating
           tree of the same size.
 Returns : FLOAT
 Args    : NONE

=cut

    sub calc_resolution {
        my $self = shift;
        my $res  = $self->calc_number_of_internals /
          ( $self->calc_number_of_terminals - 1 );
        return $res;
    }

=item calc_branching_times()

Calculates cumulative branching times.

 Type    : Calculation
 Title   : calc_branching_times
 Usage   : my $branching_times = 
           $tree->calc_branching_times;
 Function: Returns a two-dimensional array. 
           The first dimension consists of 
           the "records", so that in the 
           second dimension $AoA[$first][0] 
           contains the internal node references, 
           and $AoA[$first][1] the branching 
           time of the internal node. The 
           records are orderered from root to 
           tips by time from the origin.
 Returns : SCALAR[][] or FALSE
 Args    : NONE

=cut

    sub calc_branching_times {
        my $self = shift;
        my @branching_times;
        if ( !$self->is_ultrametric(0.01) ) {
            throw 'ObjectMismatch' =>
              'tree isn\'t ultrametric, results would be meaningless';
        }
        else {
            my @temp;
            my $seen_tip = 0;
            $self->visit_depth_first(
                '-pre' => sub {
                    my $node = shift;
                    if ( not $seen_tip or $node->is_internal ) {
                        my $bt = $node->get_branch_length;
                        if ( my $parent = $node->get_parent ) {
                            $bt += $parent->get_generic('bt');
                        }
                        $node->set_generic( 'bt' => $bt );
                        push @temp, [ $node, $bt ];
                        if ( $node->is_terminal ) {
                            $seen_tip++;
                        }
                    }
                }
            );
            @branching_times = sort { $a->[1] <=> $b->[1] } @temp;
        }
        return \@branching_times;
    }

=item calc_waiting_times()

Calculates intervals between splits.

 Type    : Calculation
 Title   : calc_waiting_times
 Usage   : my $waitings = 
           $tree->calc_waiting_times;
 Function: Returns a two-dimensional array. 
           The first dimension consists of 
           the "records", so that in the 
           second dimension $AoA[$first][0] 
           contains the internal node references, 
           and $AoA[$first][1] the waiting 
           time of the internal node. The 
           records are orderered from root to 
           tips by time from the origin.
 Returns : SCALAR[][] or FALSE
 Args    : NONE

=cut

    sub calc_waiting_times {
        my $self  = shift;
        my $times = $self->calc_branching_times;
        for ( my $i = $#{$times} ; $i > 0 ; $i-- ) {
            $times->[$i]->[1] -= $times->[ $i - 1 ]->[1];
        }
        return $times;
    }

=item calc_node_ages()

Calculates node ages.

 Type    : Calculation
 Title   : calc_node_ages
 Usage   : $tree->calc_node_ages;
 Function: Calculates the age of all the nodes in the tree (i.e. the distance
           from the tips) and assigns these to the 'age' slot, such that,
	   after calling this method, the age of any one node can be retrieved
	   by calling $node->get_generic('age');
 Returns : The invocant
 Args    : NONE
 Comments: This method computes, in a sense, the opposite of
           calc_branching_times: here, we compute the distance from the tips
	   (i.e. how long ago the split occurred), whereas calc_branching_times
	   calculates the distance from the root.

=cut

    sub calc_node_ages {
        my $self = shift;
        $self->visit_depth_first(
            '-post' => sub {
                my $node = shift;
                my $age  = 0;
                if ( my $child = $node->get_child(0) ) {
                    $age =
                      $child->get_generic('age') + $child->get_branch_length;
                }
                $node->set_generic( 'age' => $age );
            }
        );
        return $self;
    }

=item calc_ltt()

Calculates lineage-through-time data points.

 Type    : Calculation
 Title   : calc_ltt
 Usage   : my $ltt = $tree->calc_ltt;
 Function: Returns a two-dimensional array. 
           The first dimension consists of the 
           "records", so that in the second 
           dimension $AoA[$first][0] contains 
           the internal node references, and
           $AoA[$first][1] the branching time 
           of the internal node, and $AoA[$first][2] 
           the cumulative number of lineages over
           time. The records are orderered from 
           root to tips by time from the origin.
 Returns : SCALAR[][] or FALSE
 Args    : NONE

=cut

    sub calc_ltt {
        my $self = shift;
        if ( !$self->is_ultrametric(0.01) ) {
            throw 'ObjectMismatch' =>
              'tree isn\'t ultrametric, results are meaningless';
        }
        my $ltt      = ( $self->calc_branching_times );
        my $lineages = 1;
        for my $i ( 0 .. $#{$ltt} ) {
            $lineages += ( scalar @{ $ltt->[$i][0]->get_children } - 1 );
            $ltt->[$i][2] = $lineages;
        }
        return $ltt;
    }

=item calc_symdiff()

Calculates the symmetric difference metric between invocant and argument. This
metric is identical to the Robinson-Foulds tree comparison distance. See
L<http://dx.doi.org/10.1016/0025-5564(81)90043-2>

 Type    : Calculation
 Title   : calc_symdiff
 Usage   : my $symdiff = 
           $tree->calc_symdiff($other_tree);
 Function: Returns the symmetric difference 
           metric between $tree and $other_tree, 
           sensu Penny and Hendy, 1985.
 Returns : SCALAR
 Args    : A Bio::Phylo::Forest::Tree object,
           Optional second argument flags that results should be normalized
 Comments: Trees in comparison must span 
           the same set of terminal taxa
           or results are meaningless.

=cut

    sub calc_symdiff {
        my ( $tree, $other_tree, $normalize ) = @_;
        my $tuples  = $tree->_calc_branch_diffs($other_tree);
        my $symdiff = 0;
        #use Data::Dumper;
        #warn Dumper($tuples);
        for my $tuple ( @{$tuples} ) {
            $symdiff++ unless $tuple->[2];
        }
        return $normalize ? $symdiff / scalar(@{$tuples}) : $symdiff;
    }

=item calc_avtd()

Calculates the average taxonomic distinctiveness. See
Clarke KR, Warwick RM (1998) A taxonomic distinctness index and its statistical 
properties. J Appl Ecol 35:523-525
L<http://dx.doi.org/10.1046/j.1365-2664.1998.3540523.x>

 Type    : Calculation
 Title   : calc_avtd
 Usage   : my $avtd = $tree->calc_avtd;
 Function: Returns the average taxonomic distinctiveness
 Returns : SCALAR
 Args    : A Bio::Phylo::Forest::Tree object
 Comments: 

=cut

	sub calc_avtd {
		my $tree = shift;
		my @tips = @{ $tree->get_terminals };
		my $dist = 0;
		for my $i ( 0 .. $#tips - 1 ) {
			for my $j ( $i + 1 .. $#tips ) {
				$dist += $tips[$i]->calc_patristic_distance($tips[$j]);
			}
		}
		return $dist / scalar(@tips);
	}

=item calc_fp() 

Calculates the Fair Proportion value for each terminal.

 Type    : Calculation
 Title   : calc_fp
 Usage   : my $fp = $tree->calc_fp();
 Function: Returns the Fair Proportion 
           value for each terminal
 Returns : HASHREF
 Args    : NONE

=cut

    # code due to Aki Mimoto
    sub calc_fp {
        my $self = shift;

        # First establish how many children sit on each of the nodes
        my %weak_ref;
        my $terminals = $self->get_terminals;
        for my $terminal (@$terminals) {
            my $index = $terminal;
            do { $weak_ref{$index}++ } while ( $index = $index->get_parent );
        }

        # Then, assign each terminal a value
        my $fp = {};
        for my $terminal (@$terminals) {
            my $name = $terminal->get_name;
            my $fpi  = 0;
            do {
                $fpi +=
                  ( $terminal->get_branch_length || 0 ) / $weak_ref{$terminal};
            } while ( $terminal = $terminal->get_parent );
            $fp->{$name} = $fpi;
        }
        return $fp;
    }

=item calc_fp_mean() 

Calculates the mean Fair Proportion value over all terminals.

 Type    : Calculation
 Title   : calc_fp_mean
 Usage   : my $fp = $tree->calc_fp_mean();
 Function: Returns the mean Fair Proportion 
           value over all terminals
 Returns : FLOAT
 Args    : NONE

=cut
    
    sub calc_fp_mean {
    	my $self = shift;
    	my $fp = $self->calc_fp;
    	my @fp = values %{ $fp };
    	return sum(@fp)/scalar(@fp);
    }

=item calc_es() 

Calculates the Equal Splits value for each terminal

 Type    : Calculation
 Title   : calc_es
 Usage   : my $es = $tree->calc_es();
 Function: Returns the Equal Splits value for each terminal
 Returns : HASHREF
 Args    : NONE

=cut

    # code due to Aki Mimoto
    sub calc_es {
        my $self = shift;

        # First establish how many children sit on each of the nodes
        my $terminals = $self->get_terminals;
        my $es        = {};
        for my $terminal ( @{$terminals} ) {
            my $name    = $terminal->get_name;
            my $esi     = 0;
            my $divisor = 1;
            do {
                my $length   = $terminal->get_branch_length || 0;
                my $children = $terminal->get_children      || [];
                $divisor *= @$children || 1;
                $esi += $length / $divisor;
            } while ( $terminal = $terminal->get_parent );
            $es->{$name} = $esi;
        }
        return $es;
    }  

=item calc_es_mean()

Calculates the mean Equal Splits value over all terminals

 Type    : Calculation
 Title   : calc_es_mean
 Usage   : my $es = $tree->calc_es_mean();
 Function: Returns the Equal Splits value over all terminals
 Returns : FLOAT
 Args    : NONE

=cut

	sub calc_es_mean {
		my $self = shift;
		my $es = $self->calc_es;
		my @es = values %{ $es };
		return sum(@es)/scalar(@es);
	}

=item calc_pe()

Calculates the Pendant Edge value for each terminal.

 Type    : Calculation
 Title   : calc_pe
 Usage   : my $es = $tree->calc_pe();
 Function: Returns the Pendant Edge value for each terminal
 Returns : HASHREF
 Args    : NONE

=cut

    # code due to Aki Mimoto
    sub calc_pe {
        my $self = shift;
        my $terminals = $self->get_terminals or return {};
        my $pe =
          { map { $_->get_name => $_->get_branch_length } @{$terminals} };
        return $pe;
    }    

=item calc_pe_mean()

Calculates the mean Pendant Edge value over all terminals

 Type    : Calculation
 Title   : calc_pe_mean
 Usage   : my $es = $tree->calc_pe_mean();
 Function: Returns the mean Pendant Edge value over all terminals
 Returns : FLOAT
 Args    : NONE

=cut

	sub calc_pe_mean {
		my $self = shift;
		my $pe = $self->calc_pe;
		my @pe = values %{ $pe };
		return sum(@pe)/scalar(@pe);
	}

=item calc_shapley()

Calculates the Shapley value for each terminal.

 Type    : Calculation
 Title   : calc_shapley
 Usage   : my $es = $tree->calc_shapley();
 Function: Returns the Shapley value for each terminal
 Returns : HASHREF
 Args    : NONE

=cut

    # code due to Aki Mimoto
    sub calc_shapley {
        my $self = shift;

        # First find out how many tips are at the ends of each edge.
        my $terminals   = $self->get_terminals or return;    # nothing to see!
        my $edge_lookup = {};
        my $index       = $terminals->[0];

        # Iterate through the edges and find out which side each terminal reside
        _calc_shapley_traverse( $index, undef, $edge_lookup, 'root' );

        # At this point, it's possible to create the calculation matrix
        my $n = @$terminals;
        my @m;
        my $edges = [ keys %$edge_lookup ];
        for my $e ( 0 .. $#$edges ) {
            my $edge = $edges->[$e];
            my $el =
              $edge_lookup->{$edge};    # Lookup for terminals on one edge side
            my $v =
              keys %{ $el
                  ->{terminals} };  # Number of elements on one side of the edge
            for my $l ( 0 .. $#$terminals ) {
                my $terminal = $terminals->[$l];
                my $name     = $terminal->get_name;
                if ( $el->{terminals}{$name} ) {
                    $m[$l][$e] = ( $n - $v ) / ( $n * $v );
                }
                else {
                    $m[$l][$e] = $v / ( $n * ( $n - $v ) );
                }
            }
        }

        # Now we can calculate through the matrix
        my $shapley = {};
        for my $l ( 0 .. $#$terminals ) {
            my $terminal = $terminals->[$l];
            my $name     = $terminal->get_name;
            for my $e ( 0 .. $#$edges ) {
                my $edge = $edge_lookup->{ $edges->[$e] };
                $shapley->{$name} += $edge->{branch_length} * $m[$l][$e];
            }
        }
        return $shapley;
    }

    sub _calc_shapley_traverse {

        # This does a depth first traversal to assign the terminals
        # to the outgoing side of each branch.
        my ( $index, $previous, $edge_lookup, $direction ) = @_;
        return unless $index;
        $previous ||= '';

        # Is this element a root?
        my $is_root = !$index->get_parent;

        # Now assemble all the terminal datapoints and use the soft reference
        # to keep track of which end the terminals are attached
        my @core_terminals;
        if ( $previous and $index->is_terminal ) {
            push @core_terminals, $index->get_name;
        }
        my $parent = $index->get_parent || '';
        my @child_terminals;
        my $child_nodes = $index->get_children || [];
        for my $child (@$child_nodes) {
            next unless $child ne $previous;
            push @child_terminals,
              _calc_shapley_traverse( $child, $index, $edge_lookup, 'tip' );
        }
        my @parent_terminals;
        if ( $parent ne $previous ) {
            push @parent_terminals,
              _calc_shapley_traverse( $parent, $index, $edge_lookup, 'root' );
        }

# We're going to toss the root node and we need to merge the root's child branches
        unless ($is_root) {
            $edge_lookup->{$index} = {
                branch_length => $index->get_branch_length,
                terminals     => {
                    map { $_ => 1 } @core_terminals,
                    $direction eq 'root' ? @parent_terminals : @child_terminals
                }
            };
        }
        return ( @core_terminals, @child_terminals, @parent_terminals );
    }
    
=item calc_shapley_mean()

Calculates the mean Shapley value over all terminals

 Type    : Calculation
 Title   : calc_shapley_mean
 Usage   : my $es = $tree->calc_shapley_mean();
 Function: Returns the mean Shapley value over all terminals
 Returns : HASHREF
 Args    : NONE

=cut

	sub calc_shapley_mean {
		my $self = shift;
		my $sv = $self->calc_shapley;
		my @sv = values %{ $sv };
		return sum(@sv)/scalar(@sv);
	}

=back

=head2 VISITOR METHODS

The following methods are a - not entirely true-to-form - implementation of the Visitor
design pattern: the nodes in a tree are visited, and rather than having an object
operate on them, a set of code references is used. This can be used, for example, to
serialize a tree to a string format. To create a newick string without branch lengths
you would use something like this (there is a more powerful 'to_newick' method, so this
is just an example):

 $tree->visit_depth_first(
	'-pre_daughter'   => sub { print '('             },	
	'-post_daughter'  => sub { print ')'             },	
	'-in'             => sub { print shift->get_name },
	'-pre_sister'     => sub { print ','             },	
 );
 print ';';

=over

=item visit_depth_first()

Visits nodes depth first

 Type    : Visitor method
 Title   : visit_depth_first
 Usage   : $tree->visit_depth_first( -pre => sub{ ... }, -post => sub { ... } );
 Function: Visits nodes in a depth first traversal, executes subs
 Returns : $tree
  Args    : Optional handlers in the order in which they would be executed on an internal node:
			
			# first event handler, is executed when node is reached in recursion
			-pre            => sub { print "pre: ",            shift->get_name, "\n" },

			# is executed if node has a daughter, but before that daughter is processed
			-pre_daughter   => sub { print "pre_daughter: ",   shift->get_name, "\n" },
			
			# is executed if node has a daughter, after daughter has been processed	
			-post_daughter  => sub { print "post_daughter: ",  shift->get_name, "\n" },

			# is executed whether or not node has sisters, if it does have sisters
			# they're processed first	
			-in             => sub { print "in: ",             shift->get_name, "\n" },
			
			# is executed if node has a sister, before sister is processed
			-pre_sister     => sub { print "pre_sister: ",     shift->get_name, "\n" },	
			
			# is executed if node has a sister, after sister is processed
			-post_sister    => sub { print "post_sister: ",    shift->get_name, "\n" },							
			
			# is executed last			
			-post           => sub { print "post: ",           shift->get_name, "\n" },
			
			# specifies traversal order, default 'ltr' means first_daugher -> next_sister
			# traversal, alternate value 'rtl' means last_daughter -> previous_sister traversal
			-order          => 'ltr', # ltr = left-to-right, 'rtl' = right-to-left
 Comments: 

=cut

    sub visit_depth_first {
        my $self = shift;
        if ( my $root = $self->get_root ) {
        	$root->visit_depth_first(looks_like_hash @_);
        }
        return $self;
    }

=item visit_breadth_first()

Visits nodes breadth first

 Type    : Visitor method
 Title   : visit_breadth_first
 Usage   : $tree->visit_breadth_first( -pre => sub{ ... }, -post => sub { ... } );
 Function: Visits nodes in a breadth first traversal, executes handlers
 Returns : $tree
 Args    : Optional handlers in the order in which they would be executed on an internal node:
			
			# first event handler, is executed when node is reached in recursion
			-pre            => sub { print "pre: ",            shift->get_name, "\n" },
			
			# is executed if node has a sister, before sister is processed
			-pre_sister     => sub { print "pre_sister: ",     shift->get_name, "\n" },	
			
			# is executed if node has a sister, after sister is processed
			-post_sister    => sub { print "post_sister: ",    shift->get_name, "\n" },			
			
			# is executed whether or not node has sisters, if it does have sisters
			# they're processed first	
			-in             => sub { print "in: ",             shift->get_name, "\n" },			
			
			# is executed if node has a daughter, but before that daughter is processed
			-pre_daughter   => sub { print "pre_daughter: ",   shift->get_name, "\n" },
			
			# is executed if node has a daughter, after daughter has been processed	
			-post_daughter  => sub { print "post_daughter: ",  shift->get_name, "\n" },				
			
			# is executed last			
			-post           => sub { print "post: ",           shift->get_name, "\n" },
			
			# specifies traversal order, default 'ltr' means first_daugher -> next_sister
			# traversal, alternate value 'rtl' means last_daughter -> previous_sister traversal
			-order          => 'ltr', # ltr = left-to-right, 'rtl' = right-to-left
 Comments: 

=cut

    sub visit_breadth_first {
        my $self = shift;
        my %args = looks_like_hash @_;
        $self->get_root->visit_breadth_first(%args);
        return $self;
    }

=item visit_level_order()

Visits nodes in a level order traversal.

 Type    : Visitor method
 Title   : visit_level_order
 Usage   : $tree->visit_level_order( sub{...} );
 Function: Visits nodes in a level order traversal, executes sub
 Returns : $tree
 Args    : A subroutine reference that operates on visited nodes.
 Comments:

=cut	

    sub visit_level_order {
        my ( $tree, $sub ) = @_;
        if ( my $root = $tree->get_root ) {
            $root->visit_level_order($sub);
        }
        else {
            throw 'BadArgs' => 'Tree has no root';
        }
        return $tree;
    }

=back

=head2 TREE MANIPULATION

=over

=item chronompl()

Modifies branch lengths using the mean path lengths method of
Britton et al. (2002). For more about this method, see:
L<http://dx.doi.org/10.1016/S1055-7903(02)00268-3>

 Type    : Tree manipulator
 Title   : chronompl
 Usage   : $tree->chronompl;
 Function: Makes tree ultrametric using MPL method
 Returns : The modified, now ultrametric invocant.
 Args    : NONE
 Comments: 

=cut

    sub chronompl {
        my $self = shift;
        $self->visit_depth_first(
            '-post' => sub {
                my $node = shift;
                my %paths;
                my $children = $node->get_children;
                for my $child ( @{$children} ) {
                    my $cp = $child->get_generic('paths');
                    my $bl = $child->get_branch_length;
                    for my $id ( keys %{$cp} ) {
                        $paths{$id} = $cp->{$id} + $bl;
                    }
                }
                if ( not scalar @{$children} ) {
                    $paths{ $node->get_id } = 0;
                }
                $node->set_generic( 'paths' => \%paths );
                my $total = 0;
                $total += $_ for values %paths;
                my $mean = $total / scalar keys %paths;
                $node->set_generic( 'age' => $mean );
            }
        );
        return $self->agetobl;
    }

=item grafenbl()

Computes and assigns branch lengths using Grafen's method, which makes
node ages proportional to clade size. For more about this method, see:
L<http://dx.doi.org/10.1098/rstb.1989.0106>

 Type    : Tree manipulator
 Title   : grafenbl
 Usage   : $tree->grafenbl;
 Function: Assigns branch lengths using Grafen's method
 Returns : The modified, now ultrametric invocant.
 Args    : Optional, a power ('rho') to which all node ages are raised
 Comments: 

=cut

    sub grafenbl {
        my ( $self, $rho ) = @_;
        my $total = 0;
        $self->visit_depth_first(
            '-post' => sub {
                my $node = shift;
                if ( $node->is_terminal ) {
                    $node->set_generic( 'adjntips' => 0 );
                    $node->set_generic( 'ntips'    => 1 );
                }
                else {
                    my $children = $node->get_children;
                    my $ntips    = 0;
                    for my $child ( @{$children} ) {
                        $ntips += $child->get_generic('ntips');
                    }
                    $node->set_generic( 'ntips'    => $ntips );
                    $node->set_generic( 'adjntips' => $ntips - 1 );
                    $total = $ntips if $node->is_root;
                }
            }
        );
        $self->visit(
            sub {
                my $node = shift;
                if ($total) {
                    my $age = $node->get_generic('adjntips') / $total;
                    if ($rho) {
                        $age = $age**$rho;
                    }
                    $node->set_generic( 'age' => $age );
                }
            }
        );
        return $self->agetobl;
    }

=item agetobl()

Converts node ages to branch lengths

 Type    : Tree manipulator
 Title   : agetobl
 Usage   : $tree->agetobl;
 Function: Converts node ages to branch lengths
 Returns : The modified invocant.
 Args    : NONE
 Comments: This method uses ages as assigned to the generic 'age' slot
           on the nodes in the trees. I.e. for each node in the tree,
	   $node->get_generic('age') must return a number

=cut

    sub agetobl {
        my $self = shift;
        for my $node ( @{ $self->get_entities } ) {
            if ( my $parent = $node->get_parent ) {
                my $mp = $node->get_generic('age') || 0;
                my $pmp = $parent->get_generic('age');
                $node->set_branch_length( $pmp - $mp );
            }
            else {
                $node->set_branch_length(0);
            }
        }
        return $self;
    }

=item rankprobbl()

Generates branch lengths by calculating the rank probabilities for each node and applying
the expected waiting times under a pure birth process to these ranks. Uses Stadler's 
RANKPROB algorithm as described in: 

B<Gernhard, T.> et al., 2006. Estimating the relative order of speciation 
or coalescence events on a given phylogeny. I<Evolutionary Bioinformatics Online>. 
B<2>:285. L<http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2674681/>.

 Type    : Tree manipulator
 Title   : rankprobbl
 Usage   : $tree->rankprobbl;
 Function: Generates pure birth branch lengths
 Returns : The modified invocant.
 Args    : NONE
 Comments: Tree must be fully bifurcating

=cut

	sub rankprobbl {
		my $self = shift;
		my $root = $self->get_root;
		my $intervals = $root->calc_terminals;
		my @times;
		for my $i ( 1 .. $intervals ) {
			my $previous = $times[-1] || 0;
			push @times, $previous + ( 1 / $i );
		}
		my $total = $times[-1];
		for my $node ( @{ $self->get_internals } ) {
			my $rankprobs = $root->calc_rankprob($node);
			my @weighted_waiting_times;
			for my $i ( 1 .. $#{ $rankprobs } ) {
				push @weighted_waiting_times, $rankprobs->[$i] * $times[$i - 1];
			}
			my $age = $total - sum(@weighted_waiting_times);
			$node->set_generic( 'age' => $age );
		}
		$self->agetobl;
		$root->set_branch_length(1);
		return $self;
	}

=item ultrametricize()

Sets all root-to-tip path lengths equal.

 Type    : Tree manipulator
 Title   : ultrametricize
 Usage   : $tree->ultrametricize;
 Function: Sets all root-to-tip path 
           lengths equal by stretching
           all terminal branches to the 
           height of the tallest node.
 Returns : The modified invocant.
 Args    : NONE
 Comments: This method is analogous to 
           the 'ultrametricize' command
           in Mesquite, i.e. no rate smoothing 
           or anything like that happens, just 
           a lengthening of terminal branches.

=cut

    sub ultrametricize {
        my $tree    = shift;
        my $tallest = 0;
        foreach ( @{ $tree->get_terminals } ) {
            my $path_to_root = $_->calc_path_to_root;
            if ( $path_to_root > $tallest ) {
                $tallest = $path_to_root;
            }
        }
        foreach ( @{ $tree->get_terminals } ) {
            my $newbl =
              $_->get_branch_length + ( $tallest - $_->calc_path_to_root );
            $_->set_branch_length($newbl);
        }
        return $tree;
    }

=item scale()

Scales the tree to the specified height.

 Type    : Tree manipulator
 Title   : scale
 Usage   : $tree->scale($height);
 Function: Scales the tree to the 
           specified height.
 Returns : The modified invocant.
 Args    : $height = a numerical value 
           indicating root-to-tip path length.
 Comments: This method uses the 
           $tree->calc_tree_height method, and 
           so for additive trees the *average* 
           root-to-tip path length is scaled to
           $height (i.e. some nodes might be 
           taller than $height, others shorter).

=cut

    sub scale {
        my ( $tree, $target_height ) = @_;
        my $current_height = $tree->calc_tree_height;
        my $scaling_factor = $target_height / $current_height;
        foreach ( @{ $tree->get_entities } ) {
            my $bl = $_->get_branch_length;
            if ($bl) {
                my $new_branch_length = $bl * $scaling_factor;
                $_->set_branch_length($new_branch_length);
            }
        }
        return $tree;
    }

=item resolve()

Randomly breaks polytomies.

 Type    : Tree manipulator
 Title   : resolve
 Usage   : $tree->resolve;
 Function: Randomly breaks polytomies by inserting 
           additional internal nodes.
 Returns : The modified invocant.
 Args    : Optionally, when passed a true value (e.g. '1'), the newly created nodes
           will be unnamed, otherwise they will be named 'r1', 'r2', 'r3' and so on.
 Comments:

=cut

    sub resolve {
        my ( $tree, $anonymous ) = @_;
        for my $node ( @{ $tree->get_internals } ) {
            my @children = @{ $node->get_children };
            if ( scalar @children > 2 ) {
                my $i = 1;
                while ( scalar @children > 2 ) {
                	my %args = ( '-branch_length' => 0.00 );
                	$args{'-name'} = 'r' . $i++ unless $anonymous;
                    my $newnode = $fac->create_node(%args);
                    $tree->insert($newnode);
                    $newnode->set_parent($node);
                    for ( 1 .. 2 ) {
                        my $i = int( rand( scalar @children ) );
                        $children[$i]->set_parent($newnode);
                        splice @children, $i, 1;
                    }
                    push @children, $newnode;
                }
            }
        }
        return $tree;
    }
    
=item replicate()

Simulates tree(s) whose properties resemble that of the input tree in terms of birth/death
rate, depth, and size/depth distribution of genera. This uses the R environment for 
statistics to get a maximum likelihood estimate of birth/death rates on the source tree
and therefore requires the package L<Statistics::R> to be installed, and the R package
'ape'. The idea is that this is used on a species tree that is ultrametric. To get 
simulated genera whose sizes and root depths approximate those of the source tree, 
annotate genus nodes in the source tree, e.g. using $tree->generize, and provide the 
optional -genera flag of replicate() with a true value.

This method uses the function C<birthdeath> from the R package C<ape>. If you use this
method in a publication, you should therefore B<cite that package> (in addition to 
Bio::Phylo). More information about C<ape> can be found at L<http://ape-package.ird.fr/>.

 Type    : Tree manipulator
 Title   : replicate
 Usage   : my $forest = $tree->replicate;
 Function: Simulates tree(s) whose properties resemble that of the invocant tree
 Returns : Bio::Phylo::Forest
 Args    : Optional: -trees    => number of replicates, default is 1
           Optional: -rootedge => keep the birth/death root branch, then scale the tree(s)
           Optional: -genera   => approximate distribution of source genus sizes and depths 
           (do this by tagging internal nodes: $node->set_rank('genus'))
		   Optional: -seed     => a random integer seed for generating the birth/death tree
 Comments: Requires Statistics::R, and an R environment with 'ape' installed
           Expects to operate on an ultrametric tree

=cut    
    
    sub replicate {
    	my ( $self, %args ) = @_;
    	if ( looks_like_class('Statistics::R') and looks_like_class('Bio::Phylo::Generator') ) {

    		# get birthdeath parameters
    		$logger->info("going to estimate b/d");
    		my $newick = $self->to_newick;
    		my $R = Statistics::R->new;
			if ( my $seed = $args{'-seed'} ) {
				$R->run(qq[set.seed($seed)]);
			}
    		$R->run(q[library("ape")]);
    		$R->run(qq[phylo <- read.tree(text="$newick")]);
    		$R->run(q[bd <- birthdeath(phylo)]);
    		$R->run(q[ratio <- as.double(bd$para[1])]);
    		my $b_over_d = $R->get(q[ratio]);
    		$logger->info("b/d=$b_over_d");
    		
    		# generate the tree
    		$logger->info("going to simulate tree(s)");
    		my $gen = Bio::Phylo::Generator->new;
    		my $forest = $gen->gen_rand_birth_death(
    			'-trees'    => $args{'-trees'} || 1,
    			'-killrate' => $b_over_d,
    			'-tips'     => scalar(@{ $self->get_terminals }),
				);
    		
    		# invent tip labels
    		$forest->visit(sub{
    			my $t = shift;
    			my $n = 0;
    			for my $tip ( @{ $t->get_terminals } ) {
    				my $genus = $self->_make_taxon_name;
    				my $species = $self->_make_taxon_name;
    				if ( $genus =~ /(..)$/ ) {
    					my $suffix = $1;
    					$species =~ s/..$/$suffix/;
    					$tip->set_name( ucfirst($genus) . '_' . $species );
    				}
    			}
    		});
    		
    		# scale the trees
    		my $height = $self->calc_tree_height;
    		$forest->visit(sub{shift->get_root->set_branch_length(0)}) if not $args{'-rootedge'};
    		$forest->visit(sub{shift->scale($height)});
    		$logger->info("tree height is $height");
    		
    		# create similar genera, optionally
    		if ( $args{'-genera'} ) {
    			$logger->info("going to approximate genera");
    		
    			# iterate over trees
    			for my $replicate ( @{ $forest->get_entities } ) {
					
					# get distribution of source genus sizes and depths
					$logger->info("calculating source genus sizes and depths");
					my ( $counter, %genera ) = ( 0 );
					$self->visit(sub{
						my $node = shift;
						my $rank = $node->get_rank;
						if ( $rank ) {
							if ( $rank eq 'genus' ) {
								my $id     = $node->get_id;
								my $height = $height - $node->calc_path_to_root;
								my $size   = scalar(@{ $node->get_terminals });
								my $name   = $node->get_name || 'Genus' . ++$counter;
								$genera{$id} = {
									'name'   => $name,
									'size'   => $size,
									'height' => $height,
									'node'   => $node,
								};
							}
						}
					});
					
					# get distribution of target node sizes and depths
					$logger->info("calculating target genus sizes and depths");					
					my ( %node );
					$replicate->visit(sub{
						my $node   = shift;
						my $id     = $node->get_id;	
						my $height = $height - $node->calc_path_to_root;												
						my $size   = scalar(@{ $node->get_terminals });
						push @{ $node{$size} }, [ $node, $height, $id ];
					});
										
					# keep track of which members from the genera have already been assigned
					my %seen_labels;
					
					# start assigning genera, from big to small
					for my $genus ( sort { $genera{$b}->{'size'} <=> $genera{$a}->{'size'} } keys %genera ) {
						# get key for candidate set of nodes
						my $name = $genera{$genus}->{'name'} || "Genus${genus}";						
						my $size = $genera{$genus}->{'size'};
						my @labels = shuffle map { $_->get_name } @{ $genera{$genus}->{'node'}->get_terminals };
						
						# avoid assigning labels more than once when genera are nested
						@labels = grep { ! $seen_labels{$_} } @labels;
						$seen_labels{$_}++ for @labels;

						$logger->info("processing $name ($size tips)");
						SIZE: while( not $node{$size} ) { last SIZE if --$size <= 1 }
					
						# get target height
						if ( $node{$size} ) {
							$logger->info("found candidate(s) with $size tips");						
							my $h = $genera{$genus}->{'height'};
							my ($node) = map { $_->[0] } 
							            sort { abs($a->[1]-$h) <=> abs($b->[1]-$h) } 
							                @{ $node{$size} };
							
							# assign genus label to node and tips, remove all descendants 
							# (and self!) from list of candidates, as we can't nest genera
							my $sp = 0;							
							$node->set_name($name);
						
							for my $n ( $node, @{ $node->get_descendants } ) {
								$n->set_name($labels[$sp++]) if $n->is_terminal;
								for my $i ( 1 .. $size ) {
									if ( my $array = $node{$i} ) {
										my $id = $n->get_id;
										my @filtered = grep { $id != $_->[2] } @$array;
										@filtered ? $node{$i} = \@filtered : delete $node{$i};
									}
								}
							}
						}
						else {
							$logger->warn("exhausted candidate genera for $genus");
						}
					}
    			}
    		}
    		return $forest;
    	}
    }
    
    sub _make_taxon_name {
    	my %l = (
    		'v' => [ qw(a e i o u) ],
    		'c' => [ qw(qu cr ct p pr ps rs ld ph gl ch l sc v n m) ],
    	);
    	my @suffixes =qw(us os is as es);
    	my $length = 1 + int rand 2;
    	my @order = int rand 2 ? qw(v c) : qw(c v);
    	my ($l1) = shuffle(@{$l{$order[0]}});
    	my ($l2) = shuffle(@{$l{$order[1]}});
		my @name = ( $l1, $l2 );
    	for my $i ( 0 .. $length ) {
	    	($l1) = shuffle(@{$l{$order[0]}});
    		($l2) = shuffle(@{$l{$order[1]}});
    		push @name, $l1, $l2;
    	}
    	my $name = join '', @name;
    	$name =~ s/[aeiou]+$//;
    	my ($suffix) = shuffle(@suffixes);
    	return $name . $suffix;
    }

=item generize()

Identifies monophyletic genera by traversing the tree, taking the first word of the tip
names and finding the MRCA of each word. That MRCA is tagged as rank 'genus' and assigned
the name.

 Type    : Tree manipulator
 Title   : generize
 Usage   : $tree->generize(%args);
 Function: Identifies monophyletic genera
 Returns : Invocant
 Args    : Optional: -delim => the delimiter that separates the genus name from any 
                               following (sub)specific epithets. Default is a space ' '.
           Optional: -monotypic => if true, also tags monotypic genera
           Optional: -polypara  => if true, also tags poly/paraphyletic genera. Any 
                                   putative genera nested within the largest of the 
                                   entangled, poly/paraphyletic genera will be ignored.
 Comments:

=cut
    
    sub generize {
    	my ( $self, %args ) = @_;
    	my $delim = $args{'-delim'} || ' ';
    	
    	# bin by genus name
    	my %genera;
    	for my $tip ( @{ $self->get_terminals } ) {
    		my $binomial = $tip->get_name;
    		my ($genus) = split /$delim/, $binomial;
    		$genera{$genus} = [] if not $genera{$genus};
    		push @{ $genera{$genus} }, $tip;
    	}
    	
    	# identify and tag MRCAs
    	my %skip;
    	for my $genus ( sort { scalar(@{$genera{$b}}) <=> scalar(@{$genera{$a}}) } keys %genera ) {
    		next if $skip{$genus};
    		my $tips = $genera{$genus};
    		
    		# genus is monotypic
    		if ( scalar(@$tips) == 1 ) {
    			$logger->info("$genus is monotypic");
    			$tips->[0]->set_rank('genus') if $args{'-monotypic'};
    		}
    		else {
    		
    			# get the MRCA
				my ( $mrca, %seen );
				my @paths = map { @{ $_->get_ancestors } } @{ $tips };
				$seen{$_->get_id}++ for @paths;
				($mrca) = map { $_->[0] } 
				         sort { $b->[1] <=> $a->[1] } 
				          map { [ $_, $_->calc_path_to_root ] } 
				         grep { $seen{$_->get_id} == @$tips } @paths;
				
				# identify mono/poly/para
				my $clade_size = @{ $mrca->get_terminals };
				my $tip_count  = @{ $tips };
				if ( $clade_size == $tip_count ) {
					$logger->info("$genus is monophyletic");
					$mrca->set_rank('genus');
					$mrca->set_name($genus);
				}
				else {
										
					# we could now have nested, smaller genera inside this one
					$logger->info("$genus is non-monophyletic $clade_size != $tip_count");
					if ( $args{'-polypara'} ) {
						$logger->info("tagging non-monophyletic $genus anyway");
						$mrca->set_rank('genus');
						$mrca->set_name($genus);	
						my @names = map { $_->get_name } @{ $mrca->get_terminals };
						for my $name ( @names ) {
							$name =~ s/^(.+?)${delim}.+$/$1/;
							$skip{$name}++;
						}			
					}
				}
			}
        }
        return $self;
    }

=item prune_tips()

Prunes argument nodes from invocant.

 Type    : Tree manipulator
 Title   : prune_tips
 Usage   : $tree->prune_tips(\@taxa);
 Function: Prunes specified taxa from invocant.
 Returns : A pruned Bio::Phylo::Forest::Tree object.
 Args    : A reference to an array of taxon names, or a taxa block, or a
           reference to an array of taxon objects, or a reference to an
           array of node objects
 Comments:

=cut

    sub prune_tips {
        my ( $self, $tips ) = @_;
        my %prune = map { $_->get_id => 1 } @{ $self->_get_tip_objects($tips) };
        my @keep;
        for my $tip ( @{ $self->get_terminals } ) {
            if ( not $prune{$tip->get_id} ) {
                push @keep, $tip;
            }
        }
        return $self->keep_tips(\@keep);
    }

=item keep_tips()

Keeps argument nodes from invocant (i.e. prunes all others).

 Type    : Tree manipulator
 Title   : keep_tips
 Usage   : $tree->keep_tips(\@taxa);
 Function: Keeps specified taxa from invocant.
 Returns : The pruned Bio::Phylo::Forest::Tree object.
 Args    : Same as prune_tips, but with inverted meaning
 Comments:

=cut

    sub _get_tip_objects {
        my ( $self, $arg ) = @_;
        my @tips;

        # argument is a taxa block
        if ( blessed $arg ) {
            for my $taxon ( @{ $arg->get_entities } ) {
                my @nodes = @{ $taxon->get_nodes };
                for my $node ( @nodes ) {
                    push @tips, $node if $self->contains($node);
                }
            }
        }

        # arg is an array ref
        else {
            my $TAXON = _TAXON_;
            my $NODE  = _NODE_;
            for my $thing ( @{ $arg } ) {

                # thing is a taxon or node object
                if ( blessed $thing ) {
                    if ( $thing->_type == $TAXON ) {
                        my @nodes = @{ $thing->get_nodes };
                        for my $node ( @nodes ) {
                            push @tips, $node if $self->contains($node);
                        }
                    }
                    elsif ( $thing->_type == $NODE ) {
                        push @tips, $thing if $self->contains($thing);
                    }
                }

                # thing is a name
                else {
                    if ( my $tip = $self->get_by_name($thing) ) {
                        push @tips, $tip;
                    }
                }
            }
        }
        return \@tips;
    }

    sub keep_tips {
        my ( $self, $tip_names ) = @_;

        # get node objects for tips
        my @tips = @{ $self->_get_tip_objects($tip_names) };
        
        # identify nodes that are somewhere on the path from tip to root
        my %seen;
        for my $tip ( @tips ) {
            my $node = $tip;
            PARENT: while ( $node ) {
                my $id = $node->get_id;
                if ( not exists $seen{$id} ) {
                    $seen{$id} = 0;
                    $node = $node->get_parent;
                }
                else {
                    last PARENT;
                }
            }
        }

        # now do the pruning
        $self->visit_depth_first(
            '-post' => sub {
                # prune node
                my $n = shift;
                my $nid = $n->get_id;
                my $p = $n->get_parent;
                if ( not exists $seen{$nid} ) {
                    $p->delete($n) if $p;
                    $self->delete($n);
                    # record number of children lost by parent
                    if (defined $p) {
                       my $pid = $p->get_id;
                       if ( exists $seen{$pid} ) {
                          $seen{$pid}++;
                       }
                    }
                    return;
                }
                # remove nodes who lost children and are now down to a single one
                my @children = @{ $n->get_children };
                if ( (scalar @children == 1) && ($seen{$nid} > 0) ) {
                    my ($c) = @children;
                    my $bl  = $n->get_branch_length;
                    my $cbl = $c->get_branch_length;
                    $c->set_branch_length( $bl + $cbl ) if defined $cbl && defined $bl;
                    $self->delete($n);
                    $c->set_parent($p);
                    $p->delete($n) if $p;
                }
            }
        );
        return $self;
    }

=item negative_to_zero()

Converts negative branch lengths to zero.

 Type    : Tree manipulator
 Title   : negative_to_zero
 Usage   : $tree->negative_to_zero;
 Function: Converts negative branch 
           lengths to zero.
 Returns : The modified invocant.
 Args    : NONE
 Comments:

=cut

    sub negative_to_zero {
        my $tree = shift;
        foreach my $node ( @{ $tree->get_entities } ) {
            my $bl = $node->get_branch_length;
            if ( $bl && $bl < 0 ) {
                $node->set_branch_length(0);
            }
        }
        return $tree;
    }

=item ladderize()

Sorts nodes in ascending (or descending) order of number of children. Tips are
sorted alphabetically (ascending or descending) relative to their siblings.

 Type    : Tree manipulator
 Title   : ladderize
 Usage   : $tree->ladderize(1);
 Function: Sorts nodes
 Returns : The modified invocant.
 Args    : Optional, a true value to reverse the sort order

=cut

    sub ladderize {
        my ( $self, $right ) = @_;
        my %child_count;
        $self->visit_depth_first(
            '-post' => sub {
                my $node = shift;
                
                # record the number of descendants for the focal
                # node. because this is a post-order traversal
                # we have already counted the children of the 
                # children, recursively. bin nodes and tips in 
                # separate containers.
                my $id = $node->get_id;
                my @children = @{ $node->get_children };
                my $count = 1;
                my ( @tips, @nodes );
                for my $child (@children) {
                    $count += $child_count{ $child->get_id };
                    if ( $child->is_terminal ) {
                    	push @tips, $child;
                    }
                    else {
                    	push @nodes, $child;
                    }
                }
                $child_count{$id} = $count;
                
                # sort the immediate children. if these are 
                # tips we will sort alphabetically by name (so
                # that cherries are sorted predictably), otherwise
                # sort by descendant count
                my @sorted;
                
				if ($right) {
					@sorted = map { $_->[0] }
					  sort { $b->[1] <=> $a->[1] }
					  map { [ $_, $child_count{ $_->get_id } ] } @nodes;
					push @sorted, sort { $b->get_name cmp $a->get_name } @tips;
				}
				else {					
					@sorted = map { $_->[0] }
					  sort { $a->[1] <=> $b->[1] }
					  map { [ $_, $child_count{ $_->get_id } ] } @nodes;
					unshift @sorted, sort { $a->get_name cmp $b->get_name } @tips;
				}

				# apply the new sort order                
                for my $i ( 0 .. $#sorted ) {
                    $node->insert_at_index( $sorted[$i], $i );
                }
            }
        );
        return $self;
    }

=item sort_tips()

Sorts nodes in (an approximation of) the provided ordering. Given an array
reference of taxa, an array reference of name strings, or a taxa object, this
method attempts to order the tips in the same way. It does this by recursively
computing the rank for all internal nodes by taking the average rank of its
children. This results in the following orderings:

 (a,b,c,d,e,f); => $tree->sort_tips( [ qw(a c b f d e) ] ) => (a,c,b,f,d,e);
 
 (a,b,(c,d),e,f); => $tree->sort_tips( [ qw(a b e d c f) ] ); => (a,b,(e,(d,c)),f);
 
 ((a,b),((c,d),e),f); => $tree->sort_tips( [ qw(a e d c b f) ] ); => ((e,(d,c)),(a,b),f);

 Type    : Tree manipulator
 Title   : sort_tips
 Usage   : $tree->sort_tips($ordering);
 Function: Sorts nodes
 Returns : The modified invocant.
 Args    : Required, an array reference (or taxa object) whose ordering to match

=cut

    sub sort_tips {
        my ( $self, $taxa ) = @_;
        my @taxa =
          UNIVERSAL::can( $taxa, 'get_entities' )
          ? @{ $taxa->get_entities }
          : @{$taxa};
        my @names =
          map { UNIVERSAL::can( $_, 'get_name' ) ? $_->get_name : $_ } @taxa;
        my $i = 1;
        my %rank = map { $_ => $i++ } @names;
        $self->visit_depth_first(
            '-post' => sub {
                my $node     = shift;
                my @children = @{ $node->get_children };
                if (@children) {
                    my @ranks = map { $_->get_generic('rank') } @children;
                    my $sum   = sum @ranks;
                    my $mean  = $sum / scalar(@ranks);
                    $node->set_generic( 'rank' => $mean );
                    $node->clear;
                    $node->insert(
                        sort {
                            $a->get_generic('rank') <=> $b->get_generic('rank')
                          } @children
                    );
                }
                else {
                    $node->set_generic( 'rank' => $rank{ $node->get_name } );
                }
            }
        );
        return $self->_analyze;
    }

=item exponentiate()

Raises branch lengths to argument.

 Type    : Tree manipulator
 Title   : exponentiate
 Usage   : $tree->exponentiate($power);
 Function: Raises branch lengths to $power.
 Returns : The modified invocant.
 Args    : A $power in any of perl's number formats.

=cut

    sub exponentiate {
        my ( $tree, $power ) = @_;
        if ( !looks_like_number $power ) {
            throw 'BadNumber' => "Power \"$power\" is a bad number";
        }
        else {
            foreach my $node ( @{ $tree->get_entities } ) {
                my $bl = $node->get_branch_length;
                $node->set_branch_length( $bl**$power );
            }
        }
        return $tree;
    }

=item multiply()

Multiples branch lengths by argument.

 Type    : Tree manipulator
 Title   : multiply
 Usage   : $tree->multiply($num);
 Function: Multiplies branch lengths by $num.
 Returns : The modified invocant.
 Args    : A $number in any of perl's number formats.

=cut
    
    sub multiply {
    	my ( $tree, $num ) = @_;
    	if ( !looks_like_number $num ) {
    		throw 'BadNumber' => "Number '$num' is a bad number";
    	}
    	$tree->visit(sub{
    		my $node = shift;
    		my $length = $node->get_branch_length;
    		if ( $length ) {
    			$node->set_branch_length( $length * $num );
    		}
    	});
    	return $tree;
    }

=item log_transform()

Log argument base transform branch lengths.

 Type    : Tree manipulator
 Title   : log_transform
 Usage   : $tree->log_transform($base);
 Function: Log $base transforms branch lengths.
 Returns : The modified invocant.
 Args    : A $base in any of perl's number formats.

=cut

    sub log_transform {
        my ( $tree, $base ) = @_;
        if ( !looks_like_number $base ) {
            throw 'BadNumber' => "Base \"$base\" is a bad number";
        }
        else {
            foreach my $node ( @{ $tree->get_entities } ) {
                my $bl = $node->get_branch_length;
                my $newbl;
                eval { $newbl = ( log $bl ) / ( log $base ); };
                if ($@) {
                    throw 'OutOfBounds' =>
                      "Invalid input for log transform: $@";
                }
                else {
                    $node->set_branch_length($newbl);
                }
            }
        }
        return $tree;
    }

=item remove_unbranched_internals()

Collapses internal nodes with fewer than 2 children.

 Type    : Tree manipulator
 Title   : remove_unbranched_internals
 Usage   : $tree->remove_unbranched_internals;
 Function: Collapses internal nodes 
           with fewer than 2 children.
 Returns : The modified invocant.
 Args    : NONE
 Comments:

=cut

    sub remove_unbranched_internals {
        my $self = shift;
        my @delete;
        $self->visit_depth_first(
            '-post' => sub {
                my $node = shift;
                my @children = @{ $node->get_children };
                
                # the node is interior, now need to check for each child
                # if it's interior as well
                if ( @children ) {
                
                	# special case for the root with unbranched child
                	if ( $node->is_root and 1 == @children ) {
                		my ($child) = @children;
						for my $gchild ( @{ $child->get_children } ) {
					
							# compute the new branch length for $gchild
							my $clength = $child->get_branch_length;
							my $glength = $gchild->get_branch_length;
							my $length = $clength if defined $clength;
							$length += $glength if defined $glength;
							$gchild->set_branch_length($length) if defined $length;
							
							# connect grandchild to root
							$gchild->set_parent($node);
							$node->delete($child);
					
							# will delete these nodes from the tree array
							# after the recursion
							push @delete, $child;						
						}              		
                	}
                	else {
                    
						# iterate over children 
						for my $child ( @children ) {
							my $child_name = $child->get_name;
							my @grandchildren = @{ $child->get_children };
						
							# $child is an unbranched internal, so $grandchildren[0]
							# needs to be connected to $node
							if ( 1 == scalar @grandchildren ) {
								my $gchild = $grandchildren[0];
							
								# compute the new branch length for $gchild
								my $clength = $child->get_branch_length;
								my $glength = $gchild->get_branch_length;
								my $length = $clength if defined $clength;
								$length += $glength if defined $glength;
								$gchild->set_branch_length($length) if defined $length;
								
								$gchild->set_parent($node);
								$node->delete($child);
							
								# will delete these nodes from the tree array
								# after the recursion
								push @delete, $child;						
							}
						}
                    }				
                }
            }
        );
        $self->delete($_) for @delete;
        return $self;
    }

=item remove_orphans()

Removes all unconnected nodes.

 Type    : Tree manipulator
 Title   : remove_orphans
 Usage   : $tree->remove_orphans;
 Function: Removes all unconnected nodes
 Returns : The modified invocant.
 Args    : NONE
 Comments:

=cut

    sub remove_orphans {
    	my $self = shift;
    	
    	# collect all nodes that are topologically connected
    	my %seen;
    	$self->visit_depth_first(
    		'-pre' => sub {
    			$seen{ shift->get_id }++;
    		}
    	);
    	
    	# collect all nodes
    	my @delete;
    	$self->visit(sub {
    		my $node = shift;
    		push @delete, $node if not $seen{$node->get_id};
    	});
    	$self->delete($_) for @delete;
    	
    	# notify user
    	if ( scalar @delete ) {
    		$logger->warn("deleted ".scalar(@delete)." orphaned nodes");
    	}
    	
    	return $self;
    }

=item deroot()

Collapses one of the children of a basal bifurcation

 Type    : Tree manipulator
 Title   : deroot
 Usage   : $tree->deroot;
 Function: Removes root
 Returns : The modified invocant.
 Args    : Optional: node to collapse
 Comments:

=cut

    sub deroot {
        my ($self,$collapsible) = @_;
        my $root = $self->get_root;
        my @children = @{ $root->get_children };
        if ( scalar @children < 3 ) {
        	if ( not $collapsible) {
            	($collapsible) = grep { $_->is_internal } @children;
            }
            $collapsible->collapse;
            return $self;
        }
        else {
            return $self;
        }
    }

=back

=head2 UTILITY METHODS

=over

=item clone()

Clones invocant.

 Type    : Utility method
 Title   : clone
 Usage   : my $clone = $object->clone;
 Function: Creates a copy of the invocant object.
 Returns : A copy of the invocant.
 Args    : Optional: a hash of code references to 
           override reflection-based getter/setter copying

           my $clone = $object->clone(  
               'set_forest' => sub {
                   my ( $self, $clone ) = @_;
                   for my $forest ( @{ $self->get_forests } ) {
                       $clone->set_forest( $forest );
                   }
               },
               'set_matrix' => sub {
                   my ( $self, $clone ) = @_;
                   for my $matrix ( @{ $self->get_matrices } ) {
                       $clone->set_matrix( $matrix );
                   }
           );

 Comments: Cloning is currently experimental, use with caution.
           It works on the assumption that the output of get_foo
           called on the invocant is to be provided as argument
           to set_foo on the clone - such as 
           $clone->set_name( $self->get_name ). Sometimes this 
           doesn't work, for example where this symmetry doesn't
           exist, or where the return value of get_foo isn't valid
           input for set_foo. If such a copy fails, a warning is 
           emitted. To make sure all relevant attributes are copied
           into the clone, additional code references can be 
           provided, as in the example above. Typically, this is
           done by overrides of this method in child classes.

=cut

    sub clone {
        my $self = shift;
        $logger->info("cloning $self");
        my %subs = @_;

        # override, because we'll handle insert
        $subs{'set_root'}      = sub { };
        $subs{'set_root_node'} = sub { };

        # we'll clone node objects, so no raw copying
        $subs{'insert'} = sub {
            my ( $self, $clone ) = @_;
            my %clone_of;
            for my $node ( @{ $self->get_entities } ) {
                my $cloned_node = $node->clone;
                $clone_of{ $node->get_id } = $cloned_node;
                $clone->insert($cloned_node);
            }
            for my $node ( @{ $self->get_entities } ) {
                my $cloned_node = $clone_of{ $node->get_id };
                if ( my $parent = $node->get_parent ) {
                    my $cloned_parent_node = $clone_of{ $parent->get_id };
                    $cloned_node->set_parent($cloned_parent_node);
                }
            }
        };
        return $self->SUPER::clone(%subs);
    }

=back

=head2 SERIALIZERS

=over

=item to_nexus()

Serializes invocant to nexus string.

 Type    : Stringifier
 Title   : to_nexus
 Usage   : my $string = $tree->to_nexus;
 Function: Turns the invocant tree object 
           into a nexus string
 Returns : SCALAR
 Args    : Any arguments that can be passed to Bio::Phylo::Forest::to_nexus

=cut

    sub to_nexus {
        my $self   = shift;
        my $forest = $fac->create_forest;
        $forest->insert($self);
        return $forest->to_nexus(@_);
    }

=item to_newick()

Serializes invocant to newick string.

 Type    : Stringifier
 Title   : to_newick
 Usage   : my $string = $tree->to_newick;
 Function: Turns the invocant tree object 
           into a newick string
 Returns : SCALAR
 Args    : NONE

=cut

    sub to_newick {
        my $self   = shift;
        my %args   = @_;
        my $newick = unparse( '-format' => 'newick', '-phylo' => $self, %args );
        return $newick;
    }

=item to_xml()

Serializes invocant to xml.

 Type    : Serializer
 Title   : to_xml
 Usage   : my $xml = $obj->to_xml;
 Function: Turns the invocant object into an XML string.
 Returns : SCALAR
 Args    : NONE

=cut

    sub to_xml {
        my $self     = shift;
        my $xsi_type = 'nex:IntTree';
        for my $node ( @{ $self->get_entities } ) {
            my $length = $node->get_branch_length;
            if ( defined $length and $length !~ /^[+-]?\d+$/ ) {
                $xsi_type = 'nex:FloatTree';
            }
        }
        $self->set_attributes( 'xsi:type' => $xsi_type );
        my $xml = $self->get_xml_tag;
        if ( my $root = $self->get_root ) {
            $xml .= $root->to_xml;
        }
        $xml .= $self->sets_to_xml . sprintf('</%s>', $self->get_tag);
        return $xml;
    }

=item to_svg()

Serializes invocant to SVG.

 Type    : Serializer
 Title   : to_svg
 Usage   : my $svg = $obj->to_svg;
 Function: Turns the invocant object into an SVG string.
 Returns : SCALAR
 Args    : Same args as the Bio::Phylo::Treedrawer constructor
 Notes   : This will only work if you have the SVG module
           from CPAN installed on your system.

=cut

    sub to_svg {
        my $self   = shift;
        my $drawer = $fac->create_drawer(@_);
        $drawer->set_tree($self);
        return $drawer->draw;
    }

=item to_dom()

 Type    : Serializer
 Title   : to_dom
 Usage   : $tree->to_dom($dom)
 Function: Generates a DOM subtree from the invocant
           and its contained objects
 Returns : an Element object
 Args    : DOM factory object

=cut

    sub to_dom {
        my ( $self, $dom ) = @_;
        $dom ||= $Bio::Phylo::NeXML::DOM::DOM;
        unless ( looks_like_object $dom, _DOMCREATOR_ ) {
            throw 'BadArgs' => 'DOM factory object not provided';
        }
        my $xsi_type = 'nex:IntTree';
        for my $node ( @{ $self->get_entities } ) {
            my $length = $node->get_branch_length;
            if ( defined $length and $length !~ /^[+-]?\d+$/ ) {
                $xsi_type = 'nex:FloatTree';
            }
        }
        $self->set_attributes( 'xsi:type' => $xsi_type );
        my $elt = $self->get_dom_elt($dom);
        if ( my $root = $self->get_root ) {
            $elt->set_child($_) for $root->to_dom($dom);
        }
        return $elt;
    }

=begin comment

 Type    : Internal method
 Title   : _consolidate
 Usage   : $tree->_consolidate;
 Function: Does pre-order traversal, only keeps
           nodes seen during traversal in tree,
           in order of traversal
 Returns :
 Args    :

=end comment

=cut

    sub _consolidate {
        my $self = shift;
        my @nodes;
        $self->visit_depth_first( '-pre' => sub { push @nodes, shift } );
        $self->clear;
        $self->insert(@nodes);
    }

=begin comment

 Type    : Internal method
 Title   : _container
 Usage   : $tree->_container;
 Function:
 Returns : CONSTANT
 Args    :

=end comment

=cut

    sub _container { $CONTAINER_CONSTANT }

=begin comment

 Type    : Internal method
 Title   : _type
 Usage   : $tree->_type;
 Function:
 Returns : CONSTANT
 Args    :

=end comment

=cut

    sub _type { $TYPE_CONSTANT }
    sub _tag  { 'tree' }

=back

=cut

    # podinherit_insert_token

=head1 SEE ALSO

There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> 
for any user or developer questions and discussions.

=over

=item L<Bio::Phylo::Listable>

The L<Bio::Phylo::Forest::Tree|Bio::Phylo::Forest::Tree> object inherits from
the L<Bio::Phylo::Listable|Bio::Phylo::Listable> object, so the methods defined
therein also apply to trees.

=item L<Bio::Phylo::Manual>

Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.

=back

=head1 CITATION

If you use Bio::Phylo in published research, please cite it:

B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
I<BMC Bioinformatics> B<12>:63.
L<http://dx.doi.org/10.1186/1471-2105-12-63>

=cut

}
1;
__DATA__
sub get_nodes {
	my $self = shift;
	my $order = 'depth';
	my @nodes;
	if ( @_ ) {
		my %args = @_;
		if ( $args{'-order'} and $args{'-order'} =~ m/^b/ ) {
			$order = 'breadth';
		}
	}
	if ( my $root = $self->get_root ) {	
        if ( $order eq 'depth' ) {      
            $root->visit_depth_first(
                -pre => sub { push @nodes, shift }
            );
        }
        else {
            $root->visit_level_order( sub { push @nodes, shift } ); # XXX bioperl is wrong
        }
	}
	return @nodes;
}

sub set_root {
	my ( $self, $node ) = @_;
	my @nodes = ($node);
	if ( my $desc = $node->get_descendants ) {
		push @nodes, @{ $desc };
	}
	$self->clear;
	$self->insert(@nodes);
	return $node;
}

*set_root_node = \&set_root;

*as_string = \&to_newick;

sub get_root_node{ shift->get_root }

sub number_nodes { shift->calc_number_of_nodes }

sub total_branch_length { shift->calc_tree_length }

sub height {
	my $self = shift;
	my $nodect =  $self->calc_number_of_nodes;
	return 0 if( ! $nodect ); 
	return log($nodect) / log(2);
}

sub id {
	my $self = shift;
	if ( @_ ) {
		$self->set_name(shift);
	}
	return $self->get_name;
}

sub score {
	my $self = shift;
	if ( @_ ) {
		$self->set_score(shift);
	}
	return $self->get_score;
}

sub get_leaf_nodes {
	my $self = shift;
	my $tips = $self->get_terminals;
	if ( $tips ) {
		return @{ $tips };
	}
	return;
}

sub _parse_newick {
	my $self = shift;
	my $newick = join ('', @{ $_[0] } ) . ';';
	my $forest = Bio::Phylo::IO::parse( '-format' => 'newick', '-string' => $newick );
	my $tree = $forest->first;
	my @nodes = @{ $tree->get_entities };
	for my $node ( @nodes ) {
		$self->insert($node);
		$tree->delete($node);
	}
	$tree->DESTROY;
	$forest->DESTROY;
}

sub find_node {
   my $self = shift;
   if( ! @_ ) { 
       $logger->warn("Must request a either a string or field and string when searching");
   }
   my ( $field, $value );
   if ( @_ == 1 ) {
        ( $field, $value ) = ( 'id', shift );
   }
   elsif ( @_ == 2 ) {
        ( $field, $value ) = @_;
        $field =~ s/^-//;
   }
   my @nodes;
   $self->visit(
        sub {
            my $node = shift;
            push @nodes, $node if $node->$field and $node->$field eq $value;
        }
   );
   if ( wantarray) { 
       return @nodes;
   } 
   else { 
       if( @nodes > 1 ) { 
	        $logger->warn("More than 1 node found but caller requested scalar, only returning first node");
       }
       return shift @nodes;
   }   
}

sub verbose {
    my ( $self, $level ) = @_;
    $level = 0 if $level < 0;
    $self->VERBOSE( -level => $level );
}

sub reroot {
    my ( $self, $node ) = @_;
    my $id = $node->get_id;
    my $new_root = $node->set_root_below;
    if ( $new_root ) {
        my @children = grep { $_->get_id != $id } @{ $new_root->get_children };
        $node->set_child($_) for @children;
        return 1;    
    }
    else {
        return 0;
    }
}

sub remove_Node {
    my ( $self, $node ) = @_;
    if ( not ref $node ) {
        ($node) = grep { $_->get_name eq $node } @{ $self->get_entities };
    }
    if ( $node->is_terminal ) {
        $node->get_parent->prune_child( $node );
    }
    else {
        $node->collapse;
    }
    $self->delete($node);
}

sub splice {
    my ( $self, @args ) = @_;
    if ( ref($args[0]) ) {
        $_->collapse for @args;
    }
    else {
        my %args = @args;
        my ( @keep, @remove );
        for my $key ( keys %args ) {
            if ( $key =~ /^-keep_(.+)$/ ) {
                my $field = $1;
                my %val;
                if ( ref $args{$key} ) {
                    %val = map { $_ => 1 } @{ $args{$key} };
                }
                else {
                    %val = ( $args{$key} => 1 );
                }
                push @keep, grep { $val{ $_->$field } } @{ $self->get_entities };
            }
            elsif ( $key =~ /^-remove_(.+)$/ ) {
                my $field = $1;
                my %val;
                if ( ref $args{$key} ) {
                    %val = map { $_ => 1 } @{ $args{$key} };
                }
                else {
                    %val = ( $args{$key} => 1 );
                }
                push @remove, grep { $val{ $_->$field } } @{ $self->get_entities };           
            }
        }
        my @netto;
        REMOVE: for my $remove ( @remove ) {
            for my $keep ( @keep ) {
                next REMOVE if $remove->get_id == $keep->get_id;
            }
            push @netto, $remove;
        }
        my @names = map { $_->id } @netto;
        my @keep_names = map { $_->id } @keep;
        if ( @names ) {
            $self->prune_tips(\@names);
        }
        elsif ( @keep_names ) {
            $self->keep_tips( \@keep_names );
        }
    }
}

sub move_id_to_bootstrap {
    my $self = shift;
    $self->visit( 
        sub { 
            my $node = shift; 
            $node->bootstrap( $node->id ) if defined $node->id;
            $node->id("");
        } 
    );
}


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.