Group
Extension

zhangbo-NLP-plugin_eng/lib/zhangbo/NLP/sim_word_babelnet.pl

#!/usr/bin/perl
#词词距离计算
#输入为:
#老虎
#鲜花
#月亮
#狮子
 use POSIX;
#use JSON::XS;
 use Clone qw(clone);




use MongoDB;
use Smart::Comments;
use lib "/home/wyb/shell/";
#use Conn_mongo_jc;
use Data::Dumper;
use IO::File;
#use Add_info;


# http://poe.perl.org/?POE_Cookbook/TCP_Servers


# Include POE and POE::Component::Server::TCP.
 our $hash=();
 $hash_mn=();

# Load the relation dictionary "babel_all.txt" (looked up in the current
# directory) into the package-level structure $hash.
#
# Every line that contains "----" is split into two whitespace-separated
# token groups:
#
#     HEAD TOKEN TOKEN ... ---- TOKEN TOKEN ...
#
# The first token before "----" is the head word.  The remaining tokens
# before "----" are stored with weight 0.6, the tokens after "----" with
# weight 0.01:
#
#     $hash->{HEAD}{TOKEN} = weight
#
# Lines without "----" are ignored.  Entries for a head word that occurs on
# several lines are merged.
{
    my $dict_file = "babel_all.txt";

    # Without the dictionary every later comparison would silently score 0,
    # so a missing or unreadable file is reported instead of ignored.
    open(my $dict_fh, '<', $dict_file)
        or die "Cannot open dictionary '$dict_file': $!\n";

    while (my $line = <$dict_fh>) {
        next unless $line =~ /(.*?)----(.*)/;
        my ($before, $after) = ($1, $2);

        my ($name, @first_group) = split(" ", $before);

        # Nothing in front of "----": there is no head word to file the
        # entries under (the old code stored them under the empty key).
        next unless defined $name;

        $hash->{$name}->{$_} = 0.6 for @first_group;

        # Assigned second on purpose: a token listed on both sides of
        # "----" ends up with weight 0.01, exactly as before.
        $hash->{$name}->{$_} = 0.01 for split(" ", $after);
    }

    close($dict_fh);
}
# Main driver.
#
# Usage: perl sim_word_babelnet.pl WORD_FILE
#
# WORD_FILE holds one word per line.  The first line is the reference word;
# every following line is scored against it with comp_hash().  At most 50
# results are printed, best score first, one per line as "SCORE-WORD".
# (txt2arr() and comp_hash() additionally print their own trace lines.)
{
    my $file = $ARGV[0];
    die "Usage: $0 WORD_FILE\n" unless defined $file && length $file;

    # Three-argument open: characters in the file name can never be taken
    # for an open mode, and a failure is reported instead of being ignored.
    open(my $in_fh, '<', $file) or die "Cannot open '$file': $!\n";

    my @input_ar;     # input words, indexed by 0-based line number
    my %score_of;     # line number => score against the reference word
    my %reference;    # relation set of the first word

    while (my $input = <$in_fh>) {
        chomp($input);
        push @input_ar, $input;
        my $line_no = $#input_ar;

        my %relations = txt2arr($input);
        if ($line_no == 0) {
            %reference = %relations;
            next;
        }

        # same_syna() interpolates the package variable $n, so the most
        # recent score is still published there.
        $n = comp_hash(\%reference, \%relations);
        $score_of{$line_no} = $n;
    }
    close($in_fh);

    # Highest score first; equal scores keep input order so the output no
    # longer depends on Perl's randomized hash ordering.
    my @ranked = sort { $score_of{$b} <=> $score_of{$a} or $a <=> $b }
                 keys %score_of;

    my $printed = 0;
    for my $line_no (@ranked) {
        last if ++$printed > 50;
        print $score_of{$line_no}, "-", $input_ar[$line_no], "\n";
    }
}


#	}
#	);
#$poe_kernel->run();

# txt2arr($word, $exp_h)
#
# Look $word up in the package-level dictionary $hash and return a copy of
# its entries as a flat (key, weight, key, weight, ...) list, ready to be
# assigned to a hash.  A word that is not in the dictionary yields an empty
# list.
#
# $exp_h is accepted so existing two-argument calls keep working; it is not
# used.
#
# Side effects:
#   * prints one "meaning  KEY  WEIGHT" line per entry to the selected
#     output handle;
#   * stores $word in the package variable $w, which anto_sy() reads.
sub txt2arr
{
    my ($word, $exp_h) = @_;

    # Deliberately a package variable: anto_sy() picks its word up from here.
    $w = $word;

    my %entries;
    if (defined $word && ref $hash && exists $hash->{$word}) {
        # The old code iterated with each($hash->{$w}); passing a reference
        # to each() was experimental and is a compile-time error from
        # Perl 5.24 on.  Dereferencing explicitly works on every version.
        %entries = %{ $hash->{$word} };
    }

    for my $key (keys %entries) {
        print "meaning  $key  $entries{$key}\n";
    }

    return %entries;
}

#print "output = $res\n";
#		 $heap->{client}->stop();
#	$session->stop();


# Start the server.

# comp_hash($h1, $h2)
#
# Score the overlap of two relation sets.  Both arguments are references to
# hashes of the form { key => weight }, as produced from txt2arr().
#
# Let N be the number of entries with weight 0.01, counted over both sets.
# Every key present in both sets raises the score by 1/N while the score is
# below 0.6 and by 0.4/N once it has reached 0.6.  The result is capped at 1;
# sets without a common key score 0.
#
# Side effect: every entry is printed as " hash 1KEY  WEIGHT" (first set) or
# " hash 2KEY  WEIGHT" (second set) to the selected output handle.
#
# Neither input is modified, so the deep copies the old code made with
# clone() are not needed.
sub  comp_hash
{
    my ($h1, $h2) = @_;
    $h1 ||= {};
    $h2 ||= {};

    my %seen_in;                 # key => number of sets containing it
    my $low_weight_total = 0;    # N: entries weighted 0.01 in either set

    my $set_no = 0;
    for my $set ($h1, $h2) {
        $set_no++;
        for my $key (keys %{$set}) {
            my $weight = $set->{$key};
            $seen_in{$key}++;
            $low_weight_total++ if $weight == 0.01;
            print " hash $set_no$key  $weight\n";
        }
    }

    my $akv = 0;
    for my $key (keys %seen_in) {
        next if $seen_in{$key} < 2;

        # NOTE(review): the old code guarded a "score = 0.61" shortcut with
        # `$hash->{$k} == 0.6`.  $hash->{$k} is a dictionary entry (a hash
        # reference or undef), so that test never matched.  It presumably
        # meant the shared key's weight in $h1/$h2; confirm the intended
        # scoring before enabling such a shortcut for the general case.
        if ($low_weight_total == 0) {
            # A common key but no 0.01-weighted entry at all: this used to
            # die with "Illegal division by zero".  The overlap then consists
            # only of 0.6-weighted entries, so the 0.61 value of the old
            # shortcut is used.
            $akv = 0.61;
            next;
        }

        $akv += ($akv >= 0.6 ? 0.4 : 1) / $low_weight_total;
    }

    $akv = 1 if $akv > 1;
    return $akv;
}

# cdeep($h1, $h2)
#
# Compare two hashes whose keys are the integers 1, 2, 3, ... and turn the
# length of their common prefix into a score.
#
# "sum" is the number of leading positions n = 1, 2, ... (up to the size of
# the smaller hash) for which $h1->{n} == $h2->{n}; counting stops at the
# first difference.  It is then adjusted and mapped as follows:
#
#   * one hash is a complete prefix of the other (sum equals a hash size):
#       equal sizes -> 3.5, otherwise sum < 4 -> 3.6, otherwise -> 4
#   * sum >= 6 -> 4.6
#   * result: sum 1 -> 0, 2 -> 0.01, 3 -> 0.1, 4 -> 0.18,
#             anything else -> 1 / (8 - sum)
#
# NOTE(review): values are compared numerically (==); confirm they are
# numbers.  Also note that sum == 0 yields 0.125, more than the result for
# one or two matching positions; kept as in the original, verify the intent.
#
# Neither input is modified, so they are read in place instead of being
# deep-copied with clone() first.
sub cdeep
{
    my ($h1, $h2) = @_;
    $h1 ||= {};
    $h2 ||= {};

    my $a_h1 = scalar keys %{$h1};
    my $a_h2 = scalar keys %{$h2};
    my $min  = $a_h1 < $a_h2 ? $a_h1 : $a_h2;

    # Length of the common prefix.
    my $sum = 0;
    for my $pos (1 .. $min) {
        last unless $h1->{$pos} == $h2->{$pos};
        $sum++;
    }

    # Full containment: the shorter hash matched over its whole length.
    if ($sum == $a_h1 || $sum == $a_h2) {
        if ($a_h1 == $a_h2) {
            $sum = 3.5;    # completely identical
        }
        elsif ($sum < 4) {
            $sum = 3.6;
        }
        else {
            $sum = 4;      # subordinate relation
        }
    }

    $sum = 4.6 if $sum >= 6;

    # Shallow matches get fixed scores; everything else uses the formula.
    my %fixed_score = (1 => 0, 2 => 0.01, 3 => 0.1, 4 => 0.18);
    my $res = exists $fixed_score{$sum} ? $fixed_score{$sum}
                                        : 1 / (8 - $sum);
    return $res;
}
# ndate([$epoch])
#
# Return a local-time stamp formatted as "YYYY-MM-DD HH:MM:SS".
#
# $epoch is optional (seconds since the epoch); without it the current time
# is used, which is what every existing zero-argument call gets.
#
# localtime() reports the year as an offset from 1900.  The old code turned
# that offset into a year with s/1(.*)/20$1/, which only works for
# 2000-2099; adding 1900 is correct for every year.
sub ndate
{
    my ($epoch) = @_;
    $epoch = time() unless defined $epoch;

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($epoch);

    return sprintf("%d-%02d-%02d %02d:%02d:%02d",
                   $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

# same_syna(@words)
#
# For every word found in the package-level dictionary $hash, copy that
# word's entries (key => weight) into the package-level hash reference
# $tmp_hash.  Entries accumulate across words; a later word overwrites an
# earlier weight for the same key.  Words not in the dictionary are skipped.
#
# Returns the two-element list ("$syna", "$n"), i.e. the string values of the
# package variables $syna and $n.
# NOTE(review): nothing in this file assigns $syna, and $n is only set by the
# main loop; the return value looks unfinished - confirm what callers expect.
sub same_syna
{
    foreach my $word (@_)
    {
        next unless ref $hash && exists $hash->{$word};

        # each() needs a real hash: each($hashref) was experimental and is
        # a compile-time error from Perl 5.24 on.
        while (my ($key, $weight) = each %{ $hash->{$word} })
        {
            $tmp_hash->{$key} = $weight;
        }
    }

    return ("$syna", "$n");
}
# same_father(): placeholder without any logic; ignores its arguments and
# returns nothing (empty list in list context, undef in scalar context).
sub same_father
{
    return;
}
# one_of_all(): placeholder without any logic; ignores its arguments and
# returns nothing (empty list in list context, undef in scalar context).
sub one_of_all
{
    return;
}
# lennovo(@args): hands its argument list straight back to the caller - the
# arguments themselves in list context, their count in scalar context.
sub lennovo
{
    return @_;
}
# anto_sy([$word])
#
# Copy the dictionary entries (key => weight) of one word from the
# package-level dictionary $hash into the package-level hash reference
# $tmp_hash, overwriting weights of keys that are already there.
#
# The word is the optional first argument.  Without an argument the package
# variable $w is used, as before (txt2arr() stores its last word there).
# Nothing happens when the word is not in the dictionary.  Returns nothing.
sub anto_sy
{
    my $word = @_ ? $_[0] : $w;
    return unless defined $word && ref $hash && exists $hash->{$word};

    # each() needs a real hash: each($hashref) was experimental and is a
    # compile-time error from Perl 5.24 on.
    while (my ($key, $weight) = each %{ $hash->{$word} })
    {
        $tmp_hash->{$key} = $weight;
    }

    return;
}


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.