zhangbo-NLP-plugin_eng/lib/zhangbo/NLP/sim_word_babelnet.pl
#!/usr/bin/perl
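#Word-to-word distance (similarity) calculation
#Example input (one word per line):
#老虎 (tiger)
#鲜花 (flower)
#月亮 (moon)
#狮子 (lion)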
use POSIX;
#use JSON::XS;
use Clone qw(clone);
use MongoDB;
use Smart::Comments;
use lib "/home/wyb/shell/";
#use Conn_mongo_jc;
use Data::Dumper;
use IO::File;
#use Add_info;
# http://poe.perl.org/?POE_Cookbook/TCP_Servers
# Include POE and POE::Component::Server::TCP.
# Global dictionary: $hash->{head word}->{related word} = weight.
#
# Every line of babel_all.txt that contains "----" is split into a left and
# a right part.  The first whitespace-separated token of the left part is the
# head word; the remaining left tokens are stored with weight 0.6 and the
# tokens of the right part with weight 0.01 (a token that appears on both
# sides therefore ends up with 0.01).  Lines without "----" are ignored.
our $hash=();
$hash_mn=();
my $tmp=();
{
    my $dict_file = "babel_all.txt";

    # Fail loudly: without the dictionary every similarity would be 0.
    open(my $dict_fh, '<', $dict_file)
        or die "Cannot open $dict_file: $!";

    while (my $line = <$dict_fh>)
    {
        next unless $line =~ /(.*?)----(.*)/;
        my ($left, $right) = ($1, $2);

        my ($name, @near) = split(" ", $left);
        # A line with nothing before "----" has no head word to file the
        # entries under; skip it instead of storing them under the key "".
        next unless defined $name;
        my @far = split(" ", $right);

        $hash->{$name}->{$_} = 0.6  for @near;
        $hash->{$name}->{$_} = 0.01 for @far;
    }
    close($dict_fh);
}
# Main driver.
#
# Reads the file named by the first command-line argument, one word per line.
# The first line is the reference word; every later line is scored against it
# with comp_hash().  Finally the (at most) 50 best-scoring lines are printed
# as "<score>-<word>", highest score first.
my $max_results = 50;

my $file = $ARGV[0];
defined($file) or die "Usage: $0 WORD_LIST_FILE\n";

$n = 0;                 # left as a package global: same_syna() reads $n
my @input_ar;           # input lines, indexed by 0-based line number
my %reference;          # related-word weights of the first line
my %score_of;           # line index => similarity to the first line

open(my $words_fh, '<', $file)
    or die "Cannot open $file: $!";
while (my $input = <$words_fh>)
{
    chomp($input);
    push @input_ar, $input;

    if (@input_ar == 1)
    {
        # $word_exp_hash is never assigned in this file; txt2arr() ignores it.
        %reference = txt2arr($input, $word_exp_hash);
        next;
    }

    my %candidate = txt2arr($input, $word_exp_hash);
    $n = comp_hash(\%reference, \%candidate);
    $score_of{$#input_ar} = $n;
}
close($words_fh);

my @key = sort { $score_of{$b} <=> $score_of{$a} } keys %score_of;
splice(@key, $max_results) if @key > $max_results;
foreach my $idx (@key)
{
    print $score_of{$idx}, "-", $input_ar[$idx], "\n";
}
# }
# );
#$poe_kernel->run();
# txt2arr($word, $exp) - fetch the related-word weights stored for a word.
#
# Parameters:
#   $word - head word to look up in the global dictionary $hash.
#   $exp  - accepted for compatibility with existing callers; it is stored
#           in the package global $exp_h and not used otherwise.
# Returns:
#   A flat key/value list (related word => weight) copied from
#   $hash->{$word}; the empty list when the word is not in the dictionary.
# Side effects:
#   Prints one "meaning <word> <weight>" line per entry to STDOUT.
#   Sets the package globals $w (read by anto_sy) and $tmp_hash.
sub txt2arr
{
    ($w, $exp_h) = @_;
    $tmp_hash = {};

    if (exists($hash->{$w}))
    {
        # Dereference explicitly: each()/keys() on a bare reference was an
        # experimental feature that became a fatal error in Perl 5.24.
        %{$tmp_hash} = %{ $hash->{$w} };
    }

    while (my ($related, $weight) = each(%{$tmp_hash}))
    {
        print "meaning $related $weight\n";
    }
    return (%{$tmp_hash});
}
#print "output = $res\n";
# $heap->{client}->stop();
# $session->stop();
# Start the server.
# comp_hash(\%h1, \%h2) - similarity score of two related-word tables.
#
# Parameters:
#   \%h1, \%h2 - hash references mapping a word to its weight.  They are only
#                read, so no copy is taken.
# Returns:
#   A number between 0 and 1.  Every key present in both tables adds
#   1/N to the score while the score is below 0.6 and 0.4/N afterwards,
#   where N is the number of entries with weight 0.01 in both tables
#   together.  The result is capped at 1.
# Side effects:
#   Prints every entry of both tables to STDOUT (" hash 1..." / " hash 2...").
sub comp_hash
{
    my ($h1, $h2) = @_;

    my $akv = 0;       # accumulated score
    my %tables_with;   # key => number of tables (1 or 2) that contain it
    my $low1 = 0;      # entries with weight 0.01 in the first table
    my $low2 = 0;      # entries with weight 0.01 in the second table

    while (my ($word, $weight) = each(%{$h1}))
    {
        $tables_with{$word}++;
        $low1++ if $weight == 0.01;
        print " hash 1$word $weight\n";
    }
    while (my ($word, $weight) = each(%{$h2}))
    {
        $tables_with{$word}++;
        print " hash 2$word $weight\n";
        $low2++ if $weight == 0.01;
    }

    my $low_total = $low1 + $low2;
    foreach my $word (keys %tables_with)
    {
        next if $tables_with{$word} < 2;

        # NOTE(review): $hash->{$word} is a nested hash reference (or undef),
        # so this comparison can never be true and the 0.61 branch is dead.
        # The weight of $word in $h1/$h2 was probably meant; confirm the
        # intent before changing it, because that alters every score.
        if ($hash->{$word} == 0.6)
        {
            $akv = 0.61;
            next;
        }

        # No 0.01-weighted entry in either table: the increment is undefined
        # (this used to die with "Illegal division by zero"), so skip it.
        next if $low_total == 0;

        $akv += ($akv >= 0.6 ? 0.4 : 1) / $low_total;
    }

    $akv = 1 if $akv > 1;
    return $akv;
}
# cdeep(\%h1, \%h2) - score two tables by the length of their common prefix.
#
# Parameters:
#   \%h1, \%h2 - hash references whose keys 1, 2, 3, ... are compared in
#                order.  They are only read, so no copy is taken.
#                NOTE(review): values are compared with the numeric ==, so
#                non-numeric strings all compare equal (as 0) - confirm the
#                values are numeric.
# Returns:
#   A score derived from the number of leading positions that match:
#   1 => 0, 2 => 0.01, 3 => 0.1, 4 => 0.18, anything else 1/(8 - level),
#   where the level is adjusted first when one table is completely matched
#   (3.5 identical size, 3.6 short containment, 4 long containment) or when
#   six or more positions match (4.6).
sub cdeep
{
    my ($h1, $h2) = @_;

    my $a_h1 = scalar(keys %{ $h1 || {} });
    my $a_h2 = scalar(keys %{ $h2 || {} });
    my $min  = $a_h1 < $a_h2 ? $a_h1 : $a_h2;

    # Count matching positions from 1 upwards, stopping at the first mismatch.
    my $sum = 0;
    foreach my $pos (1 .. $min)
    {
        last unless $h1->{$pos} == $h2->{$pos};
        $sum++;
    }

    # Fully contained: every position of at least one table matched.
    if (($sum == $a_h1) || ($sum == $a_h2))
    {
        if    ($a_h1 == $a_h2) { $sum = 3.5; }   # identical
        elsif ($sum < 4)       { $sum = 3.6; }
        else                   { $sum = 4;   }   # subordinate
    }
    $sum = 4.6 if $sum >= 6;

    # Shallow matches get fixed scores; everything else uses 1/(8 - level).
    my %fixed_score = (1 => 0, 2 => 0.01, 3 => 0.1, 4 => 0.18);
    return $fixed_score{$sum} if exists $fixed_score{$sum};
    return 1 / (8 - $sum);
}
# ndate() - current local time as "YYYY-MM-DD hh:mm:ss".
#
# Returns:
#   The formatted timestamp string, every field zero-padded.
#
# localtime() reports the year as an offset from 1900 and the month as
# 0..11, so both are adjusted arithmetically (the previous text substitution
# on the year only produced the right result for 2000-2099).
sub ndate
{
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time());
    return sprintf("%04d-%02d-%02d %02d:%02d:%02d",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}
# same_syna(@words) - merge the dictionary entries of several words.
#
# Parameters:
#   @words - head words to look up in the global dictionary $hash; words
#            that are not in the dictionary are ignored.
# Side effects:
#   Copies every related-word/weight pair of the found words into the
#   package global $tmp_hash (later words overwrite earlier ones).
# Returns:
#   The package globals $syna and $n as two strings.
#   NOTE(review): this sub never assigns $syna or $n, so the return value
#   looks unfinished - confirm what callers expect.
sub same_syna
{
    foreach my $word (@_)
    {
        next unless exists($hash->{$word});

        # Dereference explicitly: each() on a bare reference is a fatal
        # error since Perl 5.24.
        while (my ($related, $weight) = each(%{ $hash->{$word} }))
        {
            $tmp_hash->{$related} = $weight;
        }
    }
    return "$syna","$n"
}
# same_father() - placeholder with no implementation; returns nothing.
sub same_father
{
    return;
}
# one_of_all() - placeholder with no implementation; returns nothing.
sub one_of_all
{
    return;
}
# lennovo(@args) - placeholder that hands its arguments straight back:
# the argument list in list context, the argument count in scalar context.
sub lennovo
{
    return @_;
}
# anto_sy() - copy the dictionary entry of the current word into $tmp_hash.
#
# Takes no arguments.  The word is read from the package global $w (which
# txt2arr sets to the word it was last called with); when $hash has an entry
# for it, every related-word/weight pair is copied into the package global
# $tmp_hash.  The sub is called for this side effect only; it has no explicit
# return value.
# NOTE(review): the name suggests antonym handling, but the body is a plain
# copy of the dictionary entry - confirm the intended behaviour.
sub anto_sy
{
    if (exists($hash->{$w}))
    {
        # Dereference explicitly: each() on a bare reference is a fatal
        # error since Perl 5.24.
        while (my ($related, $weight) = each(%{ $hash->{$w} }))
        {
            $tmp_hash->{$related} = $weight;
        }
    }
}