Group
Extension

zhangbo-NLP-plugin_eng/lib/zhangbo/NLP/sim2.pl

#!/usr/bin/perl
#词词距离计算
#输入为:
#老虎
#鲜花
#月亮
#狮子
 use POSIX;
#use JSON::XS;
 use Clone qw(clone);




use MongoDB;
use Smart::Comments;
use lib "/home/wyb/shell/";
#use Conn_mongo_jc;
use Data::Dumper;
use IO::File;
#use Add_info;


# http://poe.perl.org/?POE_Cookbook/TCP_Servers


# Include POE and POE::Component::Server::TCP.
 $hash    = {};    # word => { sememe => weight }, filled from word.txt below
 $hash_mn = {};    # word => number of DEF lines whose leading sememe was stored

# Load the word dictionary.  The loader understands two kinds of lines:
#   W_C=<word>         starts the entry for <word>
#   DEF=<definition>   a definition for the most recent W_C word, e.g.
#     DEF={Unit|单位:host={information|信息:belong={computer|电脑}}}
#     DEF={tool|用具:modifier={able|能:scope={bring|携带:content={$}}},{listen|听:content={music|音乐},instrument={~}}}
# The definition is split on '|'; in every piece the text in front of the
# first ':', '|' or '}' is taken as a sememe.  The first sememe found on a
# DEF line gets weight 0.5, every later one gets weight 0.1.
#
# A missing dictionary makes every later score meaningless, so fail loudly
# instead of silently continuing with an empty table.
open(my $word_fh, '<', 'word.txt') or die "Cannot open word.txt: $!";
my $tmp = ();    # word named by the most recent W_C= line
while (my $line = <$word_fh>)
{
    chomp($line);
    if ($line =~ /^W_C=(.*)/)
    {
        $tmp = $1;
    }
    elsif ($line =~ /^DEF=(.*)/)
    {
        my $definition = $1;
        my $position   = 0;    # 1-based index of the sememe within this DEF line
        foreach my $piece (split('\|', $definition))
        {
            next unless $piece =~ /(.*?)[\:|\}]/;
            my $m = $1;
            $position++;

            # "专" is skipped, but it still consumes a position: when it
            # comes first, the following sememe is stored with weight 0.1.
            next if $m eq "专";

            if ($position == 1)
            {
                $hash->{$tmp}->{$m} = 0.5;
                $hash_mn->{$tmp} += 1;
            }
            else
            {
                $hash->{$tmp}->{$m} = 0.1;
            }
        }
    }
}
close($word_fh);
#- {event|事件}
#  ├ {static|静态} {event|事件}
#  │ ├ {relation|关系} {static|静态}
#  │ │ ├ {isa|是非关系} {relation|关系}
#  │ │ │ ├ {be|是} {isa|是非关系:isa={*},relevant={*}};{isa|是非关系:descriptive={*},relevant={*}}
#  │ │ │ │ ├ {become|成为} {be|是:isa={*},relevant={*}};{be|是:descriptive={*},relevant={*}}
#  │ │ │ │ └ {mean|指代} {be|是:isa={*},relevant={*}}
#  │ │ │ └ {BeNot|非} {isa|是非关系:isa={*},relevant={*}};{isa|是非关系:descriptive={*},relevant={*}}
#  │ │ ├ {possession|领属关系} {relation|关系}
# Load the sememe taxonomy from evt.txt (an indented tree, one "{name|...}"
# node per line, as in the sample above).
#
# $level_stat maps depth => running count of nodes seen at that depth.  When
# the walk moves back up the tree the counters of the deeper levels are
# dropped, so a snapshot of $level_stat taken at a node describes that node's
# position in the tree.  cdeep() later compares two such snapshots level by
# level.
$level_stat={};
$word_hash={};        # first name on a line          => snapshot of $level_stat
$word_exp_hash={};    # names in the rest of the line => snapshot of $level_stat
$now_sub_level=0;
$olddeep=1;           # depth of the previously processed line

# Without the taxonomy no depth score can be computed, so fail loudly.
open(my $evt_fh, '<', 'evt.txt') or die "Cannot open evt.txt: $!";
while (my $row = <$evt_fh>)
{
    # Lines without a "{...}" node (blank lines, stray text) carry no
    # information.  They used to be counted as depth-1 nodes, which shifted
    # the counters of every node that followed them.
    next unless $row =~ /^(.*?)\{(.*)/;
    my ($tmpnar, $exp) = ($1, $2);

    # Depth is derived from the width of the tree-drawing prefix.
    # NOTE(review): length() is applied to the undecoded line, so this is
    # presumably a byte count and the divisor 4 the byte width of one
    # "│ " segment -- confirm against the real evt.txt encoding.
    my $deep = ceil((length($tmpnar) - 2) / 4);
    $deep = 1 if $deep < 1;

    $level_stat->{$deep}++;

    # Moving back up: forget the counters of levels deep+1 .. olddeep+1.
    if ($olddeep > $deep)
    {
        foreach my $level ($deep .. $olddeep)
        {
            delete($level_stat->{$level + 1});
        }
    }
    $olddeep = $deep;

    my $begin = 0;    # 1-based index of the name within this line
    foreach my $tmp_exp (split('\|', $exp))
    {
        next unless $tmp_exp =~ /(.*?)[\:|\}]/;
        my $word = $1;
        $begin++;

        # $level_stat only holds plain counters, so a shallow copy is a full
        # snapshot.  A name that occurs again later overwrites its earlier
        # snapshot.
        if ($begin == 1)
        {
            $word_hash->{$word} = { %{$level_stat} };
        }
        else
        {
            $word_exp_hash->{$word} = { %{$level_stat} };
        }
    }
}
close($evt_fh);
#print "server start\n";
#use POE qw(Component::Server::TCP);
#open(FD,">/tmp/sim_s.log");

#my $n=0;
#our $f=0;
#POE::Component::Server::TCP->new(
#  Alias       => "echo_server",
#  Port        => 11212,
#  ClientInput => sub {

#    my ($session, $heap, $input) = @_[SESSION, HEAP, ARG0];
#    print "Session ", $session->ID(), " got input: ".$input."\n";
    #print "Session ", $session->ID(), " got input: ".length($input)."\n";
#    $heap->{client}->put($input);
#$input="坦克,我的希望非常诱人电视里有主持人,那里有大熊猫";
#	$input="网络和网民的意见,现在正行驶在通州回北京的高速公路上,十评论员单仁平的文章,标题是“做大众政治焦点,茅于轼的选择”。这篇文章的核心意见是,茅于轼应该做中国社会团结的促进者,不应该";
#$input="致力于宣传 市场万能 剥削有理 汉奸人性 保钓无用 保粮错误替富人说话的茅于轼,今天下午两点在北京海淀翠宫饭店演讲顽强继续。未知海淀区委书记隋振江,宣传部长陈名杰是否到场。外媒问我是否到场,告曰:先参加央视《苦难辉煌》座谈会,或会晚到一会儿。今天要长见识了";
#	$input2="茅于轼刚刚吃完胡辣汤和烧饼夹猪头肉。现在正行驶在通州回北京的高速公路上,十五分钟以后要开始腾讯微访谈直播,我和方舟子拟就昨天的热点问题,回答网友的提问。敬请各位网友提示一下,昨天有什么热点问题需要谈一谈?";
# Command-line entry point: score the similarity of two words.
#   perl sim2.pl WORD1 WORD2
# Both words are required; without them the lookup would silently compare
# empty sememe sets and report a meaningless 0.
if (@ARGV < 2)
{
    die "Usage: $0 WORD1 WORD2\n";
}
$w1=$ARGV[0];
$w2=$ARGV[1];
$f=0;
$n=0;
$thash=();
my @input_ar;

# Sememe => weight tables for each word (empty when the word is unknown).
%my_hash  = txt2arr($w1, $word_exp_hash);
%my_hash2 = txt2arr($w2, $word_exp_hash);

$n = comp_hash(\%my_hash, \%my_hash2);

# The final score is the last line written to STDOUT.
print $n,"\n";
#	}
#	);
#$poe_kernel->run();

sub txt2arr
{
    # Return the sememe => weight pairs recorded for one word.
    #
    # Arguments:
    #   $_[0]  the word to look up in the global $hash table
    #   $_[1]  accepted for compatibility with existing callers; not used
    # Returns:
    #   a flat key/value list (assign it to a hash); empty when the word is
    #   undefined or unknown.  The result is a copy, so callers may modify
    #   it without touching the dictionary.
    #
    # The previous implementation iterated with each($hash->{$w}), i.e. each()
    # on a scalar reference, which Perl 5.24 and later reject at compile time.
    my ($word) = @_;

    # anto_sy() reads the package variable $w, so keep publishing the most
    # recently looked-up word there.
    $w = $word;

    my %senses;
    if (defined($word) && ref($hash) && exists($hash->{$word}))
    {
        %senses = %{ $hash->{$word} };
    }
    return %senses;
}

#print "output = $res\n";
#		 $heap->{client}->stop();
#	$session->stop();


# Start the server.

sub comp_hash
{
    # Score the similarity of two words from their sememe tables.
    #
    # Arguments:
    #   $_[0], $_[1]  hash references (sememe => weight), one per word.  Only
    #                 the keys are used; a missing argument counts as empty.
    # Returns:
    #   overlap score + depth score, where
    #   * overlap score: 0 when the words share no sememe; otherwise 0.5
    #     minus 0.12 times the fraction of sememe entries (counted over both
    #     words) that are not shared;
    #   * depth score: the largest cdeep() value over all sememe pairs that
    #     occur in the taxonomy tables.  A pair taken from $word_hash on both
    #     sides counts fully; a pair mixing $word_hash and $word_exp_hash is
    #     scaled by 0.8; a pair found only in $word_exp_hash on both sides is
    #     not scored.
    # Side effect:
    #   prints one trace line "<overlap>+<depth>-<penalty>" to STDOUT.
    #
    # All working variables are lexical: the earlier version iterated with
    # the package globals $k/$v, which cdeep() also writes to, and deep-cloned
    # inputs that are only ever read.
    my ($h1, $h2) = @_;
    $h1 = {} unless ref($h1);
    $h2 = {} unless ref($h2);

    my $vall   = scalar(keys %{$h1});
    my $vall2  = scalar(keys %{$h2});
    my $shared = scalar(grep { exists($h2->{$_}) } keys %{$h1});
    my $vtotal = 2 * $shared;    # a shared sememe accounts for one entry per word

    my $akv = 0;
    if ($shared > 0)
    {
        # $shared > 0 implies both tables are non-empty, so the divisor is > 0.
        $akv = 0.5 - (0.12 * (($vall + $vall2 - $vtotal) / ($vall + $vall2)));
    }

    my $my_deep = 0;    # best taxonomy score seen so far
    my $def     = 0;    # penalty term; nothing currently raises it

    foreach my $k (keys %{$h1})
    {
        my $existk    = exists($word_hash->{$k});
        my $existexpk = exists($word_exp_hash->{$k});
        next unless $existk || $existexpk;

        foreach my $k2 (keys %{$h2})
        {
            my $existk2    = exists($word_hash->{$k2});
            my $existexpk2 = exists($word_exp_hash->{$k2});
            next unless $existk2 || $existexpk2;

            if ($existk && $existk2)
            {
                my $score = cdeep($word_hash->{$k}, $word_hash->{$k2});
                $my_deep = $score if $score > $my_deep;
            }
            if ($existk && $existexpk2)
            {
                my $score = cdeep($word_hash->{$k}, $word_exp_hash->{$k2}) * 0.8;
                $my_deep = $score if $score > $my_deep;
            }
            if ($existexpk && $existk2)
            {
                my $score = cdeep($word_exp_hash->{$k}, $word_hash->{$k2}) * 0.8;
                $my_deep = $score if $score > $my_deep;
            }
        }
    }

    print "$akv+$my_deep-$def\n";
    return $akv + $my_deep - $def;
}

sub cdeep
{
    # Score how closely two taxonomy positions are related.
    #
    # Arguments:
    #   $_[0], $_[1]  hash references mapping level number (1, 2, ...) to the
    #                 counter recorded for that level; a missing argument
    #                 counts as empty.
    # Returns:
    #   a relatedness value derived from the number of leading levels
    #   (starting at level 1) whose counters are equal in both inputs.
    # Side effect:
    #   prints every level of both inputs to STDOUT as trace lines, in
    #   ascending level order.
    #
    # The inputs are only read, so no copy is taken, and no package globals
    # are written (the earlier version overwrote the global $v that
    # comp_hash() was iterating with).
    my ($h1, $h2) = @_;
    $h1 = {} unless ref($h1);
    $h2 = {} unless ref($h2);

    foreach my $level (sort { $a <=> $b } keys %{$h1})
    {
        print " cdeep input 1 $level  $h1->{$level}\n";
    }
    foreach my $level (sort { $a <=> $b } keys %{$h2})
    {
        print " cdeep input 2 $level  $h2->{$level}\n";
    }

    my $a_h1 = scalar(keys %{$h1});    # number of levels in each input
    my $a_h2 = scalar(keys %{$h2});
    my $min  = $a_h1 < $a_h2 ? $a_h1 : $a_h2;

    # Count the leading levels on which both inputs agree.
    my $sum = 0;
    foreach my $n (1 .. $min)
    {
        last unless $h1->{$n} == $h2->{$n};
        $sum++;
    }

    # One input is completely covered by the agreeing levels.
    if (($sum == $a_h1) || ($sum == $a_h2))
    {
        if ($a_h1 == $a_h2)
        {
            $sum = 3.5;    # identical positions
        }
        elsif ($sum < 4)
        {
            $sum = 3.6;    # shallow position contained in a deeper one
        }
        else
        {
            $sum = 4;      # subordinate: deep position contained in a deeper one
        }
    }

    # Cap very long agreeing runs.
    $sum = 4.6 if $sum >= 6;

    # Shallow agreement maps to fixed small values, everything else to
    # 1/(8-$sum).  $sum can never be 8 here because of the cap above.
    # NOTE(review): $sum == 0 (no agreeing level at all) lands in the final
    # branch and yields 0.125, more than three agreeing levels (0.1) --
    # confirm that this is intended before relying on the ordering.
    my $res;
    if ($sum == 1)
    {
        $res = 0;
    }
    elsif ($sum == 2)
    {
        $res = 0.01;
    }
    elsif ($sum == 3)
    {
        $res = 0.1;
    }
    elsif ($sum == 4)
    {
        $res = 0.18;
    }
    else
    {
        $res = 1 / (8 - $sum);
    }
    return $res;
}
sub ndate
{
    # Return the current local time as "YYYY-MM-DD HH:MM:SS".
    #
    # localtime() reports the year as an offset from 1900 and the month as
    # 0..11.  The year used to be rebuilt with s/1(.*)/20$1/, which is only
    # correct for 2000-2099; adding 1900 is correct for every year.  The
    # fields are kept in lexicals so that package variables such as $min and
    # $sec are no longer overwritten.
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time());
    return sprintf("%04d-%02d-%02d %02d:%02d:%02d",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

sub same_syna
{
    # Merge the sememe => weight pairs of every given word into the global
    # $tmp_hash (later words overwrite earlier ones on the same sememe).
    #
    # Arguments: a list of words; undefined or unknown words are skipped.
    # Returns:   the two-element list ("$syna", "$n"), i.e. the current
    #            values of those package variables as strings.
    # NOTE(review): nothing in this sub assigns $syna -- it looks unfinished.
    #
    # The lookup used each($hash->{$w}), i.e. each() on a scalar reference,
    # which Perl 5.24 and later reject at compile time.
    foreach my $word (@_)
    {
        next unless defined($word) && exists($hash->{$word});
        my $senses = $hash->{$word};
        foreach my $sememe (keys %{$senses})
        {
            $tmp_hash->{$sememe} = $senses->{$sememe};
        }
    }

    return "$syna","$n";
}
sub same_father
{
    # Placeholder: no behaviour is implemented.  Returns nothing (an empty
    # list, or undef in scalar context) whatever the arguments.
    return;
}
sub one_of_all
{
    # Placeholder: no behaviour is implemented.  Returns nothing (an empty
    # list, or undef in scalar context) whatever the arguments.
    return;
}
sub lennovo
{
    # Placeholder that hands its arguments straight back: the argument list
    # in list context, the number of arguments in scalar context.
    return @_;
}
sub anto_sy
{
    # Copy the sememe => weight pairs of the word held in the package
    # variable $w into the global $tmp_hash.  Takes no arguments and does
    # nothing when $w is undefined or not in the dictionary.
    # NOTE(review): the name suggests antonym/synonym handling, but only
    # this copy is implemented -- the sub looks unfinished.
    #
    # The lookup used each($hash->{$w}), i.e. each() on a scalar reference,
    # which Perl 5.24 and later reject at compile time.
    return unless defined($w) && exists($hash->{$w});

    my $senses = $hash->{$w};
    foreach my $sememe (keys %{$senses})
    {
        $tmp_hash->{$sememe} = $senses->{$sememe};
    }
    return;
}


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.