Group
Extension

zhangbo-NLP-plugin_eng/lib/zhangbo/NLP/sim_s.pl

#!/usr/bin/perl
#use JSON::XS;
 use Clone qw(clone);



use MongoDB;
use Smart::Comments;
use lib "/home/wyb/shell/";
use Conn_mongo_jc;
use Data::Dumper;
use IO::File;
#use Add_info;


# http://poe.perl.org/?POE_Cookbook/TCP_Servers
# (The POE::Component::Server::TCP variant of this script is commented out
# further down in the file; only the batch mode is live.)

# ---------------------------------------------------------------------------
# Load the word -> concept dictionary from "word.txt" (current directory).
#
# The file is read as records of the form
#     W_C=<word>
#     DEF={Unit|单位:host={information|信息:belong={computer|电脑}}}
# Every DEF line is split on '|'; for each piece, the text up to the first
# ':' or '}' is recorded as a concept of the most recent W_C word:
#     $hash->{<word>}{<concept>} = 1
# (for the sample above: 单位, 信息 and 电脑; the leading "{Unit" piece has no
# ':' or '}' and is skipped).
#
# $hash is a package global; txt2arr() consults it to expand words into
# their concepts.  A missing dictionary is reported but is not fatal: the
# script then simply works on the raw words.
# ---------------------------------------------------------------------------
$hash = {};

{
    # Headword of the record being read (value of the last W_C= line).
    my $current_word;

    if (open(my $dict_fh, '<', 'word.txt')) {
        while (my $line = <$dict_fh>) {
            chomp($line);

            if ($line =~ /^W_C=(.*)/) {
                $current_word = $1;
            }
            elsif ($line =~ /^DEF=(.*)/) {
                my $definition = $1;
                for my $piece (split('\|', $definition)) {
                    if ($piece =~ /(.*?)[\:|\}]/) {
                        my $concept = $1;
                        $hash->{$current_word}->{$concept} = 1;
                    }
                }
            }
        }
        close($dict_fh);
    }
    else {
        warn "sim_s.pl: cannot open word.txt: $!; continuing without the dictionary\n";
    }
}
#print "server start\n";
#use POE qw(Component::Server::TCP);
#open(FD,">/tmp/sim_s.log");

#my $n=0;
#our $f=0;
#POE::Component::Server::TCP->new(
#  Alias       => "echo_server",
#  Port        => 11212,
#  ClientInput => sub {

#    my ($session, $heap, $input) = @_[SESSION, HEAP, ARG0];
#    print "Session ", $session->ID(), " got input: ".$input."\n";
    #print "Session ", $session->ID(), " got input: ".length($input)."\n";
#    $heap->{client}->put($input);
#$input="坦克,我的希望非常诱人电视里有主持人,那里有大熊猫";
#	$input="网络和网民的意见,现在正行驶在通州回北京的高速公路上,十评论员单仁平的文章,标题是“做大众政治焦点,茅于轼的选择”。这篇文章的核心意见是,茅于轼应该做中国社会团结的促进者,不应该";
#$input="致力于宣传 市场万能 剥削有理 汉奸人性 保钓无用 保粮错误替富人说话的茅于轼,今天下午两点在北京海淀翠宫饭店演讲顽强继续。未知海淀区委书记隋振江,宣传部长陈名杰是否到场。外媒问我是否到场,告曰:先参加央视《苦难辉煌》座谈会,或会晚到一会儿。今天要长见识了";
#	$input2="茅于轼刚刚吃完胡辣汤和烧饼夹猪头肉。现在正行驶在通州回北京的高速公路上,十五分钟以后要开始腾讯微访谈直播,我和方舟子拟就昨天的热点问题,回答网友的提问。敬请各位网友提示一下,昨天有什么热点问题需要谈一谈?";
# ---------------------------------------------------------------------------
# Ranking pass: compare every line of the input file with its first line.
#
#   $ARGV[0]  path of the input file, one text per line.
#
# Line 0 is the reference text.  Each later line is turned into a feature
# hash by txt2arr() and scored against the reference with comp_hash(); the
# score is stored in $thash under the line's index.  Finally the (at most)
# 60 best-scoring lines are printed to STDOUT as
#     <score>-<n>-<original line>
# highest score first; equal scores are listed in input order so that the
# output is reproducible.
# ---------------------------------------------------------------------------
$file  = $ARGV[0];
$f     = 0;     # index of the line currently being processed
$n     = 0;     # score of the most recently compared line
$thash = {};    # line index => similarity to line 0
my @input_ar;   # raw input lines, indexed like $thash

# NOTE(review): FD_hash is never opened in this file (the open below is
# commented out), so the feature dump printed to it is currently discarded.
# The "all" pass reads "$file.txt", which therefore has to come from
# somewhere else -- confirm whether the open should be restored.
#open(FD_hash,">$file.txt");

# Three-argument open: the path comes from the command line and must not be
# interpreted as an open mode or a pipe.
if (defined($file) && open(my $in_fh, '<', $file)) {
    while (my $text = <$in_fh>) {
        $input_ar[$f] = $text;

        my %features = txt2arr($text);

        # One dump row per input line: "word,weight<TAB>..." plus newline.
        while (my ($word, $weight) = each(%features)) {
            print FD_hash "$word,$weight\t";
        }
        print FD_hash "\n";

        if ($f == 0) {
            # Keep a private copy of the reference features.
            $d = clone(\%features);
        }
        else {
            $n = comp_hash($d, \%features);
            $thash->{$f} = $n;
        }

        $f++;
    }
    close($in_fh);
}
else {
    warn "sim_s.pl: cannot open input file "
        . (defined($file) ? "'$file': $!" : "(no file name given)") . "\n";
}

close(FD_hash);

# Best score first; ties broken by line number.
my @key = sort { $thash->{$b} <=> $thash->{$a} or $a <=> $b } keys %{$thash};

my $shown = 0;
for my $i (@key) {
    last if ++$shown > 60;
    print $thash->{$i}, "-<n>-", $input_ar[$i];
}

# ---------------------------------------------------------------------------
# Optional all-pairs pass, enabled when the second command-line argument
# contains "all".
#
# Reads the feature dump "$file.txt" (one row per text, tab-separated
# "word,weight" cells), rebuilds one feature hash per row, scores every
# pair of rows with comp_hash() and prints
#     <score*100> <row index> <row index> 
# for each pair whose scaled score is above 25.
# ---------------------------------------------------------------------------
if (defined($ARGV[1]) && $ARGV[1] =~ /all/) {
    my @feature_sets;

    # Read the dump directly instead of shelling out to `cat`: the path is
    # derived from a command-line argument and must not pass through a shell.
    if (open(my $dump_fh, '<', "$file.txt")) {
        while (my $row = <$dump_fh>) {
            my %weight_of;
            for my $cell (split("\t", $row)) {
                # Word is everything before the first comma, weight the rest.
                if ($cell =~ /(.*?)\,(.*)/) {
                    $weight_of{$1} = $2;
                }
            }
            # Rows without any cell still occupy an index (empty hash).
            push @feature_sets, \%weight_of;
        }
        close($dump_fh);
    }
    else {
        warn "sim_s.pl: cannot open feature dump '$file.txt': $!\n";
    }

    for my $left (0 .. $#feature_sets) {
        for my $right ($left + 1 .. $#feature_sets) {
            my $score = comp_hash($feature_sets[$left], $feature_sets[$right]) * 100;
            if ($score > 25) {
                print "$score $left $right \n";
            }
        }
    }
}

#	}
#	);
#$poe_kernel->run();

# txt2arr($text)
#
# Turns one text into a weighted feature list.
#
# The text is written to a temporary file in /tmp and handed to the external
# script "$dir/parser-pca3.pl <file> 1".  The raw parser output is echoed to
# STDOUT and then interpreted section by section; a line containing "=="
# starts the next section:
#
#   section 1  lines "word,weight": the word's weight is assigned to each of
#              its dictionary concepts ($hash->{word}), or to the word itself
#              when the dictionary does not know it.
#   section 2  lines "w1-w2-...,weight": the key is split on '-', and one
#              tenth of the weight is added for every part (again expanded
#              through the dictionary when possible).
#   (presumably section 1 holds segmented words and section 2 multi-word
#   phrases -- confirm against parser-pca3.pl)
#
# Returns the features as a flat key/value list (assign it to a hash).
# Returns an empty list if the temporary file cannot be created.
sub txt2arr
{
    my ($input_t) = @_;
    my %weight_of;

    # File::Temp gives an unpredictable, exclusively created file name
    # instead of the former "/tmp/" . rand().
    require File::Temp;
    my ($tmp_fh, $tmp_path) = eval { File::Temp::tempfile(DIR => '/tmp') };
    if (!defined($tmp_path)) {
        warn "txt2arr: cannot create temporary file in /tmp: $@";
        return ();
    }
    print {$tmp_fh} "$input_t\n";
    close($tmp_fh);

    my $dir = "/home/wyb/stanford-parser-2012-07-09";
    my @res = `$dir/parser-pca3.pl $tmp_path 1`;
    unlink($tmp_path);

    # The raw parser output is part of this script's STDOUT.
    print @res;

    my $part = 0;    # number of "==" separator lines seen so far
    for my $tmp_res (@res) {
        chomp($tmp_res);
        if ($tmp_res =~ /==/) {
            $part++;
        }

        if (($part == 1) && ($tmp_res =~ /(.*?),(.*)/)) {
            my $w     = $1;
            my $value = $2;
            if (exists($hash->{$w})) {
                # Known word: its weight goes to each of its concepts.
                # (each() on a bare hash reference no longer compiles on
                # Perl 5.24+, hence the explicit dereference.)
                for my $concept (keys %{ $hash->{$w} }) {
                    $weight_of{$concept} = $value;
                }
            }
            else {
                $weight_of{$w} = $value;
            }
        }
        elsif (($part == 2) && ($tmp_res =~ /(.*?),(.*)/)) {
            my $w     = $1;
            my $value = $2;
            for my $piece (split("-", $w)) {
                if (exists($hash->{$piece})) {
                    for my $concept (keys %{ $hash->{$piece} }) {
                        $weight_of{$concept} += ($value / 10);
                    }
                }
                else {
                    $weight_of{$piece} += ($value / 10);
                }
            }
        }
    }

    return (%weight_of);
}

#print "output = $res\n";
#		 $heap->{client}->stop();
#	$session->stop();


# Start the server.

# comp_hash($h1, $h2)
#
# Similarity of two weighted feature hashes (feature => numeric weight).
#
#   score = sum over features present in BOTH hashes of (w1 + w2)
#           ---------------------------------------------------
#           total weight of $h1 + total weight of $h2
#
# Identical hashes score 1, hashes without a common feature score 0.
#
# The denominator used to be built from whichever value each() happened to
# return last for either hash, so the score depended on hash iteration order
# and could change from run to run; it is now the sum of all weights.
#
# An undefined argument is treated as an empty hash.  Returns 0 when the
# total weight is zero (nothing to compare, and no division by zero).
sub  comp_hash
{
    my ($h1, $h2) = @_;
    $h1 = {} unless defined($h1);
    $h2 = {} unless defined($h2);

    my $total_weight = 0;
    for my $weight (values(%{$h1}), values(%{$h2})) {
        $total_weight += $weight;
    }
    return 0 unless $total_weight;

    my $shared_weight = 0;
    for my $feature (keys %{$h1}) {
        next unless exists($h2->{$feature});
        $shared_weight += $h1->{$feature} + $h2->{$feature};
    }

    return $shared_weight / $total_weight;
}

# ndate()
#
# Returns the current local time as "YYYY-MM-DD HH:MM:SS" (all fields
# zero-padded).
#
# localtime() reports the year as years since 1900 and the month as 0..11,
# so both are adjusted arithmetically; the earlier textual rewrite of the
# year only produced a correct result for 2000-2099.
sub ndate
{
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time());

    return sprintf(
        "%04d-%02d-%02d %02d:%02d:%02d",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec
    );
}



Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.