Group
Extension

zhangbo-NLP-plugin_eng/lib/zhangbo/NLP/engine_dis.pl

#!/usr/bin/perl
use Encode;
#use JSON::XS;
 use Clone qw(clone);



use MongoDB;
use Smart::Comments;
use lib "/home/wuyabo/shell/";
use Conn_mongo_jc;
use Data::Dumper;
use IO::File;
#use Add_info;


# http://poe.perl.org/?POE_Cookbook/TCP_Servers


# Script-wide lookup tables.  All three start out undefined.
our $hash;          # id => word; filled by all_word_id2() / all_word_id()
$hash_mn = undef;   # package global, not referenced by the subs in this file
my $hash_sc;        # only mentioned in commented-out code further down

# Include POE and POE::Component::Server::TCP.
use POE qw(Component::Server::TCP);

# word => { classifier name => weight }; filled by word_class_value() or
# word_class_value2() and read by search_article().
my $w_c_v;

# Load the id => word dictionary, then rebuild the weight table from the
# model files (word_class_value() prints one line per weight on STDOUT).
all_word_id2();
word_class_value();

# NOTE(review): the script stops here, so word_class_value2() below and the
# TCP server set up later in the file are never reached in this configuration.
exit;
word_class_value2();
# Export the dictionary collection ("dic", "sogoudic_c") to ./allword.txt and
# load it into the global id => word table $hash.
#
# Every document returned by the collection cursor contributes its 'id' and
# 'word' fields.  The word is UTF-8 encoded, stored as $hash->{id}, and written
# to allword.txt as one "word<TAB>id" line -- the layout all_word_id2() reads.
#
# Takes no arguments.  Dies when allword.txt cannot be created or flushed.
sub all_word_id
{
    my $col    = Conn_mongo_jc->new("dic", "sogoudic_c");
    my $cursor = $col->find();

    # Declared as a list on purpose: "my $id,$word;" would scope only $id and
    # leave $word as a package global.
    my ($id, $word);

    open(my $out, '>', 'allword.txt')
        or die "all_word_id: cannot write allword.txt: $!";

    while (my $one = $cursor->next)
    {
        $id   = $one->{'id'};
        $word = encode("utf-8", $one->{'word'});
        $hash->{$id} = $word;

        print {$out} $word, "\t", $id, "\n";
    }

    # Buffered write errors only surface at close time.
    close($out)
        or die "all_word_id: error closing allword.txt: $!";

    # Shows the last entry processed (both are empty for an empty cursor).
    print "loading dic ok $word $id\n";
}
# Load the word/classifier weight table $w_c_v from the file ./log.
#
# Each usable line has three tab-separated fields,
#     <word> TAB <classifier name> TAB <weight>
# and is stored as $w_c_v->{word}->{classifier name} = weight.  Lines with
# fewer than two tabs are ignored.  (This is the line layout that
# word_class_value() prints for every weight it finds.)
#
# Takes no arguments.  Dies when ./log cannot be opened, because an empty
# weight table would otherwise go unnoticed.
sub word_class_value2
{
    open(my $in, '<', 'log')
        or die "word_class_value2: cannot read log: $!";

    while (my $line = <$in>)
    {
        # "." does not match the newline, so the weight excludes it.
        if ($line =~ /^(.*?)\t(.*?)\t(.*)/)
        {
            $w_c_v->{$1}->{$2} = $3;
        }
    }

    close($in);
}

# Load the id => word dictionary $hash from ./allword.txt.
#
# Each usable line is "<word> TAB <id>" (the layout written by all_word_id());
# it is stored as $hash->{id} = word.  Lines without a tab are skipped.
#
# Takes no arguments.  Dies when allword.txt cannot be opened.
sub all_word_id2
{
    open(my $in, '<', 'allword.txt')
        or die "all_word_id2: cannot read allword.txt: $!";

    # A named lexical is used so the caller's $_ is left alone.
    while (my $line = <$in>)
    {
        # Only use $1/$2 after a successful match; after a failed match they
        # still hold the previous line's captures (or undef on the first line).
        next unless $line =~ /^(.*?)\t(.*)/;
        $hash->{$2} = $1;
    }

    close($in);
}
# Rebuild the word => classifier => weight table ($w_c_v) from the model files
# under ./class/<classifier id>/ and print every weight on STDOUT as
# "word<TAB>classifier name<TAB>weight".
#
# For every entry of ./class whose name is at least 3 characters long:
#   1. the entry name is used as the _id of a document in the
#      ("classifier", "classifiers") collection; its 'name' and 'classes'
#      fields are read and the classes recorded in the global $hash_c_i;
#   2. __MAXENT_MAPPING.model is read: label lines, then a one-character
#      line, then "a:b c" lines that fill the lookup $w_v->{c} = a;
#   3. __MAXENT_PARAMS.model is read: each "<index> <digit> <value>" line is
#      resolved through $w_v and the global $hash (id => word) to a word,
#      and the (possibly negated) value is stored in $w_c_v.
# The meaning of the individual model-file columns is not visible here;
# the comments below describe only what the code does with them.
#
# Takes no arguments.  Dies if ./class cannot be opened; calls exit on
# malformed mapping data or on ids that cannot be resolved.
sub word_class_value
{
my $col = Conn_mongo_jc->new("classifier", "classifiers");
		my $some_dir="./class";
opendir(my $dh, $some_dir) || die;
                   while(readdir $dh) {
                       my $dir=$_;
			# Skips "." and ".." (and any other name shorter than 3 characters).
			if(length($dir)<3)
			{next;}
				# The directory name doubles as the document _id.
				my $id2 = MongoDB::OID->new(value=>$dir);
			my $site=$col->find_one({"_id"=> $id2});
			my $name=$site->{'name'};
			$name=encode('utf-8',$name);
		print $name,"name $dir \n";
		
			# Record every class of this classifier as
			# $hash_c_i->{classifier name}->{class name}->{class id}.
			# $k, $v and $hash_c_i are package globals.
			while(($k,$v)=each(%{$site->{'classes'}}))
			{
			#	while(($k1,$v1)=each($k))
			#	{
						my $cn=encode('utf-8',$v->{'name'});
						my $cnid=$v->{'id'};
				print "$k ----  ",$cn, $cnid,"\n";
			#	}
			$hash_c_i->{$name}->{$cn}->{$cnid}=0;
			}
	
			# $has1 / $has2: a label of class "是" / "否" was seen in the mapping file.
			# $start: the mapping file's "a:b c" section has been reached.
			my $has1=0;
			my $has2=0;
			my $start=0;

		my $ok=0;	
			# Both model files must exist, otherwise the classifier is reported and skipped.
			if((-e "./class/$dir/__MAXENT_PARAMS.model")&&(-e "./class/$dir/__MAXENT_MAPPING.model"))
			{
		# NOTE(review): this "my" shadows the $ok declared just above; every
		# later use of $ok in this block refers to this inner variable.
		my $ok=1;	
				
				
				#open(FD,"./class/$dir/__MAXENT_PARAMS.model");
				# The file is read through the shell; $dir comes straight from readdir.
				my @clm=`cat ./class/$dir/__MAXENT_MAPPING.model`;
				# NOTE(review): only $ct1 is lexical here; neither variable is used.
				my $ct1,$ct2;
				# $w_v maps the third number of an "a:b c" line to its first number.
				my $w_v;
				foreach my $tpl(@clm)
				{
					chomp($tpl);
					my $ltpl=length($tpl);
		#			print "tpl $tpl ",$ltpl,"\n";
					# Label line: "<class id> <one digit>".
					if($tpl=~/^(\d+) (\d)$/)
					{
							my $y_id=$1;	
							my $tid=$2;	
						# Class id belongs to the class named "是" (yes).
						if(exists($hash_c_i->{$name}->{"是"}->{$y_id}))
						{
							print "exists $name $y_id $tid $dir 是\n";
							$has1=1;
						}
						# Class id belongs to the class named "否" (no).
						elsif(exists($hash_c_i->{$name}->{"否"}->{$y_id}))
							{
								$has2=1;
						print "exists $name $y_id $tid $dir 否\n";}
						# A classifier that has a class named "负面" (negative) is
						# accepted regardless of which class id this line carries.
						elsif(exists($hash_c_i->{$name}->{"负面"})){$has1=1;$has2=1;}
						# Unknown label: mark the classifier as unusable.
						# NOTE(review): $cnid here is the package variable, not the
						# "my $cnid" of the class loop above, which is out of scope.
						else	{print "error $cnid $2 $name $y_id $dir\n";$ok=0;;}
					}
					#else{print length($tpl),"length\n";}

					# A line of exactly one character ends the label section
					# (presumably a count or separator line -- TODO confirm).
					# The "a:b c" section is only read when both flags are set.
					elsif(  $ltpl==1 )
					{
						print "find \n";
						if(($has1==1)&&($has2==1))
						{$start=1;}
						else{last;}
					}
					# "a:b c" line: remember c => a; b is not used.
					elsif($start==1)
					{
							# NOTE(review): only $wid is lexical; $twid is a package global.
							my $wid,$twid;
						if($tpl=~/^(\d+)\:(\d+) (\d+)$/)	
						{
							 $wid=$1;
							 $twid=$3;
							$w_v->{$twid}=$wid;	
							#print "$wid $twid\n";
						}
						else{print "error map data $tpl\n";exit;}
					}
					# Anything else before the one-character line stops the scan.
					else
					{last;}
						
				}
			# Skip the parameter file when an unknown label was found.
			if($ok==0){next;}	
				# NOTE(review): re-declares @clm (and $ct1) in the same scope as above.
				my @clm=`cat ./class/$dir/__MAXENT_PARAMS.model`;
                                my $ct1,$ct2;
                                foreach my $tpl(@clm)
                                {
						# Parameter line: "<index> <one digit> <value>"; other lines are ignored.
						if($tpl=~/^(\d+?) (\d) (.*)$/)
						{
							my $v1=$1;
							my $v2=$2;
							my $v3=$3;
							# The index must have been announced in the mapping file.
							if(exists($w_v->{$v1}))
							{
							$tmpid=$w_v->{$v1};
								my $add=$v2;
								my $ccv=$v3;
							# A digit of 2 flips the sign of the value.
							if($add==2)
							{$ccv=-$ccv;}
								# Translate the id to its word through the global dictionary.
								if(exists($hash->{$tmpid}))
								{
							my $orgw=$hash->{$tmpid};	
							print "$orgw	$name	$ccv\n";
							# A later line for the same word and classifier overwrites this value.
							$w_c_v->{$orgw}->{$name}=$ccv;;
								}
								else
								{print "error no id $tmpid\n";exit;}
							}
							else
							{print "error no match mapping $v1 $v2 $v3\n";
									print Data::Dumper->Dump([%{$w_v}]);
							exit;}
						}
				}
					
				
			}
			else{print "error no module $dir $name\n";}
				
#l __MAXENT_PARAMS.model__MAXENT_MAPPING.model
		
                   }
                   closedir $dh;
}



# File-scoped counter; not used by the server setup below.
my $n=0;

# TCP front end on port 11212.
#
# Protocol, as implemented by the ClientInput handler: the client sends one
# line containing the path of a text file.  The server classifies that file
# with search_article(), which writes its result to the global FD_log handle,
# answers with the path of the result file ("/tmp/<random>.log") and then
# shuts the connection down.
#
# NOTE(review): the path comes from the network and is opened as-is by
# search_article(); any file readable by this process can be fed to it.
print "server start\n";
POE::Component::Server::TCP->new(
  Alias       => "echo_server",
  Port        => 11212,
  ClientInput => sub {
    my ($kernel, $heap, $input) = @_[KERNEL, HEAP, ARG0];

    my $randout  = rand();
    my $log_path = "/tmp/$randout.log";

    # FD_log stays a bareword handle because search_article() prints to it
    # by that name.
    if (open(FD_log, '>', $log_path)) {
        search_article($input);

        # Flush and close BEFORE the client learns the path; otherwise the
        # buffered result may not be on disk yet when the client reads it.
        close(FD_log)
            or warn "echo_server: error closing $log_path: $!\n";
    }
    else {
        warn "echo_server: cannot create $log_path: $!\n";
    }

    # The reply is the result path in every case, as before.
    $heap->{client}->put("$log_path\n");
    $kernel->yield("shutdown");
    return;
  },
  ClientDisconnected => sub {
    # Nothing to clean up per connection.
  },
);
POE::Kernel->run;

#$input="坦克,我的希望非常诱人电视里有主持人,那里有大熊猫";
#	$input="网络和网民的意见,现在正行驶在通州回北京的高速公路上,十评论员单仁平的文章,标题是“做大众政治焦点,茅于轼的选择”。这篇文章的核心意见是,茅于轼应该做中国社会团结的促进者,不应该";
#$input="致力于宣传 市场万能 剥削有理 汉奸人性 保钓无用 保粮错误替富人说话的茅于轼,今天下午两点在北京海淀翠宫饭店演讲顽强继续。未知海淀区委书记隋振江,宣传部长陈名杰是否到场。外媒问我是否到场,告曰:先参加央视《苦难辉煌》座谈会,或会晚到一会儿。今天要长见识了";
#	$input2="茅于轼刚刚吃完胡辣汤和烧饼夹猪头肉。现在正行驶在通州回北京的高速公路上,十五分钟以后要开始腾讯微访谈直播,我和方舟子拟就昨天的热点问题,回答网友的提问。敬请各位网友提示一下,昨天有什么热点问题需要谈一谈?";
#$file=$ARGV[0];
# Score every line of a text file against the word/class weight table $w_c_v
# and write the per-line result to the FD_log handle.
#
# Argument:
#   $_[0]  path of the input file; leading and trailing whitespace is ignored.
#
# Steps:
#   1. Each input line is cut to 500 characters as Perl counts them (bytes,
#      since the file is read without a decoding layer), "@...:" and
#      "@...<space>" runs are replaced by a blank, and the line is written to
#      a scratch file /tmp/<random>.sctxt, prefixed with its 1-based number.
#   2. The external scws program turns that file into /tmp/<random>.sctxt2.
#   3. Every line of the scws output is split on single spaces.  For each
#      token found in $w_c_v its weights are added up per class, and
#      "<first token>-<sum>:<class>\t<sum>:<class>\t...\n" is printed to
#      FD_log with the classes ordered by descending sum.
#
# FD_log must already be open for writing; it is neither opened nor closed
# here.  Both scratch files are removed before returning.
#
# Returns the result of unlinking the scws output file, or 0 when the input
# file or the first scratch file cannot be opened.
sub search_article
{
    my ($file) = @_;
    chomp($file);

    # The former two-argument open() stripped surrounding whitespace from the
    # name; do it explicitly so the same inputs keep working.
    $file =~ s/^\s+//;
    $file =~ s/\s+$//;

    my $rand     = rand();
    my $raw_path = "/tmp/$rand.sctxt";
    my $seg_path = "/tmp/$rand.sctxt2";

    my $src;
    unless (open($src, '<', $file)) {
        warn "search_article: cannot read '$file': $!\n";
        return 0;
    }

    my $raw;
    unless (open($raw, '>', $raw_path)) {
        warn "search_article: cannot write $raw_path: $!\n";
        close($src);
        return 0;
    }

    my $line_no = 1;
    while (my $in = <$src>) {
        chomp($in);

        # NOTE(review): this cut can fall inside a multi-byte UTF-8 sequence.
        $in = substr($in, 0, 500) if length($in) > 500;

        $in =~ s/\@.*?\:/ /g;
        $in =~ s/\@.*?\s/ /g;

        print {$raw} "$line_no ", $in, "\n";
        $line_no++;
    }
    close($src);
    close($raw)
        or warn "search_article: error closing $raw_path: $!\n";

    # List form: the arguments are passed to scws directly, without a shell.
    my $status = system(
        '/home/wuyabo/xs/bin/scws', '-I', '-E', '-N',
        '-d', '/home/wuyabo/xs/etc/dict.utf8.xdb',
        '-r', '/home/wuyabo/xs/etc/rules.utf8.ini',
        '-c', 'utf-8',
        $raw_path, '-o', $seg_path,
    );
    warn "search_article: scws exited with status $status\n" if $status != 0;
    unlink($raw_path);

    my $seg;
    if (open($seg, '<', $seg_path)) {
        while (my $in = <$seg>) {
            chomp($in);

            # Split on every single space (not awk-style), as before.
            my @tokens = split(/\ /, $in);

            # Per-line sum of weights, keyed by class name.
            my %tot;
            for my $token (@tokens) {
                next unless exists($w_c_v->{$token});
                my $weights = $w_c_v->{$token};
                for my $class (keys %{$weights}) {
                    $tot{$class} += $weights->{$class};
                }
            }

            # The first token is the line number written in step 1.
            print FD_log $tokens[0], "-";
            for my $class (sort { $tot{$b} <=> $tot{$a} } keys %tot) {
                print FD_log $tot{$class} . ":$class\t";
            }
            print FD_log "\n";
        }
        close($seg);
    }
    else {
        warn "search_article: cannot read $seg_path: $!\n";
    }

    return unlink($seg_path);
}


#	}
#	);
#$poe_kernel->run();

# Collect dictionary entries for the tokens of one whitespace-separated line.
#
# Arguments:
#   $_[0]  token string; the first token is treated as a line label and is
#          not looked up.
#   $_[1]  accepted but not used.
#
# Side effects: prints the input string to FD_log and empties the package
# array @res.
#
# Returns a reference to a hash holding, for every remaining token that is a
# key of $hash_all, a clone of $hash->{token}.
# NOTE(review): nothing in this file assigns $hash_all, so as written the
# returned hash is always empty -- confirm whether $hash was intended.
sub txt2arr
{
    my ($input_t, $exp_h) = @_;

    print FD_log  "$input_t\n"; 
    @res = ();

    my %picked;
    my ($line_label, @tokens) = split(" ", $input_t);

    for my $token (@tokens)
    {
        next unless exists($hash_all->{$token});
        $picked{$token} = clone($hash->{$token});
    }

    return \%picked;
}

#print "output = $res\n";
#		 $heap->{client}->stop();
#	$session->stop();


# Start the server.

# Compute an overlap score for two key => number hashes.
#
# Arguments:
#   $_[0], $_[1]  hash references (a missing one is treated as empty).
#
# Let T1 and T2 be 0.01 plus the sum of all values of the first and second
# hash (the 0.01 keeps the divisions away from zero for empty hashes).  Every
# key present in both hashes contributes
#     ( (a + b) / (T1 + T2)  +  (a / T1 + b / T2) / 2 ) / 2
# where a and b are that key's values.  The contributions are summed.
#
# Side effect: debug lines are printed to the FD_log handle.
# Returns the score (0 when the hashes share no key).
sub  comp_hash
{
    my ($h1, $h2) = @_;
    $h1 ||= {};
    $h2 ||= {};

    # keys() is used instead of each(): each() resumes wherever the caller
    # last left the hash iterator and can therefore skip entries.  Sorting
    # makes the log and the floating-point summation order reproducible.
    my $vall = 0.01;
    for my $key (sort keys %{$h1})
    {
        $vall += $h1->{$key};
        print FD_log " hash 1$key  $h1->{$key}\n";
    }

    my $vall2 = 0.01;
    for my $key (sort keys %{$h2})
    {
        print FD_log " hash 2$key  $h2->{$key}\n";
        $vall2 += $h2->{$key};
    }

    my $akv = 0;
    for my $key (sort keys %{$h1})
    {
        next unless exists $h2->{$key};

        my $kv  = ($h1->{$key} + $h2->{$key}) / ($vall + $vall2);
        my $kv2 = (($h1->{$key} / $vall) + ($h2->{$key} / $vall2)) / 2;

        # The literal 2 is the number of hashes containing the key.
        print FD_log "double $key 2 kv $kv kv2 $kv2\n";
        $akv += ($kv + $kv2) / 2;
    }

    return $akv;
}

# Format a point in time as "YYYY-MM-DD HH:MM:SS" in the local time zone.
#
# Argument (optional):
#   $_[0]  epoch seconds; defaults to the current time, which is what the
#          argument-less call has always returned.
#
# Returns the formatted string.
sub ndate
{
    my ($epoch) = @_;
    $epoch = time() unless defined $epoch;

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($epoch);

    # localtime() reports the year as an offset from 1900 and the month as
    # 0..11; adding the offsets is correct for every year, not only 2000-2099.
    return sprintf('%04d-%02d-%02d %02d:%02d:%02d',
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

# Expand a comma-separated list of "word value" items into a flat hash.
#
# Arguments:
#   $_[0]  string such as "word1 value1,word2 value2"; each item is split at
#          its first space.
#   $_[1]  accepted but not used.
#
# For every item:
#   * if $hash->{word} is a hash reference, each of its keys is mapped to the
#     item's value;
#   * the word itself is mapped to itself (this wins over the step above when
#     the word is also one of those keys).
# Items without a space print "input error" on STDOUT and are skipped.
#
# Returns the resulting hash as a key/value list.
sub txt2arr_one
{
    my ($w, $exp_h) = @_;
    my %tmp_hash;

    foreach my $w_tmp (split(",", $w))
    {
        if ($w_tmp =~ /(.*?) (.*)/)
        {
            my ($word, $vh) = ($1, $2);

            # each() on a scalar ("each($hash->{$w})") no longer compiles on
            # Perl 5.24+, and died at run time on non-references before
            # that; dereference explicitly and only when it is a hash.
            my $related = $hash->{$word};
            if (ref($related) eq 'HASH')
            {
                foreach my $key (keys %{$related})
                {
                    $tmp_hash{$key} = $vh;
                }
            }

            $tmp_hash{$word} = $word;
        }
        else
        {
            print "input error\n";
        }
    }

    return %tmp_hash;
}



Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.