# Group
# Extension
#
# zhangbo-NLP-plugin_eng/lib/zhangbo/NLP/eng_org.pl

#!/usr/bin/perl
use Socket;
use IO::Handle;
use IO::Select;
use IO::Socket::INET;
use lib "./";
use plugin_eng;
use POE qw(Component::Server::TCP);
#use JSON::XS;
 use Clone qw(clone);
use Encode;





#use MongoDB;
use Smart::Comments;
#use Conn_mongo_jc;
use Data::Dumper;
#use IO::File;
#use Add_info;
our @rewrite_g;
our @rewrite_l;
our @rewrite_n;
our %rewrite_all;
our $rewrite_all;
our $tfidf;
our $sem_hash;



 #my $sel = IO::Select->new($socket); ##建立select对象

#system("./test_files2.py 1>/dev/null 2>/dev/null &");
#system("./post_server.pl &");

# Load the stop-word list: every line of "stop.txt" (newline removed) becomes
# a key of $hash_st with value 1.  The dictionary loaders below consult this
# table to drop unwanted head words and related terms.
our $hash_st;
if (open(my $stop_fh, '<', "stop.txt"))
{
    while (my $line = <$stop_fh>)
    {
        chomp($line);
        $hash_st->{$line} = 1;
    }
    close($stop_fh);
}
else
{
    # Running without a stop list is still allowed (the script used to carry
    # on silently), but report it so missing filtering is not a mystery.
    warn "cannot open stop.txt: $!\n";
}

# http://poe.perl.org/?POE_Cookbook/TCP_Servers


# Include POE and POE::Component::Server::TCP.
 # Global lookup tables.  All start out undefined and are autovivified into
 # hash references by the loaders that follow.
 our $hash    = undef;    # word -> { related word -> weight }
 our $hash_bk = undef;    # written by load_dic_back
 $hash_mn     = undef;    # package global: per-word weight total (word.txt loader)


 my $hash_sc = undef;     # file-scoped score table; its loader below is commented out
#open(FD_sc, "dict.utf8.txt");
#open(FD_log, ">/tmp/engine.txt");

#while(<FD_sc>)
#{
#招收学员        13.87   8.50    n
#	my $line=$_;
#	if($line=~/(.*?)\t(.*?)\t(.*?)\t(.*)/)
#	{
#		$tt1=$1;
#		$tt2=$2;
#		$tt3=$3;
#		$tt4=$4;
#		$hash_sc->{$tt1}->{"i"}=$tt2;
#		$hash_sc->{$tt1}->{"h"}=$tt3;
#		$hash_sc->{$tt1}->{"a"}=$tt4;
#
#	}
#}
#system("pwd");
#system("ls ../");
#print "@INC\n";
#print "$INC[1]\n";
#if(! -e "./word.txt")
#{print "dic file  found ./\n";
#};
#if(! -e "$INC[1]/word.txt")
#{print "dic file  found /\n";
#};
#if(! -e "../word.txt")
#{print "dic file  found ../\n";
#};
#if(! -e "/home/lzj/shell2/word.txt")
#{print "dic file  found /home/lzj/shell2/\n";
#};

# Load synonym groups from "sy.txt" into $hash.
#
# A line containing '#' opens a group; the text after the '#' is the group id.
# The lines that follow are members of that group and are stored as
# $hash->{member}->{group id} = 0.2.  Members of a group id that has already
# been seen earlier in the file are ignored, as are ids/members whose length()
# is 3 or less.
my $sytmp;    # group ids seen so far
if (open(my $syn_fh, '<', "sy.txt"))
{
    my $group;          # id of the group currently being read
    my $collect = 0;    # 1 while reading members of a first-seen group

    while (my $line = <$syn_fh>)
    {
        if ($line =~ /\#(.*)/)
        {
            $group = $1;
            if (!exists($sytmp->{$group}))
            {
                $sytmp->{$group} = $group;
                $collect = 1;
            }
            else
            {
                $collect = 0;
            }
        }
        elsif ($collect == 1)
        {
            chomp($line);
            next if length($group) <= 3;
            next if length($line) <= 3;
            $hash->{$line}->{$group} = 0.2;
        }
    }
    close($syn_fh);
}
else
{
    # Previously a missing file was skipped without any message.
    warn "cannot open sy.txt: $!\n";
}

# Load the concept dictionary "word.txt" into $hash and $hash_mn.
#
# The file is read as "W_C=<word>" lines, each followed by a "DEF=<definition>"
# line such as
#   DEF={Unit|单位:host={information|信息:belong={computer|电脑}}}
# The definition is split on '|'; from every piece the text before the first
# ':' (or, failing that, before the first '}') is taken as a related concept
# and stored as $hash->{word}->{concept} = weight.  $hash_mn->{word}
# accumulates a per-word total.
#
# $r counts the concepts accepted so far for the current definition; the first
# accepted concept (r == 0) is weighted differently from the later ones.
# $hash_sc is consulted for a score, but nothing in the visible script fills
# it (its loader is commented out), so the fixed fallback weights apply.
my $tmp = ();    # head word from the most recent W_C= line
if (open(my $word_fh, '<', "word.txt"))
{
    # Concepts that are skipped as related terms.
    my %too_generic = map { $_ => 1 }
        ("专", "功能词", "人", "事情", "时间", "特定", "部件", "地方");
    my $r = 0;

    ENTRY:
    while (my $line = <$word_fh>)
    {
        chomp($line);

        if ($line =~ /^W_C=(.*)/)
        {
            $tmp = $1;
            $r   = 1;
            next ENTRY;
        }
        next ENTRY unless $line =~ /^DEF=(.*)/;

        my $definition = $1;
        $r = 0;
        next ENTRY if length($tmp) <= 3;
        next ENTRY if exists($hash_st->{$tmp});

        foreach my $part (split('\|', $definition))
        {
            if ($part =~ /(.*?)\:/)
            {
                my $m = $1;
                next if exists($hash_st->{$m});
                next if $too_generic{$m};

                if (exists($hash_sc->{$m}))
                {
                    my $score = $hash_sc->{$m}->{"h"};
                    $score *= 0.2 if $r != 0;
                    $score = 0.1 if $score < 0.01;
                    $hash->{$tmp}->{$m} = $score;
                }
                else
                {
                    $hash->{$tmp}->{$m} = 0.1;
                }
                $hash_mn->{$tmp} += 0.1;
                $r++;
            }
            elsif ($part =~ /(.*?)\}/)
            {
                my $m = $1;
                next if $too_generic{$m};
                next if exists($hash_st->{$m});

                my $is_first = ($r == 0);
                if (exists($hash_sc->{$m}))
                {
                    my $score = $hash_sc->{$m}->{"h"};
                    $score *= 0.5 unless $is_first;
                    $score = 0.2 if $score < 0.01;
                    $hash->{$tmp}->{$m} = $score;
                }
                else
                {
                    $hash->{$tmp}->{$m} = $is_first ? 0.1 : 1;
                }
                $hash_mn->{$tmp} += $is_first ? 0.1 : 0.5;
                $r++;
            }
        }
    }
    close($word_fh);
}
else
{
    # Previously a missing file was skipped silently and the handle leaked.
    warn "cannot open word.txt: $!\n";
}

# Merge "0babel_all_path_fin20.txt" into $hash.
#
# Each line is tab separated: the first field is the head word, every other
# field is read as "<term>:<weight>".  Head words shorter than 3 (length())
# or present in the stop list are skipped.  A term already stored for the head
# word is left untouched.  When the head word was already known to $hash
# before this line, new terms are added at 0.3 of their weight; otherwise the
# weight is stored as read.
my      $tmp=();
if (open(my $babel_fh, '<', "0babel_all_path_fin20.txt"))
{
    while (my $line = <$babel_fh>)
    {
        my ($name, @terms) = split("\t", $line);
        next if !defined($name) || length($name) < 3;

        # Must be tested before any term is stored for this head word.
        my $already_known = exists($hash->{$name}) ? 1 : 0;
        next if exists($hash_st->{$name});

        foreach my $term (@terms)
        {
            next unless $term =~ /(.*?)\:(.*)/;
            my ($av, $bv) = ($1, $2);
            next if exists($hash->{$name}->{$av});
            $hash->{$name}->{$av} = $already_known ? $bv * 0.3 : $bv;
        }
    }
    close($babel_fh);
}
else
{
    # Previously a missing file was skipped without any message.
    warn "cannot open 0babel_all_path_fin20.txt: $!\n";
}
# reloaddic()
#
# Re-read "stop.txt" into $hash_st and reload the "more.txt" and "zw.txt"
# dictionaries through load_dic().  Takes no arguments.
#
# NOTE(review): $hash_st is not emptied first, so a word removed from
# stop.txt stays a stop word until the process restarts - confirm whether
# that is intended.
sub reloaddic
{
    our $hash_st;
    if (open(my $stop_fh, '<', "stop.txt"))
    {
        while (my $line = <$stop_fh>)
        {
            chomp($line);
            $hash_st->{$line} = 1;
        }
        close($stop_fh);
    }
    else
    {
        # Keep going with the stop list already in memory.
        warn "reloaddic: cannot open stop.txt: $!\n";
    }

    load_dic("more.txt", 20, 1);
    load_dic("zw.txt", 10, 0.2);
}

# load_tfidf($path)
#
# Read a tab-separated file and fill the global $tfidf:
#   $tfidf->{col0}->{"t"}  = col1
#   $tfidf->{col0}->{"i"}  = col2
#   $tfidf->{col0}->{"ti"} = col1 * col2
# Lines are chomped so a three-column line no longer stores its newline in
# "i", and blank lines no longer create a junk key.  If the file cannot be
# opened a warning is issued and $tfidf is left as it was.
sub load_tfidf
{
    my ($path) = @_;

    open(my $fh, '<', $path) or do {
        warn "load_tfidf: cannot open $path: $!\n";
        return;
    };

    while (my $line = <$fh>)
    {
        chomp($line);
        my @ta = split("\t", $line);
        next unless @ta;

        $tfidf->{$ta[0]}->{"t"}  = $ta[1];
        $tfidf->{$ta[0]}->{"i"}  = $ta[2];
        $tfidf->{$ta[0]}->{"ti"} = $ta[2] * $ta[1];
    }
    close($fh);
    return;
}

# load_dic($path, $lsize, $zoom)
#
# Read a tab-separated dictionary: the first field of each line is the head
# word, each further field is read as "<term>:<weight>".  For every term that
# is not a stop word, store $hash->{head}->{term} = weight * zoom, replacing
# any earlier value.
#
#   $path  - file to read
#   $lsize - per-line cap on stored terms.  The cap is tested before the
#            counter is incremented, so up to $lsize + 1 terms are stored.
#   $zoom  - weight multiplier; values that are not > 0 fall back to 0.5
#
# Prints "load <path> ok" on success.  If the file cannot be opened, a
# warning is issued and nothing is loaded (before, "ok" was printed anyway).
sub load_dic
{
    my ($path, $lsize, $zoom) = @_;
    my $zoomin = 0.5;
    if ($zoom > 0)
    {
        $zoomin = $zoom;
    }

    open(my $fh, '<', $path) or do {
        warn "load_dic: cannot open $path: $!\n";
        return;
    };

    while (my $line = <$fh>)
    {
        my ($name, @terms) = split("\t", $line);
        my $ccn = 0;    # terms stored for this line so far

        foreach my $term (@terms)
        {
            next unless $term =~ /(.*?)\:(.*)/;
            my ($av, $bv) = ($1, $2);

            next if exists($hash_st->{$av});
            last if $ccn > $lsize;
            $ccn++;

            $hash->{$name}->{$av} = $bv * $zoomin;
        }
    }
    close($fh);
    print "load $path ok\n";
}
# load_sem_dic($path)
#
# Read a whitespace-separated file: the first token of each line is a group
# name, every further token a member.  Sets $sem_hash->{group}->{member} = 1.
# Further arguments are accepted for symmetry with load_dic() but have no
# effect here.
#
# Prints "load <path> ok" on success.  If the file cannot be opened, a
# warning is issued and nothing is loaded (before, "ok" was printed anyway).
sub load_sem_dic
{
    my ($path) = @_;

    open(my $fh, '<', $path) or do {
        warn "load_sem_dic: cannot open $path: $!\n";
        return;
    };

    while (my $line = <$fh>)
    {
        chomp($line);
        my ($name, @members) = split(" ", $line);
        foreach my $member (@members)
        {
            $sem_hash->{$name}->{$member} = 1;
        }
    }
    close($fh);
    print "load $path ok\n";
}


# load_dic_back($path, $lsize, $zoom)
#
# Build the reverse index of a load_dic()-style file: for every
# "<term>:<weight>" field of a line whose first field is the head word, set
# $hash_bk->{term}->{head} = 1.  Stop-word terms are skipped.
#
#   $path  - file to read
#   $lsize - per-line cap on indexed terms.  The cap is tested before the
#            counter is incremented, so up to $lsize + 1 terms are indexed.
#   $zoom  - accepted for symmetry with load_dic(); the weight is not stored
#
# Prints "load <path> ok" on success.  If the file cannot be opened, a
# warning is issued and nothing is loaded (before, "ok" was printed anyway).
sub load_dic_back
{
    my ($path, $lsize) = @_;

    open(my $fh, '<', $path) or do {
        warn "load_dic_back: cannot open $path: $!\n";
        return;
    };

    while (my $line = <$fh>)
    {
        my ($name, @terms) = split("\t", $line);
        my $ccn = 0;    # terms indexed for this line so far

        foreach my $term (@terms)
        {
            next unless $term =~ /(.*?)\:(.*)/;
            my $av = $1;

            next if exists($hash_st->{$av});
            last if $ccn > $lsize;
            $ccn++;

            $hash_bk->{$av}->{$name} = 1;
        }
    }
    close($fh);
    print "load $path ok\n";
}






#close FD_log;

# Start-up data loading.  The order matters: load_dic() overwrites entries
# already in $hash, so later files take precedence for the same word/term.
load_tfidf("dict.utf8.txt");                          # fills $tfidf
load_dic("babel_all_path_fin20.txt",30,1);            # forward table $hash
load_dic_back("babel_all_path_fin20.txt",1000,1);     # reverse table $hash_bk
load_dic("babel_add.txt",30,0.6);
load_dic("名词.txt",20,0.6);
load_dic("动词.txt",20,0.6);
load_dic("shangpin.txt",20,0.7);
load_dic("more.txt",20,1);
# NOTE(review): load_dic_add is not defined in the part of the file visible
# here - confirm it exists further down or in plugin_eng.
load_dic_add("wordjieba_vec.txt",21,0.001);
load_sem_dic("2q.dic");                               # fills $sem_hash

#load_dic("base_total.txt",30,1);
#load_dic("zw.txt",10,0.2);
#reloaddic();
# NOTE(review): load_rewrite is defined outside the visible part of the file;
# presumably it fills the rewrite_* globals declared at the top - confirm.
load_rewrite();
#	print Data::Dumper->Dump([%{$rewrite_g[1]}]);
	#print Data::Dumper->Dump([%{$hash}]);

#print "-----\n";
#	print Data::Dumper->Dump([%{$rewrite_all}]);

#print "-----\n";
print "server start\n";

my $n=0;

# Return every line of $path (newlines kept), or an empty list when the file
# cannot be read.  Used instead of shelling out to cat/head/tail with a file
# name that arrives over the network.
sub _srv_read_lines
{
    my ($path) = @_;
    open(my $fh, '<', $path) or return ();
    my @lines = <$fh>;
    close($fh);
    return @lines;
}

# Return the output of `grep -c -- PATTERN FILE` (the count of matching lines
# followed by a newline), or "" when grep could not be run.  The list form of
# open() is used so the client-supplied pattern never reaches a shell.
sub _srv_grep_count
{
    my ($pattern, $path) = @_;
    open(my $gh, '-|', 'grep', '-c', '--', $pattern, $path) or return "";
    my $out = <$gh>;
    close($gh);
    return defined($out) ? $out : "";
}

# TCP command server on port 11221.
#
# Each client line is matched against the command patterns below, in order
# (most patterns are not anchored, so the order is significant).  The result
# list is written back followed by "\n", and the connection is then shut
# down, except for the dist / dist_seg commands which keep it open.
POE::Component::Server::TCP->new(
  Alias       => "echo_server",
  Port        => 11221,
  ClientInput => sub {

    my ($session, $heap, $input) = @_[SESSION, HEAP, ARG0];
    print "input file $input",length($input),"\n";
    my @res;
    my $noshudown=0;    # set to "yes" by commands that keep the connection open

    # Input too short to be a command: just close the connection.
    if (length($input) < 3)
    {
        $_[KERNEL]->yield("shutdown");
        return;
    }

    if ($input =~ /^reload_dic/)
    {
        reloaddic();
        load_rewrite();
    }
    elsif ($input =~ /^reload_plugin (.*)/)
    {
        my $kkk = $1;
        # FIXME(security): $kkk comes straight from the network and is fed to
        # a string eval; restrict it to a known list of plugin names.
        delete $INC{"$kkk.pm"};
        eval("require $kkk;");
        print "require $kkk\n";
    }
    elsif ($input =~ /(.*) pinyin/)
    {
        my $kkk = $1;
        @res = plugin_eng::pinyin($kkk);
    }
    elsif ($input =~ /(.*) rela/)
    {
        my $kkk = $1;
        if (length($kkk) > 2)
        {
            @res = get_w_rela($kkk);
        }
    }
    elsif ($input =~ /(.*) num_n (\d+) (\d+)/)
    {
        my $kkk = $1;
        @res = plugin_eng::num_n($kkk, $2, $3);
    }
    elsif ($input =~ /^dist_seg (.*?) (.*?) (.*)/)
    {
        # Distance between two sentences/words.
        my $i1 = $1;
        my $i2 = $2;
        my $i3 = $3;
        @res = dist($i1, $i2, $i3);
        print "句子距离\n";
        $noshudown = "yes";
    }
    elsif ($input =~ /^art_sem (.*)/)
    {
        # Attach every word found in a $sem_hash group to the nearest word
        # whose part of speech (plugin_eng::_ispos) matches /n|r/.
        my $i1 = $1;
        # Was `cat $il` (letter l): an undefined variable, so cat ran with no
        # file argument and blocked on stdin.
        my $allart = join("", _srv_read_lines($i1));
        $allart =~ s/\n/ /g;
        my $art = seg_txt($allart);
        my @artlist = split(" ", $art);
        my $sem_obj;     # group -> { position -> word }
        my $name_obj;    # position -> word, for /n|r/ words

        for my $seqi (0 .. scalar(@artlist) - 1)
        {
            my $wordn = $artlist[$seqi];
            my $pos = plugin_eng::_ispos($wordn);
            if ($pos =~ /n|r/)
            {
                $name_obj->{$seqi} = $wordn;
            }
            else
            {
                while (my ($sk, $sv) = each(%{$sem_hash}))
                {
                    if (exists($sem_hash->{$sk}->{$wordn}))
                    {
                        $sem_obj->{$sk}->{$seqi} = $wordn;
                    }
                }
            }
        }

        my $res_sem;
        while (my ($kk, $vv) = each(%{$sem_obj}))
        {
            while (my ($kk1, $vv1) = each(%{$vv}))
            {
                my $minn = 10000000;
                my $org_w;
                while (my ($kk2, $vv2) = each(%{$name_obj}))
                {
                    my $dism = abs($kk2 - $kk1);
                    if ($dism < $minn)
                    {
                        $minn  = $dism;
                        $org_w = $vv2;
                    }
                }
                if (length($org_w) > 1)
                {
                    $res_sem->{$org_w} .= "$vv1 ";
                }
            }
        }
        while (my ($kk1, $vv1) = each(%{$res_sem}))
        {
            push(@res, "$kk1:$vv1\n");
        }
        push(@res, "\n");
    }
    elsif ($input =~ /^tfidf (.*?) (.*?) (.*)/)
    {
        my $i1 = $1;
        my $i2 = $2;
        my $i3 = $3;
        @res = tfidf($i1, $i2, $i3);
    }
    elsif ($input =~ /^dist (.*?) (.*)/)
    {
        # Distance between two sentences/words.
        @res = dist($1, $2);
        print "词距离\n";
        $noshudown = "yes";
    }
    elsif ($input =~ /^get_event (.*)/)
    {
        # Event extraction: replies directly and closes the connection.
        my $i = $1;
        $i =~ s/\n//g;
        my $res = get_event($i);
        $heap->{client}->put($res." ");
        $_[KERNEL]->yield("shutdown");
        return;
    }
    elsif ($input =~ /.* deep/)
    {
        # Deep analysis of indirect semantic links.
        @res = deep_article($input);
    }
    # NOTE(review): the leading '~' is matched literally; it may have been
    # meant as '^'.  Left unchanged - confirm against the clients.
    elsif ($input =~ /~vsm_all (.*)/)
    {
        # Full-text vector.
        my $i = $1;
        $i =~ s/\n//g;
        $res[0] = vsm_all($i);
    }
    elsif ($input =~ /vsm_all_sock (.*)/)
    {
        # Full-text vector.
        my $i = $1;
        $i =~ s/\n//g;
        @res = vsm_all_sock($i);
    }
    elsif ($input =~ /modif (.*?) (.*)/)
    {
        # Correct learned vector distances, driven by the top-3 debug output
        # of search_article() for the file $ic.
        my $ic = $1;
        my $tv = $2;
        my @allf = _srv_read_lines($ic);
        my @fdebug = search_article($ic, 3);
        my $remove_log;
        my $iter = 0;

        if (($fdebug[0] =~ /1-1-0-.*/) || ($fdebug[1] =~ /1-1-0-.*/))
        {
            my $asp = seg_txt($allf[0]);
            chomp($asp);
            chomp($asp);
            $remove_log .= reduce_v($allf[2], $asp, $tv);
            $res[$iter] = $remove_log;
            $iter++;
        }
        if ($fdebug[0] =~ /1-2-(.*?)-/)
        {
            my $dn = $1;
            my $dt;
            if ($fdebug[1] =~ /1-1-(.*?)-/)
            {
                $dt = abs($dn - $1);
            }

            # Repeat until the "1-2-" entry no longer leads with a positive score.
            while ($dn > 0)
            {
                print "error $dn\n";
                my @asp  = split("_debug_", $fdebug[0]);
                my @asp2 = split("_debug_", $fdebug[1]);
                $remove_log  = reduce_v($allf[3], $asp[1], (0 - $tv));
                $remove_log .= reduce_v($allf[2], $asp2[1], ($dt / 3 + $tv));
                @fdebug = search_article($ic, 3);
                print "resmsg @fdebug\n";
                if ($fdebug[0] =~ /1-2-(.*?)-/)
                {
                    $dn = $1;
                    if ($fdebug[1] =~ /1-1-(.*?)-/)
                    {
                        $dt = abs($dn - $1);
                    }
                    else
                    {
                        $dn = 0;
                        $remove_log .= "error $fdebug[0] \n";
                    }
                }
                else
                {
                    $dn = 0;
                    $remove_log .= "ok over\n";
                }
                $res[$iter] = $remove_log;
                $iter++;
            }
        }
    }
    elsif ($input =~ /(.*) path/)
    {
        # Relation paths between the two entries of file $kkk.
        my $kkk = $1;
        if (! -e $kkk)
        {
            # Used to return here without replying, leaving the client waiting.
            $res[0] = "no file ";
        }
        else
        {
            my @res1 = mean_path("$kkk path");
            my @res2 = deep_article_path($kkk);
            @res = (@res1, @res2);
        }
    }
    elsif ($input =~ /(.*) shangxia/)
    {
        # Decide which of the first/last line of $kkk "belongs to" the other.
        my $kkk = $1;
        if (! -e $kkk)
        {
            # Used to return here without replying, leaving the client waiting.
            $res[0] = "no file ";
        }
        else
        {
            my @res1 = mean_path("$kkk path");
            my $r1 = scalar(@res1);

            # Read up front so both names are also available to the
            # "low relation" message below (they used to be out of scope).
            my @kkk_lines = _srv_read_lines($kkk);
            my $one = @kkk_lines ? $kkk_lines[0]  : "";
            my $two = @kkk_lines ? $kkk_lines[-1] : "";
            chomp($one);
            chomp($two);

            if ($r1 > 40)
            {
                my $ra1;
                my $ra2;
                my $idf1 = tfidf("1", "t", $one);
                my $idf2 = tfidf("1", "t", $two);
                if ($idf1 =~ /.*?\:(.*?) /)
                {
                    $ra1 = $1;
                }
                if ($idf2 =~ /.*?\:(.*?) /)
                {
                    $ra2 = $1;
                }

                if (($ra1 > 0) && ($ra2 > 0))
                {
                    if ($ra1 > ($ra2 * 1))
                    {
                        $res[0] = "$two 属于 $one $ra1 $ra2";
                    }
                    elsif ($ra2 > ($ra1 * 1))
                    {
                        $res[0] = "$one 属于 $two $ra1 $ra2";
                    }
                    else
                    {
                        $res[0] = "$one 同位 $two $ra1 $ra2";
                    }
                }
                else
                {
                    # No usable tfidf value: compare how many lines of
                    # base_total.txt match each word.
                    $ra1 = _srv_grep_count($one, "base_total.txt");
                    $ra2 = _srv_grep_count($two, "base_total.txt");
                    my ($t1) = $ra1 =~ /(\d+)\s.*/;
                    my ($t2) = $ra2 =~ /(\d+)\s.*/;

                    if (!defined($t1) || !defined($t2))
                    {
                        # Used to return without replying in some of these cases.
                        $res[0] = "inputerror ";
                    }
                    elsif ($t1 > ($t2 * 3))
                    {
                        $res[0] = "$two 属于 $one $t1 $t2";
                    }
                    elsif ($t2 > ($t1 * 3))
                    {
                        $res[0] = "$one 属于 $two $t1 $t2";
                    }
                    else
                    {
                        $res[0] = "$one 同位 $two $t1 $t2";
                    }
                }
            }
            else
            {
                $res[0] = "可能关联不大或者同位 $two $one $r1";
            }
        }
    }
    elsif ($input =~ /(.*) simple_path/)
    {
        # Relation paths, joined into a single reply string.
        my $kkk = $1;
        if (! -e $kkk)
        {
            $res[0] = "no file ";
        }
        else
        {
            my @tmpr = deep_article_path($kkk);
            $res[0] = join(" ", @tmpr);
        }
    }
    elsif ($input =~ /(.*) showtop (\d+)/)
    {
        @res = search_article($1, $2);
    }
    elsif ($input =~ /(.*) showtop_more (\d+) (\d)/)
    {
        @res = search_article($1, $2, $3);
    }
    else
    {
        @res = search_article($input);
    }

    $heap->{client}->put(@res);
    $heap->{client}->put("\n");

    unless ($noshudown eq "yes")
    {
        $_[KERNEL]->yield("shutdown");
    }
    return;
},
  ClientDisconnected => sub {
           #print "Client disconnected\n"; # log it
         }
);
POE::Kernel->run;

#$file=$ARGV[0];
# mean_path("<file> path")
#
# Look for relation paths between two entries of <file>.  The file is read as
# leading line(s), a separator line starting with '-', then numbered lines.
# Every line before the separator is prefixed with "0 ", every line after it
# with its 1-based index.  The first prefixed line (prefix stripped again) is
# the source $in, the third one the target $to.
#
# The neighbours returned by txt2arr_input() are walked up to three levels
# deep, first from $in towards $to and then from $to towards $in.  A result
# line is produced for each hit; edges already walked at the third level are
# remembered in $path_road and not walked twice.  The last element of the
# returned list reports the six loop counters.
#
# Changes from the earlier version: hashes are iterated with each(%{...})
# (each on a bare reference is rejected by Perl 5.24+); the numbered lines
# are kept in memory instead of being written to, copied in and left behind
# in /tmp; the unused `cat $file` shell-out is gone.
sub mean_path
{
    my $file = $_[0];
    chomp($file);
    $file =~ s/ path//g;

    # Numbered copy of the input; element N is what used to be line N+1 of
    # the temporary file.
    my @numbered;
    if (open(my $src_fh, '<', $file))
    {
        my $seen_sep = 0;
        my $j = 1;
        while (my $line = <$src_fh>)
        {
            if ($seen_sep == 0)
            {
                if ($line =~ /^-/)
                {
                    $seen_sep++;
                    push(@numbered, $line);
                }
                else
                {
                    push(@numbered, "0 $line");
                }
            }
            else
            {
                push(@numbered, "$j $line");
                $j++;
            }
        }
        close($src_fh);
    }
    else
    {
        # As before, an unreadable file is treated as empty input.
        warn "mean_path: cannot open $file: $!\n";
    }

    my $in = $numbered[0];
    chomp($in);
    $in =~ s/0 (.*)/$1/g;
    print FD_log "first in= $in\n";

    my $innext = $numbered[1];
    print FD_log "innext = $innext\n";

    my $to = $numbered[2];
    chomp($to);
    $to =~ s/1 (.*)/$1/g;
    print FD_log "to=$to\n";

    my $path_road;    # "a-b" => 1 for every edge already walked
    my @m_res;
    my ($total1, $total2, $total3, $total4, $total5, $total6) = (0, 0, 0, 0, 0, 0);

    print FD_log "deep in= $in\n";

    # Walk from $in towards $to.
    my ($d) = txt2arr_input("0 ".$in, $exp_hash);
    while (my ($k, $v) = each(%{$d}))
    {
        $total1++;
        if ($k eq $to)
        {
            push(@m_res, "$in-$to-"."直接相关 $v");
        }

        my ($dee) = txt2arr_input("0 ".$k, $exp_hash);
        while (my ($deek, $deev) = each(%{$dee}))
        {
            $total2++;
            print FD_log "deep deek2= $deek\n";
            if ($deek eq $to)
            {
                push(@m_res, "$in-$to-"."1层->$k -> $deek :".$v." ".$deev);
            }

            my ($deek2) = txt2arr_input("0 ".$deek, $exp_hash);
            # In this direction the third level is only walked for words
            # that have an entry in $hash.
            if (exists($hash->{$deek}))
            {
                while (my ($deek3, $deev3) = each(%{$deek2}))
                {
                    $total3++;
                    next if exists($path_road->{$deek."-".$deek3});
                    if ($deek3 eq $to)
                    {
                        push(@m_res, "$in-$to-"."2层 ->$k -> $deek -> $deek3 :".$v." ".$deev." ".$deev3);
                    }
                    $path_road->{$deek."-".$deek3} = 1;
                }
            }
            $path_road->{$k."-".$deek} = 1;
        }
        $path_road->{$in."-".$k} = 1;
    }

    # Walk back from $to towards $in.
    my ($d_rev) = txt2arr_input("0 ".$to, $exp_hash);
    while (my ($k, $v) = each(%{$d_rev}))
    {
        $total4++;
        if ($k eq $in)
        {
            push(@m_res, "$to-$in-"."直接相关 $v");
        }

        my ($dee) = txt2arr_input("0 ".$k, $exp_hash);
        while (my ($deek, $deev) = each(%{$dee}))
        {
            $total5++;
            if ($deek eq $in)
            {
                push(@m_res, "$to-$in-"."1层-> $k -> $deek :".$v." ".$deev);
            }

            my ($deek2) = txt2arr_input("0 ".$deek, $exp_hash);
            while (my ($deek3, $deev3) = each(%{$deek2}))
            {
                $total6++;
                next if exists($path_road->{$deek."-".$deek3});
                if ($deek3 eq $in)
                {
                    push(@m_res, "$to-$in-"."2层-> $k -> $deek -> $deek3 :".$v." ".$deev." ".$deev3);
                }
                $path_road->{$deek."-".$deek3} = 1;
            }
            $path_road->{$k."-".$deek} = 1;
        }
        $path_road->{$to."-".$k} = 1;
    }

    push(@m_res, "循环次数 .$total1 $total2 $total3 $total4 $total5 $total6");
    return @m_res;
}
# deep_article("<file> deep")
#
# Score every candidate line of <file> against every query line, using
# second-level expansions of the query terms.
#
# <file> is read as query line(s), a separator line starting with '-', then
# candidate lines.  The lines are numbered ("0 " for queries, a 1-based index
# for candidates), written to /tmp/<rand>.sctxt and handed to
# ./client_qc.pl, which is expected to write /tmp/<rand>.sctxt2.  From that
# file:
#   * each line before the "- -" separator is a query: its terms
#     (txt2arr_input) are expanded one more level and the summed weights are
#     stored in @disthash;
#   * each later line is a candidate: it is parsed with txt2arr3() and
#     scored against every query with comp_hash().
#
# Returns a list of "<query no>-<candidate no>-<score>" strings, per query in
# descending score order.  Returns an empty list if the input or a temporary
# file cannot be opened.
#
# Changes from the earlier version: hashes are iterated with each(%{...})
# (each on a bare reference is rejected by Perl 5.24+); every open() is
# checked; the result handle is closed; the unused `cat $file` shell-out is
# gone.
sub deep_article
{
    my $file = $_[0];
    chomp($file);
    $file =~ s/ deep//g;

    my @thash;            # per query: { candidate no => score }
    my @inputa;           # raw query lines
    my @inputb;           # raw candidate lines
    my @disthash;         # per query: expanded term weights
    my @dist_seq_hash;    # never filled here; comp_hash() receives undef entries

    open(my $src_fh, '<', $file) or do {
        warn "deep_article: cannot open $file: $!\n";
        return ();
    };
    my $rand = rand();
    open(my $num_fh, '>', "/tmp/$rand.sctxt") or do {
        warn "deep_article: cannot write /tmp/$rand.sctxt: $!\n";
        close($src_fh);
        return ();
    };

    my $first = 0;
    my $j = 1;
    while (my $in = <$src_fh>)
    {
        if ($first == 0)
        {
            if ($in =~ /^-/)
            {
                $first++;
                print {$num_fh} "$in";
            }
            else
            {
                print {$num_fh} "0 $in";
                push(@inputa, $in);
            }
        }
        else
        {
            print {$num_fh} "$j $in";
            push(@inputb, $in);
            $j++;
        }
    }
    close($src_fh);
    close($num_fh);

    system("./client_qc.pl /tmp/$rand.sctxt  /tmp/$rand.sctxt2 ");
    print("./client_qc.pl   /tmp/$rand.sctxt  /tmp/$rand.sctxt2 2>/dev/null 1>/dev/null");
    unlink("/tmp/$rand.sctxt");

    # NOTE(review): /tmp/<rand>.sctxt2 is left behind; the unlink for it was
    # already commented out, so it is kept that way here.
    open(my $seg_fh, '<', "/tmp/$rand.sctxt2") or do {
        warn "deep_article: cannot open /tmp/$rand.sctxt2: $!\n";
        return ();
    };
    print FD_log "open /tmp/$rand.sctxt2 \n";

    my $kk = 0;    # number of queries stored so far
    my $f  = 0;    # 0 while still reading query lines
    while (my $in = <$seg_fh>)
    {
        print FD_log "start  \n";
        print FD_log "line ttt $kk $in \n";
        chomp($in);

        if ($f == 0)
        {
            if (($in =~ /^- -/) || ($in =~ /^ - -/))
            {
                $f++;
                next;
            }

            $in = rewrite_gx($in);
            print "in == $in\n";
            my ($d) = txt2arr_input($in, $exp_hash);

            # Expand every query term one more level and add up the weights.
            my $deep;
            while (my ($k, $v) = each(%{$d}))
            {
                my ($dee) = txt2arr_input("0 ".$k, $exp_hash);
                while (my ($deek, $deev) = each(%{$dee}))
                {
                    $deep->{$deek} += $deev;
                    print FD_log "deep $deek $deev\n";
                }
            }

            $disthash[$kk] = clone($deep);
            $kk++;
            next;
        }

        $in = rewrite_gx($in);
        print "in2 == $in\n";
        my ($my_hash2, $n_line, $my_hash2_seq, $og2) = txt2arr3($in, $exp_hash);
        for my $nnn (0 .. (scalar(@disthash) - 1))
        {
            my $n = comp_hash($disthash[$nnn], $my_hash2, $dist_seq_hash[$nnn], $my_hash2_seq, $disthashog[$nnn], $og2);
            my $shnn = $nnn + 1;
            print FD_log "totle  $in    $shnn-$n_line=$n\n";
            print FD_log "$inputa[$nnn]\n$inputb[$n_line-1]\n";
            $thash[$nnn]->{$n_line} = $n;
        }
        $f++;
    }
    close($seg_fh);

    my @m_res;
    for my $nnn (0 .. (scalar(@disthash) - 1))
    {
        my @key = sort { $thash[$nnn]->{$b} <=> $thash[$nnn]->{$a} } keys %{$thash[$nnn]};
        my $shnnn = $nnn + 1;
        foreach my $i (@key)
        {
            push(@m_res, "$shnnn-$i-".$thash[$nnn]->{$i});
        }
    }
    print FD_log "return ",scalar(@m_res),"\n";
    return @m_res;
}
# search_article($file, $showtop, $show_more)
#
# Scores every candidate line of $file against every query line of the same
# file and returns the scores as a list of strings.
#
# File layout, as consumed by the loop below: the lines before the first
# segmented line that starts with "- -" (or " - -") are queries; every line
# after that separator is a candidate.  Each line is segmented with
# seg_txt(); queries are vectorised with txt2arr_input(), candidates with
# txt2arr3(), and each (query, candidate) pair is scored with comp_hash().
#
# Parameters:
#   $file      - path of the file to read (a trailing newline is removed).
#   $showtop   - when > 0, only the $showtop best candidates per query are
#                returned and a display text is appended to each entry.
#   $show_more - when > 0 (and $showtop > 0), the display text is the raw
#                candidate line followed by the next $show_more raw lines.
#
# Returns a list of strings, grouped by query and sorted by descending score:
#   "<query#>-<candidate#>-<score>"                     ($showtop == 0)
#   "<query#>-<candidate#>-<score>-<display text>"      ($showtop  > 0)
# Both numbers are 1-based.  An unreadable file yields an empty list.
#
# Package globals written: $file, @disthashog, @debugsame.
sub search_article
{
    # $file stays a package global, exactly as before, in case the caller
    # looks at the chomped value afterwards.
    $file = $_[0];
    chomp($file);
    my $showtop   = $_[1] || 0;
    my $show_more = $_[2];

    # Read the file directly.  The previous implementation ran `cat $file`
    # through the shell, which executes whatever shell syntax the file name
    # happens to contain and breaks on names with spaces.
    my @input_all;
    if (open(my $in_fh, '<', $file)) {
        @input_all = <$in_fh>;
        close($in_fh);
    }
    else {
        warn "search_article: cannot open $file: $!\n";
    }

    my $f  = 0;    # stays 0 while query lines are being read
    my $kk = 0;    # number of query vectors stored so far
    my $kj = 0;    # number of candidate lines seen so far
    my $samel;     # matched-term text returned by the latest comp_hash()
    my (@thash, @disthash, @dist_seq_hash, @input_b, $input_dis);

    # The loop deliberately iterates through $_: foreach localises $_, so a
    # callee that overwrites $_ cannot disturb the caller's $_.
    for (0 .. $#input_all) {
        my $nln = $_;
        my $in  = $input_all[$nln];
        print FD_log "start  \n";
        print FD_log "line ttt $kk $in \n";
        chomp($in);
        $in = seg_txt($in);

        if ($f == 0) {
            # The separator line ends the query section.
            if (($in =~ /^- -/) || ($in =~ /^ - -/)) {
                $f++;
                next;
            }
            my ($d, undef, $d_seq, $og) = txt2arr_input("$kk $in", $exp_hash);
            $disthash[$kk]      = clone($d);
            $disthashog[$kk]    = clone($og);
            $dist_seq_hash[$kk] = clone($d_seq);
            $kk++;
            next;
        }

        # Candidate line: txt2arr3() hands the leading "$kj" back as $n_line,
        # which is the 0-based candidate index used as key everywhere below.
        $input_b[$kj] = $input_all[$nln];
        my ($my_hash2, $n_line, $my_hash2_seq, $og2) = txt2arr3("$kj $in", $exp_hash);
        $kj++;

        for my $nnn (0 .. $#disthash) {
            (my $n, $samel) = comp_hash(
                $disthash[$nnn],      $my_hash2,
                $dist_seq_hash[$nnn], $my_hash2_seq,
                $disthashog[$nnn],    $og2,
            );
            $thash[$nnn]->{$n_line}     = $n;
            $debugsame[$nnn]->{$n_line} = $samel;
        }

        if ($showtop > 0) {
            if ($show_more > 0) {
                $input_dis->{$n_line} .= " >>> " . $input_all[$nln] . "  ";
                for my $nl (0 .. $show_more - 1) {
                    my $idx = $nln + 1 + $nl;
                    last if $idx > $#input_all;    # no context past end of file
                    $input_dis->{$n_line} .= $input_all[$idx];
                }
            }
            else {
                $input_dis->{$n_line} = $input_b[$n_line] . " _debug_ $samel";
            }
        }
        $f++;
    }

    my @m_res;
    for my $nnn (0 .. $#disthash) {
        my $scores = $thash[$nnn] || {};
        my @key    = sort { $scores->{$b} <=> $scores->{$a} } keys %{$scores};
        my $shnnn  = $nnn + 1;
        my $taken  = 0;
        for my $i (@key) {
            last if $showtop > 0 && $taken >= $showtop;
            my $jjkk  = $i + 1;
            my $entry = "$shnnn-$jjkk-" . $scores->{$i};
            $entry .= "-" . $input_dis->{$i} if $showtop > 0;
            push(@m_res, $entry);
            $taken++;
        }
    }
    return @m_res;
}


#	}
#	);
#$poe_kernel->run();

# txt2arr($input_t, $exp_h)
#
# Turns one whitespace-separated line into a weighted term hash.
#   - The first token is taken as the line id and is not counted.
#   - Tokens found in the stop list ($hash_st) or shorter than 3 (as measured
#     by length()) are ignored.
#   - Every remaining token adds 1 to its own weight; when the token has an
#     entry in the dictionary ($hash), each related term that is not a stop
#     word gets the dictionary value added to its weight.
# $exp_h is accepted but not used.
#
# Returns (\%weights, $line_id).
# Package globals touched: @res (emptied), $k and $v (loop scratch).
sub txt2arr
{
    my ($text, $unused_exp) = @_;
    my $weights = ();
    my ($line_id, @words) = split(" ", $text);

    @res = ();

    foreach my $word (@words) {
        next if exists($hash_st->{$word});
        next if length($word) < 3;

        $weights->{$word}++;

        next unless exists($hash->{$word});
        while (($k, $v) = each(%{$hash->{$word}})) {
            next if exists($hash_st->{$k});
            $weights->{$k} += $v;
        }
    }

    return (\%{$weights}, $line_id);
}				

#print "output = $res\n";
#		 $heap->{client}->stop();
#	$session->stop();


# Start the server.

# comp_hash($h1, $h2, $h3, $h4, $org1, $org2)
#
# Similarity score between two weighted term hashes.
#
# Parameters:
#   $h1, $h2     - hash refs mapping term => weight.
#   $h3, $h4     - accepted for interface compatibility; they were only used
#                  by a sequence-similarity stage that was permanently
#                  disabled (`if(0)`) and has been removed here.
#   $org1, $org2 - accepted for interface compatibility; not used.
#
# Algorithm:
#   * $vall / $vall2 are 0.001 plus the sum of the positive weights of
#     $h1 / $h2.
#   * A term is "shared" when it is a key of $h1 and has a positive weight in
#     $h2.  Shared terms whose $h1 weight is negative are skipped.
#   * Each shared term contributes the mean of
#         ($h1{t} + $h2{t}) / ($vall + $vall2)
#     and
#         ($h1{t} / $vall + $h2{t} / $vall2) / 2,
#     divided by a further 10 when either weight is below 0.001.
#
# Returns the list ($score, $matched), where $matched is a string listing the
# shared terms (wrapped in the "原词(...)" marker when both weights exceed 2)
# or undef when nothing is shared.
#
# NOTE: the return value is a two-element list.  A call in scalar context
# ("$n = comp_hash(...)") receives $matched, not the score; use
# "my ($score) = comp_hash(...)".
sub  comp_hash
{
    my ($h1, $h2, $h3, $h4, $org1, $org2) = @_;

    my $samel;
    my $akv   = 0;
    my $vall  = 0.001;
    my $vall2 = 0.001;
    my %seen;    # term => number of input hashes that count it

    for my $k (keys %{$h1}) {
        my $v = $h1->{$k};
        $seen{$k}++;
        $vall += $v if $v > 0;
        print FD_log " hash 1 k $k v  $v\n";
    }

    for my $k (keys %{$h2}) {
        my $v = $h2->{$k};
        if ($v > 0) {
            $seen{$k}++;
            $vall2 += $v;
        }
        print FD_log " hash 2 k $k v $v\n";
    }

    for my $k (keys %seen) {
        next if $seen{$k} < 2;
        next if $h1->{$k} < 0;

        my $kv  = ($h1->{$k} + $h2->{$k}) / ($vall + $vall2);
        my $kv2 = (($h1->{$k} / $vall) + ($h2->{$k} / $vall2)) / 2;

        # Near-zero weights are damped by an extra factor of 10.
        my $divisor = (($h1->{$k} < 0.001) || ($h2->{$k} < 0.001)) ? 20 : 2;
        my $kv3     = ($kv + $kv2) / $divisor;

        if (($h1->{$k} > 2) && ($h2->{$k} > 2)) {
            $samel .= " 原词($k) ";
        }
        else {
            $samel .= " $k ";
        }

        $akv += $kv3;
    }

    return ($akv, $samel);
}

# vsm_all($path)
#
# Reads the whole file at $path, removes every newline, segments the text
# with seg_txt() and vectorises it with txt2arr3().
#
# Returns a list of lines "<term> <weight> <rank>\n", sorted by descending
# weight, with <rank> counting from 0.  An unreadable file is treated as
# empty text.
#
# The file is read with a checked three-argument open; the previous
# `cat $_[0]` went through the shell and executed whatever shell syntax the
# path contained.
sub vsm_all
{
    my $path = $_[0];

    my $la = '';
    if (open(my $fh, '<', $path)) {
        local $/;    # slurp mode
        my $content = <$fh>;
        $la = $content if defined $content;
        close($fh);
    }
    else {
        warn "vsm_all: cannot open $path: $!\n";
    }
    $la =~ s/\n//g;

    my $stra  = seg_txt($la);
    my ($arr) = txt2arr3("0 " . $stra);

    my @key = sort { $arr->{$b} <=> $arr->{$a} } keys %{$arr};
    my @vr;
    for my $n (0 .. $#key) {
        push(@vr, $key[$n] . " " . $arr->{$key[$n]} . " $n\n");
    }
    return @vr;
}
 # vsm_all_sock($text)
 #
 # Same vectorisation as vsm_all(), but for text passed in directly: all
 # spaces are removed, the text is segmented with seg_txt() and vectorised
 # with txt2arr3().
 #
 # Returns one string of "<term>:<weight> " pairs in descending weight
 # order, or undef when the vector is empty.
 sub vsm_all_sock
{
    my $text = $_[0];
    $text =~ s/ //g;

    my $segmented = seg_txt($text);
    my ($arr)     = txt2arr3("0 " . $segmented);

    my $vr;
    foreach my $term (sort { $arr->{$b} <=> $arr->{$a} } keys %{$arr}) {
        $vr .= $term . ":" . $arr->{$term} . " ";
    }
    return $vr;
}
# ndate()
#
# Returns the current local time as "YYYY-MM-DD HH:MM:SS".
#
# The broken-down time is stored in the package globals $sec, $min, $hour,
# $mday, $mon, $year, $wday, $yday and $isdst.  $mon is made 1-based and every
# single-digit field is padded with a leading "0".
# The year comes from a textual substitution on localtime's year value (the
# first "1" and everything after it becomes "20" plus the remainder), e.g.
# 124 -> 2024.
sub ndate
{
    ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(time());
    $year =~ s/1(.*)/20$1/;
    $mon += 1;

    # foreach aliases the globals, so the padding is applied in place.
    foreach my $field ($mon, $mday, $hour, $min, $sec) {
        $field = "0" . "$field" if $field =~ /^\d$/;
    }

    my $res = join("-", $year, $mon, $mday) . " " . join(":", $hour, $min, $sec);
    return $res;
}

# txt2arr_one($spec, $exp_h)
#
# $spec is a comma-separated list of "<word> <value>" items (split at the
# first space).  For every item:
#   - each term related to <word> in the dictionary ($hash) is mapped to
#     <value>;
#   - <word> itself is mapped to <word>.
# Items that contain no space print "input error" and are skipped.
# $exp_h is accepted but not used.
#
# Returns the resulting hash as a flat key/value list.
# Package globals written: $tmp_hash, $rand, @res, $part, $vh, $k, $v.
sub txt2arr_one
{
    my $spec       = $_[0];
    my $unused_exp = $_[1];

    $tmp_hash = ();
    $rand     = rand();
    @res      = ();
    $part     = 0;

    foreach my $item (split(",", $spec)) {
        unless ($item =~ /(.*?) (.*)/) {
            print "input error\n";
            next;
        }
        my $word = $1;
        $vh = $2;

        if (exists($hash->{$word})) {
            while (($k, $v) = each(%{$hash->{$word}})) {
                $v = 1 if $v == 0;
                $tmp_hash->{$k} = $vh;
            }
        }
        $tmp_hash->{$word} = $word;
    }

    return (%{$tmp_hash});
}
# txt2arr_input($input_t, $exp_h)
#
# Vectorises one query line.  $input_t is "<line id> <token> <token> ...";
# $exp_h is accepted but not used.
#
# Tokens are skipped when they are shorter than 3 (as measured by length()),
# are in the stop list ($hash_st), or start with a digit or "-".
#
# For every remaining token $w:
#   * "<w> <number>" (only when the whole input is shorter than 10):
#     $w gets 2 * number and its dictionary expansion is scaled by number.
#   * "<w> - <number>": $w gets -2 * number and the expansion is scaled by
#     -number.
#   * otherwise $w gets +2, unless its weight already exceeds 1/20 of the
#     token count, in which case the token is skipped entirely.
#   * the expansion adds, for every term related to $w in the dictionary
#     ($hash), the dictionary value times the scale; a token without a
#     dictionary entry gets an extra +0.1 instead.
#
# Returns (\%weight, $line_id, \%position, \%origin):
#   %weight   term => accumulated weight
#   %position term => ordinal recorded when the term was last touched
#   %origin   term => { source token => 1 }
#
# Changes from the previous version:
#   * the out-of-dictionary branch stored the position under a stale global
#     $k instead of under the token itself; it now uses the token, as
#     txt2arr3() does;
#   * scratch variables ($w, $k, $v, $nown, $tmp_res, $list_len) are lexical
#     instead of leaking into the package.
# The package global @res is still emptied, as before.
sub txt2arr_input
{
    my ($input_t, $exp_h) = @_;

    my (%weight, %position, %origin);
    my $next_pos = 0;
    my @list     = split(" ", $input_t);
    print FD_log  "$input_t\n";
    @res = ();

    my $list_len = scalar(@list);
    my $nline    = $list[0];

    for my $idx (1 .. $list_len - 1) {
        my $w = $list[$idx];

        # NOTE(review): the original condition was
        #   ($l < 3) && ($l !~ /^[a-z|A-Z]/)
        # i.e. it matched the numeric length against a letter class, which
        # can never match, so it reduced to "$l < 3".  It looks as if short
        # alphabetic tokens were meant to be kept; behaviour is left as it
        # was until that intent is confirmed.
        next if length($w) < 3;
        next if exists($hash_st->{$w});
        next if $w =~ /^\d/;
        next if $w =~ /^-/;

        my $following = defined($list[$idx + 1]) ? $list[$idx + 1] : '';
        my $zs = 1;    # scale applied to the dictionary expansion

        if (($following =~ /^\d/) && (length($input_t) < 10)) {
            $weight{$w} += 2 * $following;
            $zs = $following;
        }
        elsif ($following =~ /^-/) {
            $weight{$w} -= 2 * $list[$idx + 2];
            $zs = -$list[$idx + 2];
        }
        else {
            # Limit how much a repeated token can add.
            next if ($weight{$w} || 0) > ($list_len / 20);
            $weight{$w} += 2;
            $origin{$w}->{$w} = 1;
            $position{$w} = $next_pos;
            $next_pos++;
        }

        if (exists($hash->{$w})) {
            my $related = $hash->{$w};
            for my $k (keys %{$related}) {
                $weight{$k} += $related->{$k} * $zs;
                $origin{$k}->{$w} = 1;
                $position{$k} = $next_pos;
            }
        }
        else {
            $weight{$w} += 0.1;
            $origin{$w}->{$w} = 1;
            $position{$w} = $next_pos;
        }
    }

    return (\%weight, $nline, \%position, \%origin);
}


# txt2arr3($input_t, $exp_h)
#
# Vectorises one candidate line.  $input_t is "<line id> <token> ...";
# $exp_h is accepted but not used.
#
# Original design note (translated): this should eventually detect the text
# domain first and rewrite the term-vector values per domain (a domain vector
# set could overwrite the existing vectors via clone), e.g. dependency rules
# for a slang dictionary.
#
# Per token (skipping tokens shorter than 3 and stop words):
#   * the token gets +2, unless its weight already exceeds 1/10 of the token
#     count, in which case it is skipped entirely;
#   * every term related to it in the dictionary ($hash) gets the dictionary
#     value added; a token without a dictionary entry gets +0.1 instead;
#   * positions are recorded in a parallel hash.
#
# Returns (\%weight, $line_id, \%position, \%origin).
# Package globals touched: @res (emptied), $k and $v (loop scratch).
sub txt2arr3
{
    my ($text, $unused_exp) = @_;

    my $weight   = {};
    my $position = {};
    my $origin   = {};
    my $ordinal  = 0;

    my @tokens = split(" ", $text);
    print FD_log  "$text\n";
    @res = ();

    my $token_count = scalar(@tokens);
    my $line_id     = shift(@tokens);

    foreach my $word (@tokens) {
        my $len = length($word);
        # NOTE(review): the second test matches the numeric length against a
        # letter class and therefore never fails; kept verbatim.
        next if ($len < 3) && ($len !~ /^[a-z|A-Z]/);
        next if exists($hash_st->{$word});

        # Limit how much a repeated token can add.
        next if $weight->{$word} > ($token_count / 10);
        $weight->{$word} += 2;
        $origin->{$word}->{$word} = 1;

        $position->{$word} = $ordinal;
        $ordinal++;

        if (exists($hash->{$word})) {
            while (($k, $v) = each(%{$hash->{$word}})) {
                $weight->{$k} += $v;
                $position->{$k} = $ordinal;
                $origin->{$k}->{$word} = 1;
            }
        }
        else {
            $weight->{$word} += 0.1;
            $origin->{$word}->{$word} = 1;
            $position->{$word} = $ordinal;
        }
    }

    return ($weight, $line_id, $position, $origin);
}

# log2($n): base-2 logarithm of $n, computed from the natural logarithm.
sub log2 {
    my ($value) = @_;
    return log($value) / log(2);
}
# rewrite_gx($line)
#
# Applies the multi-word rewrite rules loaded by load_rewrite() to a segmented
# line "<line id> <token> <token> ...".
#
# A rule applies when every component word of the rule occurs among the
# line's tokens.  When a rule applies, the rule name ($rewrite_l[rule]) is
# inserted right after the line id and the component words are removed from
# the remaining tokens.  Lines with fewer than two rule-related token
# occurrences are returned unchanged.
#
# Returns the rewritten line (tokens joined by single spaces) or the
# unchanged input.  Progress messages are printed to STDOUT as before.
#
# Changes from the previous version:
#   * the line was read from $_ and the argument was ignored; the argument is
#     now used, with $_ as fallback when the sub is called without arguments;
#   * rule components were walked with each() and left via `last`, which
#     leaves the hash iterator half-way so that the next check of the same
#     rule could skip components; they are now checked with keys().
# The package hashes $has_local and $has_time are still filled as before;
# nothing in this sub reads them.
sub rewrite_gx
{
    my $inp = @_ ? $_[0] : $_;
    my @tl  = split(" ", $inp);
    my @re_res;
    my $has_re;       # token => space-separated ids of the rules using it
    my $has_n = 0;    # occurrences of rule-related tokens (duplicates count)
    my $no_has;       # tokens consumed by an applied rule

    print "scalar= ", scalar(@tl), "\n";
    push(@re_res, $tl[0]);

    for my $n (1 .. $#tl) {
        my $tmp = $tl[$n];
        next unless exists($rewrite_all->{$tmp});
        $has_re->{$tmp}    = $rewrite_all->{$tmp};
        $has_local->{$tmp} = $tmp;
        $has_time->{$tmp}  = $has_n;
        $has_n++;
    }

    if ($has_n < 2) {
        print "rewrite skip $has_n $inp\n";
        return $inp;
    }

    my $exists_tkl;    # rule ids already applied to this line
    for my $word (keys %{$has_re}) {
        for my $tkl (split(" ", $has_re->{$word})) {
            next if ($rewrite_n[$tkl] > $has_n) || exists($exists_tkl->{$tkl});

            my @parts = keys %{ $rewrite_g[$tkl] };
            next if grep { !exists($has_re->{$_}) } @parts;

            $exists_tkl->{$tkl} = 1;
            push(@re_res, $rewrite_l[$tkl]);
            $no_has->{$_} = 1 for @parts;
        }
    }

    for my $n (1 .. $#tl) {
        next if exists($no_has->{ $tl[$n] });
        push(@re_res, $tl[$n]);
    }

    return join(" ", @re_res);
}
# load_rewrite()
#
# Loads the rewrite rules from "rewrite.txt" in the current directory.
#
# Each line is "<name>\t<term>:<value>\t<term>:<value>...".  <name> is a rule
# name whose "_"-separated parts are the component words of the rule.  Every
# physical line consumes one rule id (starting at 1); lines whose name is
# shorter than 3 are ignored.
#
# Package data filled in:
#   $rewrite_l[id]     rule name
#   $rewrite_n[id]     number of component words
#   $rewrite_g[id]     { component word => 1 }
#   $rewrite_all->{w}  space-separated ids of the rules containing word w
#   $hash->{name}{t}   value for every "<term>:<value>" field of the line
#
# Changes from the previous version:
#   * lines are chomped, so a rule on a line without any tab no longer keeps
#     the trailing newline in its name and last component;
#   * the file is opened with a checked three-argument open on a lexical
#     handle and closed afterwards; a missing file produces a warning;
#   * loading twice no longer doubles $rewrite_n or repeats ids in
#     $rewrite_all.
sub load_rewrite
{
    my $fh;
    unless (open($fh, '<', "rewrite.txt")) {
        warn "load_rewrite: cannot open rewrite.txt: $!\n";
        return;
    }

    my $rg = 0;
    while (my $line = <$fh>) {
        chomp($line);
        $rg++;    # one id per physical line, as before

        my ($name, @fields) = split("\t", $line);
        next if !defined($name) || length($name) < 3;

        $rewrite_l[$rg] = $name;

        my @rew = split("_", $name);
        $rewrite_n[$rg] = scalar(@rew);
        for my $t_rew (@rew) {
            $rewrite_g[$rg]->{$t_rew} = 1;
            if (!exists($rewrite_all->{$t_rew})) {
                $rewrite_all->{$t_rew} = $rg;
            }
            elsif (!grep { $_ == $rg } split(" ", $rewrite_all->{$t_rew})) {
                $rewrite_all->{$t_rew} .= " " . $rg;
            }
        }

        for my $field (@fields) {
            if ($field =~ /(.*?)\:(.*)/) {
                my ($av, $bv) = ($1, $2);
                $hash->{$name}->{$av} = $bv;
            }
        }
    }
    close($fh);
    return;
}





# deep_article_path($file)
#
# Reads the first and third line of $file as two dictionary keys (the second
# line is read and ignored) and returns "same <term>" for every term that is
# related to both keys in the dictionary ($hash).
#
# Returns a (possibly empty) list of strings.  Prints "----<key1>|<key2>" to
# STDOUT as before.
#
# Changes from the previous version:
#   * each() was called directly on hash references, which is a compile
#     error on Perl 5.24 and later; the hashes are now dereferenced
#     explicitly;
#   * the common terms are found with one exists() lookup per term instead of
#     a nested scan of both hashes;
#   * the file is opened with a checked three-argument open on a lexical
#     handle and closed again.
#
# NOTE(review): the trailing unlink removes "/tmp/$rand.sctxt2", where $rand
# is a package global that this sub never sets.  It is kept because other
# code may depend on the cleanup; confirm whether it still belongs here.
sub deep_article_path
{
    my $file = $_[0];
    my @m_res;

    my ($key1, $key2);
    if (open(my $fh, '<', $file)) {
        $key1 = <$fh>;
        my $ignored = <$fh>;
        $key2 = <$fh>;
        close($fh);
    }
    else {
        warn "deep_article_path: cannot open $file: $!\n";
    }
    $key1 = '' unless defined $key1;
    $key2 = '' unless defined $key2;
    chomp($key1);
    chomp($key2);
    print "----$key1|$key2\n";

    if (exists($hash->{$key1}) && exists($hash->{$key2})) {
        my $second = $hash->{$key2};
        for my $nk (keys %{ $hash->{$key1} }) {
            push(@m_res, "same $nk") if exists($second->{$nk});
        }
    }

    unlink("/tmp/$rand.sctxt2");

    return @m_res;
}
# dist($la, $lb, $seg)
#
# Similarity between two texts.
#   $la, $lb - the two texts.
#   $seg     - when equal to "qc", both texts are first segmented with
#              seg_txt() (and the segmented inputs are printed to STDOUT);
#              otherwise they are assumed to be segmented already.
#
# Both texts are vectorised with txt2arr3() and compared with comp_hash().
# Returns a one-element list: "0-1-<score>".
#
# Fix: comp_hash() returns the list (score, matched terms).  The previous
# scalar assignment "$n = comp_hash(...)" therefore received the last element
# (the matched-terms string) instead of the score.  The call is now made in
# list context.
sub dist
{
    my ($la, $lb, $seg) = @_;

    my ($ina, $inb);
    if (defined($seg) && $seg eq "qc") {
        my $seg_a = seg_txt($la);
        my $seg_b = seg_txt($lb);
        chomp($seg_a);
        chomp($seg_b);
        $ina = "0 $seg_a";
        $inb = "1 $seg_b";
        print "$ina,$inb inainb\n";
    }
    else {
        $ina = "0 $la";
        $inb = "1 $lb";
    }

    my ($my_hash)  = txt2arr3($ina, $exp_hash);
    my ($my_hash2) = txt2arr3($inb, $exp_hash);
    my ($score)    = comp_hash($my_hash, $my_hash2);

    my @m_res = ("0-1-$score");
    return @m_res;
}
# comp_pin($a, $b)
#
# Returns "1" when the two keys share at least one entry in their
# $pinyinhash lists, "0" otherwise.  The two keys are printed to STDOUT.
sub comp_pin
{
    my ($left, $right) = @_;
    print "a b $left $right\n";

    foreach my $left_reading (@{$pinyinhash->{$left}}) {
        foreach my $right_reading (@{$pinyinhash->{$right}}) {
            return "1" if $right_reading eq $left_reading;
        }
    }

    return "0";
}


# seg_txt($txt)
#
# Sends "$txt\n" to the TCP service at 127.0.0.1:11229, reads the reply until
# the peer closes the connection, and returns the last line received
# (including its line terminator, if any), or undef when nothing was
# received.  Dies when the connection cannot be established.
#
# Changes from the previous version:
#   * the reply was read with "while (<$fh>)", which overwrites the caller's
#     $_; lines are now read into a lexical variable;
#   * the IO::Select object is lexical instead of the package global $sel.
sub seg_txt
{
    my $txt = $_[0];

    my $socket = IO::Socket::INET->new(
        PeerAddr => "127.0.0.1",
        PeerPort => "11229",
        Type     => SOCK_STREAM,
        Proto    => "tcp",
    ) or die "Can not create socket connect.$@";
    $socket->autoflush(1);

    my $sel = IO::Select->new($socket);
    $socket->send("$txt\n", 0);    # send the request to the server

    my $last_line;
    # Wait for the reply; the loop ends once the socket has been removed from
    # the select set and can_read() has nothing left to report.
    while (my @ready = $sel->can_read) {
        foreach my $fh (@ready) {
            next unless $fh == $socket;
            while (my $line = <$fh>) {
                $last_line = $line;
            }
            $sel->remove($fh);
            close $fh;
        }
    }

    return $last_line;
}
# tfidf($top, $tori, $txt)
#
# Takes a sentence and returns its $top highest-ranked core words.
#   $top  - number of words to return; defaults to 1 when empty.
#   $tori - name of the score field looked up in $tfidf->{word}; defaults to
#           "ti" when empty.
#   $txt  - the text; it is segmented with seg_txt().
#
# Returns a string of "<word>:<score> " pairs in descending score order, or
# undef when no segmented token has an entry in $tfidf.
#
# Changes from the previous version:
#   * the limit check ran after appending, so $top + 1 entries were returned;
#     exactly $top entries are returned now, as the description says;
#   * the second chomp was applied to $txt again instead of to the segmented
#     text.
sub tfidf
{
    my ($top, $tori, $txt) = @_;

    $tori = "ti" if !defined($tori) || length($tori) < 1;
    $top  = 1    if !defined($top)  || length($top) < 1;

    chomp($txt);
    my $st = seg_txt($txt);
    chomp($st);

    my %tlist;
    foreach my $word (split(" ", $st)) {
        next unless exists($tfidf->{$word});
        $tlist{$word} = $tfidf->{$word}->{$tori};
    }

    my @key = sort { $tlist{$b} <=> $tlist{$a} } keys %tlist;

    my $res;
    my $o_n = 0;
    foreach my $word (@key) {
        last if $o_n >= $top;
        $res .= "$word:$tlist{$word} ";
        $o_n++;
    }

    return $res;
}
# get_w_rela($word)
#
# Returns the keys stored under $word in $hash_bk, joined by single spaces,
# or the empty string when $word has no entry.
sub get_w_rela
{
    my $word = $_[0];
    return "" unless exists($hash_bk->{$word});
    return join(" ", keys %{$hash_bk->{$word}});
}
# get_event($text)
#
# Narrative event extraction.  Only the time expressions are extracted at
# present: the text is segmented with seg_txt() and passed to get_time(),
# whose result is returned.  Place, name, event and path extraction are not
# implemented.
sub get_event
{
    my $segmented = seg_txt($_[0]);
    my $time      = get_time($segmented);
    return ($time);
}
# get_time($in)
#
# Collects runs of "time-like" characters from a whitespace-segmented text
# and returns them as a space-separated string (undef when nothing
# qualifies).
#
# Outline of the loop below:
#   * $in is split on whitespace into tokens (@wr).
#   * Tokens that match none of the digit / numeral / $timein patterns are
#     skipped; if a run was in progress ($start == 1) when such a token is
#     met, the run collected so far is flushed to $m_res first.
#   * Matching tokens are decoded as UTF-8 and walked one character at a
#     time; matching characters are appended to the current run (@tmp_str),
#     and a non-matching character flushes the run.
#   * A run still open at the last token is flushed as well, and a debug line
#     is printed to STDOUT.
#   * Finally the flushed pieces are de-duplicated and only pieces whose
#     length() exceeds 3 are kept (presumably a byte length, since the pieces
#     are re-encoded to UTF-8 before being stored - TODO confirm).
sub get_time
{
	my $in=$_[0];	
	# Alternation of characters treated as part of a time expression.  It is
	# interpolated into the regexes below.
	my $timein="\:|\\|\/|\-|夏|冬|上|大|百|第|两|之|来|右|晨|昨|春|年|二|半|纪|过|历|八|度|三|四|日|稍|星|少|今|午|早|刻|差|夜|长|明|月|间|万|秒|马|末|五|候|去|数|一|段|七|现|期|感|多|在|点|号|和|左|好|千|本|六|的|周|零|当|充|节|近|届|天|底|未|圣|诞|头|世|几|时|钟|凌|晚|后|新|小|九|岁|久|分|清|下|每|季|前|这|个|那|满|古|十";
	my @wr=split(" ",$in);
	 my @hash_hc;   # indexes of the tokens that contributed to the current run
	 my @tmp_str;   # characters of the current run
	 my $m_res;     # all flushed runs, separated by spaces
	 my $has=0;     # number of entries in @hash_hc
	 my $start=0;   # 1 while a run is being collected
	 for(0 .. scalar(@wr)-1)
	 {
		 my $zu=$_;
		 # A token without any time-like character ends the current run.
		 if(($start==1)&&($wr[$zu]!~/\d|$timein/))
		 {$start=0;
			 my $onestr=join("",@tmp_str);
			 $m_res .=$onestr." ";
#			 $pos_nu->{$onestr}=1;
#			 foreach my $ff( @hash_hc)
#			 {
#				 my $s=$wr[$ff];
#				 if($onestr !~/$s/)
#				 {
#			 $m_res .=$wr[$ff]." ";
#				 }
#			 }
			 @hash_hc=();
			 @tmp_str=();$has=0;

		 }
		 # Tokens with no digit, numeral or time character are skipped.
		 if($wr[$zu]!~/\d| |一|二|三|四|五|六|七|八|九|十|百|千|万|亿|零|\.|点|$timein/)
		 {next;}
		 # Walk the token character by character: decode to characters, then
		 # re-encode each one so it can be matched against the patterns.
		 my $wr_uni=decode("utf-8",$wr[$zu]);
		my @zi=split("",$wr_uni);
		# Local scalar that shadows the package-level $hash; it is not used
		# in this sub.
		my $hash=0;
		for(0 .. scalar(@zi)-1)
		{
			my $zone=$zi[$_];
			my $zone=encode("utf-8",$zone);
			if($zone=~/\d| |一|二|三|四|五|六|七|八|九|十|百|千|万|亿|$timein/)
			{
				$start=1;
			}
			if(($start==1)&&($zone=~/\d|一|二|三|四|五|六|七|八|九|十|百|千|万|亿|零|\.|点|$timein/))
			{
				push (@tmp_str,$zone);
				# Record the token index once per token.
				if(($has ==0 )||($hash_hc[$has-1] != $zu))
				{$hash_hc[$has]=$zu;$has++;
				}
			}
			elsif(($start==1)&&($zone !~/\d| |一|二|三|四|五|六|七|八|九|十|百|千|万|亿|零|\.|点|$timein/))
			{$start=0;
				# A non-matching character inside a token flushes the run.
				my $onestr=join("",@tmp_str);
				$m_res .=$onestr." ";
#				$pos_nu->{$onestr}=1;
#				foreach my $ff( @hash_hc)
#				{
#$m_res .=$wr[$ff]." ";
#			         }
			               @hash_hc=();
			              @tmp_str=();$has=0;
					           }
		}
		# Flush a run that is still open when the last token has been read.
		if(($start ==1)&&($zu== (scalar(@wr)-1)))
		{
			         $start=0;
					 my $onestr=join("",@tmp_str);
					 $m_res .=$onestr." ";
#					 $pos_nu->{$onestr}=1;
					foreach my $ff( @hash_hc)
					{
					my $s=$wr[$ff];
					if($onestr !~/$s/)
					{
#					$m_res .=$wr[$ff]." ";

					}
					}
					@hash_hc=();
					@tmp_str=();
					$has=0;
					# Debug output; both arrays were emptied just above, so
					# they print as empty here.
					print "结束捕获 清算 句子开始$zu $wr[$zu] @hash_hc| @tmp_str|$onestr\n";
					
				}
		  }
#	 push(@wline,$m_res);
	 # Keep each distinct piece once, and only pieces longer than 3.
	 my @as=split(" ",$m_res);
	 my $my_res;
	 my $tihash;
	 foreach my $ones(@as)
	 {
		 if(length($ones)>3)
		 {
			if(exists($tihash->{$ones}))
			{next;}
			$my_res .=$ones." "; 
			$tihash->{$ones}=1;
		}
	 }
	 $my_res;


}

# load_dic_add($path, $lsize, $zoomin)
#
# Adds the relations of a dictionary file to the in-memory dictionary
# ($hash).
#
# Each line is "<name>\t<term>:<value>\t<term>:<value>...".  For every field
# whose <term> is not in the stop list ($hash_st), <value> * $zoomin is ADDED
# to $hash->{name}{term}.
#
#   $path   - dictionary file to read.
#   $lsize  - per-line limit: loading of a line stops once more than $lsize
#             fields have been accepted, i.e. at most $lsize + 1 fields per
#             line are loaded (stop-listed terms do not count).
#   $zoomin - scale factor; 0.5 is used unless a value > 0 is given.
#
# Prints "load <path> add ok" on success.  When the file cannot be opened a
# warning is issued and nothing else happens (previously the failure was
# silent and "ok" was printed anyway).
#
# Other changes from the previous version: checked three-argument open on a
# lexical handle instead of a two-argument open on a bareword handle, and the
# scratch variables no longer overwrite the package globals $line, $f and $i.
sub load_dic_add
{
    my ($path, $lsize, $zoom_arg) = @_;

    my $zoomin = 0.5;
    $zoomin = $zoom_arg if defined($zoom_arg) && $zoom_arg > 0;

    my $fh;
    unless (open($fh, '<', $path)) {
        warn "load_dic_add: cannot open $path: $!\n";
        return;
    }

    while (my $line = <$fh>) {
        chomp($line);
        my ($name, @fields) = split("\t", $line);

        my $ccn = 0;    # fields accepted from this line so far
        for my $field (@fields) {
            next unless $field =~ /(.*?)\:(.*)/;
            my ($av, $bv) = ($1, $2);

            next if exists($hash_st->{$av});
            last if $ccn > ($lsize || 0);
            $ccn++;

            $hash->{$name}->{$av} += $bv * $zoomin;
        }
    }
    close($fh);

    print "load $_[0] add ok\n";
}
# reduce_v($txt, $wlist, $v)
#
# Adjusts the weights stored in the global $hash between the words of
# $txt and the words of $wlist.
#
#   $txt   - text handed to seg_txt() (defined elsewhere in this file;
#            presumably a word segmenter - verify); its result is split
#            on whitespace.
#   $wlist - whitespace-separated word list.  Words starting with
#            "原词" are ignored.
#   $v     - amount added to every already existing
#            $hash->{text_word}->{list_word} entry.
#
# If no such entry exists for any pair, every pair is instead linked in
# both directions with a weight of 0.2.
# NOTE(review): in that fallback an already existing reverse entry
# $hash->{list_word}->{text_word} is overwritten with 0.2 - confirm this
# is intended.
#
# Returns a tab-separated log of the changes ("a:b value\t..."), or
# undef when nothing was changed.
#
# Example call from the original author:
#   $remove_log=reduce_v($allf[0].".".$allf[2],$fdebug[2],$tv);
sub reduce_v
{
    my ($txt, $wlist, $v) = @_;

    my $tlist = seg_txt($txt);
    chomp($tlist);
    my @txtlist  = split(" ", $tlist);
    my @txtlist2 = split(" ", $wlist);
# The next two lines are live Smart::Comments debug statements; they
# must stay at column 0 and keep these variable names.
### @txtlist
### @txtlist2

    # The "原词" filter does not depend on the outer loop, so apply it
    # once.  @txtlist2 itself is left intact for the debug dump above.
    my @targets = grep { $_ !~ /^原词/ } @txtlist2;

    my $reducehash;
    my $have = 0;

    # Pass 1: add $v to every existing text_word -> list_word weight.
    foreach my $src (@txtlist) {
        # Test the first level on its own.  A bare
        # exists($hash->{$src}->{$dst}) would autovivify $hash->{$src}
        # and leave empty entries in the dictionary for unknown words.
        next unless exists($hash->{$src});

        foreach my $dst (@targets) {
            next unless exists($hash->{$src}->{$dst});

            $reducehash .= $src.":".$dst." $v\t";
            $hash->{$src}->{$dst} += $v;
            $have = 1;
        }
    }

    # Pass 2: nothing was related yet, so seed a weak link (0.2) in both
    # directions for every pair.
    if ($have == 0) {
        foreach my $src (@txtlist) {
            foreach my $dst (@targets) {
                $reducehash .= $src.":".$dst." 0.2\t";
                $reducehash .= $dst.":".$src." 0.2\t";
                $hash->{$src}->{$dst} = 0.2;
                $hash->{$dst}->{$src} = 0.2;
            }
        }
# Live Smart::Comments debug statement, see above.
### $reducehash
    }

    return $reducehash;
}
#4——9支持 stop.txt 其中加了若干标点
# jieba分词 分割文件  bug修正
# jieba分词 实现多socket的全内存调用 待高并发测试
# 添加了client_qc.pl用来支持 分类引擎与切词引擎的数据交换
#添加了 若干词到 词典文件
#计划加入 每条 目标与待测文本 进行域向量修订(共现放缩) 域词典 数量为N
#4-12	修正词典中的标点,和去掉一些单字
#测试自然对数 作为衡量相似度的
# 4-14修正  reload 命令同时重启切词系统
#4-20  comp_hash加入相似特征的序列对比 当序列相近则认为相似度更近
#加入同义词词典
#4-23 单字支持做调整 修正了停用词表的bug 修正了文本中有----的bug

#4-29 修正输入 文章中有数字影响 权重的bug
#4-30加入post数据接口 加入 共现词组转义rewrite函数
#添加重复词语数量控制 为20个词中 最多计数一个
#添加了 各种品牌
#向量空间减少到60个
#加入目标语句的深度含义分析
#计划加入 实体路径的最大关联分析
#加入dist函数 直接分析2个词的距离
#支持模糊推理
#调整扩展语义权重不大于2
#加入 词的tfidf比较


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.