Perl模拟浏览器(cookie)抓Email
时间:2009-03-18 来源:snowtty
Perl模拟浏览器(cookie)抓Email
关键字: email perl
Perl代码
#!/usr/bin/perl -w
- #use CGI qw(:standard);
- use LWP::UserAgent;
- use HTTP::Request::Common;
- use HTTP::Request::Common qw(POST);
- #$e=exp(1);
# Main driver.
#
# Reads one search keyword per line from keyword.txt, asks getEmail() for the
# addresses that belong to it and appends the outcome to email.txt: either
# "keyword<TAB>addresses" or, when nothing was found, just "keyword".
# Output records are terminated with CRLF.
my $keyword_file = "keyword.txt";
my $email        = "email.txt";
my $k            = 1;    # 1-based number of the record being processed

open(my $keywords, '<', $keyword_file)
    or die("open file failure! ($keyword_file: $!)");

while (my $line = <$keywords>) {
    # Turn CR/LF into blanks (the input may use CRLF endings), then drop
    # trailing whitespace so the keyword is clean.
    $line =~ s/[\r\n]/ /g;
    $line =~ s/\s+$//;

    # A blank line would only fire an empty search and add an empty record.
    next if $line eq '';

    my $email1 = getEmail($line);

    # The file is re-opened in append mode for every record so each result
    # reaches the disk before the long sleep below.  '>>' creates the file
    # when it does not exist yet, so no separate existence test is needed.
    open(my $out, '>>', $email) or die("cannot append to $email: $!");
    if ($email1) {
        print {$out} "$line\t$email1\r\n";
    }
    else {
        # print rather than printf: the keyword is data, not a format
        # string, and may legitimately contain '%'.
        print {$out} "$line\r\n";
    }
    close($out) or die("cannot close $email: $!");

    # Throttle: wait between two searches.
    sleep(10);

    # On the 1st, 9th, 17th, ... record fetch the site's front page with
    # wget (presumably to look like ordinary browsing -- the original gives
    # no reason).  Note that wget stores the page in the current directory.
    if ($k % 8 == 1) {
        system('wget', 'http://www.lead411.com') == 0
            or warn "wget http://www.lead411.com failed (status $?)\n";
    }

    print "$k\n";    # progress indicator
    $k++;
}

# $k is one ahead of the number of records actually processed.
print "Total Record:" . ($k - 1) . "\n";
close($keywords);
# getEmail($keyword)
#
# Runs a company search for $keyword on www.lead411.com, visits every company
# detail page linked from the result list and collects the address of each
# mailto: link found within the 150 characters that follow a "CEO" marker on
# that page.
#
# Parameters:
#   $keyword - search term; it is percent-encoded before being placed in the
#              query string, so characters such as '&', '#' or '%' are safe.
#
# Returns:
#   The collected addresses joined with ";" -- an empty string when the
#   search request fails or no address is found.
#
# Side effects:
#   Prints every visited detail URL and every hit to STDOUT.
sub getEmail {
    use strict;

    my $keyword = defined $_[0] ? $_[0] : '';

    my $agent = LWP::UserAgent->new;

    # Request headers copied from a real browser session.
    # NOTE(review): the Cookie is a hard-coded login session and will stop
    # working once it expires; the "[email protected]" fragment looks like an
    # address redacted in this copy of the script -- confirm the real value.
    # NOTE(review): "gzip, deflate" are content codings, not charsets; the
    # header is kept as sent by the original because asking for compressed
    # responses would require decoding the body before scanning it.
    my @header = (
        'User-Agent'      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; iCafeMedia; .NET CLR 2.0.50727; CIBA)',
        'Accept'          => '*/*',
        'Accept-Charset'  => 'gzip, deflate',
        'Accept-Language' => 'zh-cn',
        'Cookie'          => 'Tango_UserReference=38D8FF1624305B16496E9808; MTCCK=1; _csuid=48feeef505683659; cookmcnt=999; CID=1459382; cookMemberName=YunFan; cookMemberID=61448; [email protected]; DLDExec=OK; __utma=232384002.1655516880.1231991960.1231994793.1232000250.3; __utmb=232384002; __utmc=232384002; __utmz=232384002.1231991960.1.1.utmccn=(direct)|utmcsr=(direct)|utmcmd=(none)',
    );

    # Percent-encode everything outside the RFC 3986 "unreserved" set.  The
    # string is reduced to UTF-8 bytes first so that each encoded unit is
    # exactly one byte.
    my $query = $keyword;
    utf8::encode($query) if utf8::is_utf8($query);
    $query =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord($1))/ge;

    my $url = 'http://www.lead411.com/allsearch.taf?_function=headersearch'
            . "&SearchParam=$query&SearchType=Company";

    my $res = $agent->get($url, @header);
    return '' unless $res->is_success;

    # Gather all detail links BEFORE fetching any of them.  The result list
    # is kept in its own variable and never modified, so the /g scan cannot
    # be restarted or redirected onto a detail page.
    my $listing = $res->content;
    my @detail_urls;
    my %seen;
    while ($listing =~ /company\.taf\?_function=detail&(?:amp;)?Company_uid1=(\d+)&(?:amp;)?_UserReference=(\w+)/ig) {
        my ($company_uid, $user_ref) = ($1, $2);
        next unless $company_uid and $user_ref;
        next if $seen{"$company_uid&$user_ref"}++;    # visit each company once
        push @detail_urls,
              'http://www.lead411.com/company.taf?_function=detail&Company_uid1='
            . $company_uid
            . '&_UserReference='
            . $user_ref;
    }

    my @emails;
    for my $detail_url (@detail_urls) {
        print $detail_url . "\n";

        my $detail = $agent->get($detail_url, @header);
        next unless $detail->is_success;

        # Remove whitespace and the characters '<', '>' and '/' so that
        #   <a href="mailto:x@y.z"><font ...>
        # collapses to
        #   ahref="mailto:x@y.z"font...
        # which is the shape the pattern below looks for.
        my $page = $detail->content;
        $page =~ s{[\s<>/]+}{}g;

        # Inspect a 150-character window starting at each "CEO" marker and
        # continue the search behind that window.
        my $pos = 0;
        while (($pos = index($page, 'CEO', $pos)) != -1) {
            my $window = substr($page, $pos, 150);
            # [^"]+ stops at the closing quote, so one match is one address.
            if ($window =~ /"mailto:([^"]+)"font/i) {
                push @emails, $1;
                print $keyword . "---------------" . join(';', @emails) . "\n";
            }
            $pos += 150;
        }
    }

    return join(';', @emails);
}
# End of script: terminate explicitly with a success status.
exit 0;
相关阅读 更多 +