文件目录下建立三个文件:daili.php,1.txt,2.txt
1.txt内容为需要验证的代理ip,每行一个,格式为“ip:端口”,例如:
1.1.2.3:80
2.2.3.4:8080
.......
234.244.234.23:80
2.txt为验证成功的代理ip地址,本文件自动生成,文件需修改属性为777
浏览器中运行:http://www.yourdomain.com/daili.php?id=0
2.txt中会自动写入验证成功的代理ip和端口,一行一个
daili.php源码如下
<?php
/**
 * daili.php - checks the proxies listed in 1.txt, one proxy per request.
 *
 * Each request tests the proxy at index ?id=N by fetching a fixed URL through
 * it, appends a working proxy to 2.txt, and then redirects the browser to
 * ?id=N+1. The redirect chain stops once the end of the list is reached.
 */

/**
 * Extraction helper, e.g. $image1 = cut($file2, "<b>", "</b>");
 *
 * Returns the text that follows the first occurrence of $from, up to the next
 * occurrence of $end.
 *
 * @param string $file Text to search.
 * @param string $from Start marker.
 * @param string $end  End marker.
 * @return string The extracted text, or '' when a marker is empty or $from
 *                does not occur in $file.
 */
function cut($file, $from, $end)
{
    // explode() cannot split on an empty delimiter, so treat it as "no match".
    if ($from === '' || $end === '') {
        return '';
    }

    $message = explode($from, $file);
    if (!isset($message[1])) {
        return '';
    }

    $message = explode($end, $message[1]);
    return $message[0];
}

/**
 * cURL helper, e.g. $file2 = GetSources($url2, $User_Agent, $Referer_Url, $proxy);
 *
 * Fetches $Url through $proxy with a 5 second timeout and without following
 * redirects.
 *
 * @param string $Url         URL to fetch.
 * @param string $User_Agent  Value of the User-Agent header.
 * @param string $Referer_Url Value of the Referer header.
 * @param string $proxy       Proxy as "host:port". It now has a default so
 *                            that no required parameter follows optional ones.
 * @return string|false Response body, or false when the request failed.
 */
function GetSources($Url, $User_Agent = '', $Referer_Url = '', $proxy = '')
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_PROXY, $proxy);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);
    curl_setopt($ch, CURLOPT_URL, $Url);
    curl_setopt($ch, CURLOPT_USERAGENT, $User_Agent);
    curl_setopt($ch, CURLOPT_REFERER, $Referer_Url);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    $MySources = curl_exec($ch);
    curl_close($ch);

    return $MySources;
}

/**
 * Splits the raw content of the proxy list into individual entries.
 *
 * Entries may be separated by any whitespace (newlines, CRLF, spaces), and
 * empty entries are dropped, so a trailing newline adds no phantom proxy.
 *
 * @param string $contents Raw content of the list file.
 * @return array List of "host:port" strings, re-indexed from 0.
 */
function ParseProxyList($contents)
{
    $entries = preg_split('/\s+/', trim($contents), -1, PREG_SPLIT_NO_EMPTY);
    return $entries === false ? array() : $entries;
}

$User_Agent = "baiduspider+(+http://www.baidu.com/search/spider.htm)";
$Referer_Url = 'http://www.baidu.com/';
$url = "http://www.taobao.com/robots.txt";

// Stays null unless another proxy remains to be checked.
$next = null;

$contents = is_readable("1.txt") ? file_get_contents("1.txt") : false;

if ($contents === false) {
    echo "无法读取1.txt";
} else {
    $dailiarr = ParseProxyList($contents);
    $ccc = count($dailiarr);

    // The index comes from the query string: accept numeric values only.
    $i = (isset($_GET["id"]) && is_numeric($_GET["id"])) ? (int) $_GET["id"] : 0;

    if ($i < 0 || $i >= $ccc) {
        // Nothing to test at this index, so end the redirect chain.
        echo $i . "完了";
    } else {
        $proxy = $dailiarr[$i];
        $file = GetSources($url, $User_Agent, $Referer_Url, $proxy);

        // strpos() returns 0 for a match at the very start, so compare
        // against false instead of testing ">= 1".
        if (is_string($file) && strpos($file, "Baiduspider") !== false) {
            echo "成功";
            $filename = "2.txt";
            $fp2 = fopen($filename, "a+");
            if ($fp2 !== false) {
                fwrite($fp2, $proxy . "\n");
                fclose($fp2);
            } else {
                echo "无法写入2.txt";
            }
        } else {
            echo htmlspecialchars($proxy, ENT_QUOTES, 'UTF-8') . "失败";
        }

        $in = $i + 1;
        if ($in < $ccc) {
            echo $in . "继续";
            $next = "daili.php?id=" . $in;
        } else {
            echo $in . "完了";
        }
    }
}

// Redirect only while proxies remain; the original redirected forever.
if ($next !== null) {
    echo "<script language='javascript' type='text/javascript'>";
    echo "window.location.href='$next'";
    echo "</script>";
}
?>