都说阿里巴巴有不能采集和防采集的神话,今天就用张老师讲的Curl采集写了一个模拟浏览器的代码。没有不可能只有不去做,哈哈 
- <?php
 
 - set_time_limit(0);
 
 - function _rand() {
 
 - $length=26;
 
 - $chars = "0123456789abcdefghijklmnopqrstuvwxyz";
 
 - $max = strlen($chars) - 1;
 
 - mt_srand((double)microtime() * 1000000);
 
 - $string = '';
 
 - for($i = 0; $i < $length; $i++) {
 
 - $string .= $chars[mt_rand(0, $max)];
 
 - }
 
 - return $string;
 
 - }
 
 - $HTTP_SESSION=_rand();
 
 - $HTTP_SESSION;
 
 - $HTTP_Server="search.china.alibaba.com";
 
 - $HTTP_URL="/company/k-%CB%AE%CB%AE%CB%AE_n-y.html";
 
 - $ch = curl_init();
 
 - curl_setopt ($ch,CURLOPT_URL,"http://".$HTTP_Server.$HTTP_URL);
 
 - curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
 
 - curl_setopt($ch,CURLOPT_USERAGENT,"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
 
 - $res = curl_exec($ch);
 
 - curl_close ($ch);
 
 - print_r($res);
 
 -  
 
 - ?>
 
  复制代码 
 |