首页 新闻 会员 周边

php写了一个爬虫,求大家看看,运行时总有点问题,自身来看逻辑上没问题啊

0
悬赏园豆:20 [已关闭问题] 关闭于 2017-09-01 09:47

<?php
class spider {
private $filename;
private $filename_list;

 

function __construct($filename='url.txt', $filename_list='url_already.txt') {
$this->filename = $filename;
$this->filename_list = $filename_list;
}

function net($num) {
  for($i=0; $i<$num; $i++) {
  $url = $this->fopen_one();
  $url_list = $this->fopen_list();

  if(in_array($url,$url_list)) {
    preg_match('/\.(.*)\./iU',$url,$mat);
    $url_name = $mat[1];
    $output = $this->get_content($url);
    $result = $this->get_url($output,$url,$url_name);
    $this->write_list($result);
    $this->write_one($url);
  } else {
    $this->one_delete($url);
  }
}
}


function get_content($url) {
  $ch = curl_init();
  curl_setopt($ch,CURLOPT_URL,$url);
  curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
  $output = curl_exec($ch);
  curl_close($ch);
  return $output;
}

function fopen_one() {
  $handle = fopen($this->filename,'r');
  $buffer = fgets($handle,4096);
  $url = trim($buffer);
  fclose($handle);
  return $url;
}

function fopen_list() {
  $handle = fopen($this->filename_list,'r');
  while (!feof($handle)) {
    $buffer = fgets($handle,4096);
    $url_list[] = trim($buffer);
  }
  fclose($handle) ;
  return $url_list;
}

function get_url($output,$url,$url_name) {
  $pattern = "/\<a.href=[\'\"].*[\'\"].*\>.*\<\/a\>/iU";
  preg_match_all($pattern,$output,$matches,PREG_SET_ORDER);
  if(is_array($matches)) {
    foreach($matches as $a) {
    $b = $a[0];
    if(preg_match("/\"(.*)\"/iU",$b,$c)) {
      $d = $c[1];
      if(preg_match("/^\/.*/i",$d,$e)) {
      $g = 'http://'.$url.$e[0];
      } else {
      $g = $d;
      }

    if(preg_match("/.*$url_name.*/",$g,$h)) {
    $result[] = trim($h[0]);
  }
  }
}
  $result = array_unique($result);
}
return $result;
}


function write_list($result) {
  $handle = fopen($this->filename,'a');
  foreach($result as $one) {
    $one = "\n".$one;
    fwrite($handle,$one);
  }
  fclose($handle);
}

function write_one($url) {
  $handle = fopen($this->filename_list,'a');
  $url = "\n".trim($url);
  fwrite($handle,$url);
  fclose($handle);
}

function one_delete($url) {
  $handle = fopen($this->filename,'r');
  while(!feof($handle)) {
    $buffer = fgets($handle,4096);
    $url_list[] = trim($buffer);
  }
  fclose($handle);
  $key = array_search($url,$url_list);
  unset($url_list[$key]);
  $handle = fopen($this->filename,'w');
  foreach($url_list as $k) {
    fwrite($handle,"\n".$k);
  }
  fclose($handle);
  }
}

小龙鬼的主页 小龙鬼 | 初学一级 | 园豆:189
提问于:2017-08-29 09:28
< >
分享
所有回答(1)
0

什么问题啊,报错了?还是什么,你光贴代码,我们也不知道啥问题 啊

男人要爽 | 园豆:6 (初学一级) | 2017-08-30 18:57
清除回答草稿
   您需要登录以后才能回答,未注册用户请先注册