<?php
/**
 * Minimal file-backed web spider.
 *
 * State lives in two plain-text files holding one URL per line:
 *   - the queue file ($filename): URLs waiting to be processed;
 *   - the visited file ($filename_list): URLs that have already been fetched.
 *
 * Blank lines in either file are ignored when reading, so files produced by
 * the append helpers (which write "\n" before every entry) are read correctly.
 */
class spider {
    /** @var string Path of the queue file. */
    private $filename;

    /** @var string Path of the visited file. */
    private $filename_list;

    /**
     * @param string $filename      Path of the queue file.
     * @param string $filename_list Path of the visited file.
     */
    public function __construct($filename='url.txt', $filename_list='url_already.txt') {
        $this->filename = $filename;
        $this->filename_list = $filename_list;
    }

    /**
     * Run up to $num processing steps.
     *
     * Each step looks at the URL at the head of the queue:
     *   - if it was already visited, it is removed from the queue;
     *   - otherwise it is fetched, the matching links found on the page are
     *     appended to the queue, and the URL is recorded as visited (it is
     *     removed from the queue by a later step).
     * The loop stops early when the queue has no URL left.
     *
     * @param int $num Maximum number of steps to perform.
     * @return void
     */
    public function net($num) {
        for ($i = 0; $i < $num; $i++) {
            $url = $this->fopen_one();
            if ($url === '') {
                // Nothing left to process.
                break;
            }

            if (in_array($url, $this->fopen_list(), true)) {
                // Already fetched earlier: just drop it from the queue.
                $this->one_delete($url);
                continue;
            }

            $url_name = $this->site_keyword($url);
            $output = $this->get_content($url);
            $result = $this->get_url($output, $url, $url_name);
            $this->write_list($result);
            // Recorded even when the fetch failed, so a dead URL cannot block the queue.
            $this->write_one($url);
        }
    }

    /**
     * Fetch a URL with cURL.
     *
     * @param string $url Address to request.
     * @return string|false Response body, or false when the handle could not
     *                      be created or the transfer failed.
     */
    public function get_content($url) {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $output = curl_exec($ch);
        curl_close($ch);
        return $output;
    }

    /**
     * Return the first non-blank line of the queue file.
     *
     * @return string Trimmed URL, or '' when the file is missing, unreadable
     *                or contains no non-blank line.
     */
    public function fopen_one() {
        if (!is_readable($this->filename)) {
            return '';
        }
        $handle = fopen($this->filename, 'r');
        if ($handle === false) {
            return '';
        }

        $url = '';
        while (($buffer = fgets($handle)) !== false) {
            $url = trim($buffer);
            if ($url !== '') {
                break;
            }
        }
        fclose($handle);
        return $url;
    }

    /**
     * Return every URL stored in the visited file.
     *
     * @return array List of trimmed, non-blank lines (empty when the file is
     *               missing or unreadable).
     */
    public function fopen_list() {
        return $this->read_lines($this->filename_list);
    }

    /**
     * Extract links from an HTML document and keep those containing $url_name.
     *
     * Only anchors whose first attribute is `href` and whose value is
     * double-quoted are considered. Root-relative links ("/path") are resolved
     * against the scheme and host of $url; protocol-relative links ("//host/…")
     * get the scheme of $url; everything else is kept as written.
     *
     * @param string|false $output   HTML to scan (false/empty yields no links).
     * @param string       $url      URL the HTML was fetched from.
     * @param string       $url_name Literal text a link must contain to be kept.
     * @return array De-duplicated list of matching links (possibly empty).
     */
    public function get_url($output, $url, $url_name) {
        $result = array();
        if (!is_string($output) || $output === '') {
            return $result;
        }

        $pattern = "/\<a.href=[\'\"].*[\'\"].*\>.*\<\/a\>/iU";
        if (!preg_match_all($pattern, $output, $matches, PREG_SET_ORDER)) {
            return $result;
        }

        $origin = $this->url_parts($url);
        // Quote the keyword so characters such as "." or "/" are matched literally.
        $filter = '/.*' . preg_quote((string) $url_name, '/') . '.*/';

        foreach ($matches as $anchor) {
            if (!preg_match("/\"(.*)\"/iU", $anchor[0], $quoted)) {
                continue;
            }
            $href = $quoted[1];

            if (strpos($href, '//') === 0) {
                $link = $origin['scheme'] . ':' . $href;
            } elseif (strpos($href, '/') === 0) {
                $link = $origin['base'] . $href;
            } else {
                $link = $href;
            }

            if (preg_match($filter, $link, $hit)) {
                $result[] = trim($hit[0]);
            }
        }

        return array_values(array_unique($result));
    }

    /**
     * Append links to the queue file, each preceded by a newline.
     *
     * @param array $result Links to enqueue; blank entries are skipped.
     * @return void
     * @throws RuntimeException When the queue file cannot be written.
     */
    public function write_list($result) {
        if (!is_array($result)) {
            return;
        }
        $this->append_lines($this->filename, $result);
    }

    /**
     * Append one URL to the visited file, preceded by a newline.
     *
     * @param string $url URL to record (surrounding whitespace is trimmed).
     * @return void
     * @throws RuntimeException When the visited file cannot be written.
     */
    public function write_one($url) {
        $this->append_lines($this->filename_list, array($url));
    }

    /**
     * Remove the first occurrence of a URL from the queue file.
     *
     * The file is rewritten with one URL per line and no leading blank line,
     * so the next queued URL becomes the first line. When the URL is not in
     * the queue, no entry is removed.
     *
     * @param string $url URL to remove.
     * @return void
     * @throws RuntimeException When the queue file cannot be rewritten.
     */
    public function one_delete($url) {
        $lines = $this->read_lines($this->filename);
        $key = array_search(trim((string) $url), $lines, true);
        if ($key !== false) {
            unset($lines[$key]);
        }
        if (file_put_contents($this->filename, implode("\n", $lines)) === false) {
            throw new RuntimeException('Unable to write queue file: ' . $this->filename);
        }
    }

    /**
     * Read a file as a list of trimmed, non-blank lines.
     *
     * @param string $path File to read.
     * @return array Lines in file order; empty when the file cannot be read.
     */
    private function read_lines($path) {
        if (!is_readable($path)) {
            return array();
        }
        $raw = file($path, FILE_IGNORE_NEW_LINES);
        if ($raw === false) {
            return array();
        }

        $lines = array();
        foreach ($raw as $line) {
            $line = trim($line);
            if ($line !== '') {
                $lines[] = $line;
            }
        }
        return $lines;
    }

    /**
     * Append entries to a file, writing "\n" before each one.
     *
     * @param string $path    File to append to (created if absent).
     * @param array  $entries Values to append; blank entries are skipped.
     * @return void
     * @throws RuntimeException When the file cannot be written.
     */
    private function append_lines($path, $entries) {
        $payload = '';
        foreach ($entries as $entry) {
            $entry = trim((string) $entry);
            if ($entry !== '') {
                $payload .= "\n" . $entry;
            }
        }
        if ($payload === '') {
            return;
        }
        if (file_put_contents($path, $payload, FILE_APPEND) === false) {
            throw new RuntimeException('Unable to append to file: ' . $path);
        }
    }

    /**
     * Split a queue entry into the pieces needed to resolve links.
     *
     * Entries without a scheme are treated as "http://<entry>".
     *
     * @param string $url Queue entry, with or without a scheme.
     * @return array Keys: 'scheme', 'host', 'base' (scheme://host[:port]).
     */
    private function url_parts($url) {
        $url = (string) $url;
        $absolute = preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url) ? $url : 'http://' . $url;
        $parts = parse_url($absolute);

        if ($parts === false || empty($parts['host'])) {
            // Unparseable entry: fall back to plain prefixing.
            return array('scheme' => 'http', 'host' => $url, 'base' => 'http://' . $url);
        }

        $scheme = isset($parts['scheme']) ? $parts['scheme'] : 'http';
        $base = $scheme . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $base .= ':' . $parts['port'];
        }
        return array('scheme' => $scheme, 'host' => $parts['host'], 'base' => $base);
    }

    /**
     * Derive the keyword used to keep the crawl on the same site.
     *
     * The keyword is the text between the first two dots of the host
     * ("www.example.com" gives "example"). Hosts with fewer than two dots
     * are used whole.
     *
     * @param string $url Queue entry being crawled.
     * @return string Keyword that harvested links must contain.
     */
    private function site_keyword($url) {
        $parts = $this->url_parts($url);
        if (preg_match('/\.(.*)\./iU', $parts['host'], $mat) && $mat[1] !== '') {
            return $mat[1];
        }
        return $parts['host'];
    }
}
什么问题啊?是报错了,还是别的什么?你光贴代码,我们也不知道是啥问题啊。