//搜索关键词
/**
 * Search the remote site for a CAS value and store the matching detail page.
 *
 * Posts $cas to the search endpoint, looks in the returned HTML for the first
 * link whose text is "化学百科" (Chinese for "chemical encyclopedia"), fetches
 * that page through spiderDetail(), saves the result as a ProdContent row and
 * sets is_guzzle = 1 on the Prod rows with the same cas.
 *
 * @param string $cas Value sent as the "cas-search" form field
 *                    (presumably a CAS registry number).
 *
 * @return bool True when content was stored and the Prod rows were updated;
 *              false when the request failed, the response was empty, no
 *              usable link was found, or spiderDetail() returned nothing.
 */
private function searchCas($cas){
    Log::info('searchCas', [$cas]);
    $url = "http://www.bbssff.com/m/search";
    try {
        $client = new Client();
        $response = $client->post($url, [
            'form_params' => [
                'cas-search' => $cas
            ],
            'allow_redirects' => true
        ]);
        // getBody() hands back a body object (a PSR-7 stream with Guzzle);
        // cast it so both the log entry and the DOM parser receive plain text.
        $html = (string) $response->getBody();
        Log::info('html ', [$html]);
    } catch (\Exception $e) {
        // Fully qualified so the catch still matches when this file is namespaced.
        Log::info('searchCasException ', [$e]);
        return false;
    }

    // An empty body cannot contain the link; loadHTML() also rejects empty
    // input (ValueError on PHP 8), so stop here.
    if (trim($html) === '') {
        return false;
    }

    $dom = new \DOMDocument();
    // Real-world HTML is rarely well-formed: collect libxml parse errors
    // internally instead of silencing every warning with "@".
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);
    $dom->normalize();

    $xpath = new \DOMXPath($dom);
    // An earlier version of this query matched a[@class="checkmore"] instead.
    $hrefs = $xpath->query('/html/body//a[text()="化学百科"]//@href');
    if ($hrefs === false || $hrefs->length === 0) {
        return false;
    }
    $link = $hrefs->item(0)->nodeValue;

    // Only links containing "-d" are accepted as valid detail-page links.
    if (strpos($link, '-d') === false) {
        return false;
    }

    // NOTE(review): this host differs from the search host above
    // (bbssy.com vs bbssff.com) - confirm that this is intentional.
    $url = 'http://www.bbssy.com'.$link;
    $content = $this->spiderDetail($url);
    // Previously this line was the no-op statement "false;", so empty content
    // was still persisted and the product was still marked as crawled.
    if (!$content) {
        return false;
    }

    ProdContent::create([
        'cas' => $cas,
        'url' => $url,
        'content' => $content
    ]);

    // Mark the product as crawled.
    Prod::where('cas', '=', $cas)->update([
        'is_guzzle' => 1
    ]);

    return true;
}