/**
 * Identify which search-engine spider, if any, sent the current request.
 *
 * Each known signature is looked up in the User-Agent header with a
 * case-insensitive substring test. When several signatures match, the one
 * listed last wins, which keeps the precedence of the original chain of
 * independent `if` statements.
 *
 * @return string Display name of the spider, or '' when no signature matched
 *                or the request carries no User-Agent header.
 */
function spider()
{
    // The header is absent for CLI runs and for clients that omit it;
    // reading it unguarded would raise an "undefined index" warning.
    $agent = isset($_SERVER['HTTP_USER_AGENT']) ? (string) $_SERVER['HTTP_USER_AGENT'] : '';
    if ($agent === '') {
        return '';
    }

    // Signature (needle) => display name. Order matters: later entries override earlier ones.
    $signatures = array(
        'googlebot'            => 'Google',
        'mediapartners-google' => 'Google',
        'baiduspider'          => 'Baidu',
        '360spider'            => '360搜索',
        'sogou'                => 'Sogou',
        'Yisouspider'          => '神马',
        'bingbot'              => 'Bing',
        'yahoo'                => 'Yahoo!',
    );

    // Initialise explicitly so the no-match path never touches an undefined variable.
    $bot = '';
    foreach ($signatures as $needle => $name) {
        // stripos() returns int 0 for a match at the very start and false for
        // no match, so the comparison must be strict.
        if (stripos($agent, $needle) !== false) {
            $bot = $name;
        }
    }

    return $bot;
}
以上是一个 PHP 函数,用于判断访问者是否为搜索引擎蜘蛛,并返回对应的蜘蛛名称(未识别时返回空字符串)。
如果不需要知道蜘蛛的名称,则可以这样:
// Crawler/spider detection, adapted from https://gist.github.com/zhangguiqiang/2859126
if (!function_exists('isCrawler')) {
    /**
     * Tell whether the current request appears to come from a crawler.
     *
     * When the `browscap` ini setting is configured, the decision is delegated
     * entirely to get_browser(). Otherwise the User-Agent header is tested
     * against a list of regular expressions.
     *
     * @return bool True when the request is classified as a crawler; false
     *              otherwise, including when no User-Agent header is present.
     */
    function isCrawler()
    {
        // Without a User-Agent neither strategy can decide anything, and
        // get_browser(null) would emit a warning, so bail out early.
        if (!isset($_SERVER['HTTP_USER_AGENT'])) {
            return false;
        }
        $agent = $_SERVER['HTTP_USER_AGENT'];

        if (ini_get('browscap')) {
            // Passing null makes get_browser() read the HTTP_USER_AGENT header itself.
            $browser = get_browser(null, true);

            // get_browser() returns false on failure, and the 'crawler' key is
            // not guaranteed to exist, so guard both before reading it.
            return is_array($browser) && !empty($browser['crawler']);
        }

        // NOTE(review): most of these patterns are case-sensitive while a few
        // carry the /i flag; that mix is kept as-is so results do not change.
        $crawlers = array(
            "/spider/",
            "/bot/",
            "/crawl/",
            "/Googlebot/",
            "/Google/",
            "/baidu/",
            "/blogsearch/",
            "/ia_archive/",
            "/Slurp/",
            "/Yandex/",
            "/Yeti/",
            "/msnbot/",
            "/Mediapartners-Google/",
            "/Scooter/",
            "/Yahoo-MMCrawler/",
            "/FAST-WebCrawler/",
            "/Yahoo! Slurp/",
            "/FAST Enterprise Crawler/",
            "/grub-client-/",
            "/MSIECrawler/",
            "/NPBot/",
            "/NameProtect/i",
            "/ZyBorg/i",
            "/worio bot heritrix/i",
            "/Ask Jeeves/",
            "/libwww-perl/i",
            "/Gigabot/i",
            "/bot@bot.bot/i",
            "/SeznamBot/i",
        );

        foreach ($crawlers as $pattern) {
            if (preg_match($pattern, $agent)) {
                return true;
            }
        }

        return false;
    }
}
还有一个不使用正则表达式的版本(改用 stripos 做子串匹配),效率更高:
if (!function_exists('isCrawler')) {
    /**
     * Tell whether the current request's User-Agent contains a known bot signature.
     *
     * The signature table is taken from the WP-PostViews plugin
     * (https://wordpress.org/plugins/wp-postviews). Matching is a
     * case-insensitive substring search, with no regular expressions.
     *
     * @return bool True when any signature occurs in the User-Agent; false
     *              when none does or when the header is missing or empty.
     */
    function isCrawler()
    {
        // Bot label => substring to look for. The labels are informational only.
        $signatures = array(
            'Google Bot'    => 'google',
            'MSN'           => 'msnbot',
            'Alex'          => 'ia_archiver',
            'Lycos'         => 'lycos',
            'Ask Jeeves'    => 'jeeves',
            'Altavista'     => 'scooter',
            'AllTheWeb'     => 'fast-webcrawler',
            'Inktomi'       => 'slurp@inktomi',
            'Turnitin.com'  => 'turnitinbot',
            'Technorati'    => 'technorati',
            'Yahoo'         => 'yahoo',
            'Findexa'       => 'findexa',
            'NextLinks'     => 'findlinks',
            'Gais'          => 'gaisbo',
            'WiseNut'       => 'zyborg',
            'WhoisSource'   => 'surveybot',
            'Bloglines'     => 'bloglines',
            'BlogSearch'    => 'blogsearch',
            'PubSub'        => 'pubsub',
            'Syndic8'       => 'syndic8',
            'RadioUserland' => 'userland',
            'Gigabot'       => 'gigabot',
            'Become.com'    => 'become.com',
            'Baidu'         => 'baiduspider',
            'so.com'        => '360spider',
            'Sogou'         => 'spider',
            'soso.com'      => 'sosospider',
            'Yandex'        => 'yandex',
        );

        // A missing or empty User-Agent can never match, so decide that once up front.
        if (empty($_SERVER['HTTP_USER_AGENT'])) {
            return false;
        }
        $ua = $_SERVER['HTTP_USER_AGENT'];

        foreach ($signatures as $needle) {
            // Strict comparison: a match at offset 0 must still count as a hit.
            if (stripos($ua, $needle) !== false) {
                return true;
            }
        }

        return false;
    }
}