URL抓取工具
有需要csdn免积分下载、pudn免积分下载、51cto免积分,请到 http://www.itziy.com/ 。命令行下执行,直接php调用将显示使用方式。功能说明:1.支持代理;2.支持设置递归检查次数;3.支持输出类型控制、检查内容控制。
有需要csdn免积分下载、pudn免积分下载、51cto免积分,请到http://www.itziy.com/
命令行下执行,直接php调用将显示使用方式
功能说明
1.支持代理
2.支持设置递归检查次数
3.支持输出类型控制、检查内容控制
作用:
主要代替肉眼,尽量多地抓取可能的请求包及url地址等,方便渗透测试。
命令行下执行,直接php调用将显示使用方式
功能说明
1.支持代理
2.支持设置递归检查次数
3.支持输出类型控制、检查内容控制
作用:
主要代替肉眼,尽量多地抓取可能的请求包及url地址等,方便渗透测试。
<?php
/**
 * Command-line URL grabber.
 *
 * Fetches a start page, then scans it (and the pages it links to, one level
 * deep) for absolute URLs and for external scripts that contain AJAX calls.
 * Every hit is appended to an output file; URLs that fail to load are appended
 * to error.txt next to this script when SAVE_ERROR is enabled.
 *
 * Usage: php debug.php url [num] [filename] [header] [proxy]
 */
error_reporting(E_ERROR | E_WARNING | E_PARSE);
ini_set('memory_limit', '1024M');
set_time_limit(0);

// Feature switches for the individual scan passes performed by work().
define('CHECK_A_TAG', false);  // validate and record the href of every <a> tag
define('CHECK_JS_TAG', true);  // fetch <script src> files and look for AJAX calls
define('CHECK_URL', true);     // validate and record absolute URLs found in the page
define('SAVE_ERROR', true);    // log failing URLs to error.txt

// Substrings whose presence in a fetched script marks it as issuing AJAX requests.
$checkArr = array(
    '$.load',
    '.ajax',
    '$.post',
    '$.get',
    '.getJSON'
);

if (!isset($argc) || $argc < 2) {
    showerror('sorry, parameter error', array('example: php debug.php url num filename header proxy', 'detail information:', 'url: target url address which you want to check it', 'num: The number of pages of recursive,default 3', 'filename: output filename default name ret.txt', 'header: The request header file default null', 'proxy: if you want to use proxy set it here default no use proxy'));
    exit(1);
}
if (!check_extension()) {
    showerror('extension curl not support', 'please open php curl extension support');
    exit(1);
}

// global variable
$url = trim($argv[1]);
if (!preg_match('#^https?://#i', $url))
    $url = 'http://'.$url;
$num = isset($argv[2]) ? intval($argv[2]) : 3;
$output = isset($argv[3])
    ? trim(str_replace("\\", '/', $argv[3]))
    : str_replace("\\", '/', dirname(__FILE__)).'/ret.txt';
$header = null;
$proxy = null;
$host = null;
$outputArr = array();

if (isset($argv[4])) {
    // The header file is looked up as given first, then relative to this script.
    $headerFile = trim(str_replace("\\", '/', $argv[4]));
    if (!file_exists($headerFile))
        $headerFile = str_replace("\\", '/', dirname(__FILE__)).'/'.$headerFile;
    if (file_exists($headerFile)) {
        $lines = explode("\n", str_replace("\r", '', (string)file_get_contents($headerFile)));
        // One request header per line; blank lines are dropped.
        $header = array_values(array_filter(array_map('trim', $lines), 'strlen'));
    }
}
if (isset($argv[5]))
    $proxy = trim($argv[5]);
if (!is_array($header) || empty($header))
    $header = null;

$result = check_valid_url($url);
if (!empty($result)) {
    $tmpArr = parse_url($url);
    if (!is_array($tmpArr) || !isset($tmpArr['host'])) {
        showerror('parse url error', 'can not get host form url: '.$url);
        exit(1);
    }
    // Keep the scheme and port of the start URL so relative links resolve to the same origin.
    $host = (isset($tmpArr['scheme']) ? strtolower($tmpArr['scheme']) : 'http').'://'.$tmpArr['host'];
    if (isset($tmpArr['port']))
        $host .= ':'.$tmpArr['port'];
    unset($tmpArr);

    //check for current page
    record_url($url);
    work($result);
}
echo 'run finish',PHP_EOL;

/**
 * Scan one fetched page and record what it finds.
 *
 * Passes, each guarded by its feature switch: <a href> targets, absolute URLs
 * in the page text, and <script src> files containing one of $checkArr. Unless
 * $reverse is set, every <a href> target is then fetched and scanned once with
 * $reverse = true.
 *
 * NOTE(review): the while loop re-scans the same $result on every iteration,
 * so $num controls how often this page is re-processed rather than a crawl
 * depth. Left as is; confirm the intended semantics before changing.
 *
 * @param string|false $result  Raw response (headers + body) from check_valid_url().
 * @param bool         $reverse When true, do a single scan and do not follow links.
 * @return void
 */
function work($result, $reverse = false)
{
    global $num, $outputArr, $checkArr;

    if (!$result)
        return;

    // "." does not match newlines without the /s flag, so flatten the document.
    // A space (not an empty string) keeps adjacent lines from fusing into one URL.
    $result = str_replace(array("\r", "\n"), ' ', $result);

    $linkPattern = '/<a.*?href\s*=\s*("|\'|\s)+(.*?)("|\'|\s)+.*?>/i';
    $scriptPattern = '/<script.*?src\s*=\s*("|\'|\s)+(.*?)("|\'|\s)+.*?>/i';
    // The path part stops at whitespace, quotes, angle brackets and ")" so a
    // match cannot run to the end of the flattened document.
    $urlPattern = '/(?:https?|ftp|mms):\/\/(?:[a-z0-9]+[_\-]?[a-z0-9]+\.)*[a-z0-9]+\-?[a-z0-9]+\.[a-z]{2,}(?::\d+)?(?:\/[^\s"\'<>)]*)?/i';

    while ($num > 0) {
        echo 'remain: ',$num,' now start to check for url address',PHP_EOL,PHP_EOL;
        if (CHECK_A_TAG && preg_match_all($linkPattern, $result, $match)) {
            foreach (array_unique($match[2]) as $mc) {
                $mc = normalize_link($mc);
                if ($mc === null || isset($outputArr[md5($mc)]))
                    continue;
                if (check_valid_url($mc))
                    record_url($mc);
            }
        }

        //check for page url
        echo 'remain: ',$num,' now start to check for page url',PHP_EOL,PHP_EOL;
        if (CHECK_URL && preg_match_all($urlPattern, $result, $match)) {
            // $match[0] holds the complete URLs; the numbered groups are only fragments.
            foreach (array_unique($match[0]) as $mc) {
                $mc = trim($mc);
                if (isset($outputArr[md5($mc)]))
                    continue;
                if (!preg_match('#^https?://#i', $mc)) {
                    // ftp:// and mms:// cannot be validated over HTTP; record them as found.
                    record_url($mc);
                    continue;
                }
                if (check_valid_url($mc))
                    record_url($mc);
            }
        }

        //check for javascript ajax require
        echo 'remain: ',$num,' now start to check for javascript ajax require',PHP_EOL,PHP_EOL;
        if (CHECK_JS_TAG && preg_match_all($scriptPattern, $result, $match)) {
            foreach (array_unique($match[2]) as $mc) {
                $mc = normalize_link($mc);
                if ($mc === null || isset($outputArr[md5($mc)]))
                    continue;
                $ret = check_valid_url($mc);
                if (!$ret)
                    continue;
                // Record the script as soon as any AJAX marker is found in it.
                foreach ($checkArr as $ck) {
                    if (strpos($ret, $ck) !== false) {
                        record_url($mc);
                        break;
                    }
                }
            }
        }

        if ($reverse)
            return;

        //check for next page
        if (preg_match_all($linkPattern, $result, $match)) {
            echo 'check for next page, remain page counts: ',$num,PHP_EOL;
            foreach (array_unique($match[2]) as $mc) {
                $mc = normalize_link($mc);
                if ($mc === null)
                    continue;
                echo 'check for next page: ',$mc,PHP_EOL;
                work(check_valid_url($mc), true);
            }
        }

        $num--;
        sleep(3);
    }
}

/**
 * Turn a link taken from an href/src attribute into an absolute http(s) URL.
 *
 * Relative links are resolved against the site root held in the global $host;
 * the directory of the page the link came from is not known here.
 *
 * @param string $link Raw attribute value.
 * @return string|null Absolute URL, or null when the link should be skipped
 *                     (empty, fragment-only, or a non-HTTP scheme such as
 *                     javascript: or mailto:).
 */
function normalize_link($link)
{
    global $host;

    $link = trim($link);
    if ($link === '' || $link[0] === '#')
        return null;
    if (preg_match('#^https?://#i', $link))
        return $link;
    if (strpos($link, '//') === 0) {
        // Protocol-relative link: reuse the scheme of the start page.
        $scheme = parse_url((string)$host, PHP_URL_SCHEME);
        return ($scheme ? $scheme : 'http').':'.$link;
    }
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $link))
        return null;

    return rtrim((string)$host, '/').'/'.ltrim($link, '/');
}

/**
 * Append a URL to the output file unless it was already recorded in this run.
 *
 * @param string $url URL to record.
 * @return bool True when the URL was newly recorded, false when it was a duplicate.
 */
function record_url($url)
{
    global $outputArr, $output;

    $key = md5($url);
    if (isset($outputArr[$key]))
        return false;

    $outputArr[$key] = $url;
    file_put_contents($output, $url."\n", FILE_APPEND);
    echo 'url: ',$url,' find ajax require so save it',PHP_EOL;
    return true;
}

/**
 * Fetch a URL with cURL and return the raw response when it looks usable.
 *
 * Applies the request headers and proxy given on the command line (globals
 * $header and $proxy). A response counts as usable when cURL reports no error,
 * the final HTTP status is 200 or 302, and the response does not contain
 * "114so.cn" (presumably an ISP DNS-hijack landing page - confirm).
 *
 * @param string $url Target URL; "http://" is prepended when no http(s) scheme is present.
 * @return string|false Response headers + body, or false on failure.
 */
function check_valid_url($url)
{
    global $header, $proxy;

    if (!preg_match('#^https?://#i', $url))
        $url = 'http://'.$url;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)');
    if (is_array($header) && !empty($header))
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    if (!empty($proxy))
        curl_setopt($ch, CURLOPT_PROXY, $proxy);

    $ret = curl_exec($ch);
    $errinfo = curl_error($ch);
    // Use the numeric status of the final response: HTTP/2 status lines carry no
    // "OK"/"Moved" reason phrase, and body text must not be able to spoof success.
    $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    unset($ch);

    $failed = $ret === false
        || !empty($errinfo)
        || ($code !== 200 && $code !== 302)
        || strpos($ret, '114so.cn') !== false;
    if ($failed) {
        showerror('check url: '.$url.' find some errors', array($errinfo, $ret));
        if (SAVE_ERROR)
            file_put_contents(dirname(__FILE__).'/error.txt', $url."\n", FILE_APPEND);
        return false;
    }

    return $ret;
}

/**
 * Report whether the cURL extension is available.
 *
 * @return bool True when curl_init() exists and the curl extension is loaded.
 */
function check_extension()
{
    return function_exists('curl_init') && extension_loaded('curl');
}

/**
 * Print a framed error/usage message to standard output.
 *
 * @param string          $t Title line.
 * @param string|string[] $c Detail text: a single string, or one entry per line.
 * @return void
 */
function showerror($t, $c)
{
    $str = "#########################################################################\n";
    $str .= "# ".$t."\n";
    if (is_string($c))
        $str .= "# ".$c;
    elseif (is_array($c) && !empty($c)) {
        foreach ($c as $c1)
            $str .= "# ".$c1."\n";
    }
    $str .= "\n#########################################################################\n";
    echo $str;
}
精彩图集
精彩文章