龙盟编程博客 | 无障碍搜索 | 云盘搜索神器
快速搜索
主页 > web编程 > php编程 >

全自动小说订阅微信推送

时间:2014-07-22 14:48来源: 作者: 点击:
分享到:
代码放置在sae上,需要安装Wordpress4SAE,微信公众平台。最后地址:http://iniu.sinaapp.com 。需要查看详细过程可以到我的博客查看:http://dlinux.sinaapp.com/archives/991
代码放置在sae上,需要安装Wordpress4SAE,微信公众平台
最后地址 http://iniu.sinaapp.com
需要查看详细过程可以到我的博客查看
 http://dlinux.sinaapp.com/archives/991  
<?php
// Declare the response as UTF-8 HTML before anything is echoed.
header("Content-type: text/html;charset=utf-8");
//set_time_limit(0);

// Connection settings are read from the SAE_MYSQL_* constants, which are not
// defined in this file (presumably provided by the SAE platform).
$dbname = SAE_MYSQL_DB;
$host = SAE_MYSQL_HOST_M;
$port = SAE_MYSQL_PORT;
$user = SAE_MYSQL_USER;
$pwd = SAE_MYSQL_PASS;

// The PHP warning is suppressed because the failure is reported through die()
// below. The fourth argument requests a new link instead of reusing an open one.
$connect = @mysql_connect("{$host}:{$port}", $user, $pwd, true);
if (!$connect) {
    die("Connect Server Failed: " . mysql_error());
}

if (!mysql_select_db($dbname, $connect)) {
    die("Select Database Failed: " . mysql_error($connect));
}

// mysql_set_charset() switches the connection charset and also informs the
// client library, unlike a raw "SET NAMES" query. It is bound to the explicit
// link and its result is checked, so a charset failure cannot silently lead to
// mis-encoded rows being stored.
if (!mysql_set_charset('utf8', $connect)) {
    die("Set Charset Failed: " . mysql_error($connect));
}

// Scraping rules: the URL to start from plus one regular expression per field.
// getContent() reads capture group 1 of the title/time/content/next patterns.
$rules = array(
	'start'		=>	'http://www.douluodalu.com.cn/jueshitangmen/6860.html',	// URL the crawl starts from
	'title'		=>	'/<h1>(.*?)<\/h1>/',	// article title
    'time'		=>	'/发布时间:(.*?)&nbsp;/',	// publication time
	'content'	=>	'/\"><\/div><p>([\s\S]*?)<div align=center>/',	// article body
	'next'		=>	'/下一篇: <a href=\"(.*?)\"/',	// URL of the next article
    );


// Resume from the URL of the most recently stored article; getLatest() falls
// back to the configured start URL when nothing has been stored yet.
$url = getLatest();

// The last chapter has no "next" link, so an empty URL ends the loop.
while (is_string($url) && $url !== '') {
    $value = get($url);
    if (!is_string($value) || $value === '') {
        // The download failed. Parsing an empty body would only produce an
        // empty article and an empty "next" URL, so stop here instead of
        // storing junk; the next run resumes from the last stored article.
        error_log("Fetch failed for {$url}; stopping crawl");
        break;
    }

    $value = _prefilter($value); // strip whitespace so the regex rules can match
    $context = getContent($value);
    $context['url'] = $url; // URL of the page just parsed; 'next' holds the following one
    $url = $context['next'];
    var_dump($url);

    // A page whose body could not be extracted did not match the expected
    // template; do not publish it as an empty post.
    if ((string) $context['content'] === '') {
        continue;
    }

    // Duplicate protection: the WordPress post is only written when the insert
    // into `articles` succeeded (presumably that insert is rejected for a URL
    // that is already stored -- confirm against the table's keys).
    if (storage($context)) {
        storageWP($context);
    }
}
echo "采集结束";
mysql_close($connect);

/**
 * Insert one scraped article into the `articles` table.
 *
 * The values are placed into the SQL text as-is; no escaping happens here.
 *
 * @param array $content_array Article data; the keys 'title', 'time', 'url' and 'content' are read.
 * @return mixed Whatever mysql_query() returns for the INSERT statement.
 */
function storage($content_array){
	global $connect;

	// Same statement text as before, assembled from a template instead of
	// inline interpolation.
	$insert = sprintf("insert into `articles` (`id`, `title`, `time`, `url`, `content`) values(null,
	'%s',
	'%s',
	'%s',
	'%s');",
		$content_array['title'],
		$content_array['time'],
		$content_array['url'],
		$content_array['content']
	);

	return mysql_query($insert, $connect);
}

/**
 * Publish a scraped article as a WordPress post and link it to term taxonomy 1.
 *
 * The post is inserted first and its real auto-generated ID is then read back
 * with mysql_insert_id(). Guessing the ID as max(ID) + 1 before the insert is
 * wrong whenever the table's AUTO_INCREMENT counter is ahead of max(ID) (for
 * example after deletions) or when another insert happens in between.
 *
 * The values in $content_array are interpolated into the SQL text as-is; this
 * function performs no escaping, so the caller must pass SQL-safe strings.
 *
 * @param array $content_array Article data; the keys 'title', 'time' and 'content' are read.
 * @return bool True when the post row, its guid and the term relationship were
 *              all written; false as soon as the post insert fails or when a
 *              later step fails.
 */
function storageWP($content_array){
	global $connect;

	// The guid depends on the post ID, which is only known after the insert,
	// so it is inserted empty and filled in below.
	$sql = "INSERT INTO `wp_posts` (`ID`, `post_author`, `post_date`, `post_date_gmt`, `post_content`, `post_title`, `post_excerpt`, `post_status`, `comment_status`, `ping_status`, `post_password`, `post_name`, `to_ping`, `pinged`, `post_modified`, `post_modified_gmt`, `post_content_filtered`, `post_parent`, `guid`, `menu_order`, `post_type`, `post_mime_type`, `comment_count`) VALUES (null,1,'{$content_array['time']}', '{$content_array['time']}', '{$content_array['content']}', '{$content_array['title']}', '', 'publish', 'open', 'open', '', '{$content_array['title']}', '', '', '{$content_array['time']}', '{$content_array['time']}', '', 0, '', 0, 'post', '', 0);";

	// Without a post row there is nothing to link; do not write an orphaned
	// relationship that could attach to some other post.
	if (!mysql_query($sql, $connect)) {
		return false;
	}

	$post_id = (int) mysql_insert_id($connect);
	if ($post_id <= 0) {
		return false;
	}

	$sql = "UPDATE `wp_posts` SET `guid` = 'http://iniu.sinaapp.com/?p={$post_id}' WHERE `ID` = {$post_id};";
	$guid_ok = (bool) mysql_query($sql, $connect);

	$sql = "INSERT INTO `wp_term_relationships` (`object_id`, `term_taxonomy_id`, `term_order`) VALUES({$post_id}, 1, 0);";
	$relation_ok = (bool) mysql_query($sql, $connect);

	return $guid_ok && $relation_ok;
}
/**
 * Extract the article fields from a fetched (and pre-filtered) page.
 *
 * Each field is capture group 1 of the matching pattern in the global $rules
 * array. Every extracted value is passed through addslashes(), including
 * 'time', because all of them end up interpolated into SQL statements. A field
 * whose pattern does not match is returned as an empty string.
 *
 * @param string $value Page HTML to search.
 * @return array Keys 'title', 'time', 'next' (URL of the next article) and 'content'.
 */
function getContent($value){
	global $rules;

	$context = array();
	foreach (array('title', 'time', 'next', 'content') as $field) {
		$match = array();
		$found = preg_match($rules[$field], (string) $value, $match);
		// Only read the capture group when the pattern really matched; this
		// avoids an undefined-offset notice on pages that miss a field.
		$context[$field] = ($found === 1 && isset($match[1])) ? addslashes($match[1]) : '';
	}

	return $context;
}

/**
 * Return the URL of the most recently stored article.
 *
 * Reads the `url` of the row with the highest id in `articles`; when no row
 * comes back, the start URL from the global $rules array is returned instead.
 *
 * @return string URL at which the crawl should begin.
 */
function getLatest(){
	global $connect, $rules;

	$newest = mysql_fetch_row(
		mysql_query("SELECT url FROM  `articles` ORDER BY id DESC LIMIT 1", $connect)
	);

	return $newest ? $newest[0] : $rules['start'];
}

/**
 * Fetch a URL with an HTTP GET request.
 *
 * @param string $url Address to download.
 * @return string|false The response body, or false when the cURL handle could
 *                      not be created or the transfer failed or timed out.
 */
function get($url){
	$ch = curl_init($url);
	if ($ch === false) {
		return false;
	}

	curl_setopt_array($ch, array(
		// Return the body from curl_exec() instead of printing it.
		CURLOPT_RETURNTRANSFER => true,
		// Bound both the connect phase and the whole transfer so a stalled
		// server cannot hang the crawl loop indefinitely.
		CURLOPT_CONNECTTIMEOUT => 10,
		CURLOPT_TIMEOUT => 30,
	));
	// CURLOPT_BINARYTRANSFER is no longer set: it has had no effect since
	// PHP 5.1.3 and is deprecated.

	$value = curl_exec($ch);
	curl_close($ch);
	return $value;
}

/* 对抓取到的内容做简单过滤(过滤空白字符,便于正则匹配)*/
function _prefilter($output) {
	strip_tags($output);
	$output=preg_replace("/\/\/[\S\f\t\v ]*?;[\r|\n]/", "", $output);
	$output=preg_replace("/\<\!\-\-[\s\S]*?\-\-\>/", "", $output);
	$output=preg_replace("/\>[\s]+\</", "><", $output);
	$output=preg_replace("/;[\s]+/", ";", $output);
	$output=preg_replace("/[\s]+\}/", "}", $output);
	$output=preg_replace("
      
精彩图集

赞助商链接