Yet Another Related Posts Plugin の SQL 文を高速化

昨年、め組のVPS に移行した後、チマチマとパフォーマンスチューニングを行ってきたのですが、そこで得られた情報をシェアしておきます。

VPS に移行したことで共用レンタルサーバでは弄れなかった MySQL や Apache の設定なんかも弄れるようになったので、MySQL で実行に時間のかかっているクエリーをログに残してチューニングするようなことも手軽にできます。
スロークエリログの取得方法は、以下を参考に。
漢(オトコ)のコンピュータ道: MySQL 5.1のスロークエリログ

さて、このブログでの関連記事表示には Yet Another Related Posts Plugin を使用しています。
実行に2秒以上かかったクエリーをログに残したところ Yet Another Related Posts Plugin が生成したクエリーがログに記録されたので、これを修正しました。

書き換えた箇所は、関連投稿を取得するためのクエリーを生成している magic.php の yarpp_sql() 関数。
ちなみに使用しているバージョンは 3.2b3 です。

function yarpp_sql($type,$args,$giveresults = true,$reference_ID=false,$domain='website') {
	global $wpdb, $post, $yarpp_debug, $yarpp_cache;

	if (is_object($post) and !$reference_ID) {
		$reference_ID = $post->ID;
	}

	// set the "domain prefix", used for all the preferences.
	if ($domain == 'rss')
		$domainprefix = 'rss_';
	else
		$domainprefix = '';

	$options = array('limit'=>"${domainprefix}limit",
		'threshold'=>'threshold',
		'show_pass_post'=>'show_pass_post',
		'past_only'=>'past_only',
		'cross_relate'=>'cross_relate',
		'body'=>'body',
		'title'=>'title',
		'tags'=>'tags',
		'categories'=>'categories',
		'distags'=>'distags',
		'discats'=>'discats',
		'recent_only'=>'recent_only',
		'recent_number'=>'recent_number',
		'recent_units'=>'recent_units');
	$optvals = array();
	foreach (array_keys($options) as $option) {
		if (isset($args[$option])) {
			$optvals[$option] = stripslashes($args[$option]);
		} else {
			$optvals[$option] = stripslashes(stripslashes(yarpp_get_option($options[$option])));
		}
	}

	extract($optvals);

	// Fetch keywords
	$body_terms = $yarpp_cache->get_keywords($reference_ID,'body');
	$title_terms = $yarpp_cache->get_keywords($reference_ID,'title');

	if ($yarpp_debug) echo "<!--TITLE TERMS: $title_terms-->"; // debug
	if ($yarpp_debug) echo "<!--BODY TERMS: $body_terms-->"; // debug

	// get weights

	$bodyweight = (($body == 3)?3:(($body == 2)?1:0));
	$titleweight = (($title == 3)?3:(($title == 2)?1:0));
	$tagweight = (($tags != 1)?1:0);
	$catweight = (($categories != 1)?1:0);
	$weights = array();
	$weights['body'] = $bodyweight;
	$weights['title'] = $titleweight;
	$weights['cat'] = $catweight;
	$weights['tag'] = $tagweight;

	$totalweight = $bodyweight + $titleweight + $tagweight + $catweight;

	// get disallowed categories and tags

	$disterms = implode(',', array_filter(array_merge(explode(',',$discats),explode(',',$distags)),'is_numeric'));

	$usedisterms = count(array_filter(array_merge(explode(',',$discats),explode(',',$distags)),'is_numeric'));

	$criteria = array();
	if ($bodyweight)
		$criteria['body'] = "(MATCH (post_content) AGAINST ('".$wpdb->escape($body_terms)."'))";
	if ($titleweight)
		$criteria['title'] = "(MATCH (post_title) AGAINST ('".$wpdb->escape($title_terms)."'))";
	if ($tagweight)
		$criteria['tag'] = "tag.tagtax";
//		$criteria['tag'] = "COUNT( DISTINCT tagtax.term_taxonomy_id )";
	if ($catweight)
		$criteria['cat'] = "cat.cattax";
//		$criteria['cat'] = "COUNT( DISTINCT cattax.term_taxonomy_id )";

	$newsql = "SELECT $reference_ID as reference_ID, ID, "; //post_title, post_date, post_content, post_excerpt,

	$newsql .= 'ROUND(0';
	foreach ($criteria as $key => $value) {
		$newsql .= "+ $value * ".$weights[$key];
	}
	$newsql .= ',1) as score';

	$newsql .= "\n from $wpdb->posts \n";

	if ($usedisterms)
		$newsql .= " left join $wpdb->term_relationships as blockrel on ($wpdb->posts.ID = blockrel.object_id)
		left join $wpdb->term_taxonomy as blocktax using (`term_taxonomy_id`)
		left join $wpdb->terms as blockterm on (blocktax.term_id = blockterm.term_id and blockterm.term_id in ($disterms))\n";

	if ($tagweight)
		$newsql .= " left JOIN (
		select tagrel.object_id, COUNT( DISTINCT tagtax.term_taxonomy_id ) as tagtax
		from $wpdb->term_relationships AS thistag
		left JOIN $wpdb->term_relationships AS tagrel on (tagrel.term_taxonomy_id = thistag.term_taxonomy_id)
		left JOIN $wpdb->term_taxonomy AS tagtax ON (tagrel.term_taxonomy_id = tagtax.term_taxonomy_id AND tagtax.taxonomy = 'post_tag')
		where thistag.object_id = $reference_ID
		group by tagrel.object_id) as tag
		ON (tag.object_id = $wpdb->posts.ID )\n";
//		$newsql .= " left JOIN $wpdb->term_relationships AS thistag ON (thistag.object_id = $reference_ID )
//		left JOIN $wpdb->term_relationships AS tagrel on (tagrel.term_taxonomy_id = thistag.term_taxonomy_id
//		AND tagrel.object_id = $wpdb->posts.ID)
//		left JOIN $wpdb->term_taxonomy AS tagtax ON ( tagrel.term_taxonomy_id = tagtax.term_taxonomy_id
//		AND tagtax.taxonomy = 'post_tag')\n";

	if ($catweight)
		$newsql .= " left JOIN (
		select catrel.object_id, COUNT( DISTINCT cattax.term_taxonomy_id ) as cattax
		from $wpdb->term_relationships AS thiscat
		left JOIN $wpdb->term_relationships AS catrel on (catrel.term_taxonomy_id = thiscat.term_taxonomy_id)
		left JOIN $wpdb->term_taxonomy AS cattax ON (catrel.term_taxonomy_id = cattax.term_taxonomy_id AND cattax.taxonomy = 'category')
		where thiscat.object_id = $reference_ID
		group by catrel.object_id) as cat
		ON (cat.object_id = $wpdb->posts.ID )\n";
//		$newsql .= " left JOIN $wpdb->term_relationships AS thiscat ON (thiscat.object_id = $reference_ID )
//		left JOIN $wpdb->term_relationships AS catrel on (catrel.term_taxonomy_id = thiscat.term_taxonomy_id
//		AND catrel.object_id = $wpdb->posts.ID)
//		left JOIN $wpdb->term_taxonomy AS cattax ON ( catrel.term_taxonomy_id = cattax.term_taxonomy_id
//		AND cattax.taxonomy = 'category')\n";

	// WHERE

	$newsql .= " where (post_status IN ( 'publish',  'static' ) and ID != '$reference_ID')";

	if ($past_only) { // 3.1.8: revised $past_only option
		if ( is_object($post) && $reference_ID == $post->ID )
			$reference_post_date = $post->post_date;
		else
			$reference_post_date = $wpdb->get_var("select post_date from $wpdb->posts where ID = $reference_ID");
		$newsql .= " and post_date <= '$reference_post_date' ";
	}
	if (!$show_pass_post)
		$newsql .= " and post_password ='' ";
	if ($recent_only)
		$newsql .= " and post_date > date_sub(now(), interval $recent_number $recent_units) ";

	if ($type == array('page') && !$cross_relate)
		$newsql .= " and post_type = 'page'";
	else
		$newsql .= " and post_type = 'post'";

	$newsql .= (($categories == 3)?' and '.$criteria['cat'].' >= 1':'');
	$newsql .= (($categories == 4)?' and '.$criteria['cat'].' >= 2':'');
	$newsql .= (($tags == 3)?' and '.$criteria['tag'].' >= 1':'');
	$newsql .= (($tags == 4)?' and '.$criteria['tag'].' >= 2':'');

	// GROUP BY
	$newsql .= "\n group by id \n";

	// HAVING
	// safethreshold is so the new calibration system works.
	// number_format fix suggested by vkovalcik! :)
	$safethreshold = number_format(max($threshold,0.1), 2, '.', '');
	$newsql .= " having score >= $safethreshold";
	if ($usedisterms)
		$newsql .= " and count(blockterm.term_id) = 0";

//	$newsql .= (($categories == 3)?' and '.$criteria['cat'].' >= 1':'');
//	$newsql .= (($categories == 4)?' and '.$criteria['cat'].' >= 2':'');
//	$newsql .= (($tags == 3)?' and '.$criteria['tag'].' >= 1':'');
//	$newsql .= (($tags == 4)?' and '.$criteria['tag'].' >= 2':'');
	$newsql .= " order by score desc limit ".$limit;

	if (!$giveresults) {
		$newsql = "select count(t.ID) from ($newsql) as t";
	}

	// if we're looking for a X related entries, make sure we get at most X posts and X pages if
	// we cross-relate
	if ($cross_relate)
		$newsql = "($newsql) union (".str_replace("post_type = 'post'","post_type = 'page'",$newsql).")";

	if ($yarpp_debug)
		echo "<!--$newsql-->";

	return $newsql;
}

サブクエリを多用する形に修正したので MySQL のバージョンによっては動作しないかもしれません。
MySQL 5.1.54 では、問題なく動作しました。

コメントを残す

メールアドレスが公開されることはありません。 が付いている欄は必須項目です

このサイトはスパムを低減するために Akismet を使っています。コメントデータの処理方法の詳細はこちらをご覧ください