1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: 165: 166: 167: 168: 169: 170: 171: 172: 173: 174: 175: 176: 177: 178: 179: 180: 181: 182: 183: 184: 185: 186: 187: 188: 189: 190: 191: 192: 193: 194: 195: 196: 197: 198: 199: 200: 201: 202: 203: 204: 205: 206: 207: 208: 209: 210: 211: 212: 213: 214: 215: 216: 217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228: 229: 230: 231: 232: 233: 234: 235: 236: 237: 238: 239: 240: 241: 242: 243: 244: 245: 246: 247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260: 261: 262: 263: 264: 265: 266: 267: 268: 269: 270: 271: 272: 273: 274: 275: 276: 277: 278: 279: 280: 281: 282: 283: 284: 285:
<?php
// Stop immediately unless the SMF constant has already been defined by the including script.
if (!defined('SMF'))
{
    die('No direct access...');
}
/**
 * Search API that answers word queries through the database's native
 * fulltext facilities (MySQL MATCH ... AGAINST, PostgreSQL to_tsvector).
 */
class fulltext_search extends search_api
{
    /**
     * @var array Words taken from $modSettings['search_banned_words'] (comma separated).
     */
    protected $bannedWords = array();

    /**
     * @var int Shortest word length the fulltext index is expected to hold.
     */
    protected $min_word_length = 4;

    /**
     * @var array Values of $db_type this API can work with.
     */
    protected $supported_databases = array('mysql', 'postgresql');

    /**
     * Checks the database type and loads the banned words and minimum word length.
     *
     * When $db_type is not in $supported_databases the instance is flagged as
     * unsupported (is_supported = false) and nothing else is initialised.
     */
    public function __construct()
    {
        global $modSettings, $db_type;

        if (!in_array($db_type, $this->supported_databases))
        {
            $this->is_supported = false;
            return;
        }

        $this->bannedWords = empty($modSettings['search_banned_words']) ? array() : explode(',', $modSettings['search_banned_words']);
        $this->min_word_length = $this->_getMinWordLength();
    }

    /**
     * Reports whether this API implements the named method.
     *
     * @param string $methodName The method being asked about
     * @param array|null $query_params Passed through untouched to the parent check
     * @return bool True for the methods implemented here, otherwise whatever the parent reports
     */
    public function supportsMethod($methodName, $query_params = null)
    {
        switch ($methodName)
        {
            case 'searchSort':
            case 'prepareIndexes':
            case 'indexedWordQuery':
                return true;

            default:
                return parent::supportsMethod($methodName, $query_params);
        }
    }

    /**
     * Determines the minimum indexed word length.
     *
     * PostgreSQL always yields 0. Otherwise the server variable ft_min_word_len
     * is queried, falling back to 4 when the query fails or does not return
     * exactly one row.
     *
     * @return int The minimum word length
     */
    protected function _getMinWordLength()
    {
        global $smcFunc, $db_type;

        if ($db_type == 'postgresql')
            return 0;

        $request = $smcFunc['db_search_query']('max_fulltext_length', '
            SHOW VARIABLES
            LIKE {string:fulltext_minimum_word_length}',
            array(
                'fulltext_minimum_word_length' => 'ft_min_word_len',
            )
        );

        $min_word_length = 4;

        if ($request !== false)
        {
            if ($smcFunc['db_num_rows']($request) == 1)
            {
                // The row is (variable name, value); only the value matters.
                $row = $smcFunc['db_fetch_row']($request);
                if (isset($row[1]))
                    $min_word_length = (int) $row[1];
            }

            // Release the result no matter how many rows came back.
            $smcFunc['db_free_result']($request);
        }

        return $min_word_length;
    }

    /**
     * Comparison callback ordering search words from longest to shortest,
     * with excluded words (global $excludedWords) pushed behind all others
     * through a 1000 character penalty.
     *
     * @param string $a First word
     * @param string $b Second word
     * @return int 1 when $a scores lower than $b, -1 when higher, 0 when equal
     */
    public function searchSort($a, $b)
    {
        global $excludedWords, $smcFunc;

        $x = $smcFunc['strlen']($a) - ($this->_isListedWord($a, $excludedWords) ? 1000 : 0);
        $y = $smcFunc['strlen']($b) - ($this->_isListedWord($b, $excludedWords) ? 1000 : 0);

        if ($x == $y)
            return 0;

        return $x < $y ? 1 : -1;
    }

    /**
     * Sorts a single search word into the buckets used to build the query.
     *
     * The word is always appended to $wordsSearch['indexed_words'] (wrapped in
     * double quotes unless text2words() yields exactly one sub-word). Unless
     * $modSettings['search_force_index'] is set, words judged too short for
     * the index are also appended to 'words' and 'complex_words'.
     *
     * @param string $word The word (or phrase) to classify
     * @param array $wordsSearch Buckets being filled: 'words', 'complex_words', 'indexed_words'
     * @param array $wordsExclude Receives the fulltext form of the word when it is excluded
     * @param bool $isExcluded Whether the word is to be excluded from the results
     */
    public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
    {
        global $modSettings, $smcFunc;

        $subwords = text2words($word, null, false);

        // Anything that is not exactly one sub-word is handed over as a quoted phrase.
        $fulltextWord = count($subwords) === 1 ? $word : '"' . $word . '"';

        if (empty($modSettings['search_force_index']))
        {
            if (count($subwords) > 1 && preg_match('~[.:@$]~', $word))
            {
                // Contains one of . : @ $ and the first two sub-words are both below the minimum length.
                $too_short = $smcFunc['strlen'](current($subwords)) < $this->min_word_length
                    && $smcFunc['strlen'](next($subwords)) < $this->min_word_length;
            }
            else
                $too_short = $smcFunc['strlen'](trim($word, "/*- ")) < $this->min_word_length;

            if ($too_short)
            {
                $wordsSearch['words'][] = trim($word, "/*- ");
                $wordsSearch['complex_words'][] = $fulltextWord;
            }
        }

        $wordsSearch['indexed_words'][] = $fulltextWord;

        if ($isExcluded)
            $wordsExclude[] = $fulltextWord;
    }

    /**
     * Builds and runs the query selecting the messages that match the given words.
     *
     * On PostgreSQL $modSettings['search_simple_fulltext'] is switched on
     * (globally) before anything else, so only the simple match is ever used there.
     *
     * @param array $words Word buckets: 'words', 'indexed_words', 'complex_words'
     * @param array $search_data Uses 'params', 'no_regexp', 'insert_into', 'max_results', 'indexed_results'
     * @return mixed Whatever $smcFunc['db_search_query'] returns for the query
     */
    public function indexedWordQuery(array $words, array $search_data)
    {
        global $modSettings, $smcFunc;

        $query_select = array(
            'id_msg' => 'm.id_msg',
        );
        $query_where = array();
        $query_params = $search_data['params'];

        $is_postgresql = $smcFunc['db_title'] === POSTGRE_TITLE;

        // Kept as a write to the global setting: other code may read it afterwards.
        if ($is_postgresql)
            $modSettings['search_simple_fulltext'] = true;

        $simple_fulltext = !empty($modSettings['search_simple_fulltext']);
        $use_like = empty($modSettings['search_match_words']) || !empty($search_data['no_regexp']);
        $operator = $use_like ? ' LIKE ' : ' RLIKE ';
        $scan_allowed = empty($modSettings['search_force_index']);

        if ($query_params['id_search'])
            $query_select['id_search'] = '{int:id_search}';

        // Words the index cannot serve are matched directly against the message body.
        if (!$simple_fulltext)
        {
            $excluded_words = isset($query_params['excluded_words']) ? $query_params['excluded_words'] : array();
            $count = 0;
            foreach ($words['words'] as $regularWord)
            {
                $negation = $this->_isListedWord($regularWord, $excluded_words) ? ' NOT' : '';
                $query_where[] = 'm.body' . $negation . $operator . '{string:complex_body_' . $count . '}';
                $query_params['complex_body_' . $count++] = $this->_buildMatchPattern($regularWord, $use_like);
            }
        }

        if ($query_params['user_query'])
            $query_where[] = '{raw:user_query}';
        if ($query_params['board_query'])
            $query_where[] = 'm.id_board {raw:board_query}';
        if ($query_params['topic'])
            $query_where[] = 'm.id_topic = {int:topic}';
        if ($query_params['min_msg_id'])
            $query_where[] = 'm.id_msg >= {int:min_msg_id}';
        if ($query_params['max_msg_id'])
            $query_where[] = 'm.id_msg <= {int:max_msg_id}';

        if (!empty($query_params['excluded_phrases']) && $scan_allowed)
        {
            $count = 0;
            foreach ($query_params['excluded_phrases'] as $phrase)
            {
                $query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
                $query_params['exclude_subject_phrase_' . $count++] = $this->_buildMatchPattern($phrase, $use_like);
            }
        }

        if (!empty($query_params['excluded_subject_words']) && $scan_allowed)
        {
            $count = 0;
            foreach ($query_params['excluded_subject_words'] as $excludedWord)
            {
                $query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_words_' . $count . '}';
                $query_params['exclude_subject_words_' . $count++] = $this->_buildMatchPattern($excludedWord, $use_like);
            }
        }

        if ($simple_fulltext)
        {
            if ($is_postgresql)
            {
                $query_where[] = 'to_tsvector({string:language_ftx},body) @@ plainto_tsquery({string:language_ftx},{string:body_match})';
                $query_params['language_ftx'] = $smcFunc['db_search_language']();
            }
            else
                $query_where[] = 'MATCH (body) AGAINST ({string:body_match})';

            $query_params['body_match'] = implode(' ', array_diff($words['indexed_words'], $query_params['excluded_index_words']));
        }
        else
        {
            // Not reachable on PostgreSQL (simple mode is forced above), so only
            // the boolean mode syntax is produced: +word to require, -word to exclude.
            $boolean_terms = array();
            foreach (array_diff($words['indexed_words'], $words['complex_words']) as $fulltextWord)
                $boolean_terms[] = ($this->_isListedWord($fulltextWord, $query_params['excluded_index_words']) ? '-' : '+') . $fulltextWord;

            $query_params['boolean_match'] = implode(' ', $boolean_terms);

            if ($query_params['boolean_match'] !== '')
                $query_where[] = 'MATCH (body) AGAINST ({string:boolean_match} IN BOOLEAN MODE)';
        }

        $sql = '';

        if ($smcFunc['db_support_ignore'])
            $sql .= '
            INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
                (' . implode(', ', array_keys($query_select)) . ')';

        $sql .= '
            SELECT ' . implode(', ', $query_select) . '
            FROM {db_prefix}messages AS m
            WHERE ' . implode('
                AND ', $query_where);

        if (!empty($search_data['max_results']))
            $sql .= '
            LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results']);

        return $smcFunc['db_search_query']('insert_into_log_messages_fulltext', $sql, $query_params);
    }

    /**
     * Tells whether a word appears in a list, comparing by exact string value.
     *
     * A loose in_array() treats numeric-looking strings as numbers, so "007"
     * would match "7" and "1e3" would match "1000". Comparing string values
     * matches how array_diff() compares entries.
     *
     * @param string $word The word to look for
     * @param mixed $list The list to search; anything that is not an array counts as empty
     * @return bool Whether the word is in the list
     */
    protected function _isListedWord($word, $list)
    {
        if (!is_array($list))
            return false;

        $word = (string) $word;

        foreach ($list as $candidate)
            if ((string) $candidate === $word)
                return true;

        return false;
    }

    /**
     * Turns a word or phrase into the value bound to a LIKE or RLIKE comparison.
     *
     * @param string $text The word or phrase to match
     * @param bool $use_like True for a LIKE pattern, false for an RLIKE expression
     * @return string "%text%" with _ and % escaped for LIKE; otherwise the text with regex
     *                metacharacters bracketed, wrapped in [[:<:]] ... [[:>:]] word boundaries
     */
    protected function _buildMatchPattern($text, $use_like)
    {
        if ($use_like)
            return '%' . strtr($text, array('_' => '\\_', '%' => '\\%')) . '%';

        return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $text), '\\\'') . '[[:>:]]';
    }
}
?>