XSSearch.class.php 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392
  1. <?php
  2. /**
  3. * XSSearch 类定义文件
  4. *
  5. * @author hightman
  6. * @link http://www.xunsearch.com/
  7. * @copyright Copyright &copy; 2011 HangZhou YunSheng Network Technology Co., Ltd.
  8. * @license http://www.xunsearch.com/license/
  9. * @version $Id$
  10. */
  11. /**
  12. * XS 搜索类, 执行搜索功能
  13. * 有部分方法支持串接操作
  14. * <pre>
  15. * $xs->search->setQuery($str)->setLimit(10, 10)->search();
  16. * $xs->close();
  17. * </pre>
  18. *
  19. * @property string $query 默认搜索语句
  20. * @property-read int $dbTotal 数据库内的数据总量
  21. * @property-read int $lastCount 最近那次搜索的匹配总量估值
  22. * @property-read array $hotQuery 热门搜索词列表
  23. * @property-read array $relatedQuery 相关搜索词列表
  24. * @property-read array $expandedQuery 展开前缀的搜索词列表
  25. * @property-read array $correctedQuery 修正后的建议搜索词列表
  26. * @author hightman <hightman@twomice.net>
  27. * @version 1.0.0
  28. * @package XS
  29. */
  30. class XSSearch extends XSServer
  31. {
  32. /**
  33. * 搜索结果默认分页数量
  34. */
  35. const PAGE_SIZE = 10;
  36. const LOG_DB = 'log_db';
  37. private $_defaultOp = XS_CMD_QUERY_OP_AND;
  38. private $_prefix, $_fieldSet, $_resetScheme = false;
  39. private $_query, $_terms, $_count;
  40. private $_lastCount, $_highlight;
  41. private $_curDb, $_curDbs = array();
  42. private $_lastDb, $_lastDbs = array();
  43. private $_facets = array();
  44. private $_limit = 0, $_offset = 0;
  45. private $_charset = 'UTF-8';
  /**
   * Connect to the search server and initialise the per-connection state.
   *
   * Every reconnect resets the query-related settings kept by this object:
   * the prefix list, the field-set flag and the last match count.
   *
   * @param string $conn connection string, handed unchanged to the parent
   * @see XSServer::open
   */
  public function open($conn)
  {
      parent::open($conn);

      // Start from a clean slate after each (re)connect.
      $this->_lastCount = false;
      $this->_fieldSet = false;
      $this->_prefix = array();
  }
  /**
   * Set the character set used for queries and results.
   *
   * The default is UTF-8. Set this first when the submitted queries and the
   * expected results use another charset. The name is upper-cased and the
   * alias "UTF8" is normalised to "UTF-8".
   *
   * @param string $charset charset name
   * @return XSSearch this object, to allow call chaining
   */
  public function setCharset($charset)
  {
      $normalized = strtoupper($charset);
      $this->_charset = ($normalized == 'UTF8') ? 'UTF-8' : $normalized;

      return $this;
  }
  /**
   * Switch fuzzy searching on or off.
   *
   * By default only records containing all search terms are returned (AND);
   * enabling fuzzy mode joins terms with OR and so yields more results.
   *
   * @param bool $value exactly true enables fuzzy search; any other value
   *                    (including truthy non-booleans) disables it
   * @return XSSearch this object, to allow call chaining
   */
  public function setFuzzy($value = true)
  {
      if ($value === true) {
          $this->_defaultOp = XS_CMD_QUERY_OP_OR;
      } else {
          $this->_defaultOp = XS_CMD_QUERY_OP_AND;
      }

      return $this;
  }
  /**
   * Set the percentage / weight cut-off parameters.
   *
   * Usually only needed after enabling {@link setFuzzy} or when the query
   * joins terms with OR.
   *
   * @param int $percent drop documents whose match percentage is below this
   *                     value; clamped to 0-100
   * @param float $weight drop documents whose weight is below this value,
   *                      range 0.1-25.5, 0 disables the weight cut-off;
   *                      out-of-range values are clamped
   * @return XSSearch this object, to allow call chaining
   * @see setFuzzy
   */
  public function setCutOff($percent, $weight = 0)
  {
      $percent = max(0, min(100, intval($percent)));

      // The weight is sent in tenths (0-255). Clamp rather than bit-mask:
      // masking with 255 wrapped out-of-range input around (e.g. a negative
      // weight turned into a large cut-off, 30.0 turned into 4.4).
      $weight = max(0, min(255, intval($weight * 10)));

      $cmd = new XSCommand(XS_CMD_SEARCH_SET_CUTOFF, $percent, $weight);
      $this->execCommand($cmd);

      return $this;
  }
  /**
   * Ask the server to return the matched terms with every result document.
   *
   * Call this before {@link search}, then read the terms through
   * {@link XSDocument::matched}.
   *
   * @param bool $value exactly true enables the feature, anything else
   *                    disables it (it is disabled by default)
   * @return XSSearch this object, to allow call chaining
   * @since 1.4.8
   */
  public function setRequireMatchedTerm($value = true)
  {
      $enabled = ($value === true) ? 1 : 0;
      $this->execCommand(
          new XSCommand(XS_CMD_SEARCH_SET_MISC, XS_CMD_SEARCH_MISC_MATCHED_TERM, $enabled)
      );

      return $this;
  }
  /**
   * Select the weighting scheme used for matching.
   *
   * Three schemes are currently supported: 0 = BM25, 1 = Bool, 2 = Trad.
   *
   * @param int $scheme weighting scheme number (cast with intval)
   * @return XSSearch this object, to allow call chaining
   * @since 1.4.11
   */
  public function setWeightingScheme($scheme)
  {
      $this->execCommand(
          new XSCommand(XS_CMD_SEARCH_SET_MISC, XS_CMD_SEARCH_MISC_WEIGHT_SCHEME, intval($scheme))
      );

      return $this;
  }
  /**
   * Enable or disable automatic synonym searching.
   *
   * The parse flags are always rebuilt from BOOLEAN | PHRASE | LOVEHATE; the
   * multi-word synonym flag is added only when the feature is enabled.
   *
   * @param bool $value exactly true enables synonyms, anything else disables
   * @return XSSearch this object, to allow call chaining
   * @since 1.3.0
   */
  public function setAutoSynonyms($value = true)
  {
      $synonymFlag = ($value === true) ? XS_CMD_PARSE_FLAG_AUTO_MULTIWORD_SYNONYMS : 0;
      $flags = XS_CMD_PARSE_FLAG_BOOLEAN
          | XS_CMD_PARSE_FLAG_PHRASE
          | XS_CMD_PARSE_FLAG_LOVEHATE
          | $synonymFlag;

      $this->execCommand(array('cmd' => XS_CMD_QUERY_PARSEFLAG, 'arg' => $flags));

      return $this;
  }
  /**
   * Set the weight scale applied to synonym matches.
   *
   * Note: compound words produced by scws are also presented as synonyms.
   *
   * @param float $value scale factor, range 0.01-2.55, 1 means "no change";
   *                     out-of-range values are clamped
   * @return XSSearch this object, to allow call chaining
   * @since 1.4.7
   */
  public function setSynonymScale($value)
  {
      // The scale is sent in hundredths (0-255).
      // - round() avoids float truncation: intval(0.29 * 100) is 28, not 29.
      // - min/max clamps; the former "& 255" mask wrapped out-of-range input
      //   around (3.0 became 0.44, negative values became large scales).
      $scaled = max(0, min(255, intval(round($value * 100))));

      $cmd = new XSCommand(XS_CMD_SEARCH_SET_MISC, XS_CMD_SEARCH_MISC_SYN_SCALE, $scaled);
      $this->execCommand($cmd);

      return $this;
  }
  /**
   * Fetch the complete synonym list of the current database.
   *
   * @param int $limit maximum number of records; 0 lets the server use its
   *                   default (documented as 100). Paging data, including
   *                   $offset, is only sent when $limit is greater than 0
   * @param int $offset number of records to skip, default 0
   * @param bool $stemmed whether to include stemmed synonyms, default false
   * @return array map of term to the list of its synonyms
   * @since 1.3.0
   */
  public function getAllSynonyms($limit = 0, $offset = 0, $stemmed = false)
  {
      $paging = '';
      if ($limit > 0) {
          $paging = pack('II', intval($offset), intval($limit));
      }

      $request = array(
          'cmd' => XS_CMD_SEARCH_GET_SYNONYMS,
          'buf1' => $paging,
          'arg1' => ($stemmed == true ? 1 : 0),
      );
      $res = $this->execCommand($request, XS_CMD_OK_RESULT_SYNONYMS);

      $synonyms = array();
      if (empty($res->buf)) {
          return $synonyms;
      }

      // One record per line: the term, then its synonyms, tab separated.
      foreach (explode("\n", $res->buf) as $record) {
          $columns = explode("\t", $record);
          $synonyms[$columns[0]] = array_slice($columns, 1);
      }

      return $synonyms;
  }
  /**
   * Fetch the synonyms of a single term.
   *
   * @param string $term the term to look up
   * @return array|false list of synonyms, an empty array when there are none,
   *                     or false when $term is an empty string
   * @since 1.4.9
   */
  public function getSynonyms($term)
  {
      $term = strval($term);
      if ($term === '') {
          return false;
      }

      $res = $this->execCommand(
          array('cmd' => XS_CMD_SEARCH_GET_SYNONYMS, 'arg1' => 2, 'buf' => $term),
          XS_CMD_OK_RESULT_SYNONYMS
      );
      if ($res->buf === '') {
          return array();
      }

      // The server answers with one synonym per line.
      return explode("\n", $res->buf);
  }
  /**
   * Return the parsed form of a search query.
   *
   * VALUE_RANGE / VALUE_GE / VALUE_LE fragments of the server answer are
   * rewritten through the formatValueRange callback before the text is
   * converted to the configured charset.
   *
   * @param string $query search query; null means the default query
   * @return string the parsed query
   */
  public function getQuery($query = null)
  {
      $query = ($query === null) ? '' : $this->preQueryString($query);

      $res = $this->execCommand(
          new XSCommand(XS_CMD_QUERY_GET_STRING, 0, $this->_defaultOp, $query),
          XS_CMD_OK_QUERY_STRING
      );
      $parsed = $res->buf;
      $formatter = array($this, 'formatValueRange');

      if (strpos($parsed, 'VALUE_RANGE') !== false) {
          $parsed = preg_replace_callback('/(VALUE_RANGE) (\d+) (\S+) (.+?)(?=\))/', $formatter, $parsed);
      }

      $hasOneSided = strpos($parsed, 'VALUE_GE') !== false || strpos($parsed, 'VALUE_LE') !== false;
      if ($hasOneSided) {
          $parsed = preg_replace_callback('/(VALUE_[GL]E) (\d+) (.+?)(?=\))/', $formatter, $parsed);
      }

      return XS::convert($parsed, $this->_charset, 'UTF-8');
  }
  /**
   * Set the default search query.
   *
   * The default query is used by {@link count}, {@link search} and
   * {@link terms} when they are called without a query. It can be combined
   * with {@link addWeight}. Any previous query is always cleared first.
   *
   * @param string $query search query (documented maximum: 80 bytes);
   *                      null only clears the current query
   * @return XSSearch this object, to allow call chaining
   */
  public function setQuery($query)
  {
      $this->clearQuery();
      if ($query === null) {
          return $this;
      }

      $this->_query = $query;
      $this->addQueryString($query);

      return $this;
  }
  /**
   * Sort results by geographic distance from an origin point.
   *
   * The coordinate fields must be defined as numeric fields, for example
   * "lon" for longitude and "lat" for latitude. Longitude must be listed
   * before latitude:
   * <pre>
   * $search->setGeodistSort(array('lon' => 39.18, 'lat' => 120.51));
   * </pre>
   *
   * @param array $fields origin coordinates keyed by field name; must hold at
   *                      least two entries
   * @param bool $reverse true sorts far-to-near; default is near-to-far
   * @param bool $relevance_first whether relevance takes priority, default no
   * @return XSSearch this object, to allow call chaining
   * @throws XSException when $fields is not an array of two or more entries,
   *                     a field is not numeric, or a value is too long
   * @since 1.4.10
   */
  public function setGeodistSort($fields, $reverse = false, $relevance_first = false)
  {
      if (!is_array($fields) || count($fields) < 2) {
          throw new XSException("Fields of `setGeodistSort' should be an array contain two or more elements");
      }

      // Payload layout, repeated for every field: [vno][vlen][vbuf]
      $payload = '';
      foreach ($fields as $key => $value) {
          $meta = $this->xs->getField($key, true);
          if (!$meta->isNumeric()) {
              throw new XSException("Type of GeoField `$key' shoud be numeric");
          }

          $coord = strval(floatval($value));
          $length = strlen($coord);
          if ($length >= 255) {
              throw new XSException("Value of `$key' too long");
          }
          $payload .= chr($meta->vno) . chr($length) . $coord;
      }

      $type = XS_CMD_SORT_TYPE_GEODIST;
      $type |= $relevance_first ? XS_CMD_SORT_FLAG_RELEVANCE : 0;
      $type |= $reverse ? 0 : XS_CMD_SORT_FLAG_ASCENDING;

      $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, $type, 0, $payload));

      return $this;
  }
  /**
   * Sort results by a combination of several fields.
   *
   * Use this when the order depends on multiple field values, each with its
   * own direction. A non-array $fields is forwarded to {@link setSort}.
   *
   * @param array $fields either field name => bool (true = ascending,
   *                      false = descending), or a plain list of field names
   *                      (those are sorted descending)
   * @param bool $reverse whether to reverse the overall order; default is
   *                      forward, which differs slightly from {@link setSort}
   * @param bool $relevance_first whether relevance takes priority, default no
   * @return XSSearch this object, to allow call chaining
   * @since 1.1.0
   */
  public function setMultiSort($fields, $reverse = false, $relevance_first = false)
  {
      if (!is_array($fields)) {
          return $this->setSort($fields, !$reverse, $relevance_first);
      }

      // Payload layout, repeated per field: [vno][0/1] (0: reverse, 1: asc)
      $payload = '';
      foreach ($fields as $key => $value) {
          $keyedByName = is_bool($value);
          $fieldName = $keyedByName ? $key : $value;
          $ascending = $keyedByName ? $value : false;

          $vno = $this->xs->getField($fieldName, true)->vno;
          if ($vno == XSFieldScheme::MIXED_VNO) {
              // Fields carrying the mixed vno are skipped.
              continue;
          }
          $payload .= chr($vno) . chr($ascending ? 1 : 0);
      }

      // Nothing usable to sort by: leave the server setting untouched.
      if ($payload === '') {
          return $this;
      }

      $type = XS_CMD_SORT_TYPE_MULTI;
      $type |= $relevance_first ? XS_CMD_SORT_FLAG_RELEVANCE : 0;
      $type |= $reverse ? 0 : XS_CMD_SORT_FLAG_ASCENDING;
      $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, $type, 0, $payload));

      return $this;
  }
  /**
   * Set how search results are sorted.
   *
   * Note: the sort setting is reset whenever {@link setDb} or {@link addDb}
   * changes the current database. The first parameter is compatible with
   * {@link setMultiSort}: passing an array performs a multi-field sort.
   *
   * @param string $field field whose value drives the order; null restores
   *                      the default (relevance) order
   * @param bool $asc true for ascending order (small to large); default is
   *                  descending
   * @param bool $relevance_first whether relevance takes priority, default no
   * @return XSSearch this object, to allow call chaining
   */
  public function setSort($field, $asc = false, $relevance_first = false)
  {
      if (is_array($field)) {
          return $this->setMultiSort($field, $asc, $relevance_first);
      }

      if ($field === null) {
          $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, XS_CMD_SORT_TYPE_RELEVANCE));
          return $this;
      }

      $type = XS_CMD_SORT_TYPE_VALUE;
      $type |= $relevance_first ? XS_CMD_SORT_FLAG_RELEVANCE : 0;
      $type |= $asc ? XS_CMD_SORT_FLAG_ASCENDING : 0;

      $vno = $this->xs->getField($field, true)->vno;
      $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, $type, $vno));

      return $this;
  }
  /**
   * Order results by the sequence in which documents were indexed.
   *
   * Note: this does not override relevance ordering, higher-weighted
   * documents still come first; it is mainly useful for boolean retrieval.
   *
   * @param bool $asc true for oldest-first order; default is newest-first
   * @return XSSearch this object, to allow call chaining
   */
  public function setDocOrder($asc = false)
  {
      $type = XS_CMD_SORT_TYPE_DOCID;
      if ($asc) {
          $type |= XS_CMD_SORT_FLAG_ASCENDING;
      }
      $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, $type));

      return $this;
  }
  /**
   * Collapse search results on a field value.
   *
   * Note: this setting is reset whenever {@link setDb} or {@link addDb}
   * changes the current database.
   *
   * @param string $field field whose value groups the results; null cancels
   *                      collapsing
   * @param int $num number of best matches kept per group, default 1,
   *                 capped at 255
   * @return XSSearch this object, to allow call chaining
   */
  public function setCollapse($field, $num = 1)
  {
      if ($field === null) {
          $vno = XSFieldScheme::MIXED_VNO;
      } else {
          $vno = $this->xs->getField($field, true)->vno;
      }
      $keep = min(255, intval($num));

      $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_COLLAPSE, $keep, $vno));

      return $this;
  }
  /**
   * Add a value-range filter to the search.
   *
   * An empty string or false is treated like null for either bound. When both
   * bounds are null nothing is sent. With only one bound, a one-sided
   * comparison is sent (VALCMP_LE for "to" only, VALCMP_GE for "from" only);
   * with both bounds a range command is sent.
   * NOTE(review): the original documentation described "from" as exclusive
   * and "to" as inclusive - confirm against the server implementation.
   *
   * @param string $field name of the field to filter on
   * @param mixed $from lower bound, or null for no lower bound
   * @param mixed $to upper bound, or null for no upper bound
   * @return XSSearch this object, to allow call chaining
   * @throws XSException when a bound is longer than 255 bytes once converted
   *                     to the wire charset
   */
  public function addRange($field, $from, $to)
  {
      if ($from === '' || $from === false) {
          $from = null;
      }
      if ($to === '' || $to === false) {
          $to = null;
      }
      if ($from === null && $to === null) {
          return $this;
      }

      // Convert first so the length limit applies to the bytes actually sent,
      // and never pass null to strlen() (deprecated since PHP 8.1).
      if ($from !== null) {
          $from = XS::convert($from, 'UTF-8', $this->_charset);
      }
      if ($to !== null) {
          $to = XS::convert($to, 'UTF-8', $this->_charset);
      }
      $fromTooLong = $from !== null && strlen($from) > 255;
      $toTooLong = $to !== null && strlen($to) > 255;
      if ($fromTooLong || $toTooLong) {
          throw new XSException('Value of range is too long');
      }

      $vno = $this->xs->getField($field)->vno;
      if ($from === null) {
          $cmd = new XSCommand(XS_CMD_QUERY_VALCMP, XS_CMD_QUERY_OP_FILTER, $vno, $to, chr(XS_CMD_VALCMP_LE));
      } elseif ($to === null) {
          $cmd = new XSCommand(XS_CMD_QUERY_VALCMP, XS_CMD_QUERY_OP_FILTER, $vno, $from, chr(XS_CMD_VALCMP_GE));
      } else {
          $cmd = new XSCommand(XS_CMD_QUERY_RANGE, XS_CMD_QUERY_OP_FILTER, $vno, $from, $to);
      }
      $this->execCommand($cmd);

      return $this;
  }
  /**
   * Add a weighting term to the query.
   *
   * Whether a document contains the term does not affect matching, but the
   * term takes part in the weight calculation and so boosts relevance. The
   * term is added through {@link addQueryTerm} with the AND_MAYBE operator.
   *
   * @param string $field field the term belongs to
   * @param string $term the index term
   * @param float $weight scale factor applied to the weight
   * @return XSSearch this object, to allow call chaining
   * @see addQueryTerm
   */
  public function addWeight($field, $term, $weight = 1)
  {
      $operator = XS_CMD_QUERY_OP_AND_MAYBE;

      return $this->addQueryTerm($field, $term, $operator, $weight);
  }
  /**
   * Register facet counting for the next search.
   *
   * Facets count the matching results grouped by field value; the setting is
   * restored after every {@link search}. When called several times, the last
   * $exact value wins. Only field values of at most 255 bytes are supported.
   *
   * Since v1.4.10, fields with an empty value are counted per term (as if
   * multi-valued).
   *
   * @param mixed $field a field name or an array of field names (documented
   *                     maximum: 8 at a time)
   * @param bool $exact whether to demand exact counts, which is costly
   * @return XSSearch this object, to allow call chaining
   * @throws XSException when a field is not of string type
   * @since 1.1.0
   */
  public function setFacets($field, $exact = false)
  {
      $names = is_array($field) ? $field : array($field);

      // One byte (the field's vno) per requested facet field.
      $vnoList = '';
      foreach ($names as $name) {
          $meta = $this->xs->getField($name);
          if ($meta->type !== XSFieldMeta::TYPE_STRING) {
              throw new XSException("Field `$name' cann't be used for facets search, can only be string type");
          }
          $vnoList .= chr($meta->vno);
      }

      $this->execCommand(array(
          'cmd' => XS_CMD_SEARCH_SET_FACETS,
          'buf' => $vnoList,
          'arg1' => ($exact === true ? 1 : 0),
      ));

      return $this;
  }
  /**
   * Read the facet counts collected by the most recent search.
   *
   * Only meaningful after a {@link search} call.
   *
   * @param string $field field to read; null returns the counts of all fields
   * @return array map of value to count; an empty array when the field is
   *               unknown or was never registered
   * @since 1.1.0
   */
  public function getFacets($field = null)
  {
      if ($field === null) {
          return $this->_facets;
      }
      if (isset($this->_facets[$field])) {
          return $this->_facets[$field];
      }

      return array();
  }
  /**
   * Set the scws compound-word level for the current query.
   *
   * The compound level controls segmentation granularity, i.e. how long words
   * are split further; the default is 3 and the valid range is 0-15.
   * Note: the setting only applies to this search and only to
   * {@link setQuery} calls made afterwards. Single-character splitting of
   * queries is not supported, but level 0 can be used with fuzzy search to
   * turn compound splitting off. Values outside 0-15 are silently ignored.
   *
   * @param int $level compound level to set
   * @return XSSearch this object, to allow call chaining
   * @since 1.4.7
   */
  public function setScwsMulti($level)
  {
      $level = intval($level);
      if ($level < 0 || $level >= 16) {
          return $this;
      }

      $this->execCommand(array(
          'cmd' => XS_CMD_SEARCH_SCWS_SET,
          'arg1' => XS_CMD_SCWS_SET_MULTI,
          'arg2' => $level,
      ));

      return $this;
  }
  /**
   * Set the number of results to return and the offset to start from.
   *
   * Used for paging; both values are reset to their initial state after every
   * {@link search} call.
   *
   * @param int $limit maximum number of results; 0 selects self::PAGE_SIZE
   * @param int $offset number of results to skip, default 0
   * @return XSSearch this object, to allow call chaining
   */
  public function setLimit($limit, $offset = 0)
  {
      $this->_offset = intval($offset);
      $this->_limit = intval($limit);

      return $this;
  }
  /**
   * Select the database to search.
   *
   * When never called the default database is used. The database must live in
   * the user's directory on the server; remote databases are supported through
   * stub files. The previous selection is remembered before switching.
   *
   * @param string $name database name
   * @return XSSearch this object, to allow call chaining
   */
  public function setDb($name)
  {
      $dbName = strval($name);
      $this->execCommand(array('cmd' => XS_CMD_SEARCH_SET_DB, 'buf' => $dbName));

      // Only reached when the server accepted the switch: remember the old
      // selection, then record the new one with no additional databases.
      $this->_lastDbs = $this->_curDbs;
      $this->_lastDb = $this->_curDb;
      $this->_curDbs = array();
      $this->_curDb = $dbName;

      return $this;
  }
  /**
   * Add another database to the search, enabling multi-database queries.
   *
   * @param string $name database name
   * @return XSSearch this object, to allow call chaining
   * @see setDb
   */
  public function addDb($name)
  {
      $dbName = strval($name);
      $this->execCommand(array('cmd' => XS_CMD_SEARCH_ADD_DB, 'buf' => $dbName));
      $this->_curDbs[] = $dbName;

      return $this;
  }
  /**
   * Flag that the field scheme has been reset.
   *
   * Only raises the internal flag; nothing is sent to the server here.
   *
   * @see XS::setScheme
   */
  public function markResetScheme()
  {
      $this->_resetScheme = true;
  }
  /**
   * List the terms of a query that can be used for highlighting.
   *
   * The result for the default query is cached on this object. Empty tokens
   * and tokens containing a colon are left out.
   *
   * @param string $query search query (documented maximum: 80 bytes); null
   *                      means the default query
   * @param bool $convert whether to convert the terms to the configured
   *                      charset, default true
   * @return array terms usable for highlighting
   */
  public function terms($query = null, $convert = true)
  {
      $query = ($query === null) ? '' : $this->preQueryString($query);
      $isDefault = ($query === '');

      if ($isDefault && $this->_terms !== null) {
          $terms = $this->_terms;
      } else {
          $res = $this->execCommand(
              new XSCommand(XS_CMD_QUERY_GET_TERMS, 0, $this->_defaultOp, $query),
              XS_CMD_OK_QUERY_TERMS
          );

          $terms = array();
          foreach (explode(' ', $res->buf) as $token) {
              if ($token !== '' && strpos($token, ':') === false) {
                  $terms[] = $token;
              }
          }
          if ($isDefault) {
              $this->_terms = $terms;
          }
      }

      if ($convert) {
          return XS::convert($terms, $this->_charset, 'UTF-8');
      }

      return $terms;
  }
  /**
   * Estimate how many documents match a query.
   *
   * The result for the default query is cached on this object. If the query
   * equals that of the latest {@link search}, prefer {@link getLastCount},
   * which is cheaper. Per the original documentation, the default sort order
   * is restored after this call.
   *
   * @param string $query search query (documented maximum: 80 bytes); null
   *                      means the default query
   * @return int estimated number of matching results
   */
  public function count($query = null)
  {
      $query = ($query === null) ? '' : $this->preQueryString($query);
      $isDefault = ($query === '');

      if ($isDefault && $this->_count !== null) {
          return $this->_count;
      }

      $res = $this->execCommand(
          new XSCommand(XS_CMD_SEARCH_GET_TOTAL, 0, $this->_defaultOp, $query),
          XS_CMD_OK_SEARCH_TOTAL
      );
      $parsed = unpack('Icount', $res->buf);
      $total = $parsed['count'];

      if ($isDefault) {
          $this->_count = $total;
      }

      return $total;
  }
  /**
   * Run the search and return the matching documents.
   *
   * By default the best self::PAGE_SIZE matches are returned; see
   * {@link setLimit} for paging. The limit and offset are reset after every
   * call. Facet counts found in the answer are stored for {@link getFacets}.
   *
   * @param string $query search query (documented maximum: 80 bytes); null
   *                      means the default query
   * @param boolean $saveHighlight whether to remember the query for later
   *                               highlighting, default true
   * @return XSDocument[] the matching result documents
   * @throws XSException when the server sends an unexpected response
   */
  public function search($query = null, $saveHighlight = true)
  {
      // Remember the raw query for highlighting (never for the log database).
      if ($this->_curDb !== self::LOG_DB && $saveHighlight) {
          $this->_highlight = $query;
      }
      $query = ($query === null) ? '' : $this->preQueryString($query);

      $pageSize = ($this->_limit > 0) ? $this->_limit : self::PAGE_SIZE;
      $page = pack('II', $this->_offset, $pageSize);

      // The result header carries the estimated total.
      $res = $this->execCommand(
          new XSCommand(XS_CMD_SEARCH_GET_RESULT, 0, $this->_defaultOp, $query, $page),
          XS_CMD_OK_RESULT_BEGIN
      );
      $header = unpack('Icount', $res->buf);
      $this->_lastCount = $header['count'];

      // Map of vno to field name, used to label facets and fields.
      $docs = $this->_facets = array();
      $vnoes = $this->xs->getScheme()->getVnoMap();

      // Read responses until the end-of-result marker arrives.
      while (true) {
          $res = $this->getRespond();

          if ($res->cmd == XS_CMD_SEARCH_RESULT_FACETS) {
              // Repeated records: 1 byte vno, 1 byte vlen, 4 bytes count,
              // followed by vlen bytes of value.
              $pos = 0;
              $size = strlen($res->buf);
              while (($pos + 6) < $size) {
                  $rec = unpack('Cvno/Cvlen/Inum', substr($res->buf, $pos, 6));
                  if (isset($vnoes[$rec['vno']])) {
                      $name = $vnoes[$rec['vno']];
                      $value = substr($res->buf, $pos + 6, $rec['vlen']);
                      $this->_facets[$name][$value] = $rec['num'];
                  }
                  $pos += $rec['vlen'] + 6;
              }
              continue;
          }

          if ($res->cmd == XS_CMD_SEARCH_RESULT_DOC) {
              // A new document starts.
              $doc = new XSDocument($res->buf, $this->_charset);
              $docs[] = $doc;
              continue;
          }

          if ($res->cmd == XS_CMD_SEARCH_RESULT_FIELD) {
              // A field of the current document.
              if (isset($doc)) {
                  $name = isset($vnoes[$res->arg]) ? $vnoes[$res->arg] : $res->arg;
                  $doc->setField($name, $res->buf);
              }
              continue;
          }

          if ($res->cmd == XS_CMD_SEARCH_RESULT_MATCHED) {
              // Matched terms of the current document.
              if (isset($doc)) {
                  $doc->setField('matched', explode(' ', $res->buf), true);
              }
              continue;
          }

          if ($res->cmd == XS_CMD_OK && $res->arg == XS_CMD_OK_RESULT_END) {
              // End of the result stream.
              break;
          }

          $msg = 'Unexpected respond in search {CMD:' . $res->cmd . ', ARG:' . $res->arg . '}';
          throw new XSException($msg);
      }

      if ($query === '') {
          $this->_count = $this->_lastCount;
          // Trigger query logging and highlight preparation.
          if ($this->_curDb !== self::LOG_DB) {
              $this->logQuery();
              if ($saveHighlight) {
                  $this->initHighlight();
              }
          }
      }

      $this->_limit = $this->_offset = 0;

      return $docs;
  }
  /**
   * Return the estimated total number of matches of the latest search.
   *
   * @return int|false the match count, or false when no search has run since
   *                   the connection was opened
   * @see search
   */
  public function getLastCount()
  {
      $lastCount = $this->_lastCount;

      return $lastCount;
  }
  /**
   * Return the total number of documents in the search database.
   *
   * @return int total number of documents
   */
  public function getDbTotal()
  {
      $res = $this->execCommand(new XSCommand(XS_CMD_SEARCH_DB_TOTAL), XS_CMD_OK_DB_TOTAL);
      $parsed = unpack('Itotal', $res->buf);

      return $parsed['total'];
  }
  /**
   * Return the list of popular search queries.
   *
   * The list is read from the log database using the logger field scheme.
   * Xapian errors are swallowed and yield an empty list; any other error is
   * rethrown. In every case the previously selected database (if the switch
   * happened) and the previous field scheme are restored before returning or
   * rethrowing.
   *
   * @param int $limit maximum number of queries to return, default 6,
   *                   clamped to 1-50
   * @param string $type ranking type: "total" (all-time, default), "lastnum"
   *                     (last week) or "currnum" (this week); anything else
   *                     is treated as "total"
   * @return array map of search query to its search index value
   * @throws XSException on any failure other than a Xapian error
   */
  public function getHotQuery($limit = 6, $type = 'total')
  {
      $ret = array();
      $limit = max(1, min(50, intval($limit)));
      if ($type !== 'lastnum' && $type !== 'currnum') {
          $type = 'total';
      }

      // Query the log database with the logger scheme.
      $this->xs->setScheme(XSFieldScheme::logger());
      $switched = false;
      $failure = null;
      try {
          $this->setDb(self::LOG_DB);
          $switched = true;
          $result = $this->setLimit($limit)->search($type . ':1');
          foreach ($result as $doc) /* @var $doc XSDocument */ {
              $ret[$doc->body] = $doc->f($type);
          }
      } catch (XSException $e) {
          $failure = $e;
      }

      // Undo the database switch even when the search failed; previously a
      // swallowed Xapian error left the connection pointing at the log db.
      if ($switched) {
          try {
              $this->restoreDb();
          } catch (XSException $e) {
              // Keep the first failure; a restore error must not mask it.
              if ($failure === null) {
                  $failure = $e;
              }
          }
      }
      // Always put the caller's field scheme back, also before rethrowing.
      $this->xs->restoreScheme();

      if ($failure !== null && $failure->getCode() != XS_CMD_ERR_XAPIAN) {
          throw $failure;
      }

      return $ret;
  }
  /**
   * Return search queries related to the given one.
   *
   * Related queries are looked up with a fuzzy search in the log database,
   * using the logger field scheme. Queries that still contain a colon (a
   * field filter) are not looked up. Xapian errors are swallowed and yield
   * the results gathered so far; any other error is rethrown after the field
   * scheme and the default operator have been restored.
   *
   * @param string $query search query; null means the default query with its
   *                      field filters cleaned out
   * @param int $limit maximum number of related queries, default 6, clamped
   *                   to 1-20
   * @return array list of related search queries
   * @throws XSException on any failure other than a Xapian error
   */
  public function getRelatedQuery($query = null, $limit = 6)
  {
      $ret = array();
      $limit = max(1, min(20, intval($limit)));

      // Simple way to rule out queries with a field filter.
      if ($query === null) {
          $query = $this->cleanFieldQuery($this->_query);
      }
      // "0" is a valid query even though empty() considers it empty.
      $blank = empty($query) && $query !== '0' && $query !== 0;
      if ($blank || strpos($query, ':') !== false) {
          return $ret;
      }

      // Search the log database; setFuzzy() changes the default operator, so
      // remember it for restoring afterwards.
      $op = $this->_defaultOp;
      $this->xs->setScheme(XSFieldScheme::logger());
      try {
          // Fetch one extra row because the query itself may be among them.
          $result = $this->setDb(self::LOG_DB)->setFuzzy()->setLimit($limit + 1)->search($query);
          foreach ($result as $doc) /* @var $doc XSDocument */ {
              $doc->setCharset($this->_charset);
              $body = $doc->body;
              if (!strcasecmp($body, $query)) {
                  continue;
              }
              $ret[] = $body;
              if (count($ret) == $limit) {
                  break;
              }
          }
      } catch (XSException $e) {
          if ($e->getCode() != XS_CMD_ERR_XAPIAN) {
              // Undo the purely local changes before propagating; otherwise
              // the logger scheme and the fuzzy operator would stay active.
              $this->xs->restoreScheme();
              $this->_defaultOp = $op;
              throw $e;
          }
      }

      $this->restoreDb();
      $this->xs->restoreScheme();
      $this->_defaultOp = $op;

      return $ret;
  }
  /**
   * Expand a prefix into a list of full search queries.
   *
   * Xapian errors are swallowed and yield the results gathered so far; any
   * other error is rethrown.
   *
   * @param string $query prefix to expand; may be pinyin, English or Chinese
   * @param int $limit maximum number of queries to return, default 10,
   *                   clamped to 1-20
   * @return array list of expanded search queries
   * @throws XSException on an unexpected response or a non-Xapian failure
   */
  public function getExpandedQuery($query, $limit = 10)
  {
      $expanded = array();
      $limit = max(1, min(20, intval($limit)));

      try {
          $request = array(
              'cmd' => XS_CMD_QUERY_GET_EXPANDED,
              'arg1' => $limit,
              'buf' => XS::convert($query, 'UTF-8', $this->_charset),
          );
          $this->execCommand($request, XS_CMD_OK_RESULT_BEGIN);

          // Each expanded query arrives as a FIELD response until the end
          // marker is received.
          for (;;) {
              $res = $this->getRespond();
              if ($res->cmd == XS_CMD_SEARCH_RESULT_FIELD) {
                  $expanded[] = XS::convert($res->buf, $this->_charset, 'UTF-8');
                  continue;
              }
              if ($res->cmd == XS_CMD_OK && $res->arg == XS_CMD_OK_RESULT_END) {
                  break;
              }
              $msg = 'Unexpected respond in search {CMD:' . $res->cmd . ', ARG:' . $res->arg . '}';
              throw new XSException($msg);
          }
      } catch (XSException $e) {
          if ($e->getCode() != XS_CMD_ERR_XAPIAN) {
              throw $e;
          }
      }

      return $expanded;
  }
  /**
   * Return corrected ("did you mean") suggestions for a query.
   *
   * Typically used to build a "Did you mean: ..." feature when a search
   * returned few results. For the default query no suggestions are fetched
   * when the cached match count exceeds 0.1% of the database total. Queries
   * that still contain a colon (a field filter) are not looked up. Xapian
   * errors are swallowed and yield an empty list.
   *
   * @param string $query query to correct; null means the default query with
   *                      its field filters cleaned out
   * @return array list of suggested search queries
   * @throws XSException on any failure other than a Xapian error
   */
  public function getCorrectedQuery($query = null)
  {
      $ret = array();
      try {
          if ($query === null) {
              // The first test avoids a server round trip when nothing has
              // been counted yet.
              if ($this->_count > 0 && $this->_count > ceil($this->getDbTotal() * 0.001)) {
                  return $ret;
              }
              $query = $this->cleanFieldQuery($this->_query);
          }
          // "0" is a valid query even though empty() considers it empty.
          $blank = empty($query) && $query !== '0' && $query !== 0;
          if ($blank || strpos($query, ':') !== false) {
              return $ret;
          }

          $buf = XS::convert($query, 'UTF-8', $this->_charset);
          $cmd = array('cmd' => XS_CMD_QUERY_GET_CORRECTED, 'buf' => $buf);
          $res = $this->execCommand($cmd, XS_CMD_OK_QUERY_CORRECTED);
          if ($res->buf !== '') {
              // One suggestion per line.
              $ret = explode("\n", XS::convert($res->buf, $this->_charset, 'UTF-8'));
          }
      } catch (XSException $e) {
          if ($e->getCode() != XS_CMD_ERR_XAPIAN) {
              throw $e;
          }
      }

      return $ret;
  }
  /**
   * Add a search-log keyword to the buffer.
   *
   * {@link XSIndex::flushLogging} has to be called to make sure the log is
   * flushed immediately; otherwise it only happens after some time.
   *
   * @param string $query data to record
   * @param int $wdf number of times to record it, default 1; only sent when greater than 1
   * @since 1.1.1
   */
  public function addSearchLog($query, $wdf = 1)
  {
      // The count travels in 'buf1', packed with format 'i', and only when it exceeds 1.
      $request = $wdf > 1
          ? array('cmd' => XS_CMD_SEARCH_ADD_LOG, 'buf' => $query, 'buf1' => pack('i', $wdf))
          : array('cmd' => XS_CMD_SEARCH_ADD_LOG, 'buf' => $query);

      $this->execCommand($request, XS_CMD_OK_LOGGED);
  }
  /**
   * Highlight the search terms inside a result string.
   *
   * Matching parts of the value are wrapped according to the replacement
   * tables prepared by initHighlight() (which is run on first use, when
   * $this->_highlight is not yet an array).
   *
   * Regex-based replacements ('pattern' / 'replace') are applied first, then
   * the literal 'pairs' table. If the regex step fails (preg_replace() returns
   * null, e.g. because a pattern is invalid) the value is left as it was
   * instead of being replaced by null, so the caller never loses the text.
   *
   * @param string $value data to process; empty values are returned unchanged
   * @param bool $strtr when true the literal pairs are applied with strtr(),
   *        otherwise with str_replace(); default false
   * @return string the highlighted data
   */
  public function highlight($value, $strtr = false)
  {
      // Nothing to do for empty input.
      if (empty($value)) {
          return $value;
      }

      // Build the replacement tables lazily.
      if (!is_array($this->_highlight)) {
          $this->initHighlight();
      }

      if (isset($this->_highlight['pattern'])) {
          $replaced = preg_replace($this->_highlight['pattern'], $this->_highlight['replace'], $value);
          // preg_replace() yields null on error; keep the original text in that case.
          if ($replaced !== null) {
              $value = $replaced;
          }
      }

      if (isset($this->_highlight['pairs'])) {
          $pairs = $this->_highlight['pairs'];
          $value = $strtr
              ? strtr($value, $pairs)
              : str_replace(array_keys($pairs), array_values($pairs), $value);
      }

      return $value;
  }
  /**
   * Record a search query in the search log.
   *
   * The log feeds features such as related searches and query correction. To
   * avoid storing noisy data the query is checked and purified first:
   *  - requests from robot user agents are never logged;
   *  - when no explicit $query is given, the current query is used, and it is
   *    skipped if the last search had no results, if the default operator is
   *    OR and the query contains a space, or if it contains ' OR ', ' NOT '
   *    or ' XOR ';
   *  - the logged text is rebuilt from the query's terms in the order they
   *    are found in the query string, limited to a few space-separated groups.
   *
   * @param string $query the query to record; null or '' means the current query
   */
  private function logQuery($query = null)
  {
      if ($this->isRobotAgent()) {
          return;
      }

      if ($query === '' || $query === null) {
          // Fall back to the current query and apply the "worth logging" checks.
          $query = $this->_query;
          if (!$this->_lastCount) {
              return;
          }
          if ($this->_defaultOp == XS_CMD_QUERY_OP_OR && strpos($query, ' ')) {
              return;
          }
          foreach (array(' OR ', ' NOT ', ' XOR ') as $operator) {
              if (strpos($query, $operator)) {
                  return;
              }
          }
          $terms = $this->terms(null, false);
      } else {
          $terms = $this->terms($query, false);
      }

      // Rebuild a purified statement from the terms, walking through the query text.
      $log = '';
      $cursor = 0; // byte offset just after the previously matched term
      $groups = 0; // number of times a new space-separated group was started
      foreach ($terms as $term) {
          $termLen = strlen($term);
          // A 6-byte term may start 3 bytes before the cursor, i.e. overlap the previous term.
          // NOTE(review): presumably two 3-byte UTF-8 characters from overlapping segmentation - confirm.
          $from = $cursor;
          if ($cursor > 3 && $termLen === 6) {
              $from = $cursor - 3;
          }
          $found = strpos($query, $term, $from);
          if ($found === false) {
              continue;
          }
          if ($found > $cursor) {
              // There is a gap in the query text: start a new group, within limits.
              if (++$groups > 3 || strlen($log) > 42) {
                  break;
              }
              $log .= ' ' . $term;
          } elseif ($found < $cursor) {
              // Overlapping term: append only the part after its first 3 bytes.
              $log .= substr($term, 3);
          } else {
              // Directly adjacent to the previous term.
              $log .= $term;
          }
          $cursor = $found + $termLen;
      }

      // Skip results shorter than 2 bytes, or 3 bytes long starting with a byte above 0x80.
      $log = trim($log);
      $logLen = strlen($log);
      if ($logLen < 2 || ($logLen == 3 && ord($log[0]) > 0x80)) {
          return;
      }
      $this->addSearchLog($log);
  }
  /**
   * Clear the default search query.
   *
   * Sends XS_CMD_QUERY_INIT. When a scheme reset is pending
   * ($this->_resetScheme === true) the command carries arg1 = 1 and the
   * registered prefixes, the "special fields sent" flag and the pending flag
   * itself are reset before the command is executed. Afterwards the cached
   * query, count and terms are set to null.
   */
  private function clearQuery()
  {
      $init = new XSCommand(XS_CMD_QUERY_INIT);

      $resetPending = ($this->_resetScheme === true);
      if ($resetPending) {
          $init->arg1 = 1;
          $this->_prefix = array();
          $this->_fieldSet = false;
          $this->_resetScheme = false;
      }

      $this->execCommand($init);

      $this->_terms = null;
      $this->_count = null;
      $this->_query = null;
  }
  /**
   * Add a query string to the default search query.
   *
   * @param string $query the query string
   * @param int $addOp operator used to combine it with the existing query
   *        (meaningless when there is no existing query or it is empty); one of:
   *        XS_CMD_QUERY_OP_AND
   *        XS_CMD_QUERY_OP_OR
   *        XS_CMD_QUERY_OP_AND_NOT
   *        XS_CMD_QUERY_OP_XOR
   *        XS_CMD_QUERY_OP_AND_MAYBE
   *        XS_CMD_QUERY_OP_FILTER
   * @param float $scale weight scaling factor; the default 1 means no scaling,
   *        other values range from 0.xx to 655.35. It is sent as an unsigned
   *        16-bit number of hundredths, so larger values are capped at 655.35.
   * @return string the prepared query string, as returned by preQueryString()
   */
  public function addQueryString($query, $addOp = XS_CMD_QUERY_OP_AND, $scale = 1)
  {
      $query = $this->preQueryString($query);

      $bscale = '';
      if ($scale > 0 && $scale != 1) {
          // Use round(), not intval(): products such as 0.57 * 100 evaluate to
          // 56.99999..., which truncation would silently turn into 56.
          // Cap at 65535 because pack('n') would otherwise wrap around.
          $bscale = pack('n', min(65535, (int) round($scale * 100)));
      }

      $cmd = new XSCommand(XS_CMD_QUERY_PARSE, $addOp, $this->_defaultOp, $query, $bscale);
      $this->execCommand($cmd);
      return $query;
  }
  /**
   * Add one or more index terms to the default search query.
   *
   * Since v1.4.10 an array of terms may be passed; multiple terms are joined
   * with the default operator and are not segmented again.
   *
   * @param string $field field the term belongs to; null selects the mixed
   *        area (XSFieldScheme::MIXED_VNO), otherwise the field is looked up by name
   * @param string|array $term index term or list of terms; an empty list is a no-op
   * @param int $addOp operator used to combine with the existing query
   *        (meaningless when there is no existing query); see addQueryString
   *        for the supported operators
   * @param float $scale weight scaling factor; the default 1 means no scaling,
   *        other values range from 0.xx to 655.35. It is sent as an unsigned
   *        16-bit number of hundredths, so larger values are capped at 655.35.
   * @return XSSearch the object itself, to allow call chaining
   * @see addQueryString
   */
  public function addQueryTerm($field, $term, $addOp = XS_CMD_QUERY_OP_AND, $scale = 1)
  {
      $term = XS::convert($term, 'UTF-8', $this->_charset);

      $bscale = '';
      if ($scale > 0 && $scale != 1) {
          // Use round(), not intval(): products such as 0.57 * 100 evaluate to
          // 56.99999..., which truncation would silently turn into 56.
          // Cap at 65535 because pack('n') would otherwise wrap around.
          $bscale = pack('n', min(65535, (int) round($scale * 100)));
      }

      $vno = $field === null ? XSFieldScheme::MIXED_VNO : $this->xs->getField($field, true)->vno;

      $cmd = XS_CMD_QUERY_TERM;
      if (is_array($term)) {
          $total = count($term);
          if ($total === 0) {
              return $this;
          }
          if ($total === 1) {
              // A single-element list is sent like a plain term.
              $term = current($term);
          } else {
              // Several terms are sent tab-separated with the multi-term command.
              $term = implode("\t", $term);
              $cmd = XS_CMD_QUERY_TERMS;
          }
      }

      $cmd = new XSCommand($cmd, $addOp, $vno, $term, $bscale);
      $this->execCommand($cmd);
      return $this;
  }
  /**
   * Restore the previously selected search databases.
   *
   * Commonly used to switch back after the current db was temporarily
   * changed to LOG_DB.
   */
  private function restoreDb()
  {
      // Take copies of both values before calling setDb()/addDb().
      // NOTE(review): presumably those calls overwrite _lastDb/_lastDbs - confirm.
      $primary = $this->_lastDb;
      $extras = $this->_lastDbs;

      $this->setDb($primary);
      foreach ($extras as $dbName) {
          $this->addDb($dbName);
      }
  }
  998. /**
  999. * 搜索语句的准备工作
  1000. * 登记相关的字段前缀并给非布尔字段补上括号, 首次搜索必须通知服务端关于 cutlen, numeric 字段的设置
  1001. * @param string $query 要准备的搜索语句
  1002. * @return string 准备好的搜索语句
  1003. */
  1004. private function preQueryString($query)
  1005. {
  1006. // check to register prefix
  1007. $query = trim($query);
  1008. //if ($query === '')
  1009. // throw new XSException('Query string cann\'t be empty');
  1010. // force to clear query with resetScheme
  1011. if ($this->_resetScheme === true) {
  1012. $this->clearQuery();
  1013. }
  1014. // init special field here
  1015. $this->initSpecialField();
  1016. $newQuery = '';
  1017. $parts = preg_split('/[ \t\r\n]+/', $query);
  1018. foreach ($parts as $part) {
  1019. if ($part === '') {
  1020. continue;
  1021. }
  1022. if ($newQuery != '') {
  1023. $newQuery .= ' ';
  1024. }
  1025. if (($pos = strpos($part, ':', 1)) !== false) {
  1026. for ($i = 0; $i < $pos; $i++) {
  1027. if (strpos('+-~(', $part[$i]) === false) {
  1028. break;
  1029. }
  1030. }
  1031. $name = substr($part, $i, $pos - $i);
  1032. if (($field = $this->xs->getField($name, false)) !== false
  1033. && $field->vno != XSFieldScheme::MIXED_VNO) {
  1034. $this->regQueryPrefix($name);
  1035. if ($field->hasCustomTokenizer()) {
  1036. $prefix = $i > 0 ? substr($part, 0, $i) : '';
  1037. $suffix = '';
  1038. // force to lowercase for boolean terms
  1039. $value = substr($part, $pos + 1);
  1040. if (substr($value, -1, 1) === ')') {
  1041. $suffix = ')';
  1042. $value = substr($value, 0, -1);
  1043. }
  1044. $terms = array();
  1045. $tokens = $field->getCustomTokenizer()->getTokens($value);
  1046. foreach ($tokens as $term) {
  1047. $terms[] = strtolower($term);
  1048. }
  1049. $terms = array_unique($terms);
  1050. $newQuery .= $prefix . $name . ':' . implode(' ' . $name . ':', $terms) . $suffix;
  1051. } elseif (substr($part, $pos + 1, 1) != '(' && preg_match('/[\x81-\xfe]/', $part)) {
  1052. // force to add brackets for default scws tokenizer
  1053. $newQuery .= substr($part, 0, $pos + 1) . '(' . substr($part, $pos + 1) . ')';
  1054. } else {
  1055. $newQuery .= $part;
  1056. }
  1057. continue;
  1058. }
  1059. }
  1060. if (strlen($part) > 1 && ($part[0] == '+' || $part[0] == '-') && $part[1] != '('
  1061. && preg_match('/[\x81-\xfe]/', $part)) {
  1062. $newQuery .= substr($part, 0, 1) . '(' . substr($part, 1) . ')';
  1063. continue;
  1064. }
  1065. $newQuery .= $part;
  1066. }
  1067. return XS::convert($newQuery, 'UTF-8', $this->_charset);
  1068. }
  /**
   * Register a field used in a query string as a query prefix.
   *
   * Does nothing when the name is already registered, when no such field
   * exists, or when the field belongs to the mixed area. Boolean-index fields
   * are registered as XS_CMD_PREFIX_BOOLEAN, all others as XS_CMD_PREFIX_NORMAL.
   *
   * @param string $name the field name
   */
  private function regQueryPrefix($name)
  {
      if (isset($this->_prefix[$name])) {
          return;
      }

      $field = $this->xs->getField($name, false);
      if (!$field) {
          return;
      }
      if ($field->vno == XSFieldScheme::MIXED_VNO) {
          return;
      }

      $type = XS_CMD_PREFIX_NORMAL;
      if ($field->isBoolIndex()) {
          $type = XS_CMD_PREFIX_BOOLEAN;
      }

      $this->execCommand(new XSCommand(XS_CMD_QUERY_PREFIX, $type, $field->vno, $name));
      $this->_prefix[$name] = true;
  }
  /**
   * Send the cut length and numeric settings of the fields to the server.
   *
   * Runs only once until $this->_fieldSet is reset: for every field with a
   * non-zero cutlen an XS_CMD_SEARCH_SET_CUT command is sent (value is
   * cutlen / 10 rounded up, at most 127), and for every numeric field an
   * XS_CMD_SEARCH_SET_NUMERIC command is sent.
   */
  private function initSpecialField()
  {
      if ($this->_fieldSet === true) {
          return;
      }

      $fields = $this->xs->getAllFields();
      foreach ($fields as $field) {
          /* @var $field XSFieldMeta */
          if ($field->cutlen != 0) {
              $cutUnits = min(127, ceil($field->cutlen / 10));
              $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_CUT, $cutUnits, $field->vno));
          }
          if ($field->isNumeric()) {
              $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_NUMERIC, 0, $field->vno));
          }
      }

      $this->_fieldSet = true;
  }
  /**
   * Strip field names and boolean field conditions from a query string.
   *
   * ' AND ' and ' OR ' are replaced by a single space first; if the result
   * still contains ':', every "name:value" occurrence is rewritten through
   * cleanFieldCallback().
   *
   * @param string $query the query string
   * @return string the purified query string
   */
  private function cleanFieldQuery($query)
  {
      $plain = strtr($query, array(' AND ' => ' ', ' OR ' => ' '));

      // No field syntax left: nothing more to clean.
      if (strpos($plain, ':') === false) {
          return $plain;
      }

      return preg_replace_callback(
          '/(^|\s)([0-9A-Za-z_\.-]+):([^\s]+)/',
          array($this, 'cleanFieldCallback'),
          $plain
      );
  }
  /**
   * Regex callback that removes boolean field conditions and strips the names
   * of non-boolean fields.
   *
   * Used by cleanFieldQuery() to purify the search statements for
   * {@link getCorrectedQuery} and {@link getRelatedQuery}.
   *
   * @param array $match the regex match: [1] leading separator, [2] field name, [3] data
   * @return string the unchanged match for unknown fields, '' for boolean-index
   *         fields, otherwise the separator followed by the data with one pair
   *         of enclosing brackets removed
   */
  private function cleanFieldCallback($match)
  {
      $field = $this->xs->getField($match[2], false);
      if ($field === false) {
          // Not a field of this project: leave the text alone.
          return $match[0];
      }
      if ($field->isBoolIndex()) {
          return '';
      }

      $data = $match[3];
      $bracketed = substr($data, 0, 1) == '(' && substr($data, -1, 1) == ')';
      if ($bracketed) {
          $data = substr($data, 1, -1);
      }
      return $match[1] . $data;
  }
  /**
   * Initialise the replacement tables used by highlight().
   *
   * The terms of the query stored in $this->_highlight are fetched with
   * terms(), merged where consecutive overlapping 6-byte terms are found, and
   * then split into two tables that replace $this->_highlight:
   *  - 'pairs': literal term => '<em>term</em>' for terms without ASCII letters;
   *  - 'pattern' / 'replace': one case-insensitive regex for all terms that
   *    contain ASCII letters.
   *
   * Every term is escaped with preg_quote(), so regex metacharacters inside a
   * term (for example the '.' in "asp.net") are matched literally and can
   * never produce an invalid pattern. All such terms are combined into a
   * single alternation so the text is scanned once; applying one pattern
   * after another would let a later term (e.g. "em") match inside the
   * '<em>' tags inserted for an earlier one.
   */
  private function initHighlight()
  {
      $terms = array();
      $tmps = $this->terms($this->_highlight, false);
      $total = count($tmps);
      for ($i = 0; $i < $total; $i++) {
          // Terms that are not 6 bytes long, or whose first byte is below 0xc0, are used as they are.
          // NOTE(review): 6 bytes presumably means two 3-byte UTF-8 characters - confirm.
          if (strlen($tmps[$i]) !== 6 || ord(substr($tmps[$i], 0, 1)) < 0xc0) {
              $terms[] = XS::convert($tmps[$i], $this->_charset, 'UTF-8');
              continue;
          }

          // auto fixed duality in libscws
          // ABC => AB,BC => ABC,BC,AB
          // ABCD => AB,BC,CD => CD,ABC,BC,AB
          // ABCDE => AB,BC,CD,DE => CDE,DE,CD,ABC,BC,AB
          // Find the end of the run of 6-byte terms where each one starts with
          // the last 3 bytes of its predecessor.
          for ($j = $i + 1; $j < $total; $j++) {
              if (strlen($tmps[$j]) !== 6 || substr($tmps[$j], 0, 3) !== substr($tmps[$j - 1], 3, 3)) {
                  break;
              }
          }

          if (($k = ($j - $i)) === 1) {
              // No overlapping successor: plain term.
              $terms[] = XS::convert($tmps[$i], $this->_charset, 'UTF-8');
          } else {
              // Walk the run backwards, emitting merged 9-byte terms before the shorter ones.
              $i = $j - 1;
              while ($k--) {
                  $j--;
                  if ($k & 1) {
                      $terms[] = XS::convert(substr($tmps[$j - 1], 0, 3) . $tmps[$j], $this->_charset, 'UTF-8');
                  }
                  $terms[] = XS::convert($tmps[$j], $this->_charset, 'UTF-8');
              }
          }
      }

      $pairs = array();
      $quoted = array();
      foreach ($terms as $term) {
          if (!preg_match('/[a-zA-Z]/', $term)) {
              $pairs[$term] = '<em>' . $term . '</em>';
          } else {
              $quoted[] = preg_quote($term, '/');
          }
      }

      $this->_highlight = array();
      if (count($pairs) > 0) {
          $this->_highlight['pairs'] = $pairs;
      }
      if (count($quoted) > 0) {
          // Alternatives keep the original term order, so earlier terms still win.
          $this->_highlight['pattern'] = array('/' . implode('|', array_unique($quoted)) . '/i');
          $this->_highlight['replace'] = array('<em>$0</em>');
      }
  }
  /**
   * Format a value range / value bound expression as "field:[from,to]".
   *
   * The match has the shape "VALUE_xx <vno> <value> [<value2>]". For
   * 'VALUE_LE' the first value is the upper bound; otherwise it is the lower
   * bound and the optional second value is the upper bound. A missing bound is
   * shown as '~'. Values of numeric fields are decoded with xapianUnserialise().
   *
   * @param array $match regex match: [1] operation name, [2] value number, [3] first value, [4] optional second value
   * @return string the formatted range, or the unchanged match when the field is unknown
   */
  private function formatValueRange($match)
  {
      // VALUE_[GL]E 0 xxx yyy
      $field = $this->xs->getField(intval($match[2]), false);
      if ($field === false) {
          return $match[0];
      }

      $numeric = $field->isNumeric();
      $lower = '~';
      $upper = '~';

      if (isset($match[4])) {
          $upper = $numeric ? $this->xapianUnserialise($match[4]) : $match[4];
      }

      $bound = $numeric ? $this->xapianUnserialise($match[3]) : $match[3];
      if ($match[1] === 'VALUE_LE') {
          $upper = $bound;
      } else {
          $lower = $bound;
      }

      return $field->name . ':[' . $lower . ',' . $upper . ']';
  }
  /**
   * Read one byte of a string as a number.
   *
   * @param string $str the source string
   * @param int $index byte offset to read
   * @return int the byte value at $index, or 0 when $index is not below the string length
   */
  private function numfromstr($str, $index)
  {
      if ($index < strlen($str)) {
          return ord($str[$index]);
      }
      return 0;
  }
  /**
   * Convert a string encoded by Xapian's sortable serialisation into a
   * floating point number (a port of Xapian::sortable_unserialise()).
   *
   * Layout of the first byte: bit 7 = mantissa sign (0 means negative),
   * bit 6 = exponent sign, bit 5 = exponent length; the exponent is stored in
   * 3 bits (short form) or 11 bits (long form, continuing into the second
   * byte), followed by the mantissa in up to two 32-bit words. For some sign
   * combinations the exponent bits are stored inverted, and the mantissa of a
   * negative number is stored negated.
   *
   * Assumes integers wider than 32 bits (64-bit PHP).
   *
   * @param string $value the serialised bytes
   * @return float the decoded number rounded to 2 decimal places; 0.0 for
   *         "\x80", INF for nine 0xff bytes and -INF for the empty string
   */
  private function xapianUnserialise($value)
  {
      // The three special encodings.
      if ($value === "\x80") {
          return 0.0;
      }
      if ($value === str_repeat("\xff", 9)) {
          return INF;
      }
      if ($value === '') {
          return -INF;
      }

      // At most nine bytes are read below; trailing zero bytes are omitted by
      // the encoding, so bytes past the end of the input count as zero.
      $raw = str_pad($value, 9, "\0");

      $i = 0;
      $c = ord($raw[0]);
      $c ^= ($c & 0xc0) >> 1;
      $negative = ($c & 0x80) === 0;
      $exponentNegative = ($c & 0x40) !== 0;
      $longExponent = ($c & 0x20) === 0;
      $exponent = $c & 0x1f;

      if ($longExponent) {
          // 11-bit exponent: 5 bits from the first byte, 6 from the second.
          $c = ord($raw[++$i]);
          $exponent = ($exponent << 6) | ($c >> 2);
          if ($negative !== $exponentNegative) {
              // Undo the bit inversion applied by the encoder (XOR, not AND).
              $exponent ^= 0x07ff;
          }
      } else {
          // 3-bit exponent stored in bits 2-4 of the first byte.
          $exponent >>= 2;
          if ($negative !== $exponentNegative) {
              $exponent ^= 0x07;
          }
      }

      // The low 2 bits of the current byte start the 26-bit first mantissa word.
      $word1 = (($c & 0x03) << 24)
          | (ord($raw[$i + 1]) << 16)
          | (ord($raw[$i + 2]) << 8)
          | ord($raw[$i + 3]);
      $word2 = (ord($raw[$i + 4]) << 24)
          | (ord($raw[$i + 5]) << 16)
          | (ord($raw[$i + 6]) << 8)
          | ord($raw[$i + 7]);

      if ($negative) {
          // Undo the negation of the mantissa. The second word must wrap
          // around like an unsigned 32-bit value, otherwise the mantissa ends
          // up one unit too small.
          $word1 = -$word1;
          if ($word2 !== 0) {
              ++$word1;
              $word2 = 4294967296 - $word2;
          }
          $word1 &= 0x03ffffff;
      } else {
          // The leading mantissa bit of a positive number is implicit.
          $word1 |= 1 << 26;
      }

      $mantissa = ($word1 + $word2 / 4294967296.0) / (1 << ($negative ? 26 : 27));

      if ($exponentNegative) {
          $exponent = -$exponent;
      }
      // The encoder biases the exponent by 8.
      $exponent += 8;

      if ($negative) {
          $mantissa = -$mantissa;
      }
      return round($mantissa * pow(2, $exponent), 2);
  }
  /**
   * Tell whether the current request comes from a robot or search engine.
   *
   * The lower-cased $_SERVER['HTTP_USER_AGENT'] is checked for the substrings
   * 'bot', 'slurp', 'spider', 'crawl' and 'curl'.
   *
   * @return boolean true when the user agent looks like a robot; false
   *         otherwise or when no user agent is set
   */
  private function isRobotAgent()
  {
      if (!isset($_SERVER['HTTP_USER_AGENT'])) {
          return false;
      }

      $agent = strtolower($_SERVER['HTTP_USER_AGENT']);
      foreach (array('bot', 'slurp', 'spider', 'crawl', 'curl') as $marker) {
          if (strpos($agent, $marker) !== false) {
              return true;
          }
      }
      return false;
  }
  1304. }