12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392 |
- <?php
- /**
- * XSSearch 类定义文件
- *
- * @author hightman
- * @link http://www.xunsearch.com/
- * @copyright Copyright © 2011 HangZhou YunSheng Network Technology Co., Ltd.
- * @license http://www.xunsearch.com/license/
- * @version $Id$
- */
- /**
- * XS 搜索类, 执行搜索功能
- * 有部分方法支持串接操作
- * <pre>
- * $xs->search->setQuery($str)->setLimit(10, 10)->search();
- * $xs->close();
- * </pre>
- *
- * @property string $query 默认搜索语句
- * @property-read int $dbTotal 数据库内的数据总量
- * @property-read int $lastCount 最近那次搜索的匹配总量估值
- * @property-read array $hotQuery 热门搜索词列表
- * @property-read array $relatedQuery 相关搜索词列表
- * @property-read array $expandedQuery 展开前缀的搜索词列表
- * @property-read array $correctedQuery 修正后的建议搜索词列表
- * @author hightman <hightman@twomice.net>
- * @version 1.0.0
- * @package XS
- */
- class XSSearch extends XSServer
- {
/**
 * Default number of results per page; search() uses it when no positive limit is set.
 */
const PAGE_SIZE = 10;

/**
 * Name of the search-log database used by getHotQuery() and getRelatedQuery().
 */
const LOG_DB = 'log_db';

/** @var int default operator sent with query commands; toggled by setFuzzy() */
private $_defaultOp = XS_CMD_QUERY_OP_AND;

/** @var array|null reset to an empty array by open() and by clearQuery() on scheme reset */
private $_prefix;

/** @var bool|null reset to false by open() and by clearQuery() on scheme reset */
private $_fieldSet;

/** @var bool set by markResetScheme(); consumed (and cleared) by clearQuery() */
private $_resetScheme = false;

/** @var string|null default query text stored by setQuery() */
private $_query;

/** @var array|null cached term list of the default query, see terms() */
private $_terms;

/** @var int|null cached match estimate of the default query, see count() */
private $_count;

/** @var int|false match estimate of the most recent search(); false after open() */
private $_lastCount;

/** @var mixed raw query stored by search(); highlight() expects an array here after initHighlight() */
private $_highlight;

/** @var string|null name of the database selected by the last setDb() */
private $_curDb;

/** @var array names of databases appended with addDb() since the last setDb() */
private $_curDbs = array();

/** @var string|null value of $_curDb before the last setDb(), used by restoreDb() */
private $_lastDb;

/** @var array value of $_curDbs before the last setDb(), used by restoreDb() */
private $_lastDbs = array();

/** @var array facet counts collected by the last search(), keyed by field name */
private $_facets = array();

/** @var int pending result limit for the next search(); 0 means PAGE_SIZE */
private $_limit = 0;

/** @var int pending result offset for the next search() */
private $_offset = 0;

/** @var string character set of queries and results on the caller side */
private $_charset = 'UTF-8';
/**
 * Connect to the search server and reset connection-scoped state.
 *
 * After delegating to the parent implementation, the prefix list is emptied
 * and both the field-set flag and the last-count value are set back to false.
 *
 * @param string $conn connection parameter, forwarded unchanged to the parent
 * @see XSServer::open
 */
public function open($conn)
{
    parent::open($conn);

    $this->_lastCount = false;
    $this->_fieldSet = false;
    $this->_prefix = array();
}
/**
 * Set the character set used for queries and results on the caller side.
 *
 * The name is upper-cased; the spelling "UTF8" is normalised to "UTF-8".
 *
 * @param string $charset character set name
 * @return XSSearch this object, to allow call chaining
 */
public function setCharset($charset)
{
    $normalized = strtoupper($charset);
    $this->_charset = ($normalized == 'UTF8') ? 'UTF-8' : $normalized;

    return $this;
}
/**
 * Switch fuzzy searching on or off.
 *
 * Fuzzy mode makes OR the default operator for subsequent query commands;
 * otherwise AND is used. Only the boolean value true enables fuzzy mode.
 *
 * @param bool $value true to enable fuzzy search, anything else disables it
 * @return XSSearch this object, to allow call chaining
 */
public function setFuzzy($value = true)
{
    if ($value === true) {
        $this->_defaultOp = XS_CMD_QUERY_OP_OR;
    } else {
        $this->_defaultOp = XS_CMD_QUERY_OP_AND;
    }

    return $this;
}
/**
 * Set the percentage / weight cut-off for search results.
 *
 * Usually only needed together with {@link setFuzzy} or OR-joined queries.
 *
 * Both values are clamped before being sent: the percentage to 0..100 and
 * the weight (scaled by 10) to 0..255. The previous implementation masked
 * the scaled weight with `& 255` before applying the lower bound, so a
 * negative weight turned into a large positive one and values above 25.5
 * wrapped around instead of saturating.
 *
 * @param int $percent drop documents whose match percentage is below this value (0-100)
 * @param float $weight drop documents whose weight is below this value (0.1-25.5), 0 disables
 * @return XSSearch this object, to allow call chaining
 * @see setFuzzy
 */
public function setCutOff($percent, $weight = 0)
{
    $percent = max(0, min(100, intval($percent)));
    // The weight is transmitted as a single byte holding tenths.
    $weight = max(0, min(255, intval($weight * 10)));

    $cmd = new XSCommand(XS_CMD_SEARCH_SET_CUTOFF, $percent, $weight);
    $this->execCommand($cmd);

    return $this;
}
/**
 * Ask the server to return the list of matched terms with each result document.
 *
 * Call this before {@link search}; search() stores the list on each document
 * under the field name "matched". Only the boolean value true enables it.
 *
 * @param bool $value true to enable, anything else disables
 * @return XSSearch this object, to allow call chaining
 * @since 1.4.8
 */
public function setRequireMatchedTerm($value = true)
{
    $enabled = ($value === true) ? 1 : 0;
    $this->execCommand(
        new XSCommand(XS_CMD_SEARCH_SET_MISC, XS_CMD_SEARCH_MISC_MATCHED_TERM, $enabled)
    );

    return $this;
}
/**
 * Select the weighting scheme used for matching.
 *
 * The original documentation lists three schemes: 0=BM25, 1=Bool, 2=Trad.
 * The value is cast to int and sent to the server without range checking.
 *
 * @param int $scheme weighting scheme number
 * @return XSSearch this object, to allow call chaining
 * @since 1.4.11
 */
public function setWeightingScheme($scheme)
{
    $this->execCommand(
        new XSCommand(XS_CMD_SEARCH_SET_MISC, XS_CMD_SEARCH_MISC_WEIGHT_SCHEME, intval($scheme))
    );

    return $this;
}
/**
 * Enable or disable automatic synonym searching.
 *
 * Sends the query parse flags BOOLEAN | PHRASE | LOVEHATE, adding the
 * AUTO_MULTIWORD_SYNONYMS flag only when $value is the boolean true.
 *
 * @param bool $value true to enable synonyms, anything else disables them
 * @return XSSearch this object, to allow call chaining
 * @since 1.3.0
 */
public function setAutoSynonyms($value = true)
{
    $synonymFlag = ($value === true) ? XS_CMD_PARSE_FLAG_AUTO_MULTIWORD_SYNONYMS : 0;
    $flags = XS_CMD_PARSE_FLAG_BOOLEAN
        | XS_CMD_PARSE_FLAG_PHRASE
        | XS_CMD_PARSE_FLAG_LOVEHATE
        | $synonymFlag;

    $this->execCommand(array('cmd' => XS_CMD_QUERY_PARSEFLAG, 'arg' => $flags));

    return $this;
}
/**
 * Set the weight scale applied to synonym matches.
 *
 * The value is multiplied by 100 and clamped to 0..255 before being sent.
 * The previous implementation masked with `& 255` before applying the
 * lower bound, so negative input became a large scale and values above
 * 2.55 wrapped around instead of saturating.
 *
 * Note from the original documentation: compound words produced by scws
 * are also presented as synonyms.
 *
 * @param float $value scale in the range 0.01-2.55, 1 means no adjustment
 * @return XSSearch this object, to allow call chaining
 * @since 1.4.7
 */
public function setSynonymScale($value)
{
    // The scale is transmitted as a single byte holding hundredths.
    $scaled = max(0, min(255, intval($value * 100)));

    $cmd = new XSCommand(XS_CMD_SEARCH_SET_MISC, XS_CMD_SEARCH_MISC_SYN_SCALE, $scaled);
    $this->execCommand($cmd);

    return $this;
}
/**
 * Fetch the synonym table of the current database.
 *
 * The response buffer is parsed as newline-separated records whose
 * tab-separated first column is the term and remaining columns its synonyms.
 *
 * @param int $limit maximum number of records; when not positive no paging
 *        information is sent (the original documentation states a default of 100)
 * @param int $offset number of records to skip, only sent when $limit > 0
 * @param bool $stemmed whether stemmed synonyms are included, default false
 * @return array map of term => list of synonyms
 * @since 1.3.0
 */
public function getAllSynonyms($limit = 0, $offset = 0, $stemmed = false)
{
    $cmd = array(
        'cmd' => XS_CMD_SEARCH_GET_SYNONYMS,
        'buf1' => $limit > 0 ? pack('II', intval($offset), intval($limit)) : '',
        'arg1' => $stemmed == true ? 1 : 0,
    );
    $res = $this->execCommand($cmd, XS_CMD_OK_RESULT_SYNONYMS);

    $synonyms = array();
    if (empty($res->buf)) {
        return $synonyms;
    }

    foreach (explode("\n", $res->buf) as $record) {
        $columns = explode("\t", $record);
        $term = array_shift($columns);
        $synonyms[$term] = $columns;
    }

    return $synonyms;
}
/**
 * Fetch the synonyms of a single term.
 *
 * @param string $term the term to look up (cast to string)
 * @return array|false list of synonyms, an empty array when the response
 *         buffer is empty, or false when $term is an empty string
 * @since 1.4.9
 */
public function getSynonyms($term)
{
    $term = strval($term);
    if ($term === '') {
        return false;
    }

    $res = $this->execCommand(
        array('cmd' => XS_CMD_SEARCH_GET_SYNONYMS, 'arg1' => 2, 'buf' => $term),
        XS_CMD_OK_RESULT_SYNONYMS
    );

    if ($res->buf === '') {
        return array();
    }

    return explode("\n", $res->buf);
}
/**
 * Get the parsed form of a query as reported by the server.
 *
 * VALUE_RANGE / VALUE_GE / VALUE_LE fragments in the server's answer are
 * rewritten through the formatValueRange callback, then the text is
 * converted from UTF-8 to the configured character set.
 *
 * @param string $query query text; null means the default query
 * @return string parsed query description
 */
public function getQuery($query = null)
{
    $text = $query === null ? '' : $this->preQueryString($query);
    $res = $this->execCommand(
        new XSCommand(XS_CMD_QUERY_GET_STRING, 0, $this->_defaultOp, $text),
        XS_CMD_OK_QUERY_STRING
    );

    $formatter = array($this, 'formatValueRange');

    if (strpos($res->buf, 'VALUE_RANGE') !== false) {
        $res->buf = preg_replace_callback(
            '/(VALUE_RANGE) (\d+) (\S+) (.+?)(?=\))/',
            $formatter,
            $res->buf
        );
    }

    $hasCompare = strpos($res->buf, 'VALUE_GE') !== false
        || strpos($res->buf, 'VALUE_LE') !== false;
    if ($hasCompare) {
        $res->buf = preg_replace_callback(
            '/(VALUE_[GL]E) (\d+) (.+?)(?=\))/',
            $formatter,
            $res->buf
        );
    }

    return XS::convert($res->buf, $this->_charset, 'UTF-8');
}
/**
 * Set the default query.
 *
 * The default query is what {@link count}, {@link search} and {@link terms}
 * use when called without a query; it can be combined with {@link addWeight}.
 * Any previous default query is always cleared first.
 *
 * @param string $query query text, or null to only clear the current query
 *        (the original documentation states a maximum length of 80 bytes)
 * @return XSSearch this object, to allow call chaining
 */
public function setQuery($query)
{
    $this->clearQuery();

    if ($query === null) {
        return $this;
    }

    $this->_query = $query;
    $this->addQueryString($query);

    return $this;
}
/**
 * Sort results by geographic distance from an origin point.
 *
 * Every key of $fields must name a numeric field; the matching value is the
 * origin coordinate for that field. According to the original documentation
 * the longitude field must come before the latitude field, e.g.
 * <pre>
 * $search->setGeodistSort(array('lon' => $longitude, 'lat' => $latitude));
 * </pre>
 *
 * Each entry is encoded as [vno][vlen][vbuf], where vbuf is the string form
 * of the float value.
 *
 * @param array $fields origin coordinates keyed by field name, at least 2 entries
 * @param bool $reverse true to sort far-to-near; default is near-to-far
 * @param bool $relevance_first true to rank by relevance first, default false
 * @return XSSearch this object, to allow call chaining
 * @throws XSException when $fields is not an array of at least two entries,
 *         a field is not numeric, or an encoded value is 255 bytes or longer
 * @since 1.4.10
 */
public function setGeodistSort($fields, $reverse = false, $relevance_first = false)
{
    if (!is_array($fields) || count($fields) < 2) {
        throw new XSException("Fields of `setGeodistSort' should be an array contain two or more elements");
    }

    $packed = '';
    foreach ($fields as $key => $value) {
        $meta = $this->xs->getField($key, true);
        if (!$meta->isNumeric()) {
            throw new XSException("Type of GeoField `$key' shoud be numeric");
        }

        $encoded = strval(floatval($value));
        $length = strlen($encoded);
        if ($length >= 255) {
            throw new XSException("Value of `$key' too long");
        }

        $packed .= chr($meta->vno) . chr($length) . $encoded;
    }

    $type = XS_CMD_SORT_TYPE_GEODIST
        | ($relevance_first ? XS_CMD_SORT_FLAG_RELEVANCE : 0)
        | ($reverse ? 0 : XS_CMD_SORT_FLAG_ASCENDING);

    $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, $type, 0, $packed));

    return $this;
}
/**
 * Sort results by a combination of several fields.
 *
 * $fields may mix two entry forms: `name => bool` (true = ascending for that
 * field) and a plain `name` value (descending for that field). Fields whose
 * value number equals XSFieldScheme::MIXED_VNO are skipped; if nothing is
 * left, no command is sent. A non-array argument is forwarded to
 * {@link setSort} with the direction inverted.
 *
 * Each entry is encoded as [vno][0/1], 1 meaning ascending.
 *
 * @param array $fields sort fields as described above
 * @param bool $reverse true for reversed overall order; note this differs from setSort's $asc
 * @param bool $relevance_first true to rank by relevance first, default false
 * @return XSSearch this object, to allow call chaining
 * @since 1.1.0
 */
public function setMultiSort($fields, $reverse = false, $relevance_first = false)
{
    if (!is_array($fields)) {
        return $this->setSort($fields, !$reverse, $relevance_first);
    }

    $packed = '';
    foreach ($fields as $key => $value) {
        if (is_bool($value)) {
            $fieldName = $key;
            $ascending = $value;
        } else {
            $fieldName = $value;
            $ascending = false;
        }

        $vno = $this->xs->getField($fieldName, true)->vno;
        if ($vno != XSFieldScheme::MIXED_VNO) {
            $packed .= chr($vno) . chr($ascending ? 1 : 0);
        }
    }

    if ($packed === '') {
        return $this;
    }

    $type = XS_CMD_SORT_TYPE_MULTI
        | ($relevance_first ? XS_CMD_SORT_FLAG_RELEVANCE : 0)
        | ($reverse ? 0 : XS_CMD_SORT_FLAG_ASCENDING);

    $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, $type, 0, $packed));

    return $this;
}
/**
 * Set how search results are ordered.
 *
 * According to the original documentation the setting is reset whenever the
 * current database is changed through {@link setDb} or {@link addDb}.
 * An array argument is forwarded to {@link setMultiSort}.
 *
 * @param string $field field whose value drives the ordering; null selects relevance ordering
 * @param bool $asc true for ascending order, default is descending
 * @param bool $relevance_first true to rank by relevance first, default false
 * @return XSSearch this object, to allow call chaining
 */
public function setSort($field, $asc = false, $relevance_first = false)
{
    if (is_array($field)) {
        return $this->setMultiSort($field, $asc, $relevance_first);
    }

    if ($field === null) {
        $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, XS_CMD_SORT_TYPE_RELEVANCE));
        return $this;
    }

    $type = XS_CMD_SORT_TYPE_VALUE
        | ($relevance_first ? XS_CMD_SORT_FLAG_RELEVANCE : 0)
        | ($asc ? XS_CMD_SORT_FLAG_ASCENDING : 0);
    $vno = $this->xs->getField($field, true)->vno;

    $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, $type, $vno));

    return $this;
}
/**
 * Order results by the sequence in which documents were indexed.
 *
 * Per the original documentation this does not override relevance ranking
 * and is mainly useful for boolean retrieval.
 *
 * @param bool $asc true for oldest-first, default is newest-first
 * @return XSSearch this object, to allow call chaining
 */
public function setDocOrder($asc = false)
{
    $type = XS_CMD_SORT_TYPE_DOCID;
    if ($asc) {
        $type |= XS_CMD_SORT_FLAG_ASCENDING;
    }

    $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_SORT, $type));

    return $this;
}
/**
 * Collapse search results on the value of a field.
 *
 * According to the original documentation the setting is reset whenever the
 * current database is changed through {@link setDb} or {@link addDb}.
 *
 * @param string $field field to collapse on; null sends XSFieldScheme::MIXED_VNO,
 *        which the original documentation describes as cancelling collapsing
 * @param int $num number of best matches kept per collapsed group, capped at 255
 * @return XSSearch this object, to allow call chaining
 */
public function setCollapse($field, $num = 1)
{
    if ($field === null) {
        $vno = XSFieldScheme::MIXED_VNO;
    } else {
        $vno = $this->xs->getField($field, true)->vno;
    }
    $keep = min(255, intval($num));

    $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_COLLAPSE, $keep, $vno));

    return $this;
}
/**
 * Add a range filter on the value of a field.
 *
 * Empty-string and false bounds are treated as "not given" (null). With
 * only $to given a VALCMP_LE filter is sent, with only $from given a
 * VALCMP_GE filter, and with both a RANGE filter. When neither bound is
 * given nothing is sent.
 *
 * The length check now skips null bounds: calling strlen() on null, which
 * happens for every one-sided range, is deprecated since PHP 8.1.
 *
 * @param string $field field name
 * @param mixed $from lower bound, or null/''/false for an open lower end
 * @param mixed $to upper bound, or null/''/false for an open upper end
 * @return XSSearch this object, to allow call chaining
 * @throws XSException when a bound is longer than 255 bytes
 */
public function addRange($field, $from, $to)
{
    if ($from === '' || $from === false) {
        $from = null;
    }
    if ($to === '' || $to === false) {
        $to = null;
    }
    if ($from === null && $to === null) {
        return $this;
    }

    $fromTooLong = $from !== null && strlen($from) > 255;
    $toTooLong = $to !== null && strlen($to) > 255;
    if ($fromTooLong || $toTooLong) {
        throw new XSException('Value of range is too long');
    }

    $vno = $this->xs->getField($field)->vno;
    $from = XS::convert($from, 'UTF-8', $this->_charset);
    $to = XS::convert($to, 'UTF-8', $this->_charset);

    if ($from === null) {
        $cmd = new XSCommand(XS_CMD_QUERY_VALCMP, XS_CMD_QUERY_OP_FILTER, $vno, $to, chr(XS_CMD_VALCMP_LE));
    } elseif ($to === null) {
        $cmd = new XSCommand(XS_CMD_QUERY_VALCMP, XS_CMD_QUERY_OP_FILTER, $vno, $from, chr(XS_CMD_VALCMP_GE));
    } else {
        $cmd = new XSCommand(XS_CMD_QUERY_RANGE, XS_CMD_QUERY_OP_FILTER, $vno, $from, $to);
    }
    $this->execCommand($cmd);

    return $this;
}
/**
 * Add a weighting term to the default query.
 *
 * The term is attached with the AND_MAYBE operator via {@link addQueryTerm};
 * per the original documentation it influences ranking without affecting
 * which documents match.
 *
 * @param string $field field the term belongs to
 * @param string $term the index term
 * @param float $weight weight scale factor
 * @return XSSearch this object, to allow call chaining
 * @see addQueryTerm
 */
public function addWeight($field, $term, $weight = 1)
{
    $operator = XS_CMD_QUERY_OP_AND_MAYBE;

    return $this->addQueryTerm($field, $term, $operator, $weight);
}
/**
 * Register fields for facet counting in the next search.
 *
 * The counts are read back with {@link getFacets} after {@link search}.
 * The original documentation notes: settings are restored after each
 * search, at most 8 fields are supported at once, only values up to 255
 * bytes are supported, and since v1.4.10 fields with empty values are
 * counted per term. None of these limits is checked here.
 *
 * @param mixed $field a field name or an array of field names
 * @param bool $exact true to request exact counting (more expensive); only
 *        the boolean true enables it
 * @return XSSearch this object, to allow call chaining
 * @throws XSException when a field is not of string type
 * @since 1.1.0
 */
public function setFacets($field, $exact = false)
{
    $names = is_array($field) ? $field : array($field);

    $buf = '';
    foreach ($names as $name) {
        $meta = $this->xs->getField($name);
        if ($meta->type !== XSFieldMeta::TYPE_STRING) {
            throw new XSException("Field `$name' cann't be used for facets search, can only be string type");
        }
        $buf .= chr($meta->vno);
    }

    $this->execCommand(array(
        'cmd' => XS_CMD_SEARCH_SET_FACETS,
        'buf' => $buf,
        'arg1' => $exact === true ? 1 : 0,
    ));

    return $this;
}
/**
 * Read the facet counts collected by the most recent {@link search}.
 *
 * @param string $field field name; null returns the counts of all fields
 * @return array map of value => count for the field (empty when the field
 *         has no counts), or the full map of field => counts when $field is null
 * @since 1.1.0
 */
public function getFacets($field = null)
{
    if ($field === null) {
        return $this->_facets;
    }
    if (isset($this->_facets[$field])) {
        return $this->_facets[$field];
    }

    return array();
}
/**
 * Set the scws compound-segmentation level for the current query.
 *
 * Levels outside 0..15 are silently ignored. Per the original
 * documentation the default is 3, the setting only affects
 * {@link setQuery} calls made afterwards, and 0 turns compound
 * segmentation off.
 *
 * @param int $level compound level, 0-15
 * @return XSSearch this object, to allow call chaining
 * @since 1.4.7
 */
public function setScwsMulti($level)
{
    $level = intval($level);
    if ($level < 0 || $level >= 16) {
        return $this;
    }

    $this->execCommand(array(
        'cmd' => XS_CMD_SEARCH_SCWS_SET,
        'arg1' => XS_CMD_SCWS_SET_MULTI,
        'arg2' => $level,
    ));

    return $this;
}
/**
 * Set the result limit and offset for the next {@link search}.
 *
 * Both values are cast to int and stored; search() resets them to 0 when
 * it completes.
 *
 * @param int $limit maximum number of results; 0 means self::PAGE_SIZE
 * @param int $offset number of results to skip, default 0
 * @return XSSearch this object, to allow call chaining
 */
public function setLimit($limit, $offset = 0)
{
    $this->_offset = intval($offset);
    $this->_limit = intval($limit);

    return $this;
}
/**
 * Select the database to search.
 *
 * After the server accepts the command, the previous selection (main
 * database and added databases) is remembered so restoreDb() can undo the
 * switch, and the list of added databases is emptied.
 *
 * Per the original documentation: the default database is used when none
 * is set, the database must live under the server-side user directory, and
 * remote databases are reached through stub files.
 *
 * @param string $name database name (cast to string)
 * @return XSSearch this object, to allow call chaining
 */
public function setDb($name)
{
    $name = strval($name);
    $this->execCommand(array('cmd' => XS_CMD_SEARCH_SET_DB, 'buf' => $name));

    // Remember what was selected before, then record the new selection.
    $this->_lastDbs = $this->_curDbs;
    $this->_lastDb = $this->_curDb;
    $this->_curDbs = array();
    $this->_curDb = $name;

    return $this;
}
/**
 * Add a further database to search alongside the current one.
 *
 * The name is appended to the list of added databases once the server has
 * accepted the command.
 *
 * @param string $name database name (cast to string)
 * @return XSSearch this object, to allow call chaining
 * @see setDb
 */
public function addDb($name)
{
    $dbName = strval($name);
    $this->execCommand(array('cmd' => XS_CMD_SEARCH_ADD_DB, 'buf' => $dbName));
    $this->_curDbs[] = $dbName;

    return $this;
}
/**
 * Flag that the field scheme has changed.
 *
 * The flag is picked up by the next clearQuery(), which then resets the
 * prefix list and field-set flag and tells the server to reset as well.
 *
 * @see XS::setScheme
 */
public function markResetScheme()
{
    $this->_resetScheme = true;
}
/**
 * Get the list of terms in a query that are suitable for highlighting.
 *
 * The server's answer is split on spaces; empty pieces and pieces
 * containing ':' are dropped. The result for the default query is cached
 * until the query is cleared.
 *
 * @param string $query query text; null means the default query
 *        (the original documentation states a maximum length of 80 bytes)
 * @param bool $convert whether to convert the terms from UTF-8 to the
 *        configured character set, default true
 * @return array list of terms
 */
public function terms($query = null, $convert = true)
{
    $query = $query === null ? '' : $this->preQueryString($query);
    $isDefault = ($query === '');

    if ($isDefault && $this->_terms !== null) {
        $list = $this->_terms;
    } else {
        $res = $this->execCommand(
            new XSCommand(XS_CMD_QUERY_GET_TERMS, 0, $this->_defaultOp, $query),
            XS_CMD_OK_QUERY_TERMS
        );

        $list = array();
        foreach (explode(' ', $res->buf) as $piece) {
            if ($piece !== '' && strpos($piece, ':') === false) {
                $list[] = $piece;
            }
        }

        if ($isDefault) {
            $this->_terms = $list;
        }
    }

    if ($convert) {
        return XS::convert($list, $this->_charset, 'UTF-8');
    }

    return $list;
}
/**
 * Estimate how many documents match a query.
 *
 * The estimate for the default query is cached until the query is cleared.
 * The original documentation advises using {@link getLastCount} when the
 * query equals the most recent {@link search}, and notes that the default
 * sort order is restored after this call.
 *
 * @param string $query query text; null means the default query
 *        (the original documentation states a maximum length of 80 bytes)
 * @return int estimated number of matches
 */
public function count($query = null)
{
    $query = $query === null ? '' : $this->preQueryString($query);
    $isDefault = ($query === '');

    if ($isDefault && $this->_count !== null) {
        return $this->_count;
    }

    $res = $this->execCommand(
        new XSCommand(XS_CMD_SEARCH_GET_TOTAL, 0, $this->_defaultOp, $query),
        XS_CMD_OK_SEARCH_TOTAL
    );
    $data = unpack('Icount', $res->buf);
    $total = $data['count'];

    if ($isDefault) {
        $this->_count = $total;
    }

    return $total;
}
/**
 * Run a search and return the matching documents.
 *
 * Uses the limit/offset set through {@link setLimit} (self::PAGE_SIZE when
 * no positive limit is set) and resets both to 0 afterwards. Facet counts
 * delivered with the result replace those of any earlier search. When the
 * default query is searched against a database other than LOG_DB, the query
 * is passed to logQuery() and, if requested, highlighting is initialised.
 *
 * @param string $query query text; null means the default query
 *        (the original documentation states a maximum length of 80 bytes)
 * @param boolean $saveHighlight whether to keep the query for highlighting, default true
 * @return XSDocument[] list of matching documents
 * @throws XSException on an unexpected response from the server
 */
public function search($query = null, $saveHighlight = true)
{
    if ($saveHighlight && $this->_curDb !== self::LOG_DB) {
        $this->_highlight = $query;
    }

    $query = $query === null ? '' : $this->preQueryString($query);
    $limit = $this->_limit > 0 ? $this->_limit : self::PAGE_SIZE;
    $page = pack('II', $this->_offset, $limit);

    // Result header: carries the estimated match count.
    $res = $this->execCommand(
        new XSCommand(XS_CMD_SEARCH_GET_RESULT, 0, $this->_defaultOp, $query, $page),
        XS_CMD_OK_RESULT_BEGIN
    );
    $header = unpack('Icount', $res->buf);
    $this->_lastCount = $header['count'];

    $this->_facets = array();
    $docs = array();
    $doc = null;
    // Map of value number => field name, used to label facets and fields.
    $vnoes = $this->xs->getScheme()->getVnoMap();

    for (;;) {
        $res = $this->getRespond();

        if ($res->cmd == XS_CMD_SEARCH_RESULT_FACETS) {
            // Entries are laid out as [vno:1][vlen:1][num:4][value:vlen].
            $total = strlen($res->buf);
            $off = 0;
            while (($off + 6) < $total) {
                $item = unpack('Cvno/Cvlen/Inum', substr($res->buf, $off, 6));
                $off += 6;
                if (isset($vnoes[$item['vno']])) {
                    $facetName = $vnoes[$item['vno']];
                    $facetValue = substr($res->buf, $off, $item['vlen']);
                    $this->_facets[$facetName][$facetValue] = $item['num'];
                }
                $off += $item['vlen'];
            }
        } elseif ($res->cmd == XS_CMD_SEARCH_RESULT_DOC) {
            // Start of a new document.
            $doc = new XSDocument($res->buf, $this->_charset);
            $docs[] = $doc;
        } elseif ($res->cmd == XS_CMD_SEARCH_RESULT_FIELD) {
            // Field of the document currently being received.
            if ($doc !== null) {
                $fieldName = isset($vnoes[$res->arg]) ? $vnoes[$res->arg] : $res->arg;
                $doc->setField($fieldName, $res->buf);
            }
        } elseif ($res->cmd == XS_CMD_SEARCH_RESULT_MATCHED) {
            // Matched terms of the document currently being received.
            if ($doc !== null) {
                $doc->setField('matched', explode(' ', $res->buf), true);
            }
        } elseif ($res->cmd == XS_CMD_OK && $res->arg == XS_CMD_OK_RESULT_END) {
            break;
        } else {
            $msg = 'Unexpected respond in search {CMD:' . $res->cmd . ', ARG:' . $res->arg . '}';
            throw new XSException($msg);
        }
    }

    if ($query === '') {
        $this->_count = $this->_lastCount;
        // Logging and highlighting are skipped when searching the log database itself.
        if ($this->_curDb !== self::LOG_DB) {
            $this->logQuery();
            if ($saveHighlight) {
                $this->initHighlight();
            }
        }
    }

    $this->_offset = 0;
    $this->_limit = 0;

    return $docs;
}
/**
 * Get the estimated match count of the most recent {@link search}.
 *
 * @return int|false the estimate, or false if no search ran since open()
 * @see search
 */
public function getLastCount()
{
    return $this->_lastCount;
}
/**
 * Get the total number of documents in the search database.
 *
 * @return int document total as reported by the server
 */
public function getDbTotal()
{
    $res = $this->execCommand(new XSCommand(XS_CMD_SEARCH_DB_TOTAL), XS_CMD_OK_DB_TOTAL);
    $data = unpack('Itotal', $res->buf);

    return $data['total'];
}
/**
 * Get the list of most popular search queries.
 *
 * Temporarily switches to the logger scheme and the LOG_DB database,
 * searches for "<type>:1" and collects each document's body with its
 * $type value. Xapian errors are swallowed and yield whatever was
 * collected so far; other errors are re-thrown.
 *
 * The previous database selection is now restored whenever the switch to
 * LOG_DB succeeded, including when the search afterwards fails with a
 * swallowed Xapian error. Before, that path left LOG_DB selected for all
 * later searches on this connection.
 *
 * @param int $limit maximum number of queries to return, clamped to 1..50, default 6
 * @param string $type ranking type: 'total' (default), 'lastnum' or 'currnum';
 *        any other value is treated as 'total'
 * @return array map of query text => search index value
 * @throws XSException for any error other than XS_CMD_ERR_XAPIAN
 */
public function getHotQuery($limit = 6, $type = 'total')
{
    $ret = array();
    $limit = max(1, min(50, intval($limit)));
    if ($type !== 'lastnum' && $type !== 'currnum') {
        $type = 'total';
    }

    // query from log_db
    $this->xs->setScheme(XSFieldScheme::logger());
    $switched = false;
    try {
        $this->setDb(self::LOG_DB);
        // Only once setDb() has returned is there a previous selection to go back to.
        $switched = true;
        $this->setLimit($limit);
        $result = $this->search($type . ':1');
        foreach ($result as $doc) /* @var $doc XSDocument */ {
            $body = $doc->body;
            $ret[$body] = $doc->f($type);
        }
    } catch (XSException $e) {
        if ($e->getCode() != XS_CMD_ERR_XAPIAN) {
            throw $e;
        }
    }
    if ($switched) {
        $this->restoreDb();
    }
    $this->xs->restoreScheme();

    return $ret;
}
/**
 * Get search queries related to a query.
 *
 * Temporarily switches to the logger scheme and the LOG_DB database and
 * runs a fuzzy search there; bodies equal to the query (case-insensitive)
 * are skipped. Queries that are empty or contain ':' return an empty list
 * without contacting the server. Xapian errors are swallowed; other errors
 * are re-thrown.
 *
 * The previous database selection is restored only when the switch to
 * LOG_DB actually succeeded. Before, a failing setDb() was followed by
 * restoreDb() anyway, which then re-applied stale "last" values and could
 * replace the caller's current database selection.
 *
 * @param string $query query text; null means the default query after
 *        passing it through cleanFieldQuery()
 * @param int $limit maximum number of queries to return, clamped to 1..20, default 6
 * @return array list of related query strings
 * @throws XSException for any error other than XS_CMD_ERR_XAPIAN
 */
public function getRelatedQuery($query = null, $limit = 6)
{
    $ret = array();
    $limit = max(1, min(20, intval($limit)));

    // Simple to disable query with field filter
    if ($query === null) {
        $query = $this->cleanFieldQuery($this->_query);
    }
    if (empty($query) || strpos($query, ':') !== false) {
        return $ret;
    }

    // Search the log database
    $op = $this->_defaultOp;
    $this->xs->setScheme(XSFieldScheme::logger());
    $switched = false;
    try {
        $this->setDb(self::LOG_DB);
        // Only once setDb() has returned is there a previous selection to go back to.
        $switched = true;
        // One extra result is requested because the query itself may be among the hits.
        $result = $this->setFuzzy()->setLimit($limit + 1)->search($query);
        foreach ($result as $doc) /* @var $doc XSDocument */ {
            $doc->setCharset($this->_charset);
            $body = $doc->body;
            if (!strcasecmp($body, $query)) {
                continue;
            }
            $ret[] = $body;
            if (count($ret) == $limit) {
                break;
            }
        }
    } catch (XSException $e) {
        if ($e->getCode() != XS_CMD_ERR_XAPIAN) {
            throw $e;
        }
    }
    if ($switched) {
        $this->restoreDb();
    }
    $this->xs->restoreScheme();
    // setFuzzy() above changed the default operator; put the caller's choice back.
    $this->_defaultOp = $op;

    return $ret;
}
/**
 * Expand a prefix into a list of suggested search queries.
 *
 * Xapian errors are swallowed and yield whatever was collected so far;
 * other errors are re-thrown.
 *
 * @param string $query prefix to expand; per the original documentation it
 *        may be pinyin, English or Chinese
 * @param int $limit maximum number of suggestions, clamped to 1..20, default 10
 * @return array list of suggested query strings
 * @throws XSException on an unexpected response or any error other than XS_CMD_ERR_XAPIAN
 */
public function getExpandedQuery($query, $limit = 10)
{
    $suggestions = array();
    $limit = max(1, min(20, intval($limit)));

    try {
        $request = array(
            'cmd' => XS_CMD_QUERY_GET_EXPANDED,
            'arg1' => $limit,
            'buf' => XS::convert($query, 'UTF-8', $this->_charset),
        );
        $this->execCommand($request, XS_CMD_OK_RESULT_BEGIN);

        // Read suggestion fields until the end-of-result marker arrives.
        for (;;) {
            $res = $this->getRespond();
            if ($res->cmd == XS_CMD_SEARCH_RESULT_FIELD) {
                $suggestions[] = XS::convert($res->buf, $this->_charset, 'UTF-8');
                continue;
            }
            if ($res->cmd == XS_CMD_OK && $res->arg == XS_CMD_OK_RESULT_END) {
                break;
            }
            $msg = 'Unexpected respond in search {CMD:' . $res->cmd . ', ARG:' . $res->arg . '}';
            throw new XSException($msg);
        }
    } catch (XSException $e) {
        if ($e->getCode() != XS_CMD_ERR_XAPIAN) {
            throw $e;
        }
    }

    return $suggestions;
}
/**
 * Get spelling-corrected suggestions for a query.
 *
 * Intended for "did you mean ..." features when a search returns few
 * results. For the default query, no suggestions are produced when its
 * cached count is positive and exceeds 0.1% (rounded up) of the database
 * total. Queries that are empty or contain ':' yield an empty list.
 * Xapian errors are swallowed; other errors are re-thrown.
 *
 * @param string $query query text; null means the default query after
 *        passing it through cleanFieldQuery()
 * @return array list of suggested query strings
 * @throws XSException for any error other than XS_CMD_ERR_XAPIAN
 */
public function getCorrectedQuery($query = null)
{
    $suggestions = array();

    try {
        if ($query === null) {
            $enoughHits = $this->_count > 0
                && $this->_count > ceil($this->getDbTotal() * 0.001);
            if ($enoughHits) {
                return $suggestions;
            }
            $query = $this->cleanFieldQuery($this->_query);
        }

        if (empty($query) || strpos($query, ':') !== false) {
            return $suggestions;
        }

        $request = array(
            'cmd' => XS_CMD_QUERY_GET_CORRECTED,
            'buf' => XS::convert($query, 'UTF-8', $this->_charset),
        );
        $res = $this->execCommand($request, XS_CMD_OK_QUERY_CORRECTED);

        if ($res->buf !== '') {
            $suggestions = explode("\n", XS::convert($res->buf, $this->_charset, 'UTF-8'));
        }
    } catch (XSException $e) {
        if ($e->getCode() != XS_CMD_ERR_XAPIAN) {
            throw $e;
        }
    }

    return $suggestions;
}
/**
 * Add a query to the search-log buffer.
 *
 * Per the original documentation, {@link XSIndex::flushLogging} must be
 * called to flush the buffer immediately; otherwise it is flushed after
 * some time.
 *
 * @param string $query text to record
 * @param int $wdf number of occurrences to record; only sent when greater than 1
 * @since 1.1.1
 */
public function addSearchLog($query, $wdf = 1)
{
    $request = array('cmd' => XS_CMD_SEARCH_ADD_LOG, 'buf' => $query);
    if ($wdf > 1) {
        $request['buf1'] = pack('i', $wdf);
    }

    $this->execCommand($request, XS_CMD_OK_LOGGED);
}
/**
 * Highlight search terms inside a string.
 *
 * Highlighting data is initialised on first use through initHighlight().
 * A regex replacement is applied when a 'pattern' entry exists, followed by
 * a literal replacement when a 'pairs' entry exists. Per the original
 * documentation, highlighted parts are wrapped in em tags.
 *
 * @param string $value text to process; empty values are returned unchanged
 * @param bool $strtr true to apply the literal pairs with strtr() instead of str_replace()
 * @return string the text with highlighting applied
 */
public function highlight($value, $strtr = false)
{
    if (empty($value)) {
        return $value;
    }

    if (!is_array($this->_highlight)) {
        $this->initHighlight();
    }
    $rules = $this->_highlight;

    if (isset($rules['pattern'])) {
        $value = preg_replace($rules['pattern'], $rules['replace'], $value);
    }

    if (isset($rules['pairs'])) {
        if ($strtr) {
            $value = strtr($value, $rules['pairs']);
        } else {
            $value = str_replace(array_keys($rules['pairs']), array_values($rules['pairs']), $value);
        }
    }

    return $value;
}
/**
 * Record a search query in the search log.
 *
 * The log feeds related-query and corrected-query features, so the query
 * is filtered and cleaned first. Nothing is logged for robot agents. For
 * the default query nothing is logged when the last search had no results,
 * when fuzzy mode is on and the query contains a space past its first
 * character, or when the query contains " OR ", " NOT " or " XOR " past
 * its first character. The text to log is rebuilt from the query's terms
 * and discarded if it is shorter than 2 bytes or is a single 3-byte
 * character with a high first byte.
 *
 * @param string $query query to record; null or '' means the default query
 */
private function logQuery($query = null)
{
    if ($this->isRobotAgent()) {
        return;
    }

    if ($query === '' || $query === null) {
        $query = $this->_query;
        if (!$this->_lastCount) {
            return;
        }
        if ($this->_defaultOp == XS_CMD_QUERY_OP_OR && strpos($query, ' ')) {
            return;
        }
        if (strpos($query, ' OR ') || strpos($query, ' NOT ') || strpos($query, ' XOR ')) {
            return;
        }
        $terms = $this->terms(null, false);
    } else {
        $terms = $this->terms($query, false);
    }

    // Rebuild a clean statement by locating each term inside the query text.
    $log = '';
    $cursor = 0;
    $gaps = 0;
    foreach ($terms as $term) {
        // For 6-byte terms, start looking 3 bytes before the cursor so a
        // term overlapping the previous one can still be found.
        $start = ($cursor > 3 && strlen($term) === 6) ? $cursor - 3 : $cursor;
        $found = strpos($query, $term, $start);
        if ($found === false) {
            continue;
        }

        if ($found === $cursor) {
            // Directly adjacent to the previous term.
            $log .= $term;
        } elseif ($found < $cursor) {
            // Overlaps the previous term: append only the part after the first 3 bytes.
            $log .= substr($term, 3);
        } else {
            // Separated from the previous term: at most 3 such gaps and 42 bytes so far.
            if (++$gaps > 3 || strlen($log) > 42) {
                break;
            }
            $log .= ' ' . $term;
        }
        $cursor = $found + strlen($term);
    }

    $log = trim($log);
    $length = strlen($log);
    if ($length < 2 || ($length == 3 && ord($log[0]) > 0x80)) {
        return;
    }

    $this->addSearchLog($log);
}
/**
 * Clear the default query.
 *
 * Sends a QUERY_INIT command and drops the stored query text together with
 * its cached count and terms. When a scheme reset is pending, the command
 * carries arg1 = 1 and the local prefix list and field-set flag are reset.
 */
private function clearQuery()
{
    $init = new XSCommand(XS_CMD_QUERY_INIT);

    if ($this->_resetScheme === true) {
        $init->arg1 = 1;
        $this->_resetScheme = false;
        $this->_fieldSet = false;
        $this->_prefix = array();
    }

    $this->execCommand($init);

    $this->_terms = null;
    $this->_count = null;
    $this->_query = null;
}
/**
 * Append a query string to the default query.
 *
 * @param string $query query text
 * @param int $addOp operator used to combine with the existing query
 *        (meaningless when there is none); supported operators:
 *        XS_CMD_QUERY_OP_AND
 *        XS_CMD_QUERY_OP_OR
 *        XS_CMD_QUERY_OP_AND_NOT
 *        XS_CMD_QUERY_OP_XOR
 *        XS_CMD_QUERY_OP_AND_MAYBE
 *        XS_CMD_QUERY_OP_FILTER
 * @param float $scale weight scale factor; 1 (default) or a non-positive
 *        value sends no scale, otherwise the value times 100 is sent as a
 *        16-bit integer (the original documentation states a range up to 655.35)
 * @return string the prepared query text that was sent
 */
public function addQueryString($query, $addOp = XS_CMD_QUERY_OP_AND, $scale = 1)
{
    $prepared = $this->preQueryString($query);

    $packedScale = '';
    if ($scale > 0 && $scale != 1) {
        $packedScale = pack('n', intval($scale * 100));
    }

    $this->execCommand(
        new XSCommand(XS_CMD_QUERY_PARSE, $addOp, $this->_defaultOp, $prepared, $packedScale)
    );

    return $prepared;
}
/**
 * Append one or more terms to the default query.
 *
 * An array with a single element is sent as a single term; an array with
 * several elements is joined with tabs and sent as a multi-term command;
 * an empty array sends nothing. Per the original documentation (since
 * v1.4.10), multiple terms are combined with the default operator and are
 * not segmented again.
 *
 * @param string $field field the term belongs to; null selects XSFieldScheme::MIXED_VNO
 * @param string|array $term a term or a list of terms
 * @param int $addOp operator used to combine with the existing query
 * @param float $scale weight scale factor; 1 (default) or a non-positive
 *        value sends no scale, otherwise the value times 100 is sent as a
 *        16-bit integer (the original documentation states a range up to 655.35)
 * @return XSSearch this object, to allow call chaining
 * @see addQueryString
 */
public function addQueryTerm($field, $term, $addOp = XS_CMD_QUERY_OP_AND, $scale = 1)
{
    $term = XS::convert($term, 'UTF-8', $this->_charset);

    $packedScale = '';
    if ($scale > 0 && $scale != 1) {
        $packedScale = pack('n', intval($scale * 100));
    }

    if ($field === null) {
        $vno = XSFieldScheme::MIXED_VNO;
    } else {
        $vno = $this->xs->getField($field, true)->vno;
    }

    $code = XS_CMD_QUERY_TERM;
    if (is_array($term)) {
        $total = count($term);
        if ($total === 0) {
            return $this;
        }
        if ($total === 1) {
            $term = current($term);
        } else {
            $term = implode("\t", $term);
            $code = XS_CMD_QUERY_TERMS;
        }
    }

    $this->execCommand(new XSCommand($code, $addOp, $vno, $term, $packedScale));

    return $this;
}
/**
 * Restore the previously selected search database(s).
 *
 * Typically used to switch back after the current db was temporarily changed to LOG_DB.
 */
private function restoreDb()
{
    // Take a snapshot of both properties before calling setDb()/addDb(),
    // in case those calls update them.
    $primaryDb = $this->_lastDb;
    $extraDbs = $this->_lastDbs;

    $this->setDb($primaryDb);
    foreach ($extraDbs as $dbName) {
        $this->addDb($dbName);
    }
}
/**
 * Prepare a query string before it is sent to the server.
 *
 * The query is split on whitespace and every part is rewritten independently:
 *  - "name:value" where name is a known, non-mixed field: the prefix is registered with
 *    the server; if the field has a custom tokenizer the value is split into lower-cased,
 *    de-duplicated "name:token" terms; otherwise a value containing bytes in 0x81-0xfe
 *    that is not already parenthesised gets wrapped in "(...)".
 *  - a part starting with + or - that contains bytes in 0x81-0xfe gets its text wrapped
 *    in "(...)".
 *  - anything else is kept as is.
 * On first use the special-field settings are sent too (see initSpecialField()), and a
 * pending scheme reset forces the query to be cleared first.
 * Note: an empty query is not rejected here.
 *
 * @param string $query the query string to prepare
 * @return string the prepared query, passed through XS::convert() with 'UTF-8' and the
 *         current charset
 */
private function preQueryString($query)
{
    $query = trim($query);

    // A pending scheme reset forces the query to be cleared first.
    if ($this->_resetScheme === true) {
        $this->clearQuery();
    }
    // Send the special-field settings (no-op once done).
    $this->initSpecialField();

    $pieces = array();
    foreach (preg_split('/[ \t\r\n]+/', $query) as $part) {
        if ($part === '') {
            continue;
        }

        $colon = strpos($part, ':', 1);
        if ($colon !== false) {
            // Skip leading modifiers (+ - ~ and opening bracket) to find the field name.
            $lead = 0;
            while ($lead < $colon && strpos('+-~(', $part[$lead]) !== false) {
                $lead++;
            }
            $name = substr($part, $lead, $colon - $lead);
            $field = $this->xs->getField($name, false);

            if ($field !== false && $field->vno != XSFieldScheme::MIXED_VNO) {
                $this->regQueryPrefix($name);

                if ($field->hasCustomTokenizer()) {
                    $prefix = $lead > 0 ? substr($part, 0, $lead) : '';
                    $value = substr($part, $colon + 1);
                    $suffix = '';
                    // Keep a trailing bracket outside of the tokenized value.
                    if (substr($value, -1, 1) === ')') {
                        $suffix = ')';
                        $value = substr($value, 0, -1);
                    }
                    // Tokens are forced to lowercase and de-duplicated.
                    $lowered = array();
                    foreach ($field->getCustomTokenizer()->getTokens($value) as $token) {
                        $lowered[] = strtolower($token);
                    }
                    $lowered = array_unique($lowered);
                    $pieces[] = $prefix . $name . ':' . implode(' ' . $name . ':', $lowered) . $suffix;
                } elseif (substr($part, $colon + 1, 1) != '(' && preg_match('/[\x81-\xfe]/', $part)) {
                    // Force brackets around the value for the default scws tokenizer.
                    $pieces[] = substr($part, 0, $colon + 1) . '(' . substr($part, $colon + 1) . ')';
                } else {
                    $pieces[] = $part;
                }
                continue;
            }
        }

        // "+text" / "-text" holding multi-byte characters: wrap the text in brackets.
        $needsBrackets = strlen($part) > 1
            && ($part[0] == '+' || $part[0] == '-')
            && $part[1] != '('
            && preg_match('/[\x81-\xfe]/', $part);
        if ($needsBrackets) {
            $pieces[] = substr($part, 0, 1) . '(' . substr($part, 1) . ')';
        } else {
            $pieces[] = $part;
        }
    }

    return XS::convert(implode(' ', $pieces), 'UTF-8', $this->_charset);
}
/**
 * Register a field name used in a query as a query prefix on the server.
 *
 * Does nothing when the prefix was already registered, when the field is unknown, or when
 * it maps to the mixed area. Boolean-index fields are registered as XS_CMD_PREFIX_BOOLEAN,
 * all others as XS_CMD_PREFIX_NORMAL.
 *
 * @param string $name field name
 */
private function regQueryPrefix($name)
{
    if (isset($this->_prefix[$name])) {
        return;
    }

    $field = $this->xs->getField($name, false);
    if (!$field || $field->vno == XSFieldScheme::MIXED_VNO) {
        return;
    }

    if ($field->isBoolIndex()) {
        $type = XS_CMD_PREFIX_BOOLEAN;
    } else {
        $type = XS_CMD_PREFIX_NORMAL;
    }
    $this->execCommand(new XSCommand(XS_CMD_QUERY_PREFIX, $type, $field->vno, $name));

    // Remember the registration so it is only sent once.
    $this->_prefix[$name] = true;
}
/**
 * Tell the server about fields with a cut length and about numeric fields.
 *
 * Runs only once: after the first pass $_fieldSet is set and later calls return at once
 * (until the flag is cleared again, e.g. by clearQuery() on a scheme reset).
 */
private function initSpecialField()
{
    if ($this->_fieldSet === true) {
        return;
    }

    /* @var $meta XSFieldMeta */
    foreach ($this->xs->getAllFields() as $meta) {
        if ($meta->cutlen != 0) {
            // The cut length is sent in units of 10 (rounded up), capped at 127.
            $units = min(127, ceil($meta->cutlen / 10));
            $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_CUT, $units, $meta->vno));
        }
        if ($meta->isNumeric()) {
            $this->execCommand(new XSCommand(XS_CMD_SEARCH_SET_NUMERIC, 0, $meta->vno));
        }
    }

    $this->_fieldSet = true;
}
/**
 * Strip field names and boolean-field conditions from a query string.
 *
 * " AND " and " OR " are first collapsed to a single space; every "field:data" token is
 * then handed to cleanFieldCallback().
 *
 * @param string $query the query string
 * @return string the cleaned query string
 */
private function cleanFieldQuery($query)
{
    $query = strtr($query, array(' AND ' => ' ', ' OR ' => ' '));

    // Without a colon there cannot be any field prefix to remove.
    if (strpos($query, ':') === false) {
        return $query;
    }

    $fieldToken = '/(^|\s)([0-9A-Za-z_\.-]+):([^\s]+)/';
    return preg_replace_callback($fieldToken, array($this, 'cleanFieldCallback'), $query);
}
/**
 * Regex callback that removes boolean-field conditions and the names of other fields.
 *
 * Used to clean the query strings in {@link getCorrectedQuery} and {@link getRelatedQuery}.
 *
 * @param array $match the regex match: [1] prefix, [2] field name, [3] data
 * @return string the replacement: the untouched match for unknown fields, an empty string
 *         for boolean-index fields, otherwise the prefix followed by the data with one
 *         pair of enclosing brackets removed
 */
private function cleanFieldCallback($match)
{
    $field = $this->xs->getField($match[2], false);
    if ($field === false) {
        // Not a known field: leave the text alone.
        return $match[0];
    }
    if ($field->isBoolIndex()) {
        // Boolean conditions are dropped entirely.
        return '';
    }

    $data = $match[3];
    $bracketed = substr($data, 0, 1) == '(' && substr($data, -1, 1) == ')';
    if ($bracketed) {
        $data = substr($data, 1, -1);
    }
    return $match[1] . $data;
}
/**
 * Initialise the highlight replacement data.
 *
 * Fetches the terms for the query held in $_highlight via terms(), then replaces
 * $_highlight with an array that may contain:
 *  - 'pairs': term => '<em>term</em>' map for terms without ASCII letters;
 *  - 'pattern' / 'replace': case-insensitive regex patterns and their replacements for
 *    terms that contain ASCII letters.
 */
private function initHighlight()
{
    $terms = array();
    $tmps = $this->terms($this->_highlight, false);
    $total = count($tmps);
    for ($i = 0; $i < $total; $i++) {
        // Only 6-byte terms whose first byte is >= 0xc0 take part in the merge below
        // (presumably two 3-byte UTF-8 characters -- TODO confirm).
        if (strlen($tmps[$i]) !== 6 || ord(substr($tmps[$i], 0, 1)) < 0xc0) {
            $terms[] = XS::convert($tmps[$i], $this->_charset, 'UTF-8');
            continue;
        }
        // auto fixed duality in libscws
        // ABC => AB,BC => ABC,BC,AB
        // ABCD => AB,BC,CD => CD,ABC,BC,AB
        // ABCDE => AB,BC,CD,DE => CDE,DE,CD,ABC,BC,AB
        // Find the end of the run of overlapping 6-byte terms (each one starting with the
        // last 3 bytes of its predecessor).
        for ($j = $i + 1; $j < $total; $j++) {
            if (strlen($tmps[$j]) !== 6 || substr($tmps[$j], 0, 3) !== substr($tmps[$j - 1], 3, 3)) {
                break;
            }
        }
        if (($k = ($j - $i)) === 1) {
            // No overlapping neighbour: keep the term unchanged.
            $terms[] = XS::convert($tmps[$i], $this->_charset, 'UTF-8');
        } else {
            // Continue the outer loop after the run, then walk the run backwards.
            $i = $j - 1;
            while ($k--) {
                $j--;
                if ($k & 1) {
                    // Merge with the previous term's first 3 bytes into a 9-byte term.
                    $terms[] = XS::convert(substr($tmps[$j - 1], 0, 3) . $tmps[$j], $this->_charset, 'UTF-8');
                }
                $terms[] = XS::convert($tmps[$j], $this->_charset, 'UTF-8');
            }
        }
    }

    $pattern = $replace = $pairs = array();
    foreach ($terms as $term) {
        if (!preg_match('/[a-zA-Z]/', $term)) {
            $pairs[$term] = '<em>' . $term . '</em>';
        } else {
            // preg_quote() escapes every regex metacharacter (and the delimiter), so terms
            // such as "a.b" or "f(x" are matched literally instead of producing a wrong or
            // invalid pattern.
            $pattern[] = '/' . preg_quote($term, '/') . '/i';
            $replace[] = '<em>$0</em>';
        }
    }

    $this->_highlight = array();
    if (count($pairs) > 0) {
        $this->_highlight['pairs'] = $pairs;
    }
    if (count($pattern) > 0) {
        $this->_highlight['pattern'] = $pattern;
        $this->_highlight['replace'] = $replace;
    }
}
/**
 * Format a value range / value compare match as "field:[from,to]".
 *
 * An open end of the range is written as "~". Values of numeric fields are decoded with
 * xapianUnserialise(); other values are used verbatim.
 *
 * @param array $match regex match: [1] operator name, [2] field number, [3] first value,
 *        [4] optional second value
 * @return string the formatted range, or the untouched match when the field is unknown
 */
private function formatValueRange($match)
{
    // VALUE_[GL]E 0 xxx yyy
    $field = $this->xs->getField(intval($match[2]), false);
    if ($field === false) {
        return $match[0];
    }

    $lower = '~';
    $upper = '~';
    if (isset($match[4])) {
        $upper = $field->isNumeric() ? $this->xapianUnserialise($match[4]) : $match[4];
    }

    // The first value is the upper bound for VALUE_LE and the lower bound otherwise.
    $bound = $field->isNumeric() ? $this->xapianUnserialise($match[3]) : $match[3];
    if ($match[1] === 'VALUE_LE') {
        $upper = $bound;
    } else {
        $lower = $bound;
    }

    return $field->name . ':[' . $lower . ',' . $upper . ']';
}
/**
 * Read one byte of a string as a number.
 *
 * @param string $str the source string
 * @param int $index byte offset to read
 * @return int the byte value at $index, or 0 when $index is not below the string length
 */
private function numfromstr($str, $index)
{
    if ($index < strlen($str)) {
        return ord($str[$index]);
    }
    return 0;
}
/**
 * Convert a string encoded by xapian (sortable serialisation) to a floating point number.
 *
 * Special encodings: "\x80" is zero, nine 0xff bytes is +INF and the empty string is -INF.
 * Bytes missing at the end of $value are read as zero (see numfromstr()).
 *
 * @param string $value the serialised binary value
 * @return double the unserialised number, rounded to 2 decimal places (or +/-INF)
 */
private function xapianUnserialise($value)
{
    if ($value === "\x80") {
        return 0.0;
    }
    if ($value === str_repeat("\xff", 9)) {
        return INF;
    }
    if ($value === '') {
        return -INF;
    }

    $i = 0;
    $c = ord($value[0]);
    // Bits 7/6/5 of the first byte carry the mantissa sign, the exponent sign and the
    // exponent length; the low 5 bits start the exponent.
    $c ^= ($c & 0xc0) >> 1;
    $negative = !($c & 0x80) ? 1 : 0;
    $exponent_negative = ($c & 0x40) ? 1 : 0;
    $explen = !($c & 0x20) ? 1 : 0;
    $exponent = $c & 0x1f;
    if (!$explen) {
        // Short form: 3-bit exponent.
        $exponent >>= 2;
        if ($negative ^ $exponent_negative) {
            $exponent ^= 0x07;
        }
    } else {
        // Long form: 11-bit exponent spread over two bytes.
        $c = $this->numfromstr($value, ++$i);
        $exponent <<= 6;
        $exponent |= ($c >> 2);
        if ($negative ^ $exponent_negative) {
            // The stored exponent bits are inverted in this case and must be flipped back
            // with XOR, exactly like the short form above. The previous "&= 0x07ff" was a
            // no-op on an 11-bit value, so such numbers (per Xapian's encoder e.g. positive
            // values below 1) came out with a huge wrong exponent.
            $exponent ^= 0x07ff;
        }
    }

    // Remaining bits: 26 bits of mantissa in word1, up to 32 more in word2.
    $word1 = ($c & 0x03) << 24;
    $word1 |= $this->numfromstr($value, ++$i) << 16;
    $word1 |= $this->numfromstr($value, ++$i) << 8;
    $word1 |= $this->numfromstr($value, ++$i);
    $word2 = 0;
    if ($i < strlen($value)) {
        $word2 = $this->numfromstr($value, ++$i) << 24;
        $word2 |= $this->numfromstr($value, ++$i) << 16;
        $word2 |= $this->numfromstr($value, ++$i) << 8;
        $word2 |= $this->numfromstr($value, ++$i);
    }

    if (!$negative) {
        // Positive numbers have an implicit leading mantissa bit.
        $word1 |= 1 << 26;
    } else {
        // Negative numbers store a negated mantissa; undo the negation.
        $word1 = 0 - $word1;
        if ($word2 != 0) {
            ++$word1;
        }
        // The reference implementation negates an unsigned 32-bit word, so the result has
        // to wrap into 0..2^32-1 instead of becoming a negative PHP integer.
        $word2 = (0 - $word2) & 0xffffffff;
        $word1 &= 0x03ffffff;
    }

    $mantissa = 0;
    if ($word2) {
        $mantissa = $word2 / 4294967296.0; // 1<<32
    }
    $mantissa += $word1;
    $mantissa /= 1 << ($negative === 1 ? 26 : 27);

    if ($exponent_negative) {
        $exponent = 0 - $exponent;
    }
    $exponent += 8;
    if ($negative) {
        $mantissa = 0 - $mantissa;
    }
    return round($mantissa * pow(2, $exponent), 2);
}
/**
 * Check the HTTP user agent of the current request against a list of crawler keywords.
 *
 * @return boolean whether the user agent is a robot or search engine; false when no
 *         user agent is available
 */
private function isRobotAgent()
{
    if (!isset($_SERVER['HTTP_USER_AGENT'])) {
        return false;
    }

    $userAgent = strtolower($_SERVER['HTTP_USER_AGENT']);
    foreach (array('bot', 'slurp', 'spider', 'crawl', 'curl') as $keyword) {
        if (strpos($userAgent, $keyword) !== false) {
            return true;
        }
    }
    return false;
}
- }
|