Quest.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. #!/usr/bin/env php
  2. <?php
  3. /**
  4. * Xunsearch PHP-SDK 搜索测试工具
  5. *
  6. * @author hightman
  7. * @link http://www.xunsearch.com/
  8. * @copyright Copyright &copy; 2011 HangZhou YunSheng Network Technology Co., Ltd.
  9. * @license http://www.xunsearch.com/license/
  10. * @version $Id$
  11. */
  // Load the SDK entry point: prefer lib/XS.php, fall back to lib/XS.class.php when it is absent.
  $lib_file = dirname(__FILE__) . '/../lib/XS.php';
  $lib_file = file_exists($lib_file) ? $lib_file : dirname(__FILE__) . '/../lib/XS.class.php';
  require_once $lib_file;
  require_once dirname(__FILE__) . '/XSUtil.class.php';

  // Hand the option names to the parser.
  // NOTE(review): presumably these are the options that expect a value - confirm against XSUtil::parseOpt.
  XSUtil::parseOpt(array(
      'p', 'q', 'c', 'd', 's',
      'project', 'query', 'db', 'limit', 'charset',
      'sort', 'add-weight', 'scws-multi', 'cut-off',
  ));

  // Project and query come first; the order of these two calls is kept as-is
  // because the usage text says extra arguments are assigned to them in turn.
  $project = XSUtil::getOpt('p', 'project', true);
  $query = XSUtil::getOpt('q', 'query', true);

  // Mode switches and search tuning options.
  $hot = XSUtil::getOpt(null, 'hot');
  $synonyms = XSUtil::getOpt(null, 'list-synonyms', false);
  $terms = XSUtil::getOpt(null, 'terms');
  $weights = XSUtil::getOpt(null, 'add-weight');
  $info = XSUtil::getOpt(null, 'info');
  $scws_multi = XSUtil::getOpt(null, 'scws-multi');
  $cut_off = XSUtil::getOpt(null, 'cut-off');

  // Register the terminal charset, then convert the query through XSUtil::convertIn.
  $charset = XSUtil::getOpt('c', 'charset');
  XSUtil::setCharset($charset);
  $query = XSUtil::convertIn($query);

  // Sort scheme (parsed later, only for a normal search).
  $sort = XSUtil::getOpt('s', 'sort');
  // Print the usage screen and stop when help is requested, when no project was
  // given, or when a query is needed (no --info / --hot / --list-synonyms) but missing.
  // The help text below fixes several typos of the previous version: unbalanced
  // brackets in the --sort and --cut-off placeholders, a doubled verb in the
  // --query description, and "weight" misspelled as "permission" for --cut-off.
  if (XSUtil::getOpt('h', 'help') !== null || !is_string($project)
      || (!$info && !$hot && !$synonyms && !is_string($query))) {
      $version = XS_PACKAGE_NAME . '/' . XS_PACKAGE_VERSION;
      // Heredoc content and its closing marker stay at column 0 so that the
      // output is unchanged on every PHP version.
      echo <<<EOF
Quest - 搜索查询和测试工具 ($version)
用法
{$_SERVER['argv'][0]} [options] [-p|--project] <project> [[-q|--query] <query>]
选项说明
--project=<name|ini>
-p <project> 用于指定要搜索的项目名称或项目配置文件的路径,
如果指定的是名称,则使用 ../app/<name>.ini 作为配置文件
--charset=<gbk|utf-8>
-c <charset> 指定您当前在用的字符集,以便系统进行智能转换(默认:UTF-8)
--db=<name[,name2 ...]>
-d <db[,db2 ...]> 指定项目中的数据库名称,默认是名为 db 的库,多个库之间用逗号分隔
--query=<query>
-q <query> 指定要搜索的查询语句,如果语句中包含空格请使用双引号包围起来
在搜索语句中可采用 'field:\$from..\$to' 做区间过滤
--sort=<field1[,field2[,...]]>
-s <field1[,field2[,...]]> 指定排序字段,在字段前加上 ~ 符号表示逆序
--fuzzy 将搜索默认设为模糊搜索
--synonym[=scale]
开启自动同义词搜索功能,可选择设置同义词权重调整(0.01~2.55)
--scws-multi=<level>
查看或设置搜索语句的 scws 复合分词等级(值:0-15,默认为 3)
--add-weight=<[field1:]word1[:weight1][,[field2:]word2[:weight2]]>
添加搜索权重词汇,词与次数之间用半角冒号分隔
--hot[=total|last|cur]
用于显示指定项目的热门搜索词,此时 <query> 参数无意义,可省略
其值含义分别表示总搜索量、上周搜索量、本周搜索量,默认为总搜索量。
--suggest 根据当前搜索词展开常用搜索词建议,如查询“中”,即显示“中”开头的词
--correct 根据当前搜索词进行同音、拼写纠错,输出更合适的关键词
--related 根据当前搜索词查找相关搜索词
--list-synonyms[=stemmed]
列出库内的全部同义词,每行显示一个,可以搭配 --limit 使用,默认显示前 100 个
如果设置了 stemmed 值则连同词根同义词也列出,设置其它值则只列出指定词的全部同义词
--limit=<num> 用于设置 suggest|hot|related 的返回数量,两者默认值均为 10 个
对于普通搜索和列出同义词时,还支持用 --limit=offset,num 的格式
--show-query 用于在搜索结果显示内部的 Xapian 结构的 query 语句用于调试
--cut-off=<percent[,weight]>
设置搜索结果剔除的匹配百分比及权重(百分比:0-100,权重:0.1-25.5)
--terms 列出搜索词被切分后的词(不含排除及权重词)
--info 显示当前连接服务端的信息及线程(仅绘制当前 worker 进程)
-h|--help 显示帮助信息
若未指定 -p 或 -q 则会依次把附加的参数当作 <project> 和 <query> 处理,例:
{$_SERVER['argv'][0]} <project> <query>
{$_SERVER['argv'][0]} --hot <project>
EOF;
      exit(0);
  }
  // Map the project argument to its ini file via XSUtil::toProjectIni and
  // abort with a non-zero exit status when that file does not exist.
  $ini = XSUtil::toProjectIni($project);
  if (file_exists($ini) === false) {
      echo "错误:无效的项目名称 ($project),不存在相应的配置文件。\n";
      exit(-1);
  }
  // Execute the requested action. Exactly one branch runs, checked in this order:
  // --hot, --info, --list-synonyms=<word>, --list-synonyms, --terms, --correct,
  // --suggest, --related, and finally a normal search.
  try {
      // Mode switches and the paging option.
      $hot = XSUtil::getOpt(null, 'hot');
      $suggest = XSUtil::getOpt(null, 'suggest');
      $correct = XSUtil::getOpt(null, 'correct');
      $related = XSUtil::getOpt(null, 'related');
      // NOTE(review): $output is read but never used in this script - candidate for removal.
      $output = XSUtil::getOpt(null, 'output');
      $limit = XSUtil::getOpt(null, 'limit');

      // Default result count is 10; intval() keeps only the leading number of "offset,num".
      $limit1 = $limit === null ? 10 : intval($limit);
      $db = XSUtil::getOpt('d', 'db');

      // Create the XS object and its search handle.
      $xs = new XS($ini);
      $search = $xs->search;
      $search->setCharset('UTF-8');

      // --db=a,b,c: the first name replaces the current db, the rest are added to it.
      if ($db !== null) {
          $dbs = array_map('trim', explode(',', $db));
          $search->setDb(array_shift($dbs));
          foreach ($dbs as $name) {
              $search->addDb($name);
          }
      }
      if ($scws_multi !== null) {
          $search->setScwsMulti($scws_multi);
      }

      if ($hot !== null) {
          // Hot queries: 'cur' and 'last' select the matching counters, anything else means total.
          $type = $hot === 'cur' ? 'currnum' : ($hot === 'last' ? 'lastnum' : 'total');
          $result = $search->getHotQuery($limit1, $type);
          if (count($result) === 0) {
              echo "暂无相关热门搜索记录。\n";
          } else {
              $i = 1;
              printf("序 %s %s\n%s\n", XSUtil::fixWidth('搜索关键词(' . $type . ')', 40), XSUtil::fixWidth('次数', 10), XSUtil::fixWidth('', 56, '-'));
              foreach ($result as $word => $freq) {
                  printf("%2d. %s %d\n", $i, XSUtil::fixWidth($word, 40), $freq);
                  $i++;
              }
          }
      } elseif ($info !== null) {
          // Server info
          echo "---------- SERVER INFO BEGIN ----------\n";
          $res = $search->execCommand(XS_CMD_DEBUG);
          echo $res->buf;
          echo "\n---------- SERVER INFO END ----------\n";
          // Thread pool
          $res = $search->execCommand(XS_CMD_SEARCH_DRAW_TPOOL);
          echo $res->buf;
      } elseif (is_string($synonyms) && $synonyms !== 'stemmed') {
          // --list-synonyms=<word>: show the synonyms of that single word.
          echo "列出\033[7m" . $synonyms . "\033[m的同义词:\n";
          $synonyms = $search->getSynonyms($synonyms);
          print_r($synonyms);
      } elseif ($synonyms !== null) {
          // List all synonyms; --limit may be "num" or "offset,num".
          if ($limit === null) {
              $offset = $limit1 = 0;
          } elseif (($pos = strpos($limit, ',')) === false) {
              $offset = 0;
          } else {
              $limit1 = intval(substr($limit, $pos + 1));
              $offset = intval($limit);
          }
          $synonyms = $search->getAllSynonyms($limit1, $offset, $synonyms === 'stemmed');
          if (count($synonyms) == 0) {
              echo "暂无相关的同义词记录";
              if ($offset != 0) {
                  echo ",反正总数不超过 $offset 个";
              }
              echo "。\n";
          } else {
              $i = $offset + 1;
              printf(" %s %s\n%s\n", XSUtil::fixWidth('原词', 32), '同义词', XSUtil::fixWidth('', 56, '-'));
              foreach ($synonyms as $raw => $list) {
                  printf("%4d. %s %s\n", $i++, XSUtil::fixWidth($raw, 29), implode(", ", $list));
              }
          }
      } elseif ($terms !== null) {
          $result = $search->terms($query);
          echo "列出\033[7m" . $query . "\033[m的内部切分结果:\n";
          print_r($result);
      } elseif ($correct !== null) {
          $result = $search->getCorrectedQuery($query);
          if (count($result) === 0) {
              echo "目前对\033[7m" . $query . "\033[m还没有更好的修正方案。\n";
          } else {
              echo "您可以试试找:\033[4m" . implode("\033[m \033[4m", $result) . "\033[m\n";
          }
      } elseif ($suggest !== null) {
          $result = $search->getExpandedQuery($query, $limit1);
          $total = count($result);
          if ($total === 0) {
              echo "目前对\033[7m" . $query . "\033[m还没有任何搜索建议。\n";
          } else {
              echo "展开\033[7m" . $query . "\033[m得到以下搜索建议:\n";
              for ($i = 0; $i < $total; $i++) {
                  printf("%d. %s\n", $i + 1, $result[$i]);
              }
          }
      } elseif ($related !== null) {
          $result = $search->getRelatedQuery($query, $limit1);
          $total = count($result);
          if ($total === 0) {
              echo "目前还没有与\033[7m" . $query . "\033[m相关的搜索词。\n";
          } else {
              echo "与\033[7m" . $query . "\033[m相关的搜索词:\n";
              for ($i = 0; $i < $total; $i++) {
                  printf("%d. %s\n", $i + 1, $result[$i]);
              }
          }
      } else {
          // Normal search.
          if (XSUtil::getOpt(null, 'fuzzy') !== null) {
              $search->setFuzzy();
          }
          $syn = XSUtil::getOpt(null, 'synonym');
          if ($syn !== null) {
              $search->setAutoSynonyms();
              // A value other than boolean true is used as the synonym scale.
              if ($syn !== true) {
                  $search->setSynonymScale(floatval($syn));
              }
          }

          // Paging: "num" or "offset,num". The null check avoids handing null to
          // strpos() (deprecated since PHP 8.1) when --limit was not given at all.
          if ($limit === null || ($pos = strpos($limit, ',')) === false) {
              $offset = 0;
          } else {
              $limit1 = intval(substr($limit, $pos + 1));
              $offset = intval($limit);
          }

          // Sort: comma separated field names, a leading "~" maps the field to false, otherwise true.
          if ($sort !== null) {
              $fields = array();
              foreach (explode(',', $sort) as $tmp) {
                  $tmp = trim($tmp);
                  if ($tmp === '') {
                      continue;
                  }
                  if (substr($tmp, 0, 1) === '~') {
                      $fields[substr($tmp, 1)] = false;
                  } else {
                      $fields[$tmp] = true;
                  }
              }
              $search->setMultiSort($fields);
          }

          // Special fields; the body field (when present) gets cutlen set to 100.
          $fid = $xs->getFieldId();
          $ftitle = $xs->getFieldTitle();
          $fbody = $xs->getFieldBody();
          if ($fbody) {
              $xs->getFieldBody()->cutlen = 100;
          }

          // Extract "field:from..to" range filters from the query text; an empty
          // bound becomes null and the matched text is removed from the query.
          $ranges = array();
          if (strpos($query, '..') !== false) {
              $regex = '/(\S+?):(\S*?)\.\.(\S*)/';
              if (preg_match_all($regex, $query, $matches) > 0) {
                  $found = count($matches[0]);
                  for ($i = 0; $i < $found; $i++) {
                      $ranges[] = array($matches[1][$i],
                          $matches[2][$i] === '' ? null : $matches[2][$i],
                          $matches[3][$i] === '' ? null : $matches[3][$i]);
                      $query = str_replace($matches[0][$i], '', $query);
                  }
              }
          }

          // Set the query first, then attach the ranges.
          $search->setQuery($query);
          foreach ($ranges as $range) {
              $search->addRange($range[0], $range[1], $range[2]);
          }

          // Weights: "word", "word:weight", "field:word" or "field:word:weight".
          if ($weights !== null) {
              foreach (explode(',', $weights) as $tmp) {
                  $tmp = explode(':', trim($tmp));
                  if (count($tmp) === 1) {
                      $search->addWeight(null, $tmp[0]);
                  } elseif (count($tmp) === 2) {
                      // Two parts: a numeric second part is a weight, otherwise the first part is a field.
                      if (is_numeric($tmp[1])) {
                          $search->addWeight(null, $tmp[0], floatval($tmp[1]));
                      } else {
                          $search->addWeight($tmp[0], $tmp[1]);
                      }
                  } else {
                      $search->addWeight($tmp[0], $tmp[1], floatval($tmp[2]));
                  }
              }
          }

          // Cut off: "percent,weight", a value containing "." (weight only), or a plain percent.
          // NOTE(review): a comma at position 0 is treated like "no comma" because strpos() returns 0.
          if ($cut_off !== null) {
              if (($pos = strpos($cut_off, ','))) {
                  $search->setCutOff(substr($cut_off, 0, $pos), substr($cut_off, $pos + 1));
              } elseif (strpos($cut_off, '.') !== false) {
                  $search->setCutOff(0, $cut_off);
              } else {
                  $search->setCutOff($cut_off);
              }
          }

          // Perform the search and time it.
          $begin = microtime(true);
          $result = $search->setLimit($limit1, $offset)->search();
          $cost = microtime(true) - $begin;
          $matched = $search->getLastCount();
          $total = $search->getDbTotal();

          // Show the parsed query when --show-query is given.
          if (XSUtil::getOpt(null, 'show-query') !== null) {
              echo str_repeat("-", 20) . "\n";
              echo "解析后的 QUERY 语句:" . $search->getQuery() . "\n";
              echo str_repeat("-", 20) . "\n";
          }

          // Corrected and related queries, fetched after the search has run.
          $correct = $search->getCorrectedQuery();
          $related = $search->getRelatedQuery();

          // Summary line
          printf("在 %s 条数据中,大约有 %d 条包含 \033[7m%s\033[m ,第 %d-%d 条,用时:%.4f 秒。\n", number_format($total), $matched, $query, min($matched, $offset + 1), min($matched, $limit1 + $offset), $cost);

          // Spelling suggestions
          if (count($correct) > 0) {
              echo "您是不是想找:\033[4m" . implode("\033[m \033[4m", $correct) . "\033[m\n";
          }

          // Result documents
          foreach ($result as $doc) /* @var $doc XSDocument */ {
              // Title and body are highlighted only when the project defines those fields.
              $body = $title = '';
              if ($ftitle !== false) {
                  $title = cliHighlight($doc->f($ftitle));
              }
              if ($fbody !== false) {
                  $body = cliHighlight($doc->f($fbody)) . "\n";
              }

              // Main line: rank, title, id, percent and weight.
              printf("\n%d. \033[4m%s#%s# [%d%%,%.2f]\033[m\n", $doc->rank(), $title, $doc->f($fid), $doc->percent(), $doc->weight());
              echo $body;

              // Other fields are packed into lines of at most 80 bytes; a field
              // that does not fit flushes the pending line and is printed alone.
              $line = '';
              foreach ($xs->getAllFields() as $field) /* @var $field XSFieldMeta */ {
                  if ($field->isSpeical()) {
                      continue;
                  }
                  $tmp = ucfirst($field->name) . ':' . cliHighlight($doc->f($field));
                  if ((strlen($tmp) + strlen($line)) > 80) {
                      if (strlen($line) > 0) {
                          echo $line . "\n";
                          $line = '';
                      }
                      echo $tmp . "\n";
                  } else {
                      $line .= $tmp . ' ';
                  }
              }
              if (strlen($line) > 0) {
                  echo $line . "\n";
              }
          }

          // Related searches
          if (count($related) > 0) {
              echo "\n相关搜索:\033[4m" . implode("\033[m \033[4m", $related) . "\033[m\n";
          }
          echo "\n";
      }
  } catch (XSException $e) {
      // Print the exception followed by its stack trace with shortened paths:
      // this script's directory is stripped, and the parent directory is
      // replaced by the relative path returned from XSException::getRelPath().
      $start = dirname(dirname(__FILE__));
      $relative = XSException::getRelPath($start);
      $traceString = $e->getTraceAsString();
      $traceString = str_replace(dirname(__FILE__) . '/', '', $traceString);
      $traceString = str_replace($start . ($relative === '' ? '/' : ''), $relative, $traceString);
      echo $e . "\n" . $traceString . "\n";
  }
  /**
   * Highlight search matches in a string for terminal display.
   *
   * The text is passed through the global $search object's highlight() method,
   * each <em>...</em> pair on a single line is turned into an ANSI
   * reverse-video sequence, one trailing space is appended, and any <em> or
   * </em> tags that are still left are removed.
   *
   * @param string $str raw field value
   * @return string text ready to be echoed to the terminal
   */
  function cliHighlight($str)
  {
      global $search;

      $marked = $search->highlight($str);
      $ansi = preg_replace('#<em>(.+?)</em>#', "\033[7m\\1\033[m", $marked);

      // Tags the pattern could not pair up (for example across a line break) are dropped.
      $leftovers = array('<em>' => '', '</em>' => '');

      return strtr($ansi . ' ', $leftovers);
  }