Stopwords.php 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. <?php
  2. /**
  3. * Copyright © Magento, Inc. All rights reserved.
  4. * See COPYING.txt for license details.
  5. */
  6. namespace Magento\Elasticsearch\SearchAdapter\Query\Preprocessor;
  7. use Magento\Framework\Filesystem\Directory\ReadFactory;
  8. use Magento\Elasticsearch\Model\Adapter\Index\Config\EsConfigInterface;
  9. use Magento\Framework\Search\Adapter\Preprocessor\PreprocessorInterface;
  10. use Magento\Framework\Module\Dir;
  /**
   * Search query preprocessor that removes stopwords from a query string.
   *
   * The stopwords file is selected by the locale of the current store, read from
   * the configured module directory, and its content is kept in the config cache.
   *
   * @api
   * @since 100.1.0
   */
  class Stopwords implements PreprocessorInterface
  {
      /**
       * Cache id (prefix) for elasticsearch stopwords
       */
      const CACHE_ID = 'elasticsearch_stopwords';

      /**
       * Stopwords file modification time gap, seconds
       *
       * While the stopwords file was modified more recently than this gap, the cache
       * is bypassed and the file is re-read on every request.
       */
      const STOPWORDS_FILE_MODIFICATION_TIME_GAP = 900;

      /**
       * @var \Magento\Store\Model\StoreManagerInterface
       * @since 100.1.0
       */
      protected $storeManager;

      /**
       * @var \Magento\Framework\Locale\Resolver
       * @since 100.1.0
       */
      protected $localeResolver;

      /**
       * @var ReadFactory
       * @since 100.1.0
       */
      protected $readFactory;

      /**
       * @var \Magento\Framework\App\Cache\Type\Config
       * @since 100.1.0
       */
      protected $configCache;

      /**
       * @var EsConfigInterface
       * @since 100.1.0
       */
      protected $esConfig;

      /**
       * @var \Magento\Framework\Module\Dir\Reader
       * @since 100.1.0
       */
      protected $moduleDirReader;

      /**
       * Name of the module whose directory holds the stopwords files
       *
       * @var string
       */
      private $stopwordsModule;

      /**
       * Path of the stopwords directory, relative to the module directory
       *
       * @var string
       */
      private $stopwordsDirectory;

      /**
       * Lazily resolved serializer, see getSerializer()
       *
       * @var \Magento\Framework\Serialize\SerializerInterface
       */
      private $serializer;

      /**
       * Initialize dependencies.
       *
       * @param \Magento\Store\Model\StoreManagerInterface $storeManager
       * @param \Magento\Framework\Locale\Resolver $localeResolver
       * @param ReadFactory $readFactory
       * @param \Magento\Framework\App\Cache\Type\Config $configCache
       * @param EsConfigInterface $esConfig
       * @param \Magento\Framework\Module\Dir\Reader $moduleDirReader
       * @param string $stopwordsModule
       * @param string $stopwordsDirectory
       */
      public function __construct(
          \Magento\Store\Model\StoreManagerInterface $storeManager,
          \Magento\Framework\Locale\Resolver $localeResolver,
          ReadFactory $readFactory,
          \Magento\Framework\App\Cache\Type\Config $configCache,
          EsConfigInterface $esConfig,
          \Magento\Framework\Module\Dir\Reader $moduleDirReader,
          $stopwordsModule = '',
          $stopwordsDirectory = ''
      ) {
          $this->storeManager = $storeManager;
          $this->localeResolver = $localeResolver;
          $this->readFactory = $readFactory;
          $this->configCache = $configCache;
          $this->esConfig = $esConfig;
          $this->moduleDirReader = $moduleDirReader;
          $this->stopwordsModule = $stopwordsModule;
          $this->stopwordsDirectory = $stopwordsDirectory;
      }

      /**
       * Remove stopwords from the query.
       *
       * The query is split on single spaces; every part that exactly matches an entry
       * of the stopwords list (string comparison, case-sensitive) is dropped and the
       * remaining parts are joined back with single spaces.
       *
       * @param string $query
       * @return string
       * @since 100.1.0
       */
      public function process($query)
      {
          $stopwords = $this->getStopwordsList();
          $queryParts = explode(' ', $query);
          $query = implode(' ', array_diff($queryParts, $stopwords));

          return trim($query);
      }

      /**
       * Get stopwords list for current locale
       *
       * The list is served from the config cache unless the stopwords file was modified
       * within the last STOPWORDS_FILE_MODIFICATION_TIME_GAP seconds or nothing is cached
       * yet; in those cases the file is read again and the cache entry is rewritten.
       *
       * @return array
       * @since 100.1.0
       */
      protected function getStopwordsList()
      {
          $filename = $this->getStopwordsFile();
          $fileDir = $this->moduleDirReader->getModuleDir(Dir::MODULE_ETC_DIR, $this->stopwordsModule)
              . '/' . $this->stopwordsDirectory;
          $source = $this->readFactory->create($fileDir);
          $fileStats = $source->stat($filename);

          // The stopwords file depends on the locale, so the cache entry must be keyed by
          // the file as well; a single shared key would hand one locale's stopwords to
          // every other locale. Characters outside [a-zA-Z0-9_] are replaced to keep the
          // identifier safe for any cache backend.
          $cacheId = self::CACHE_ID . '_' . preg_replace('/[^a-zA-Z0-9_]/', '_', $filename);

          $isFileSettled = (time() - $fileStats['mtime']) > self::STOPWORDS_FILE_MODIFICATION_TIME_GAP;
          $cachedValue = $isFileSettled ? $this->configCache->load($cacheId) : false;
          if ($cachedValue) {
              return $this->getSerializer()->unserialize($cachedValue);
          }

          $fileContent = $source->readFile($filename);
          // Trim every line so that files saved with CRLF line endings (or stray
          // surrounding whitespace) still produce entries that match query parts.
          // Empty entries are intentionally kept: they make process() drop the empty
          // parts produced by consecutive spaces in the query.
          $stopwords = array_map('trim', explode("\n", $fileContent));
          $this->configCache->save($this->getSerializer()->serialize($stopwords), $cacheId);

          return $stopwords;
      }

      /**
       * Get stopwords file for current locale
       *
       * Resolves the locale of the current store and looks it up in the stopwords info
       * provided by the Elasticsearch config, falling back to the 'default' entry.
       *
       * @return string
       * @since 100.1.0
       */
      protected function getStopwordsFile()
      {
          $stopwordsInfo = $this->esConfig->getStopwordsInfo();
          $storeId = $this->storeManager->getStore()->getId();

          $this->localeResolver->emulate($storeId);
          try {
              $locale = $this->localeResolver->getLocale();
          } finally {
              // Every emulate() has to be paired with revert(); otherwise the emulated
              // locale stays active on the shared resolver after this call and the
              // resolver's emulation stack grows with every processed query.
              $this->localeResolver->revert();
          }

          // NOTE(review): assumes the stopwords info always contains a 'default' entry.
          return isset($stopwordsInfo[$locale]) ? $stopwordsInfo[$locale] : $stopwordsInfo['default'];
      }

      /**
       * Get serializer
       *
       * The serializer is fetched from the object manager on first use and then reused.
       *
       * @return \Magento\Framework\Serialize\SerializerInterface
       * @deprecated 100.2.0
       */
      private function getSerializer()
      {
          if (null === $this->serializer) {
              $this->serializer = \Magento\Framework\App\ObjectManager::getInstance()
                  ->get(\Magento\Framework\Serialize\SerializerInterface::class);
          }

          return $this->serializer;
      }
  }