SynonymAnalyzer.php 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. <?php
  2. /**
  3. * Copyright © Magento, Inc. All rights reserved.
  4. * See COPYING.txt for license details.
  5. */
  6. declare(strict_types=1);
  7. namespace Magento\Search\Model;
  8. use Magento\Search\Api\SynonymAnalyzerInterface;
  9. /**
  10. * SynonymAnalyzer responsible for search of synonyms matching a word or a phrase.
  11. */
  class SynonymAnalyzer implements SynonymAnalyzerInterface
  {
      /**
       * Model used to load the stored synonym groups for a phrase.
       *
       * @var SynonymReader $synReaderModel
       */
      protected $synReaderModel;

      /**
       * Constructor
       *
       * @param SynonymReader $synReader
       */
      public function __construct(SynonymReader $synReader)
      {
          $this->synReaderModel = $synReader;
      }

      /**
       * Returns an array of arrays consisting of the synonyms found for each word in the input phrase
       *
       * For phrase: "Elizabeth is the English queen" correct output is an array of arrays containing synonyms for each
       * word in the phrase:
       *
       * [
       *   0 => [ 0 => "elizabeth" ],
       *   1 => [ 0 => "is" ],
       *   2 => [ 0 => "the" ],
       *   3 => [ 0 => "british", 1 => "english" ],
       *   4 => [ 0 => "queen", 1 => "monarch" ]
       * ]
       *
       * A phrase that is empty or consists only of whitespace yields an empty array. A word for which no
       * synonym group matches is returned as a single-element array holding the word itself.
       *
       * @param string $phrase
       * @return array
       * @throws \Magento\Framework\Exception\LocalizedException
       */
      public function getSynonymsForPhrase($phrase)
      {
          $result = [];

          // Compare with '' explicitly: empty() is also true for the string "0", which is a valid phrase.
          if (trim($phrase) === '') {
              return $result;
          }

          // The groups are loaded with the phrase exactly as it was passed in, before spaces are collapsed.
          $synonymGroups = $this->getSynonymGroupsByPhrase($phrase);

          // Replace multiple spaces in a row with the only one space
          $phrase = preg_replace("/ {2,}/", " ", $phrase);

          // Go through every word looking for a stored synonym group that contains a sub-phrase starting at
          // that word. If there is no matching group, the word itself becomes the only entry for its position.
          $words = explode(' ', $phrase);
          foreach ($words as $offset => $word) {
              $synonyms = [$word];

              if ($synonymGroups) {
                  // The pattern is built from the current word and everything that follows it.
                  $pattern = $this->getSearchPattern(\array_slice($words, $offset));
                  $position = $this->findInArray($pattern, $synonymGroups);
                  if ($position !== null) {
                      $synonyms = explode(',', $synonymGroups[$position]);
                  }
              }

              $result[] = $synonyms;
          }

          return $result;
      }

      /**
       * Helper method to find the matching of $pattern to $synonymGroupsToExamine.
       *
       * The groups are tested in order and the index of the first one that matches is returned.
       * If none matches, null is returned.
       *
       * @param string $pattern
       * @param array $synonymGroupsToExamine
       * @return int|null
       */
      private function findInArray(string $pattern, array $synonymGroupsToExamine)
      {
          foreach ($synonymGroupsToExamine as $position => $synonymGroup) {
              // preg_match() yields 1 on a match, 0 on no match and false on error; only 1 counts as found.
              if (preg_match($pattern, $synonymGroup) === 1) {
                  return $position;
              }
          }

          return null;
      }

      /**
       * Returns a regular expression to search for synonyms of the phrase represented as the list of words.
       *
       * Returned pattern contains expression to search for a part of the phrase from the beginning.
       *
       * For example, in the phrase "Elizabeth is the English queen" with subset from the very first word,
       * the method will build an expression which looks for synonyms for all these patterns:
       * - Elizabeth is the English queen
       * - Elizabeth is the English
       * - Elizabeth is the
       * - Elizabeth is
       * - Elizabeth
       *
       * For the same phrase on the second iteration with the first word "is" it will match for these synonyms:
       * - is the English queen
       * - is the English
       * - is the
       * - is
       *
       * The pattern looks for an exact match and will not find these phrases as synonyms:
       * - Is there anybody in the room?
       * - Is the English is most popular language?
       * - Is the English queen Elizabeth?
       *
       * Take into account that returned pattern expects that data will be represented as comma-separated value.
       * Because every alternative requires at least one neighbouring comma, a group that consists of a single
       * entry without any comma is never matched.
       *
       * @param array $words
       * @return string
       */
      private function getSearchPattern(array $words): string
      {
          // Escape each word once, up front. The words come from the search phrase, so regex metacharacters
          // and the "/" delimiter must be matched literally instead of changing or breaking the expression.
          $quotedWords = array_map(
              static function ($word) {
                  return preg_quote((string)$word, '/');
              },
              $words
          );

          $patterns = [];
          for ($lastItem = count($quotedWords); $lastItem > 0; $lastItem--) {
              // Words of a sub-phrase may be separated by any amount of whitespace in the stored group.
              $phrase = implode('\s+', \array_slice($quotedWords, 0, $lastItem));
              // The sub-phrase must be a whole entry: first, middle or last item of the comma-separated list.
              $patterns[] = '^' . $phrase . ',';
              $patterns[] = ',' . $phrase . ',';
              $patterns[] = ',' . $phrase . '$';
          }

          // Case-insensitive alternation over all candidate sub-phrases.
          return '/' . implode('|', $patterns) . '/i';
      }

      /**
       * Get all synonym groups for the phrase
       *
       * Returns an array of synonyms which are represented as comma-separated value for each item in the list.
       * The values are taken from the 'synonyms' field of every row returned by the synonym reader.
       *
       * @param string $phrase
       * @return string[]
       * @throws \Magento\Framework\Exception\LocalizedException
       */
      private function getSynonymGroupsByPhrase(string $phrase): array
      {
          $result = [];

          /** @var array $synonymGroups */
          $synonymGroups = $this->synReaderModel->loadByPhrase($phrase)->getData();
          foreach ($synonymGroups as $row) {
              $result[] = $row['synonyms'];
          }

          return $result;
      }
  }