class-link-extractor.php 993 B

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. <?php
  2. /**
  3. * WPSEO plugin file.
  4. *
  5. * @package WPSEO\Admin\Links
  6. */
  7. /**
  8. * Represents the link extractor.
  9. */
  class WPSEO_Link_Extractor {

      /**
       * The content to extract the links from.
       *
       * @var string
       */
      protected $content;

      /**
       * Stores the content that extract() will scan for links.
       *
       * @param string $content The content to extract the links from.
       */
      public function __construct( $content ) {
          $this->content = $content;
      }

      /**
       * Extracts the href values of the anchor tags in the content and returns them as an array.
       *
       * Double-quoted, single-quoted and unquoted attribute values are recognised.
       * Surrounding single quotes are stripped from the returned value. The pattern
       * stops a value at the first space, double quote or closing angle bracket, so
       * values containing a space are not captured in full.
       *
       * @return array All the extracted links, in the order in which they occur. Empty when nothing matched.
       */
      public function extract() {
          $links = [];

          // Cheap pre-check that avoids running the regex on content without any href.
          // It has to be case-insensitive, because the regex below is as well (`i` modifier);
          // a case-sensitive check would wrongly discard markup such as <A HREF="...">.
          if ( stripos( $this->content, 'href' ) === false ) {
              return $links;
          }

          // Group 1 captures an optional opening double quote, group 2 the attribute value.
          // The lookbehind makes sure `href` is a complete attribute name, so attributes
          // that merely end in "href" (for example `data-href`) are skipped.
          $regexp = '<a\s[^>]*(?<![\w-])href=("??)([^" >]*?)\\1[^>]*>';

          // Modifier `i` matches case-insensitively. Modifier `U` inverts greediness:
          // plain quantifiers become lazy, quantifiers followed by `?` become greedy.
          if ( preg_match_all( "/$regexp/iU", $this->content, $matches, PREG_SET_ORDER ) ) {
              foreach ( $matches as $match ) {
                  // Single-quoted values are captured including their quotes; strip them.
                  $links[] = trim( $match[2], "'" );
              }
          }

          return $links;
      }
  }