| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127 | 
							- <?php
 
- /**
 
-  * @copyright Copyright (c) 2014 Carsten Brandt
 
-  * @license https://github.com/cebe/js-search/blob/master/LICENSE
 
-  * @link https://github.com/cebe/js-search#readme
 
-  */
 
- namespace cebe\jssearch;
 
- use cebe\jssearch\analyzer\HtmlAnalyzer;
 
- use cebe\jssearch\tokenizer\StandardTokenizer;
 
/**
 * Indexer builds a search index from a set of files and exports it as JavaScript.
 *
 * Each file is read, passed through the configured analyzer (which in turn uses the
 * configured tokenizer) and the resulting weighted tokens are collected in [[index]].
 * Meta information about every indexed file is collected in [[files]].
 * [[exportJs()]] renders both structures as assignments to the `jssearch` JavaScript object.
 *
 * @author Carsten Brandt <mail@cebe.cc>
 */
class Indexer
{
	/**
	 * @var array the word index: token => list of entries `['f' => file number, 'w' => weight]`.
	 * The file number refers to a key of [[files]].
	 */
	public $index = [];
	/**
	 * @var array information about the indexed files: file number => `['url' => ..., 'title' => ...]`.
	 * File numbers start at 1 and continue across multiple calls to [[indexFiles()]].
	 */
	public $files = [];

	/**
	 * @var TokenizerInterface|null the tokenizer, created lazily by [[getTokenizer()]].
	 */
	private $_tokenizer;
	/**
	 * @var AnalyzerInterface|null the analyzer, created lazily by [[getAnalyzer()]].
	 */
	private $_analyzer;


	/**
	 * Reads the given files and adds their words to the index.
	 *
	 * May be called more than once; file numbering continues after the files
	 * that have been indexed before.
	 *
	 * Note: the result of `file_get_contents()` is used unchecked, so an unreadable
	 * file is passed on to the analyzer as `false` and still receives a file entry.
	 *
	 * @param array $files list of paths of the files to index.
	 * @param string $basePath the part at the beginning of each file path that is
	 * cut off to build the URL of the file.
	 * @param string $baseUrl the prefix that is prepended to the remaining path to build the URL.
	 */
	public function indexFiles($files, $basePath, $baseUrl = './')
	{
		// analyzer and tokenizer do not change while indexing, so resolve them only once
		$analyzer = $this->getAnalyzer();
		$tokenizer = $this->getTokenizer();

		$fi = count($this->files);
		foreach ($files as $file) {
			$fi++;

			$contents = file_get_contents($file);

			// create file entry
			$this->files[$fi] = $this->generateFileInfo($file, $contents, $basePath, $baseUrl);

			// analyze file
			foreach ($analyzer->analyze($contents, $tokenizer) as $index) {
				foreach ($index as $word) {
					// $word['t'] - the token
					// $word['w'] - the weight
					$token = $word['t'];
					if (isset($this->index[$token][$fi])) {
						// the token occurred in this file before: combine the weights by multiplication
						$this->index[$token][$fi]['w'] *= $word['w'];
					} else {
						$this->index[$token][$fi] = [
							'f' => $fi,
							'w' => $word['w'],
						];
					}
				}
			}
		}

		// reset array indexes for files to create correct json arrays
		foreach ($this->index as $word => $entries) {
			$this->index[$word] = array_values($entries);
		}
	}

	/**
	 * Creates the entry of a file for the [[files]] list.
	 *
	 * The title is taken from the first `<h1>` element, falling back to the `<title>`
	 * element. Both are matched regardless of letter case and of attributes on the
	 * opening tag. If neither is found the title is `<i>No title</i>`.
	 *
	 * @param string $file path of the file.
	 * @param string $contents the content of the file.
	 * @param string $basePath the part at the beginning of `$file` that is cut off to build the URL.
	 * @param string $baseUrl the prefix that is prepended to the remaining path.
	 * @return array the file information with the keys `url` and `title`.
	 */
	protected function generateFileInfo($file, $contents, $basePath, $baseUrl)
	{
		if (preg_match('~<h1(?:\s[^>]*)?>(.*?)</h1>~si', $contents, $matches)) {
			$title = strip_tags($matches[1]);
		} elseif (preg_match('~<title(?:\s[^>]*)?>(.*?)</title>~si', $contents, $matches)) {
			$title = strip_tags($matches[1]);
		} else {
			$title = '<i>No title</i>';
		}

		// path of the file relative to $basePath, using forward slashes
		$relativePath = str_replace('\\', '/', substr($file, strlen(rtrim($basePath, '\\/'))));
		// The relative path normally starts with a slash. Drop it when the base URL already
		// ends with one, otherwise './' results in './/page.html' and '/' in '//page.html'.
		if (substr($baseUrl, -1) === '/') {
			$relativePath = ltrim($relativePath, '/');
		}

		return [
			'url' => $baseUrl . $relativePath,
			'title' => $title,
		];
	}

	/**
	 * Renders the index, the file list and the tokenizer as JavaScript code.
	 *
	 * @return string JavaScript statements that assign `jssearch.index`, `jssearch.files`
	 * and `jssearch.tokenizeString`.
	 */
	public function exportJs()
	{
		$index = json_encode($this->index);
		$files = json_encode($this->files);
		$tokenizeString = $this->getTokenizer()->tokenizeJs();

		return <<<JS
jssearch.index = $index;
jssearch.files = $files;
jssearch.tokenizeString = $tokenizeString;
JS;
	}

	/**
	 * Returns the tokenizer. A StandardTokenizer is created if none has been set.
	 *
	 * @return TokenizerInterface
	 */
	public function getTokenizer()
	{
		if ($this->_tokenizer === null) {
			$this->_tokenizer = new StandardTokenizer();
		}
		return $this->_tokenizer;
	}

	/**
	 * Sets the tokenizer to use for indexing and for the JavaScript export.
	 *
	 * @param TokenizerInterface $tokenizer
	 */
	public function setTokenizer($tokenizer)
	{
		$this->_tokenizer = $tokenizer;
	}

	/**
	 * Returns the analyzer. An HtmlAnalyzer is created if none has been set.
	 *
	 * @return AnalyzerInterface
	 */
	public function getAnalyzer()
	{
		if ($this->_analyzer === null) {
			$this->_analyzer = new HtmlAnalyzer();
		}
		return $this->_analyzer;
	}

	/**
	 * Sets the analyzer to use for indexing.
	 *
	 * @param AnalyzerInterface $analyzer
	 */
	public function setAnalyzer($analyzer)
	{
		$this->_analyzer = $analyzer;
	}
}
 
 
  |