*/
class Indexer
{
/**
 * @var array search index filled by indexFiles(). Each key is a token, each
 * value is a list of entries of the form ['f' => file id, 'w' => weight].
 */
public $index = [];
/**
 * @var array indexed files filled by indexFiles(). Each key is a numeric file
 * id, each value is the array returned by generateFileInfo(), i.e.
 * ['url' => ..., 'title' => ...].
 */
public $files = [];
/**
 * Adds the given files to the search index.
 *
 * Every readable file receives the next numeric id (continuing after the
 * files that are already registered), an entry in $this->files created by
 * generateFileInfo(), and one entry per token in $this->index. If a token is
 * reported more than once for the same file, its weights are multiplied.
 *
 * Files that cannot be read are skipped: they get no id, no entry in
 * $this->files and contribute nothing to $this->index.
 *
 * @param array $files paths of the files to index.
 * @param string $basePath passed to generateFileInfo() to build the file URL.
 * @param string $baseUrl passed to generateFileInfo() to build the file URL.
 */
public function indexFiles($files, $basePath, $baseUrl = './')
{
    $fi = count($this->files);
    foreach ($files as $file) {
        $contents = file_get_contents($file);
        if ($contents === false) {
            // file_get_contents() has already raised a warning. Skip the file
            // before an id is consumed so that ids stay consecutive and no
            // entry pointing at an unreadable document ends up in the index.
            continue;
        }
        $fi++;

        // create file entry
        $this->files[$fi] = $this->generateFileInfo($file, $contents, $basePath, $baseUrl);

        // analyze file
        foreach ($this->getAnalyzer()->analyze($contents, $this->getTokenizer()) as $index) {
            foreach ($index as $word) {
                // $word['t'] - the token
                // $word['w'] - the weight
                $token = $word['t'];
                if (isset($this->index[$token][$fi])) {
                    $this->index[$token][$fi]['w'] *= $word['w'];
                } else {
                    $this->index[$token][$fi] = [
                        'f' => $fi,
                        'w' => $word['w'],
                    ];
                }
            }
        }
    }

    // The per-token entries were keyed by file id while building; turn them
    // into 0-based lists so that json_encode() emits JSON arrays.
    foreach ($this->index as $token => $entries) {
        $this->index[$token] = array_values($entries);
    }
}
/**
 * Builds the entry that is stored in $this->files for one indexed file.
 *
 * The title is the content of the first <h1> element, or of the <title>
 * element when there is no <h1>; markup inside the matched element is
 * removed with strip_tags(). If neither element is found the title is
 * 'No title'. Element names are matched case-insensitively and may carry
 * attributes.
 *
 * The URL is $baseUrl followed by what remains of $file after cutting off as
 * many leading bytes as $basePath (without trailing slashes/backslashes) is
 * long, with backslashes converted to forward slashes.
 *
 * @param string $file path of the indexed file.
 * @param string $contents contents of the file.
 * @param string $basePath leading part of $file that is not part of the URL.
 * @param string $baseUrl prefix for the generated URL.
 * @return array the file entry with the keys 'url' and 'title'.
 */
protected function generateFileInfo($file, $contents, $basePath, $baseUrl)
{
    $title = 'No title';

    // Ordered by priority: a heading inside the page wins over the document
    // title. Each pattern must name its element explicitly - a bare
    // "(.*?)" matches every input with an empty capture and would make the
    // 'No title' fallback unreachable.
    $patterns = [
        '~<h1\b[^>]*>(.*?)</h1>~is',
        '~<title\b[^>]*>(.*?)</title>~is',
    ];
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $contents, $matches)) {
            $title = strip_tags($matches[1]);
            break;
        }
    }

    $prefixLength = strlen(rtrim($basePath, '\\/'));

    return [
        'url' => $baseUrl . str_replace('\\', '/', substr($file, $prefixLength)),
        'title' => $title,
    ];
}
/**
 * Prepares the collected data for export (presumably as JavaScript source,
 * judging by the method name - confirm against the complete file).
 *
 * The visible part JSON-encodes $this->index and $this->files and fetches the
 * string returned by the tokenizer's tokenizeJs() method.
 *
 * NOTE(review): in this copy of the file the return statement below runs
 * straight into the body of a lazy getter for $this->_tokenizer. The text
 * that the return statement assembled and the getter's signature are not
 * visible here - verify against version control before changing this code.
 */
public function exportJs()
{
$index = json_encode($this->index);
$files = json_encode($this->files);
$tokenizeString = $this->getTokenizer()->tokenizeJs();
// NOTE(review): the statement on the next line is incomplete as shown.
return <<_tokenizer === null) {
// no tokenizer has been set yet: create a StandardTokenizer and keep it
$this->_tokenizer = new StandardTokenizer();
}
return $this->_tokenizer;
}
/**
 * Replaces the tokenizer stored on this indexer.
 *
 * The value is stored as given; no validation is performed.
 *
 * @param TokenizerInterface $tokenizer the tokenizer to store.
 */
public function setTokenizer($tokenizer)
{
    $this->_tokenizer = $tokenizer;
}
/**
 * @var AnalyzerInterface|null analyzer set via setAnalyzer(), or null as long
 * as none has been set or created.
 */
private $_analyzer;

/**
 * Returns the analyzer of this indexer.
 *
 * When no analyzer has been stored yet, an HtmlAnalyzer is created, stored
 * and returned, so later calls return the same instance.
 *
 * @return AnalyzerInterface
 */
public function getAnalyzer()
{
    if ($this->_analyzer !== null) {
        return $this->_analyzer;
    }

    $this->_analyzer = new HtmlAnalyzer();

    return $this->_analyzer;
}
/**
 * Replaces the analyzer stored on this indexer.
 *
 * The value is stored as given; no validation is performed.
 *
 * @param AnalyzerInterface $analyzer the analyzer to store.
 */
public function setAnalyzer($analyzer)
{
    $this->_analyzer = $analyzer;
}
}