cache = $cache ?? new FileCache();
        $this->loadIndex();
    }

    /**
     * Adds a document to the inverted index and caches its display metadata.
     *
     * Each distinct token of $content is mapped to md5($path). A metadata
     * record (path, title, snippet, last_modified) is stored under
     * 'search_meta_<hash>' for 24 hours, then the index is persisted.
     *
     * NOTE(review): postings left by an earlier indexing of the same path are
     * not removed here, so words that disappeared from the content keep
     * matching until removeFromIndex() is called — confirm whether callers
     * rely on this additive behaviour.
     *
     * @param string $path     Identifier of the document; hashed with md5 to form the index key.
     * @param string $content  Raw text (may contain HTML) to tokenize and excerpt.
     * @param array  $metadata Optional 'title' (defaults to basename($path)) and
     *                         'modified' (defaults to time()).
     */
    public function indexContent(string $path, string $content, array $metadata = []): void
    {
        $pathHash = md5($path);

        foreach ($this->tokenize($content) as $word) {
            $postings = $this->index[$word] ?? [];

            // Strict comparison is required: md5 hex strings that look numeric
            // ("0e1234…", or all digits) compare equal under loose ==, which
            // would make a different document's hash count as "already present".
            if (!in_array($pathHash, $postings, true)) {
                $postings[] = $pathHash;
            }

            $this->index[$word] = $postings;
        }

        // Store metadata for this path.
        $this->cache->set('search_meta_' . $pathHash, [
            'path' => $path,
            'title' => $metadata['title'] ?? basename($path),
            'snippet' => $this->generateSnippet($content),
            'last_modified' => $metadata['modified'] ?? time(),
        ], 86400); // 24 hours

        $this->saveIndex();
    }

    /**
     * Returns the best-matching documents for a free-text query.
     *
     * The score of a document is the number of distinct query terms whose
     * posting list contains it. Documents whose metadata record can no longer
     * be read from the cache are skipped and do not count towards $limit.
     *
     * @param string $query Free-text query; tokenized the same way as indexed content.
     * @param int    $limit Maximum number of results; values <= 0 yield an empty array.
     *
     * @return array List of metadata records, each extended with a 'score' key,
     *               ordered by descending score.
     */
    public function search(string $query, int $limit = 20): array
    {
        $pathScores = [];

        foreach ($this->tokenize($query) as $term) {
            foreach (($this->index[$term] ?? []) as $pathHash) {
                $pathScores[$pathHash] = ($pathScores[$pathHash] ?? 0) + 1;
            }
        }

        // Highest number of matching terms first.
        arsort($pathScores);

        $results = [];

        foreach ($pathScores as $pathHash => $score) {
            if (count($results) >= $limit) {
                break;
            }

            $metadata = $this->cache->get('search_meta_' . $pathHash);

            // Only non-empty array records can be merged; anything else
            // (missing entry, unexpected payload) is ignored rather than
            // triggering a TypeError in array_merge().
            if (is_array($metadata) && $metadata !== []) {
                $results[] = array_merge($metadata, ['score' => $score]);
            }
        }

        return $results;
    }

    /**
     * Removes every posting and the cached metadata of a document, then
     * persists the index.
     *
     * @param string $path Same identifier that was passed to indexContent().
     */
    public function removeFromIndex(string $path): void
    {
        $pathHash = md5($path);

        foreach ($this->index as $word => $paths) {
            // array_filter() preserves keys; re-index so posting lists stay
            // plain lists when they are persisted.
            $remaining = array_values(
                array_filter($paths, static fn ($hash) => $hash !== $pathHash)
            );

            if ($remaining === []) {
                unset($this->index[$word]);
            } else {
                $this->index[$word] = $remaining;
            }
        }

        $this->cache->delete('search_meta_' . $pathHash);
        $this->saveIndex();
    }

    /**
     * Empties the in-memory index, clears the cache and persists the empty index.
     *
     * NOTE(review): clear() is called on the cache object without any key
     * prefix, so it presumably drops every entry of that cache, not only the
     * search-related ones — confirm this is intended if the cache is shared.
     */
    public function clearIndex(): void
    {
        $this->index = [];
        $this->cache->clear();
        $this->saveIndex();
    }

    /**
     * Splits text into a list of distinct, lower-cased search terms.
     *
     * Punctuation is replaced by spaces, the text is split on whitespace, and
     * stop words as well as words of at most two bytes are dropped.
     *
     * @param string $text Text to tokenize.
     *
     * @return array List of unique terms; empty when the text cannot be processed.
     */
    private function tokenize(string $text): array
    {
        // Multibyte-aware lower-casing, so that non-ASCII letters are folded
        // consistently with the Unicode-aware (/u) patterns below.
        $text = mb_strtolower($text, 'UTF-8');

        // preg_replace() returns null on failure (e.g. malformed UTF-8).
        $text = preg_replace('/[^\w\s]/u', ' ', $text);
        if ($text === null) {
            return [];
        }

        $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            return [];
        }

        // Filter out common stop words and short words.
        $stopWords = ['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can'];

        $words = array_filter(
            $words,
            // strlen() counts bytes; kept as-is so the length threshold of
            // already indexed terms does not change.
            static fn ($word) => strlen($word) > 2 && !in_array($word, $stopWords, true)
        );

        return array_values(array_unique($words));
    }

    /**
     * Builds a plain-text excerpt of the content for display in search results.
     *
     * HTML tags are stripped and whitespace runs are collapsed to one space.
     * Text longer than $length characters is cut and suffixed with '...'.
     *
     * @param string $content Raw content, possibly containing HTML.
     * @param int    $length  Maximum excerpt length in characters (before the suffix).
     *
     * @return string The excerpt.
     */
    private function generateSnippet(string $content, int $length = 150): string
    {
        // Remove HTML tags and extra whitespace.
        $content = strip_tags($content);
        $content = preg_replace('/\s+/', ' ', $content) ?? $content;

        // Measure and cut in characters rather than bytes: a byte-wise cut
        // could split a multibyte UTF-8 character and leave invalid UTF-8 in
        // the cached metadata.
        if (mb_strlen($content, 'UTF-8') <= $length) {
            return $content;
        }

        return mb_substr($content, 0, $length, 'UTF-8') . '...';
    }

    /**
     * Restores the index from the cache entry 'search_index', if one is present.
     *
     * Non-array payloads are ignored so that a damaged cache entry cannot
     * replace the index with an unusable value.
     */
    private function loadIndex(): void
    {
        $cached = $this->cache->get('search_index');

        if (is_array($cached)) {
            $this->index = $cached;
        }
    }

    /**
     * Persists the current index under the cache key 'search_index' for 24 hours.
     */
    private function saveIndex(): void
    {
        $this->cache->set('search_index', $this->index, 86400); // 24 hours
    }
}