cs-midi-docs/internal/search/index.go

142 lines
2.4 KiB
Go

package search
import (
"os"
"strings"
"sync"
"unicode"
"git.else-if.org/jess/cs-midi-docs/internal/content"
)
// Result is a single hit produced by Index.Search.
type Result struct {
	// Title and Path are copied from the index entry of the matching page.
	Title, Path string
	// Snippet is an excerpt of the page's raw text taken around the first
	// query term.
	Snippet string
}
// Index is an in-memory full-text index over a set of pages. It is filled by
// Build and queried by Search.
type Index struct {
	// mu guards entries: Build takes the write lock to swap in a new list,
	// Search holds the read lock while scanning it.
	mu      sync.RWMutex
	entries []entry
}
// entry is the indexed form of one page.
type entry struct {
	// title and path identify the page and are echoed back in results.
	title, path string
	// words holds the tokenized page text that queries are matched against.
	words []string
	// raw is the unmodified page text, kept for building snippets.
	raw string
}
// NewIndex returns an empty Index. It has no entries until Build is called.
func NewIndex() *Index {
	return new(Index)
}
// Build reads the file behind every page returned by tree.Flatten, tokenizes
// its contents, and replaces the index's entries with the result.
//
// A page whose file cannot be read is left out of the index; the error is not
// reported. The new entry list is assembled without holding the lock and is
// swapped in under the write lock.
func (idx *Index) Build(tree *content.Node) {
	pages := tree.Flatten()
	built := make([]entry, 0, len(pages))

	for _, page := range pages {
		if raw, err := os.ReadFile(page.FilePath); err == nil {
			body := string(raw)
			built = append(built, entry{
				title: page.Title,
				path:  page.Path,
				words: tokenize(body),
				raw:   body,
			})
		}
	}

	idx.mu.Lock()
	defer idx.mu.Unlock()
	idx.entries = built
}
// Search returns the indexed pages that match every term of query.
//
// The query is split with tokenize; if that yields no terms, Search returns
// nil. A page matches when each term is a substring of at least one of the
// page's words (see matchAll). Hits are returned in index order, each with a
// snippet of up to 150 bytes built around the first query term. When limit is
// greater than zero the scan stops after limit hits; otherwise every match is
// returned.
func (idx *Index) Search(query string, limit int) []Result {
	terms := tokenize(query)
	if len(terms) == 0 {
		return nil
	}

	idx.mu.RLock()
	defer idx.mu.RUnlock()

	var hits []Result
	for i := range idx.entries {
		page := &idx.entries[i]
		if matchAll(page.words, terms) {
			hits = append(hits, Result{
				Title:   page.title,
				Path:    page.path,
				Snippet: extractSnippet(page.raw, terms[0], 150),
			})
			if limit > 0 && len(hits) >= limit {
				break
			}
		}
	}
	return hits
}
// tokenize lower-cases s and splits it into maximal runs of letters, digits
// and underscores; every other rune acts as a separator and is dropped.
//
// The returned tokens are substrings of the lower-cased copy of s, so no
// per-token allocation is made. The result has length zero when s contains no
// token runes; callers should test len(tokens) rather than compare with nil.
func tokenize(s string) []string {
	return strings.FieldsFunc(strings.ToLower(s), func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_'
	})
}
// matchAll reports whether every term occurs as a substring of at least one
// word. It returns true when terms is empty.
func matchAll(words, terms []string) bool {
nextTerm:
	for _, term := range terms {
		for _, word := range words {
			if strings.Contains(word, term) {
				continue nextTerm
			}
		}
		// No word contained this term.
		return false
	}
	return true
}
// extractSnippet returns an excerpt of text of at most maxLen bytes, placed
// around the first case-insensitive occurrence of term, with newlines replaced
// by spaces. "..." is added on each side where text was cut off.
//
// term is matched against the lower-cased text, so it must already be
// lower-case. If term does not occur, the leading maxLen bytes of text are
// returned (followed by "..." when truncated), with newlines left as they are.
// A negative maxLen is treated as zero.
//
// Cut points are moved to UTF-8 rune boundaries so a multi-byte character is
// never split; the excerpt may therefore be a few bytes shorter than maxLen.
func extractSnippet(text, term string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}

	pos := strings.Index(strings.ToLower(text), term)
	if pos < 0 {
		if len(text) <= maxLen {
			return text
		}
		cut := maxLen
		for cut > 0 && isUTF8Continuation(text[cut]) {
			cut--
		}
		return text[:cut] + "..."
	}

	// Lower-casing can change the byte length of a string (U+212A KELVIN SIGN
	// becomes "k", invalid bytes become U+FFFD), so pos must be translated
	// back before it is used to slice the original text.
	hit := lowerOffsetToOrig(text, pos)

	start := hit - maxLen/2
	if start < 0 {
		start = 0
	}
	// hit is a rune boundary, so moving forward always terminates there.
	for start < hit && isUTF8Continuation(text[start]) {
		start++
	}

	end := start + maxLen
	if end > len(text) {
		end = len(text)
	}
	for end > start && end < len(text) && isUTF8Continuation(text[end]) {
		end--
	}

	snippet := strings.ReplaceAll(text[start:end], "\n", " ")
	if start > 0 {
		snippet = "..." + snippet
	}
	if end < len(text) {
		snippet += "..."
	}
	return snippet
}

// lowerOffsetToOrig converts a byte offset into strings.ToLower(text) to the
// byte offset of the corresponding rune in text.
func lowerOffsetToOrig(text string, lowerPos int) int {
	lowered := 0
	for i, r := range text {
		if lowered >= lowerPos {
			return i
		}
		// Width of this rune once lower-cased and re-encoded as UTF-8.
		lowered += len(string(unicode.ToLower(r)))
	}
	return len(text)
}

// isUTF8Continuation reports whether b is a UTF-8 continuation byte
// (bit pattern 10xxxxxx), i.e. not the first byte of an encoded rune.
func isUTF8Continuation(b byte) bool {
	return b&0xC0 == 0x80
}