142 lines
2.4 KiB
Go
142 lines
2.4 KiB
Go
package search
|
|
|
|
import (
|
|
"os"
|
|
"strings"
|
|
"sync"
|
|
"unicode"
|
|
|
|
"git.else-if.org/jess/cs-midi-docs/internal/content"
|
|
)
|
|
|
|
// Result is a single search hit as returned by Index.Search.
type Result struct {
	// Title and Path are copied from the matching indexed page; Snippet is
	// a short excerpt of the page text around the first query term.
	Title, Path, Snippet string
}
|
|
|
|
type Index struct {
|
|
mu sync.RWMutex
|
|
entries []entry
|
|
}
|
|
|
|
// entry is the indexed form of one page.
type entry struct {
	title, path string   // copied from the page when the index is built
	words       []string // tokenize(raw): lowercased tokens used for matching
	raw         string   // full file contents, used to cut result snippets
}
|
|
|
|
func NewIndex() *Index {
|
|
return &Index{}
|
|
}
|
|
|
|
func (idx *Index) Build(tree *content.Node) {
|
|
pages := tree.Flatten()
|
|
entries := make([]entry, 0, len(pages))
|
|
for _, p := range pages {
|
|
data, err := os.ReadFile(p.FilePath)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
text := string(data)
|
|
entries = append(entries, entry{
|
|
title: p.Title,
|
|
path: p.Path,
|
|
words: tokenize(text),
|
|
raw: text,
|
|
})
|
|
}
|
|
idx.mu.Lock()
|
|
idx.entries = entries
|
|
idx.mu.Unlock()
|
|
}
|
|
|
|
func (idx *Index) Search(query string, limit int) []Result {
|
|
terms := tokenize(query)
|
|
if len(terms) == 0 {
|
|
return nil
|
|
}
|
|
|
|
idx.mu.RLock()
|
|
defer idx.mu.RUnlock()
|
|
|
|
var results []Result
|
|
for _, e := range idx.entries {
|
|
if !matchAll(e.words, terms) {
|
|
continue
|
|
}
|
|
results = append(results, Result{
|
|
Title: e.title,
|
|
Path: e.path,
|
|
Snippet: extractSnippet(e.raw, terms[0], 150),
|
|
})
|
|
if limit > 0 && len(results) >= limit {
|
|
break
|
|
}
|
|
}
|
|
return results
|
|
}
|
|
|
|
// tokenize lowercases s and splits it into tokens, where a token is a
// maximal run of Unicode letters, Unicode digits, and underscores. Every
// other rune is a separator. It returns nil when s contains no token.
func tokenize(s string) []string {
	// FieldsFunc slices tokens straight out of the lowercased string, so
	// there is no per-token buffer to build and copy.
	tokens := strings.FieldsFunc(strings.ToLower(s), func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_'
	})
	if len(tokens) == 0 {
		// FieldsFunc returns an empty non-nil slice; keep the nil result
		// callers have always received for token-less input.
		return nil
	}
	return tokens
}
|
|
|
|
// matchAll reports whether every term occurs as a substring of at least one
// word. With no terms it reports true.
func matchAll(words, terms []string) bool {
nextTerm:
	for _, term := range terms {
		for _, word := range words {
			if strings.Contains(word, term) {
				continue nextTerm
			}
		}
		// No word contained this term, so the conjunction fails.
		return false
	}
	return true
}
|
|
|
|
// extractSnippet returns an excerpt of text of roughly maxLen bytes around
// the first case-insensitive occurrence of term. term is expected to be
// lowercase already.
//
// When term is found, the window starts about maxLen/2 bytes before the
// match, newlines inside it are replaced by spaces, and "..." is added on
// each side that was cut. When term is not found, the first maxLen bytes of
// text are returned as-is, followed by "..." if text was longer.
//
// Cut points are moved back to the start of a UTF-8 sequence so a
// multi-byte character is never split. A negative maxLen is treated as 0.
func extractSnippet(text, term string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}

	lower := strings.ToLower(text)
	hit := strings.Index(lower, term)
	if hit < 0 {
		if len(text) > maxLen {
			return text[:runeFloor(text, maxLen)] + "..."
		}
		return text
	}
	// Lowercasing can change the byte length of a rune, so an offset into
	// lower is not necessarily a valid offset into text.
	hit = originalOffset(text, lower, hit)

	start := hit - maxLen/2
	if start < 0 {
		start = 0
	}
	start = runeFloor(text, start)

	end := start + maxLen
	if end > len(text) {
		end = len(text)
	}
	end = runeFloor(text, end)
	if end < start {
		// Only reachable with a tiny maxLen or malformed UTF-8.
		end = start
	}

	snippet := strings.ReplaceAll(text[start:end], "\n", " ")
	if start > 0 {
		snippet = "..." + snippet
	}
	if end < len(text) {
		snippet += "..."
	}
	return snippet
}

// originalOffset converts lowerIdx, a rune-aligned byte offset into lower,
// to the corresponding byte offset in text, where lower is
// strings.ToLower(text). ToLower maps rune for rune, so the same number of
// runes precedes the position in both strings.
func originalOffset(text, lower string, lowerIdx int) int {
	runes := 0
	for range lower[:lowerIdx] {
		runes++
	}
	for i := range text {
		if runes == 0 {
			return i
		}
		runes--
	}
	return len(text)
}

// runeFloor returns i moved back to the start of the UTF-8 sequence that
// contains byte i of s, clamped to [0, len(s)]. It backs up at most three
// bytes, the longest possible run of continuation bytes in valid UTF-8.
func runeFloor(s string, i int) int {
	if i <= 0 {
		return 0
	}
	if i >= len(s) {
		return len(s)
	}
	// 10xxxxxx marks a UTF-8 continuation byte.
	for back := 0; back < 3 && i > 0 && s[i]&0xC0 == 0x80; back++ {
		i--
	}
	return i
}
|