blog/parse.go

558 lines
17 KiB
Go

// parse.go
package main
import (
"bytes"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"git.else-if.org/jess/blog/templates"
"github.com/BurntSushi/toml"
mathjax "github.com/litao91/goldmark-mathjax"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"go.abhg.dev/goldmark/frontmatter"
"gopkg.in/yaml.v3"
)
// Patterns for the placeholder tags and bracket references that are expanded
// after markdown rendering. They are compiled once at package scope instead of
// on every GetContentPath call.
var (
	sidebarTagRe   = regexp.MustCompile(`(?s)<sidebar>.*?</sidebar>`)
	topBannerTagRe = regexp.MustCompile(`(?s)<topbanner>.*?</topbanner>`)
	bracketRefRe   = regexp.MustCompile(`\[([^\]<]+)\]`)
)

// GetContentPath renders file into its cache location and returns the path of
// the rendered file.
//
// Non-markdown files are written out exactly as ReadRaw returned them.
// Markdown files are converted to HTML, post-processed (home page preview,
// [directory] expansion, sidebar and top banner tags, [image] references) and
// wrapped by templates.BuildFullPage. The output is regenerated on every call;
// nothing is read back from the cache, but a file path is still returned
// because callers serve the result from disk.
//
// An error is returned when the source cannot be read, the markdown cannot be
// converted, or the rendered file cannot be written.
func GetContentPath(file ContentFile) (string, error) {
	cachePath := GetCacheFilename(file.OriginalPath)

	raw, err := ReadRaw(file.OriginalPath)
	if err != nil {
		return "", fmt.Errorf("failed to read file: %w", err)
	}

	var dataToWrite []byte
	if file.IsMarkdown {
		dataToWrite, err = renderMarkdownPage(file, raw)
		if err != nil {
			return "", err
		}
	} else {
		dataToWrite = raw
	}

	if err := writeCacheFile(cachePath, dataToWrite); err != nil {
		return "", err
	}
	return cachePath, nil
}

// newPageMarkdown builds the goldmark instance used for both full pages and
// the home page preview, so the two renderings cannot drift apart.
func newPageMarkdown() goldmark.Markdown {
	return goldmark.New(
		goldmark.WithExtensions(
			extension.GFM,
			&frontmatter.Extender{},
			mathjax.MathJax,
			templates.SidebarTag, // ||| -> <sidebar>
			templates.TopBanner,
		),
		goldmark.WithRendererOptions(
			html.WithUnsafe(),
		),
	)
}

// renderMarkdownPage converts raw markdown into the complete HTML page for file.
func renderMarkdownPage(file ContentFile, raw []byte) ([]byte, error) {
	var buf bytes.Buffer
	ctx := parser.NewContext()
	if err := newPageMarkdown().Convert(raw, &buf, parser.WithContext(ctx)); err != nil {
		return nil, fmt.Errorf("failed to parse markdown: %w", err)
	}

	// Metadata: the external sidecar file is loaded first, then frontmatter
	// embedded in the document overrides it.
	var meta templates.PageMetadata
	if err := loadExternalMetadata(file.OriginalPath, &meta); err != nil {
		fmt.Printf("Warning: failed to load external metadata for %s: %v\n", file.OriginalPath, err)
	}
	mergeFrontmatter(ctx, file.OriginalPath, &meta)

	meta.Stylesheet = resolveStylesheetRoute(meta.Stylesheet, file.RoutePath)
	if meta.Title == "" {
		meta.Title = titleFromPath(file.OriginalPath)
	}

	htmlContent := buf.String()

	// The home page shows a preview of the latest post and takes its title.
	if file.RoutePath == "/" {
		if title, preview, ok := latestPostPreview(); ok {
			meta.Title = title
			htmlContent = preview
		}
	}

	// [directory] can be placed on any page (e.g. archive.md) to list all posts.
	if strings.Contains(htmlContent, "[directory]") {
		htmlContent = strings.ReplaceAll(htmlContent, "[directory]", renderDirectoryList())
	}

	// To add more custom tags: add a customTag in tags.go, its implementation
	// in a template file, and the expansion here.

	// ||| sidebar
	if sidebarTagRe.MatchString(htmlContent) {
		sidebar := fmt.Sprintf(`<div class="sidebar">%s%s</div>`,
			templates.RenderLatestPosts(postSnippets(0)), templates.RenderDirectoryLink())
		// Literal replacement: the generated HTML contains post titles, and a
		// "$" in one of them must not be treated as a regexp group reference.
		htmlContent = sidebarTagRe.ReplaceAllLiteralString(htmlContent, sidebar)
	}

	// _-_- top banner
	htmlContent = topBannerTagRe.ReplaceAllStringFunc(htmlContent, func(match string) string {
		inner := strings.TrimSuffix(strings.TrimPrefix(match, "<topbanner>"), "</topbanner>")
		return fmt.Sprintf(`<div class="topbanner">%s</div>`, templates.RenderTopBanner(inner))
	})

	htmlContent = expandImageRefs(htmlContent, filepath.Dir(file.OriginalPath), meta.Raw)

	// The page template always receives the ten latest posts plus the
	// directory link, whether or not the body contained a sidebar tag.
	latestPostsHTML := templates.RenderLatestPosts(postSnippets(10)) + templates.RenderDirectoryLink()
	return templates.BuildFullPage([]byte(htmlContent), meta, latestPostsHTML), nil
}

// mergeFrontmatter copies the document's frontmatter (if any) into meta.Raw and
// mirrors the title / stylesheet / style keys (matched case-insensitively) onto
// the corresponding struct fields. Decode failures are reported as warnings and
// leave meta untouched.
func mergeFrontmatter(ctx parser.Context, sourcePath string, meta *templates.PageMetadata) {
	d := frontmatter.Get(ctx)
	if d == nil {
		return
	}
	var fields map[string]interface{}
	if err := d.Decode(&fields); err != nil {
		fmt.Printf("Warning: failed to decode frontmatter for %s: %v\n", sourcePath, err)
		return
	}
	if meta.Raw == nil {
		meta.Raw = make(map[string]interface{})
	}
	for k, v := range fields {
		meta.Raw[k] = v
		s, ok := v.(string)
		if !ok {
			continue
		}
		switch strings.ToLower(k) {
		case "title":
			meta.Title = s
		case "stylesheet":
			meta.Stylesheet = s
		case "style":
			meta.Style = s
		}
	}
}

// resolveStylesheetRoute turns a stylesheet reference into a rooted web path.
// Empty values and paths that already start with "/" are returned unchanged;
// anything else is resolved relative to the directory part of routePath
// (e.g. "/entries/entry1" + "style.css" -> "/entries/style.css").
func resolveStylesheetRoute(stylesheet, routePath string) string {
	if stylesheet == "" || strings.HasPrefix(stylesheet, "/") {
		return stylesheet
	}
	dir := filepath.Dir(routePath)
	if dir == "." {
		dir = ""
	}
	// ToSlash keeps the result a URL path on platforms whose file separator
	// is not "/".
	resolved := filepath.ToSlash(filepath.Join(dir, stylesheet))
	if !strings.HasPrefix(resolved, "/") {
		resolved = "/" + resolved
	}
	return resolved
}

// titleFromPath derives a fallback title from a file name by dropping the
// directory and the extension.
func titleFromPath(p string) string {
	base := filepath.Base(p)
	return strings.TrimSuffix(base, filepath.Ext(base))
}

// postSnippets lists the markdown entries of AllContent other than the home
// page, in AllContent order, titled by file name. A positive limit caps the
// number of snippets; zero or less means no cap.
func postSnippets(limit int) []templates.PostSnippet {
	var posts []templates.PostSnippet
	for _, f := range AllContent {
		if limit > 0 && len(posts) >= limit {
			break
		}
		if !f.IsMarkdown || f.RoutePath == "/" {
			continue
		}
		posts = append(posts, templates.PostSnippet{
			Title: titleFromPath(f.OriginalPath),
			URL:   f.RoutePath,
			Date:  f.ModTime,
		})
	}
	return posts
}

// latestPostPreview renders the first markdown entry of AllContent that is not
// the home page (the code relies on AllContent being sorted newest first) and
// returns its title together with the HTML up to and including the first
// paragraph, followed by a "Read More..." link. ok is false when there is no
// such post or it cannot be read or converted.
func latestPostPreview() (title, previewHTML string, ok bool) {
	var latest ContentFile
	found := false
	for _, f := range AllContent {
		if f.IsMarkdown && f.RoutePath != "/" {
			latest, found = f, true
			break
		}
	}
	if !found {
		return "", "", false
	}

	postRaw, err := ReadRaw(latest.OriginalPath)
	if err != nil {
		fmt.Printf("Warning: failed to read latest post %s: %v\n", latest.OriginalPath, err)
		return "", "", false
	}

	var buf bytes.Buffer
	ctx := parser.NewContext()
	if err := newPageMarkdown().Convert(postRaw, &buf, parser.WithContext(ctx)); err != nil {
		fmt.Printf("Warning: failed to render latest post %s: %v\n", latest.OriginalPath, err)
		return "", "", false
	}

	var postMeta templates.PageMetadata
	if d := frontmatter.Get(ctx); d != nil {
		if err := d.Decode(&postMeta); err != nil {
			fmt.Printf("Warning: failed to decode frontmatter for %s: %v\n", latest.OriginalPath, err)
		}
	}
	if postMeta.Title == "" {
		postMeta.Title = titleFromPath(latest.OriginalPath)
	}

	fullHTML := buf.String()
	end := strings.Index(fullHTML, "</p>")
	if end < 0 {
		// No paragraph to cut at: show the whole post rather than appending a
		// closing tag that was never opened.
		return postMeta.Title, fullHTML, true
	}
	readMore := fmt.Sprintf(`<p><a href="%s">Read More...</a></p>`, latest.RoutePath)
	return postMeta.Title, fullHTML[:end+len("</p>")] + readMore, true
}

// renderDirectoryList builds the HTML that replaces a [directory] tag: every
// post from postSnippets, with a new year heading and list started whenever
// the year of the post date changes.
func renderDirectoryList() string {
	var b strings.Builder
	b.WriteString(`<div class="directory-list">`)
	year, open := 0, false
	for _, p := range postSnippets(0) {
		if y := p.Date.Year(); !open || y != year {
			if open {
				b.WriteString(`</ul>`)
			}
			fmt.Fprintf(&b, `<h3>%d</h3><ul>`, y)
			year, open = y, true
		}
		fmt.Fprintf(&b, `<li><a href="%s">%s</a> <span class="date">(%s)</span></li>`,
			p.URL, p.Title, p.Date.Format("Jan 02"))
	}
	if open {
		b.WriteString(`</ul>`)
	}
	b.WriteString(`</div>`)
	return b.String()
}

// expandImageRefs replaces "[name]" / "[name.ext]" occurrences that resolve to
// a file (see resolveImage) with an <img> element. Bracketed text that does not
// resolve is left as it was. Markdown links are already <a> elements by the
// time this runs, so only literal brackets remain in the HTML.
//
// config is the page's raw metadata: an entry keyed by the resolved file name
// whose value is a map supplies attributes, with "style" emitted first and all
// other keys emitted as name="value" pairs.
func expandImageRefs(htmlContent, docDir string, config map[string]interface{}) string {
	const rootDir = "content"
	return bracketRefRe.ReplaceAllStringFunc(htmlContent, func(match string) string {
		name := match[1 : len(match)-1] // strip "[" and "]"

		imagePath, err := resolveImage(name, docDir, rootDir)
		if err != nil {
			return match
		}

		// Filesystem path -> web path, e.g. content/entries/image.png ->
		// /entries/image.png. Separators are normalised before the root is
		// trimmed so the prefix also matches where the separator is not "/".
		webPath := "/" + strings.TrimPrefix(filepath.ToSlash(imagePath), rootDir+"/")

		var styleAttr, otherAttrs string
		if attrs, ok := config[filepath.Base(imagePath)].(map[string]interface{}); ok {
			for k, v := range attrs {
				if k == "style" {
					styleAttr = fmt.Sprintf(` style="%v"`, v)
				} else {
					// height, width, etc.
					otherAttrs += fmt.Sprintf(` %s="%v"`, k, v)
				}
			}
		}
		return fmt.Sprintf(`<img src="%s" alt="%s"%s%s>`, webPath, name, styleAttr, otherAttrs)
	})
}

// writeCacheFile writes data to dest by way of a uniquely named temporary file
// in the same directory followed by a rename, so readers never see a partial
// file and concurrent renders of the same page do not share a temp file.
func writeCacheFile(dest string, data []byte) error {
	tmp, err := os.CreateTemp(filepath.Dir(dest), filepath.Base(dest)+".*.tmp")
	if err != nil {
		return fmt.Errorf("failed to write cache: %w", err)
	}
	tmpPath := tmp.Name()

	_, werr := tmp.Write(data)
	if cerr := tmp.Close(); werr == nil {
		werr = cerr
	}
	if werr == nil {
		// CreateTemp creates the file with mode 0600; restore the 0644 that
		// the cache files have always used.
		werr = os.Chmod(tmpPath, 0644)
	}
	if werr != nil {
		os.Remove(tmpPath) // best effort cleanup; the write error is what matters
		return fmt.Errorf("failed to write cache: %w", werr)
	}

	if err := os.Rename(tmpPath, dest); err != nil {
		os.Remove(tmpPath) // best effort cleanup; the rename error is what matters
		return fmt.Errorf("failed to commit cache: %w", err)
	}
	return nil
}
// loadExternalMetadata looks for a sidecar metadata file next to markdownPath
// (same name, extension .yml, .yaml or .toml, tried in that order) and decodes
// the first one that exists into meta.Raw. The "title", "stylesheet" and
// "style" keys are copied onto the matching struct fields when they hold
// strings. It returns nil when no sidecar exists, and an error when the file
// that was found cannot be read or decoded.
func loadExternalMetadata(markdownPath string, meta *templates.PageMetadata) error {
	stem := strings.TrimSuffix(markdownPath, filepath.Ext(markdownPath))

	for _, ext := range []string{".yml", ".yaml", ".toml"} {
		candidate := stem + ext
		if _, statErr := os.Stat(candidate); statErr != nil {
			continue
		}

		data, err := os.ReadFile(candidate)
		if err != nil {
			return fmt.Errorf("read file %s: %w", candidate, err)
		}

		// Everything is decoded into Raw so that unknown keys stay available.
		if meta.Raw == nil {
			meta.Raw = make(map[string]interface{})
		}
		switch ext {
		case ".toml":
			if _, err := toml.Decode(string(data), &meta.Raw); err != nil {
				return fmt.Errorf("decode toml %s: %w", candidate, err)
			}
		default: // .yml and .yaml
			if err := yaml.Unmarshal(data, &meta.Raw); err != nil {
				return fmt.Errorf("decode yaml %s: %w", candidate, err)
			}
		}

		// Promote the known keys to their struct fields.
		if s, ok := meta.Raw["title"].(string); ok {
			meta.Title = s
		}
		if s, ok := meta.Raw["stylesheet"].(string); ok {
			meta.Stylesheet = s
		}
		if s, ok := meta.Raw["style"].(string); ok {
			meta.Style = s
		}

		// Only the first sidecar found is used.
		return nil
	}
	return nil
}
// resolveImage finds the file a bracket reference points to. It probes docDir
// first and rootDir second. A name that carries an extension must exist at
// exactly that path; a name without one is handed to findNewestWithExt for the
// directory being probed. The first hit is returned; if neither directory
// yields a file the result is a "not found" error.
func resolveImage(name, docDir, rootDir string) (string, error) {
	exact := hasExt(name)

	for _, dir := range [2]string{docDir, rootDir} {
		if exact {
			candidate := filepath.Join(dir, name)
			if _, statErr := os.Stat(candidate); statErr == nil {
				return candidate, nil
			}
			continue
		}
		if match, findErr := findNewestWithExt(dir, name); findErr == nil {
			return match, nil
		}
	}

	return "", fmt.Errorf("not found")
}
// hasExt reports whether filepath.Ext finds an extension on name.
func hasExt(name string) bool {
	return len(filepath.Ext(name)) > 0
}
// findNewestWithExt searches dir (non-recursively) for image files whose name
// without its extension equals baseName, compared case-insensitively, and
// returns the path of the most recently modified one.
//
// Only files with a recognised image extension are considered, so that a
// reference such as "[notes]" cannot resolve to notes.md or a metadata sidecar.
// Directories and entries whose file info cannot be read are skipped. An error
// is returned when dir cannot be listed or no candidate matches.
func findNewestWithExt(dir, baseName string) (string, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return "", err
	}

	var (
		bestMatch  string
		newestTime time.Time
	)
	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		name := e.Name()
		ext := filepath.Ext(name)
		if !isImageExt(ext) {
			continue
		}
		if !strings.EqualFold(strings.TrimSuffix(name, ext), baseName) {
			continue
		}
		info, err := e.Info()
		if err != nil {
			// The entry vanished or is unreadable; try the remaining ones.
			continue
		}
		if bestMatch == "" || info.ModTime().After(newestTime) {
			bestMatch = filepath.Join(dir, name)
			newestTime = info.ModTime()
		}
	}

	if bestMatch == "" {
		return "", fmt.Errorf("no image named %q in %s", baseName, dir)
	}
	return bestMatch, nil
}

// isImageExt reports whether ext (including the leading dot, any case) is one
// of the image file extensions accepted for bracket references.
func isImageExt(ext string) bool {
	switch strings.ToLower(ext) {
	case ".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".avif",
		".bmp", ".ico", ".apng", ".tif", ".tiff":
		return true
	}
	return false
}