forked from jess/Acord
493 lines
18 KiB
Rust
493 lines
18 KiB
Rust
//! Embedded sidecar archive.
|
|
//!
|
|
//! Markdown is the floor — `.md` files stay readable in vim and on GitHub —
|
|
//! but Numbers-class table features (positional metadata, per-cell formatting,
|
|
//! formulas, references) don't fit in markdown.
|
|
//!
|
|
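//! Acord stores rich metadata in the SAME `.md` file. The original (now
//! legacy) framing is a base64-encoded zip wrapped in an HTML comment appended
//! to the end of the document. New saves instead append the raw zip bytes
//! after `BINARY_SENTINEL` (see `embed_in_md`); the comment form is kept for
//! the load-side back-compat path and looks like this: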
|
|
//!
|
|
//! ```text
|
|
//! ...the user's markdown content...
|
|
//!
|
|
//! <!-- acord-archive
|
|
//! UEsDBBQAAAAIA...base64...AAAA
|
|
//! -->
|
|
//! ```
|
|
//!
|
|
//! Why this shape:
|
|
//! - HTML comments are valid markdown — every renderer (GitHub, Bear, Obsidian)
|
|
//! treats them as invisible. Vim shows them as a single comment block, not
|
|
//! as binary garbage.
|
|
//! - Base64 stays text-clean — no `\0` bytes, vim won't flag the file as
|
|
//! binary, `git diff` is still legible (modulo a wide line at the bottom).
|
|
//! - The zip's central directory makes it trivial to add more entries later
|
|
//! (per-block scratch state, formula caches, embedded images) without
|
|
//! changing the framing.
|
|
//!
|
|
//! Per-table linking is positional: the Nth non-eval table in document layout
|
|
//! order is sidecar key "N". No proprietary tags appear in the markdown body.
|
|
//! Identity is runtime state derived from the document, never written to disk.
|
|
//!
|
|
//! The archive is structured like a Rust crate — each block is a submodule
|
|
//! file under `src/`, and `config.toml` holds display-only metadata (col
|
|
//! widths, row heights, cell styles). Save direction only: the markdown is
|
|
//! always the source of truth; the archive is regenerated fresh on every save.
|
|
//! On load, only `config.toml` is read for display metadata. If missing or
|
|
//! malformed, start fresh — next save overwrites.
|
|
//!
|
|
//! Eval result tables are explicitly NOT persisted. Only the source `/= expr`
|
|
//! line goes into markdown; the result table re-renders fresh on load.
|
|
|
|
use std::collections::HashMap;
|
|
use std::io::{Cursor, Read, Write};
|
|
|
|
use base64::engine::general_purpose::STANDARD as B64;
|
|
use base64::Engine;
|
|
use serde::{Deserialize, Serialize};
|
|
use zip::write::SimpleFileOptions;
|
|
use zip::{CompressionMethod, ZipArchive, ZipWriter};
|
|
|
|
/// Opening marker of the embedded archive comment. Everything between this
/// marker and the next `ARCHIVE_CLOSE` is treated as the archive payload
/// (a base64-encoded zip).
const ARCHIVE_OPEN: &str = "<!-- acord-archive";

/// Closing marker of the embedded archive comment (an ordinary HTML comment
/// terminator).
const ARCHIVE_CLOSE: &str = "-->";
|
|
|
|
/// Name of the root-level zip entry that carries display metadata (col
/// widths, row heights, cell styles, formulas) — things that don't affect
/// evaluation.
const CONFIG_ENTRY: &str = "config.toml";

/// Prefix (zip "directory") under which one file per block is written. Each
/// file is meant to hold TOML front-matter + source, like a crate submodule.
const SRC_DIR: &str = "src/";
|
|
|
|
/// Top-level schema of a `<file>.acord.toml` companion. Versioned so we can
|
|
/// migrate later as the Numbers-class table feature set grows.
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
|
pub struct Sidecar {
|
|
/// Schema version. Bump on incompatible changes.
|
|
#[serde(default = "default_version")]
|
|
pub version: u32,
|
|
/// Table metadata indexed by `[#id]` markers in the markdown.
|
|
#[serde(default)]
|
|
pub tables: HashMap<String, TableSidecar>,
|
|
}
|
|
|
|
/// Schema version assumed when a serialized `Sidecar` has no `version` key
/// (referenced by name from the `#[serde(default = ...)]` attribute).
fn default_version() -> u32 {
    1_u32
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
|
pub struct TableSidecar {
|
|
/// Per-column widths in pixels. Same length as the table's column count
|
|
/// (or shorter; missing entries fall back to the editor's default width).
|
|
#[serde(default)]
|
|
pub col_widths: Vec<f32>,
|
|
/// Sparse per-row explicit heights. Keys are row indices serialized as
|
|
/// strings (TOML's native key type); convert with `parse::<usize>()` at
|
|
/// the boundary. A table with a few resized rows doesn't carry the
|
|
/// default for every other row.
|
|
#[serde(default)]
|
|
pub row_heights: HashMap<String, f32>,
|
|
/// Per-cell metadata indexed by spreadsheet-style address ("A1", "D2", ...).
|
|
#[serde(default)]
|
|
pub cells: HashMap<String, CellSidecar>,
|
|
/// Cell formulas indexed by spreadsheet address.
|
|
#[serde(default)]
|
|
pub formulas: HashMap<String, String>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
|
pub struct CellSidecar {
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
pub background: Option<String>,
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
pub foreground: Option<String>,
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
pub font_weight: Option<String>,
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
pub align: Option<String>,
|
|
}
|
|
|
|
/// Reads sidecar TOML. Returns `Default` on parse error so a corrupt sidecar
|
|
/// never blocks opening a markdown file — the user just loses the rich metadata
|
|
/// until they re-save.
|
|
pub struct SidecarReader {
|
|
inner: Sidecar,
|
|
}
|
|
|
|
impl SidecarReader {
|
|
pub fn from_toml(text: &str) -> Self {
|
|
let inner: Sidecar = toml::from_str(text).unwrap_or_default();
|
|
Self { inner }
|
|
}
|
|
|
|
pub fn empty() -> Self {
|
|
Self { inner: Sidecar::default() }
|
|
}
|
|
|
|
pub fn table(&self, id: &str) -> Option<&TableSidecar> {
|
|
self.inner.tables.get(id)
|
|
}
|
|
}
|
|
|
|
/// Accumulates sidecar entries during a save pass. Each block's `to_md` writes
|
|
/// its side-channel state into the writer; after the pass, `flush` produces the
|
|
/// TOML text to write to disk (or `None` if there's nothing to write — empty
|
|
/// sidecars should be deleted from disk to avoid littering).
|
|
pub struct SidecarWriter {
|
|
inner: Sidecar,
|
|
}
|
|
|
|
impl SidecarWriter {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
inner: Sidecar {
|
|
version: 1,
|
|
tables: HashMap::new(),
|
|
},
|
|
}
|
|
}
|
|
|
|
pub fn put_table(&mut self, id: String, data: TableSidecar) {
|
|
self.inner.tables.insert(id, data);
|
|
}
|
|
|
|
/// Returns the serialized TOML, or `None` if the sidecar has no entries.
|
|
pub fn flush(self) -> Option<String> {
|
|
if self.inner.tables.is_empty() {
|
|
return None;
|
|
}
|
|
toml::to_string_pretty(&self.inner).ok()
|
|
}
|
|
}
|
|
|
|
impl Default for SidecarWriter {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Embedded archive: split markdown text into (body, optional sidecar)
|
|
// ----------------------------------------------------------------------------
|
|
|
|
/// Result of pulling an archive out of an `.md` file. `markdown` is the user
|
|
/// content with the archive comment stripped; `sidecar` is the parsed config
|
|
/// (or `None` if the file had no archive).
|
|
pub struct LoadedDoc {
|
|
pub markdown: String,
|
|
pub sidecar: Option<Sidecar>,
|
|
}
|
|
|
|
/// Pull an embedded archive out of a markdown file. If the file has no
|
|
/// `<!-- acord-archive ... -->` comment, returns the text unchanged with
|
|
/// `sidecar = None`. Failure modes (truncated comment, bad base64, malformed
|
|
/// zip, malformed TOML) all degrade gracefully to "no sidecar" — the user
|
|
/// never loses access to their markdown content because of corrupted metadata.
|
|
pub fn extract_archive(text: &str) -> LoadedDoc {
|
|
let Some(open_idx) = text.rfind(ARCHIVE_OPEN) else {
|
|
return LoadedDoc {
|
|
markdown: text.to_string(),
|
|
sidecar: None,
|
|
};
|
|
};
|
|
// The closing `-->` must come AFTER the opener.
|
|
let after_open = open_idx + ARCHIVE_OPEN.len();
|
|
let Some(rel_close) = text[after_open..].find(ARCHIVE_CLOSE) else {
|
|
return LoadedDoc {
|
|
markdown: text.to_string(),
|
|
sidecar: None,
|
|
};
|
|
};
|
|
let close_idx = after_open + rel_close;
|
|
let payload = text[after_open..close_idx].trim();
|
|
|
|
let body = strip_trailing_blank_lines(text[..open_idx].trim_end_matches('\n'));
|
|
|
|
let parsed = decode_archive_payload(payload);
|
|
LoadedDoc {
|
|
markdown: body,
|
|
sidecar: parsed,
|
|
}
|
|
}
|
|
|
|
/// One block's source file destined for the archive. It is written to
/// `src/<filename>` inside the zip. By convention (not enforced here) the
/// content is TOML front-matter, a `---` separator, then the raw source.
pub struct BlockFile {
    /// File name relative to the archive's `src/` directory.
    pub filename: String,
    /// Full text written into the zip entry.
    pub content: String,
}
|
|
|
|
/// builds archive zip bytes from sidecar metadata and block files, None when both empty.
|
|
pub fn build_archive_bytes(sidecar: &Sidecar, block_files: &[BlockFile]) -> Option<Vec<u8>> {
|
|
if sidecar.tables.is_empty() && block_files.is_empty() {
|
|
return None;
|
|
}
|
|
let toml_text = toml::to_string_pretty(sidecar).ok()?;
|
|
write_zip(&toml_text, block_files).ok()
|
|
}
|
|
|
|
/// parses zip bytes back into a Sidecar.
|
|
pub fn extract_archive_bytes(bytes: &[u8]) -> Option<Sidecar> {
|
|
let toml_text = read_zip(bytes)?;
|
|
toml::from_str::<Sidecar>(&toml_text).ok()
|
|
}
|
|
|
|
/// Magic byte sequence placed between the markdown body and the appended raw
/// zip. The NUL bytes on either side of the tag are intended to push text
/// editors into "binary mode", so the archive shows up as garbage rather than
/// as readable base64.
pub const BINARY_SENTINEL: &[u8] = b"\n\0ACORD-ARCHIVE\0\n";
|
|
|
|
/// appends raw zip bytes after the markdown body, separated by BINARY_SENTINEL.
|
|
pub fn embed_in_md(markdown: &[u8], archive: &[u8]) -> Vec<u8> {
|
|
let mut out = Vec::with_capacity(markdown.len() + BINARY_SENTINEL.len() + archive.len());
|
|
out.extend_from_slice(markdown);
|
|
if !markdown.ends_with(b"\n") {
|
|
out.push(b'\n');
|
|
}
|
|
out.extend_from_slice(BINARY_SENTINEL);
|
|
out.extend_from_slice(archive);
|
|
out
|
|
}
|
|
|
|
/// splits raw file bytes on BINARY_SENTINEL, returning (text_bytes, optional zip bytes).
|
|
pub fn extract_from_md(bytes: &[u8]) -> (Vec<u8>, Option<Vec<u8>>) {
|
|
if let Some(idx) = rfind_subslice(bytes, BINARY_SENTINEL) {
|
|
let text = bytes[..idx].to_vec();
|
|
let archive_start = idx + BINARY_SENTINEL.len();
|
|
let archive = bytes[archive_start..].to_vec();
|
|
return (text, Some(archive));
|
|
}
|
|
(bytes.to_vec(), None)
|
|
}
|
|
|
|
/// Returns the start index of the last occurrence of `needle` in `haystack`.
/// An empty needle, or a needle longer than the haystack, never matches.
fn rfind_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let needle_len = needle.len();
    if needle_len == 0 {
        return None;
    }
    // Highest index at which the needle could still fit; `None` (via `?`)
    // when the haystack is shorter than the needle.
    let last_start = haystack.len().checked_sub(needle_len)?;
    (0..=last_start)
        .rev()
        .find(|&start| &haystack[start..start + needle_len] == needle)
}
|
|
|
|
/// legacy embed format: base64-encoded zip inside an HTML comment. Kept for the
|
|
/// round-trip tests and the load-side back-compat path; new saves go through embed_in_md.
|
|
pub fn embed_archive(markdown: &str, sidecar: &Sidecar, block_files: &[BlockFile]) -> String {
|
|
let Some(zip_bytes) = build_archive_bytes(sidecar, block_files) else {
|
|
return markdown.to_string();
|
|
};
|
|
let encoded = B64.encode(&zip_bytes);
|
|
let wrapped = wrap_base64(&encoded, 76);
|
|
|
|
let mut out = markdown.trim_end_matches('\n').to_string();
|
|
out.push_str("\n\n");
|
|
out.push_str(ARCHIVE_OPEN);
|
|
out.push('\n');
|
|
out.push_str(&wrapped);
|
|
out.push('\n');
|
|
out.push_str(ARCHIVE_CLOSE);
|
|
out.push('\n');
|
|
out
|
|
}
|
|
|
|
/// Drops every trailing line that is empty or whitespace-only, together with
/// the newline that precedes it, so round-tripping a doc with an archive
/// doesn't accumulate blank lines. The last non-blank line is kept verbatim,
/// including any trailing spaces it has.
fn strip_trailing_blank_lines(s: &str) -> String {
    let mut kept = s;
    while !kept.is_empty() {
        // Split off the final line; `head` excludes the separating newline.
        let (head, last_line) = match kept.rfind('\n') {
            Some(newline_at) => (&kept[..newline_at], &kept[newline_at + 1..]),
            None => ("", kept),
        };
        if !last_line.trim().is_empty() {
            break;
        }
        kept = head;
    }
    kept.to_string()
}
|
|
|
|
fn decode_archive_payload(payload: &str) -> Option<Sidecar> {
|
|
// Strip whitespace inside the comment so the wrapping is invisible to the
|
|
// decoder.
|
|
let cleaned: String = payload.chars().filter(|c| !c.is_whitespace()).collect();
|
|
let zip_bytes = B64.decode(cleaned.as_bytes()).ok()?;
|
|
let toml_text = read_zip(&zip_bytes)?;
|
|
toml::from_str::<Sidecar>(&toml_text).ok()
|
|
}
|
|
|
|
fn write_zip(toml_text: &str, block_files: &[BlockFile]) -> Result<Vec<u8>, String> {
|
|
let total_bytes = toml_text.len()
|
|
+ block_files.iter().map(|f| f.filename.len() + f.content.len()).sum::<usize>();
|
|
let mut buf: Vec<u8> = Vec::with_capacity(total_bytes + 512);
|
|
{
|
|
let cursor = Cursor::new(&mut buf);
|
|
let mut zip = ZipWriter::new(cursor);
|
|
let opts: SimpleFileOptions =
|
|
SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);
|
|
|
|
if !toml_text.is_empty() {
|
|
zip.start_file(CONFIG_ENTRY, opts)
|
|
.map_err(|e| format!("zip start_file config: {}", e))?;
|
|
zip.write_all(toml_text.as_bytes())
|
|
.map_err(|e| format!("zip write config: {}", e))?;
|
|
}
|
|
|
|
for file in block_files {
|
|
let path = format!("{}{}", SRC_DIR, file.filename);
|
|
zip.start_file(path, opts)
|
|
.map_err(|e| format!("zip start_file {}: {}", file.filename, e))?;
|
|
zip.write_all(file.content.as_bytes())
|
|
.map_err(|e| format!("zip write {}: {}", file.filename, e))?;
|
|
}
|
|
|
|
zip.finish()
|
|
.map_err(|e| format!("zip finish: {}", e))?;
|
|
}
|
|
Ok(buf)
|
|
}
|
|
|
|
fn read_zip(bytes: &[u8]) -> Option<String> {
|
|
let cursor = Cursor::new(bytes);
|
|
let mut zip = ZipArchive::new(cursor).ok()?;
|
|
let mut entry = zip.by_name(CONFIG_ENTRY).ok()?;
|
|
let mut text = String::new();
|
|
entry.read_to_string(&mut text).ok()?;
|
|
Some(text)
|
|
}
|
|
|
|
/// Breaks `s` into lines of at most `width` bytes joined by `\n`, with no
/// trailing newline. A `width` of 0, or input that already fits on one line,
/// returns the input unchanged.
///
/// The input is expected to be base64 (ASCII), so cutting at byte offsets is
/// the same as cutting at character boundaries.
fn wrap_base64(s: &str, width: usize) -> String {
    let total = s.len();
    if width == 0 || total <= width {
        return s.to_string();
    }

    let mut wrapped = String::with_capacity(total + total / width);
    for start in (0..total).step_by(width) {
        if start > 0 {
            wrapped.push('\n');
        }
        let stop = total.min(start + width);
        wrapped.push_str(&s[start..stop]);
    }
    wrapped
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A sidecar with one table ("t1") that only carries column widths.
    fn sample_sidecar() -> Sidecar {
        let mut tables = HashMap::new();
        tables.insert(
            "t1".to_string(),
            TableSidecar {
                col_widths: vec![100.0, 200.0, 150.0],
                row_heights: HashMap::new(),
                cells: HashMap::new(),
                formulas: HashMap::new(),
            },
        );
        Sidecar { version: 1, tables }
    }

    // ---- legacy HTML-comment framing -------------------------------------

    #[test]
    fn round_trip_embed_extract() {
        let body = "# Hello\n\nSome text.\n\n| a | b |\n|---|---|\n| 1 | 2 |\n";
        let sidecar = sample_sidecar();
        let with_archive = embed_archive(body, &sidecar, &[]);
        assert!(with_archive.contains(ARCHIVE_OPEN));
        assert!(with_archive.contains(ARCHIVE_CLOSE));

        let loaded = extract_archive(&with_archive);
        assert_eq!(loaded.markdown.trim_end(), body.trim_end());
        let parsed = loaded.sidecar.expect("sidecar should round-trip");
        assert_eq!(parsed.tables.len(), 1);
        let t1 = &parsed.tables["t1"];
        assert_eq!(t1.col_widths, vec![100.0, 200.0, 150.0]);
    }

    #[test]
    fn empty_sidecar_skips_embed() {
        let body = "Just some markdown.\n";
        let empty = Sidecar::default();
        let out = embed_archive(body, &empty, &[]);
        assert_eq!(out, body);
        assert!(!out.contains("acord-archive"));
    }

    #[test]
    fn extract_with_no_archive() {
        let body = "# Plain doc\n\nNo archive here.";
        let loaded = extract_archive(body);
        assert_eq!(loaded.markdown, body);
        assert!(loaded.sidecar.is_none());
    }

    #[test]
    fn extract_with_corrupt_payload_recovers_markdown() {
        // Garbage in the comment body must NOT eat the user's markdown — they
        // get the body back, sidecar None.
        let doc = "# Body\n\nstuff\n\n<!-- acord-archive\nnot-actually-base64!!!\n-->\n";
        let loaded = extract_archive(doc);
        assert!(loaded.markdown.contains("# Body"));
        assert!(loaded.markdown.contains("stuff"));
        assert!(loaded.sidecar.is_none());
    }

    #[test]
    fn round_trip_preserves_complex_metadata() {
        let mut cells = HashMap::new();
        cells.insert(
            "A1".to_string(),
            CellSidecar {
                background: Some("#ff0000".into()),
                foreground: Some("#ffffff".into()),
                font_weight: Some("bold".into()),
                align: Some("center".into()),
            },
        );
        let mut row_heights = HashMap::new();
        row_heights.insert("2".to_string(), 48.0);
        let mut formulas = HashMap::new();
        formulas.insert("B3".to_string(), "=SUM(A1:A10)".to_string());

        let mut tables = HashMap::new();
        tables.insert(
            "t1".to_string(),
            TableSidecar {
                col_widths: vec![80.0, 120.0],
                row_heights,
                cells,
                formulas,
            },
        );
        let sc = Sidecar { version: 1, tables };

        let body = "# Doc\n";
        let embedded = embed_archive(body, &sc, &[]);
        let loaded = extract_archive(&embedded);
        let parsed = loaded.sidecar.unwrap();

        let t = &parsed.tables["t1"];
        assert_eq!(t.col_widths, vec![80.0, 120.0]);
        assert_eq!(t.row_heights["2"], 48.0);
        assert_eq!(t.cells["A1"].background.as_deref(), Some("#ff0000"));
        assert_eq!(t.formulas["B3"], "=SUM(A1:A10)");
    }

    #[test]
    fn embed_does_not_double_blank_line() {
        // Body that already ends with newlines should round-trip cleanly.
        let body = "Line\n\n\n";
        let sc = sample_sidecar();
        let embedded = embed_archive(body, &sc, &[]);
        let loaded = extract_archive(&embedded);
        // Trailing blank lines around the archive should not accumulate.
        assert_eq!(loaded.markdown.trim_end(), "Line");
    }

    // ---- binary-sentinel framing (current save path) ---------------------

    #[test]
    fn binary_round_trip_embed_extract() {
        let body: &[u8] = b"# Hello\n\n| a | b |\n|---|---|\n| 1 | 2 |\n";
        let zip_bytes = build_archive_bytes(&sample_sidecar(), &[])
            .expect("non-empty sidecar should produce an archive");

        let file_bytes = embed_in_md(body, &zip_bytes);
        let (text, archive) = extract_from_md(&file_bytes);
        assert_eq!(text.as_slice(), body);

        let archive = archive.expect("archive bytes should be found after the sentinel");
        assert_eq!(archive, zip_bytes);

        let parsed = extract_archive_bytes(&archive).expect("config.toml should parse");
        assert_eq!(parsed.tables["t1"].col_widths, vec![100.0, 200.0, 150.0]);
    }

    #[test]
    fn embed_in_md_adds_missing_trailing_newline() {
        let file_bytes = embed_in_md(b"abc", b"ZIP");
        let (text, archive) = extract_from_md(&file_bytes);
        assert_eq!(text, b"abc\n".to_vec());
        assert_eq!(archive, Some(b"ZIP".to_vec()));
    }

    #[test]
    fn extract_from_md_without_sentinel() {
        let (text, archive) = extract_from_md(b"# Plain doc\n");
        assert_eq!(text, b"# Plain doc\n".to_vec());
        assert!(archive.is_none());
    }

    #[test]
    fn build_archive_bytes_is_none_when_nothing_to_store() {
        assert!(build_archive_bytes(&Sidecar::default(), &[]).is_none());
    }

    #[test]
    fn block_files_alone_produce_archive() {
        let files = [BlockFile {
            filename: "0.cord".to_string(),
            content: "a = 1\n".to_string(),
        }];
        let bytes = build_archive_bytes(&Sidecar::default(), &files)
            .expect("block files alone should produce an archive");
        let parsed = extract_archive_bytes(&bytes).expect("config.toml should parse");
        assert!(parsed.tables.is_empty());
    }
}
|