Former/marker.go

719 lines
18 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"fmt"
"image"
"math"
"strings"
)
// Physical geometry of the printed marker and calibration-bar elements.
const (
// markerCellMM is the side length of one marker grid cell, in mm.
markerCellMM = 1.0
// bullseyeN is the side length, in cells, of the square finder core.
bullseyeN = 5
// markerN is the side length, in cells, of the whole marker grid.
markerN = 7 // 5×5 bullseye + 1-cell data ring
// dataBitCount is the number of perimeter cells of the markerN×markerN
// grid (4*(markerN-1)); each perimeter cell carries one data bit.
dataBitCount = 24
// calibBarCells is the number of cells in one calibration bar:
// a start cell, six value cells and a stop cell.
calibBarCells = 8
calibBarHeight = 5.0 // mm
calibBarGap = 3.0 // mm between bars
)
// Corner identifiers stored in MarkerData.CornerID (a 3-bit field of the
// marker data ring). The values 0-3 match the index order of the array
// returned by markerPositionsMM (TL, TR, BL, BR); CornerCenter has no entry
// in that array.
const (
CornerTL = 0 // top-left
CornerTR = 1 // top-right
CornerBL = 2 // bottom-left
CornerBR = 3 // bottom-right
CornerCenter = 4
)
// MarkerData is the payload carried by a marker's data ring.
// The field widths noted below are the ones used by encodeMarkerBits and
// decodeMarkerBits; values that need more bits do not round-trip.
type MarkerData struct {
// PageNum is stored in 4 bits.
PageNum int
// CornerID is stored in 3 bits (see the Corner* constants).
CornerID int
// NumFaces is stored in 5 bits.
NumFaces int
// LongestMM is stored as LongestMM/5 in 7 bits, so a decoded value is
// always a multiple of 5 and at most 635.
LongestMM int
}
// DetectedMarker is one marker found and successfully decoded by DetectMarkers.
type DetectedMarker struct {
// PixelCenter is the (x, y) center of the marker in pixels.
PixelCenter [2]float64
// CellSizePx is the estimated side length of one marker cell in pixels.
CellSizePx float64
// Data is the payload decoded from the marker's data ring.
Data MarkerData
// Rotation is the number of 90° clockwise rotations (0-3) that were
// applied to the sampled grid before it decoded.
Rotation int
}
// bullseyePattern builds the bullseyeN×bullseyeN finder core; true means black.
// Cells are grouped into concentric square rings around the centre cell:
// ring 0 (the centre) is black, ring 1 is white and ring 2 is black.
func bullseyePattern() [bullseyeN][bullseyeN]bool {
	const centre = 2 // row/column index of the centre cell

	var pattern [bullseyeN][bullseyeN]bool
	for row := range pattern {
		rowDist := row - centre
		if rowDist < 0 {
			rowDist = -rowDist
		}
		for col := range pattern[row] {
			colDist := col - centre
			if colDist < 0 {
				colDist = -colDist
			}
			// The ring index is the larger of the two axis distances.
			ring := rowDist
			if colDist > ring {
				ring = colDist
			}
			// Even rings are black, odd rings are white.
			pattern[row][col] = ring&1 == 0
		}
	}
	return pattern
}
// dataRingCells lists the (row, col) positions of the perimeter cells of the
// markerN×markerN grid in data-bit order: clockwise, starting at the top-left
// corner. The top and bottom rows contribute all of their cells, the right
// and left columns only the cells between the corners.
func dataRingCells() [dataBitCount][2]int {
	const last = markerN - 1

	var ring [dataBitCount][2]int
	n := 0
	put := func(row, col int) {
		ring[n] = [2]int{row, col}
		n++
	}

	for col := 0; col <= last; col++ { // top edge, left to right
		put(0, col)
	}
	for row := 1; row < last; row++ { // right edge, downwards
		put(row, last)
	}
	for col := last; col >= 0; col-- { // bottom edge, right to left
		put(last, col)
	}
	for row := last - 1; row > 0; row-- { // left edge, upwards
		put(row, 0)
	}
	return ring
}
// === Encoding ===

// encodeMarkerBits packs d into the 24 data-ring bits. Every field is written
// most-significant bit first.
//
// Bit layout (24 bits):
//
//	[0:2]   orientation: 1,1,0
//	[3:5]   corner ID (3 bits)
//	[6:9]   page number (4 bits)
//	[10:14] num faces (5 bits)
//	[15:21] longest side / 5 (7 bits → 0-635mm)
//	[22]    reserved (0)
//	[23]    parity (set when an odd number of bits 0-22 are set)
//
// CornerID, PageNum and NumFaces are truncated to their low 3, 4 and 5 bits.
// The longest side is divided by 5 (integer division) and clamped to the
// range 0-127, so lengths above 635mm encode as 635mm and negative lengths
// encode as 0 instead of leaking sign bits into the field.
func encodeMarkerBits(d MarkerData) [dataBitCount]bool {
	var bits [dataBitCount]bool

	// writeField stores the low `width` bits of value, MSB first, starting
	// at bits[start].
	writeField := func(start, width, value int) {
		for i := 0; i < width; i++ {
			bits[start+i] = (value>>uint(width-1-i))&1 == 1
		}
	}

	// Orientation prefix; decodeMarkerBits rejects rings that do not start
	// with it.
	bits[0], bits[1], bits[2] = true, true, false

	writeField(3, 3, d.CornerID)
	writeField(6, 4, d.PageNum)
	writeField(10, 5, d.NumFaces)

	ls := d.LongestMM / 5
	switch {
	case ls < 0:
		// A negative value would otherwise be shifted arithmetically and
		// set the field bits, decoding as a large positive length.
		ls = 0
	case ls > 127:
		ls = 127
	}
	writeField(15, 7, ls)

	bits[22] = false // reserved

	parity := false
	for _, set := range bits[:23] {
		if set {
			parity = !parity
		}
	}
	bits[23] = parity
	return bits
}
// decodeMarkerBits unpacks the 24 data-ring bits produced by encodeMarkerBits.
// It returns false (and a zero MarkerData) when the parity bit does not match
// bits 0-22 or when the ring does not start with the 1,1,0 orientation prefix.
// The reserved bit 22 is covered by the parity but otherwise ignored.
func decodeMarkerBits(bits [dataBitCount]bool) (MarkerData, bool) {
	// Parity bit 23 is set when an odd number of the first 23 bits are set.
	ones := 0
	for _, set := range bits[:23] {
		if set {
			ones++
		}
	}
	if bits[23] != (ones%2 == 1) {
		return MarkerData{}, false
	}

	// Orientation prefix must read 1,1,0.
	if !bits[0] || !bits[1] || bits[2] {
		return MarkerData{}, false
	}

	// field reads `width` bits starting at `start` as an MSB-first integer.
	field := func(start, width int) int {
		value := 0
		for _, set := range bits[start : start+width] {
			value <<= 1
			if set {
				value |= 1
			}
		}
		return value
	}

	return MarkerData{
		CornerID:  field(3, 3),
		PageNum:   field(6, 4),
		NumFaces:  field(10, 5),
		LongestMM: field(15, 7) * 5, // stored in units of 5mm
	}, true
}
// encodeMarkerGrid builds the full markerN×markerN cell grid for d; true
// means black. The bullseye finder core fills the inner cells (offset by one
// cell from the grid edge) and the encoded data bits fill the perimeter in
// dataRingCells order.
func encodeMarkerGrid(d MarkerData) [markerN][markerN]bool {
	var grid [markerN][markerN]bool

	// Finder core, shifted inwards by the one-cell data ring.
	for r, row := range bullseyePattern() {
		for c, black := range row {
			grid[r+1][c+1] = black
		}
	}

	// Data ring around it.
	bits := encodeMarkerBits(d)
	for i, pos := range dataRingCells() {
		grid[pos[0]][pos[1]] = bits[i]
	}
	return grid
}
// === SVG generation ===

// renderMarkerSVG appends one SVG <rect> per black cell of the marker that
// encodes data, centred on (cx, cy). Coordinates and sizes are written in mm
// with three decimals; white cells produce no output.
func renderMarkerSVG(b *strings.Builder, cx, cy float64, data MarkerData) {
	const rectFmt = `<rect x="%.3f" y="%.3f" width="%.3f" height="%.3f" fill="black"/>` + "\n"

	half := float64(markerN) * markerCellMM / 2.0
	left, top := cx-half, cy-half

	for r, row := range encodeMarkerGrid(data) {
		for c, black := range row {
			if !black {
				continue
			}
			fmt.Fprintf(b, rectFmt,
				left+float64(c)*markerCellMM,
				top+float64(r)*markerCellMM,
				markerCellMM, markerCellMM)
		}
	}
}
// CalibBar defines an encoded calibration barcode.
// WidthMM is both the physical width on the page and the encoded value.
// encodeCalibBar stores the value in six bits, so only widths 0-63 round-trip
// through encodeCalibBar/decodeCalibBar.
type CalibBar struct {
WidthMM int // bar width in mm
}
// encodeCalibBar returns the 8-cell pattern for a calibration bar; true means
// black. Cell 0 is the start delimiter and cell 7 the stop delimiter (both
// black); cells 1-6 hold the low six bits of widthMM, most significant first.
func encodeCalibBar(widthMM int) [calibBarCells]bool {
	var pattern [calibBarCells]bool
	pattern[0], pattern[calibBarCells-1] = true, true

	// Fill the value cells from the least significant bit (cell 6) upwards.
	rest := widthMM
	for cell := 6; cell >= 1; cell-- {
		pattern[cell] = rest&1 == 1
		rest >>= 1
	}
	return pattern
}
// decodeCalibBar reads the value out of an 8-cell calibration bar pattern.
// It returns false when either delimiter cell (first or last) is not black;
// otherwise cells 1-6 are interpreted as a 6-bit number, most significant
// bit first.
func decodeCalibBar(cells [calibBarCells]bool) (int, bool) {
	if !(cells[0] && cells[calibBarCells-1]) {
		return 0, false
	}
	value := 0
	for _, black := range cells[1:7] {
		value <<= 1
		if black {
			value |= 1
		}
	}
	return value, true
}
// renderCalibBarsSVG appends the SVG for a row of calibration barcodes.
// The first bar starts at x; each bar is WidthMM wide in total, split into
// calibBarCells equal cells of which the black ones are drawn as <rect>
// elements at height y. A grey "<width>mm" label is centred 2mm below every
// bar, and consecutive bars are separated by calibBarGap.
func renderCalibBarsSVG(b *strings.Builder, x, y float64, bars []CalibBar) {
	const (
		rectFmt  = `<rect x="%.3f" y="%.3f" width="%.3f" height="%.3f" fill="black"/>` + "\n"
		labelFmt = `<text x="%.3f" y="%.3f" font-family="monospace" font-size="1.5" fill="#bbb" text-anchor="middle">%dmm</text>` + "\n"
	)

	left := x
	for _, bar := range bars {
		width := float64(bar.WidthMM)
		cellW := width / float64(calibBarCells)

		for c, black := range encodeCalibBar(bar.WidthMM) {
			if !black {
				continue
			}
			fmt.Fprintf(b, rectFmt, left+float64(c)*cellW, y, cellW, calibBarHeight)
		}

		fmt.Fprintf(b, labelFmt, left+width/2, y+calibBarHeight+2.0, bar.WidthMM)
		left += width + calibBarGap
	}
}
// markerPositionsMM returns the expected centre positions, in mm, of the four
// corner markers on a page of size pageW×pageH. Each centre sits half a
// marker plus 5mm in from the two nearest page edges. The result is ordered
// top-left, top-right, bottom-left, bottom-right.
func markerPositionsMM(pageW, pageH float64) [4][2]float64 {
	const inset = float64(markerN)*markerCellMM/2 + 5.0

	left, top := inset, inset
	right, bottom := pageW-inset, pageH-inset

	return [4][2]float64{
		{left, top},     // TL
		{right, top},    // TR
		{left, bottom},  // BL
		{right, bottom}, // BR
	}
}
// calibBarSpecs returns the calibration bars placed on every page, in
// left-to-right order: 5mm, 10mm, 20mm and 50mm wide.
func calibBarSpecs() []CalibBar {
	return []CalibBar{
		{WidthMM: 5},
		{WidthMM: 10},
		{WidthMM: 20},
		{WidthMM: 50},
	}
}
// === Detection ===

// pixelRun is a maximal stretch of same-coloured pixels along one scan line,
// as produced by findHorizontalRuns and findVerticalRuns.
type pixelRun struct {
start int // offset of the run's first pixel along the scan line, counted from the image bounds' minimum
width int // run length in pixels
black bool // true when the run's pixels are below the threshold
}
// findHorizontalRuns splits row y of img into consecutive runs of black and
// white pixels, left to right. A pixel counts as black when its gray value is
// below threshold. y and the returned run offsets are relative to the image
// bounds' minimum corner. It returns nil for an image of zero width; otherwise
// the runs cover the whole row.
func findHorizontalRuns(img *image.Gray, y int, threshold uint8) []pixelRun {
	rect := img.Bounds()
	width := rect.Dx()
	if width == 0 {
		return nil
	}

	row := rect.Min.Y + y
	isBlack := func(col int) bool {
		return img.GrayAt(rect.Min.X+col, row).Y < threshold
	}

	var runs []pixelRun
	begin, colour := 0, isBlack(0)
	for col := 1; col < width; col++ {
		if next := isBlack(col); next != colour {
			// Colour changed: close the run that ended at col-1.
			runs = append(runs, pixelRun{start: begin, width: col - begin, black: colour})
			begin, colour = col, next
		}
	}
	// The last run always extends to the end of the row.
	return append(runs, pixelRun{start: begin, width: width - begin, black: colour})
}
// findVerticalRuns splits column x of img into consecutive runs of black and
// white pixels, top to bottom. A pixel counts as black when its gray value is
// below threshold. x and the returned run offsets are relative to the image
// bounds' minimum corner. It returns nil for an image of zero height;
// otherwise the runs cover the whole column.
func findVerticalRuns(img *image.Gray, x int, threshold uint8) []pixelRun {
	rect := img.Bounds()
	height := rect.Dy()
	if height == 0 {
		return nil
	}

	col := rect.Min.X + x
	isBlack := func(row int) bool {
		return img.GrayAt(col, rect.Min.Y+row).Y < threshold
	}

	var runs []pixelRun
	begin, colour := 0, isBlack(0)
	for row := 1; row < height; row++ {
		if next := isBlack(row); next != colour {
			// Colour changed: close the run that ended at row-1.
			runs = append(runs, pixelRun{start: begin, width: row - begin, black: colour})
			begin, colour = row, next
		}
	}
	// The last run always extends to the end of the column.
	return append(runs, pixelRun{start: begin, width: height - begin, black: colour})
}
// bullseyeCandidate is one possible bullseye cross-section found on a scan line.
type bullseyeCandidate struct {
	center     float64 // pixel coordinate of the middle of the central black run, along the scan direction
	cellSizePx float64 // estimated size of one marker cell in pixels
}

// findBullseyeInRuns looks for B-W-B-W-B sequences with roughly 1:1:1:1:1
// ratio and returns one candidate per match.
//
// The cell size is the mean width of the three inner runs (W-B-W) and must lie
// in [minCellPx, maxCellPx]. Each inner run must be within 40% of that size.
// The two outer black runs are only required to be between 0.5 and 2.5 cells
// wide, because an adjacent black data-ring cell can merge into them.
func findBullseyeInRuns(runs []pixelRun, minCellPx, maxCellPx float64) []bullseyeCandidate {
	// outside reports whether width, measured in cells, falls outside [lo, hi].
	outside := func(width int, cell, lo, hi float64) bool {
		ratio := float64(width) / cell
		return ratio < lo || ratio > hi
	}

	var found []bullseyeCandidate
	for i := 0; i+4 < len(runs); i++ {
		win := runs[i : i+5]

		// Colours must alternate, starting and ending on black.
		if !(win[0].black && !win[1].black && win[2].black && !win[3].black && win[4].black) {
			continue
		}

		// Outer runs are unreliable for sizing, so estimate from the inner three.
		cell := float64(win[1].width+win[2].width+win[3].width) / 3.0
		if cell < minCellPx || cell > maxCellPx {
			continue
		}

		if outside(win[1].width, cell, 0.6, 1.4) ||
			outside(win[2].width, cell, 0.6, 1.4) ||
			outside(win[3].width, cell, 0.6, 1.4) {
			continue
		}
		if outside(win[0].width, cell, 0.5, 2.5) || outside(win[4].width, cell, 0.5, 2.5) {
			continue
		}

		// Skip the first two runs, then go half-way into the middle run.
		center := float64(win[0].start) + float64(win[0].width) + float64(win[1].width) + float64(win[2].width)/2.0
		found = append(found, bullseyeCandidate{center: center, cellSizePx: cell})
	}
	return found
}
// DetectMarkers finds all fiducial markers in a grayscale image.
//
// dpiEstimate is the approximate scan resolution. It fixes the expected cell
// size in pixels (markerCellMM * dpiEstimate / 25.4) and, from that, the
// accepted cell-size range (0.4x-2.5x), the row scan step (a third of a cell,
// at least one pixel) and the clustering radius (three cells).
//
// Detection runs in four phases:
//  1. scan rows for B-W-B-W-B run patterns (bullseye cross-sections),
//  2. confirm every hit with a vertical cross-section through the same column,
//  3. merge confirmed hits that lie close together into clusters,
//  4. sample the marker grid around each cluster center and decode it.
//
// Only clusters whose grid decodes successfully are returned. PixelCenter of
// a returned marker is measured from img.Bounds().Min, the same frame the run
// scanners report positions in.
func DetectMarkers(img *image.Gray, dpiEstimate float64) []DetectedMarker {
	bounds := img.Bounds()
	w, h := bounds.Dx(), bounds.Dy()
	threshold := otsuThreshold(img)
	expectedCellPx := markerCellMM * dpiEstimate / 25.4
	minCell := expectedCellPx * 0.4
	maxCell := expectedCellPx * 2.5
	debugLog("DetectMarkers: image %dx%d, dpiEst=%.0f, expectedCell=%.1fpx, threshold=%d",
		w, h, dpiEstimate, expectedCellPx, threshold)

	// Phase 1: horizontal scan for B-W-B-W-B pattern
	type candidate struct {
		x, y       float64
		cellSizePx float64
	}
	var hCandidates []candidate
	// Stepping by a third of a cell guarantees several scan lines cross the
	// bullseye's central cell.
	step := max(1, int(expectedCellPx/3))
	for y := 0; y < h; y += step {
		runs := findHorizontalRuns(img, y, threshold)
		for _, bc := range findBullseyeInRuns(runs, minCell, maxCell) {
			hCandidates = append(hCandidates, candidate{x: bc.center, y: float64(y), cellSizePx: bc.cellSizePx})
		}
	}
	debugLog("DetectMarkers: %d horizontal candidates", len(hCandidates))

	// Phase 2: verify each candidate with vertical cross-section
	type verifiedCenter struct {
		x, y       float64
		cellSizePx float64
	}
	var verified []verifiedCenter
	for _, hc := range hCandidates {
		ix := int(hc.x)
		if ix < 0 || ix >= w {
			continue
		}
		vRuns := findVerticalRuns(img, ix, threshold)
		vCands := findBullseyeInRuns(vRuns, minCell, maxCell)
		for _, vc := range vCands {
			// The vertical hit must be centred near the scanned row.
			if math.Abs(vc.center-hc.y) < hc.cellSizePx*2 {
				avgCell := (hc.cellSizePx + vc.cellSizePx) / 2
				verified = append(verified, verifiedCenter{
					x: hc.x, y: (hc.y + vc.center) / 2, cellSizePx: avgCell,
				})
			}
		}
	}
	debugLog("DetectMarkers: %d verified centers", len(verified))

	// Phase 3: cluster nearby detections
	used := make([]bool, len(verified))
	var clusters []verifiedCenter
	clusterRadius := expectedCellPx * 3
	for i := range verified {
		if used[i] {
			continue
		}
		// cx, cy, cs hold the running mean of the cluster seeded by verified[i].
		cx, cy, cs := verified[i].x, verified[i].y, verified[i].cellSizePx
		count := 1.0
		used[i] = true
		for j := i + 1; j < len(verified); j++ {
			if used[j] {
				continue
			}
			dx := verified[j].x - cx
			dy := verified[j].y - cy
			if math.Sqrt(dx*dx+dy*dy) < clusterRadius {
				cx = (cx*count + verified[j].x) / (count + 1)
				cy = (cy*count + verified[j].y) / (count + 1)
				cs = (cs*count + verified[j].cellSizePx) / (count + 1)
				count++
				used[j] = true
			}
		}
		clusters = append(clusters, verifiedCenter{x: cx, y: cy, cellSizePx: cs})
	}
	debugLog("DetectMarkers: %d clusters", len(clusters))

	// Phase 4: read grid and decode data for each cluster
	//
	// readMarkerGrid samples img at absolute pixel coordinates, whereas the
	// cluster centers are relative to bounds.Min. Shift them so that images
	// whose bounds do not start at (0,0) are sampled at the right place.
	originX, originY := float64(bounds.Min.X), float64(bounds.Min.Y)
	var markers []DetectedMarker
	for _, cl := range clusters {
		grid := readMarkerGrid(img, [2]float64{cl.x + originX, cl.y + originY}, cl.cellSizePx, threshold)
		data, rot, ok := decodeMarkerFromGrid(grid)
		if ok {
			markers = append(markers, DetectedMarker{
				PixelCenter: [2]float64{cl.x, cl.y},
				CellSizePx:  cl.cellSizePx,
				Data:        data,
				Rotation:    rot,
			})
			debugLog(" marker decoded: corner=%d page=%d faces=%d longest=%dmm rot=%d at (%.0f,%.0f)",
				data.CornerID, data.PageNum, data.NumFaces, data.LongestMM, rot, cl.x, cl.y)
		}
	}
	debugLog("DetectMarkers: %d markers decoded", len(markers))
	return markers
}
// readMarkerGrid samples the markerN×markerN cell grid of a marker centred at
// center (x, y in image pixel coordinates) with cells cellPx pixels wide.
// One pixel is read at the middle of every cell; the cell is true (black)
// when that pixel's gray value is below threshold. Cells whose sample point
// falls outside the image stay false.
func readMarkerGrid(img *image.Gray, center [2]float64, cellPx float64, threshold uint8) [markerN][markerN]bool {
	const half = float64(markerN) / 2.0

	var grid [markerN][markerN]bool
	rect := img.Bounds()
	for r := range grid {
		// +0.5 moves the sample point from the cell's edge to its middle.
		py := int(center[1] + (float64(r)-half+0.5)*cellPx)
		for c := range grid[r] {
			px := int(center[0] + (float64(c)-half+0.5)*cellPx)
			if !(image.Point{X: px, Y: py}).In(rect) {
				continue
			}
			grid[r][c] = img.GrayAt(px, py).Y < threshold
		}
	}
	return grid
}
// rotateGrid90CW returns g rotated by 90° clockwise: the source cell at
// (row r, column c) ends up at (row c, column markerN-1-r).
func rotateGrid90CW(g [markerN][markerN]bool) [markerN][markerN]bool {
	const last = markerN - 1

	var out [markerN][markerN]bool
	// Walk the destination grid and pull each cell from its source position.
	for row := range out {
		for col := range out[row] {
			out[row][col] = g[last-col][row]
		}
	}
	return out
}
// verifyBullseye reports whether the inner cells of g (the area inside the
// one-cell data ring) match the bullseye finder pattern. Up to three
// mismatching cells are tolerated.
func verifyBullseye(g [markerN][markerN]bool) bool {
	const maxMismatches = 3

	mismatches := 0
	for r, row := range bullseyePattern() {
		for c, want := range row {
			if g[r+1][c+1] != want {
				mismatches++
			}
		}
	}
	return mismatches <= maxMismatches
}
// extractDataRing reads the perimeter cells of g in dataRingCells order and
// returns them as the data bits of the marker.
func extractDataRing(g [markerN][markerN]bool) [dataBitCount]bool {
	var bits [dataBitCount]bool
	for i, pos := range dataRingCells() {
		bits[i] = g[pos[0]][pos[1]]
	}
	return bits
}
// decodeMarkerFromGrid tries to decode a sampled marker grid in each of its
// four orientations. The grid is tested as given first and then after one,
// two and three 90° clockwise rotations. The first orientation whose bullseye
// verifies and whose data ring decodes wins; its payload and the number of
// rotations applied are returned. When no orientation decodes, the result is
// a zero MarkerData, rotation 0 and false.
func decodeMarkerFromGrid(grid [markerN][markerN]bool) (MarkerData, int, bool) {
	candidate := grid
	for rot := 0; rot < 4; rot++ {
		if rot > 0 {
			candidate = rotateGrid90CW(candidate)
		}
		if !verifyBullseye(candidate) {
			continue
		}
		if data, ok := decodeMarkerBits(extractDataRing(candidate)); ok {
			return data, rot, true
		}
	}
	return MarkerData{}, 0, false
}
// === Calibration bar measurement ===

// MeasureCalibBarDPI measures encoded calibration barcodes and returns precise DPI.
//
// bars are laid out left to right starting at (barX, barY) in mm, each
// followed by calibBarGap, matching renderCalibBarsSVG. The inverse of xform
// is used to map mm positions to pixel positions. For every bar, the pixel
// row through the bar's vertical middle is scanned around the bar's expected
// extent for the first and last black pixel (the start/stop delimiters are
// always black). The width in pixels divided by the bar's width in mm gives
// one px/mm sample.
//
// The result is the mean px/mm over all measurable bars times 25.4, or 0 when
// no bar could be measured. Bars with a non-positive width, bars whose row
// falls outside the image and bars whose first and last black pixels are no
// more than 5 pixels apart are skipped.
func MeasureCalibBarDPI(img *image.Gray, threshold uint8, xform affineTransform, barX, barY float64, bars []CalibBar) float64 {
	inv := invertAffine(xform)
	bounds := img.Bounds()
	cy := barY + calibBarHeight/2

	var sumPxPerMM float64
	count := 0
	nextX := barX
	for _, bar := range bars {
		// Advance the layout cursor up front so every skip path below keeps
		// the following bars at their correct positions.
		bx := nextX
		widthMM := float64(bar.WidthMM)
		nextX += widthMM + calibBarGap

		if bar.WidthMM <= 0 {
			// Nothing to measure, and dividing by the width would yield Inf/NaN.
			continue
		}

		leftPxF, pyF := inv.transform(bx, cy)
		rightPxF, _ := inv.transform(bx+widthMM, cy)
		py := int(pyF)
		if py < bounds.Min.Y || py >= bounds.Max.Y {
			continue
		}

		// Margin must be less than half the gap (1.5mm) to avoid adjacent bar overlap
		pxPerMM := math.Abs(rightPxF-leftPxF) / widthMM
		marginPx := pxPerMM * 1.0
		searchL := max(bounds.Min.X, int(math.Min(leftPxF, rightPxF)-marginPx))
		searchR := min(bounds.Max.X-1, int(math.Max(leftPxF, rightPxF)+marginPx))

		// A separate flag is used instead of a -1 sentinel because pixel
		// coordinates can be negative when the image bounds are.
		found := false
		firstBlack, lastBlack := 0, 0
		for px := searchL; px <= searchR; px++ {
			if img.GrayAt(px, py).Y >= threshold {
				continue
			}
			if !found {
				firstBlack = px
				found = true
			}
			lastBlack = px
		}
		if !found || lastBlack-firstBlack <= 5 {
			continue
		}

		// Pixels firstBlack..lastBlack are all part of the bar, so its width
		// is the inclusive pixel count, one more than the index difference.
		measuredPx := float64(lastBlack - firstBlack + 1)
		sumPxPerMM += measuredPx / widthMM
		count++
		debugLog(" calibbar %dmm: measured %.1fpx → %.1f px/mm",
			bar.WidthMM, measuredPx, measuredPx/widthMM)
	}
	if count == 0 {
		return 0
	}
	return (sumPxPerMM / float64(count)) * 25.4
}
// === Template erasure ===

// TemplateElementMap builds a predicate for "is this point part of the printed
// template?". The returned function takes page coordinates in mm and reports
// true for any of:
//   - points closer than 8mm to a page edge,
//   - the four corner markers, padded by 1mm,
//   - the calibration bars plus a small padding and the label strip below them,
//   - points within 1mm of a cell border,
//   - the 10mm×7mm face-number label zone at the top-left of every cell,
//   - the header strip (y < 18mm),
//   - the scale bar area (bottom 25mm of the left half of the page).
//
// Grid lines (0.1mm at #e0e0e0) and the center cross (0.15mm at #ddd) are
// deliberately not covered: they are too faint and thin to survive
// thresholding + morph opening. They only need erasing if a wider stroke or
// darker color is used.
func TemplateElementMap(cfg FaceTemplateConfig, page pageLayout) func(mmX, mmY float64) bool {
	markerPos := markerPositionsMM(cfg.PageWidth, cfg.PageHeight)
	markerHalf := float64(markerN)*markerCellMM/2 + 1.0

	// Padded keep-out rectangle of every calibration bar, laid out the same
	// way the bars are rendered: left to right, separated by calibBarGap.
	type zone struct{ x0, x1, y0, y1 float64 }
	barY := cfg.PageHeight - 15.0
	left := markerPos[0][0] + float64(markerN)*markerCellMM/2 + 3
	var barZones []zone
	for _, bar := range calibBarSpecs() {
		width := float64(bar.WidthMM)
		barZones = append(barZones, zone{
			x0: left - 1,
			x1: left + width + 1,
			y0: barY - 1,
			y1: barY + calibBarHeight + 4, // extra room below for the text label
		})
		left += width + calibBarGap
	}

	return func(mmX, mmY float64) bool {
		const (
			pageMargin  = 8.0
			borderThick = 1.0 // mm tolerance for printed+scanned border
		)

		// Outside page margins
		if mmX < pageMargin || mmX > cfg.PageWidth-pageMargin ||
			mmY < pageMargin || mmY > cfg.PageHeight-pageMargin {
			return true
		}

		// Marker zones
		for _, mp := range markerPos {
			if math.Abs(mmX-mp[0]) < markerHalf && math.Abs(mmY-mp[1]) < markerHalf {
				return true
			}
		}

		// Data bar zones
		for _, z := range barZones {
			if mmX >= z.x0 && mmX <= z.x1 && mmY >= z.y0 && mmY <= z.y1 {
				return true
			}
		}

		for _, cell := range page.Cells {
			right, bottom := cell.X+cell.W, cell.Y+cell.H

			// Near the left or right border, within the cell's vertical extent
			nearSide := math.Abs(mmX-cell.X) < borderThick || math.Abs(mmX-right) < borderThick
			if nearSide && mmY >= cell.Y-1 && mmY <= bottom+1 {
				return true
			}

			// Near the top or bottom border, within the cell's horizontal extent
			nearCap := math.Abs(mmY-cell.Y) < borderThick || math.Abs(mmY-bottom) < borderThick
			if nearCap && mmX >= cell.X-1 && mmX <= right+1 {
				return true
			}

			// Face number label area (top-left, small zone)
			if mmX >= cell.X && mmX <= cell.X+10 && mmY >= cell.Y && mmY <= cell.Y+7 {
				return true
			}
		}

		// Header text area
		if mmY < 18 {
			return true
		}

		// Scale bar area (bottom of each page, left side)
		return mmY > cfg.PageHeight-25 && mmX < cfg.PageWidth/2
	}
}
// EraseTemplateFromMask clears every set pixel of mask that lies on a known
// template element. mask is a row-major w×h bitmap. For each set pixel, its
// position, shifted by (cropX0, cropY0), is passed through xform, and the
// resulting coordinates are handed to isTemplate; the pixel is cleared when
// isTemplate returns true. Unset pixels are never passed to isTemplate.
func EraseTemplateFromMask(mask []bool, w, h int, xform affineTransform, isTemplate func(mmX, mmY float64) bool, cropX0, cropY0 int) {
	for py := 0; py < h; py++ {
		rowBase := py * w
		srcY := float64(py + cropY0)
		for px := 0; px < w; px++ {
			idx := rowBase + px
			if mask[idx] {
				mmX, mmY := xform.transform(float64(px+cropX0), srcY)
				if isTemplate(mmX, mmY) {
					mask[idx] = false
				}
			}
		}
	}
}