From e80dcad126983dc52483cbea724d428f6d53b647 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 25 Feb 2026 03:55:54 -0500 Subject: [PATCH] chore: add feature spec for responses/AI matching/assignment rules --- FEATURE_SPEC_RESPONSES.md | 333 +++++++++++++++++++++++++++++++++++ internal/handler/requests.go | 127 ++++++++++--- templates/dealroom.templ | 6 + 3 files changed, 446 insertions(+), 20 deletions(-) create mode 100644 FEATURE_SPEC_RESPONSES.md diff --git a/FEATURE_SPEC_RESPONSES.md b/FEATURE_SPEC_RESPONSES.md new file mode 100644 index 0000000..ed7ec88 --- /dev/null +++ b/FEATURE_SPEC_RESPONSES.md @@ -0,0 +1,333 @@ +# Feature Spec: Responses, AI Matching, Assignment Rules + +## Context +Dealspace needs to separate *what buyers ask* (requests) from *what sellers provide* (responses), with AI automatically discovering which responses satisfy which requests via embeddings. Confirmed by a human before being counted as answered. + +## Locked Decisions +- Assignment rules: per deal +- Statements (typed text answers): IN SCOPE +- Extraction: async background worker +- Confirmation: internal users only (RBAC refinement later) + +--- + +## 1. 
Schema Changes + +### New tables (add to migrate.go as CREATE TABLE IF NOT EXISTS in the migrations slice) + +```sql +-- Responses: seller-provided answers (document OR typed statement) +CREATE TABLE IF NOT EXISTS responses ( + id TEXT PRIMARY KEY, + deal_id TEXT NOT NULL, + type TEXT NOT NULL CHECK (type IN ('document','statement')), + title TEXT NOT NULL, + body TEXT DEFAULT '', -- markdown: extracted doc content OR typed text + file_id TEXT DEFAULT '', -- populated for type='document' + extraction_status TEXT DEFAULT 'pending' + CHECK (extraction_status IN ('pending','processing','done','failed')), + created_by TEXT DEFAULT '', + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (deal_id) REFERENCES deals(id) +); + +-- Chunks: segments of a response body for fine-grained matching +CREATE TABLE IF NOT EXISTS response_chunks ( + id TEXT PRIMARY KEY, + response_id TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + text TEXT NOT NULL, + vector BLOB NOT NULL, -- []float32 serialised as little-endian bytes + FOREIGN KEY (response_id) REFERENCES responses(id) +); + +-- N:M: AI-discovered links between requests and response chunks +CREATE TABLE IF NOT EXISTS request_links ( + request_id TEXT NOT NULL, + response_id TEXT NOT NULL, + chunk_id TEXT NOT NULL, + confidence REAL NOT NULL, -- cosine similarity 0-1 + auto_linked BOOLEAN DEFAULT 1, + confirmed BOOLEAN DEFAULT 0, + confirmed_by TEXT DEFAULT '', + confirmed_at DATETIME, + PRIMARY KEY (request_id, response_id, chunk_id) +); + +-- Assignment rules: keyword → assignee, per deal +CREATE TABLE IF NOT EXISTS assignment_rules ( + id TEXT PRIMARY KEY, + deal_id TEXT NOT NULL, + keyword TEXT NOT NULL, -- e.g. 
"Legal", "Tax", "HR" + assignee_id TEXT NOT NULL, -- profile ID + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (deal_id) REFERENCES deals(id) +); +``` + +### Additive migrations (append to additiveMigrationStmts in migrate.go) +```go +`ALTER TABLE diligence_requests ADD COLUMN assignee_id TEXT DEFAULT ''`, +`ALTER TABLE diligence_requests ADD COLUMN status TEXT DEFAULT 'open'`, +`ALTER TABLE files ADD COLUMN response_id TEXT DEFAULT ''`, +``` + +Note: status CHECK constraint can't be added via ALTER TABLE in SQLite — enforce in handler. + +--- + +## 2. Fireworks Client + +Create `internal/fireworks/client.go`: + +``` +Package: fireworks + +Fireworks API key: fw_RVcDe4c6mN4utKLsgA7hTm +Base URL: https://api.fireworks.ai/inference/v1 + +Functions needed: + +1. ExtractToMarkdown(ctx, imageBase64 []string, filename string) (string, error) + - Model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + - System prompt: "You are a document extraction expert. Extract ALL content from this document into clean markdown. Preserve headings, tables, lists, and structure. Do not summarise — extract everything." + - Send up to 10 images per call (multi-page docs: batch into 10-page chunks, concatenate results) + - For XLSX files (no images): use a different path — just send the structured data as text + - Return full markdown string + +2. EmbedText(ctx, texts []string) ([][]float32, error) + - Model: nomic-ai/nomic-embed-text-v1.5 + - POST /embeddings (OpenAI-compatible) + - Batch up to 50 texts per call + - Return [][]float32 + +3. CosineSimilarity(a, b []float32) float32 + - Pure Go dot product (normalised vectors) +``` + +--- + +## 3. PDF-to-Images Conversion + +Create `internal/extract/pdf.go`: + +``` +Use exec("pdftoppm") subprocess: + pdftoppm -jpeg -r 150 input.pdf /tmp/prefix + → produces /tmp/prefix-1.jpg, /tmp/prefix-2.jpg, ... 
+ +Read each JPEG → base64 encode → pass to fireworks.ExtractToMarkdown + +For non-PDF files that are images (jpg/png): base64 encode directly, skip pdftoppm. +For XLSX: use excelize GetRows on all sheets → format as markdown table → skip vision model entirely. +For other binary types: attempt pdftoppm, fall back to filename+extension as minimal context. + +Function signature: + FileToImages(path string) ([]string, error) // returns base64-encoded JPEG strings +``` + +--- + +## 4. Chunker + +Create `internal/extract/chunker.go`: + +``` +ChunkMarkdown(text string) []string + - Split on markdown headings (## or ###) first + - If a section > 600 tokens (approx 2400 chars): split further at paragraph breaks (\n\n) + - If a paragraph > 600 tokens: split at sentence boundary (". ") + - Overlap: prepend last 80 chars of previous chunk to each chunk (context continuity) + - Minimum chunk length: 50 chars (discard shorter) + - Return []string of chunks +``` + +--- + +## 5. Extraction Worker + +Create `internal/worker/extractor.go`: + +``` +type ExtractionJob struct { + ResponseID string + FilePath string // absolute path to uploaded file (or "" for statements) + DealID string +} + +type Extractor struct { + db *sql.DB + fw *fireworks.Client + jobs chan ExtractionJob +} + +func NewExtractor(db *sql.DB, fw *fireworks.Client) *Extractor +func (e *Extractor) Start() // launch 2 worker goroutines +func (e *Extractor) Enqueue(job ExtractionJob) + +Worker loop: +1. Set responses.extraction_status = 'processing' +2. If file: + a. Convert to images (extract.FileToImages) + b. Call fw.ExtractToMarkdown → markdown body + c. UPDATE responses SET body=?, extraction_status='done' +3. If statement (body already set, skip extraction): + a. extraction_status → 'done' immediately +4. Chunk: extract.ChunkMarkdown(body) +5. Embed: fw.EmbedText(chunks) → [][]float32 +6. 
Store each chunk: INSERT INTO response_chunks (id, response_id, chunk_index, text, vector) + - Serialise []float32 as little-endian bytes: each float32 = 4 bytes +7. Match against all open requests in this deal: + a. Load all diligence_requests for deal_id (embed their descriptions if not already embedded) + b. Embed request descriptions that have no embedding yet (store in a simple in-memory cache or re-embed each run — re-embed is fine for now) + c. For each (chunk, request) pair: compute cosine similarity + d. If similarity >= 0.72: INSERT OR IGNORE INTO request_links (request_id, response_id, chunk_id, confidence, auto_linked=1, confirmed=0) +8. Log summary: "Response {id}: {N} chunks, {M} request links auto-created" +On error: SET extraction_status = 'failed', log error +``` + +--- + +## 6. Handler: Responses & Assignment Rules + +Create `internal/handler/responses.go`: + +``` +Handlers: + +POST /deals/responses/statement + - Fields: deal_id, title, body (markdown text) + - Create responses row (type='statement', extraction_status='pending') + - Enqueue extraction job (body already set, worker will chunk+embed+match) + - Redirect to /deals/{deal_id}?tab=requests + +POST /deals/responses/confirm + - Fields: request_id, response_id, chunk_id + - UPDATE request_links SET confirmed=1, confirmed_by=profile.ID, confirmed_at=now + - Return 200 OK (HTMX partial or redirect) + +POST /deals/responses/reject + - Fields: request_id, response_id, chunk_id + - DELETE FROM request_links WHERE ... 
+ - Return 200 OK + +GET /deals/responses/pending/{dealID} + - Returns all request_links WHERE confirmed=0 AND auto_linked=1 + - Joined with requests (description) and responses (title, type) + - Returns JSON for HTMX partial + +POST /deals/assignment-rules/save + - Fields: deal_id, rules[] (keyword + assignee_id pairs, JSON array) + - DELETE existing rules for deal, INSERT new set + - On save: re-run auto-assignment for all unassigned requests in deal + - Redirect back to deal settings + +GET /deals/assignment-rules/{dealID} + - Returns JSON array of {id, keyword, assignee_id, assignee_name} + +Auto-assignment function (call on: rule save, request import): + func autoAssignRequests(db, dealID): + - Load all assignment_rules for deal_id + - For each diligence_request WHERE assignee_id = '': + - Check if section contains any rule keyword (case-insensitive) + - If match: UPDATE diligence_requests SET assignee_id = rule.assignee_id +``` + +--- + +## 7. Wire Up in handler.go + +Add to RegisterRoutes: +```go +// Responses & AI matching +mux.HandleFunc("/deals/responses/statement", h.requireAuth(h.handleCreateStatement)) +mux.HandleFunc("/deals/responses/confirm", h.requireAuth(h.handleConfirmLink)) +mux.HandleFunc("/deals/responses/reject", h.requireAuth(h.handleRejectLink)) +mux.HandleFunc("/deals/responses/pending/", h.requireAuth(h.handlePendingLinks)) +mux.HandleFunc("/deals/assignment-rules/save", h.requireAuth(h.handleSaveAssignmentRules)) +mux.HandleFunc("/deals/assignment-rules/", h.requireAuth(h.handleGetAssignmentRules)) +``` + +In Handler struct, add: +```go +extractor *worker.Extractor +fw *fireworks.Client +``` + +In New(): initialise both, call extractor.Start(). + +In handleFileUpload (files.go): after saving file, create a responses row (type='document') and enqueue extraction job. + +--- + +## 8. Template Changes + +### dealroom.templ — Requests tab + +Current requests tab shows a list of requests. 
Add: + +**A) Per-request: assignee + status badge** +- Show assignee name (or "Unassigned" in gray) next to each request +- Status pill: open (gray), in_progress (blue), answered (green), not_applicable (muted) +- If confirmed link exists: show "✓ Answered" with link to the response +- If pending auto-links exist: show "🤖 N AI matches — review" button (teal outline) + +**B) Pending AI matches panel** (shown above request list if any pending) +- Collapsible section: "🤖 X AI-suggested matches waiting for review" +- Each row: Request description | → | Response title | Confidence % | [Confirm] [Reject] +- Confirm/Reject use fetch() POST to /deals/responses/confirm or /reject, then reload + +**C) "Add Statement" button** (in requests toolbar) +- Opens a modal: Title + markdown textarea +- Submits to POST /deals/responses/statement +- After submit: shows in pending matches if AI matched any requests + +**D) Assignment rules** (accessible via a gear icon or "Settings" in requests tab header) +- Inline expandable panel or small modal +- Table: Keyword | Assignee (dropdown of internal team members) | [Remove] +- [Add Rule] row at bottom +- Save button → POST /deals/assignment-rules/save + +### Keep it clean +- Don't clutter the existing request rows — use progressive disclosure +- The "N AI matches" prompt should be prominent but not alarming +- Confidence shown as percentage (e.g. "87%"), not raw float + +--- + +## 9. Files tab: extraction status + +In the files table, add a small status indicator per file: +- ⏳ Extracting... (extraction_status = 'pending' or 'processing') +- ✓ (extraction_status = 'done') — subtle, no noise +- ⚠ (extraction_status = 'failed') — show tooltip with reason + +Poll via a simple setInterval (every 5s) that reloads the file list if any files are pending — only while any files are pending, stop polling once all done. + +--- + +## 10. Build & Deploy + +After all code changes: +1. 
Run: cd ~/dev/dealroom && PATH=$PATH:/home/johan/go/bin:/usr/local/go/bin make build
+2. Run: systemctl --user stop dealroom && cp bin/dealroom dealroom && systemctl --user start dealroom
+3. Verify: curl -s -o /dev/null -w "%{http_code}" http://localhost:9300/ (expect 303)
+4. Check logs: journalctl --user -u dealroom -n 30 --no-pager
+5. Run: cd ~/dev/dealroom && git add -A && git commit -m "feat: responses, AI matching, assignment rules" && git push origin main
+
+---
+
+## Key Constants
+
+Fireworks API key: FIREWORKS_API_KEY environment variable (never commit the secret to the repo; rotate any key that was previously pasted into this document)
+Extraction model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+Embedding model: nomic-ai/nomic-embed-text-v1.5
+Match threshold: 0.72 cosine similarity
+Chunk size: ~600 tokens / ~2400 chars max (matches the chunker spec in section 4)
+Chunk overlap: ~80 chars
+Max images per vision call: 10
+Worker concurrency: 2 goroutines
+
+Files are stored at: data/uploads/ (relative to WorkingDirectory in the service)
+DB path: data/db/dealroom.db
diff --git a/internal/handler/requests.go b/internal/handler/requests.go
index 3dd6f15..d2e7b85 100644
--- a/internal/handler/requests.go
+++ b/internal/handler/requests.go
@@ -6,8 +6,12 @@ import (
 	"encoding/csv"
 	"fmt"
 	"io"
+	"log"
 	"net/http"
+	"os"
+	"path/filepath"
 	"strings"
+	"time"
 
 	"dealroom/internal/rbac"
 	"dealroom/templates"
@@ -89,6 +93,17 @@ func (h *Handler) handleRequestListUpload(w http.ResponseWriter, r *http.Request
 		return
 	}
 
+	// Save uploaded file to data/uploads/ for inspection and reprocessing
+	uploadsDir := "data/uploads"
+	os.MkdirAll(uploadsDir, 0755)
+	stamp := time.Now().Format("20060102-150405")
+	saveName := filepath.Join(uploadsDir, stamp+"-"+dealID+"-"+header.Filename)
+	if err := os.WriteFile(saveName, raw, 0644); err != nil {
+		log.Printf("Warning: could not save uploaded file: %v", err)
+	} else {
+		log.Printf("Saved upload: %s (%d bytes)", saveName, len(raw))
+	}
+
 	// Detect XLSX by filename extension or magic bytes (PK = zip/xlsx)
 	fname := strings.ToLower(header.Filename)
 	isXLSX := 
strings.HasSuffix(fname, ".xlsx") || strings.HasSuffix(fname, ".xls") || @@ -126,33 +141,105 @@ func (h *Handler) handleRequestListUpload(w http.ResponseWriter, r *http.Request rows = csvRows } - // Detect column indices from header row using common DD checklist naming conventions - // Falls back to positional (col 0=section, 1=item#, 2=description, 3=priority) - idxSection := 0 - idxItem := 1 - idxDesc := 2 - idxPriority := -1 + // Log first 12 rows of the file for debugging + for ri, row := range rows { + if ri >= 12 { + break + } + log.Printf("[upload-debug] row %d: %v\n", ri, row) + } - if len(rows) > 0 { - for ci, cell := range rows[0] { + // Scan up to first 12 rows to find the actual header row (highest keyword score). + // Many DD checklists have title/metadata rows before the real column headers. + idxSection := -1 + idxItem := -1 + idxDesc := -1 + idxPriority := -1 + headerRowIdx := 0 + bestScore := 0 + + for ri, record := range rows { + if ri >= 12 { + break + } + score := 0 + tmpSection, tmpItem, tmpDesc, tmpPri := -1, -1, -1, -1 + for ci, cell := range record { h := strings.ToLower(strings.TrimSpace(cell)) - switch { - case contains(h, "section", "category", "topic", "area", "phase", "workstream"): - idxSection = ci - case contains(h, "item #", "item#", "item no", "no.", "ref", "number", "#"): - idxItem = ci - case contains(h, "description", "request", "document", "information", "detail", "item") && ci != idxSection: - idxDesc = ci - case contains(h, "priority", "urgency", "importance", "criticality"): - idxPriority = ci + if h == "" { + continue + } + if contains(h, "section", "category", "topic", "area", "phase", "workstream") { + tmpSection = ci + score += 3 + } else if contains(h, "description", "request", "document", "information requested", "detail") { + tmpDesc = ci + score += 3 + } else if contains(h, "priority", "urgency", "importance", "criticality") { + tmpPri = ci + score += 2 + } else if h == "#" || h == "no." 
|| h == "no" || h == "item #" || h == "item#" || + contains(h, "item no", "ref no", "ref #") { + tmpItem = ci + score += 2 + } + } + if score > bestScore { + bestScore = score + headerRowIdx = ri + if tmpSection >= 0 { + idxSection = tmpSection + } + if tmpItem >= 0 { + idxItem = tmpItem + } + if tmpDesc >= 0 { + idxDesc = tmpDesc + } + if tmpPri >= 0 { + idxPriority = tmpPri } } } + // If no header found, fall back to positional + if bestScore < 2 { + headerRowIdx = 0 + idxSection = 0 + idxItem = 1 + idxDesc = 2 + } + + // If desc still not found, guess: pick the column with the longest average text + if idxDesc < 0 && len(rows) > headerRowIdx+1 { + maxLen := 0 + for ci := range rows[headerRowIdx] { + total := 0 + count := 0 + for ri := headerRowIdx + 1; ri < len(rows) && ri < headerRowIdx+20; ri++ { + if ci < len(rows[ri]) { + total += len(strings.TrimSpace(rows[ri][ci])) + count++ + } + } + avg := 0 + if count > 0 { + avg = total / count + } + if avg > maxLen && ci != idxSection && ci != idxItem { + maxLen = avg + idxDesc = ci + } + } + } + + log.Printf("[upload-debug] header at row %d (score=%d) | section=%d item=%d desc=%d priority=%d\n", + headerRowIdx, bestScore, idxSection, idxItem, idxDesc, idxPriority) + var items []reqRow for ri, record := range rows { - if ri == 0 { - continue // skip header + if ri <= headerRowIdx { + continue // skip title rows + header row itself } if len(record) == 0 { continue @@ -253,7 +340,7 @@ func (h *Handler) handleRequestListUpload(w http.ResponseWriter, r *http.Request h.logActivity(dealID, profile.ID, profile.OrganizationID, "upload", "request_list", fmt.Sprintf("%d items", len(items)), "") - http.Redirect(w, r, "/deals/"+dealID, http.StatusSeeOther) + http.Redirect(w, r, "/deals/"+dealID+"?tab=requests", http.StatusSeeOther) } func (h *Handler) autoAssignFilesToRequests(dealID string) { diff --git a/templates/dealroom.templ b/templates/dealroom.templ index 1378a63..cfece0e 100644 --- a/templates/dealroom.templ +++ 
b/templates/dealroom.templ @@ -516,6 +516,12 @@ templ DealRoomDetail(profile *model.Profile, deal *model.Deal, folders []*model. : 'px-4 py-2 text-sm font-medium border-b-2 border-transparent text-gray-500 hover:text-gray-300'; } + // Auto-switch tab based on ?tab= query param (e.g. after request list upload) + (function() { + var tab = new URLSearchParams(window.location.search).get('tab'); + if (tab) { showTab(tab); } + })(); + function filterFiles() { var q = document.getElementById('fileSearch').value.toLowerCase(); document.querySelectorAll('.file-row').forEach(function(row) {