chore: add feature spec for responses/AI matching/assignment rules

This commit is contained in:
James 2026-02-25 03:55:54 -05:00
parent bdd1d9fdde
commit e80dcad126
3 changed files with 446 additions and 20 deletions

333
FEATURE_SPEC_RESPONSES.md Normal file
View File

@ -0,0 +1,333 @@
# Feature Spec: Responses, AI Matching, Assignment Rules
## Context
Dealspace needs to separate *what buyers ask* (requests) from *what sellers provide* (responses), with AI automatically discovering which responses satisfy which requests via embeddings. Each AI-suggested match is confirmed by a human before the request is counted as answered.
## Locked Decisions
- Assignment rules: per deal
- Statements (typed text answers): IN SCOPE
- Extraction: async background worker
- Confirmation: internal users only (RBAC refinement later)
---
## 1. Schema Changes
### New tables (add to migrate.go as CREATE TABLE IF NOT EXISTS in the migrations slice)
```sql
-- Responses: seller-provided answers (document OR typed statement)
CREATE TABLE IF NOT EXISTS responses (
id TEXT PRIMARY KEY,
deal_id TEXT NOT NULL,
type TEXT NOT NULL CHECK (type IN ('document','statement')),
title TEXT NOT NULL,
body TEXT DEFAULT '', -- markdown: extracted doc content OR typed text
file_id TEXT DEFAULT '', -- populated for type='document'
extraction_status TEXT DEFAULT 'pending'
CHECK (extraction_status IN ('pending','processing','done','failed')),
created_by TEXT DEFAULT '',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (deal_id) REFERENCES deals(id)
);
-- Chunks: segments of a response body for fine-grained matching
CREATE TABLE IF NOT EXISTS response_chunks (
id TEXT PRIMARY KEY,
response_id TEXT NOT NULL,
chunk_index INTEGER NOT NULL,
text TEXT NOT NULL,
vector BLOB NOT NULL, -- []float32 serialised as little-endian bytes
FOREIGN KEY (response_id) REFERENCES responses(id)
);
-- N:M: AI-discovered links between requests and response chunks
CREATE TABLE IF NOT EXISTS request_links (
request_id TEXT NOT NULL,
response_id TEXT NOT NULL,
chunk_id TEXT NOT NULL,
confidence REAL NOT NULL, -- cosine similarity 0-1
auto_linked BOOLEAN DEFAULT 1,
confirmed BOOLEAN DEFAULT 0,
confirmed_by TEXT DEFAULT '',
confirmed_at DATETIME,
PRIMARY KEY (request_id, response_id, chunk_id)
);
-- Assignment rules: keyword → assignee, per deal
CREATE TABLE IF NOT EXISTS assignment_rules (
id TEXT PRIMARY KEY,
deal_id TEXT NOT NULL,
keyword TEXT NOT NULL, -- e.g. "Legal", "Tax", "HR"
assignee_id TEXT NOT NULL, -- profile ID
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (deal_id) REFERENCES deals(id)
);
```
### Additive migrations (append to additiveMigrationStmts in migrate.go)
```go
`ALTER TABLE diligence_requests ADD COLUMN assignee_id TEXT DEFAULT ''`,
`ALTER TABLE diligence_requests ADD COLUMN status TEXT DEFAULT 'open'`,
`ALTER TABLE files ADD COLUMN response_id TEXT DEFAULT ''`,
```
Note: status CHECK constraint can't be added via ALTER TABLE in SQLite — enforce in handler.
---
## 2. Fireworks Client
Create `internal/fireworks/client.go`:
```
Package: fireworks
Fireworks API key: read from the FIREWORKS_API_KEY environment variable — never hardcode the key in source or commit it to the repo
Base URL: https://api.fireworks.ai/inference/v1
Functions needed:
1. ExtractToMarkdown(ctx context.Context, imagesBase64 []string, filename string) (string, error)
- Model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
- System prompt: "You are a document extraction expert. Extract ALL content from this document into clean markdown. Preserve headings, tables, lists, and structure. Do not summarise — extract everything."
- Send up to 10 images per call (multi-page docs: batch into 10-page chunks, concatenate results)
- For XLSX files (no images): use a different path — just send the structured data as text
- Return full markdown string
2. EmbedText(ctx, texts []string) ([][]float32, error)
- Model: nomic-ai/nomic-embed-text-v1.5
- POST /embeddings (OpenAI-compatible)
- Batch up to 50 texts per call
- Return [][]float32
3. CosineSimilarity(a, b []float32) float32
- Pure Go dot product (normalised vectors)
```
---
## 3. PDF-to-Images Conversion
Create `internal/extract/pdf.go`:
```
Use exec("pdftoppm") subprocess:
pdftoppm -jpeg -r 150 input.pdf /tmp/prefix
→ produces /tmp/prefix-1.jpg, /tmp/prefix-2.jpg, ...
Read each JPEG → base64 encode → pass to fireworks.ExtractToMarkdown
For non-PDF files that are images (jpg/png): base64 encode directly, skip pdftoppm.
For XLSX: use excelize GetRows on all sheets → format as markdown table → skip vision model entirely.
For other binary types: attempt pdftoppm, fall back to filename+extension as minimal context.
Function signature:
FileToImages(path string) ([]string, error) // returns base64-encoded JPEG strings
```
---
## 4. Chunker
Create `internal/extract/chunker.go`:
```
ChunkMarkdown(text string) []string
- Split on markdown headings (## or ###) first
- If a section > 600 tokens (approx 2400 chars): split further at paragraph breaks (\n\n)
- If a paragraph > 600 tokens: split at sentence boundary (". ")
- Overlap: prepend last 80 chars of previous chunk to each chunk (context continuity)
- Minimum chunk length: 50 chars (discard shorter)
- Return []string of chunks
```
---
## 5. Extraction Worker
Create `internal/worker/extractor.go`:
```
type ExtractionJob struct {
ResponseID string
FilePath string // absolute path to uploaded file (or "" for statements)
DealID string
}
type Extractor struct {
db *sql.DB
fw *fireworks.Client
jobs chan ExtractionJob
}
func NewExtractor(db *sql.DB, fw *fireworks.Client) *Extractor
func (e *Extractor) Start() // launch 2 worker goroutines
func (e *Extractor) Enqueue(job ExtractionJob)
Worker loop:
1. Set responses.extraction_status = 'processing'
2. If file:
a. Convert to images (extract.FileToImages)
b. Call fw.ExtractToMarkdown → markdown body
c. UPDATE responses SET body=?, extraction_status='done'
3. If statement (body already set, skip extraction):
a. extraction_status → 'done' immediately
4. Chunk: extract.ChunkMarkdown(body)
5. Embed: fw.EmbedText(chunks) → [][]float32
6. Store each chunk: INSERT INTO response_chunks (id, response_id, chunk_index, text, vector)
- Serialise []float32 as little-endian bytes: each float32 = 4 bytes
7. Match against all open requests in this deal:
a. Load all diligence_requests for deal_id (embed their descriptions if not already embedded)
b. Embed request descriptions that have no embedding yet (store in a simple in-memory cache or re-embed each run — re-embed is fine for now)
c. For each (chunk, request) pair: compute cosine similarity
   d. If similarity >= 0.72: INSERT OR IGNORE INTO request_links (request_id, response_id, chunk_id, confidence, auto_linked, confirmed) VALUES (?, ?, ?, ?, 1, 0)
8. Log summary: "Response {id}: {N} chunks, {M} request links auto-created"
On error: SET extraction_status = 'failed', log error
```
---
## 6. Handler: Responses & Assignment Rules
Create `internal/handler/responses.go`:
```
Handlers:
POST /deals/responses/statement
- Fields: deal_id, title, body (markdown text)
- Create responses row (type='statement', extraction_status='pending')
- Enqueue extraction job (body already set, worker will chunk+embed+match)
- Redirect to /deals/{deal_id}?tab=requests
POST /deals/responses/confirm
- Fields: request_id, response_id, chunk_id
- UPDATE request_links SET confirmed=1, confirmed_by=profile.ID, confirmed_at=now
- Return 200 OK (HTMX partial or redirect)
POST /deals/responses/reject
- Fields: request_id, response_id, chunk_id
- DELETE FROM request_links WHERE ...
- Return 200 OK
GET /deals/responses/pending/{dealID}
- Returns all request_links WHERE confirmed=0 AND auto_linked=1
- Joined with requests (description) and responses (title, type)
- Returns JSON for HTMX partial
POST /deals/assignment-rules/save
- Fields: deal_id, rules[] (keyword + assignee_id pairs, JSON array)
- DELETE existing rules for deal, INSERT new set
- On save: re-run auto-assignment for all unassigned requests in deal
- Redirect back to deal settings
GET /deals/assignment-rules/{dealID}
- Returns JSON array of {id, keyword, assignee_id, assignee_name}
Auto-assignment function (call on: rule save, request import):
func autoAssignRequests(db, dealID):
- Load all assignment_rules for deal_id
- For each diligence_request WHERE assignee_id = '':
- Check if section contains any rule keyword (case-insensitive)
- If match: UPDATE diligence_requests SET assignee_id = rule.assignee_id
```
---
## 7. Wire Up in handler.go
Add to RegisterRoutes:
```go
// Responses & AI matching
mux.HandleFunc("/deals/responses/statement", h.requireAuth(h.handleCreateStatement))
mux.HandleFunc("/deals/responses/confirm", h.requireAuth(h.handleConfirmLink))
mux.HandleFunc("/deals/responses/reject", h.requireAuth(h.handleRejectLink))
mux.HandleFunc("/deals/responses/pending/", h.requireAuth(h.handlePendingLinks))
mux.HandleFunc("/deals/assignment-rules/save", h.requireAuth(h.handleSaveAssignmentRules))
mux.HandleFunc("/deals/assignment-rules/", h.requireAuth(h.handleGetAssignmentRules))
```
In Handler struct, add:
```go
extractor *worker.Extractor
fw *fireworks.Client
```
In New(): initialise both, call extractor.Start().
In handleFileUpload (files.go): after saving file, create a responses row (type='document') and enqueue extraction job.
---
## 8. Template Changes
### dealroom.templ — Requests tab
Current requests tab shows a list of requests. Add:
**A) Per-request: assignee + status badge**
- Show assignee name (or "Unassigned" in gray) next to each request
- Status pill: open (gray), in_progress (blue), answered (green), not_applicable (muted)
- If confirmed link exists: show "✓ Answered" with link to the response
- If pending auto-links exist: show "🤖 N AI matches — review" button (teal outline)
**B) Pending AI matches panel** (shown above request list if any pending)
- Collapsible section: "🤖 X AI-suggested matches waiting for review"
- Each row: Request description | → | Response title | Confidence % | [Confirm] [Reject]
- Confirm/Reject use fetch() POST to /deals/responses/confirm or /reject, then reload
**C) "Add Statement" button** (in requests toolbar)
- Opens a modal: Title + markdown textarea
- Submits to POST /deals/responses/statement
- After submit: shows in pending matches if AI matched any requests
**D) Assignment rules** (accessible via a gear icon or "Settings" in requests tab header)
- Inline expandable panel or small modal
- Table: Keyword | Assignee (dropdown of internal team members) | [Remove]
- [Add Rule] row at bottom
- Save button → POST /deals/assignment-rules/save
### Keep it clean
- Don't clutter the existing request rows — use progressive disclosure
- The "N AI matches" prompt should be prominent but not alarming
- Confidence shown as percentage (e.g. "87%"), not raw float
---
## 9. Files tab: extraction status
In the files table, add a small status indicator per file:
- ⏳ Extracting... (extraction_status = 'pending' or 'processing')
- ✓ (extraction_status = 'done') — subtle, no noise
- ⚠ (extraction_status = 'failed') — show tooltip with reason
Poll via a simple setInterval (every 5s) that reloads the file list if any files are pending — only while any files are pending, stop polling once all done.
---
## 10. Build & Deploy
After all code changes:
1. Run: cd ~/dev/dealroom && PATH=$PATH:/home/johan/go/bin:/usr/local/go/bin make build
2. Run: systemctl --user stop dealroom && cp bin/dealroom dealroom && systemctl --user start dealroom
3. Verify: curl -s -o /dev/null -w "%{http_code}" http://localhost:9300/ (expect 303)
4. Check logs: journalctl --user -u dealroom -n 30 --no-pager
5. Run: cd ~/dev/dealroom && git add -A && git commit -m "feat: responses, AI matching, assignment rules" && git push origin main
---
## Key Constants
Fireworks API key: supplied via the FIREWORKS_API_KEY environment variable (set in the systemd service unit); do not commit the key
Extraction model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
Embedding model: nomic-ai/nomic-embed-text-v1.5
Match threshold: 0.72 cosine similarity
Chunk size: ~600 tokens / ~2400 chars max (matches the chunker spec in section 4)
Chunk overlap: ~80 chars
Max images per vision call: 10
Worker concurrency: 2 goroutines
Files are stored at: data/uploads/ (relative to WorkingDirectory in the service)
DB path: data/db/dealroom.db

View File

@ -6,8 +6,12 @@ import (
"encoding/csv" "encoding/csv"
"fmt" "fmt"
"io" "io"
"log"
"net/http" "net/http"
"os"
"path/filepath"
"strings" "strings"
"time"
"dealroom/internal/rbac" "dealroom/internal/rbac"
"dealroom/templates" "dealroom/templates"
@ -89,6 +93,17 @@ func (h *Handler) handleRequestListUpload(w http.ResponseWriter, r *http.Request
return return
} }
// Save uploaded file to data/uploads/ for inspection and reprocessing
uploadsDir := "data/uploads"
os.MkdirAll(uploadsDir, 0755)
stamp := time.Now().Format("20060102-150405")
saveName := filepath.Join(uploadsDir, stamp+"-"+dealID+"-"+header.Filename)
if err := os.WriteFile(saveName, raw, 0644); err != nil {
log.Printf("Warning: could not save uploaded file: %v", err)
} else {
log.Printf("Saved upload: %s (%d bytes)", saveName, len(raw))
}
// Detect XLSX by filename extension or magic bytes (PK = zip/xlsx) // Detect XLSX by filename extension or magic bytes (PK = zip/xlsx)
fname := strings.ToLower(header.Filename) fname := strings.ToLower(header.Filename)
isXLSX := strings.HasSuffix(fname, ".xlsx") || strings.HasSuffix(fname, ".xls") || isXLSX := strings.HasSuffix(fname, ".xlsx") || strings.HasSuffix(fname, ".xls") ||
@ -126,33 +141,105 @@ func (h *Handler) handleRequestListUpload(w http.ResponseWriter, r *http.Request
rows = csvRows rows = csvRows
} }
// Detect column indices from header row using common DD checklist naming conventions // Log first 12 rows of the file for debugging
// Falls back to positional (col 0=section, 1=item#, 2=description, 3=priority) for ri, row := range rows {
idxSection := 0 if ri >= 12 {
idxItem := 1 break
idxDesc := 2 }
idxPriority := -1 log.Printf("[upload-debug] row %d: %v\n", ri, row)
}
if len(rows) > 0 { // Scan up to first 12 rows to find the actual header row (highest keyword score).
for ci, cell := range rows[0] { // Many DD checklists have title/metadata rows before the real column headers.
idxSection := -1
idxItem := -1
idxDesc := -1
idxPriority := -1
headerRowIdx := 0
bestScore := 0
for ri, record := range rows {
if ri >= 12 {
break
}
score := 0
tmpSection, tmpItem, tmpDesc, tmpPri := -1, -1, -1, -1
for ci, cell := range record {
h := strings.ToLower(strings.TrimSpace(cell)) h := strings.ToLower(strings.TrimSpace(cell))
switch { if h == "" {
case contains(h, "section", "category", "topic", "area", "phase", "workstream"): continue
idxSection = ci }
case contains(h, "item #", "item#", "item no", "no.", "ref", "number", "#"): if contains(h, "section", "category", "topic", "area", "phase", "workstream") {
idxItem = ci tmpSection = ci
case contains(h, "description", "request", "document", "information", "detail", "item") && ci != idxSection: score += 3
idxDesc = ci } else if contains(h, "description", "request", "document", "information requested", "detail") {
case contains(h, "priority", "urgency", "importance", "criticality"): tmpDesc = ci
idxPriority = ci score += 3
} else if contains(h, "priority", "urgency", "importance", "criticality") {
tmpPri = ci
score += 2
} else if h == "#" || h == "no." || h == "no" || h == "item #" || h == "item#" ||
contains(h, "item no", "ref no", "ref #") {
tmpItem = ci
score += 2
}
}
if score > bestScore {
bestScore = score
headerRowIdx = ri
if tmpSection >= 0 {
idxSection = tmpSection
}
if tmpItem >= 0 {
idxItem = tmpItem
}
if tmpDesc >= 0 {
idxDesc = tmpDesc
}
if tmpPri >= 0 {
idxPriority = tmpPri
} }
} }
} }
// If no header found, fall back to positional
if bestScore < 2 {
headerRowIdx = 0
idxSection = 0
idxItem = 1
idxDesc = 2
}
// If desc still not found, guess: pick the column with the longest average text
if idxDesc < 0 && len(rows) > headerRowIdx+1 {
maxLen := 0
for ci := range rows[headerRowIdx] {
total := 0
count := 0
for ri := headerRowIdx + 1; ri < len(rows) && ri < headerRowIdx+20; ri++ {
if ci < len(rows[ri]) {
total += len(strings.TrimSpace(rows[ri][ci]))
count++
}
}
avg := 0
if count > 0 {
avg = total / count
}
if avg > maxLen && ci != idxSection && ci != idxItem {
maxLen = avg
idxDesc = ci
}
}
}
log.Printf("[upload-debug] header at row %d (score=%d) | section=%d item=%d desc=%d priority=%d\n",
headerRowIdx, bestScore, idxSection, idxItem, idxDesc, idxPriority)
var items []reqRow var items []reqRow
for ri, record := range rows { for ri, record := range rows {
if ri == 0 { if ri <= headerRowIdx {
continue // skip header continue // skip title rows + header row itself
} }
if len(record) == 0 { if len(record) == 0 {
continue continue
@ -253,7 +340,7 @@ func (h *Handler) handleRequestListUpload(w http.ResponseWriter, r *http.Request
h.logActivity(dealID, profile.ID, profile.OrganizationID, "upload", "request_list", fmt.Sprintf("%d items", len(items)), "") h.logActivity(dealID, profile.ID, profile.OrganizationID, "upload", "request_list", fmt.Sprintf("%d items", len(items)), "")
http.Redirect(w, r, "/deals/"+dealID, http.StatusSeeOther) http.Redirect(w, r, "/deals/"+dealID+"?tab=requests", http.StatusSeeOther)
} }
func (h *Handler) autoAssignFilesToRequests(dealID string) { func (h *Handler) autoAssignFilesToRequests(dealID string) {

View File

@ -516,6 +516,12 @@ templ DealRoomDetail(profile *model.Profile, deal *model.Deal, folders []*model.
: 'px-4 py-2 text-sm font-medium border-b-2 border-transparent text-gray-500 hover:text-gray-300'; : 'px-4 py-2 text-sm font-medium border-b-2 border-transparent text-gray-500 hover:text-gray-300';
} }
// Auto-switch tab based on ?tab= query param (e.g. after request list upload)
(function() {
var tab = new URLSearchParams(window.location.search).get('tab');
if (tab) { showTab(tab); }
})();
function filterFiles() { function filterFiles() {
var q = document.getElementById('fileSearch').value.toLowerCase(); var q = document.getElementById('fileSearch').value.toLowerCase();
document.querySelectorAll('.file-row').forEach(function(row) { document.querySelectorAll('.file-row').forEach(function(row) {