diff --git a/.opencode/opencode.db b/.opencode/opencode.db index 4e86411..f31900e 100644 Binary files a/.opencode/opencode.db and b/.opencode/opencode.db differ diff --git a/.opencode/opencode.db-shm b/.opencode/opencode.db-shm index ba6df8f..b688802 100644 Binary files a/.opencode/opencode.db-shm and b/.opencode/opencode.db-shm differ diff --git a/.opencode/opencode.db-wal b/.opencode/opencode.db-wal index bdb67af..5172d12 100644 Binary files a/.opencode/opencode.db-wal and b/.opencode/opencode.db-wal differ diff --git a/api/chat.go b/api/chat.go index a781b3c..d9d28ef 100644 --- a/api/chat.go +++ b/api/chat.go @@ -12,6 +12,8 @@ import ( "strings" "sync" "time" + + "github.com/mish/dealspace/lib" ) // ChatRequest is the incoming chat message from the client. @@ -291,12 +293,28 @@ func callAnthropicAPI(userMessage string, history []Message) (string, error) { return "", fmt.Errorf("FIREWORKS_API_KEY not set") } - // System prompt first, then history, then new message - messages := []OAIMessage{{Role: "system", Content: ariaSystemPrompt}} - for _, m := range history { - messages = append(messages, OAIMessage{Role: m.Role, Content: m.Content}) + // Convert Message slice to map slice for sanitization + historyMaps := make([]map[string]string, len(history)) + for i, m := range history { + historyMaps[i] = map[string]string{ + "role": m.Role, + "content": m.Content, + } + } + + // Use sanitization layer to build safe messages + safeMessages, violations := lib.BuildSafeChatMessages(ariaSystemPrompt, historyMaps, userMessage, 4000) + if len(violations) > 0 { + log.Printf("Chat sanitization violations: %v", violations) + } + + // Convert back to OAI format + messages := make([]OAIMessage, len(safeMessages)) + for i, m := range safeMessages { + content, _ := m["content"].(string) + role, _ := m["role"].(string) + messages[i] = OAIMessage{Role: role, Content: content} } - messages = append(messages, OAIMessage{Role: "user", Content: userMessage}) reqBody := OAIRequest{ Model: "accounts/fireworks/models/llama-v3p3-70b-instruct", diff --git a/api/handlers.go b/api/handlers.go index eeb4f88..2993b66 100644 --- a/api/handlers.go +++ b/api/handlers.go @@ -1362,6 +1362,7 @@ func (h *Handlers) orgToMap(org *lib.Entry) map[string]any { result["name"] = orgData.Name result["domains"] = orgData.Domains result["role"] = orgData.Role + result["is_us"] = orgData.IsUs result["logo"] = orgData.Logo result["website"] = orgData.Website result["description"] = orgData.Description @@ -1860,6 +1861,7 @@ func (h *Handlers) UpdateOrg(w http.ResponseWriter, r *http.Request) { Name *string `json:"name"` Domains []string `json:"domains"` Role *string `json:"role"` + IsUs *bool `json:"is_us"` // mark this org as "us" — resets others Website *string `json:"website"` Description *string `json:"description"` Industry *string `json:"industry"` @@ -1907,6 +1909,28 @@ func (h *Handlers) UpdateOrg(w http.ResponseWriter, r *http.Request) { } orgData.Role = *req.Role } + if req.IsUs != nil && *req.IsUs { + // Clear is_us on all other orgs first + orgs, _ := lib.EntryRead(h.DB, h.Cfg, actorID, "", lib.EntryFilter{Type: lib.TypeOrganization}) + for _, other := range orgs { + if other.EntryID == orgID { + continue + } + var otherData lib.OrgData + if other.DataText != "" { + json.Unmarshal([]byte(other.DataText), &otherData) + } + if otherData.IsUs { + otherData.IsUs = false + dataJSON, _ := json.Marshal(otherData) + other.DataText = string(dataJSON) + _ = lib.EntryWrite(h.DB, h.Cfg, actorID, &other) + } + } + orgData.IsUs = true + } else if req.IsUs != nil && !*req.IsUs { + orgData.IsUs = false + } if req.Website != nil { orgData.Website = *req.Website } diff --git a/api/mcp.go b/api/mcp.go index 6fd9135..adfd41f 100644 --- a/api/mcp.go +++ b/api/mcp.go @@ -83,7 +83,7 @@ func updateRequestStatusTool() mcp.Tool { return mcp.NewTool("update_request_status", mcp.WithDescription("Update the status of a request"), mcp.WithString("request_id", mcp.Description("The request ID"), mcp.Required()), - mcp.WithString("status", mcp.Description("New status: open, in_process, partial, or complete"), mcp.Required()), + mcp.WithString("status", mcp.Description("New status: open, assigned, answered, review, or published"), mcp.Required()), ) } @@ -334,9 +334,9 @@ func updateRequestStatusHandler(db *lib.DB, cfg *lib.Config) server.ToolHandlerF return mcp.NewToolResultError("status is required"), nil } - validStatuses := map[string]bool{"open": true, "in_process": true, "partial": true, "complete": true} + validStatuses := map[string]bool{"open": true, "assigned": true, "answered": true, "review": true, "published": true} if !validStatuses[status] { - return mcp.NewToolResultError("Invalid status. Must be: open, in_process, partial, or complete"), nil + return mcp.NewToolResultError("Invalid status. Must be: open, assigned, answered, review, or published"), nil } entry, err := lib.EntryByID(db, cfg, requestID) diff --git a/lib/llm.go b/lib/llm.go index 4aa1298..d0f2333 100644 --- a/lib/llm.go +++ b/lib/llm.go @@ -6,9 +6,171 @@ import ( "fmt" "io" "net/http" + "regexp" "strings" ) +// InjectionDetection contains patterns that indicate potential LLM injection attacks +var injectionPatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)(ignore|forget|disregard)\s+(all|previous|your)\s+(instructions|prompts|training)`), + regexp.MustCompile(`(?i)system\s*:\s*you\s+are\s+now`), + regexp.MustCompile(`(?i)<\s*(system|assistant|user)\s*>`), + regexp.MustCompile(`(?i)\[\s*(system|assistant|user)\s*\]`), + regexp.MustCompile(`(?i)\{\s*(system|assistant|user)\s*\}`), + regexp.MustCompile(`(?i)you\s+are\s+(an?\s+)?(attacker|hacker|malicious)`), + regexp.MustCompile(`(?i)output\s*:\s*.*(?:password|secret|key|token)`), + regexp.MustCompile(`(?i)prompt\s*:\s*.*(?:override|bypass|ignore)`), +} + +// SanitizeResult indicates the outcome of sanitization +type SanitizeResult struct { + Cleaned string + Violations []string + Blocked bool +} + +// SanitizeUserInput sanitizes user input for LLM prompts (chat endpoint). +// Implements defense-in-depth: validation, injection detection, and message delimiters. +func SanitizeUserInput(input string, maxLen int) SanitizeResult { + result := SanitizeResult{Cleaned: input} + + // Layer 1: Length enforcement + if maxLen <= 0 { + maxLen = 4000 // Default limit + } + if len(input) > maxLen { + result.Cleaned = input[:maxLen] + result.Violations = append(result.Violations, "input_truncated") + } + + // Layer 2: Injection pattern detection + for _, pattern := range injectionPatterns { + if matches := pattern.FindAllString(result.Cleaned, -1); len(matches) > 0 { + result.Violations = append(result.Violations, "injection_pattern_detected") + result.Blocked = true + // Replace matches with safe placeholders + result.Cleaned = pattern.ReplaceAllString(result.Cleaned, "[FILTERED]") + } + } + + // Layer 3: Escape XML-like delimiters that could confuse message boundaries + result.Cleaned = strings.ReplaceAll(result.Cleaned, "", "<system>") + result.Cleaned = strings.ReplaceAll(result.Cleaned, "", "</system>") + result.Cleaned = strings.ReplaceAll(result.Cleaned, "", "<assistant>") + result.Cleaned = strings.ReplaceAll(result.Cleaned, "", "</assistant>") + result.Cleaned = strings.ReplaceAll(result.Cleaned, "", "<user>") + result.Cleaned = strings.ReplaceAll(result.Cleaned, "", "</user>") + + return result +} + +// WrapUserMessage wraps user input in XML-style delimiters for clear separation from system instructions. +// This makes it harder for user input to be interpreted as system/assistant messages. +func WrapUserMessage(content string) string { + return fmt.Sprintf("\n%s\n", content) +} + +// WrapSystemMessage wraps system content in XML-style delimiters. +func WrapSystemMessage(content string) string { + return fmt.Sprintf("\n%s\n", content) +} + +// SanitizeHTMLContent prepares HTML content for LLM processing (scrape endpoint). +// Removes scripts, styles, and other potentially malicious content before LLM sees it. +func SanitizeHTMLContent(html string, maxLen int) string { + if maxLen <= 0 { + maxLen = 50000 // Default limit for HTML + } + + // Layer 1: Remove scripts (both blocks entirely + scriptRegex := regexp.MustCompile(`(?is)]*>.*?`) + cleaned := scriptRegex.ReplaceAllString(html, "") + + // Layer 2: Remove `) + cleaned = styleRegex.ReplaceAllString(cleaned, "") + + // Layer 3: Remove onclick, onload, etc. event handlers + eventRegex := regexp.MustCompile(`(?i)\s+on\w+\s*=\s*"[^"]*"`) + cleaned = eventRegex.ReplaceAllString(cleaned, "") + + eventRegex2 := regexp.MustCompile(`(?i)\s+on\w+\s*=\s*'[^']*'`) + cleaned = eventRegex2.ReplaceAllString(cleaned, "") + + // Layer 4: Remove javascript: URLs + jsUrlRegex := regexp.MustCompile(`(?i)javascript\s*:\s*[^"'>\s]+`) + cleaned = jsUrlRegex.ReplaceAllString(cleaned, "") + + // Layer 5: Remove data: URLs that could be SVG/script-based + dataUrlRegex := regexp.MustCompile(`(?i)data\s*:\s*[^"'>\s]+`) + cleaned = dataUrlRegex.ReplaceAllString(cleaned, "[data-url-removed]") + + // Layer 6: Remove comments that might hide injection attempts + commentRegex := regexp.MustCompile(`(?s)`) + cleaned = commentRegex.ReplaceAllString(cleaned, "") + + // Layer 7: Normalize whitespace to reduce hidden characters + cleaned = regexp.MustCompile(`[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]`).ReplaceAllString(cleaned, "") + + // Layer 8: Length enforcement + if len(cleaned) > maxLen { + cleaned = cleaned[:maxLen] + } + + return cleaned +} + +// BuildSafeChatMessages constructs a properly delimited message list for chat LLM calls. +// Takes raw user input and sanitizes it before inclusion in messages. +func BuildSafeChatMessages(systemPrompt string, history []map[string]string, userMessage string, maxUserLen int) ([]map[string]interface{}, []string) { + messages := []map[string]interface{}{ + {"role": "system", "content": WrapSystemMessage(systemPrompt)}, + } + + // Process history through sanitization + for _, msg := range history { + role := msg["role"] + content := msg["content"] + + // Sanitize based on role + if role == "user" { + result := SanitizeUserInput(content, maxUserLen) + content = WrapUserMessage(result.Cleaned) + } + // Assistant messages are trusted (came from our system) + + messages = append(messages, map[string]interface{}{ + "role": role, + "content": content, + }) + } + + // Sanitize current user message + result := SanitizeUserInput(userMessage, maxUserLen) + messages = append(messages, map[string]interface{}{ + "role": "user", + "content": WrapUserMessage(result.Cleaned), + }) + + return messages, result.Violations +} + +// BuildSafeScrapePrompt constructs a safe prompt with sanitized HTML content for scraping. +// The HTML is wrapped in delimiters to isolate it from instructions. +func BuildSafeScrapePrompt(instructions string, htmlContent string, domain string, maxHtmlLen int) string { + sanitizedHTML := SanitizeHTMLContent(htmlContent, maxHtmlLen) + + return fmt.Sprintf(`%s + +The following is sanitized HTML content from %s, enclosed in delimiters. Analyze this content only. + + +%s +`, + instructions, domain, sanitizedHTML) +} + // CallOpenRouter sends a request to OpenRouter (OpenAI-compatible API). func CallOpenRouter(apiKey, model string, messages []map[string]interface{}, maxTokens int) (string, error) { if apiKey == "" { diff --git a/lib/scrape.go b/lib/scrape.go index 695adc7..f94b39d 100644 --- a/lib/scrape.go +++ b/lib/scrape.go @@ -71,8 +71,8 @@ func ScrapeOrg(apiKey, domain string) (*ScrapedOrg, error) { return nil, fmt.Errorf("could not fetch %s", base) } - // Ask LLM to find relevant pages - discoverPrompt := fmt.Sprintf(`You are analyzing the HTML of %s to find pages that contain: + // Ask LLM to find relevant pages using sanitized prompt + discoverInstructions := fmt.Sprintf(`You are analyzing the HTML of %s to find pages that contain: 1. Team / leadership / people / staff pages (with bios, headshots, names) 2. About / company info pages 3. Contact / office address pages @@ -82,10 +82,9 @@ Look at the navigation, footer, and links in the HTML. Return a JSON array of up Return ONLY a JSON array of strings, no markdown: ["https://%s/about", "https://%s/team", ...] -If you cannot find any relevant links, return an empty array: [] +If you cannot find any relevant links, return an empty array: []`, domain, domain, domain, domain) -HTML: -%s`, domain, domain, domain, domain, homepage) + discoverPrompt := BuildSafeScrapePrompt(discoverInstructions, homepage, domain, 50000) discoverMessages := []map[string]interface{}{ {"role": "user", "content": discoverPrompt}, @@ -115,8 +114,8 @@ HTML: html := allHTML.String() - // Pass 2: extract structured data - prompt := fmt.Sprintf(`Extract structured data from this company website. Domain: %s + // Pass 2: extract structured data using sanitized prompt + extractInstructions := fmt.Sprintf(`Extract structured data from this company website. Domain: %s RULES: - Extract EVERY person mentioned — do not skip anyone @@ -157,10 +156,9 @@ Return a single JSON object: ] } -Return ONLY valid JSON — no markdown, no explanation. All text values must be clean plain text — decode any HTML entities (e.g. ’ → ', & → &). +Return ONLY valid JSON — no markdown, no explanation. All text values must be clean plain text — decode any HTML entities (e.g. ’ → ', & → &).`, domain, domain, domain, domain) -HTML: -%s`, domain, domain, domain, domain, html) + prompt := BuildSafeScrapePrompt(extractInstructions, html, domain, 50000) messages := []map[string]interface{}{ {"role": "user", "content": prompt}, diff --git a/lib/types.go b/lib/types.go index c73cc17..e149751 100644 --- a/lib/types.go +++ b/lib/types.go @@ -67,6 +67,7 @@ type OrgData struct { Name string `json:"name"` Domains []string `json:"domains"` // required, e.g. ["kaseya.com","datto.com"] Role string `json:"role"` // seller | buyer | ib | advisor + IsUs bool `json:"is_us,omitempty"` // is this organization "us" (the primary org) Logo string `json:"logo,omitempty"` Website string `json:"website,omitempty"` Description string `json:"description,omitempty"` @@ -280,7 +281,7 @@ type RequestData struct { Section string `json:"section"` // e.g. "Financial", "Legal" Description string `json:"description"` // full detail / context Priority string `json:"priority"` // critical | high | medium | low - Status string `json:"status"` // open | in_process | partial | complete + Status string `json:"status"` // open | assigned | answered | review | published AssigneeID string `json:"assignee_id,omitempty"` AssigneeName string `json:"assignee_name,omitempty"` ReviewerID string `json:"reviewer_id,omitempty"` diff --git a/portal/templates/app/orgs.html b/portal/templates/app/orgs.html index 4045f7d..64debac 100644 --- a/portal/templates/app/orgs.html +++ b/portal/templates/app/orgs.html @@ -84,6 +84,10 @@

Comma-separated. Used to validate invite emails.

+
+ + +
@@ -172,10 +176,14 @@ const domains = Array.isArray(o.domains) ? o.domains : []; const memberCount = Array.isArray(o.members) ? o.members.length : 0; const logo = o.logo || ''; + const isUs = o.is_us; return '
' + '
' + (logo ? '
' : '') + + '
' + '

' + escHtml(name) + '

' + + (isUs ? '★ US' : '') + + '
' + '
' + (o.description ? '

' + escHtml(o.description) + '

' : '') + '
' @@ -244,6 +252,7 @@ document.getElementById('eState').value = o.state || ''; document.getElementById('eDomains').value = Array.isArray(o.domains) ? o.domains.join(', ') : ''; document.getElementById('eLogo').value = o.logo || ''; + document.getElementById('eIsUs').checked = !!o.is_us; document.getElementById('editModalError').classList.add('hidden'); document.getElementById('editOrgTitle').textContent = o.name || 'Edit Organization'; const logo = o.logo || ''; @@ -293,6 +302,7 @@ method: 'PUT', body: JSON.stringify({ name, domains, + is_us: document.getElementById('eIsUs').checked, description: document.getElementById('eDesc').value.trim(), industry: document.getElementById('eIndustry').value.trim(), website: document.getElementById('eWebsite').value.trim(), diff --git a/portal/templates/app/project.html b/portal/templates/app/project.html index 6a39187..48daffc 100644 --- a/portal/templates/app/project.html +++ b/portal/templates/app/project.html @@ -627,8 +627,14 @@ // Badge helpers const priStyles = { critical:'background:rgba(239,68,68,.15);color:#f87171', high:'background:rgba(251,146,60,.15);color:#fb923c', medium:'background:rgba(250,204,21,.15);color:#facc15', low:'background:rgba(148,163,184,.15);color:#b0bec5' }; - const statStyles = { open:'background:rgba(96,165,250,.15);color:#60a5fa', in_process:'background:rgba(251,191,36,.15);color:#fbbf24', partial:'background:rgba(168,85,247,.15);color:#a855f7', complete:'background:rgba(74,222,128,.15);color:#4ade80' }; - const statLabels = { open:'Open', in_process:'In Process', partial:'Partial', complete:'Complete' }; + const statStyles = { + open:'background:rgba(107,114,128,.15);color:#9CA3AF', + assigned:'background:rgba(59,130,246,.15);color:#3b82f6', + answered:'background:rgba(168,85,247,.15);color:#a855f7', + review:'background:rgba(201,168,76,.15);color:#C9A84C', + published:'background:rgba(34,197,94,.15);color:#22c55e' + }; + const statLabels = { open:'Open', assigned:'Assigned', answered:'Answered', review:'Review', published:'Published' }; function priorityBadge(p) { return `${escHtml(p||'medium')}`; @@ -867,7 +873,7 @@ const prioritySelect = ``; // Status select - const statOpts = ['open','in_process','partial','complete'].map(o => + const statOpts = ['open','assigned','answered','review','published'].map(o => `` ).join(''); const statusSelect = ``; diff --git a/portal/templates/app/request.html b/portal/templates/app/request.html index 48134dd..5b21523 100644 --- a/portal/templates/app/request.html +++ b/portal/templates/app/request.html @@ -42,7 +42,7 @@
@@ -112,7 +112,13 @@ return r === 'buyer' || r === 'seller' || r === 'advisor'; } - const statusColors = { open: 'bg-yellow-500/20 text-yellow-300', answered: 'bg-green-500/20 text-green-300', closed: 'bg-gray-500/20 text-gray-300', 'under-review': 'bg-blue-500/20 text-blue-300' }; + const statusColors = { + open: 'bg-gray-500/20 text-gray-300', + assigned: 'bg-blue-500/20 text-blue-300', + answered: 'bg-purple-500/20 text-purple-300', + review: 'bg-yellow-500/20 text-yellow-300', + published: 'bg-green-500/20 text-green-300' + }; const priorityColors = { high: 'bg-red-500/20 text-red-300', medium: 'bg-yellow-500/20 text-yellow-300', low: 'bg-blue-500/20 text-blue-300' }; let currentRequest = null;