From 77db02a6ebd9ed988d8e592c97a66964b5f2bcd1 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 10 Feb 2026 02:13:18 -0500 Subject: [PATCH] feat: optimize genome queries with IN clauses, dedup, repute filter - Replace N separate SQL queries with single IN clause for rsids and genes - Dedup results by rsid, merging categories from multiple tiers - Add repute filter (Good/Bad/Clear) to genome queries - Expose limit/offset as MCP parameters - Add genotype to search check - Fix category filter in genomeEntriesToResult - Remove deprecated api/api_categories.go and api/api_genome.go - Change GenomeMatch to use Categories []string instead of Category+Subcategory Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 4 + api/api_categories.go | 257 ---------------------------------- api/api_genome.go | 68 --------- api/main.go | 2 - import-genome/main.go | 3 +- lib/v2.go | 311 ++++++++++++++++++++++++++++-------------- portal/api_proxy.go | 2 - portal/mcp_http.go | 32 +++-- portal/mcp_tools.go | 42 ++++-- 9 files changed, 265 insertions(+), 456 deletions(-) delete mode 100644 api/api_categories.go delete mode 100644 api/api_genome.go diff --git a/CLAUDE.md b/CLAUDE.md index 784e577..3425b77 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -94,6 +94,10 @@ Key tables: ### Querying Encrypted Data +**Encryption is DETERMINISTIC.** You CAN use encrypted columns in WHERE clauses by encrypting the search value: `WHERE col = lib.CryptoEncrypt("value")`. Do NOT pull large datasets and filter client-side when a precise query is possible. + +Columns ending in `_id` are NOT encrypted (plain text, always filterable). + **IMPORTANT:** Do NOT write Python/Go scripts to decrypt database fields. Use the `decrypt` tool: ```bash diff --git a/api/api_categories.go b/api/api_categories.go deleted file mode 100644 index 06d2ec3..0000000 --- a/api/api_categories.go +++ /dev/null @@ -1,257 +0,0 @@ -package main - -import ( - "encoding/json" - "net/http" - "strings" - - "inou/lib" -) - -type CategoryCount struct { - Shown int `json:"shown"` - Hidden int `json:"hidden"` -} - -func handleCategories(w http.ResponseWriter, r *http.Request) { - // Get accessor (who is asking) - ctx := getAccessContextOrFail(w, r) - if ctx == nil { - return - } - - dossierHex := r.URL.Query().Get("dossier") - if dossierHex == "" { - http.Error(w, "missing dossier", http.StatusBadRequest) - return - } - dossierID := dossierHex - - obsType := r.URL.Query().Get("type") - category := r.URL.Query().Get("category") - - // Pass accessor + dossier to lib - RBAC handled there - var counts map[string]CategoryCount - - if obsType == "" { - counts = getTopLevelCounts(ctx.AccessorID, dossierID) - } else if obsType == "genome" { - if category != "" { - counts = getGenomeSubcategoryCounts(ctx.AccessorID, dossierID, category) - } else { - counts = getGenomeCounts(ctx.AccessorID, dossierID) - } - } else { - counts = make(map[string]CategoryCount) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(counts) -} - -func getTopLevelCounts(accessorID, dossierID string) map[string]CategoryCount { - counts := make(map[string]CategoryCount) - - categoryNames := map[int]string{ - 1: "imaging", 2: "documents", 3: "labs", 4: "genome", - } - - // Check each category - only include if accessor has access - for catInt, catName := range categoryNames { - // Try to get a count by querying entries with RBAC - entries, err := lib.EntryList(accessorID, "", catInt, &lib.EntryFilter{ - DossierID: dossierID, - Limit: 1, // Just check if we can see any - }) - if err != nil || len(entries) == 0 { - continue // No access or no entries - } - - // Get actual count (using system context for counting only) - var catCounts []struct { - Count int `db:"cnt"` - } - lib.Query("SELECT COUNT(*) as cnt FROM entries WHERE dossier_id = ? AND category = ?", - []any{dossierID, catInt}, &catCounts) - - if len(catCounts) > 0 && catCounts[0].Count > 0 { - counts[catName] = CategoryCount{Shown: catCounts[0].Count, Hidden: 0} - } - } - - // For genome, replace with detailed subcategory counts - genomeCats := getGenomeCounts(accessorID, dossierID) - if len(genomeCats) > 0 { - totalShown, totalHidden := 0, 0 - for _, c := range genomeCats { - totalShown += c.Shown - totalHidden += c.Hidden - } - counts["genome"] = CategoryCount{Shown: len(genomeCats), Hidden: totalHidden} - } - - return counts -} - -// variantData for parsing the data JSON -type variantData struct { - Mag float64 `json:"mag"` - Rep string `json:"rep"` - Sub string `json:"sub"` -} - -// shouldIncludeVariant returns true if the variant should be counted/shown -func shouldIncludeVariant(data variantData, includeHidden bool) bool { - if includeHidden { - return true - } - // Hide high magnitude variants - if data.Mag > 4.0 { - return false - } - // Hide "Bad" repute variants - if data.Rep == "Bad" || data.Rep == "bad" { - return false - } - return true -} - -// getGenomeCounts reads cached counts from the extraction entry (fast path) -func getGenomeCounts(accessorID, dossierID string) map[string]CategoryCount { - counts := make(map[string]CategoryCount) - - // Create access context for RBAC - ctx := &lib.AccessContext{AccessorID: accessorID} - - // Find extraction entry and read its data - extraction, err := lib.GenomeGetExtraction(ctx, dossierID) - if err != nil { - return counts - } - - if extraction.Data == "" { - return counts - } - - // Parse extraction data which contains pre-computed counts - var extractionData struct { - Counts map[string]CategoryCount `json:"counts"` - } - if err := json.Unmarshal([]byte(extraction.Data), &extractionData); err != nil { - return counts - } - - // Return cached counts if available - if extractionData.Counts != nil { - return extractionData.Counts - } - - // Fallback: compute counts (for old data without cached counts) - return getGenomeCountsSlow(accessorID, dossierID) -} - -// getGenomeCountsSlow computes counts by scanning all variants (fallback for old data) -func getGenomeCountsSlow(accessorID, dossierID string) map[string]CategoryCount { - counts := make(map[string]CategoryCount) - - // Create access context for RBAC - ctx := &lib.AccessContext{AccessorID: accessorID} - - // Find extraction entry - extraction, err := lib.GenomeGetExtraction(ctx, dossierID) - if err != nil { - return counts - } - - // Get all tiers - tiers, err := lib.GenomeGetTiers(ctx, dossierID, extraction.EntryID) - if err != nil { - return counts - } - - // For each tier, count shown and hidden variants - for _, tier := range tiers { - variants, err := lib.GenomeGetVariantsByTier(ctx, dossierID, tier.TierID) - if err != nil { - continue - } - shown, hidden := 0, 0 - for _, v := range variants { - data := variantData{ - Mag: v.Magnitude, - Rep: v.Repute, - Sub: v.Subcategory, - } - if shouldIncludeVariant(data, false) { - shown++ - } else { - hidden++ - } - } - if shown > 0 || hidden > 0 { - counts[tier.Category] = CategoryCount{Shown: shown, Hidden: hidden} - } - } - - return counts -} - -func getGenomeSubcategoryCounts(accessorID, dossierID string, category string) map[string]CategoryCount { - counts := make(map[string]CategoryCount) - shownCounts := make(map[string]int) - hiddenCounts := make(map[string]int) - - // Create access context for RBAC - ctx := &lib.AccessContext{AccessorID: accessorID} - - // Find extraction entry - extraction, err := lib.GenomeGetExtraction(ctx, dossierID) - if err != nil { - return counts - } - - // Find tier for this category - tier, err := lib.GenomeGetTierByCategory(ctx, dossierID, extraction.EntryID, category) - if err != nil { - return counts - } - - // Get variants and count by subcategory - variants, err := lib.GenomeGetVariantsByTier(ctx, dossierID, tier.TierID) - if err != nil { - return counts - } - - for _, v := range variants { - if v.Subcategory == "" { - continue - } - data := variantData{ - Mag: v.Magnitude, - Rep: v.Repute, - Sub: v.Subcategory, - } - if shouldIncludeVariant(data, false) { - shownCounts[v.Subcategory]++ - } else { - hiddenCounts[v.Subcategory]++ - } - } - - // Combine into CategoryCount - allSubs := make(map[string]bool) - for k := range shownCounts { - allSubs[k] = true - } - for k := range hiddenCounts { - allSubs[k] = true - } - for sub := range allSubs { - counts[sub] = CategoryCount{Shown: shownCounts[sub], Hidden: hiddenCounts[sub]} - } - - return counts -} - -// Unused but keeping for compatibility - remove if not needed -var _ = strings.Contains diff --git a/api/api_genome.go b/api/api_genome.go deleted file mode 100644 index 124e668..0000000 --- a/api/api_genome.go +++ /dev/null @@ -1,68 +0,0 @@ -package main - -import ( - "encoding/json" - "net/http" - "strconv" - "strings" - - "inou/lib" -) - -func handleGenomeQuery(w http.ResponseWriter, r *http.Request) { - ctx := getAccessContextOrFail(w, r) - if ctx == nil { - return - } - - dossierID := r.URL.Query().Get("dossier") - if dossierID == "" { - http.Error(w, "missing dossier", http.StatusBadRequest) - return - } - - if !requireDossierAccess(w, ctx, dossierID) { - return - } - - // Parse query params into opts - q := r.URL.Query() - var minMag float64 - if s := q.Get("min_magnitude"); s != "" { - minMag, _ = strconv.ParseFloat(s, 64) - } - offset := 0 - if s := q.Get("offset"); s != "" { - offset, _ = strconv.Atoi(s) - } - limit := 100 - if s := q.Get("limit"); s != "" { - limit, _ = strconv.Atoi(s) - } - var rsids []string - if s := q.Get("rsids"); s != "" { - rsids = strings.Split(s, ",") - } - - result, err := lib.GenomeQuery(dossierID, lib.GenomeQueryOpts{ - Category: q.Get("category"), - Search: q.Get("search"), - Gene: q.Get("gene"), - RSIDs: rsids, - MinMagnitude: minMag, - IncludeHidden: q.Get("include_hidden") == "true", - Sort: q.Get("sort"), - Offset: offset, - Limit: limit, - }) - if err != nil { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]interface{}{ - "error": err.Error(), - }) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(result) -} diff --git a/api/main.go b/api/main.go index 2c246fc..67839f2 100644 --- a/api/main.go +++ b/api/main.go @@ -41,8 +41,6 @@ func main() { http.HandleFunc("/api/dossiers", handleDossiers) http.HandleFunc("/api/dossier", handleDossier) http.HandleFunc("/api/studies", handleStudies) - http.HandleFunc("/api/genome", handleGenomeQuery) - http.HandleFunc("/api/categories", handleCategories) http.HandleFunc("/api/series", handleSeries) http.HandleFunc("/api/slices", handleSlices) http.HandleFunc("/image/", handleImage) diff --git a/import-genome/main.go b/import-genome/main.go index 64de5ef..627e79a 100644 --- a/import-genome/main.go +++ b/import-genome/main.go @@ -265,7 +265,7 @@ func main() { // ===== PHASE 4: Load SNPedia and match ===== phase4Start := time.Now() - snpediaDB, err := sql.Open("sqlite3", "/home/johan/dev/inou/snpedia-genotypes/genotypes.db?mode=ro") + snpediaDB, err := sql.Open("sqlite3", "/tank/inou/data/genotypes.db?mode=ro") if err != nil { fmt.Println("SNPedia DB open failed:", err) os.Exit(1) @@ -542,6 +542,7 @@ func main() { Type: v.RSID, Value: v.Genotype, Tags: v.Gene, + SearchKey: v.Gene, Ordinal: i + 1, Timestamp: now, Data: string(dataJSON), diff --git a/lib/v2.go b/lib/v2.go index af42e89..0132233 100644 --- a/lib/v2.go +++ b/lib/v2.go @@ -1184,8 +1184,7 @@ type GenomeMatch struct { Magnitude *float64 `json:"magnitude,omitempty"` Repute string `json:"repute,omitempty"` Summary string `json:"summary,omitempty"` - Category string `json:"category,omitempty"` - Subcategory string `json:"subcategory,omitempty"` + Categories []string `json:"categories,omitempty"` } // GenomeQueryResult is the response from GenomeQuery @@ -1202,58 +1201,33 @@ type GenomeQueryOpts struct { Gene string // comma-separated RSIDs []string MinMagnitude float64 + Repute string // filter by repute (Good, Bad, Clear) IncludeHidden bool Sort string // "magnitude" (default), "gene", "rsid" Offset int Limit int + AccessorID string // who is querying (for audit logging) } -// GenomeQuery queries genome variants for a dossier with filtering, sorting, and pagination. +// GenomeQuery queries genome variants for a dossier. +// Fast path: gene/rsid use indexed search_key/type columns (precise SQL queries). +// Slow path: search/min_magnitude load all variants and filter in memory. func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, error) { - sysCtx := &AccessContext{IsSystem: true} - - extraction, err := GenomeGetExtraction(sysCtx, dossierID) - if err != nil { - return nil, fmt.Errorf("GENOME_NO_EXTRACTION: %w", err) - } - - // Get tiers - var tiers []GenomeTier - tierCategories := make(map[string]string) - - if opts.Category != "" { - tier, err := GenomeGetTierByCategory(sysCtx, dossierID, extraction.EntryID, opts.Category) - if err == nil { - tiers = append(tiers, *tier) - tierCategories[tier.TierID] = tier.Category + if opts.IncludeHidden { + var details []string + if opts.Gene != "" { + details = append(details, "gene="+opts.Gene) } - } else { - tiers, _ = GenomeGetTiers(sysCtx, dossierID, extraction.EntryID) - for _, t := range tiers { - tierCategories[t.TierID] = t.Category + if len(opts.RSIDs) > 0 { + details = append(details, "rsids="+strings.Join(opts.RSIDs, ",")) } - } - - if len(tiers) == 0 { - return &GenomeQueryResult{Matches: []GenomeMatch{}}, nil - } - - tierIDs := make([]string, len(tiers)) - for i, t := range tiers { - tierIDs[i] = t.TierID - } - - variants, err := GenomeGetVariants(sysCtx, dossierID, tierIDs) - if err != nil { - return nil, fmt.Errorf("GENOME_VARIANT_QUERY_FAILED: %w", err) - } - - // Parse genes - var genes []string - if opts.Gene != "" { - for _, g := range strings.Split(opts.Gene, ",") { - genes = append(genes, strings.TrimSpace(g)) + if opts.Search != "" { + details = append(details, "search="+opts.Search) } + if opts.Category != "" { + details = append(details, "category="+opts.Category) + } + AuditLog(opts.AccessorID, "genome_reveal_hidden", dossierID, strings.Join(details, " ")) } limit := opts.Limit @@ -1264,96 +1238,235 @@ func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, er limit = 500 } - // Filter and collect - var matches []GenomeMatch - total := 0 + // Fast path: gene or rsid — use indexed columns + if opts.Gene != "" || len(opts.RSIDs) > 0 { + return genomeQueryFast(dossierID, opts, limit) + } - for _, v := range variants { - if len(opts.RSIDs) > 0 { - found := false - for _, r := range opts.RSIDs { - if r == v.RSID { - found = true - break - } - } - if !found { - continue - } + // Slow path: search/min_magnitude — load all, filter in memory + return genomeQuerySlow(dossierID, opts, limit) +} + +// genomeQueryFast uses indexed search_key (gene) and type (rsid) columns. +func genomeQueryFast(dossierID string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) { + var entries []Entry + + if opts.Gene != "" { + // Split comma-separated genes, query each via indexed search_key + genes := strings.Split(opts.Gene, ",") + for i := range genes { + genes[i] = strings.TrimSpace(genes[i]) } - if len(genes) > 0 { - found := false - for _, g := range genes { - if strings.EqualFold(v.Gene, g) { - found = true - break - } - } - if !found { - continue + // Build gene IN clause + genePlaceholders := make([]string, len(genes)) + args := []any{dossierID, CategoryGenome} + for i, gene := range genes { + genePlaceholders[i] = "?" + args = append(args, CryptoEncrypt(gene)) + } + sql := "SELECT * FROM entries WHERE dossier_id = ? AND category = ? AND search_key IN (" + strings.Join(genePlaceholders, ",") + ")" + + // Add rsid filter if specified + if len(opts.RSIDs) > 0 { + rsidPlaceholders := make([]string, len(opts.RSIDs)) + for i, rsid := range opts.RSIDs { + rsidPlaceholders[i] = "?" + args = append(args, CryptoEncrypt(rsid)) } + sql += " AND type IN (" + strings.Join(rsidPlaceholders, ",") + ")" + } + + Query(sql, args, &entries) + } else if len(opts.RSIDs) > 0 { + // rsid only, no gene — single IN query + placeholders := make([]string, len(opts.RSIDs)) + args := []any{dossierID, CategoryGenome} + for i, rsid := range opts.RSIDs { + placeholders[i] = "?" + args = append(args, CryptoEncrypt(rsid)) + } + sql := "SELECT * FROM entries WHERE dossier_id = ? AND category = ? AND type IN (" + strings.Join(placeholders, ",") + ")" + Query(sql, args, &entries) + } + + // Look up tier categories for parent_ids (single IN query) + tierCategories := make(map[string]string) + tierIDSet := make(map[string]bool) + for _, e := range entries { + tierIDSet[e.ParentID] = true + } + if len(tierIDSet) > 0 { + placeholders := make([]string, 0, len(tierIDSet)) + args := make([]any, 0, len(tierIDSet)) + for id := range tierIDSet { + placeholders = append(placeholders, "?") + args = append(args, id) + } + var tierEntries []Entry + Query("SELECT * FROM entries WHERE entry_id IN ("+strings.Join(placeholders, ",")+")", args, &tierEntries) + for _, t := range tierEntries { + tierCategories[t.EntryID] = t.Value + } + } + + return genomeEntriesToResult(entries, tierCategories, opts, limit) +} + +// genomeQuerySlow loads all variants and filters in memory (for search/min_magnitude). +func genomeQuerySlow(dossierID string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) { + sysCtx := &AccessContext{IsSystem: true} + + extraction, err := GenomeGetExtraction(sysCtx, dossierID) + if err != nil { + return nil, fmt.Errorf("GENOME_NO_EXTRACTION: %w", err) + } + + tiers, _ := GenomeGetTiers(sysCtx, dossierID, extraction.EntryID) + if len(tiers) == 0 { + return &GenomeQueryResult{Matches: []GenomeMatch{}}, nil + } + + tierCategories := make(map[string]string) + tierIDs := make([]string, len(tiers)) + for i, t := range tiers { + tierIDs[i] = t.TierID + tierCategories[t.TierID] = t.Category + } + + variants, err := GenomeGetVariants(sysCtx, dossierID, tierIDs) + if err != nil { + return nil, fmt.Errorf("GENOME_VARIANT_QUERY_FAILED: %w", err) + } + + // Convert to entries for shared result builder + var entries []Entry + for _, v := range variants { + dataJSON, _ := json.Marshal(struct { + Mag float64 `json:"mag,omitempty"` + Rep string `json:"rep,omitempty"` + Sum string `json:"sum,omitempty"` + Sub string `json:"sub,omitempty"` + }{v.Magnitude, v.Repute, v.Summary, v.Subcategory}) + + entries = append(entries, Entry{ + EntryID: v.EntryID, + ParentID: v.TierID, + Type: v.RSID, + Value: v.Genotype, + Tags: v.Gene, + Data: string(dataJSON), + }) + } + + return genomeEntriesToResult(entries, tierCategories, opts, limit) +} + +// genomeEntriesToResult converts raw entries to GenomeQueryResult with filtering/sorting. +func genomeEntriesToResult(entries []Entry, tierCategories map[string]string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) { + targeted := opts.Gene != "" || len(opts.RSIDs) > 0 + + // Dedup by rsid, merge categories + seen := make(map[string]*GenomeMatch) + var order []string + + for _, e := range entries { + var data struct { + Mag float64 `json:"mag"` + Rep string `json:"rep"` + Sum string `json:"sum"` + Sub string `json:"sub"` + } + if e.Data != "" { + json.Unmarshal([]byte(e.Data), &data) } if opts.Search != "" { sl := strings.ToLower(opts.Search) - if !strings.Contains(strings.ToLower(v.Gene), sl) && - !strings.Contains(strings.ToLower(v.Summary), sl) && - !strings.Contains(strings.ToLower(v.Subcategory), sl) && - !strings.Contains(strings.ToLower(v.RSID), sl) { + if !strings.Contains(strings.ToLower(e.Tags), sl) && + !strings.Contains(strings.ToLower(data.Sum), sl) && + !strings.Contains(strings.ToLower(data.Sub), sl) && + !strings.Contains(strings.ToLower(e.Type), sl) && + !strings.Contains(strings.ToLower(e.Value), sl) { continue } } - if opts.MinMagnitude > 0 && v.Magnitude < opts.MinMagnitude { + if opts.Category != "" && tierCategories[e.ParentID] != opts.Category { continue } - // Determine if variant would be hidden - isHidden := v.Magnitude > 4.0 || strings.EqualFold(v.Repute, "bad") + if opts.MinMagnitude > 0 && data.Mag < opts.MinMagnitude { + continue + } - // Targeted queries (specific rsIDs or gene) return redacted results - // Broad queries skip hidden variants entirely - targeted := len(opts.RSIDs) > 0 || len(genes) > 0 + if opts.Repute != "" && !strings.EqualFold(data.Rep, opts.Repute) { + continue + } + isHidden := data.Mag > 4.0 || strings.EqualFold(data.Rep, "bad") if isHidden && !opts.IncludeHidden && !targeted { continue } - total++ - if total <= opts.Offset { - continue - } - if len(matches) >= limit { + cat := tierCategories[e.ParentID] + + if m, ok := seen[e.Type]; ok { + // Already seen this rsid — add category if not duplicate + if cat != "" { + dup := false + for _, c := range m.Categories { + if c == cat { + dup = true + break + } + } + if !dup { + m.Categories = append(m.Categories, cat) + } + } continue } - // Redact sensitive fields unless include_hidden is set redact := isHidden && !opts.IncludeHidden - - match := GenomeMatch{ - RSID: v.RSID, - Gene: v.Gene, - Category: tierCategories[v.TierID], - Subcategory: v.Subcategory, + match := &GenomeMatch{ + RSID: e.Type, + Gene: e.Tags, + } + if cat != "" { + match.Categories = []string{cat} } if redact { match.Genotype = "hidden" match.Summary = "Sensitive variant hidden. Query with include_hidden=true to reveal." } else { - match.Genotype = v.Genotype - if v.Summary != "" { - match.Summary = v.Summary + match.Genotype = e.Value + if data.Sum != "" { + match.Summary = data.Sum } } - if v.Magnitude > 0 { - mag := v.Magnitude + if data.Mag > 0 { + mag := data.Mag match.Magnitude = &mag } - if v.Repute != "" { - match.Repute = v.Repute + if data.Rep != "" { + match.Repute = data.Rep } - matches = append(matches, match) + + seen[e.Type] = match + order = append(order, e.Type) + } + + total := len(order) + var matches []GenomeMatch + for i, rsid := range order { + if i < opts.Offset { + continue + } + if len(matches) >= limit { + break + } + matches = append(matches, *seen[rsid]) } // Sort diff --git a/portal/api_proxy.go b/portal/api_proxy.go index e06f6f7..c2d4be7 100644 --- a/portal/api_proxy.go +++ b/portal/api_proxy.go @@ -12,8 +12,6 @@ var apiEndpoints = map[string]bool{ "/api/dossiers": true, "/api/dossier": true, "/api/studies": true, - "/api/genome": true, - "/api/categories": true, "/api/series": true, "/api/slices": true, "/api/labs/tests": true, diff --git a/portal/mcp_http.go b/portal/mcp_http.go index 4bc9880..425a39f 100644 --- a/portal/mcp_http.go +++ b/portal/mcp_http.go @@ -369,13 +369,11 @@ func handleMCPToolsList(w http.ResponseWriter, req mcpRequest) { }, { "name": "get_categories", - "description": "List available data categories for a dossier (imaging, labs, documents, genome). Use this first to discover what data types are available, then query entries by category. For lab results, LOINC codes provide best search accuracy.", + "description": "List available data categories for a dossier with entry counts. Returns category names and how many entries exist in each.", "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ - "dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"}, - "type": map[string]interface{}{"type": "string", "description": "Observation type (e.g., genome) for subcategories"}, - "category": map[string]interface{}{"type": "string", "description": "Get subcategories within this category"}, + "dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"}, }, "required": []string{"dossier"}, }, @@ -402,17 +400,20 @@ func handleMCPToolsList(w http.ResponseWriter, req mcpRequest) { }, { "name": "query_genome", - "description": "Query genome variants with specialized genome parameters (magnitude, repute, gene, rsids). Returns matches with significance ratings. For non-genome data, use get_categories to find available categories.", + "description": "Query genome variants by gene, rsid, or category. Use gene names (e.g. MTHFR) not concepts (e.g. methylation). Search also matches genotype (e.g. AA, GG). Sensitive variants are redacted; use include_hidden to reveal.", "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"}, "gene": map[string]interface{}{"type": "string", "description": "Gene name(s), comma-separated"}, - "search": map[string]interface{}{"type": "string", "description": "Search gene, subcategory, or summary"}, - "category": map[string]interface{}{"type": "string", "description": "Filter by category"}, + "search": map[string]interface{}{"type": "string", "description": "Free-text search across gene, genotype, subcategory, summary, rsid"}, + "category": map[string]interface{}{"type": "string", "description": "Filter by genome category (e.g. cancer, disease, traits)"}, "rsids": map[string]interface{}{"type": "string", "description": "Comma-separated rsids"}, "min_magnitude": map[string]interface{}{"type": "number", "description": "Minimum magnitude"}, - "include_hidden": map[string]interface{}{"type": "boolean", "description": "Include hidden categories and high-magnitude variants"}, + "repute": map[string]interface{}{"type": "string", "description": "Filter by repute: Good, Bad, or Clear"}, + "include_hidden": map[string]interface{}{"type": "boolean", "description": "Reveal redacted sensitive variants (audited)"}, + "limit": map[string]interface{}{"type": "number", "description": "Max results to return (default 20)"}, + "offset": map[string]interface{}{"type": "number", "description": "Skip first N results for pagination"}, }, "required": []string{"dossier"}, }, @@ -613,9 +614,7 @@ func handleMCPToolsCall(w http.ResponseWriter, req mcpRequest, accessToken, doss sendMCPError(w, req.ID, -32602, "dossier required") return } - typ, _ := params.Arguments["type"].(string) - category, _ := params.Arguments["category"].(string) - result, err := mcpGetCategories(accessToken, dossier, typ, category) + result, err := mcpGetCategories(dossier) if err != nil { sendMCPError(w, req.ID, -32000, err.Error()) return @@ -633,12 +632,17 @@ func handleMCPToolsCall(w http.ResponseWriter, req mcpRequest, accessToken, doss category, _ := params.Arguments["category"].(string) rsids, _ := params.Arguments["rsids"].(string) minMag, _ := params.Arguments["min_magnitude"].(float64) + repute, _ := params.Arguments["repute"].(string) includeHidden, _ := params.Arguments["include_hidden"].(bool) + limitF, _ := params.Arguments["limit"].(float64) + offsetF, _ := params.Arguments["offset"].(float64) + limit := int(limitF) + offset := int(offsetF) - fmt.Printf("[MCP] query_genome: dossier=%s gene=%s category=%s includeHidden=%v\n", - dossier, gene, category, includeHidden) + fmt.Printf("[MCP] query_genome: dossier=%s gene=%s category=%s repute=%s limit=%d offset=%d\n", + dossier, gene, category, repute, limit, offset) - result, err := mcpQueryGenome(accessToken, dossier, gene, search, category, rsids, minMag, includeHidden) + result, err := mcpQueryGenome(accessToken, dossier, dossierID, gene, search, category, rsids, minMag, repute, includeHidden, limit, offset) if err != nil { fmt.Printf("[MCP] query_genome error: %v\n", err) sendMCPError(w, req.ID, -32000, err.Error()) diff --git a/portal/mcp_tools.go b/portal/mcp_tools.go index e4aaf6b..285a49b 100644 --- a/portal/mcp_tools.go +++ b/portal/mcp_tools.go @@ -181,38 +181,54 @@ func mcpQueryEntries(accessToken, dossier, category, typ, searchKey, parent, fro return string(pretty), nil } -func mcpGetCategories(accessToken, dossier, typ, category string) (string, error) { - params := map[string]string{"dossier": dossier} - if typ != "" { - params["type"] = typ +func mcpGetCategories(dossier string) (string, error) { + var counts []struct { + Category int `db:"category"` + Count int `db:"cnt"` } - if category != "" { - params["category"] = category - } - - body, err := mcpAPICall(accessToken, "/api/categories", params) + err := lib.Query("SELECT category, COUNT(*) as cnt FROM entries WHERE dossier_id = ? GROUP BY category", []any{dossier}, &counts) if err != nil { return "", err } - var data interface{} - json.Unmarshal(body, &data) - pretty, _ := json.MarshalIndent(data, "", " ") + result := make(map[string]int) + for _, c := range counts { + name := lib.CategoryName(c.Category) + if name != "unknown" { + result[name] = c.Count + } + } + pretty, _ := json.MarshalIndent(result, "", " ") return string(pretty), nil } -func mcpQueryGenome(accessToken, dossier, gene, search, category, rsids string, minMag float64, includeHidden bool) (string, error) { +func mcpQueryGenome(accessToken, dossier, accessorID, gene, search, category, rsids string, minMag float64, repute string, includeHidden bool, limit, offset int) (string, error) { var rsidList []string if rsids != "" { rsidList = strings.Split(rsids, ",") } + if limit <= 0 { + numTerms := 1 + if gene != "" { + numTerms = len(strings.Split(gene, ",")) + } + if len(rsidList) > numTerms { + numTerms = len(rsidList) + } + limit = 20 * numTerms + } + result, err := lib.GenomeQuery(dossier, lib.GenomeQueryOpts{ Category: category, Search: search, Gene: gene, RSIDs: rsidList, MinMagnitude: minMag, + Repute: repute, IncludeHidden: includeHidden, + Limit: limit, + Offset: offset, + AccessorID: accessorID, }) if err != nil { return "", err