feat: optimize genome queries with IN clauses, dedup, repute filter

- Replace N separate SQL queries with single IN clause for rsids and genes
- Dedup results by rsid, merging categories from multiple tiers
- Add repute filter (Good/Bad/Clear) to genome queries
- Expose limit/offset as MCP parameters
- Add genotype to search check
- Fix category filter in genomeEntriesToResult
- Remove deprecated api/api_categories.go and api/api_genome.go
- Change GenomeMatch to use Categories []string instead of Category+Subcategory

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
James 2026-02-10 02:13:18 -05:00
parent d2d77d1503
commit 77db02a6eb
9 changed files with 265 additions and 456 deletions

View File

@ -94,6 +94,10 @@ Key tables:
### Querying Encrypted Data ### Querying Encrypted Data
**Encryption is DETERMINISTIC.** You CAN use encrypted columns in WHERE clauses by encrypting the search value: `WHERE col = lib.CryptoEncrypt("value")`. Do NOT pull large datasets and filter client-side when a precise query is possible.
Columns ending in `_id` are NOT encrypted (plain text, always filterable).
**IMPORTANT:** Do NOT write Python/Go scripts to decrypt database fields. Use the `decrypt` tool: **IMPORTANT:** Do NOT write Python/Go scripts to decrypt database fields. Use the `decrypt` tool:
```bash ```bash

View File

@ -1,257 +0,0 @@
package main
import (
"encoding/json"
"net/http"
"strings"
"inou/lib"
)
// CategoryCount is the JSON value reported for one category (or subcategory)
// key: how many items are shown and how many are hidden.
type CategoryCount struct {
Shown int `json:"shown"`
Hidden int `json:"hidden"`
}
// handleCategories writes a JSON object mapping category names to their
// CategoryCount for a single dossier.
//
// Query parameters:
//   - dossier (required): the dossier to inspect; when empty the handler
//     answers 400 "missing dossier".
//   - type: "" selects the top-level counts, "genome" selects genome counts,
//     any other value produces an empty object.
//   - category: with type=genome, selects the subcategory counts of that
//     genome category instead of the per-category counts.
func handleCategories(w http.ResponseWriter, r *http.Request) {
	// nil means the caller could not be resolved; presumably the helper has
	// already written the failure response (TODO confirm).
	accessCtx := getAccessContextOrFail(w, r)
	if accessCtx == nil {
		return
	}

	query := r.URL.Query()
	dossierID := query.Get("dossier")
	if dossierID == "" {
		http.Error(w, "missing dossier", http.StatusBadRequest)
		return
	}
	obsType := query.Get("type")
	category := query.Get("category")

	// The accessor ID is handed down so the count helpers can apply access rules.
	var counts map[string]CategoryCount
	switch {
	case obsType == "":
		counts = getTopLevelCounts(accessCtx.AccessorID, dossierID)
	case obsType == "genome" && category != "":
		counts = getGenomeSubcategoryCounts(accessCtx.AccessorID, dossierID, category)
	case obsType == "genome":
		counts = getGenomeCounts(accessCtx.AccessorID, dossierID)
	default:
		counts = make(map[string]CategoryCount)
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(counts)
}
// getTopLevelCounts returns the per-category counts for the four top-level
// categories (imaging, documents, labs, genome) of a dossier.
//
// A category is included only when an access-checked lib.EntryList probe for
// accessorID returns at least one entry. The count itself is then read with a
// plain COUNT(*) query. For genome, the raw row count is replaced by the
// shown/hidden variant totals summed over all genome categories.
func getTopLevelCounts(accessorID, dossierID string) map[string]CategoryCount {
	counts := make(map[string]CategoryCount)

	categoryNames := map[int]string{
		1: "imaging", 2: "documents", 3: "labs", 4: "genome",
	}

	for catInt, catName := range categoryNames {
		// Probe with a one-row list: an error or an empty result means the
		// accessor has no access or the category has no entries.
		entries, err := lib.EntryList(accessorID, "", catInt, &lib.EntryFilter{
			DossierID: dossierID,
			Limit:     1, // only need to know whether anything is visible
		})
		if err != nil || len(entries) == 0 {
			continue
		}

		// Fetch the actual row count. If the query yields nothing, catCounts
		// stays empty and the category is simply omitted.
		var catCounts []struct {
			Count int `db:"cnt"`
		}
		lib.Query("SELECT COUNT(*) as cnt FROM entries WHERE dossier_id = ? AND category = ?",
			[]any{dossierID, catInt}, &catCounts)
		if len(catCounts) > 0 && catCounts[0].Count > 0 {
			counts[catName] = CategoryCount{Shown: catCounts[0].Count, Hidden: 0}
		}
	}

	// For genome, report variant totals rather than the raw entry count.
	genomeCats := getGenomeCounts(accessorID, dossierID)
	if len(genomeCats) > 0 {
		totalShown, totalHidden := 0, 0
		for _, c := range genomeCats {
			totalShown += c.Shown
			totalHidden += c.Hidden
		}
		// Shown must be the summed variant count so it is in the same unit as
		// Hidden; previously the number of categories was reported here and
		// totalShown was computed but never used.
		counts["genome"] = CategoryCount{Shown: totalShown, Hidden: totalHidden}
	}

	return counts
}
// variantData carries the fields of a variant that decide its visibility:
// magnitude ("mag"), repute ("rep") and subcategory ("sub").
type variantData struct {
	Mag float64 `json:"mag"`
	Rep string  `json:"rep"`
	Sub string  `json:"sub"`
}

// shouldIncludeVariant reports whether a variant should be counted/shown.
//
// With includeHidden set, every variant is included. Otherwise a variant is
// hidden when its magnitude is strictly greater than 4.0 or its repute is
// "bad" in any letter case.
func shouldIncludeVariant(data variantData, includeHidden bool) bool {
	// Variants above this magnitude are treated as sensitive.
	const hiddenMagnitude = 4.0

	if includeHidden {
		return true
	}
	if data.Mag > hiddenMagnitude {
		return false
	}
	// Compare case-insensitively so spellings such as "BAD" are hidden too,
	// not only the literal "Bad" and "bad".
	if strings.EqualFold(data.Rep, "bad") {
		return false
	}
	return true
}
// getGenomeCounts returns the per-category genome counts for a dossier.
//
// It prefers the "counts" object stored in the extraction entry's JSON data.
// An empty map is returned when the extraction cannot be loaded, has no data,
// or holds data that is not valid JSON. When the data parses but carries no
// "counts", the counts are recomputed via getGenomeCountsSlow.
func getGenomeCounts(accessorID, dossierID string) map[string]CategoryCount {
	empty := make(map[string]CategoryCount)

	// Lookups are performed on behalf of the accessor.
	accessCtx := &lib.AccessContext{AccessorID: accessorID}
	extraction, err := lib.GenomeGetExtraction(accessCtx, dossierID)
	if err != nil || extraction.Data == "" {
		return empty
	}

	var cached struct {
		Counts map[string]CategoryCount `json:"counts"`
	}
	if json.Unmarshal([]byte(extraction.Data), &cached) != nil {
		return empty
	}

	if cached.Counts == nil {
		// Older extraction data without stored counts: compute them now.
		return getGenomeCountsSlow(accessorID, dossierID)
	}
	return cached.Counts
}
// getGenomeCountsSlow builds the per-category genome counts by loading every
// variant of every tier and classifying each one with shouldIncludeVariant.
//
// The result is keyed by tier category. Tiers whose variants cannot be loaded,
// or that have no variants, are left out. An empty map is returned when the
// extraction or the tier list cannot be loaded.
func getGenomeCountsSlow(accessorID, dossierID string) map[string]CategoryCount {
	result := make(map[string]CategoryCount)
	accessCtx := &lib.AccessContext{AccessorID: accessorID}

	extraction, err := lib.GenomeGetExtraction(accessCtx, dossierID)
	if err != nil {
		return result
	}
	tiers, err := lib.GenomeGetTiers(accessCtx, dossierID, extraction.EntryID)
	if err != nil {
		return result
	}

	for _, tier := range tiers {
		variants, err := lib.GenomeGetVariantsByTier(accessCtx, dossierID, tier.TierID)
		if err != nil || len(variants) == 0 {
			continue
		}

		// Every variant lands in exactly one of the two buckets.
		var tally CategoryCount
		for _, v := range variants {
			visible := shouldIncludeVariant(variantData{
				Mag: v.Magnitude,
				Rep: v.Repute,
				Sub: v.Subcategory,
			}, false)
			if visible {
				tally.Shown++
			} else {
				tally.Hidden++
			}
		}
		result[tier.Category] = tally
	}

	return result
}
// getGenomeSubcategoryCounts returns shown/hidden variant counts per
// subcategory within one genome category of a dossier.
//
// Variants with an empty subcategory are ignored. An empty map is returned
// when the extraction, the tier for the category, or its variants cannot be
// loaded.
func getGenomeSubcategoryCounts(accessorID, dossierID string, category string) map[string]CategoryCount {
	result := make(map[string]CategoryCount)
	accessCtx := &lib.AccessContext{AccessorID: accessorID}

	extraction, err := lib.GenomeGetExtraction(accessCtx, dossierID)
	if err != nil {
		return result
	}
	tier, err := lib.GenomeGetTierByCategory(accessCtx, dossierID, extraction.EntryID, category)
	if err != nil {
		return result
	}
	variants, err := lib.GenomeGetVariantsByTier(accessCtx, dossierID, tier.TierID)
	if err != nil {
		return result
	}

	for _, v := range variants {
		sub := v.Subcategory
		if sub == "" {
			continue
		}

		// Read-modify-write the subcategory's running tally; a missing key
		// starts from the zero CategoryCount.
		tally := result[sub]
		if shouldIncludeVariant(variantData{Mag: v.Magnitude, Rep: v.Repute, Sub: sub}, false) {
			tally.Shown++
		} else {
			tally.Hidden++
		}
		result[sub] = tally
	}

	return result
}
// Blank reference that keeps the "strings" import in use so the file compiles
// even when no other code here calls into the package. Remove it together
// with the import if the package is genuinely no longer needed.
var _ = strings.Contains

View File

@ -1,68 +0,0 @@
package main
import (
"encoding/json"
"net/http"
"strconv"
"strings"
"inou/lib"
)
// handleGenomeQuery serves genome variant queries for one dossier and writes
// the lib.GenomeQuery result as JSON.
//
// Query parameters:
//   - dossier (required): dossier ID; when empty the handler answers 400.
//   - category, search, gene, sort: passed through unchanged.
//   - rsids: comma-separated rsids; surrounding whitespace and empty items
//     are dropped.
//   - min_magnitude: float, default 0.
//   - offset: non-negative integer, default 0.
//   - limit: positive integer, default 100.
//   - include_hidden: "true" to include hidden variants.
//
// Numeric parameters that are missing or cannot be parsed keep their default.
// A query error is reported as a JSON object {"error": "..."}.
func handleGenomeQuery(w http.ResponseWriter, r *http.Request) {
	ctx := getAccessContextOrFail(w, r)
	if ctx == nil {
		return
	}

	q := r.URL.Query()
	dossierID := q.Get("dossier")
	if dossierID == "" {
		http.Error(w, "missing dossier", http.StatusBadRequest)
		return
	}
	if !requireDossierAccess(w, ctx, dossierID) {
		return
	}

	// Only overwrite a default when the value actually parses. Previously a
	// malformed limit replaced the default 100 with Atoi's zero result.
	var minMag float64
	if v, err := strconv.ParseFloat(q.Get("min_magnitude"), 64); err == nil {
		minMag = v
	}
	offset := 0
	if v, err := strconv.Atoi(q.Get("offset")); err == nil && v > 0 {
		offset = v
	}
	limit := 100
	if v, err := strconv.Atoi(q.Get("limit")); err == nil && v > 0 {
		limit = v
	}

	// Normalise the rsid list so "rs1, rs2" and trailing commas match as
	// intended. An absent parameter leaves rsids nil.
	var rsids []string
	for _, id := range strings.Split(q.Get("rsids"), ",") {
		if id = strings.TrimSpace(id); id != "" {
			rsids = append(rsids, id)
		}
	}

	result, err := lib.GenomeQuery(dossierID, lib.GenomeQueryOpts{
		Category:      q.Get("category"),
		Search:        q.Get("search"),
		Gene:          q.Get("gene"),
		RSIDs:         rsids,
		MinMagnitude:  minMag,
		IncludeHidden: q.Get("include_hidden") == "true",
		Sort:          q.Get("sort"),
		Offset:        offset,
		Limit:         limit,
	})

	w.Header().Set("Content-Type", "application/json")
	if err != nil {
		// Errors are delivered in the JSON body; the status code is left at
		// its default, as before, so existing clients keep working.
		json.NewEncoder(w).Encode(map[string]interface{}{
			"error": err.Error(),
		})
		return
	}
	json.NewEncoder(w).Encode(result)
}

View File

@ -41,8 +41,6 @@ func main() {
http.HandleFunc("/api/dossiers", handleDossiers) http.HandleFunc("/api/dossiers", handleDossiers)
http.HandleFunc("/api/dossier", handleDossier) http.HandleFunc("/api/dossier", handleDossier)
http.HandleFunc("/api/studies", handleStudies) http.HandleFunc("/api/studies", handleStudies)
http.HandleFunc("/api/genome", handleGenomeQuery)
http.HandleFunc("/api/categories", handleCategories)
http.HandleFunc("/api/series", handleSeries) http.HandleFunc("/api/series", handleSeries)
http.HandleFunc("/api/slices", handleSlices) http.HandleFunc("/api/slices", handleSlices)
http.HandleFunc("/image/", handleImage) http.HandleFunc("/image/", handleImage)

View File

@ -265,7 +265,7 @@ func main() {
// ===== PHASE 4: Load SNPedia and match ===== // ===== PHASE 4: Load SNPedia and match =====
phase4Start := time.Now() phase4Start := time.Now()
snpediaDB, err := sql.Open("sqlite3", "/home/johan/dev/inou/snpedia-genotypes/genotypes.db?mode=ro") snpediaDB, err := sql.Open("sqlite3", "/tank/inou/data/genotypes.db?mode=ro")
if err != nil { if err != nil {
fmt.Println("SNPedia DB open failed:", err) fmt.Println("SNPedia DB open failed:", err)
os.Exit(1) os.Exit(1)
@ -542,6 +542,7 @@ func main() {
Type: v.RSID, Type: v.RSID,
Value: v.Genotype, Value: v.Genotype,
Tags: v.Gene, Tags: v.Gene,
SearchKey: v.Gene,
Ordinal: i + 1, Ordinal: i + 1,
Timestamp: now, Timestamp: now,
Data: string(dataJSON), Data: string(dataJSON),

299
lib/v2.go
View File

@ -1184,8 +1184,7 @@ type GenomeMatch struct {
Magnitude *float64 `json:"magnitude,omitempty"` Magnitude *float64 `json:"magnitude,omitempty"`
Repute string `json:"repute,omitempty"` Repute string `json:"repute,omitempty"`
Summary string `json:"summary,omitempty"` Summary string `json:"summary,omitempty"`
Category string `json:"category,omitempty"` Categories []string `json:"categories,omitempty"`
Subcategory string `json:"subcategory,omitempty"`
} }
// GenomeQueryResult is the response from GenomeQuery // GenomeQueryResult is the response from GenomeQuery
@ -1202,58 +1201,33 @@ type GenomeQueryOpts struct {
Gene string // comma-separated Gene string // comma-separated
RSIDs []string RSIDs []string
MinMagnitude float64 MinMagnitude float64
Repute string // filter by repute (Good, Bad, Clear)
IncludeHidden bool IncludeHidden bool
Sort string // "magnitude" (default), "gene", "rsid" Sort string // "magnitude" (default), "gene", "rsid"
Offset int Offset int
Limit int Limit int
AccessorID string // who is querying (for audit logging)
} }
// GenomeQuery queries genome variants for a dossier with filtering, sorting, and pagination. // GenomeQuery queries genome variants for a dossier.
// Fast path: gene/rsid use indexed search_key/type columns (precise SQL queries).
// Slow path: search/min_magnitude load all variants and filter in memory.
func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, error) { func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, error) {
sysCtx := &AccessContext{IsSystem: true} if opts.IncludeHidden {
var details []string
extraction, err := GenomeGetExtraction(sysCtx, dossierID)
if err != nil {
return nil, fmt.Errorf("GENOME_NO_EXTRACTION: %w", err)
}
// Get tiers
var tiers []GenomeTier
tierCategories := make(map[string]string)
if opts.Category != "" {
tier, err := GenomeGetTierByCategory(sysCtx, dossierID, extraction.EntryID, opts.Category)
if err == nil {
tiers = append(tiers, *tier)
tierCategories[tier.TierID] = tier.Category
}
} else {
tiers, _ = GenomeGetTiers(sysCtx, dossierID, extraction.EntryID)
for _, t := range tiers {
tierCategories[t.TierID] = t.Category
}
}
if len(tiers) == 0 {
return &GenomeQueryResult{Matches: []GenomeMatch{}}, nil
}
tierIDs := make([]string, len(tiers))
for i, t := range tiers {
tierIDs[i] = t.TierID
}
variants, err := GenomeGetVariants(sysCtx, dossierID, tierIDs)
if err != nil {
return nil, fmt.Errorf("GENOME_VARIANT_QUERY_FAILED: %w", err)
}
// Parse genes
var genes []string
if opts.Gene != "" { if opts.Gene != "" {
for _, g := range strings.Split(opts.Gene, ",") { details = append(details, "gene="+opts.Gene)
genes = append(genes, strings.TrimSpace(g))
} }
if len(opts.RSIDs) > 0 {
details = append(details, "rsids="+strings.Join(opts.RSIDs, ","))
}
if opts.Search != "" {
details = append(details, "search="+opts.Search)
}
if opts.Category != "" {
details = append(details, "category="+opts.Category)
}
AuditLog(opts.AccessorID, "genome_reveal_hidden", dossierID, strings.Join(details, " "))
} }
limit := opts.Limit limit := opts.Limit
@ -1264,96 +1238,235 @@ func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, er
limit = 500 limit = 500
} }
// Filter and collect // Fast path: gene or rsid — use indexed columns
var matches []GenomeMatch if opts.Gene != "" || len(opts.RSIDs) > 0 {
total := 0 return genomeQueryFast(dossierID, opts, limit)
}
for _, v := range variants { // Slow path: search/min_magnitude — load all, filter in memory
return genomeQuerySlow(dossierID, opts, limit)
}
// genomeQueryFast uses indexed search_key (gene) and type (rsid) columns.
func genomeQueryFast(dossierID string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) {
var entries []Entry
if opts.Gene != "" {
// Split comma-separated genes, query each via indexed search_key
genes := strings.Split(opts.Gene, ",")
for i := range genes {
genes[i] = strings.TrimSpace(genes[i])
}
// Build gene IN clause
genePlaceholders := make([]string, len(genes))
args := []any{dossierID, CategoryGenome}
for i, gene := range genes {
genePlaceholders[i] = "?"
args = append(args, CryptoEncrypt(gene))
}
sql := "SELECT * FROM entries WHERE dossier_id = ? AND category = ? AND search_key IN (" + strings.Join(genePlaceholders, ",") + ")"
// Add rsid filter if specified
if len(opts.RSIDs) > 0 { if len(opts.RSIDs) > 0 {
found := false rsidPlaceholders := make([]string, len(opts.RSIDs))
for _, r := range opts.RSIDs { for i, rsid := range opts.RSIDs {
if r == v.RSID { rsidPlaceholders[i] = "?"
found = true args = append(args, CryptoEncrypt(rsid))
break
} }
sql += " AND type IN (" + strings.Join(rsidPlaceholders, ",") + ")"
} }
if !found {
continue Query(sql, args, &entries)
} else if len(opts.RSIDs) > 0 {
// rsid only, no gene — single IN query
placeholders := make([]string, len(opts.RSIDs))
args := []any{dossierID, CategoryGenome}
for i, rsid := range opts.RSIDs {
placeholders[i] = "?"
args = append(args, CryptoEncrypt(rsid))
}
sql := "SELECT * FROM entries WHERE dossier_id = ? AND category = ? AND type IN (" + strings.Join(placeholders, ",") + ")"
Query(sql, args, &entries)
}
// Look up tier categories for parent_ids (single IN query)
tierCategories := make(map[string]string)
tierIDSet := make(map[string]bool)
for _, e := range entries {
tierIDSet[e.ParentID] = true
}
if len(tierIDSet) > 0 {
placeholders := make([]string, 0, len(tierIDSet))
args := make([]any, 0, len(tierIDSet))
for id := range tierIDSet {
placeholders = append(placeholders, "?")
args = append(args, id)
}
var tierEntries []Entry
Query("SELECT * FROM entries WHERE entry_id IN ("+strings.Join(placeholders, ",")+")", args, &tierEntries)
for _, t := range tierEntries {
tierCategories[t.EntryID] = t.Value
} }
} }
if len(genes) > 0 { return genomeEntriesToResult(entries, tierCategories, opts, limit)
found := false
for _, g := range genes {
if strings.EqualFold(v.Gene, g) {
found = true
break
} }
// genomeQuerySlow loads all variants and filters in memory (for search/min_magnitude).
func genomeQuerySlow(dossierID string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) {
sysCtx := &AccessContext{IsSystem: true}
extraction, err := GenomeGetExtraction(sysCtx, dossierID)
if err != nil {
return nil, fmt.Errorf("GENOME_NO_EXTRACTION: %w", err)
} }
if !found {
continue tiers, _ := GenomeGetTiers(sysCtx, dossierID, extraction.EntryID)
if len(tiers) == 0 {
return &GenomeQueryResult{Matches: []GenomeMatch{}}, nil
} }
tierCategories := make(map[string]string)
tierIDs := make([]string, len(tiers))
for i, t := range tiers {
tierIDs[i] = t.TierID
tierCategories[t.TierID] = t.Category
}
variants, err := GenomeGetVariants(sysCtx, dossierID, tierIDs)
if err != nil {
return nil, fmt.Errorf("GENOME_VARIANT_QUERY_FAILED: %w", err)
}
// Convert to entries for shared result builder
var entries []Entry
for _, v := range variants {
dataJSON, _ := json.Marshal(struct {
Mag float64 `json:"mag,omitempty"`
Rep string `json:"rep,omitempty"`
Sum string `json:"sum,omitempty"`
Sub string `json:"sub,omitempty"`
}{v.Magnitude, v.Repute, v.Summary, v.Subcategory})
entries = append(entries, Entry{
EntryID: v.EntryID,
ParentID: v.TierID,
Type: v.RSID,
Value: v.Genotype,
Tags: v.Gene,
Data: string(dataJSON),
})
}
return genomeEntriesToResult(entries, tierCategories, opts, limit)
}
// genomeEntriesToResult converts raw entries to GenomeQueryResult with filtering/sorting.
func genomeEntriesToResult(entries []Entry, tierCategories map[string]string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) {
targeted := opts.Gene != "" || len(opts.RSIDs) > 0
// Dedup by rsid, merge categories
seen := make(map[string]*GenomeMatch)
var order []string
for _, e := range entries {
var data struct {
Mag float64 `json:"mag"`
Rep string `json:"rep"`
Sum string `json:"sum"`
Sub string `json:"sub"`
}
if e.Data != "" {
json.Unmarshal([]byte(e.Data), &data)
} }
if opts.Search != "" { if opts.Search != "" {
sl := strings.ToLower(opts.Search) sl := strings.ToLower(opts.Search)
if !strings.Contains(strings.ToLower(v.Gene), sl) && if !strings.Contains(strings.ToLower(e.Tags), sl) &&
!strings.Contains(strings.ToLower(v.Summary), sl) && !strings.Contains(strings.ToLower(data.Sum), sl) &&
!strings.Contains(strings.ToLower(v.Subcategory), sl) && !strings.Contains(strings.ToLower(data.Sub), sl) &&
!strings.Contains(strings.ToLower(v.RSID), sl) { !strings.Contains(strings.ToLower(e.Type), sl) &&
!strings.Contains(strings.ToLower(e.Value), sl) {
continue continue
} }
} }
if opts.MinMagnitude > 0 && v.Magnitude < opts.MinMagnitude { if opts.Category != "" && tierCategories[e.ParentID] != opts.Category {
continue continue
} }
// Determine if variant would be hidden if opts.MinMagnitude > 0 && data.Mag < opts.MinMagnitude {
isHidden := v.Magnitude > 4.0 || strings.EqualFold(v.Repute, "bad") continue
}
// Targeted queries (specific rsIDs or gene) return redacted results if opts.Repute != "" && !strings.EqualFold(data.Rep, opts.Repute) {
// Broad queries skip hidden variants entirely continue
targeted := len(opts.RSIDs) > 0 || len(genes) > 0 }
isHidden := data.Mag > 4.0 || strings.EqualFold(data.Rep, "bad")
if isHidden && !opts.IncludeHidden && !targeted { if isHidden && !opts.IncludeHidden && !targeted {
continue continue
} }
total++ cat := tierCategories[e.ParentID]
if total <= opts.Offset {
continue if m, ok := seen[e.Type]; ok {
// Already seen this rsid — add category if not duplicate
if cat != "" {
dup := false
for _, c := range m.Categories {
if c == cat {
dup = true
break
}
}
if !dup {
m.Categories = append(m.Categories, cat)
}
} }
if len(matches) >= limit {
continue continue
} }
// Redact sensitive fields unless include_hidden is set
redact := isHidden && !opts.IncludeHidden redact := isHidden && !opts.IncludeHidden
match := &GenomeMatch{
match := GenomeMatch{ RSID: e.Type,
RSID: v.RSID, Gene: e.Tags,
Gene: v.Gene, }
Category: tierCategories[v.TierID], if cat != "" {
Subcategory: v.Subcategory, match.Categories = []string{cat}
} }
if redact { if redact {
match.Genotype = "hidden" match.Genotype = "hidden"
match.Summary = "Sensitive variant hidden. Query with include_hidden=true to reveal." match.Summary = "Sensitive variant hidden. Query with include_hidden=true to reveal."
} else { } else {
match.Genotype = v.Genotype match.Genotype = e.Value
if v.Summary != "" { if data.Sum != "" {
match.Summary = v.Summary match.Summary = data.Sum
} }
} }
if v.Magnitude > 0 { if data.Mag > 0 {
mag := v.Magnitude mag := data.Mag
match.Magnitude = &mag match.Magnitude = &mag
} }
if v.Repute != "" { if data.Rep != "" {
match.Repute = v.Repute match.Repute = data.Rep
} }
matches = append(matches, match)
seen[e.Type] = match
order = append(order, e.Type)
}
total := len(order)
var matches []GenomeMatch
for i, rsid := range order {
if i < opts.Offset {
continue
}
if len(matches) >= limit {
break
}
matches = append(matches, *seen[rsid])
} }
// Sort // Sort

View File

@ -12,8 +12,6 @@ var apiEndpoints = map[string]bool{
"/api/dossiers": true, "/api/dossiers": true,
"/api/dossier": true, "/api/dossier": true,
"/api/studies": true, "/api/studies": true,
"/api/genome": true,
"/api/categories": true,
"/api/series": true, "/api/series": true,
"/api/slices": true, "/api/slices": true,
"/api/labs/tests": true, "/api/labs/tests": true,

View File

@ -369,13 +369,11 @@ func handleMCPToolsList(w http.ResponseWriter, req mcpRequest) {
}, },
{ {
"name": "get_categories", "name": "get_categories",
"description": "List available data categories for a dossier (imaging, labs, documents, genome). Use this first to discover what data types are available, then query entries by category. For lab results, LOINC codes provide best search accuracy.", "description": "List available data categories for a dossier with entry counts. Returns category names and how many entries exist in each.",
"inputSchema": map[string]interface{}{ "inputSchema": map[string]interface{}{
"type": "object", "type": "object",
"properties": map[string]interface{}{ "properties": map[string]interface{}{
"dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"}, "dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"},
"type": map[string]interface{}{"type": "string", "description": "Observation type (e.g., genome) for subcategories"},
"category": map[string]interface{}{"type": "string", "description": "Get subcategories within this category"},
}, },
"required": []string{"dossier"}, "required": []string{"dossier"},
}, },
@ -402,17 +400,20 @@ func handleMCPToolsList(w http.ResponseWriter, req mcpRequest) {
}, },
{ {
"name": "query_genome", "name": "query_genome",
"description": "Query genome variants with specialized genome parameters (magnitude, repute, gene, rsids). Returns matches with significance ratings. For non-genome data, use get_categories to find available categories.", "description": "Query genome variants by gene, rsid, or category. Use gene names (e.g. MTHFR) not concepts (e.g. methylation). Search also matches genotype (e.g. AA, GG). Sensitive variants are redacted; use include_hidden to reveal.",
"inputSchema": map[string]interface{}{ "inputSchema": map[string]interface{}{
"type": "object", "type": "object",
"properties": map[string]interface{}{ "properties": map[string]interface{}{
"dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"}, "dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"},
"gene": map[string]interface{}{"type": "string", "description": "Gene name(s), comma-separated"}, "gene": map[string]interface{}{"type": "string", "description": "Gene name(s), comma-separated"},
"search": map[string]interface{}{"type": "string", "description": "Search gene, subcategory, or summary"}, "search": map[string]interface{}{"type": "string", "description": "Free-text search across gene, genotype, subcategory, summary, rsid"},
"category": map[string]interface{}{"type": "string", "description": "Filter by category"}, "category": map[string]interface{}{"type": "string", "description": "Filter by genome category (e.g. cancer, disease, traits)"},
"rsids": map[string]interface{}{"type": "string", "description": "Comma-separated rsids"}, "rsids": map[string]interface{}{"type": "string", "description": "Comma-separated rsids"},
"min_magnitude": map[string]interface{}{"type": "number", "description": "Minimum magnitude"}, "min_magnitude": map[string]interface{}{"type": "number", "description": "Minimum magnitude"},
"include_hidden": map[string]interface{}{"type": "boolean", "description": "Include hidden categories and high-magnitude variants"}, "repute": map[string]interface{}{"type": "string", "description": "Filter by repute: Good, Bad, or Clear"},
"include_hidden": map[string]interface{}{"type": "boolean", "description": "Reveal redacted sensitive variants (audited)"},
"limit": map[string]interface{}{"type": "number", "description": "Max results to return (default 20)"},
"offset": map[string]interface{}{"type": "number", "description": "Skip first N results for pagination"},
}, },
"required": []string{"dossier"}, "required": []string{"dossier"},
}, },
@ -613,9 +614,7 @@ func handleMCPToolsCall(w http.ResponseWriter, req mcpRequest, accessToken, doss
sendMCPError(w, req.ID, -32602, "dossier required") sendMCPError(w, req.ID, -32602, "dossier required")
return return
} }
typ, _ := params.Arguments["type"].(string) result, err := mcpGetCategories(dossier)
category, _ := params.Arguments["category"].(string)
result, err := mcpGetCategories(accessToken, dossier, typ, category)
if err != nil { if err != nil {
sendMCPError(w, req.ID, -32000, err.Error()) sendMCPError(w, req.ID, -32000, err.Error())
return return
@ -633,12 +632,17 @@ func handleMCPToolsCall(w http.ResponseWriter, req mcpRequest, accessToken, doss
category, _ := params.Arguments["category"].(string) category, _ := params.Arguments["category"].(string)
rsids, _ := params.Arguments["rsids"].(string) rsids, _ := params.Arguments["rsids"].(string)
minMag, _ := params.Arguments["min_magnitude"].(float64) minMag, _ := params.Arguments["min_magnitude"].(float64)
repute, _ := params.Arguments["repute"].(string)
includeHidden, _ := params.Arguments["include_hidden"].(bool) includeHidden, _ := params.Arguments["include_hidden"].(bool)
limitF, _ := params.Arguments["limit"].(float64)
offsetF, _ := params.Arguments["offset"].(float64)
limit := int(limitF)
offset := int(offsetF)
fmt.Printf("[MCP] query_genome: dossier=%s gene=%s category=%s includeHidden=%v\n", fmt.Printf("[MCP] query_genome: dossier=%s gene=%s category=%s repute=%s limit=%d offset=%d\n",
dossier, gene, category, includeHidden) dossier, gene, category, repute, limit, offset)
result, err := mcpQueryGenome(accessToken, dossier, gene, search, category, rsids, minMag, includeHidden) result, err := mcpQueryGenome(accessToken, dossier, dossierID, gene, search, category, rsids, minMag, repute, includeHidden, limit, offset)
if err != nil { if err != nil {
fmt.Printf("[MCP] query_genome error: %v\n", err) fmt.Printf("[MCP] query_genome error: %v\n", err)
sendMCPError(w, req.ID, -32000, err.Error()) sendMCPError(w, req.ID, -32000, err.Error())

View File

@ -181,38 +181,54 @@ func mcpQueryEntries(accessToken, dossier, category, typ, searchKey, parent, fro
return string(pretty), nil return string(pretty), nil
} }
func mcpGetCategories(accessToken, dossier, typ, category string) (string, error) { func mcpGetCategories(dossier string) (string, error) {
params := map[string]string{"dossier": dossier} var counts []struct {
if typ != "" { Category int `db:"category"`
params["type"] = typ Count int `db:"cnt"`
} }
if category != "" { err := lib.Query("SELECT category, COUNT(*) as cnt FROM entries WHERE dossier_id = ? GROUP BY category", []any{dossier}, &counts)
params["category"] = category
}
body, err := mcpAPICall(accessToken, "/api/categories", params)
if err != nil { if err != nil {
return "", err return "", err
} }
var data interface{} result := make(map[string]int)
json.Unmarshal(body, &data) for _, c := range counts {
pretty, _ := json.MarshalIndent(data, "", " ") name := lib.CategoryName(c.Category)
if name != "unknown" {
result[name] = c.Count
}
}
pretty, _ := json.MarshalIndent(result, "", " ")
return string(pretty), nil return string(pretty), nil
} }
func mcpQueryGenome(accessToken, dossier, gene, search, category, rsids string, minMag float64, includeHidden bool) (string, error) { func mcpQueryGenome(accessToken, dossier, accessorID, gene, search, category, rsids string, minMag float64, repute string, includeHidden bool, limit, offset int) (string, error) {
var rsidList []string var rsidList []string
if rsids != "" { if rsids != "" {
rsidList = strings.Split(rsids, ",") rsidList = strings.Split(rsids, ",")
} }
if limit <= 0 {
numTerms := 1
if gene != "" {
numTerms = len(strings.Split(gene, ","))
}
if len(rsidList) > numTerms {
numTerms = len(rsidList)
}
limit = 20 * numTerms
}
result, err := lib.GenomeQuery(dossier, lib.GenomeQueryOpts{ result, err := lib.GenomeQuery(dossier, lib.GenomeQueryOpts{
Category: category, Category: category,
Search: search, Search: search,
Gene: gene, Gene: gene,
RSIDs: rsidList, RSIDs: rsidList,
MinMagnitude: minMag, MinMagnitude: minMag,
Repute: repute,
IncludeHidden: includeHidden, IncludeHidden: includeHidden,
Limit: limit,
Offset: offset,
AccessorID: accessorID,
}) })
if err != nil { if err != nil {
return "", err return "", err