feat: optimize genome queries with IN clauses, dedup, repute filter
- Replace N separate SQL queries with single IN clause for rsids and genes - Dedup results by rsid, merging categories from multiple tiers - Add repute filter (Good/Bad/Clear) to genome queries - Expose limit/offset as MCP parameters - Add genotype to search check - Fix category filter in genomeEntriesToResult - Remove deprecated api/api_categories.go and api/api_genome.go - Change GenomeMatch to use Categories []string instead of Category+Subcategory Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d2d77d1503
commit
77db02a6eb
|
|
@ -94,6 +94,10 @@ Key tables:
|
||||||
|
|
||||||
### Querying Encrypted Data
|
### Querying Encrypted Data
|
||||||
|
|
||||||
|
**Encryption is DETERMINISTIC.** You CAN use encrypted columns in WHERE clauses by encrypting the search value: `WHERE col = lib.CryptoEncrypt("value")`. Do NOT pull large datasets and filter client-side when a precise query is possible.
|
||||||
|
|
||||||
|
Columns ending in `_id` are NOT encrypted (plain text, always filterable).
|
||||||
|
|
||||||
**IMPORTANT:** Do NOT write Python/Go scripts to decrypt database fields. Use the `decrypt` tool:
|
**IMPORTANT:** Do NOT write Python/Go scripts to decrypt database fields. Use the `decrypt` tool:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
||||||
|
|
@ -1,257 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"net/http"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"inou/lib"
|
|
||||||
)
|
|
||||||
|
|
||||||
// CategoryCount is the per-category tally returned by the categories
// endpoint: Shown and Hidden are serialized as "shown" and "hidden".
type CategoryCount struct {
	Shown  int `json:"shown"`
	Hidden int `json:"hidden"`
}
|
|
||||||
|
|
||||||
func handleCategories(w http.ResponseWriter, r *http.Request) {
|
|
||||||
// Get accessor (who is asking)
|
|
||||||
ctx := getAccessContextOrFail(w, r)
|
|
||||||
if ctx == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
dossierHex := r.URL.Query().Get("dossier")
|
|
||||||
if dossierHex == "" {
|
|
||||||
http.Error(w, "missing dossier", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
dossierID := dossierHex
|
|
||||||
|
|
||||||
obsType := r.URL.Query().Get("type")
|
|
||||||
category := r.URL.Query().Get("category")
|
|
||||||
|
|
||||||
// Pass accessor + dossier to lib - RBAC handled there
|
|
||||||
var counts map[string]CategoryCount
|
|
||||||
|
|
||||||
if obsType == "" {
|
|
||||||
counts = getTopLevelCounts(ctx.AccessorID, dossierID)
|
|
||||||
} else if obsType == "genome" {
|
|
||||||
if category != "" {
|
|
||||||
counts = getGenomeSubcategoryCounts(ctx.AccessorID, dossierID, category)
|
|
||||||
} else {
|
|
||||||
counts = getGenomeCounts(ctx.AccessorID, dossierID)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
counts = make(map[string]CategoryCount)
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
json.NewEncoder(w).Encode(counts)
|
|
||||||
}
|
|
||||||
|
|
||||||
func getTopLevelCounts(accessorID, dossierID string) map[string]CategoryCount {
|
|
||||||
counts := make(map[string]CategoryCount)
|
|
||||||
|
|
||||||
categoryNames := map[int]string{
|
|
||||||
1: "imaging", 2: "documents", 3: "labs", 4: "genome",
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check each category - only include if accessor has access
|
|
||||||
for catInt, catName := range categoryNames {
|
|
||||||
// Try to get a count by querying entries with RBAC
|
|
||||||
entries, err := lib.EntryList(accessorID, "", catInt, &lib.EntryFilter{
|
|
||||||
DossierID: dossierID,
|
|
||||||
Limit: 1, // Just check if we can see any
|
|
||||||
})
|
|
||||||
if err != nil || len(entries) == 0 {
|
|
||||||
continue // No access or no entries
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get actual count (using system context for counting only)
|
|
||||||
var catCounts []struct {
|
|
||||||
Count int `db:"cnt"`
|
|
||||||
}
|
|
||||||
lib.Query("SELECT COUNT(*) as cnt FROM entries WHERE dossier_id = ? AND category = ?",
|
|
||||||
[]any{dossierID, catInt}, &catCounts)
|
|
||||||
|
|
||||||
if len(catCounts) > 0 && catCounts[0].Count > 0 {
|
|
||||||
counts[catName] = CategoryCount{Shown: catCounts[0].Count, Hidden: 0}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// For genome, replace with detailed subcategory counts
|
|
||||||
genomeCats := getGenomeCounts(accessorID, dossierID)
|
|
||||||
if len(genomeCats) > 0 {
|
|
||||||
totalShown, totalHidden := 0, 0
|
|
||||||
for _, c := range genomeCats {
|
|
||||||
totalShown += c.Shown
|
|
||||||
totalHidden += c.Hidden
|
|
||||||
}
|
|
||||||
counts["genome"] = CategoryCount{Shown: len(genomeCats), Hidden: totalHidden}
|
|
||||||
}
|
|
||||||
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
// variantData mirrors the fields of a variant's data JSON that the counting
// code needs: "mag", "rep" and "sub".
type variantData struct {
	Mag float64 `json:"mag"`
	Rep string  `json:"rep"`
	Sub string  `json:"sub"`
}

// shouldIncludeVariant reports whether a variant should be counted/shown.
//
// With includeHidden set every variant is included. Otherwise a variant is
// excluded when its magnitude is strictly greater than 4.0 or when its repute
// is "bad", compared case-insensitively.
func shouldIncludeVariant(data variantData, includeHidden bool) bool {
	// Magnitudes above this value are hidden; exactly 4.0 is still shown.
	const hiddenAboveMagnitude = 4.0

	if includeHidden {
		return true
	}
	if data.Mag > hiddenAboveMagnitude {
		return false
	}
	// EqualFold covers "Bad", "bad" and any other casing of the same word.
	return !strings.EqualFold(data.Rep, "bad")
}
|
|
||||||
|
|
||||||
// getGenomeCounts reads cached counts from the extraction entry (fast path)
|
|
||||||
func getGenomeCounts(accessorID, dossierID string) map[string]CategoryCount {
|
|
||||||
counts := make(map[string]CategoryCount)
|
|
||||||
|
|
||||||
// Create access context for RBAC
|
|
||||||
ctx := &lib.AccessContext{AccessorID: accessorID}
|
|
||||||
|
|
||||||
// Find extraction entry and read its data
|
|
||||||
extraction, err := lib.GenomeGetExtraction(ctx, dossierID)
|
|
||||||
if err != nil {
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
if extraction.Data == "" {
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse extraction data which contains pre-computed counts
|
|
||||||
var extractionData struct {
|
|
||||||
Counts map[string]CategoryCount `json:"counts"`
|
|
||||||
}
|
|
||||||
if err := json.Unmarshal([]byte(extraction.Data), &extractionData); err != nil {
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return cached counts if available
|
|
||||||
if extractionData.Counts != nil {
|
|
||||||
return extractionData.Counts
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: compute counts (for old data without cached counts)
|
|
||||||
return getGenomeCountsSlow(accessorID, dossierID)
|
|
||||||
}
|
|
||||||
|
|
||||||
// getGenomeCountsSlow computes counts by scanning all variants (fallback for old data)
|
|
||||||
func getGenomeCountsSlow(accessorID, dossierID string) map[string]CategoryCount {
|
|
||||||
counts := make(map[string]CategoryCount)
|
|
||||||
|
|
||||||
// Create access context for RBAC
|
|
||||||
ctx := &lib.AccessContext{AccessorID: accessorID}
|
|
||||||
|
|
||||||
// Find extraction entry
|
|
||||||
extraction, err := lib.GenomeGetExtraction(ctx, dossierID)
|
|
||||||
if err != nil {
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get all tiers
|
|
||||||
tiers, err := lib.GenomeGetTiers(ctx, dossierID, extraction.EntryID)
|
|
||||||
if err != nil {
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
// For each tier, count shown and hidden variants
|
|
||||||
for _, tier := range tiers {
|
|
||||||
variants, err := lib.GenomeGetVariantsByTier(ctx, dossierID, tier.TierID)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
shown, hidden := 0, 0
|
|
||||||
for _, v := range variants {
|
|
||||||
data := variantData{
|
|
||||||
Mag: v.Magnitude,
|
|
||||||
Rep: v.Repute,
|
|
||||||
Sub: v.Subcategory,
|
|
||||||
}
|
|
||||||
if shouldIncludeVariant(data, false) {
|
|
||||||
shown++
|
|
||||||
} else {
|
|
||||||
hidden++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if shown > 0 || hidden > 0 {
|
|
||||||
counts[tier.Category] = CategoryCount{Shown: shown, Hidden: hidden}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
func getGenomeSubcategoryCounts(accessorID, dossierID string, category string) map[string]CategoryCount {
|
|
||||||
counts := make(map[string]CategoryCount)
|
|
||||||
shownCounts := make(map[string]int)
|
|
||||||
hiddenCounts := make(map[string]int)
|
|
||||||
|
|
||||||
// Create access context for RBAC
|
|
||||||
ctx := &lib.AccessContext{AccessorID: accessorID}
|
|
||||||
|
|
||||||
// Find extraction entry
|
|
||||||
extraction, err := lib.GenomeGetExtraction(ctx, dossierID)
|
|
||||||
if err != nil {
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find tier for this category
|
|
||||||
tier, err := lib.GenomeGetTierByCategory(ctx, dossierID, extraction.EntryID, category)
|
|
||||||
if err != nil {
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get variants and count by subcategory
|
|
||||||
variants, err := lib.GenomeGetVariantsByTier(ctx, dossierID, tier.TierID)
|
|
||||||
if err != nil {
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, v := range variants {
|
|
||||||
if v.Subcategory == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
data := variantData{
|
|
||||||
Mag: v.Magnitude,
|
|
||||||
Rep: v.Repute,
|
|
||||||
Sub: v.Subcategory,
|
|
||||||
}
|
|
||||||
if shouldIncludeVariant(data, false) {
|
|
||||||
shownCounts[v.Subcategory]++
|
|
||||||
} else {
|
|
||||||
hiddenCounts[v.Subcategory]++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Combine into CategoryCount
|
|
||||||
allSubs := make(map[string]bool)
|
|
||||||
for k := range shownCounts {
|
|
||||||
allSubs[k] = true
|
|
||||||
}
|
|
||||||
for k := range hiddenCounts {
|
|
||||||
allSubs[k] = true
|
|
||||||
}
|
|
||||||
for sub := range allSubs {
|
|
||||||
counts[sub] = CategoryCount{Shown: shownCounts[sub], Hidden: hiddenCounts[sub]}
|
|
||||||
}
|
|
||||||
|
|
||||||
return counts
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unused but keeping for compatibility - remove if not needed
|
|
||||||
var _ = strings.Contains
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"net/http"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"inou/lib"
|
|
||||||
)
|
|
||||||
|
|
||||||
func handleGenomeQuery(w http.ResponseWriter, r *http.Request) {
|
|
||||||
ctx := getAccessContextOrFail(w, r)
|
|
||||||
if ctx == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
dossierID := r.URL.Query().Get("dossier")
|
|
||||||
if dossierID == "" {
|
|
||||||
http.Error(w, "missing dossier", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if !requireDossierAccess(w, ctx, dossierID) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse query params into opts
|
|
||||||
q := r.URL.Query()
|
|
||||||
var minMag float64
|
|
||||||
if s := q.Get("min_magnitude"); s != "" {
|
|
||||||
minMag, _ = strconv.ParseFloat(s, 64)
|
|
||||||
}
|
|
||||||
offset := 0
|
|
||||||
if s := q.Get("offset"); s != "" {
|
|
||||||
offset, _ = strconv.Atoi(s)
|
|
||||||
}
|
|
||||||
limit := 100
|
|
||||||
if s := q.Get("limit"); s != "" {
|
|
||||||
limit, _ = strconv.Atoi(s)
|
|
||||||
}
|
|
||||||
var rsids []string
|
|
||||||
if s := q.Get("rsids"); s != "" {
|
|
||||||
rsids = strings.Split(s, ",")
|
|
||||||
}
|
|
||||||
|
|
||||||
result, err := lib.GenomeQuery(dossierID, lib.GenomeQueryOpts{
|
|
||||||
Category: q.Get("category"),
|
|
||||||
Search: q.Get("search"),
|
|
||||||
Gene: q.Get("gene"),
|
|
||||||
RSIDs: rsids,
|
|
||||||
MinMagnitude: minMag,
|
|
||||||
IncludeHidden: q.Get("include_hidden") == "true",
|
|
||||||
Sort: q.Get("sort"),
|
|
||||||
Offset: offset,
|
|
||||||
Limit: limit,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
|
||||||
"error": err.Error(),
|
|
||||||
})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
json.NewEncoder(w).Encode(result)
|
|
||||||
}
|
|
||||||
|
|
@ -41,8 +41,6 @@ func main() {
|
||||||
http.HandleFunc("/api/dossiers", handleDossiers)
|
http.HandleFunc("/api/dossiers", handleDossiers)
|
||||||
http.HandleFunc("/api/dossier", handleDossier)
|
http.HandleFunc("/api/dossier", handleDossier)
|
||||||
http.HandleFunc("/api/studies", handleStudies)
|
http.HandleFunc("/api/studies", handleStudies)
|
||||||
http.HandleFunc("/api/genome", handleGenomeQuery)
|
|
||||||
http.HandleFunc("/api/categories", handleCategories)
|
|
||||||
http.HandleFunc("/api/series", handleSeries)
|
http.HandleFunc("/api/series", handleSeries)
|
||||||
http.HandleFunc("/api/slices", handleSlices)
|
http.HandleFunc("/api/slices", handleSlices)
|
||||||
http.HandleFunc("/image/", handleImage)
|
http.HandleFunc("/image/", handleImage)
|
||||||
|
|
|
||||||
|
|
@ -265,7 +265,7 @@ func main() {
|
||||||
|
|
||||||
// ===== PHASE 4: Load SNPedia and match =====
|
// ===== PHASE 4: Load SNPedia and match =====
|
||||||
phase4Start := time.Now()
|
phase4Start := time.Now()
|
||||||
snpediaDB, err := sql.Open("sqlite3", "/home/johan/dev/inou/snpedia-genotypes/genotypes.db?mode=ro")
|
snpediaDB, err := sql.Open("sqlite3", "/tank/inou/data/genotypes.db?mode=ro")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("SNPedia DB open failed:", err)
|
fmt.Println("SNPedia DB open failed:", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
|
|
@ -542,6 +542,7 @@ func main() {
|
||||||
Type: v.RSID,
|
Type: v.RSID,
|
||||||
Value: v.Genotype,
|
Value: v.Genotype,
|
||||||
Tags: v.Gene,
|
Tags: v.Gene,
|
||||||
|
SearchKey: v.Gene,
|
||||||
Ordinal: i + 1,
|
Ordinal: i + 1,
|
||||||
Timestamp: now,
|
Timestamp: now,
|
||||||
Data: string(dataJSON),
|
Data: string(dataJSON),
|
||||||
|
|
|
||||||
299
lib/v2.go
299
lib/v2.go
|
|
@ -1184,8 +1184,7 @@ type GenomeMatch struct {
|
||||||
Magnitude *float64 `json:"magnitude,omitempty"`
|
Magnitude *float64 `json:"magnitude,omitempty"`
|
||||||
Repute string `json:"repute,omitempty"`
|
Repute string `json:"repute,omitempty"`
|
||||||
Summary string `json:"summary,omitempty"`
|
Summary string `json:"summary,omitempty"`
|
||||||
Category string `json:"category,omitempty"`
|
Categories []string `json:"categories,omitempty"`
|
||||||
Subcategory string `json:"subcategory,omitempty"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GenomeQueryResult is the response from GenomeQuery
|
// GenomeQueryResult is the response from GenomeQuery
|
||||||
|
|
@ -1202,58 +1201,33 @@ type GenomeQueryOpts struct {
|
||||||
Gene string // comma-separated
|
Gene string // comma-separated
|
||||||
RSIDs []string
|
RSIDs []string
|
||||||
MinMagnitude float64
|
MinMagnitude float64
|
||||||
|
Repute string // filter by repute (Good, Bad, Clear)
|
||||||
IncludeHidden bool
|
IncludeHidden bool
|
||||||
Sort string // "magnitude" (default), "gene", "rsid"
|
Sort string // "magnitude" (default), "gene", "rsid"
|
||||||
Offset int
|
Offset int
|
||||||
Limit int
|
Limit int
|
||||||
|
AccessorID string // who is querying (for audit logging)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GenomeQuery queries genome variants for a dossier with filtering, sorting, and pagination.
|
// GenomeQuery queries genome variants for a dossier.
|
||||||
|
// Fast path: gene/rsid use indexed search_key/type columns (precise SQL queries).
|
||||||
|
// Slow path: search/min_magnitude load all variants and filter in memory.
|
||||||
func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, error) {
|
func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, error) {
|
||||||
sysCtx := &AccessContext{IsSystem: true}
|
if opts.IncludeHidden {
|
||||||
|
var details []string
|
||||||
extraction, err := GenomeGetExtraction(sysCtx, dossierID)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("GENOME_NO_EXTRACTION: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get tiers
|
|
||||||
var tiers []GenomeTier
|
|
||||||
tierCategories := make(map[string]string)
|
|
||||||
|
|
||||||
if opts.Category != "" {
|
|
||||||
tier, err := GenomeGetTierByCategory(sysCtx, dossierID, extraction.EntryID, opts.Category)
|
|
||||||
if err == nil {
|
|
||||||
tiers = append(tiers, *tier)
|
|
||||||
tierCategories[tier.TierID] = tier.Category
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
tiers, _ = GenomeGetTiers(sysCtx, dossierID, extraction.EntryID)
|
|
||||||
for _, t := range tiers {
|
|
||||||
tierCategories[t.TierID] = t.Category
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(tiers) == 0 {
|
|
||||||
return &GenomeQueryResult{Matches: []GenomeMatch{}}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
tierIDs := make([]string, len(tiers))
|
|
||||||
for i, t := range tiers {
|
|
||||||
tierIDs[i] = t.TierID
|
|
||||||
}
|
|
||||||
|
|
||||||
variants, err := GenomeGetVariants(sysCtx, dossierID, tierIDs)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("GENOME_VARIANT_QUERY_FAILED: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse genes
|
|
||||||
var genes []string
|
|
||||||
if opts.Gene != "" {
|
if opts.Gene != "" {
|
||||||
for _, g := range strings.Split(opts.Gene, ",") {
|
details = append(details, "gene="+opts.Gene)
|
||||||
genes = append(genes, strings.TrimSpace(g))
|
|
||||||
}
|
}
|
||||||
|
if len(opts.RSIDs) > 0 {
|
||||||
|
details = append(details, "rsids="+strings.Join(opts.RSIDs, ","))
|
||||||
|
}
|
||||||
|
if opts.Search != "" {
|
||||||
|
details = append(details, "search="+opts.Search)
|
||||||
|
}
|
||||||
|
if opts.Category != "" {
|
||||||
|
details = append(details, "category="+opts.Category)
|
||||||
|
}
|
||||||
|
AuditLog(opts.AccessorID, "genome_reveal_hidden", dossierID, strings.Join(details, " "))
|
||||||
}
|
}
|
||||||
|
|
||||||
limit := opts.Limit
|
limit := opts.Limit
|
||||||
|
|
@ -1264,96 +1238,235 @@ func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, er
|
||||||
limit = 500
|
limit = 500
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter and collect
|
// Fast path: gene or rsid — use indexed columns
|
||||||
var matches []GenomeMatch
|
if opts.Gene != "" || len(opts.RSIDs) > 0 {
|
||||||
total := 0
|
return genomeQueryFast(dossierID, opts, limit)
|
||||||
|
}
|
||||||
|
|
||||||
for _, v := range variants {
|
// Slow path: search/min_magnitude — load all, filter in memory
|
||||||
|
return genomeQuerySlow(dossierID, opts, limit)
|
||||||
|
}
|
||||||
|
|
||||||
|
// genomeQueryFast uses indexed search_key (gene) and type (rsid) columns.
|
||||||
|
func genomeQueryFast(dossierID string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) {
|
||||||
|
var entries []Entry
|
||||||
|
|
||||||
|
if opts.Gene != "" {
|
||||||
|
// Split comma-separated genes, query each via indexed search_key
|
||||||
|
genes := strings.Split(opts.Gene, ",")
|
||||||
|
for i := range genes {
|
||||||
|
genes[i] = strings.TrimSpace(genes[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build gene IN clause
|
||||||
|
genePlaceholders := make([]string, len(genes))
|
||||||
|
args := []any{dossierID, CategoryGenome}
|
||||||
|
for i, gene := range genes {
|
||||||
|
genePlaceholders[i] = "?"
|
||||||
|
args = append(args, CryptoEncrypt(gene))
|
||||||
|
}
|
||||||
|
sql := "SELECT * FROM entries WHERE dossier_id = ? AND category = ? AND search_key IN (" + strings.Join(genePlaceholders, ",") + ")"
|
||||||
|
|
||||||
|
// Add rsid filter if specified
|
||||||
if len(opts.RSIDs) > 0 {
|
if len(opts.RSIDs) > 0 {
|
||||||
found := false
|
rsidPlaceholders := make([]string, len(opts.RSIDs))
|
||||||
for _, r := range opts.RSIDs {
|
for i, rsid := range opts.RSIDs {
|
||||||
if r == v.RSID {
|
rsidPlaceholders[i] = "?"
|
||||||
found = true
|
args = append(args, CryptoEncrypt(rsid))
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
sql += " AND type IN (" + strings.Join(rsidPlaceholders, ",") + ")"
|
||||||
}
|
}
|
||||||
if !found {
|
|
||||||
continue
|
Query(sql, args, &entries)
|
||||||
|
} else if len(opts.RSIDs) > 0 {
|
||||||
|
// rsid only, no gene — single IN query
|
||||||
|
placeholders := make([]string, len(opts.RSIDs))
|
||||||
|
args := []any{dossierID, CategoryGenome}
|
||||||
|
for i, rsid := range opts.RSIDs {
|
||||||
|
placeholders[i] = "?"
|
||||||
|
args = append(args, CryptoEncrypt(rsid))
|
||||||
|
}
|
||||||
|
sql := "SELECT * FROM entries WHERE dossier_id = ? AND category = ? AND type IN (" + strings.Join(placeholders, ",") + ")"
|
||||||
|
Query(sql, args, &entries)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look up tier categories for parent_ids (single IN query)
|
||||||
|
tierCategories := make(map[string]string)
|
||||||
|
tierIDSet := make(map[string]bool)
|
||||||
|
for _, e := range entries {
|
||||||
|
tierIDSet[e.ParentID] = true
|
||||||
|
}
|
||||||
|
if len(tierIDSet) > 0 {
|
||||||
|
placeholders := make([]string, 0, len(tierIDSet))
|
||||||
|
args := make([]any, 0, len(tierIDSet))
|
||||||
|
for id := range tierIDSet {
|
||||||
|
placeholders = append(placeholders, "?")
|
||||||
|
args = append(args, id)
|
||||||
|
}
|
||||||
|
var tierEntries []Entry
|
||||||
|
Query("SELECT * FROM entries WHERE entry_id IN ("+strings.Join(placeholders, ",")+")", args, &tierEntries)
|
||||||
|
for _, t := range tierEntries {
|
||||||
|
tierCategories[t.EntryID] = t.Value
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(genes) > 0 {
|
return genomeEntriesToResult(entries, tierCategories, opts, limit)
|
||||||
found := false
|
|
||||||
for _, g := range genes {
|
|
||||||
if strings.EqualFold(v.Gene, g) {
|
|
||||||
found = true
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// genomeQuerySlow loads all variants and filters in memory (for search/min_magnitude).
|
||||||
|
func genomeQuerySlow(dossierID string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) {
|
||||||
|
sysCtx := &AccessContext{IsSystem: true}
|
||||||
|
|
||||||
|
extraction, err := GenomeGetExtraction(sysCtx, dossierID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("GENOME_NO_EXTRACTION: %w", err)
|
||||||
}
|
}
|
||||||
if !found {
|
|
||||||
continue
|
tiers, _ := GenomeGetTiers(sysCtx, dossierID, extraction.EntryID)
|
||||||
|
if len(tiers) == 0 {
|
||||||
|
return &GenomeQueryResult{Matches: []GenomeMatch{}}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tierCategories := make(map[string]string)
|
||||||
|
tierIDs := make([]string, len(tiers))
|
||||||
|
for i, t := range tiers {
|
||||||
|
tierIDs[i] = t.TierID
|
||||||
|
tierCategories[t.TierID] = t.Category
|
||||||
|
}
|
||||||
|
|
||||||
|
variants, err := GenomeGetVariants(sysCtx, dossierID, tierIDs)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("GENOME_VARIANT_QUERY_FAILED: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to entries for shared result builder
|
||||||
|
var entries []Entry
|
||||||
|
for _, v := range variants {
|
||||||
|
dataJSON, _ := json.Marshal(struct {
|
||||||
|
Mag float64 `json:"mag,omitempty"`
|
||||||
|
Rep string `json:"rep,omitempty"`
|
||||||
|
Sum string `json:"sum,omitempty"`
|
||||||
|
Sub string `json:"sub,omitempty"`
|
||||||
|
}{v.Magnitude, v.Repute, v.Summary, v.Subcategory})
|
||||||
|
|
||||||
|
entries = append(entries, Entry{
|
||||||
|
EntryID: v.EntryID,
|
||||||
|
ParentID: v.TierID,
|
||||||
|
Type: v.RSID,
|
||||||
|
Value: v.Genotype,
|
||||||
|
Tags: v.Gene,
|
||||||
|
Data: string(dataJSON),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return genomeEntriesToResult(entries, tierCategories, opts, limit)
|
||||||
|
}
|
||||||
|
|
||||||
|
// genomeEntriesToResult converts raw entries to GenomeQueryResult with filtering/sorting.
|
||||||
|
func genomeEntriesToResult(entries []Entry, tierCategories map[string]string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) {
|
||||||
|
targeted := opts.Gene != "" || len(opts.RSIDs) > 0
|
||||||
|
|
||||||
|
// Dedup by rsid, merge categories
|
||||||
|
seen := make(map[string]*GenomeMatch)
|
||||||
|
var order []string
|
||||||
|
|
||||||
|
for _, e := range entries {
|
||||||
|
var data struct {
|
||||||
|
Mag float64 `json:"mag"`
|
||||||
|
Rep string `json:"rep"`
|
||||||
|
Sum string `json:"sum"`
|
||||||
|
Sub string `json:"sub"`
|
||||||
|
}
|
||||||
|
if e.Data != "" {
|
||||||
|
json.Unmarshal([]byte(e.Data), &data)
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.Search != "" {
|
if opts.Search != "" {
|
||||||
sl := strings.ToLower(opts.Search)
|
sl := strings.ToLower(opts.Search)
|
||||||
if !strings.Contains(strings.ToLower(v.Gene), sl) &&
|
if !strings.Contains(strings.ToLower(e.Tags), sl) &&
|
||||||
!strings.Contains(strings.ToLower(v.Summary), sl) &&
|
!strings.Contains(strings.ToLower(data.Sum), sl) &&
|
||||||
!strings.Contains(strings.ToLower(v.Subcategory), sl) &&
|
!strings.Contains(strings.ToLower(data.Sub), sl) &&
|
||||||
!strings.Contains(strings.ToLower(v.RSID), sl) {
|
!strings.Contains(strings.ToLower(e.Type), sl) &&
|
||||||
|
!strings.Contains(strings.ToLower(e.Value), sl) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.MinMagnitude > 0 && v.Magnitude < opts.MinMagnitude {
|
if opts.Category != "" && tierCategories[e.ParentID] != opts.Category {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine if variant would be hidden
|
if opts.MinMagnitude > 0 && data.Mag < opts.MinMagnitude {
|
||||||
isHidden := v.Magnitude > 4.0 || strings.EqualFold(v.Repute, "bad")
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Targeted queries (specific rsIDs or gene) return redacted results
|
if opts.Repute != "" && !strings.EqualFold(data.Rep, opts.Repute) {
|
||||||
// Broad queries skip hidden variants entirely
|
continue
|
||||||
targeted := len(opts.RSIDs) > 0 || len(genes) > 0
|
}
|
||||||
|
|
||||||
|
isHidden := data.Mag > 4.0 || strings.EqualFold(data.Rep, "bad")
|
||||||
if isHidden && !opts.IncludeHidden && !targeted {
|
if isHidden && !opts.IncludeHidden && !targeted {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
total++
|
cat := tierCategories[e.ParentID]
|
||||||
if total <= opts.Offset {
|
|
||||||
continue
|
if m, ok := seen[e.Type]; ok {
|
||||||
|
// Already seen this rsid — add category if not duplicate
|
||||||
|
if cat != "" {
|
||||||
|
dup := false
|
||||||
|
for _, c := range m.Categories {
|
||||||
|
if c == cat {
|
||||||
|
dup = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !dup {
|
||||||
|
m.Categories = append(m.Categories, cat)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if len(matches) >= limit {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Redact sensitive fields unless include_hidden is set
|
|
||||||
redact := isHidden && !opts.IncludeHidden
|
redact := isHidden && !opts.IncludeHidden
|
||||||
|
match := &GenomeMatch{
|
||||||
match := GenomeMatch{
|
RSID: e.Type,
|
||||||
RSID: v.RSID,
|
Gene: e.Tags,
|
||||||
Gene: v.Gene,
|
}
|
||||||
Category: tierCategories[v.TierID],
|
if cat != "" {
|
||||||
Subcategory: v.Subcategory,
|
match.Categories = []string{cat}
|
||||||
}
|
}
|
||||||
if redact {
|
if redact {
|
||||||
match.Genotype = "hidden"
|
match.Genotype = "hidden"
|
||||||
match.Summary = "Sensitive variant hidden. Query with include_hidden=true to reveal."
|
match.Summary = "Sensitive variant hidden. Query with include_hidden=true to reveal."
|
||||||
} else {
|
} else {
|
||||||
match.Genotype = v.Genotype
|
match.Genotype = e.Value
|
||||||
if v.Summary != "" {
|
if data.Sum != "" {
|
||||||
match.Summary = v.Summary
|
match.Summary = data.Sum
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if v.Magnitude > 0 {
|
if data.Mag > 0 {
|
||||||
mag := v.Magnitude
|
mag := data.Mag
|
||||||
match.Magnitude = &mag
|
match.Magnitude = &mag
|
||||||
}
|
}
|
||||||
if v.Repute != "" {
|
if data.Rep != "" {
|
||||||
match.Repute = v.Repute
|
match.Repute = data.Rep
|
||||||
}
|
}
|
||||||
matches = append(matches, match)
|
|
||||||
|
seen[e.Type] = match
|
||||||
|
order = append(order, e.Type)
|
||||||
|
}
|
||||||
|
|
||||||
|
total := len(order)
|
||||||
|
var matches []GenomeMatch
|
||||||
|
for i, rsid := range order {
|
||||||
|
if i < opts.Offset {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if len(matches) >= limit {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
matches = append(matches, *seen[rsid])
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort
|
// Sort
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,6 @@ var apiEndpoints = map[string]bool{
|
||||||
"/api/dossiers": true,
|
"/api/dossiers": true,
|
||||||
"/api/dossier": true,
|
"/api/dossier": true,
|
||||||
"/api/studies": true,
|
"/api/studies": true,
|
||||||
"/api/genome": true,
|
|
||||||
"/api/categories": true,
|
|
||||||
"/api/series": true,
|
"/api/series": true,
|
||||||
"/api/slices": true,
|
"/api/slices": true,
|
||||||
"/api/labs/tests": true,
|
"/api/labs/tests": true,
|
||||||
|
|
|
||||||
|
|
@ -369,13 +369,11 @@ func handleMCPToolsList(w http.ResponseWriter, req mcpRequest) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "get_categories",
|
"name": "get_categories",
|
||||||
"description": "List available data categories for a dossier (imaging, labs, documents, genome). Use this first to discover what data types are available, then query entries by category. For lab results, LOINC codes provide best search accuracy.",
|
"description": "List available data categories for a dossier with entry counts. Returns category names and how many entries exist in each.",
|
||||||
"inputSchema": map[string]interface{}{
|
"inputSchema": map[string]interface{}{
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": map[string]interface{}{
|
"properties": map[string]interface{}{
|
||||||
"dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"},
|
"dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"},
|
||||||
"type": map[string]interface{}{"type": "string", "description": "Observation type (e.g., genome) for subcategories"},
|
|
||||||
"category": map[string]interface{}{"type": "string", "description": "Get subcategories within this category"},
|
|
||||||
},
|
},
|
||||||
"required": []string{"dossier"},
|
"required": []string{"dossier"},
|
||||||
},
|
},
|
||||||
|
|
@@ -402,17 +400,20 @@ func handleMCPToolsList(w http.ResponseWriter, req mcpRequest) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "query_genome",
|
"name": "query_genome",
|
||||||
"description": "Query genome variants with specialized genome parameters (magnitude, repute, gene, rsids). Returns matches with significance ratings. For non-genome data, use get_categories to find available categories.",
|
"description": "Query genome variants by gene, rsid, or category. Use gene names (e.g. MTHFR) not concepts (e.g. methylation). Search also matches genotype (e.g. AA, GG). Sensitive variants are redacted; use include_hidden to reveal.",
|
||||||
"inputSchema": map[string]interface{}{
|
"inputSchema": map[string]interface{}{
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": map[string]interface{}{
|
"properties": map[string]interface{}{
|
||||||
"dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"},
|
"dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"},
|
||||||
"gene": map[string]interface{}{"type": "string", "description": "Gene name(s), comma-separated"},
|
"gene": map[string]interface{}{"type": "string", "description": "Gene name(s), comma-separated"},
|
||||||
"search": map[string]interface{}{"type": "string", "description": "Search gene, subcategory, or summary"},
|
"search": map[string]interface{}{"type": "string", "description": "Free-text search across gene, genotype, subcategory, summary, rsid"},
|
||||||
"category": map[string]interface{}{"type": "string", "description": "Filter by category"},
|
"category": map[string]interface{}{"type": "string", "description": "Filter by genome category (e.g. cancer, disease, traits)"},
|
||||||
"rsids": map[string]interface{}{"type": "string", "description": "Comma-separated rsids"},
|
"rsids": map[string]interface{}{"type": "string", "description": "Comma-separated rsids"},
|
||||||
"min_magnitude": map[string]interface{}{"type": "number", "description": "Minimum magnitude"},
|
"min_magnitude": map[string]interface{}{"type": "number", "description": "Minimum magnitude"},
|
||||||
"include_hidden": map[string]interface{}{"type": "boolean", "description": "Include hidden categories and high-magnitude variants"},
|
"repute": map[string]interface{}{"type": "string", "description": "Filter by repute: Good, Bad, or Clear"},
|
||||||
|
"include_hidden": map[string]interface{}{"type": "boolean", "description": "Reveal redacted sensitive variants (audited)"},
|
||||||
|
"limit": map[string]interface{}{"type": "number", "description": "Max results to return (default 20)"},
|
||||||
|
"offset": map[string]interface{}{"type": "number", "description": "Skip first N results for pagination"},
|
||||||
},
|
},
|
||||||
"required": []string{"dossier"},
|
"required": []string{"dossier"},
|
||||||
},
|
},
|
||||||
|
|
@@ -613,9 +614,7 @@ func handleMCPToolsCall(w http.ResponseWriter, req mcpRequest, accessToken, doss
|
||||||
sendMCPError(w, req.ID, -32602, "dossier required")
|
sendMCPError(w, req.ID, -32602, "dossier required")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
typ, _ := params.Arguments["type"].(string)
|
result, err := mcpGetCategories(dossier)
|
||||||
category, _ := params.Arguments["category"].(string)
|
|
||||||
result, err := mcpGetCategories(accessToken, dossier, typ, category)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
sendMCPError(w, req.ID, -32000, err.Error())
|
sendMCPError(w, req.ID, -32000, err.Error())
|
||||||
return
|
return
|
||||||
|
|
@@ -633,12 +632,17 @@ func handleMCPToolsCall(w http.ResponseWriter, req mcpRequest, accessToken, doss
|
||||||
category, _ := params.Arguments["category"].(string)
|
category, _ := params.Arguments["category"].(string)
|
||||||
rsids, _ := params.Arguments["rsids"].(string)
|
rsids, _ := params.Arguments["rsids"].(string)
|
||||||
minMag, _ := params.Arguments["min_magnitude"].(float64)
|
minMag, _ := params.Arguments["min_magnitude"].(float64)
|
||||||
|
repute, _ := params.Arguments["repute"].(string)
|
||||||
includeHidden, _ := params.Arguments["include_hidden"].(bool)
|
includeHidden, _ := params.Arguments["include_hidden"].(bool)
|
||||||
|
limitF, _ := params.Arguments["limit"].(float64)
|
||||||
|
offsetF, _ := params.Arguments["offset"].(float64)
|
||||||
|
limit := int(limitF)
|
||||||
|
offset := int(offsetF)
|
||||||
|
|
||||||
fmt.Printf("[MCP] query_genome: dossier=%s gene=%s category=%s includeHidden=%v\n",
|
fmt.Printf("[MCP] query_genome: dossier=%s gene=%s category=%s repute=%s limit=%d offset=%d\n",
|
||||||
dossier, gene, category, includeHidden)
|
dossier, gene, category, repute, limit, offset)
|
||||||
|
|
||||||
result, err := mcpQueryGenome(accessToken, dossier, gene, search, category, rsids, minMag, includeHidden)
|
result, err := mcpQueryGenome(accessToken, dossier, dossierID, gene, search, category, rsids, minMag, repute, includeHidden, limit, offset)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("[MCP] query_genome error: %v\n", err)
|
fmt.Printf("[MCP] query_genome error: %v\n", err)
|
||||||
sendMCPError(w, req.ID, -32000, err.Error())
|
sendMCPError(w, req.ID, -32000, err.Error())
|
||||||
|
|
|
||||||
|
|
@@ -181,38 +181,54 @@ func mcpQueryEntries(accessToken, dossier, category, typ, searchKey, parent, fro
|
||||||
return string(pretty), nil
|
return string(pretty), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func mcpGetCategories(accessToken, dossier, typ, category string) (string, error) {
|
func mcpGetCategories(dossier string) (string, error) {
|
||||||
params := map[string]string{"dossier": dossier}
|
var counts []struct {
|
||||||
if typ != "" {
|
Category int `db:"category"`
|
||||||
params["type"] = typ
|
Count int `db:"cnt"`
|
||||||
}
|
}
|
||||||
if category != "" {
|
err := lib.Query("SELECT category, COUNT(*) as cnt FROM entries WHERE dossier_id = ? GROUP BY category", []any{dossier}, &counts)
|
||||||
params["category"] = category
|
|
||||||
}
|
|
||||||
|
|
||||||
body, err := mcpAPICall(accessToken, "/api/categories", params)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
var data interface{}
|
result := make(map[string]int)
|
||||||
json.Unmarshal(body, &data)
|
for _, c := range counts {
|
||||||
pretty, _ := json.MarshalIndent(data, "", " ")
|
name := lib.CategoryName(c.Category)
|
||||||
|
if name != "unknown" {
|
||||||
|
result[name] = c.Count
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pretty, _ := json.MarshalIndent(result, "", " ")
|
||||||
return string(pretty), nil
|
return string(pretty), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func mcpQueryGenome(accessToken, dossier, gene, search, category, rsids string, minMag float64, includeHidden bool) (string, error) {
|
func mcpQueryGenome(accessToken, dossier, accessorID, gene, search, category, rsids string, minMag float64, repute string, includeHidden bool, limit, offset int) (string, error) {
|
||||||
var rsidList []string
|
var rsidList []string
|
||||||
if rsids != "" {
|
if rsids != "" {
|
||||||
rsidList = strings.Split(rsids, ",")
|
rsidList = strings.Split(rsids, ",")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if limit <= 0 {
|
||||||
|
numTerms := 1
|
||||||
|
if gene != "" {
|
||||||
|
numTerms = len(strings.Split(gene, ","))
|
||||||
|
}
|
||||||
|
if len(rsidList) > numTerms {
|
||||||
|
numTerms = len(rsidList)
|
||||||
|
}
|
||||||
|
limit = 20 * numTerms
|
||||||
|
}
|
||||||
|
|
||||||
result, err := lib.GenomeQuery(dossier, lib.GenomeQueryOpts{
|
result, err := lib.GenomeQuery(dossier, lib.GenomeQueryOpts{
|
||||||
Category: category,
|
Category: category,
|
||||||
Search: search,
|
Search: search,
|
||||||
Gene: gene,
|
Gene: gene,
|
||||||
RSIDs: rsidList,
|
RSIDs: rsidList,
|
||||||
MinMagnitude: minMag,
|
MinMagnitude: minMag,
|
||||||
|
Repute: repute,
|
||||||
IncludeHidden: includeHidden,
|
IncludeHidden: includeHidden,
|
||||||
|
Limit: limit,
|
||||||
|
Offset: offset,
|
||||||
|
AccessorID: accessorID,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue