feat: optimize genome queries with IN clauses, dedup, repute filter
- Replace N separate SQL queries with single IN clause for rsids and genes - Dedup results by rsid, merging categories from multiple tiers - Add repute filter (Good/Bad/Clear) to genome queries - Expose limit/offset as MCP parameters - Add genotype to search check - Fix category filter in genomeEntriesToResult - Remove deprecated api/api_categories.go and api/api_genome.go - Change GenomeMatch to use Categories []string instead of Category+Subcategory Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d2d77d1503
commit
77db02a6eb
|
|
@ -94,6 +94,10 @@ Key tables:
|
|||
|
||||
### Querying Encrypted Data
|
||||
|
||||
**Encryption is DETERMINISTIC.** You CAN use encrypted columns in WHERE clauses by encrypting the search value: `WHERE col = lib.CryptoEncrypt("value")`. Do NOT pull large datasets and filter client-side when a precise query is possible.
|
||||
|
||||
Columns ending in `_id` are NOT encrypted (plain text, always filterable).
|
||||
|
||||
**IMPORTANT:** Do NOT write Python/Go scripts to decrypt database fields. Use the `decrypt` tool:
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -1,257 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"inou/lib"
|
||||
)
|
||||
|
||||
type CategoryCount struct {
|
||||
Shown int `json:"shown"`
|
||||
Hidden int `json:"hidden"`
|
||||
}
|
||||
|
||||
func handleCategories(w http.ResponseWriter, r *http.Request) {
|
||||
// Get accessor (who is asking)
|
||||
ctx := getAccessContextOrFail(w, r)
|
||||
if ctx == nil {
|
||||
return
|
||||
}
|
||||
|
||||
dossierHex := r.URL.Query().Get("dossier")
|
||||
if dossierHex == "" {
|
||||
http.Error(w, "missing dossier", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
dossierID := dossierHex
|
||||
|
||||
obsType := r.URL.Query().Get("type")
|
||||
category := r.URL.Query().Get("category")
|
||||
|
||||
// Pass accessor + dossier to lib - RBAC handled there
|
||||
var counts map[string]CategoryCount
|
||||
|
||||
if obsType == "" {
|
||||
counts = getTopLevelCounts(ctx.AccessorID, dossierID)
|
||||
} else if obsType == "genome" {
|
||||
if category != "" {
|
||||
counts = getGenomeSubcategoryCounts(ctx.AccessorID, dossierID, category)
|
||||
} else {
|
||||
counts = getGenomeCounts(ctx.AccessorID, dossierID)
|
||||
}
|
||||
} else {
|
||||
counts = make(map[string]CategoryCount)
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(counts)
|
||||
}
|
||||
|
||||
func getTopLevelCounts(accessorID, dossierID string) map[string]CategoryCount {
|
||||
counts := make(map[string]CategoryCount)
|
||||
|
||||
categoryNames := map[int]string{
|
||||
1: "imaging", 2: "documents", 3: "labs", 4: "genome",
|
||||
}
|
||||
|
||||
// Check each category - only include if accessor has access
|
||||
for catInt, catName := range categoryNames {
|
||||
// Try to get a count by querying entries with RBAC
|
||||
entries, err := lib.EntryList(accessorID, "", catInt, &lib.EntryFilter{
|
||||
DossierID: dossierID,
|
||||
Limit: 1, // Just check if we can see any
|
||||
})
|
||||
if err != nil || len(entries) == 0 {
|
||||
continue // No access or no entries
|
||||
}
|
||||
|
||||
// Get actual count (using system context for counting only)
|
||||
var catCounts []struct {
|
||||
Count int `db:"cnt"`
|
||||
}
|
||||
lib.Query("SELECT COUNT(*) as cnt FROM entries WHERE dossier_id = ? AND category = ?",
|
||||
[]any{dossierID, catInt}, &catCounts)
|
||||
|
||||
if len(catCounts) > 0 && catCounts[0].Count > 0 {
|
||||
counts[catName] = CategoryCount{Shown: catCounts[0].Count, Hidden: 0}
|
||||
}
|
||||
}
|
||||
|
||||
// For genome, replace with detailed subcategory counts
|
||||
genomeCats := getGenomeCounts(accessorID, dossierID)
|
||||
if len(genomeCats) > 0 {
|
||||
totalShown, totalHidden := 0, 0
|
||||
for _, c := range genomeCats {
|
||||
totalShown += c.Shown
|
||||
totalHidden += c.Hidden
|
||||
}
|
||||
counts["genome"] = CategoryCount{Shown: len(genomeCats), Hidden: totalHidden}
|
||||
}
|
||||
|
||||
return counts
|
||||
}
|
||||
|
||||
// variantData carries the fields of a variant's data JSON that decide whether
// the variant is shown or hidden.
type variantData struct {
	Mag float64 `json:"mag"` // magnitude
	Rep string  `json:"rep"` // repute
	Sub string  `json:"sub"` // subcategory
}

// shouldIncludeVariant reports whether a variant should be counted/shown.
//
// With includeHidden set every variant is included. Otherwise a variant is
// excluded when its magnitude is above 4.0 or its repute is "bad". The repute
// comparison ignores case, matching the hidden-variant rule used by the genome
// query code, so "Bad", "bad" and "BAD" are all treated alike.
func shouldIncludeVariant(data variantData, includeHidden bool) bool {
	if includeHidden {
		return true
	}
	// Hide high magnitude variants.
	if data.Mag > 4.0 {
		return false
	}
	// Hide "bad" repute variants regardless of capitalisation.
	if strings.EqualFold(data.Rep, "bad") {
		return false
	}
	return true
}
|
||||
|
||||
// getGenomeCounts reads cached counts from the extraction entry (fast path)
|
||||
func getGenomeCounts(accessorID, dossierID string) map[string]CategoryCount {
|
||||
counts := make(map[string]CategoryCount)
|
||||
|
||||
// Create access context for RBAC
|
||||
ctx := &lib.AccessContext{AccessorID: accessorID}
|
||||
|
||||
// Find extraction entry and read its data
|
||||
extraction, err := lib.GenomeGetExtraction(ctx, dossierID)
|
||||
if err != nil {
|
||||
return counts
|
||||
}
|
||||
|
||||
if extraction.Data == "" {
|
||||
return counts
|
||||
}
|
||||
|
||||
// Parse extraction data which contains pre-computed counts
|
||||
var extractionData struct {
|
||||
Counts map[string]CategoryCount `json:"counts"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(extraction.Data), &extractionData); err != nil {
|
||||
return counts
|
||||
}
|
||||
|
||||
// Return cached counts if available
|
||||
if extractionData.Counts != nil {
|
||||
return extractionData.Counts
|
||||
}
|
||||
|
||||
// Fallback: compute counts (for old data without cached counts)
|
||||
return getGenomeCountsSlow(accessorID, dossierID)
|
||||
}
|
||||
|
||||
// getGenomeCountsSlow computes counts by scanning all variants (fallback for old data)
|
||||
func getGenomeCountsSlow(accessorID, dossierID string) map[string]CategoryCount {
|
||||
counts := make(map[string]CategoryCount)
|
||||
|
||||
// Create access context for RBAC
|
||||
ctx := &lib.AccessContext{AccessorID: accessorID}
|
||||
|
||||
// Find extraction entry
|
||||
extraction, err := lib.GenomeGetExtraction(ctx, dossierID)
|
||||
if err != nil {
|
||||
return counts
|
||||
}
|
||||
|
||||
// Get all tiers
|
||||
tiers, err := lib.GenomeGetTiers(ctx, dossierID, extraction.EntryID)
|
||||
if err != nil {
|
||||
return counts
|
||||
}
|
||||
|
||||
// For each tier, count shown and hidden variants
|
||||
for _, tier := range tiers {
|
||||
variants, err := lib.GenomeGetVariantsByTier(ctx, dossierID, tier.TierID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
shown, hidden := 0, 0
|
||||
for _, v := range variants {
|
||||
data := variantData{
|
||||
Mag: v.Magnitude,
|
||||
Rep: v.Repute,
|
||||
Sub: v.Subcategory,
|
||||
}
|
||||
if shouldIncludeVariant(data, false) {
|
||||
shown++
|
||||
} else {
|
||||
hidden++
|
||||
}
|
||||
}
|
||||
if shown > 0 || hidden > 0 {
|
||||
counts[tier.Category] = CategoryCount{Shown: shown, Hidden: hidden}
|
||||
}
|
||||
}
|
||||
|
||||
return counts
|
||||
}
|
||||
|
||||
func getGenomeSubcategoryCounts(accessorID, dossierID string, category string) map[string]CategoryCount {
|
||||
counts := make(map[string]CategoryCount)
|
||||
shownCounts := make(map[string]int)
|
||||
hiddenCounts := make(map[string]int)
|
||||
|
||||
// Create access context for RBAC
|
||||
ctx := &lib.AccessContext{AccessorID: accessorID}
|
||||
|
||||
// Find extraction entry
|
||||
extraction, err := lib.GenomeGetExtraction(ctx, dossierID)
|
||||
if err != nil {
|
||||
return counts
|
||||
}
|
||||
|
||||
// Find tier for this category
|
||||
tier, err := lib.GenomeGetTierByCategory(ctx, dossierID, extraction.EntryID, category)
|
||||
if err != nil {
|
||||
return counts
|
||||
}
|
||||
|
||||
// Get variants and count by subcategory
|
||||
variants, err := lib.GenomeGetVariantsByTier(ctx, dossierID, tier.TierID)
|
||||
if err != nil {
|
||||
return counts
|
||||
}
|
||||
|
||||
for _, v := range variants {
|
||||
if v.Subcategory == "" {
|
||||
continue
|
||||
}
|
||||
data := variantData{
|
||||
Mag: v.Magnitude,
|
||||
Rep: v.Repute,
|
||||
Sub: v.Subcategory,
|
||||
}
|
||||
if shouldIncludeVariant(data, false) {
|
||||
shownCounts[v.Subcategory]++
|
||||
} else {
|
||||
hiddenCounts[v.Subcategory]++
|
||||
}
|
||||
}
|
||||
|
||||
// Combine into CategoryCount
|
||||
allSubs := make(map[string]bool)
|
||||
for k := range shownCounts {
|
||||
allSubs[k] = true
|
||||
}
|
||||
for k := range hiddenCounts {
|
||||
allSubs[k] = true
|
||||
}
|
||||
for sub := range allSubs {
|
||||
counts[sub] = CategoryCount{Shown: shownCounts[sub], Hidden: hiddenCounts[sub]}
|
||||
}
|
||||
|
||||
return counts
|
||||
}
|
||||
|
||||
// Unused but keeping for compatibility - remove if not needed
|
||||
var _ = strings.Contains
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"inou/lib"
|
||||
)
|
||||
|
||||
func handleGenomeQuery(w http.ResponseWriter, r *http.Request) {
|
||||
ctx := getAccessContextOrFail(w, r)
|
||||
if ctx == nil {
|
||||
return
|
||||
}
|
||||
|
||||
dossierID := r.URL.Query().Get("dossier")
|
||||
if dossierID == "" {
|
||||
http.Error(w, "missing dossier", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if !requireDossierAccess(w, ctx, dossierID) {
|
||||
return
|
||||
}
|
||||
|
||||
// Parse query params into opts
|
||||
q := r.URL.Query()
|
||||
var minMag float64
|
||||
if s := q.Get("min_magnitude"); s != "" {
|
||||
minMag, _ = strconv.ParseFloat(s, 64)
|
||||
}
|
||||
offset := 0
|
||||
if s := q.Get("offset"); s != "" {
|
||||
offset, _ = strconv.Atoi(s)
|
||||
}
|
||||
limit := 100
|
||||
if s := q.Get("limit"); s != "" {
|
||||
limit, _ = strconv.Atoi(s)
|
||||
}
|
||||
var rsids []string
|
||||
if s := q.Get("rsids"); s != "" {
|
||||
rsids = strings.Split(s, ",")
|
||||
}
|
||||
|
||||
result, err := lib.GenomeQuery(dossierID, lib.GenomeQueryOpts{
|
||||
Category: q.Get("category"),
|
||||
Search: q.Get("search"),
|
||||
Gene: q.Get("gene"),
|
||||
RSIDs: rsids,
|
||||
MinMagnitude: minMag,
|
||||
IncludeHidden: q.Get("include_hidden") == "true",
|
||||
Sort: q.Get("sort"),
|
||||
Offset: offset,
|
||||
Limit: limit,
|
||||
})
|
||||
if err != nil {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"error": err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
|
@ -41,8 +41,6 @@ func main() {
|
|||
http.HandleFunc("/api/dossiers", handleDossiers)
|
||||
http.HandleFunc("/api/dossier", handleDossier)
|
||||
http.HandleFunc("/api/studies", handleStudies)
|
||||
http.HandleFunc("/api/genome", handleGenomeQuery)
|
||||
http.HandleFunc("/api/categories", handleCategories)
|
||||
http.HandleFunc("/api/series", handleSeries)
|
||||
http.HandleFunc("/api/slices", handleSlices)
|
||||
http.HandleFunc("/image/", handleImage)
|
||||
|
|
|
|||
|
|
@ -265,7 +265,7 @@ func main() {
|
|||
|
||||
// ===== PHASE 4: Load SNPedia and match =====
|
||||
phase4Start := time.Now()
|
||||
snpediaDB, err := sql.Open("sqlite3", "/home/johan/dev/inou/snpedia-genotypes/genotypes.db?mode=ro")
|
||||
snpediaDB, err := sql.Open("sqlite3", "/tank/inou/data/genotypes.db?mode=ro")
|
||||
if err != nil {
|
||||
fmt.Println("SNPedia DB open failed:", err)
|
||||
os.Exit(1)
|
||||
|
|
@ -542,6 +542,7 @@ func main() {
|
|||
Type: v.RSID,
|
||||
Value: v.Genotype,
|
||||
Tags: v.Gene,
|
||||
SearchKey: v.Gene,
|
||||
Ordinal: i + 1,
|
||||
Timestamp: now,
|
||||
Data: string(dataJSON),
|
||||
|
|
|
|||
311
lib/v2.go
311
lib/v2.go
|
|
@ -1184,8 +1184,7 @@ type GenomeMatch struct {
|
|||
Magnitude *float64 `json:"magnitude,omitempty"`
|
||||
Repute string `json:"repute,omitempty"`
|
||||
Summary string `json:"summary,omitempty"`
|
||||
Category string `json:"category,omitempty"`
|
||||
Subcategory string `json:"subcategory,omitempty"`
|
||||
Categories []string `json:"categories,omitempty"`
|
||||
}
|
||||
|
||||
// GenomeQueryResult is the response from GenomeQuery
|
||||
|
|
@ -1202,58 +1201,33 @@ type GenomeQueryOpts struct {
|
|||
Gene string // comma-separated
|
||||
RSIDs []string
|
||||
MinMagnitude float64
|
||||
Repute string // filter by repute (Good, Bad, Clear)
|
||||
IncludeHidden bool
|
||||
Sort string // "magnitude" (default), "gene", "rsid"
|
||||
Offset int
|
||||
Limit int
|
||||
AccessorID string // who is querying (for audit logging)
|
||||
}
|
||||
|
||||
// GenomeQuery queries genome variants for a dossier with filtering, sorting, and pagination.
|
||||
// GenomeQuery queries genome variants for a dossier.
|
||||
// Fast path: gene/rsid use indexed search_key/type columns (precise SQL queries).
|
||||
// Slow path: search/min_magnitude load all variants and filter in memory.
|
||||
func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, error) {
|
||||
sysCtx := &AccessContext{IsSystem: true}
|
||||
|
||||
extraction, err := GenomeGetExtraction(sysCtx, dossierID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GENOME_NO_EXTRACTION: %w", err)
|
||||
}
|
||||
|
||||
// Get tiers
|
||||
var tiers []GenomeTier
|
||||
tierCategories := make(map[string]string)
|
||||
|
||||
if opts.Category != "" {
|
||||
tier, err := GenomeGetTierByCategory(sysCtx, dossierID, extraction.EntryID, opts.Category)
|
||||
if err == nil {
|
||||
tiers = append(tiers, *tier)
|
||||
tierCategories[tier.TierID] = tier.Category
|
||||
if opts.IncludeHidden {
|
||||
var details []string
|
||||
if opts.Gene != "" {
|
||||
details = append(details, "gene="+opts.Gene)
|
||||
}
|
||||
} else {
|
||||
tiers, _ = GenomeGetTiers(sysCtx, dossierID, extraction.EntryID)
|
||||
for _, t := range tiers {
|
||||
tierCategories[t.TierID] = t.Category
|
||||
if len(opts.RSIDs) > 0 {
|
||||
details = append(details, "rsids="+strings.Join(opts.RSIDs, ","))
|
||||
}
|
||||
}
|
||||
|
||||
if len(tiers) == 0 {
|
||||
return &GenomeQueryResult{Matches: []GenomeMatch{}}, nil
|
||||
}
|
||||
|
||||
tierIDs := make([]string, len(tiers))
|
||||
for i, t := range tiers {
|
||||
tierIDs[i] = t.TierID
|
||||
}
|
||||
|
||||
variants, err := GenomeGetVariants(sysCtx, dossierID, tierIDs)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GENOME_VARIANT_QUERY_FAILED: %w", err)
|
||||
}
|
||||
|
||||
// Parse genes
|
||||
var genes []string
|
||||
if opts.Gene != "" {
|
||||
for _, g := range strings.Split(opts.Gene, ",") {
|
||||
genes = append(genes, strings.TrimSpace(g))
|
||||
if opts.Search != "" {
|
||||
details = append(details, "search="+opts.Search)
|
||||
}
|
||||
if opts.Category != "" {
|
||||
details = append(details, "category="+opts.Category)
|
||||
}
|
||||
AuditLog(opts.AccessorID, "genome_reveal_hidden", dossierID, strings.Join(details, " "))
|
||||
}
|
||||
|
||||
limit := opts.Limit
|
||||
|
|
@ -1264,96 +1238,235 @@ func GenomeQuery(dossierID string, opts GenomeQueryOpts) (*GenomeQueryResult, er
|
|||
limit = 500
|
||||
}
|
||||
|
||||
// Filter and collect
|
||||
var matches []GenomeMatch
|
||||
total := 0
|
||||
// Fast path: gene or rsid — use indexed columns
|
||||
if opts.Gene != "" || len(opts.RSIDs) > 0 {
|
||||
return genomeQueryFast(dossierID, opts, limit)
|
||||
}
|
||||
|
||||
for _, v := range variants {
|
||||
if len(opts.RSIDs) > 0 {
|
||||
found := false
|
||||
for _, r := range opts.RSIDs {
|
||||
if r == v.RSID {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
continue
|
||||
}
|
||||
// Slow path: search/min_magnitude — load all, filter in memory
|
||||
return genomeQuerySlow(dossierID, opts, limit)
|
||||
}
|
||||
|
||||
// genomeQueryFast uses indexed search_key (gene) and type (rsid) columns.
|
||||
func genomeQueryFast(dossierID string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) {
|
||||
var entries []Entry
|
||||
|
||||
if opts.Gene != "" {
|
||||
// Split comma-separated genes, query each via indexed search_key
|
||||
genes := strings.Split(opts.Gene, ",")
|
||||
for i := range genes {
|
||||
genes[i] = strings.TrimSpace(genes[i])
|
||||
}
|
||||
|
||||
if len(genes) > 0 {
|
||||
found := false
|
||||
for _, g := range genes {
|
||||
if strings.EqualFold(v.Gene, g) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
continue
|
||||
// Build gene IN clause
|
||||
genePlaceholders := make([]string, len(genes))
|
||||
args := []any{dossierID, CategoryGenome}
|
||||
for i, gene := range genes {
|
||||
genePlaceholders[i] = "?"
|
||||
args = append(args, CryptoEncrypt(gene))
|
||||
}
|
||||
sql := "SELECT * FROM entries WHERE dossier_id = ? AND category = ? AND search_key IN (" + strings.Join(genePlaceholders, ",") + ")"
|
||||
|
||||
// Add rsid filter if specified
|
||||
if len(opts.RSIDs) > 0 {
|
||||
rsidPlaceholders := make([]string, len(opts.RSIDs))
|
||||
for i, rsid := range opts.RSIDs {
|
||||
rsidPlaceholders[i] = "?"
|
||||
args = append(args, CryptoEncrypt(rsid))
|
||||
}
|
||||
sql += " AND type IN (" + strings.Join(rsidPlaceholders, ",") + ")"
|
||||
}
|
||||
|
||||
Query(sql, args, &entries)
|
||||
} else if len(opts.RSIDs) > 0 {
|
||||
// rsid only, no gene — single IN query
|
||||
placeholders := make([]string, len(opts.RSIDs))
|
||||
args := []any{dossierID, CategoryGenome}
|
||||
for i, rsid := range opts.RSIDs {
|
||||
placeholders[i] = "?"
|
||||
args = append(args, CryptoEncrypt(rsid))
|
||||
}
|
||||
sql := "SELECT * FROM entries WHERE dossier_id = ? AND category = ? AND type IN (" + strings.Join(placeholders, ",") + ")"
|
||||
Query(sql, args, &entries)
|
||||
}
|
||||
|
||||
// Look up tier categories for parent_ids (single IN query)
|
||||
tierCategories := make(map[string]string)
|
||||
tierIDSet := make(map[string]bool)
|
||||
for _, e := range entries {
|
||||
tierIDSet[e.ParentID] = true
|
||||
}
|
||||
if len(tierIDSet) > 0 {
|
||||
placeholders := make([]string, 0, len(tierIDSet))
|
||||
args := make([]any, 0, len(tierIDSet))
|
||||
for id := range tierIDSet {
|
||||
placeholders = append(placeholders, "?")
|
||||
args = append(args, id)
|
||||
}
|
||||
var tierEntries []Entry
|
||||
Query("SELECT * FROM entries WHERE entry_id IN ("+strings.Join(placeholders, ",")+")", args, &tierEntries)
|
||||
for _, t := range tierEntries {
|
||||
tierCategories[t.EntryID] = t.Value
|
||||
}
|
||||
}
|
||||
|
||||
return genomeEntriesToResult(entries, tierCategories, opts, limit)
|
||||
}
|
||||
|
||||
// genomeQuerySlow loads all variants and filters in memory (for search/min_magnitude).
|
||||
func genomeQuerySlow(dossierID string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) {
|
||||
sysCtx := &AccessContext{IsSystem: true}
|
||||
|
||||
extraction, err := GenomeGetExtraction(sysCtx, dossierID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GENOME_NO_EXTRACTION: %w", err)
|
||||
}
|
||||
|
||||
tiers, _ := GenomeGetTiers(sysCtx, dossierID, extraction.EntryID)
|
||||
if len(tiers) == 0 {
|
||||
return &GenomeQueryResult{Matches: []GenomeMatch{}}, nil
|
||||
}
|
||||
|
||||
tierCategories := make(map[string]string)
|
||||
tierIDs := make([]string, len(tiers))
|
||||
for i, t := range tiers {
|
||||
tierIDs[i] = t.TierID
|
||||
tierCategories[t.TierID] = t.Category
|
||||
}
|
||||
|
||||
variants, err := GenomeGetVariants(sysCtx, dossierID, tierIDs)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GENOME_VARIANT_QUERY_FAILED: %w", err)
|
||||
}
|
||||
|
||||
// Convert to entries for shared result builder
|
||||
var entries []Entry
|
||||
for _, v := range variants {
|
||||
dataJSON, _ := json.Marshal(struct {
|
||||
Mag float64 `json:"mag,omitempty"`
|
||||
Rep string `json:"rep,omitempty"`
|
||||
Sum string `json:"sum,omitempty"`
|
||||
Sub string `json:"sub,omitempty"`
|
||||
}{v.Magnitude, v.Repute, v.Summary, v.Subcategory})
|
||||
|
||||
entries = append(entries, Entry{
|
||||
EntryID: v.EntryID,
|
||||
ParentID: v.TierID,
|
||||
Type: v.RSID,
|
||||
Value: v.Genotype,
|
||||
Tags: v.Gene,
|
||||
Data: string(dataJSON),
|
||||
})
|
||||
}
|
||||
|
||||
return genomeEntriesToResult(entries, tierCategories, opts, limit)
|
||||
}
|
||||
|
||||
// genomeEntriesToResult converts raw entries to GenomeQueryResult with filtering/sorting.
|
||||
func genomeEntriesToResult(entries []Entry, tierCategories map[string]string, opts GenomeQueryOpts, limit int) (*GenomeQueryResult, error) {
|
||||
targeted := opts.Gene != "" || len(opts.RSIDs) > 0
|
||||
|
||||
// Dedup by rsid, merge categories
|
||||
seen := make(map[string]*GenomeMatch)
|
||||
var order []string
|
||||
|
||||
for _, e := range entries {
|
||||
var data struct {
|
||||
Mag float64 `json:"mag"`
|
||||
Rep string `json:"rep"`
|
||||
Sum string `json:"sum"`
|
||||
Sub string `json:"sub"`
|
||||
}
|
||||
if e.Data != "" {
|
||||
json.Unmarshal([]byte(e.Data), &data)
|
||||
}
|
||||
|
||||
if opts.Search != "" {
|
||||
sl := strings.ToLower(opts.Search)
|
||||
if !strings.Contains(strings.ToLower(v.Gene), sl) &&
|
||||
!strings.Contains(strings.ToLower(v.Summary), sl) &&
|
||||
!strings.Contains(strings.ToLower(v.Subcategory), sl) &&
|
||||
!strings.Contains(strings.ToLower(v.RSID), sl) {
|
||||
if !strings.Contains(strings.ToLower(e.Tags), sl) &&
|
||||
!strings.Contains(strings.ToLower(data.Sum), sl) &&
|
||||
!strings.Contains(strings.ToLower(data.Sub), sl) &&
|
||||
!strings.Contains(strings.ToLower(e.Type), sl) &&
|
||||
!strings.Contains(strings.ToLower(e.Value), sl) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if opts.MinMagnitude > 0 && v.Magnitude < opts.MinMagnitude {
|
||||
if opts.Category != "" && tierCategories[e.ParentID] != opts.Category {
|
||||
continue
|
||||
}
|
||||
|
||||
// Determine if variant would be hidden
|
||||
isHidden := v.Magnitude > 4.0 || strings.EqualFold(v.Repute, "bad")
|
||||
if opts.MinMagnitude > 0 && data.Mag < opts.MinMagnitude {
|
||||
continue
|
||||
}
|
||||
|
||||
// Targeted queries (specific rsIDs or gene) return redacted results
|
||||
// Broad queries skip hidden variants entirely
|
||||
targeted := len(opts.RSIDs) > 0 || len(genes) > 0
|
||||
if opts.Repute != "" && !strings.EqualFold(data.Rep, opts.Repute) {
|
||||
continue
|
||||
}
|
||||
|
||||
isHidden := data.Mag > 4.0 || strings.EqualFold(data.Rep, "bad")
|
||||
if isHidden && !opts.IncludeHidden && !targeted {
|
||||
continue
|
||||
}
|
||||
|
||||
total++
|
||||
if total <= opts.Offset {
|
||||
continue
|
||||
}
|
||||
if len(matches) >= limit {
|
||||
cat := tierCategories[e.ParentID]
|
||||
|
||||
if m, ok := seen[e.Type]; ok {
|
||||
// Already seen this rsid — add category if not duplicate
|
||||
if cat != "" {
|
||||
dup := false
|
||||
for _, c := range m.Categories {
|
||||
if c == cat {
|
||||
dup = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !dup {
|
||||
m.Categories = append(m.Categories, cat)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Redact sensitive fields unless include_hidden is set
|
||||
redact := isHidden && !opts.IncludeHidden
|
||||
|
||||
match := GenomeMatch{
|
||||
RSID: v.RSID,
|
||||
Gene: v.Gene,
|
||||
Category: tierCategories[v.TierID],
|
||||
Subcategory: v.Subcategory,
|
||||
match := &GenomeMatch{
|
||||
RSID: e.Type,
|
||||
Gene: e.Tags,
|
||||
}
|
||||
if cat != "" {
|
||||
match.Categories = []string{cat}
|
||||
}
|
||||
if redact {
|
||||
match.Genotype = "hidden"
|
||||
match.Summary = "Sensitive variant hidden. Query with include_hidden=true to reveal."
|
||||
} else {
|
||||
match.Genotype = v.Genotype
|
||||
if v.Summary != "" {
|
||||
match.Summary = v.Summary
|
||||
match.Genotype = e.Value
|
||||
if data.Sum != "" {
|
||||
match.Summary = data.Sum
|
||||
}
|
||||
}
|
||||
if v.Magnitude > 0 {
|
||||
mag := v.Magnitude
|
||||
if data.Mag > 0 {
|
||||
mag := data.Mag
|
||||
match.Magnitude = &mag
|
||||
}
|
||||
if v.Repute != "" {
|
||||
match.Repute = v.Repute
|
||||
if data.Rep != "" {
|
||||
match.Repute = data.Rep
|
||||
}
|
||||
matches = append(matches, match)
|
||||
|
||||
seen[e.Type] = match
|
||||
order = append(order, e.Type)
|
||||
}
|
||||
|
||||
total := len(order)
|
||||
var matches []GenomeMatch
|
||||
for i, rsid := range order {
|
||||
if i < opts.Offset {
|
||||
continue
|
||||
}
|
||||
if len(matches) >= limit {
|
||||
break
|
||||
}
|
||||
matches = append(matches, *seen[rsid])
|
||||
}
|
||||
|
||||
// Sort
|
||||
|
|
|
|||
|
|
@ -12,8 +12,6 @@ var apiEndpoints = map[string]bool{
|
|||
"/api/dossiers": true,
|
||||
"/api/dossier": true,
|
||||
"/api/studies": true,
|
||||
"/api/genome": true,
|
||||
"/api/categories": true,
|
||||
"/api/series": true,
|
||||
"/api/slices": true,
|
||||
"/api/labs/tests": true,
|
||||
|
|
|
|||
|
|
@ -369,13 +369,11 @@ func handleMCPToolsList(w http.ResponseWriter, req mcpRequest) {
|
|||
},
|
||||
{
|
||||
"name": "get_categories",
|
||||
"description": "List available data categories for a dossier (imaging, labs, documents, genome). Use this first to discover what data types are available, then query entries by category. For lab results, LOINC codes provide best search accuracy.",
|
||||
"description": "List available data categories for a dossier with entry counts. Returns category names and how many entries exist in each.",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"},
|
||||
"type": map[string]interface{}{"type": "string", "description": "Observation type (e.g., genome) for subcategories"},
|
||||
"category": map[string]interface{}{"type": "string", "description": "Get subcategories within this category"},
|
||||
"dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"},
|
||||
},
|
||||
"required": []string{"dossier"},
|
||||
},
|
||||
|
|
@ -402,17 +400,20 @@ func handleMCPToolsList(w http.ResponseWriter, req mcpRequest) {
|
|||
},
|
||||
{
|
||||
"name": "query_genome",
|
||||
"description": "Query genome variants with specialized genome parameters (magnitude, repute, gene, rsids). Returns matches with significance ratings. For non-genome data, use get_categories to find available categories.",
|
||||
"description": "Query genome variants by gene, rsid, or category. Use gene names (e.g. MTHFR) not concepts (e.g. methylation). Search also matches genotype (e.g. AA, GG). Sensitive variants are redacted; use include_hidden to reveal.",
|
||||
"inputSchema": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"dossier": map[string]interface{}{"type": "string", "description": "Dossier ID (16-char hex)"},
|
||||
"gene": map[string]interface{}{"type": "string", "description": "Gene name(s), comma-separated"},
|
||||
"search": map[string]interface{}{"type": "string", "description": "Search gene, subcategory, or summary"},
|
||||
"category": map[string]interface{}{"type": "string", "description": "Filter by category"},
|
||||
"search": map[string]interface{}{"type": "string", "description": "Free-text search across gene, genotype, subcategory, summary, rsid"},
|
||||
"category": map[string]interface{}{"type": "string", "description": "Filter by genome category (e.g. cancer, disease, traits)"},
|
||||
"rsids": map[string]interface{}{"type": "string", "description": "Comma-separated rsids"},
|
||||
"min_magnitude": map[string]interface{}{"type": "number", "description": "Minimum magnitude"},
|
||||
"include_hidden": map[string]interface{}{"type": "boolean", "description": "Include hidden categories and high-magnitude variants"},
|
||||
"repute": map[string]interface{}{"type": "string", "description": "Filter by repute: Good, Bad, or Clear"},
|
||||
"include_hidden": map[string]interface{}{"type": "boolean", "description": "Reveal redacted sensitive variants (audited)"},
|
||||
"limit": map[string]interface{}{"type": "number", "description": "Max results to return (default 20)"},
|
||||
"offset": map[string]interface{}{"type": "number", "description": "Skip first N results for pagination"},
|
||||
},
|
||||
"required": []string{"dossier"},
|
||||
},
|
||||
|
|
@ -613,9 +614,7 @@ func handleMCPToolsCall(w http.ResponseWriter, req mcpRequest, accessToken, doss
|
|||
sendMCPError(w, req.ID, -32602, "dossier required")
|
||||
return
|
||||
}
|
||||
typ, _ := params.Arguments["type"].(string)
|
||||
category, _ := params.Arguments["category"].(string)
|
||||
result, err := mcpGetCategories(accessToken, dossier, typ, category)
|
||||
result, err := mcpGetCategories(dossier)
|
||||
if err != nil {
|
||||
sendMCPError(w, req.ID, -32000, err.Error())
|
||||
return
|
||||
|
|
@ -633,12 +632,17 @@ func handleMCPToolsCall(w http.ResponseWriter, req mcpRequest, accessToken, doss
|
|||
category, _ := params.Arguments["category"].(string)
|
||||
rsids, _ := params.Arguments["rsids"].(string)
|
||||
minMag, _ := params.Arguments["min_magnitude"].(float64)
|
||||
repute, _ := params.Arguments["repute"].(string)
|
||||
includeHidden, _ := params.Arguments["include_hidden"].(bool)
|
||||
limitF, _ := params.Arguments["limit"].(float64)
|
||||
offsetF, _ := params.Arguments["offset"].(float64)
|
||||
limit := int(limitF)
|
||||
offset := int(offsetF)
|
||||
|
||||
fmt.Printf("[MCP] query_genome: dossier=%s gene=%s category=%s includeHidden=%v\n",
|
||||
dossier, gene, category, includeHidden)
|
||||
fmt.Printf("[MCP] query_genome: dossier=%s gene=%s category=%s repute=%s limit=%d offset=%d\n",
|
||||
dossier, gene, category, repute, limit, offset)
|
||||
|
||||
result, err := mcpQueryGenome(accessToken, dossier, gene, search, category, rsids, minMag, includeHidden)
|
||||
result, err := mcpQueryGenome(accessToken, dossier, dossierID, gene, search, category, rsids, minMag, repute, includeHidden, limit, offset)
|
||||
if err != nil {
|
||||
fmt.Printf("[MCP] query_genome error: %v\n", err)
|
||||
sendMCPError(w, req.ID, -32000, err.Error())
|
||||
|
|
|
|||
|
|
@ -181,38 +181,54 @@ func mcpQueryEntries(accessToken, dossier, category, typ, searchKey, parent, fro
|
|||
return string(pretty), nil
|
||||
}
|
||||
|
||||
func mcpGetCategories(accessToken, dossier, typ, category string) (string, error) {
|
||||
params := map[string]string{"dossier": dossier}
|
||||
if typ != "" {
|
||||
params["type"] = typ
|
||||
func mcpGetCategories(dossier string) (string, error) {
|
||||
var counts []struct {
|
||||
Category int `db:"category"`
|
||||
Count int `db:"cnt"`
|
||||
}
|
||||
if category != "" {
|
||||
params["category"] = category
|
||||
}
|
||||
|
||||
body, err := mcpAPICall(accessToken, "/api/categories", params)
|
||||
err := lib.Query("SELECT category, COUNT(*) as cnt FROM entries WHERE dossier_id = ? GROUP BY category", []any{dossier}, &counts)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
var data interface{}
|
||||
json.Unmarshal(body, &data)
|
||||
pretty, _ := json.MarshalIndent(data, "", " ")
|
||||
result := make(map[string]int)
|
||||
for _, c := range counts {
|
||||
name := lib.CategoryName(c.Category)
|
||||
if name != "unknown" {
|
||||
result[name] = c.Count
|
||||
}
|
||||
}
|
||||
pretty, _ := json.MarshalIndent(result, "", " ")
|
||||
return string(pretty), nil
|
||||
}
|
||||
|
||||
func mcpQueryGenome(accessToken, dossier, gene, search, category, rsids string, minMag float64, includeHidden bool) (string, error) {
|
||||
func mcpQueryGenome(accessToken, dossier, accessorID, gene, search, category, rsids string, minMag float64, repute string, includeHidden bool, limit, offset int) (string, error) {
|
||||
var rsidList []string
|
||||
if rsids != "" {
|
||||
rsidList = strings.Split(rsids, ",")
|
||||
}
|
||||
|
||||
if limit <= 0 {
|
||||
numTerms := 1
|
||||
if gene != "" {
|
||||
numTerms = len(strings.Split(gene, ","))
|
||||
}
|
||||
if len(rsidList) > numTerms {
|
||||
numTerms = len(rsidList)
|
||||
}
|
||||
limit = 20 * numTerms
|
||||
}
|
||||
|
||||
result, err := lib.GenomeQuery(dossier, lib.GenomeQueryOpts{
|
||||
Category: category,
|
||||
Search: search,
|
||||
Gene: gene,
|
||||
RSIDs: rsidList,
|
||||
MinMagnitude: minMag,
|
||||
Repute: repute,
|
||||
IncludeHidden: includeHidden,
|
||||
Limit: limit,
|
||||
Offset: offset,
|
||||
AccessorID: accessorID,
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
|
|
|||
Loading…
Reference in New Issue