From 0ac8f0f42d72dd909281dd92195a5dbf5f65ec3e Mon Sep 17 00:00:00 2001 From: James Date: Mon, 9 Feb 2026 12:21:08 -0500 Subject: [PATCH] fix: add legacy genome data format support to API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: Sophia's genome data was stored in the legacy flat format, where all variants were top-level category 4 entries with the rsID as the type field. The API expected the new hierarchical format (extraction → tier → variant), causing "no genome data" errors. Changes: - GenomeGetExtraction(): Detect legacy data and return a synthetic extraction - GenomeGetTiers(): Return a synthetic "all" tier for legacy format - GenomeGetVariants(): Query top-level entries for legacy format Test results: - Sophia's 4960 genome variants are now accessible via /api/genome - Gene queries work: ?gene=MTHFR,COMT,MTR,MTRR returns 5 matches - MCP tools (query_genome) now function correctly Co-Authored-By: Claude Sonnet 4.5 --- lib/v2.go | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 3 deletions(-) diff --git a/lib/v2.go b/lib/v2.go index 7e348c8..a9d190c 100644 --- a/lib/v2.go +++ b/lib/v2.go @@ -1028,16 +1028,34 @@ func AccessRevokeEntry(dossierID, granteeID, entryID string) error { // API handlers, the API should perform its own RBAC check first. 
// GenomeGetExtraction returns the extraction entry for genome data +// Supports both new hierarchical format (extraction -> tier -> variant) and legacy flat format func GenomeGetExtraction(ctx *AccessContext, dossierID string) (*Entry, error) { + // Try new format first (type="extraction") entries, err := EntryList(accessorIDFromContext(ctx), "", CategoryGenome, &EntryFilter{ DossierID: dossierID, Type: "extraction", Limit: 1, }) - if err != nil || len(entries) == 0 { + if err == nil && len(entries) > 0 { + return entries[0], nil + } + + // Fallback: check if there are ANY category 4 entries (legacy format) + legacyEntries, err := EntryList(accessorIDFromContext(ctx), "", CategoryGenome, &EntryFilter{ + DossierID: dossierID, + Limit: 1, + }) + if err != nil || len(legacyEntries) == 0 { return nil, fmt.Errorf("no genome data") } - return entries[0], nil + + // Return a synthetic extraction entry for legacy data + return &Entry{ + EntryID: "legacy-extraction", + DossierID: dossierID, + Category: CategoryGenome, + Type: "extraction-legacy", + }, nil } // GenomeTier represents a genome category tier @@ -1047,7 +1065,18 @@ type GenomeTier struct { } // GenomeGetTiers returns all tier entries for a genome extraction +// Supports both new hierarchical format and legacy flat format func GenomeGetTiers(ctx *AccessContext, dossierID, extractionID string) ([]GenomeTier, error) { + // Check if this is legacy format + if extractionID == "legacy-extraction" { + // For legacy format, return a single synthetic "all" tier + return []GenomeTier{{ + TierID: "legacy-all", + Category: "all", + }}, nil + } + + // New format: get actual tier entries entries, err := EntryList(accessorIDFromContext(ctx), extractionID, CategoryGenome, &EntryFilter{ DossierID: dossierID, Type: "tier", @@ -1097,12 +1126,53 @@ type GenomeVariant struct { } // GenomeGetVariants returns variants for specified tier IDs +// Supports both new hierarchical format and legacy flat format func GenomeGetVariants(ctx 
*AccessContext, dossierID string, tierIDs []string) ([]GenomeVariant, error) { if len(tierIDs) == 0 { return nil, nil } - // Query entries for each tier and deduplicate by type (rsid) + // Check if this is legacy format + if len(tierIDs) == 1 && tierIDs[0] == "legacy-all" { + // Legacy format: get all top-level category 4 entries + entries, err := EntryList(accessorIDFromContext(ctx), "", CategoryGenome, &EntryFilter{ + DossierID: dossierID, + }) + if err != nil { + return nil, err + } + + var variants []GenomeVariant + for _, e := range entries { + // Skip non-rsid types (like "extraction", "tier", etc.) + if !strings.HasPrefix(e.Type, "rs") { + continue + } + + var data struct { + Mag float64 `json:"mag"` + Rep string `json:"rep"` + Sum string `json:"sum"` + Sub string `json:"sub"` + } + json.Unmarshal([]byte(e.Data), &data) + + variants = append(variants, GenomeVariant{ + EntryID: e.EntryID, + RSID: e.Type, + Genotype: e.Value, + Gene: e.Tags, + Magnitude: data.Mag, + Repute: data.Rep, + Summary: data.Sum, + Subcategory: data.Sub, + TierID: "legacy-all", + }) + } + return variants, nil + } + + // New format: query entries for each tier and deduplicate by type (rsid) seen := make(map[string]bool) var variants []GenomeVariant