fix: add legacy genome data format support to API

Root cause: Sophia's genome data was stored in the legacy flat format, where
all variants were top-level category 4 entries with the rsID as the type
field. The API expected the new hierarchical format (extraction → tier →
variant), causing "no genome data" errors.

Changes:
- GenomeGetExtraction(): Detect legacy data, return synthetic extraction
- GenomeGetTiers(): Return synthetic "all" tier for legacy format
- GenomeGetVariants(): Query top-level entries for legacy format

Test results:
- Sophia's 4960 genome variants now accessible via /api/genome
- Gene queries work: ?gene=MTHFR,COMT,MTR,MTRR returns 5 matches
- MCP tools (query_genome) now function correctly

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
James 2026-02-09 12:21:08 -05:00
parent 7e9395ed6c
commit 0ac8f0f42d
1 changed file with 73 additions and 3 deletions

View File

@ -1028,16 +1028,34 @@ func AccessRevokeEntry(dossierID, granteeID, entryID string) error {
// API handlers, the API should perform its own RBAC check first. // API handlers, the API should perform its own RBAC check first.
// GenomeGetExtraction returns the extraction entry for genome data // GenomeGetExtraction returns the extraction entry for genome data
// Supports both new hierarchical format (extraction -> tier -> variant) and legacy flat format
func GenomeGetExtraction(ctx *AccessContext, dossierID string) (*Entry, error) { func GenomeGetExtraction(ctx *AccessContext, dossierID string) (*Entry, error) {
// Try new format first (type="extraction")
entries, err := EntryList(accessorIDFromContext(ctx), "", CategoryGenome, &EntryFilter{ entries, err := EntryList(accessorIDFromContext(ctx), "", CategoryGenome, &EntryFilter{
DossierID: dossierID, DossierID: dossierID,
Type: "extraction", Type: "extraction",
Limit: 1, Limit: 1,
}) })
if err != nil || len(entries) == 0 { if err == nil && len(entries) > 0 {
return entries[0], nil
}
// Fallback: check if there are ANY category 4 entries (legacy format)
legacyEntries, err := EntryList(accessorIDFromContext(ctx), "", CategoryGenome, &EntryFilter{
DossierID: dossierID,
Limit: 1,
})
if err != nil || len(legacyEntries) == 0 {
return nil, fmt.Errorf("no genome data") return nil, fmt.Errorf("no genome data")
} }
return entries[0], nil
// Return a synthetic extraction entry for legacy data
return &Entry{
EntryID: "legacy-extraction",
DossierID: dossierID,
Category: CategoryGenome,
Type: "extraction-legacy",
}, nil
} }
// GenomeTier represents a genome category tier // GenomeTier represents a genome category tier
@ -1047,7 +1065,18 @@ type GenomeTier struct {
} }
// GenomeGetTiers returns all tier entries for a genome extraction // GenomeGetTiers returns all tier entries for a genome extraction
// Supports both new hierarchical format and legacy flat format
func GenomeGetTiers(ctx *AccessContext, dossierID, extractionID string) ([]GenomeTier, error) { func GenomeGetTiers(ctx *AccessContext, dossierID, extractionID string) ([]GenomeTier, error) {
// Check if this is legacy format
if extractionID == "legacy-extraction" {
// For legacy format, return a single synthetic "all" tier
return []GenomeTier{{
TierID: "legacy-all",
Category: "all",
}}, nil
}
// New format: get actual tier entries
entries, err := EntryList(accessorIDFromContext(ctx), extractionID, CategoryGenome, &EntryFilter{ entries, err := EntryList(accessorIDFromContext(ctx), extractionID, CategoryGenome, &EntryFilter{
DossierID: dossierID, DossierID: dossierID,
Type: "tier", Type: "tier",
@ -1097,12 +1126,53 @@ type GenomeVariant struct {
} }
// GenomeGetVariants returns variants for specified tier IDs // GenomeGetVariants returns variants for specified tier IDs
// Supports both new hierarchical format and legacy flat format
func GenomeGetVariants(ctx *AccessContext, dossierID string, tierIDs []string) ([]GenomeVariant, error) { func GenomeGetVariants(ctx *AccessContext, dossierID string, tierIDs []string) ([]GenomeVariant, error) {
if len(tierIDs) == 0 { if len(tierIDs) == 0 {
return nil, nil return nil, nil
} }
// Query entries for each tier and deduplicate by type (rsid) // Check if this is legacy format
if len(tierIDs) == 1 && tierIDs[0] == "legacy-all" {
// Legacy format: get all top-level category 4 entries
entries, err := EntryList(accessorIDFromContext(ctx), "", CategoryGenome, &EntryFilter{
DossierID: dossierID,
})
if err != nil {
return nil, err
}
var variants []GenomeVariant
for _, e := range entries {
// Skip non-rsid types (like "extraction", "tier", etc.)
if !strings.HasPrefix(e.Type, "rs") {
continue
}
var data struct {
Mag float64 `json:"mag"`
Rep string `json:"rep"`
Sum string `json:"sum"`
Sub string `json:"sub"`
}
json.Unmarshal([]byte(e.Data), &data)
variants = append(variants, GenomeVariant{
EntryID: e.EntryID,
RSID: e.Type,
Genotype: e.Value,
Gene: e.Tags,
Magnitude: data.Mag,
Repute: data.Rep,
Summary: data.Sum,
Subcategory: data.Sub,
TierID: "legacy-all",
})
}
return variants, nil
}
// New format: query entries for each tier and deduplicate by type (rsid)
seen := make(map[string]bool) seen := make(map[string]bool)
var variants []GenomeVariant var variants []GenomeVariant