inou/lib/lab_reference.go

293 lines
8.6 KiB
Go

package lib
import (
"encoding/json"
"fmt"
"log"
"math"
"strings"
)
// LabScale is the fixed-point factor used when a float lab value is stored
// as an int64: the stored integer is the value times LabScale, which keeps
// six decimal places.
const LabScale = 1_000_000
// LabNoRef is the sentinel stored in ref_low or ref_high when that side of a
// reference range has no bound.
const LabNoRef int64 = -1
// ToLabScale turns a float64 into its scaled int64 form: the value is
// multiplied by LabScale and rounded to the nearest integer.
func ToLabScale(f float64) int64 {
	scaled := f * LabScale
	return int64(math.Round(scaled))
}
// FromLabScale is the inverse of the scaling step: it divides a stored
// int64 by LabScale and returns the result as a float64.
func FromLabScale(i int64) float64 {
	stored := float64(i)
	return stored / LabScale
}
// Values accepted in LabTest.Direction.
const (
	// DirRange marks a test where a value outside the range on either
	// side is bad. This is the default.
	DirRange = "range"

	// DirLowerBetter marks a test where low values are good
	// (CRP, LDL, glucose).
	DirLowerBetter = "lower_better"

	// DirHigherBetter marks a test where high values are good
	// (HDL, hemoglobin).
	DirHigherBetter = "higher_better"
)
// LabTest holds the properties of one lab test, keyed by LOINC code.
// Rows are populated lazily: when normalization encounters a test it has
// not seen before, the LLM is asked for the LOINC code, SI unit and
// direction.
type LabTest struct {
	// LoincID is the LOINC code, e.g. "718-7". It is the primary key.
	LoincID string `db:"loinc_id,pk"`

	// Name is the canonical English name, e.g. "Hemoglobin".
	Name string `db:"name"`

	// SIUnit is the SI unit, e.g. "g/L".
	SIUnit string `db:"si_unit"`

	// Direction is one of "range", "lower_better" or "higher_better".
	Direction string `db:"direction"`

	// SIFactor is the conventional→SI multiplier scaled by one million
	// (a factor of 10.0 is stored as 10000000).
	SIFactor int64 `db:"si_factor"`
}
// LabReference holds one reference range, always expressed in SI units.
// The table has no real composite key; RefID is a synthetic key built from
// the identifying fields.
type LabReference struct {
	// RefID is the synthetic primary key, "loinc|source|sex|ageDays".
	RefID string `db:"ref_id,pk"`

	// LoincID is the foreign key to lab_test.
	LoincID string `db:"loinc_id"`

	// Source names where the range comes from, e.g. "CALIPER" or "IFCC".
	Source string `db:"source"`

	// Sex is "", "M" or "F".
	Sex string `db:"sex"`

	// AgeDays is the start of the age range in days (0 = birth).
	AgeDays int64 `db:"age_days"`

	// AgeEnd is the end of the age range in days.
	AgeEnd int64 `db:"age_end"`

	// RefLow is the lower bound scaled by one million (-1 = no bound).
	RefLow int64 `db:"ref_low"`

	// RefHigh is the upper bound scaled by one million (-1 = no bound).
	RefHigh int64 `db:"ref_high"`

	// Unit is the SI unit the bounds are expressed in.
	Unit string `db:"unit"`
}
// MakeRefID builds the synthetic primary key of a LabReference by joining
// the LOINC code, source, sex and starting age in days with "|".
func MakeRefID(loinc, source, sex string, ageDays int64) string {
	parts := []string{loinc, source, sex, fmt.Sprintf("%d", ageDays)}
	return strings.Join(parts, "|")
}
// LabTestGet looks up the LabTest with the given LOINC code.
// It returns (nil, nil) when no row matches and (nil, err) when the query
// itself fails.
func LabTestGet(loincID string) (*LabTest, error) {
	var rows []LabTest
	err := refQuery("SELECT * FROM lab_test WHERE loinc_id = ?", []any{loincID}, &rows)
	if err != nil {
		return nil, err
	}
	if len(rows) == 0 {
		return nil, nil
	}
	return &rows[0], nil
}
// LabTestSave upserts a single LabTest record into the lab_test table.
func LabTestSave(t *LabTest) error {
	const table = "lab_test"
	return refSave(table, t)
}
// LabTestSaveBatch upserts several LabTest records into the lab_test table
// in one refSave call. An empty slice is a no-op.
func LabTestSaveBatch(tests []LabTest) error {
	if len(tests) > 0 {
		return refSave("lab_test", tests)
	}
	return nil
}
// LabRefSave upserts a single LabReference record into lab_reference.
// The record's RefID is overwritten with the key derived from its LOINC
// code, source, sex and starting age before it is saved.
func LabRefSave(r *LabReference) error {
	id := MakeRefID(r.LoincID, r.Source, r.Sex, r.AgeDays)
	r.RefID = id
	return refSave("lab_reference", r)
}
// LabRefSaveBatch upserts several LabReference records into lab_reference
// in one refSave call. Each element's RefID is regenerated in place from
// its identifying fields first. An empty slice is a no-op.
func LabRefSaveBatch(refs []LabReference) error {
	if len(refs) == 0 {
		return nil
	}
	for i := range refs {
		ref := &refs[i]
		ref.RefID = MakeRefID(ref.LoincID, ref.Source, ref.Sex, ref.AgeDays)
	}
	return refSave("lab_reference", refs)
}
// LabRefLookupAll returns every reference range stored for a LOINC code.
// It returns (nil, err) when the query fails.
func LabRefLookupAll(loincID string) ([]LabReference, error) {
	var refs []LabReference
	// Run the query as its own statement. Writing `return refs, refQuery(...)`
	// would read refs and call a function that fills it in the same
	// statement, and Go does not specify which of the two happens first.
	err := refQuery("SELECT ref_id, loinc_id, source, sex, age_days, age_end, ref_low, ref_high, unit FROM lab_reference WHERE loinc_id = ?",
		[]any{loincID}, &refs)
	if err != nil {
		return nil, err
	}
	return refs, nil
}
// LabRefLookup finds the reference range that best fits a test for a
// patient of the given sex and age in days.
//
// A range is a candidate when ageDays lies within [AgeDays, AgeEnd] and its
// Sex is either equal to sex or empty (unisex). Among candidates, a range
// whose Sex equals sex always beats a unisex one; within the same sex class
// the range with the narrowest age span wins, and on a tie the first row
// returned by the query is kept.
//
// It returns (nil, nil) when no range matches and (nil, err) when the query
// fails.
func LabRefLookup(loincID, sex string, ageDays int64) (*LabReference, error) {
	var refs []LabReference
	if err := refQuery(
		"SELECT ref_id, loinc_id, source, sex, age_days, age_end, ref_low, ref_high, unit FROM lab_reference WHERE loinc_id = ?",
		[]any{loincID}, &refs,
	); err != nil {
		return nil, err
	}

	var (
		best         *LabReference
		bestSexMatch bool
		bestSpan     int64
	)
	for i := range refs {
		r := &refs[i]
		// Age must fall inside the range (both ends inclusive).
		if ageDays < r.AgeDays || ageDays > r.AgeEnd {
			continue
		}
		sexMatch := r.Sex == sex
		if !sexMatch && r.Sex != "" {
			continue // range is specific to the other sex
		}
		span := r.AgeEnd - r.AgeDays

		// Compare sex match first and age span second. Folding both into a
		// single additive score lets a narrow unisex range outrank a
		// sex-specific one, because the span term is far larger than any
		// fixed sex bonus.
		better := best == nil ||
			(sexMatch && !bestSexMatch) ||
			(sexMatch == bestSexMatch && span < bestSpan)
		if better {
			best, bestSexMatch, bestSpan = r, sexMatch, span
		}
	}
	return best, nil
}
// AgeDays returns the number of whole days between a date of birth and a
// later moment, both given as unix timestamps in seconds. The division is
// integer division, so partial days are dropped.
func AgeDays(dobUnix, atUnix int64) int64 {
	const secondsPerDay = 86400
	elapsed := atUnix - dobUnix
	return elapsed / secondsPerDay
}
// PopulateReferences asks the LLM for reference ranges for every lab_test
// row that has an SI unit but no lab_reference rows yet, and saves whatever
// comes back to lab_reference.
//
// It is a no-op when GeminiKey is empty. Tests are sent to the LLM in
// batches; a batch that fails is logged and skipped rather than aborting
// the run. An error is returned only when loading the tables or saving the
// collected ranges fails.
func PopulateReferences() error {
	if GeminiKey == "" {
		return nil
	}

	// Every known test.
	var allTests []LabTest
	err := refQuery("SELECT loinc_id, name, si_unit, direction, si_factor FROM lab_test", nil, &allTests)
	if err != nil {
		return fmt.Errorf("load lab_test: %w", err)
	}

	// LOINC codes that already own at least one reference row.
	var known []LabReference
	err = refQuery("SELECT ref_id, loinc_id FROM lab_reference", nil, &known)
	if err != nil {
		return fmt.Errorf("load lab_reference: %w", err)
	}
	covered := make(map[string]bool)
	for i := range known {
		covered[known[i].LoincID] = true
	}

	// Keep only tests without references that carry an SI unit.
	var pending []LabTest
	for _, lt := range allTests {
		if !covered[lt.LoincID] && lt.SIUnit != "" {
			pending = append(pending, lt)
		}
	}
	total := len(pending)
	if total == 0 {
		log.Printf("populate_refs: all %d tests already have references", len(allTests))
		return nil
	}
	log.Printf("populate_refs: %d tests need references (of %d total)", total, len(allTests))

	// Ask the LLM in fixed-size batches.
	const batchSize = 50
	var collected []LabReference
	for start := 0; start < total; start += batchSize {
		stop := start + batchSize
		if stop > total {
			stop = total
		}
		log.Printf("populate_refs: LLM batch %d-%d of %d", start+1, stop, total)
		got, llmErr := callReferenceLLM(pending[start:stop])
		if llmErr != nil {
			log.Printf("populate_refs: batch %d-%d error: %v", start+1, stop, llmErr)
			continue
		}
		collected = append(collected, got...)
	}

	if len(collected) == 0 {
		log.Printf("populate_refs: no references returned")
		return nil
	}
	log.Printf("populate_refs: saving %d reference ranges", len(collected))
	return LabRefSaveBatch(collected)
}
// callReferenceLLM asks the LLM for unisex reference ranges for the given
// tests and converts the JSON reply into LabReference values (bounds scaled
// with ToLabScale, Sex left empty).
//
// The reply is treated as untrusted. An entry is dropped when
//   - its LOINC code is empty or was not one of the requested tests,
//   - its low bound is not strictly below its high bound, or
//   - its age window is negative or empty (age_max_days <= age_min_days).
//
// It returns (nil, nil) for an empty input without contacting the LLM, and
// an error when the LLM call fails or the reply is not the expected JSON
// array.
func callReferenceLLM(tests []LabTest) ([]LabReference, error) {
	// Nothing to ask about: skip the LLM round trip entirely.
	if len(tests) == 0 {
		return nil, nil
	}

	lines := make([]string, 0, len(tests))
	requested := make(map[string]bool, len(tests))
	for _, t := range tests {
		lines = append(lines, fmt.Sprintf("%s %s (%s)", t.LoincID, t.Name, t.SIUnit))
		requested[t.LoincID] = true
	}
	prompt := fmt.Sprintf(`For each LOINC code below, provide standard reference ranges IN THE SI UNIT SHOWN.
Give up to 3 ranges per test:
- Pediatric (ages 2-12): source "CALIPER"
- Adolescent (ages 12-18): source "CALIPER"
- Adult (ages 18+): source "IFCC"
Skip tests that are non-numeric, qualitative, or where standard ranges don't exist.
Return JSON array of objects:
[{"loinc":"718-7","source":"CALIPER","age_min_days":730,"age_max_days":4380,"low":115.0,"high":135.0,"unit":"g/L"}, ...]
age_min_days and age_max_days are ages in days (730=2yr, 4380=12yr, 6570=18yr, 36500=100yr).
Values must be in the SI unit provided. Omit sex-specific ranges for simplicity — use unisex.
Only include tests where you are confident in the reference values.
Tests:
%s`, strings.Join(lines, "\n"))

	maxTokens := 8192
	temp := 0.0
	config := &GeminiConfig{
		Temperature:     &temp,
		MaxOutputTokens: &maxTokens,
	}
	resp, err := CallGeminiMultimodal([]GeminiPart{{Text: prompt}}, config)
	if err != nil {
		return nil, err
	}

	type refResult struct {
		Loinc      string  `json:"loinc"`
		Source     string  `json:"source"`
		AgeMinDays int64   `json:"age_min_days"`
		AgeMaxDays int64   `json:"age_max_days"`
		Low        float64 `json:"low"`
		High       float64 `json:"high"`
		Unit       string  `json:"unit"`
	}
	var results []refResult
	if err := json.Unmarshal([]byte(resp), &results); err != nil {
		return nil, fmt.Errorf("parse response: %w (first 300 chars: %.300s)", err, resp)
	}

	var refs []LabReference
	for _, r := range results {
		loinc := strings.TrimSpace(r.Loinc)
		switch {
		case loinc == "" || !requested[loinc]:
			// Unknown or invented code: storing it would attach ranges to a
			// test that was never asked about.
			continue
		case r.Low >= r.High:
			continue
		case r.AgeMinDays < 0 || r.AgeMaxDays <= r.AgeMinDays:
			// Also catches entries whose age fields were omitted (both 0).
			continue
		}
		refs = append(refs, LabReference{
			LoincID: loinc,
			Source:  r.Source,
			Sex:     "",
			AgeDays: r.AgeMinDays,
			AgeEnd:  r.AgeMaxDays,
			RefLow:  ToLabScale(r.Low),
			RefHigh: ToLabScale(r.High),
			Unit:    r.Unit,
		})
	}
	return refs, nil
}