// inou/lib/lab_reference.go

package lib

import (
	"encoding/json"
	"fmt"
	"log"
	"math"
	"strings"
)

// LabScale is the fixed-point factor used to persist lab values as int64:
// a float is multiplied by it before storage, which keeps six decimal places.
const LabScale = 1_000_000

// LabNoRef is the sentinel stored in ref_low / ref_high when that side of a
// reference range is not set (no bound).
const LabNoRef int64 = -1

// ToLabScale turns a float64 into its fixed-point int64 form by multiplying
// with LabScale and rounding to the nearest integer.
func ToLabScale(f float64) int64 {
	scaled := f * LabScale
	return int64(math.Round(scaled))
}

// FromLabScale is the inverse of the fixed-point encoding: it divides a
// scaled int64 by LabScale and returns the result as float64.
func FromLabScale(i int64) float64 {
	scaled := float64(i)
	return scaled / LabScale
}

// Allowed values for LabTest.Direction.
const (
	// DirRange means a value outside the range on either side is bad (default).
	DirRange = "range"
	// DirLowerBetter means low values are good (CRP, LDL, glucose).
	DirLowerBetter = "lower_better"
	// DirHigherBetter means high values are good (HDL, hemoglobin).
	DirHigherBetter = "higher_better"
)

// LabTest holds per-LOINC-code test properties, one row of the lab_test table.
// Populated lazily: when we encounter a new test during normalization,
// we ask the LLM to provide LOINC + SI unit + direction.
// Field order and the db tags are part of the persistence mapping; keep them
// in sync with the lab_test schema.
type LabTest struct {
	LoincID   string `db:"loinc_id,pk"` // LOINC code and primary key, e.g. "718-7"
	Name      string `db:"name"`        // canonical English name, e.g. "Hemoglobin"
	SIUnit    string `db:"si_unit"`     // SI unit, e.g. "g/L"; PopulateReferences skips tests where this is empty
	Direction string `db:"direction"`   // one of DirRange, DirLowerBetter, DirHigherBetter
	SIFactor  int64  `db:"si_factor"`   // conventional→SI multiplier scaled x1M (e.g. 10.0 is stored as 10000000)
}

// LabReference holds one reference range, always in SI units; it is one row
// of the lab_reference table.
// Composite key emulated via synthetic ref_id (see MakeRefID). Note that the
// key does not include AgeEnd, so two ranges that share LOINC, source, sex
// and starting age map to the same row.
type LabReference struct {
	RefID   string `db:"ref_id,pk"`  // synthetic primary key: "loinc|source|sex|ageDays"
	LoincID string `db:"loinc_id"`   // FK to lab_test
	Source  string `db:"source"`     // origin of the range, e.g. "CALIPER", "IFCC"
	Sex     string `db:"sex"`        // "" (unisex), "M" or "F"
	AgeDays int64  `db:"age_days"`   // start of age range in days, inclusive (0 = birth)
	AgeEnd  int64  `db:"age_end"`    // end of age range in days; LabRefLookup treats it as inclusive
	RefLow  int64  `db:"ref_low"`    // lower bound x1M (LabScale); -1 (LabNoRef) = no bound
	RefHigh int64  `db:"ref_high"`   // upper bound x1M (LabScale); -1 (LabNoRef) = no bound
	Unit    string `db:"unit"`       // SI unit these values are in
}

// MakeRefID assembles the synthetic primary key of a LabReference by joining
// the LOINC code, source, sex and starting age (in days) with "|".
func MakeRefID(loinc, source, sex string, ageDays int64) string {
	parts := []string{loinc, source, sex, fmt.Sprintf("%d", ageDays)}
	return strings.Join(parts, "|")
}

// LabTestGet looks up the lab_test row for the given LOINC code.
// It returns (nil, nil) when no row matches and (nil, err) when the query
// itself fails; otherwise a pointer to the first matching row.
func LabTestGet(loincID string) (*LabTest, error) {
	var rows []LabTest
	err := refQuery("SELECT * FROM lab_test WHERE loinc_id = ?", []any{loincID}, &rows)
	if err != nil {
		return nil, err
	}
	if len(rows) == 0 {
		return nil, nil
	}
	return &rows[0], nil
}

// LabTestSave writes a single LabTest to the lab_test table via refSave
// (an upsert) and returns whatever error refSave reports.
func LabTestSave(t *LabTest) error {
	const table = "lab_test"
	return refSave(table, t)
}

// LabTestSaveBatch writes several LabTest records to the lab_test table in
// one refSave call (an upsert). An empty slice is a no-op that returns nil.
func LabTestSaveBatch(tests []LabTest) error {
	if len(tests) > 0 {
		return refSave("lab_test", tests)
	}
	return nil
}

// LabRefSave writes one LabReference to the lab_reference table via refSave
// (an upsert). The record's RefID is overwritten in place with the key that
// MakeRefID derives from its LOINC code, source, sex and starting age.
func LabRefSave(r *LabReference) error {
	id := MakeRefID(r.LoincID, r.Source, r.Sex, r.AgeDays)
	r.RefID = id
	return refSave("lab_reference", r)
}

// LabRefSaveBatch writes several LabReference records to the lab_reference
// table in one refSave call (an upsert). Before saving, every element's RefID
// is overwritten in place with the key produced by MakeRefID, so the caller's
// slice is modified. An empty slice is a no-op that returns nil.
func LabRefSaveBatch(refs []LabReference) error {
	if len(refs) == 0 {
		return nil
	}
	for i := range refs {
		ref := &refs[i]
		ref.RefID = MakeRefID(ref.LoincID, ref.Source, ref.Sex, ref.AgeDays)
	}
	return refSave("lab_reference", refs)
}

// LabRefLookupAll returns all reference ranges stored for a LOINC code.
// The result is empty when none exist, and (nil, err) when the query fails.
func LabRefLookupAll(loincID string) ([]LabReference, error) {
	var refs []LabReference
	// Run the query as its own statement: in `return refs, refQuery(..., &refs)`
	// the Go spec does not say whether refs is read before or after the call,
	// so the populated slice is not guaranteed to be what gets returned.
	err := refQuery("SELECT ref_id, loinc_id, source, sex, age_days, age_end, ref_low, ref_high, unit FROM lab_reference WHERE loinc_id = ?",
		[]any{loincID}, &refs)
	if err != nil {
		return nil, err
	}
	return refs, nil
}

// LabRefLookup finds the best reference range for a test at a given age/sex.
//
// A stored range is a candidate when ageDays lies within [AgeDays, AgeEnd]
// (both ends inclusive) and its Sex is either equal to sex or empty (unisex).
// Candidates are ranked lexicographically:
//
//  1. a range whose Sex equals sex beats a unisex fallback;
//  2. among ranges with the same sex match, the narrowest age span wins;
//  3. on a full tie the row returned first by the query is kept.
//
// Returns (nil, nil) if no range matches and (nil, err) if the query fails.
func LabRefLookup(loincID, sex string, ageDays int64) (*LabReference, error) {
	var refs []LabReference
	if err := refQuery(
		"SELECT ref_id, loinc_id, source, sex, age_days, age_end, ref_low, ref_high, unit FROM lab_reference WHERE loinc_id = ?",
		[]any{loincID}, &refs,
	); err != nil {
		return nil, err
	}

	var (
		best         *LabReference
		bestSexMatch bool
		bestSpan     int64
	)
	for i := range refs {
		r := &refs[i]
		// Age must be in range.
		if ageDays < r.AgeDays || ageDays > r.AgeEnd {
			continue
		}
		sexMatch := r.Sex == sex
		if !sexMatch && r.Sex != "" {
			continue // range is specific to the other sex
		}
		span := r.AgeEnd - r.AgeDays

		// Compare (sexMatch, span) as a tuple rather than summing weights:
		// a fixed sex bonus added to a span-derived score lets a slightly
		// narrower unisex range outrank a sex-specific one.
		switch {
		case best == nil,
			sexMatch && !bestSexMatch,
			sexMatch == bestSexMatch && span < bestSpan:
			best, bestSexMatch, bestSpan = r, sexMatch, span
		}
	}

	return best, nil
}

// AgeDays returns the number of whole 86400-second days between a date of
// birth and a later instant, both given as Unix timestamps in seconds.
// The division truncates toward zero, so partial days are dropped and the
// result is negative when atUnix precedes dobUnix by at least a day.
func AgeDays(dobUnix, atUnix int64) int64 {
	const secondsPerDay = 24 * 60 * 60
	elapsed := atUnix - dobUnix
	return elapsed / secondsPerDay
}

// PopulateReferences asks the LLM for reference ranges for every lab_test
// entry that has an SI unit but no row in lab_reference yet, and saves the
// returned ranges with LabRefSaveBatch.
//
// It is a no-op when GeminiKey is empty. Tests are sent in batches of 50; a
// failed batch is logged and skipped so the remaining batches still run.
// Only failures to load the two tables, or to save the result, are returned.
func PopulateReferences() error {
	// No API key configured: nothing to do.
	if GeminiKey == "" {
		return nil
	}

	// All known tests.
	var tests []LabTest
	err := refQuery("SELECT loinc_id, name, si_unit, direction, si_factor FROM lab_test", nil, &tests)
	if err != nil {
		return fmt.Errorf("load lab_test: %w", err)
	}

	// LOINC codes that already have at least one reference range.
	var existing []LabReference
	err = refQuery("SELECT ref_id, loinc_id FROM lab_reference", nil, &existing)
	if err != nil {
		return fmt.Errorf("load lab_reference: %w", err)
	}
	covered := make(map[string]bool)
	for i := range existing {
		covered[existing[i].LoincID] = true
	}

	// Keep only tests that have an SI unit and are not covered yet.
	var pending []LabTest
	for _, t := range tests {
		if t.SIUnit != "" && !covered[t.LoincID] {
			pending = append(pending, t)
		}
	}

	total := len(pending)
	if total == 0 {
		log.Printf("populate_refs: all %d tests already have references", len(tests))
		return nil
	}
	log.Printf("populate_refs: %d tests need references (of %d total)", total, len(tests))

	// One LLM call per batch; log positions are 1-based.
	const batchSize = 50
	var collected []LabReference
	for start := 0; start < total; start += batchSize {
		stop := start + batchSize
		if stop > total {
			stop = total
		}
		log.Printf("populate_refs: LLM batch %d-%d of %d", start+1, stop, total)

		refs, batchErr := callReferenceLLM(pending[start:stop])
		if batchErr != nil {
			// Best effort: a failed batch must not abort the others.
			log.Printf("populate_refs: batch %d-%d error: %v", start+1, stop, batchErr)
			continue
		}
		collected = append(collected, refs...)
	}

	if len(collected) == 0 {
		log.Printf("populate_refs: no references returned")
		return nil
	}

	log.Printf("populate_refs: saving %d reference ranges", len(collected))
	return LabRefSaveBatch(collected)
}

// callReferenceLLM asks the LLM (via CallGeminiMultimodal) for unisex
// reference ranges for one batch of tests and converts the JSON reply into
// LabReference values with bounds scaled by ToLabScale. RefID is left empty.
//
// The reply is treated as untrusted input. A record is dropped when:
//   - its LOINC code is empty or was not part of this batch,
//   - low >= high,
//   - its age window is negative, inverted or empty.
//
// A missing unit falls back to the SI unit the prompt asked for. A Markdown
// code fence around the JSON is tolerated. An empty batch returns (nil, nil)
// without calling the LLM.
//
// Returns an error if the LLM call fails or the reply is not a JSON array of
// the expected shape.
func callReferenceLLM(tests []LabTest) ([]LabReference, error) {
	if len(tests) == 0 {
		return nil, nil
	}

	lines := make([]string, 0, len(tests))
	// requested maps each LOINC code in this batch to its SI unit; it is used
	// below to validate what the model sends back.
	requested := make(map[string]string, len(tests))
	for _, t := range tests {
		lines = append(lines, fmt.Sprintf("%s %s (%s)", t.LoincID, t.Name, t.SIUnit))
		requested[t.LoincID] = t.SIUnit
	}

	prompt := fmt.Sprintf(`For each LOINC code below, provide standard reference ranges IN THE SI UNIT SHOWN.

Give up to 3 ranges per test:
- Pediatric (ages 2-12): source "CALIPER"
- Adolescent (ages 12-18): source "CALIPER"
- Adult (ages 18+): source "IFCC"

Skip tests that are non-numeric, qualitative, or where standard ranges don't exist.

Return JSON array of objects:
[{"loinc":"718-7","source":"CALIPER","age_min_days":730,"age_max_days":4380,"low":115.0,"high":135.0,"unit":"g/L"}, ...]

age_min_days and age_max_days are ages in days (730=2yr, 4380=12yr, 6570=18yr, 36500=100yr).
Values must be in the SI unit provided. Omit sex-specific ranges for simplicity — use unisex.
Only include tests where you are confident in the reference values.

Tests:
%s`, strings.Join(lines, "\n"))

	maxTokens := 8192
	temp := 0.0
	config := &GeminiConfig{
		Temperature:     &temp,
		MaxOutputTokens: &maxTokens,
	}

	resp, err := CallGeminiMultimodal([]GeminiPart{{Text: prompt}}, config)
	if err != nil {
		return nil, err
	}

	// Strip an optional Markdown fence (three backticks, optionally followed
	// by a language tag) so a fenced reply still parses.
	payload := strings.TrimSpace(resp)
	if strings.HasPrefix(payload, "```") {
		payload = strings.TrimPrefix(payload, "```")
		if nl := strings.IndexByte(payload, '\n'); nl >= 0 && !strings.HasPrefix(payload, "[") {
			payload = payload[nl+1:] // drop the rest of the fence line (language tag)
		}
		payload = strings.TrimSuffix(strings.TrimSpace(payload), "```")
	}

	type refResult struct {
		Loinc      string  `json:"loinc"`
		Source     string  `json:"source"`
		AgeMinDays int64   `json:"age_min_days"`
		AgeMaxDays int64   `json:"age_max_days"`
		Low        float64 `json:"low"`
		High       float64 `json:"high"`
		Unit       string  `json:"unit"`
	}

	var results []refResult
	if err := json.Unmarshal([]byte(payload), &results); err != nil {
		return nil, fmt.Errorf("parse response: %w (first 300 chars: %.300s)", err, resp)
	}

	refs := make([]LabReference, 0, len(results))
	for _, r := range results {
		loinc := strings.TrimSpace(r.Loinc)
		siUnit, known := requested[loinc]
		if loinc == "" || !known {
			continue // code we never asked about; would be an orphan row
		}
		if r.Low >= r.High {
			continue
		}
		if r.AgeMinDays < 0 || r.AgeMaxDays <= r.AgeMinDays {
			continue // no lookup age could sensibly fall into this window
		}
		unit := r.Unit
		if unit == "" {
			unit = siUnit // the prompt required values in this unit
		}
		refs = append(refs, LabReference{
			LoincID: loinc,
			Source:  r.Source,
			Sex:     "",
			AgeDays: r.AgeMinDays,
			AgeEnd:  r.AgeMaxDays,
			RefLow:  ToLabScale(r.Low),
			RefHigh: ToLabScale(r.High),
			Unit:    unit,
		})
	}

	return refs, nil
}