// inou/tools/import-caliper/main.go (357 lines, 9.2 KiB, Go)

package main
import (
"encoding/json"
"fmt"
"log"
"math"
"os"
"regexp"
"strconv"
"strings"
"inou/lib"
)
// CaliperAnalyte is one top-level record of the JSON array produced by the
// browser export: an analyte name and id, its LOINC code, nested analyte
// metadata, and the list of reference intervals. main skips records whose
// Loinc is empty.
type CaliperAnalyte struct {
	Name      string            `json:"name"`
	ID        int               `json:"id"`
	Loinc     string            `json:"loinc"`
	Analyte   CaliperMeta       `json:"analyte"`
	Intervals []CaliperInterval `json:"intervals"`
}
// CaliperMeta is the nested "analyte" object of a CaliperAnalyte record.
// main copies SIUnit into the unit of every reference row and into the
// lab_test entry it creates or updates; ConventionalUnit is decoded but not
// read by the code in this file.
type CaliperMeta struct {
	ID               int    `json:"id"`
	Name             string `json:"name"`
	SIUnit           string `json:"siUnit"`
	ConventionalUnit string `json:"conventionalUnit"`
}
// CaliperInterval is one entry of a CaliperAnalyte's "intervals" array.
// Every field arrives as a string: main maps Gender ("M", "F", anything else
// meaning both), hands AgeDisplay to parseAgeRange, and converts the two SI
// limits with parseFloat. Samples is decoded but not read by the code in
// this file.
type CaliperInterval struct {
	Gender         string `json:"gender"`
	AgeDisplay     string `json:"ageDisplay"`
	SIRiLowerLimit string `json:"siRiLowerLimit"`
	SIRiUpperLimit string `json:"siRiUpperLimit"`
	Samples        string `json:"samples"`
}
// main is the entry point of the import-caliper tool.
//
// Usage: import-caliper <caliper_with_loinc.json> [--dry-run]
//
// Steps, in order:
//  1. initialise lib and decode the analyte JSON file named by the first argument;
//  2. unless --dry-run is given, delete every existing reference whose source is "CALIPER";
//  3. build one lib.LabReference per interval with a parseable age range and
//     valid limits, for every analyte that has a LOINC code;
//  4. with --dry-run, print the first 20 built rows and return without writing;
//  5. save the rows in batches of 100 (a failed batch is logged and skipped);
//  6. create a lab_test entry for each analyte that has none, or fill in an
//     empty SI unit on an existing one;
//  7. copy reference rows from the correct LOINC code to known wrong codes.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "Usage: import-caliper <caliper_with_loinc.json>")
		fmt.Fprintln(os.Stderr, " Imports CALIPER reference intervals into lab_reference table.")
		fmt.Fprintln(os.Stderr, " Pass --dry-run to preview without writing.")
		os.Exit(1)
	}
	file := os.Args[1]
	dryRun := false
	for _, a := range os.Args[2:] {
		if a == "--dry-run" {
			dryRun = true
		}
	}

	if err := lib.Init(); err != nil {
		log.Fatalf("lib.Init: %v", err)
	}

	data, err := os.ReadFile(file)
	if err != nil {
		log.Fatalf("read %s: %v", file, err)
	}
	var analytes []CaliperAnalyte
	if err := json.Unmarshal(data, &analytes); err != nil {
		log.Fatalf("parse JSON: %v", err)
	}
	log.Printf("Loaded %d analytes from %s", len(analytes), file)

	// Delete existing CALIPER references so the import replaces them.
	if !dryRun {
		existing, err := lib.LabRefListBySource("CALIPER")
		if err != nil {
			// Best effort: the import continues even if the old rows
			// could not be listed.
			log.Printf("Warning: could not count existing: %v", err)
		} else {
			log.Printf("Deleting %d existing CALIPER references", len(existing))
			for _, r := range existing {
				// NOTE(review): the result of LabRefDeleteByID (if any) is
				// not checked; its signature is not visible from this file.
				lib.LabRefDeleteByID(r.RefID)
			}
		}
	}

	var refs []lib.LabReference
	var noLoinc, badAge, badLimits int
	for _, a := range analytes {
		if a.Loinc == "" {
			noLoinc++
			continue
		}
		siUnit := a.Analyte.SIUnit
		for _, iv := range a.Intervals {
			ageDays, ageEnd, err := parseAgeRange(iv.AgeDisplay)
			if err != nil {
				badAge++
				continue
			}
			low, errL := parseFloat(iv.SIRiLowerLimit)
			high, errH := parseFloat(iv.SIRiUpperLimit)
			// Written as !(low < high) rather than low >= high so that a
			// NaN limit (every comparison with NaN is false) is rejected.
			if errL != nil || errH != nil || !(low < high) {
				badLimits++
				continue
			}
			// Only "M" and "F" are sex-specific; "MF", "" and any
			// unrecognised value mean the interval applies to both.
			sex := ""
			switch iv.Gender {
			case "M", "F":
				sex = iv.Gender
			}
			refs = append(refs, lib.LabReference{
				LoincID: a.Loinc,
				Source:  "CALIPER",
				Sex:     sex,
				AgeDays: ageDays,
				AgeEnd:  ageEnd,
				RefLow:  lib.ToLabScale(low),
				RefHigh: lib.ToLabScale(high),
				Unit:    siUnit,
			})
		}
	}
	log.Printf("Built %d reference rows (skipped: %d no-loinc, %d bad-age, %d bad-limits)",
		len(refs), noLoinc, badAge, badLimits)

	if dryRun {
		log.Printf("DRY RUN — not saving. Showing first 20:")
		for i, r := range refs {
			if i >= 20 {
				break
			}
			fmt.Printf(" %s sex=%s age=%d-%d low=%.2f high=%.2f %s\n",
				r.LoincID, r.Sex, r.AgeDays, r.AgeEnd, lib.FromLabScale(r.RefLow), lib.FromLabScale(r.RefHigh), r.Unit)
		}
		return
	}

	// Save in batches; a failed batch is logged and the remaining batches
	// are still attempted.
	batchSize := 100
	saved := 0
	for i := 0; i < len(refs); i += batchSize {
		end := i + batchSize
		if end > len(refs) {
			end = len(refs)
		}
		batch := refs[i:end]
		if err := lib.LabRefSaveBatch(batch); err != nil {
			log.Printf("Error saving batch %d-%d: %v", i, end, err)
			continue
		}
		saved += len(batch)
	}
	log.Printf("Saved %d CALIPER reference rows", saved)

	// Also ensure lab_test entries exist for all CALIPER analytes with LOINC.
	testsCreated := 0
	for _, a := range analytes {
		if a.Loinc == "" || a.Analyte.SIUnit == "" {
			continue
		}
		// The lookup error is deliberately ignored: a nil result is treated
		// as "no entry yet" and a new one is created below.
		// NOTE(review): confirm how LabTestGet reports a missing row.
		existing, _ := lib.LabTestGet(a.Loinc)
		if existing != nil {
			// Update SI unit from CALIPER if missing.
			if existing.SIUnit == "" {
				existing.SIUnit = a.Analyte.SIUnit
				if err := lib.LabTestSave(existing); err != nil {
					log.Printf("Warning: could not update lab_test %s: %v", a.Loinc, err)
				}
			}
			continue
		}
		// Create new lab_test entry.
		t := lib.LabTest{
			LoincID:   a.Loinc,
			Name:      a.Name,
			SIUnit:    a.Analyte.SIUnit,
			Direction: "range",      // default
			SIFactor:  lib.LabScale, // 1.0 (SI to SI)
		}
		if err := lib.LabTestSave(&t); err != nil {
			log.Printf("Warning: could not save lab_test %s: %v", a.Loinc, err)
		} else {
			testsCreated++
		}
	}
	log.Printf("Created %d new lab_test entries", testsCreated)

	// Copy reference rows for known LOINC aliases (wrong codes from Gemini
	// normalization). Each pair is {wrong code in use, correct code}.
	aliases := [][2]string{
		{"785-9", "787-2"},   // MCV entries use 785-9, CALIPER has 787-2
		{"787-5", "785-6"},   // MCH entries use 787-5, CALIPER has 785-6
		{"786-7", "786-4"},   // MCHC entries use 786-7, CALIPER has 786-4
		{"2069-8", "2069-3"}, // Cl entries use 2069-8, CALIPER has 2069-3
	}
	aliasTotal := 0
	for _, pair := range aliases {
		wrong, correct := pair[0], pair[1]
		srcRefs, err := lib.LabRefLookupAll(correct)
		if err != nil {
			log.Printf("Warning: could not look up refs for %s: %v", correct, err)
			continue
		}
		if len(srcRefs) == 0 {
			continue
		}
		copies := make([]lib.LabReference, 0, len(srcRefs))
		for _, r := range srcRefs {
			copies = append(copies, lib.LabReference{
				LoincID: wrong, Source: r.Source, Sex: r.Sex,
				AgeDays: r.AgeDays, AgeEnd: r.AgeEnd,
				RefLow: r.RefLow, RefHigh: r.RefHigh, Unit: r.Unit,
			})
		}
		// No dry-run check is needed here: the dry-run path returned above.
		// A failed save must not be counted or reported as aliased.
		if err := lib.LabRefSaveBatch(copies); err != nil {
			log.Printf("Warning: could not save aliases %s → %s: %v", correct, wrong, err)
			continue
		}
		aliasTotal += len(copies)
		log.Printf("Aliased %d refs: %s → %s", len(copies), correct, wrong)
	}
	log.Printf("Total LOINC aliases: %d rows", aliasTotal)
}
// Day lengths used when converting age units. A month is the average
// Gregorian month; a year is a flat 365 days.
const (
	daysPerYear  = 365
	daysPerMonth = 30.44
	daysPerWeek  = 7
)

// Patterns shared by the age parsers, compiled once at package scope.
var (
	// ageNumRe captures a leading unsigned decimal number (group 1) and
	// whatever text follows it (group 2, the unit).
	ageNumRe = regexp.MustCompile(`^(\d+\.?\d*)\s*(.*)$`)

	// ageNoSepRe matches a range written without a separator,
	// e.g. "0 <6 months".
	ageNoSepRe = regexp.MustCompile(`^(\d+)\s+(<\d)`)
)

// parseAgeRange converts a CALIPER age display string to a (start, end)
// pair in days. Matching is case-insensitive and tolerant of extra
// whitespace; " - " is accepted in place of " to ", and a leading "<" on the
// end value is ignored.
//
// Examples:
//
//	"0 to < 1 Year"         → (0, 365)
//	"1 to < 5 Years"        → (365, 1825)
//	"5 to <21 Years"        → (1825, 7665)
//	"0 to < 14 Days"        → (0, 14)
//	"15 Days to < 1 Year"   → (15, 365)
//	"6 Months to < 2 Years" → (183, 730)
//	"1 Week to < 19 Years"  → (7, 6935)
//	"3 to < 6 Months"       → (91, 183)
//	"15 to < 1 Year"        → (15, 365)
//
// A start written without a unit takes the unit of the end value; if that
// does not yield start < end it is read as a number of days instead. An
// error is returned when the string is empty, has no separator, contains an
// unparseable value, or describes an empty or inverted range.
func parseAgeRange(s string) (int64, int64, error) {
	// Collapse every run of whitespace to a single space and lower-case,
	// so the separator and unit matching below see one canonical form.
	s = strings.ToLower(strings.Join(strings.Fields(s), " "))
	if s == "" {
		return 0, 0, fmt.Errorf("empty age range")
	}
	s = strings.ReplaceAll(s, " - ", " to ")
	// Handle "0 <6 months" → "0 to <6 months". Strings that do not match
	// are returned unchanged.
	s = ageNoSepRe.ReplaceAllString(s, "${1} to ${2}")

	parts := strings.SplitN(s, " to ", 2)
	if len(parts) != 2 {
		// Try "to<" without a space, as in "5 to<21 years".
		parts = strings.SplitN(s, " to<", 2)
	}
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("no 'to' found in %q", s)
	}
	startStr := strings.TrimSpace(parts[0])
	endStr := strings.TrimSpace(strings.TrimLeft(parts[1], "< "))

	startDays, err := parseAgeToDays(startStr)
	if err != nil {
		return 0, 0, fmt.Errorf("parse start %q: %w", startStr, err)
	}
	endVal, endUnitDays, err := parseAgeQuantity(endStr)
	if err != nil {
		return 0, 0, fmt.Errorf("parse end %q: %w", endStr, err)
	}
	endDays := int64(math.Round(endVal * endUnitDays))

	// A unit-less start ("3 to < 6 months") shares the end's unit. When
	// that cannot be right because it would not precede the end
	// ("15 to < 1 year"), the number is taken as days.
	if m := ageNumRe.FindStringSubmatch(startStr); m != nil && m[2] == "" {
		n := parseFirstNumber(startStr)
		startDays = int64(math.Round(n * endUnitDays))
		if startDays >= endDays {
			startDays = int64(math.Round(n))
		}
	}

	// A start with an explicit unit is never reinterpreted, and an empty
	// range (start == end) is as invalid as an inverted one.
	if startDays >= endDays {
		return 0, 0, fmt.Errorf("start >= end in %q", s)
	}
	return startDays, endDays, nil
}

// parseFirstNumber returns the leading decimal number of s, or s parsed as
// a float when it has no such prefix. It returns 0 if neither parse works.
func parseFirstNumber(s string) float64 {
	s = strings.TrimSpace(s)
	if m := ageNumRe.FindStringSubmatch(s); m != nil {
		// Group 1 only matches digits with an optional fraction, so a
		// parse failure here can only be a range error; 0 is returned then.
		v, _ := strconv.ParseFloat(m[1], 64)
		return v
	}
	// Best effort for forms the pattern rejects (e.g. ".5"); 0 on failure.
	v, _ := strconv.ParseFloat(s, 64)
	return v
}

// parseAgeQuantity splits a single age such as "5 years", "6 months" or
// "14 days" into its numeric value and the length of its unit in days.
// A number without a unit is taken to be in years. Values that are negative
// or not finite are rejected.
func parseAgeQuantity(s string) (float64, float64, error) {
	s = strings.ToLower(strings.TrimSpace(s))

	perUnit := float64(daysPerYear)
	val, bareErr := strconv.ParseFloat(s, 64)
	if bareErr != nil {
		m := ageNumRe.FindStringSubmatch(s)
		if m == nil {
			return 0, 0, fmt.Errorf("cannot parse %q", s)
		}
		var err error
		if val, err = strconv.ParseFloat(m[1], 64); err != nil {
			return 0, 0, fmt.Errorf("parse number in %q: %w", s, err)
		}
		// Strip a trailing plural 's' so "years"/"yrs"/"days" match below.
		unit := strings.TrimSpace(strings.TrimSuffix(strings.TrimSpace(m[2]), "s"))
		switch {
		case unit == "" || strings.HasPrefix(unit, "year") || strings.HasPrefix(unit, "yr") || unit == "y":
			perUnit = daysPerYear
		case strings.HasPrefix(unit, "month") || unit == "mo" || unit == "m":
			perUnit = daysPerMonth
		case strings.HasPrefix(unit, "week") || unit == "wk" || unit == "w":
			perUnit = daysPerWeek
		case strings.HasPrefix(unit, "day") || unit == "d":
			perUnit = 1
		default:
			return 0, 0, fmt.Errorf("unknown unit %q in %q", unit, s)
		}
	}

	// strconv.ParseFloat accepts "nan", "inf" and signed numbers; none of
	// them is a meaningful age, and NaN/Inf cannot be converted to int64.
	if math.IsNaN(val) || math.IsInf(val, 0) || val < 0 {
		return 0, 0, fmt.Errorf("age %q is not a finite non-negative number", s)
	}
	return val, perUnit, nil
}

// parseAgeToDays converts a single age value like "5 years", "6 months",
// "14 days" to days, rounded to the nearest whole day. Bare numbers are
// assumed to be years.
func parseAgeToDays(s string) (int64, error) {
	val, perUnit, err := parseAgeQuantity(s)
	if err != nil {
		return 0, err
	}
	return int64(math.Round(val * perUnit)), nil
}
// parseFloat parses a reference-limit string into a finite float64.
//
// Surrounding whitespace is ignored. The placeholders "", "-" and "N/A"
// (in any letter case) are reported as an "empty value" error. Strings that
// strconv.ParseFloat rejects, and the special values NaN and ±Inf that it
// would otherwise accept, also produce an error, so a caller comparing two
// limits never sees a non-finite number. The returned value is 0 whenever
// the error is non-nil.
func parseFloat(s string) (float64, error) {
	s = strings.TrimSpace(s)
	if s == "" || s == "-" || strings.EqualFold(s, "N/A") {
		return 0, fmt.Errorf("empty value")
	}
	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, err
	}
	if math.IsNaN(v) || math.IsInf(v, 0) {
		return 0, fmt.Errorf("non-finite value %q", s)
	}
	return v, nil
}