361 lines
9.5 KiB
Go
361 lines
9.5 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"math"
|
|
"os"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"inou/lib"
|
|
)
|
|
|
|
// CaliperAnalyte mirrors the JSON structure from the browser export
|
|
type CaliperAnalyte struct {
|
|
Name string `json:"name"`
|
|
ID int `json:"id"`
|
|
Loinc string `json:"loinc"`
|
|
Analyte CaliperMeta `json:"analyte"`
|
|
Intervals []CaliperInterval `json:"intervals"`
|
|
}
|
|
|
|
// CaliperMeta is the nested "analyte" object of a CALIPER export entry.
type CaliperMeta struct {
	// ID is the numeric "id" field of the nested object; the importer does not read it.
	ID int `json:"id"`
	// Name is the "name" field of the nested object; the importer does not read it.
	Name string `json:"name"`
	// SIUnit is the SI unit string; it is stored as the unit of every
	// reference row and of newly created lab_test entries.
	SIUnit string `json:"siUnit"`
	// ConventionalUnit is decoded from the export but not used by the importer.
	ConventionalUnit string `json:"conventionalUnit"`
}
|
|
|
|
// CaliperInterval is one reference interval of an analyte as exported by
// CALIPER. All values arrive as strings and are parsed by the importer.
type CaliperInterval struct {
	// Gender is "M", "F" or a combined/empty marker such as "MF"; only "M"
	// and "F" are stored, everything else is stored as an empty sex.
	Gender string `json:"gender"`
	// AgeDisplay is the human-readable age range, e.g. "1 to < 5 Years".
	AgeDisplay string `json:"ageDisplay"`
	// SIRiLowerLimit is the lower reference limit in SI units, as text.
	SIRiLowerLimit string `json:"siRiLowerLimit"`
	// SIRiUpperLimit is the upper reference limit in SI units, as text.
	SIRiUpperLimit string `json:"siRiUpperLimit"`
	// Samples is decoded from the export but not used by the importer.
	Samples string `json:"samples"`
}
|
|
|
|
func main() {
|
|
if len(os.Args) < 2 {
|
|
fmt.Fprintln(os.Stderr, "Usage: import-caliper <caliper_with_loinc.json>")
|
|
fmt.Fprintln(os.Stderr, " Imports CALIPER reference intervals into lab_reference table.")
|
|
fmt.Fprintln(os.Stderr, " Pass --dry-run to preview without writing.")
|
|
os.Exit(1)
|
|
}
|
|
|
|
file := os.Args[1]
|
|
dryRun := false
|
|
for _, a := range os.Args[2:] {
|
|
if a == "--dry-run" {
|
|
dryRun = true
|
|
}
|
|
}
|
|
|
|
if err := lib.Init(); err != nil {
|
|
log.Fatalf("lib.Init: %v", err)
|
|
}
|
|
|
|
data, err := os.ReadFile(file)
|
|
if err != nil {
|
|
log.Fatalf("read %s: %v", file, err)
|
|
}
|
|
|
|
var analytes []CaliperAnalyte
|
|
if err := json.Unmarshal(data, &analytes); err != nil {
|
|
log.Fatalf("parse JSON: %v", err)
|
|
}
|
|
log.Printf("Loaded %d analytes from %s", len(analytes), file)
|
|
|
|
// Delete existing CALIPER references
|
|
if !dryRun {
|
|
// Query existing to count
|
|
var existing []lib.LabReference
|
|
if err := lib.Query("SELECT ref_id FROM lab_reference WHERE ref_id LIKE '%|CALIPER|%'", nil, &existing); err != nil {
|
|
log.Printf("Warning: could not count existing: %v", err)
|
|
} else {
|
|
log.Printf("Deleting %d existing CALIPER references", len(existing))
|
|
for _, r := range existing {
|
|
lib.Delete("lab_reference", "ref_id", r.RefID)
|
|
}
|
|
}
|
|
}
|
|
|
|
var refs []lib.LabReference
|
|
var noLoinc, badAge, badLimits int
|
|
|
|
for _, a := range analytes {
|
|
if a.Loinc == "" {
|
|
noLoinc++
|
|
continue
|
|
}
|
|
|
|
siUnit := a.Analyte.SIUnit
|
|
|
|
for _, iv := range a.Intervals {
|
|
ageDays, ageEnd, err := parseAgeRange(iv.AgeDisplay)
|
|
if err != nil {
|
|
badAge++
|
|
continue
|
|
}
|
|
|
|
low, errL := parseFloat(iv.SIRiLowerLimit)
|
|
high, errH := parseFloat(iv.SIRiUpperLimit)
|
|
if errL != nil || errH != nil || low >= high {
|
|
badLimits++
|
|
continue
|
|
}
|
|
|
|
sex := ""
|
|
switch iv.Gender {
|
|
case "M":
|
|
sex = "M"
|
|
case "F":
|
|
sex = "F"
|
|
case "MF", "":
|
|
sex = ""
|
|
default:
|
|
sex = ""
|
|
}
|
|
|
|
refs = append(refs, lib.LabReference{
|
|
LoincID: a.Loinc,
|
|
Source: "CALIPER",
|
|
Sex: sex,
|
|
AgeDays: ageDays,
|
|
AgeEnd: ageEnd,
|
|
RefLow: lib.ToLabScale(low),
|
|
RefHigh: lib.ToLabScale(high),
|
|
Unit: siUnit,
|
|
})
|
|
}
|
|
}
|
|
|
|
log.Printf("Built %d reference rows (skipped: %d no-loinc, %d bad-age, %d bad-limits)",
|
|
len(refs), noLoinc, badAge, badLimits)
|
|
|
|
if dryRun {
|
|
log.Printf("DRY RUN — not saving. Showing first 20:")
|
|
for i, r := range refs {
|
|
if i >= 20 {
|
|
break
|
|
}
|
|
fmt.Printf(" %s sex=%s age=%d-%d low=%.2f high=%.2f %s\n",
|
|
r.LoincID, r.Sex, r.AgeDays, r.AgeEnd, lib.FromLabScale(r.RefLow), lib.FromLabScale(r.RefHigh), r.Unit)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Save in batches
|
|
batchSize := 100
|
|
saved := 0
|
|
for i := 0; i < len(refs); i += batchSize {
|
|
end := i + batchSize
|
|
if end > len(refs) {
|
|
end = len(refs)
|
|
}
|
|
batch := refs[i:end]
|
|
if err := lib.LabRefSaveBatch(batch); err != nil {
|
|
log.Printf("Error saving batch %d-%d: %v", i, end, err)
|
|
continue
|
|
}
|
|
saved += len(batch)
|
|
}
|
|
|
|
log.Printf("Saved %d CALIPER reference rows", saved)
|
|
|
|
// Also ensure lab_test entries exist for all CALIPER analytes with LOINC
|
|
testsCreated := 0
|
|
for _, a := range analytes {
|
|
if a.Loinc == "" || a.Analyte.SIUnit == "" {
|
|
continue
|
|
}
|
|
existing, _ := lib.LabTestGet(a.Loinc)
|
|
if existing != nil {
|
|
// Update SI unit from CALIPER if missing
|
|
if existing.SIUnit == "" {
|
|
existing.SIUnit = a.Analyte.SIUnit
|
|
lib.LabTestSave(existing)
|
|
}
|
|
continue
|
|
}
|
|
// Create new lab_test entry
|
|
t := lib.LabTest{
|
|
LoincID: a.Loinc,
|
|
Name: a.Name,
|
|
SIUnit: a.Analyte.SIUnit,
|
|
Direction: "range", // default
|
|
SIFactor: lib.LabScale, // 1.0 (SI to SI)
|
|
}
|
|
if err := lib.LabTestSave(&t); err != nil {
|
|
log.Printf("Warning: could not save lab_test %s: %v", a.Loinc, err)
|
|
} else {
|
|
testsCreated++
|
|
}
|
|
}
|
|
log.Printf("Created %d new lab_test entries", testsCreated)
|
|
|
|
// Copy reference rows for known LOINC aliases (wrong codes from Gemini normalization)
|
|
aliases := [][2]string{
|
|
{"785-9", "787-2"}, // MCV entries use 785-9, CALIPER has 787-2
|
|
{"787-5", "785-6"}, // MCH entries use 787-5, CALIPER has 785-6
|
|
{"786-7", "786-4"}, // MCHC entries use 786-7, CALIPER has 786-4
|
|
{"2069-8", "2069-3"}, // Cl entries use 2069-8, CALIPER has 2069-3
|
|
}
|
|
aliasTotal := 0
|
|
for _, pair := range aliases {
|
|
wrong, correct := pair[0], pair[1]
|
|
var srcRefs []lib.LabReference
|
|
if err := lib.Query(
|
|
"SELECT ref_id, loinc_id, source, sex, age_days, age_end, ref_low, ref_high, unit FROM lab_reference WHERE loinc_id = ?",
|
|
[]any{correct}, &srcRefs,
|
|
); err != nil || len(srcRefs) == 0 {
|
|
continue
|
|
}
|
|
var copies []lib.LabReference
|
|
for _, r := range srcRefs {
|
|
copies = append(copies, lib.LabReference{
|
|
LoincID: wrong, Source: r.Source, Sex: r.Sex,
|
|
AgeDays: r.AgeDays, AgeEnd: r.AgeEnd,
|
|
RefLow: r.RefLow, RefHigh: r.RefHigh, Unit: r.Unit,
|
|
})
|
|
}
|
|
if !dryRun {
|
|
lib.LabRefSaveBatch(copies)
|
|
}
|
|
aliasTotal += len(copies)
|
|
log.Printf("Aliased %d refs: %s → %s", len(copies), correct, wrong)
|
|
}
|
|
log.Printf("Total LOINC aliases: %d rows", aliasTotal)
|
|
}
|
|
|
|
// ageNumRe splits an age such as "6 months" into its leading number and the
// remaining unit text.
var ageNumRe = regexp.MustCompile(`^(\d+\.?\d*)\s*(.*)$`)

// bareRangeRe matches a range whose "to" separator is missing, such as
// "0 <6 months".
var bareRangeRe = regexp.MustCompile(`^(\d+)\s+(<\d)`)

// parseAgeRange converts a CALIPER age display string into a start and an
// (exclusive) end age in days.
//
// Examples:
//
//	"0 to < 1 Year"         → (0, 365)
//	"1 to < 5 Years"        → (365, 1825)
//	"5 to <21 Years"        → (1825, 7665)
//	"0 to < 14 Days"        → (0, 14)
//	"15 Days to < 1 Year"   → (15, 365)
//	"6 Months to < 2 Years" → (183, 730)
//	"1 Week to < 19 Years"  → (7, 6935)
//	"1 to < 6 Months"       → (30, 183)
//	"15 to < 1 Year"        → (15, 365)
//
// A start without a unit takes the unit of the end ("1 to < 6 Months" starts
// at one month). If that places the start at or after the end, the bare
// number is read as days instead ("15 to < 1 Year" starts at day 15).
//
// An error is returned when the string is empty, has no "to" separator,
// contains an unparsable age, or describes a range whose start is not
// strictly before its end.
func parseAgeRange(s string) (int64, int64, error) {
	// Collapse every run of whitespace (spaces, tabs, non-breaking spaces)
	// into a single space so the separator matching below is reliable.
	s = strings.Join(strings.Fields(s), " ")
	if s == "" {
		return 0, 0, fmt.Errorf("empty age range")
	}
	s = strings.ReplaceAll(s, " - ", " to ")
	s = strings.ToLower(s)

	// Handle "0 <6 months" → "0 to <6 months". No-op when it does not match.
	s = bareRangeRe.ReplaceAllString(s, "${1} to ${2}")

	// Split on " to "; also accept "to<" written without a space.
	startStr, endStr, ok := strings.Cut(s, " to ")
	if !ok {
		startStr, endStr, ok = strings.Cut(s, " to<")
	}
	if !ok {
		return 0, 0, fmt.Errorf("no 'to' found in %q", s)
	}

	startStr = strings.TrimSpace(startStr)
	endStr = strings.TrimSpace(endStr)
	endStr = strings.TrimSpace(strings.TrimPrefix(endStr, "<"))

	startDays, err := parseAgeToDays(startStr)
	if err != nil {
		return 0, 0, fmt.Errorf("parse start %q: %w", startStr, err)
	}

	endDays, err := parseAgeToDays(endStr)
	if err != nil {
		return 0, 0, fmt.Errorf("parse end %q: %w", endStr, err)
	}

	// A start without a unit shares the unit of the end.
	_, bareErr := strconv.ParseFloat(startStr, 64)
	bareStart := bareErr == nil
	if bareStart {
		if m := ageNumRe.FindStringSubmatch(endStr); m != nil {
			if unit := strings.TrimSpace(m[2]); unit != "" {
				if d, uerr := parseAgeToDays(startStr + " " + unit); uerr == nil {
					startDays = d
				}
			}
		}
	}

	// Sanity: the range must not be empty or reversed. Only a bare start is
	// ambiguous enough to retry, as a number of days: "15 to < 1 year"
	// really means "15 days to < 1 year".
	if startDays >= endDays {
		if !bareStart {
			return 0, 0, fmt.Errorf("start >= end in %q", s)
		}
		startDays = int64(math.Round(parseFirstNumber(startStr)))
		if startDays >= endDays {
			return 0, 0, fmt.Errorf("start >= end after retry in %q", s)
		}
	}

	return startDays, endDays, nil
}

// parseFirstNumber returns the leading number of s, e.g. 15 for "15 days".
// If s does not start with digits it is parsed as a whole; parse failures
// yield 0.
func parseFirstNumber(s string) float64 {
	s = strings.TrimSpace(s)
	if m := ageNumRe.FindStringSubmatch(s); m != nil {
		s = m[1]
	}
	// The error is intentionally dropped: callers treat "no number" as 0.
	v, _ := strconv.ParseFloat(s, 64)
	return v
}

// parseAgeToDays converts a single age value like "5 years", "6 months",
// "2 weeks" or "14 days" to days. A bare number is taken as years. Years
// count 365 days and months 30.44 days; results are rounded to whole days.
//
// An error is returned for text that does not start with a number, for an
// unknown unit, and for negative or non-finite numbers.
func parseAgeToDays(s string) (int64, error) {
	s = strings.TrimSpace(s)
	s = strings.ToLower(s)

	// Handle bare numbers (assumed years). ParseFloat also accepts "nan",
	// "inf" and signed values, none of which is a valid age.
	if v, err := strconv.ParseFloat(s, 64); err == nil {
		if math.IsNaN(v) || math.IsInf(v, 0) || v < 0 {
			return 0, fmt.Errorf("invalid age %q", s)
		}
		return int64(math.Round(v * 365)), nil
	}

	m := ageNumRe.FindStringSubmatch(s)
	if m == nil {
		return 0, fmt.Errorf("cannot parse %q", s)
	}

	val, err := strconv.ParseFloat(m[1], 64)
	if err != nil {
		return 0, fmt.Errorf("parse number in %q: %w", s, err)
	}

	// Strip the plural 's' so "years"/"year", "wks"/"wk" etc. compare equal.
	unit := strings.TrimSpace(m[2])
	unit = strings.TrimSuffix(unit, "s")
	unit = strings.TrimSpace(unit)

	switch {
	case strings.HasPrefix(unit, "year") || strings.HasPrefix(unit, "yr") || unit == "y":
		return int64(math.Round(val * 365)), nil
	case strings.HasPrefix(unit, "month") || unit == "mo" || unit == "m":
		return int64(math.Round(val * 30.44)), nil
	case strings.HasPrefix(unit, "week") || unit == "wk" || unit == "w":
		return int64(math.Round(val * 7)), nil
	case strings.HasPrefix(unit, "day") || unit == "d":
		return int64(math.Round(val)), nil
	default:
		// Includes a unit of just "s", which is empty after stripping the
		// plural and must not be mistaken for years.
		return 0, fmt.Errorf("unknown unit %q in %q", unit, s)
	}
}
|
|
|
|
// parseFloat parses a reference-limit string into a float64.
//
// Surrounding whitespace is ignored. The placeholders "", "N/A" and "-" are
// reported as an error, as is anything strconv.ParseFloat rejects. "NaN" and
// infinities are rejected as well: ParseFloat accepts them, but a NaN limit
// would slip through a caller's low >= high comparison (every comparison
// with NaN is false) and end up stored as a reference bound.
func parseFloat(s string) (float64, error) {
	s = strings.TrimSpace(s)
	switch s {
	case "", "N/A", "-":
		return 0, fmt.Errorf("empty value")
	}

	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, err
	}
	if math.IsNaN(v) || math.IsInf(v, 0) {
		return 0, fmt.Errorf("non-finite value %q", s)
	}
	return v, nil
}
|