inou/lib/loinc.go

315 lines
8.3 KiB
Go

package lib
import (
	"encoding/json"
	"fmt"
	"log"
	"sort"
	"strings"
)
// LoincInfo holds official LOINC data from loinc_lab.
// Every field carries a `db` tag naming a loinc_lab column; the SELECT
// statements in LoincGet and loincSearch list exactly these six columns.
type LoincInfo struct {
// Code is the LOINC number; LoincGet filters on it and loincSearch uses it
// to de-duplicate hits across search tokens.
Code string `db:"loinc_num"`
// LongName is the full name; loincPick shows it to the LLM and returns it
// as the name of the chosen code.
LongName string `db:"long_name"`
// ShortName is the abbreviated name; LoincAbbr takes its first token.
ShortName string `db:"short_name"`
// Component is what LoincAbbr matches its overrides against; loincSearch
// favours short components that contain no "/".
Component string `db:"component"`
// System is printed next to each candidate in the loincPick prompt
// (presumably the specimen system, e.g. "Bld" — confirm against loinc_lab).
System string `db:"system"`
// Property is the LOINC property; LoincLookup drops "NCnc" candidates when
// the reported unit is "%".
Property string `db:"property"`
}
// LoincGet fetches the loinc_lab row whose loinc_num equals code.
// It returns a pointer to the first row RefQuery produced, or nil when the
// query yielded no rows.
func LoincGet(code string) *LoincInfo {
	const query = "SELECT loinc_num, long_name, short_name, component, system, property FROM loinc_lab WHERE loinc_num = ?"

	var rows []LoincInfo
	RefQuery(query, []any{code}, &rows)
	if len(rows) == 0 {
		return nil
	}
	return &rows[0]
}
// loincAbbrOverrides maps the leading part of a LOINC component name to the
// abbreviation commonly used on lab reports. It is built once at package
// initialisation rather than on every LoincAbbr call, and must be treated as
// read-only.
var loincAbbrOverrides = map[string]string{
	"Neutrophils": "Neut", "Lymphocytes": "Lymph", "Monocytes": "Mono",
	"Eosinophils": "Eos", "Basophils": "Baso", "Platelets": "PLT",
	"Leukocytes": "WBC", "Erythrocytes": "RBC", "Hemoglobin": "Hgb",
	"Hematocrit": "Hct", "Glucose": "Glu", "Creatinine": "Cr",
	"Sodium": "Na", "Potassium": "K", "Chloride": "Cl",
	"Calcium": "Ca", "Albumin": "Alb", "Phosphate": "Phos",
	"Magnesium": "Mg",
}

// loincAbbrMaxLen is the maximum length, in bytes, of an abbreviation derived
// from the short name.
const loincAbbrMaxLen = 8

// LoincAbbr derives a short abbreviation for a LOINC entry.
// Examples: "Hgb Bld-mCnc" → "Hgb", "Neutrophils/leuk NFr Bld" → "Neut"
//
// Resolution order:
//  1. If the component (cut at its first "/" or ".") has an entry in
//     loincAbbrOverrides, that abbreviation is returned.
//  2. Otherwise the first space-delimited token of the short name is used,
//     cut at its first "/", and limited to loincAbbrMaxLen bytes.
//
// A nil info yields "". The result is never cut in the middle of a UTF-8
// sequence: when the byte limit falls inside a multi-byte rune, that rune is
// dropped entirely.
func LoincAbbr(info *LoincInfo) string {
	if info == nil {
		return ""
	}

	// Overrides are keyed by the component's leading name, so strip any
	// "/denominator" or ".qualifier" suffix before the lookup.
	comp := info.Component
	if i := strings.IndexAny(comp, "/."); i > 0 {
		comp = comp[:i]
	}
	if abbr, ok := loincAbbrOverrides[comp]; ok {
		return abbr
	}

	// Fall back to the first token of the short name, without any trailing
	// "/..." part (used by differentials).
	s := info.ShortName
	if i := strings.IndexByte(s, ' '); i > 0 {
		s = s[:i]
	}
	if i := strings.IndexByte(s, '/'); i > 0 {
		s = s[:i]
	}

	if len(s) > loincAbbrMaxLen {
		// Ranging over a string yields the byte offset of each rune start;
		// keep the last such offset that does not exceed the limit so the
		// cut never lands inside a multi-byte character.
		cut := 0
		for i := range s {
			if i > loincAbbrMaxLen {
				break
			}
			cut = i
		}
		s = s[:cut]
	}
	return s
}
// LoincLookup maps a hospital test name (plus specimen and unit) to a LOINC
// code.
//
// The lower-cased "name|specimen|unit" triple is looked up in loinc_cache
// first. On a miss, and only when GeminiKey is set, the function asks the LLM
// for search terms (falling back to plain tokenisation if that fails),
// searches loinc_lab, lets the LLM pick one candidate, stores the result in
// loinc_cache and returns the code.
//
// An empty string is returned when no key is configured, when the search
// yields no candidates, or when the pick step fails.
func LoincLookup(name, specimen, unit string) string {
	// Cache lookup.
	key := strings.ToLower(name + "|" + specimen + "|" + unit)
	var hits []struct {
		LoincCode string `db:"loinc_code"`
	}
	RefQuery("SELECT loinc_code FROM loinc_cache WHERE cache_key = ?", []any{key}, &hits)
	if len(hits) != 0 {
		return hits[0].LoincCode
	}

	// Without a Gemini key there is no way to resolve a cache miss.
	if GeminiKey == "" {
		return ""
	}

	// The LLM and the text search see "%" spelled out as a word.
	isPercent := unit == "%"
	unitText := unit
	if isPercent {
		unitText = "percentage"
	}

	// Prefer LLM-expanded terms; plain tokens are the fallback.
	terms, err := loincExpand(name, specimen, unitText)
	if err != nil {
		terms = loincTokenize(name + " " + specimen + " " + unitText)
	}
	candidates := loincSearch(terms)

	// A percentage result cannot be a number concentration (NCnc). Drop those
	// candidates unless that would leave nothing to choose from.
	if isPercent {
		var kept []LoincInfo
		for _, cand := range candidates {
			if cand.Property == "NCnc" {
				continue
			}
			kept = append(kept, cand)
		}
		if len(kept) != 0 {
			candidates = kept
		}
	}
	if len(candidates) == 0 {
		log.Printf("loinc-lookup: no candidates for %q", name)
		return ""
	}

	// Let the LLM choose among the candidates.
	code, longName, err := loincPick(name, specimen, unitText, candidates)
	if err != nil {
		log.Printf("loinc-lookup: pick failed for %q: %v", name, err)
		return ""
	}

	// Remember the answer under the original (un-substituted) unit.
	RefExec(`INSERT OR REPLACE INTO loinc_cache (cache_key, input_name, input_specimen, input_unit, loinc_code, loinc_name, confidence)
VALUES (?, ?, ?, ?, ?, ?, 'llm')`, key, name, specimen, unit, code, longName)
	return code
}
// --- internal helpers ---
// loincTokenize lower-cases s, treats the punctuation characters
// , ; ( ) [ ] / - . : as word separators, and returns the distinct
// whitespace-delimited words in order of first appearance. Words shorter than
// two bytes are discarded. The result is nil when nothing qualifies.
func loincTokenize(s string) []string {
	// Every separator maps to a single space, so one replacer pass is
	// equivalent to replacing them one after another.
	separators := strings.NewReplacer(
		",", " ", ";", " ",
		"(", " ", ")", " ",
		"[", " ", "]", " ",
		"/", " ", "-", " ",
		".", " ", ":", " ",
	)
	words := strings.Fields(separators.Replace(strings.ToLower(s)))

	var out []string
	emitted := make(map[string]struct{}, len(words))
	for _, w := range words {
		if len(w) < 2 {
			continue
		}
		if _, dup := emitted[w]; dup {
			continue
		}
		emitted[w] = struct{}{}
		out = append(out, w)
	}
	return out
}
// loincSearch returns up to 30 loinc_lab rows that best match tokens.
//
// For every token one LIKE query is run against long_name, short_name and
// component; a row scores one hit per token that matched it. With two or more
// tokens a row needs at least two hits to qualify, with a single token one hit
// is enough.
//
// Qualifying rows are ranked by
//
//	hits*100 + (50 - len(component), when 0 < len < 50) + (20 when the component has no "/")
//
// in descending order. Rows with equal scores are ordered by ascending LOINC
// code, so the ranking (and which rows survive the 30-row cut-off) is the same
// on every call for the same data. It returns nil when tokens is empty or no
// row qualifies.
func loincSearch(tokens []string) []LoincInfo {
	if len(tokens) == 0 {
		return nil
	}

	const maxCandidates = 30
	// Columns are listed in LoincInfo field order, the same order LoincGet
	// uses, so the result is filled correctly whether RefQuery maps columns by
	// name or by position. NOTE(review): RefQuery's mapping strategy is not
	// visible from this file.
	const query = "SELECT loinc_num, long_name, short_name, component, system, property FROM loinc_lab WHERE " +
		"LOWER(long_name) LIKE ? OR LOWER(short_name) LIKE ? OR LOWER(component) LIKE ?"

	type match struct {
		info  LoincInfo
		hits  int
		score int
	}

	// Count, per LOINC code, how many tokens matched the row.
	byCode := make(map[string]*match)
	for _, tok := range tokens {
		pattern := "%" + tok + "%"
		var rows []LoincInfo
		RefQuery(query, []any{pattern, pattern, pattern}, &rows)
		for _, row := range rows {
			if m, ok := byCode[row.Code]; ok {
				m.hits++
				continue
			}
			byCode[row.Code] = &match{info: row, hits: 1}
		}
	}

	minHits := 2
	if len(tokens) <= 1 {
		minHits = 1
	}

	ranked := make([]*match, 0, len(byCode))
	for _, m := range byCode {
		if m.hits < minHits {
			continue
		}
		m.score = m.hits * 100
		// Shorter components are favoured.
		if n := len(m.info.Component); n > 0 && n < 50 {
			m.score += 50 - n
		}
		// Components without a "/" get a bonus.
		if !strings.Contains(m.info.Component, "/") {
			m.score += 20
		}
		ranked = append(ranked, m)
	}
	if len(ranked) == 0 {
		return nil
	}

	// Map iteration order is random, so break score ties on the code to keep
	// the output stable.
	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].score != ranked[j].score {
			return ranked[i].score > ranked[j].score
		}
		return ranked[i].info.Code < ranked[j].info.Code
	})
	if len(ranked) > maxCandidates {
		ranked = ranked[:maxCandidates]
	}

	top := make([]LoincInfo, len(ranked))
	for i, m := range ranked {
		top[i] = m.info
	}
	return top
}
// loincExpand asks the LLM (via CallGemini) for LOINC-oriented search terms
// describing the given lab test and returns them lower-cased, trimmed and
// de-duplicated, followed by any tokens of "name specimen" (as produced by
// loincTokenize) that are not already present.
//
// It returns an error when the LLM call fails or when the response is not a
// JSON object of the form {"terms": [...]}.
func loincExpand(name, specimen, unit string) ([]string, error) {
	prompt := fmt.Sprintf(`Given a lab test, return search terms to find it in the LOINC database.
LOINC uses formal medical terminology (e.g. "Leukocytes" not "White Blood Cells", "Erythrocytes" not "Red Blood Cells", "Oxygen" not "O2" or "pO2").
Lab test:
Name: %s
Specimen: %s
Unit: %s
Return a JSON object: {"terms": ["term1", "term2", ...]}
Include: the LOINC component name, specimen system code (e.g. Bld, BldA, BldC, BldV, Ser/Plas, Urine), and any synonyms that might appear in LOINC long names.
Keep it to 3-6 terms. JSON only.`, name, specimen, unit)

	reply, err := CallGemini(prompt)
	if err != nil {
		return nil, err
	}

	var parsed struct {
		Terms []string `json:"terms"`
	}
	if err := json.Unmarshal([]byte(reply), &parsed); err != nil {
		return nil, fmt.Errorf("parse expand response: %w", err)
	}

	// add appends term unless it has been emitted before.
	var out []string
	emitted := make(map[string]struct{})
	add := func(term string) {
		if _, dup := emitted[term]; dup {
			return
		}
		emitted[term] = struct{}{}
		out = append(out, term)
	}

	// LLM terms first, normalised; blank entries are ignored.
	for _, rawTerm := range parsed.Terms {
		if term := strings.ToLower(strings.TrimSpace(rawTerm)); term != "" {
			add(term)
		}
	}
	// Then the literal words of the input, so they are always searched too.
	for _, tok := range loincTokenize(name + " " + specimen) {
		add(tok)
	}
	return out, nil
}
// loincPick asks the LLM (via CallGemini) to choose the best LOINC candidate
// for the given lab test and returns the chosen code and its long name.
//
// The candidates are presented as a 1-based numbered list. The reply must be a
// JSON object with "pick" (list number), "loinc" (code) and "name". The answer
// is resolved as follows:
//   - if "loinc" is empty and "pick" is in range, the picked candidate's code
//     is used as the code to look for;
//   - the first candidate whose code equals that code wins;
//   - otherwise, if "pick" is in range, the picked candidate wins;
//   - otherwise an error is returned.
//
// The returned name always comes from the candidate list, never from the
// LLM's "name" field. Errors are also returned when the LLM call fails or the
// reply is not valid JSON of the expected shape.
func loincPick(name, specimen, unit string, candidates []LoincInfo) (string, string, error) {
	// Render "N. CODE — long name [System: sys]", one candidate per line.
	var listing strings.Builder
	for idx := range candidates {
		cand := &candidates[idx]
		// Spell percentages out in words for the LLM.
		shown := strings.ReplaceAll(cand.LongName, "/100 ", "percentage of ")
		shown = strings.ReplaceAll(shown, "fraction", "percentage")
		if idx > 0 {
			listing.WriteString("\n")
		}
		fmt.Fprintf(&listing, "%d. %s — %s [System: %s]", idx+1, cand.Code, shown, cand.System)
	}

	prompt := fmt.Sprintf(`You are a clinical laboratory informatics system. Given a lab test, pick the BEST matching LOINC code from the candidate list.
Lab test:
Name: %s
Specimen: %s
Unit: %s
Candidates:
%s
Return ONLY a JSON object: {"pick": <number>, "loinc": "<code>", "name": "<long name>"}
Pick the candidate that best matches the test name, specimen type, and unit. If none match well, pick the closest.
JSON only, no explanation.`, name, specimen, unit, listing.String())

	reply, err := CallGemini(prompt)
	if err != nil {
		return "", "", fmt.Errorf("LLM call failed: %w", err)
	}

	var answer struct {
		Pick  int    `json:"pick"`
		Loinc string `json:"loinc"`
		Name  string `json:"name"`
	}
	if err := json.Unmarshal([]byte(reply), &answer); err != nil {
		return "", "", fmt.Errorf("parse LLM response: %w", err)
	}

	pickInRange := answer.Pick > 0 && answer.Pick <= len(candidates)

	// Prefer the code the LLM named; derive it from the list number only when
	// no code was given.
	wanted := answer.Loinc
	if wanted == "" && pickInRange {
		wanted = candidates[answer.Pick-1].Code
	}
	for idx := range candidates {
		if candidates[idx].Code == wanted {
			return wanted, candidates[idx].LongName, nil
		}
	}

	// The named code is not a candidate: trust the list number instead.
	if pickInRange {
		chosen := candidates[answer.Pick-1]
		return chosen.Code, chosen.LongName, nil
	}
	return "", "", fmt.Errorf("LLM returned %q (pick %d) — not in %d candidates", wanted, answer.Pick, len(candidates))
}