315 lines
8.3 KiB
Go
315 lines
8.3 KiB
Go
package lib
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"log"
	"sort"
	"strings"
)
|
|
|
|
// LoincInfo is one row of official LOINC data as stored in the loinc_lab
// table. Each field is bound to its column through the `db` struct tag.
type LoincInfo struct {
	// Code is the loinc_num column; it is the key LoincGet looks rows up by.
	Code string `db:"loinc_num"`
	// LongName is the long_name column.
	LongName string `db:"long_name"`
	// ShortName is the short_name column; LoincAbbr derives abbreviations from it.
	ShortName string `db:"short_name"`
	// Component is the component column.
	Component string `db:"component"`
	// System is the system column.
	System string `db:"system"`
	// Property is the property column (for example "NCnc").
	Property string `db:"property"`
}
|
|
|
|
// LoincGet returns official LOINC info from loinc_lab.
|
|
func LoincGet(code string) *LoincInfo {
|
|
var results []LoincInfo
|
|
RefQuery("SELECT loinc_num, long_name, short_name, component, system, property FROM loinc_lab WHERE loinc_num = ?", []any{code}, &results)
|
|
if len(results) > 0 {
|
|
return &results[0]
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// LoincAbbr derives a short abbreviation from loinc_lab.short_name.
|
|
// Examples: "Hgb Bld-mCnc" → "Hgb", "Neutrophils/leuk NFr Bld" → "Neut"
|
|
func LoincAbbr(info *LoincInfo) string {
|
|
if info == nil {
|
|
return ""
|
|
}
|
|
s := info.ShortName
|
|
// Take first token (before space), strip trailing /... for differentials
|
|
if i := strings.IndexByte(s, ' '); i > 0 {
|
|
s = s[:i]
|
|
}
|
|
if i := strings.IndexByte(s, '/'); i > 0 {
|
|
s = s[:i]
|
|
}
|
|
// Common abbreviation overrides
|
|
overrides := map[string]string{
|
|
"Neutrophils": "Neut", "Lymphocytes": "Lymph", "Monocytes": "Mono",
|
|
"Eosinophils": "Eos", "Basophils": "Baso", "Platelets": "PLT",
|
|
"Leukocytes": "WBC", "Erythrocytes": "RBC", "Hemoglobin": "Hgb",
|
|
"Hematocrit": "Hct", "Glucose": "Glu", "Creatinine": "Cr",
|
|
"Sodium": "Na", "Potassium": "K", "Chloride": "Cl",
|
|
"Calcium": "Ca", "Albumin": "Alb", "Phosphate": "Phos",
|
|
"Magnesium": "Mg",
|
|
}
|
|
// Match on component first word for overrides
|
|
comp := info.Component
|
|
if i := strings.IndexAny(comp, "/."); i > 0 {
|
|
comp = comp[:i]
|
|
}
|
|
if abbr, ok := overrides[comp]; ok {
|
|
return abbr
|
|
}
|
|
// Truncate long abbreviations
|
|
if len(s) > 8 {
|
|
s = s[:8]
|
|
}
|
|
return s
|
|
}
|
|
|
|
// LoincLookup resolves a hospital test name to a LOINC code.
|
|
// Checks cache first; on miss, uses Gemini expand → search → pick → cache.
|
|
// Returns empty string if lookup fails (no candidates, LLM error, etc).
|
|
func LoincLookup(name, specimen, unit string) string {
|
|
// 1. Check cache
|
|
cacheKey := strings.ToLower(name + "|" + specimen + "|" + unit)
|
|
var cached []struct {
|
|
LoincCode string `db:"loinc_code"`
|
|
}
|
|
RefQuery("SELECT loinc_code FROM loinc_cache WHERE cache_key = ?", []any{cacheKey}, &cached)
|
|
if len(cached) > 0 {
|
|
return cached[0].LoincCode
|
|
}
|
|
|
|
// 2. No Gemini key = can't do LLM lookup
|
|
if GeminiKey == "" {
|
|
return ""
|
|
}
|
|
|
|
// 3. Expand + search + pick
|
|
lookupUnit := unit
|
|
if lookupUnit == "%" {
|
|
lookupUnit = "percentage"
|
|
}
|
|
|
|
tokens := loincTokenize(name + " " + specimen + " " + lookupUnit)
|
|
if expanded, err := loincExpand(name, specimen, lookupUnit); err == nil {
|
|
tokens = expanded
|
|
}
|
|
|
|
candidates := loincSearch(tokens)
|
|
|
|
// Filter: if unit is %, drop count codes (NCnc)
|
|
if unit == "%" {
|
|
var filtered []LoincInfo
|
|
for _, c := range candidates {
|
|
if c.Property != "NCnc" {
|
|
filtered = append(filtered, c)
|
|
}
|
|
}
|
|
if len(filtered) > 0 {
|
|
candidates = filtered
|
|
}
|
|
}
|
|
|
|
if len(candidates) == 0 {
|
|
log.Printf("loinc-lookup: no candidates for %q", name)
|
|
return ""
|
|
}
|
|
|
|
// 4. LLM pick
|
|
code, lname, err := loincPick(name, specimen, lookupUnit, candidates)
|
|
if err != nil {
|
|
log.Printf("loinc-lookup: pick failed for %q: %v", name, err)
|
|
return ""
|
|
}
|
|
|
|
// 5. Cache
|
|
RefExec(`INSERT OR REPLACE INTO loinc_cache (cache_key, input_name, input_specimen, input_unit, loinc_code, loinc_name, confidence)
|
|
VALUES (?, ?, ?, ?, ?, ?, 'llm')`, cacheKey, name, specimen, unit, code, lname)
|
|
|
|
return code
|
|
}
|
|
|
|
// --- internal helpers ---
|
|
|
|
// loincTokenize splits s into lower-cased search tokens.
//
// The characters , ; ( ) [ ] / - . : are treated as word breaks in addition
// to whitespace. Tokens shorter than two bytes are dropped and duplicates are
// removed, keeping first-occurrence order. The result is nil when no token
// survives.
func loincTokenize(s string) []string {
	const separators = ",;()[]/-.:"

	// Turn every separator into a space in a single pass, rather than one
	// full-string replacement per separator.
	s = strings.Map(func(r rune) rune {
		if strings.ContainsRune(separators, r) {
			return ' '
		}
		return r
	}, strings.ToLower(s))

	var tokens []string
	seen := make(map[string]struct{})
	for _, tok := range strings.Fields(s) {
		// len counts bytes, so a single multi-byte character is kept.
		if len(tok) < 2 {
			continue
		}
		if _, dup := seen[tok]; dup {
			continue
		}
		seen[tok] = struct{}{}
		tokens = append(tokens, tok)
	}
	return tokens
}
|
|
|
|
func loincSearch(tokens []string) []LoincInfo {
|
|
if len(tokens) == 0 {
|
|
return nil
|
|
}
|
|
type entry struct {
|
|
c LoincInfo
|
|
hits int
|
|
}
|
|
entries := map[string]*entry{}
|
|
|
|
for _, t := range tokens {
|
|
pattern := "%" + t + "%"
|
|
query := "SELECT loinc_num, long_name, short_name, system, component, property FROM loinc_lab WHERE " +
|
|
"LOWER(long_name) LIKE ? OR LOWER(short_name) LIKE ? OR LOWER(component) LIKE ?"
|
|
var results []LoincInfo
|
|
RefQuery(query, []any{pattern, pattern, pattern}, &results)
|
|
for _, c := range results {
|
|
if e, ok := entries[c.Code]; ok {
|
|
e.hits++
|
|
} else {
|
|
entries[c.Code] = &entry{c: c, hits: 1}
|
|
}
|
|
}
|
|
}
|
|
|
|
minHits := 2
|
|
if len(tokens) <= 1 {
|
|
minHits = 1
|
|
}
|
|
|
|
type scored struct {
|
|
c LoincInfo
|
|
score int
|
|
}
|
|
var scoredResults []scored
|
|
for _, e := range entries {
|
|
if e.hits < minHits {
|
|
continue
|
|
}
|
|
s := e.hits * 100
|
|
compLen := len(e.c.Component)
|
|
if compLen > 0 && compLen < 50 {
|
|
s += 50 - compLen
|
|
}
|
|
if !strings.Contains(e.c.Component, "/") {
|
|
s += 20
|
|
}
|
|
scoredResults = append(scoredResults, scored{e.c, s})
|
|
}
|
|
|
|
for i := range scoredResults {
|
|
for j := i + 1; j < len(scoredResults); j++ {
|
|
if scoredResults[j].score > scoredResults[i].score {
|
|
scoredResults[i], scoredResults[j] = scoredResults[j], scoredResults[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
var top []LoincInfo
|
|
for i, s := range scoredResults {
|
|
if i >= 30 {
|
|
break
|
|
}
|
|
top = append(top, s.c)
|
|
}
|
|
return top
|
|
}
|
|
|
|
func loincExpand(name, specimen, unit string) ([]string, error) {
|
|
prompt := fmt.Sprintf(`Given a lab test, return search terms to find it in the LOINC database.
|
|
LOINC uses formal medical terminology (e.g. "Leukocytes" not "White Blood Cells", "Erythrocytes" not "Red Blood Cells", "Oxygen" not "O2" or "pO2").
|
|
|
|
Lab test:
|
|
Name: %s
|
|
Specimen: %s
|
|
Unit: %s
|
|
|
|
Return a JSON object: {"terms": ["term1", "term2", ...]}
|
|
Include: the LOINC component name, specimen system code (e.g. Bld, BldA, BldC, BldV, Ser/Plas, Urine), and any synonyms that might appear in LOINC long names.
|
|
Keep it to 3-6 terms. JSON only.`, name, specimen, unit)
|
|
|
|
resp, err := CallGemini(prompt)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var result struct {
|
|
Terms []string `json:"terms"`
|
|
}
|
|
if err := json.Unmarshal([]byte(resp), &result); err != nil {
|
|
return nil, fmt.Errorf("parse expand response: %w", err)
|
|
}
|
|
|
|
var terms []string
|
|
seen := map[string]bool{}
|
|
for _, t := range result.Terms {
|
|
t = strings.ToLower(strings.TrimSpace(t))
|
|
if t != "" && !seen[t] {
|
|
terms = append(terms, t)
|
|
seen[t] = true
|
|
}
|
|
}
|
|
for _, t := range loincTokenize(name + " " + specimen) {
|
|
if !seen[t] {
|
|
terms = append(terms, t)
|
|
seen[t] = true
|
|
}
|
|
}
|
|
return terms, nil
|
|
}
|
|
|
|
func loincPick(name, specimen, unit string, candidates []LoincInfo) (string, string, error) {
|
|
var lines []string
|
|
for i, c := range candidates {
|
|
display := c.LongName
|
|
display = strings.ReplaceAll(display, "/100 ", "percentage of ")
|
|
display = strings.ReplaceAll(display, "fraction", "percentage")
|
|
lines = append(lines, fmt.Sprintf("%d. %s — %s [System: %s]", i+1, c.Code, display, c.System))
|
|
}
|
|
|
|
prompt := fmt.Sprintf(`You are a clinical laboratory informatics system. Given a lab test, pick the BEST matching LOINC code from the candidate list.
|
|
|
|
Lab test:
|
|
Name: %s
|
|
Specimen: %s
|
|
Unit: %s
|
|
|
|
Candidates:
|
|
%s
|
|
|
|
Return ONLY a JSON object: {"pick": <number>, "loinc": "<code>", "name": "<long name>"}
|
|
Pick the candidate that best matches the test name, specimen type, and unit. If none match well, pick the closest.
|
|
JSON only, no explanation.`, name, specimen, unit, strings.Join(lines, "\n"))
|
|
|
|
resp, err := CallGemini(prompt)
|
|
if err != nil {
|
|
return "", "", fmt.Errorf("LLM call failed: %w", err)
|
|
}
|
|
|
|
var result struct {
|
|
Pick int `json:"pick"`
|
|
Loinc string `json:"loinc"`
|
|
Name string `json:"name"`
|
|
}
|
|
if err := json.Unmarshal([]byte(resp), &result); err != nil {
|
|
return "", "", fmt.Errorf("parse LLM response: %w", err)
|
|
}
|
|
|
|
if result.Loinc == "" && result.Pick > 0 && result.Pick <= len(candidates) {
|
|
result.Loinc = candidates[result.Pick-1].Code
|
|
result.Name = candidates[result.Pick-1].LongName
|
|
}
|
|
for _, c := range candidates {
|
|
if c.Code == result.Loinc {
|
|
return result.Loinc, c.LongName, nil
|
|
}
|
|
}
|
|
if result.Pick > 0 && result.Pick <= len(candidates) {
|
|
c := candidates[result.Pick-1]
|
|
return c.Code, c.LongName, nil
|
|
}
|
|
return "", "", fmt.Errorf("LLM returned %q (pick %d) — not in %d candidates", result.Loinc, result.Pick, len(candidates))
|
|
}
|