package lib
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// Normalize normalizes entry names within a dossier for a given category.
|
|
// Uses heuristic pre-grouping + LLM to map variant names to canonical forms.
|
|
// Updates Summary (display) and Data JSON (normalized_name, abbreviation).
|
|
// Original Type field is never modified.
|
|
// Silently returns nil if no API key is configured.
|
|
func Normalize(dossierID string, category int) error {
|
|
if GeminiKey == "" {
|
|
return nil
|
|
}
|
|
|
|
// 1. Get unique type names via SQL GROUP BY
|
|
type typeRow struct {
|
|
Type string `db:"type"`
|
|
}
|
|
var rows []typeRow
|
|
if err := dbQuery("SELECT type FROM entries WHERE dossier_id = ? AND category = ? GROUP BY type",
|
|
[]any{dossierID, category}, &rows); err != nil {
|
|
return fmt.Errorf("query unique types: %w", err)
|
|
}
|
|
|
|
// Filter out parent types (e.g. "lab_order")
|
|
var allNames []string
|
|
for _, r := range rows {
|
|
if r.Type != "lab_order" && r.Type != "" {
|
|
allNames = append(allNames, r.Type)
|
|
}
|
|
}
|
|
|
|
if len(allNames) < 2 {
|
|
return nil
|
|
}
|
|
|
|
// 2. Pre-group by heuristic key (strip POCT, specimen suffixes, normalize case)
|
|
groups := make(map[string][]string) // cleanKey → [original names]
|
|
for _, name := range allNames {
|
|
key := normalizeKey(name)
|
|
groups[key] = append(groups[key], name)
|
|
}
|
|
|
|
// Send just the group keys to LLM
|
|
keys := make([]string, 0, len(groups))
|
|
for k := range groups {
|
|
keys = append(keys, k)
|
|
}
|
|
sort.Strings(keys)
|
|
|
|
log.Printf("normalize: %d unique types → %d groups after pre-grouping", len(allNames), len(keys))
|
|
|
|
// 3. Call LLM with group keys (batched to stay within token limits)
|
|
mapping := make(map[string]normMapping)
|
|
batchSize := 100
|
|
for i := 0; i < len(keys); i += batchSize {
|
|
end := i + batchSize
|
|
if end > len(keys) {
|
|
end = len(keys)
|
|
}
|
|
batch := keys[i:end]
|
|
log.Printf("normalize: LLM batch %d-%d of %d", i+1, end, len(keys))
|
|
|
|
batchMap, err := callNormalizeLLM(batch)
|
|
if err != nil {
|
|
return fmt.Errorf("LLM batch %d-%d: %w", i+1, end, err)
|
|
}
|
|
for k, v := range batchMap {
|
|
mapping[k] = v
|
|
}
|
|
}
|
|
|
|
// 4. Expand: each original name in a group gets the group's canonical mapping
|
|
fullMapping := make(map[string]normMapping)
|
|
for key, origNames := range groups {
|
|
if m, ok := mapping[key]; ok {
|
|
for _, orig := range origNames {
|
|
fullMapping[orig] = m
|
|
}
|
|
}
|
|
}
|
|
|
|
log.Printf("normalize: LLM mapped %d groups → %d original names covered", len(mapping), len(fullMapping))
|
|
|
|
// 5. Save LabTest entries for any new LOINC codes
|
|
seenLoinc := make(map[string]bool)
|
|
var labTests []LabTest
|
|
for _, m := range fullMapping {
|
|
if m.Loinc == "" || seenLoinc[m.Loinc] {
|
|
continue
|
|
}
|
|
seenLoinc[m.Loinc] = true
|
|
dir := m.Direction
|
|
if dir == "" {
|
|
dir = DirRange
|
|
}
|
|
factor := m.SIFactor
|
|
if factor == 0 {
|
|
factor = 1.0
|
|
}
|
|
labTests = append(labTests, LabTest{
|
|
LoincID: m.Loinc,
|
|
Name: m.Name,
|
|
SIUnit: m.SIUnit,
|
|
Direction: dir,
|
|
SIFactor: ToLabScale(factor),
|
|
})
|
|
}
|
|
// NOTE: Lab test saving removed - import tools handle this directly
|
|
// if len(labTests) > 0 {
|
|
// log.Printf("normalize: found %d lab tests (saving disabled)", len(labTests))
|
|
// }
|
|
|
|
// 6. Load entries, apply mapping, save only changed ones
|
|
entries, err := EntryQueryOld(dossierID, category, "")
|
|
if err != nil {
|
|
return fmt.Errorf("load entries: %w", err)
|
|
}
|
|
|
|
var toSave []Entry
|
|
for _, e := range entries {
|
|
if e.ParentID == "" {
|
|
continue
|
|
}
|
|
norm, ok := fullMapping[e.Type]
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
var data map[string]interface{}
|
|
if json.Unmarshal([]byte(e.Data), &data) != nil {
|
|
data = make(map[string]interface{})
|
|
}
|
|
|
|
// Skip if already fully normalized (name + loinc + search_key match)
|
|
existingName, _ := data["normalized_name"].(string)
|
|
existingLoinc, _ := data["loinc"].(string)
|
|
needsSearchKey := (norm.Loinc != "" && e.SearchKey == "")
|
|
if existingName == norm.Name && (norm.Loinc == "" || existingLoinc == norm.Loinc) && !needsSearchKey {
|
|
continue
|
|
}
|
|
|
|
data["normalized_name"] = norm.Name
|
|
data["abbreviation"] = norm.Abbr
|
|
if norm.Loinc != "" {
|
|
data["loinc"] = norm.Loinc
|
|
}
|
|
if norm.SIUnit != "" {
|
|
data["si_unit"] = norm.SIUnit
|
|
}
|
|
if norm.SIFactor != 0 && norm.SIFactor != 1.0 {
|
|
data["si_factor"] = norm.SIFactor
|
|
}
|
|
b, _ := json.Marshal(data)
|
|
e.Data = string(b)
|
|
|
|
// Update SearchKey with LOINC code (encrypted)
|
|
if norm.Loinc != "" {
|
|
e.SearchKey = norm.Loinc
|
|
}
|
|
|
|
// Rebuild Summary: "Abbr: value unit"
|
|
unit, _ := data["unit"].(string)
|
|
summary := norm.Abbr + ": " + e.Value
|
|
if unit != "" {
|
|
summary += " " + unit
|
|
}
|
|
e.Summary = summary
|
|
|
|
toSave = append(toSave, *e)
|
|
}
|
|
|
|
if len(toSave) == 0 {
|
|
log.Printf("normalize: no changes needed")
|
|
return nil
|
|
}
|
|
|
|
log.Printf("normalize: updating %d entries", len(toSave))
|
|
return dbSave("entries", toSave)
|
|
}
|
|
|
|
// normalizeKey reduces a test name to a heuristic grouping key so that
// obvious duplicates collapse together: POCT variants, specimen-type
// suffixes, and case differences all map to the same key.
func normalizeKey(name string) string {
	key := strings.TrimSpace(strings.ToLower(name))
	key = strings.TrimPrefix(key, "poct ")
	// Strip specimen-type suffixes only — qualifiers that distinguish
	// tests (", total", ", direct") are intentionally left alone.
	for _, suffix := range []string{
		", whole blood", ", wblood", ", wb", ", wbl", ", blood", ", s/p", " ach",
	} {
		key = strings.TrimSuffix(key, suffix)
	}
	return strings.TrimSpace(key)
}
|
|
|
|
// normMapping is the normalization record the LLM returns for a single
// test name (see the prompt rules in callNormalizeLLM for field semantics).
type normMapping struct {
	Name      string  `json:"name"`      // canonical display name
	Abbr      string  `json:"abbr"`      // short abbreviation (prompt asks for 1-8 chars)
	Loinc     string  `json:"loinc"`     // most common LOINC code; "" if unknown
	SIUnit    string  `json:"si_unit"`   // standard SI unit; "" if not numeric
	SIFactor  float64 `json:"si_factor"` // conventional→SI multiplier; 1.0 if already SI or unknown
	Direction string  `json:"direction"` // "range", "lower_better", or "higher_better"
}
|
|
|
|
func callNormalizeLLM(names []string) (map[string]normMapping, error) {
|
|
nameList := strings.Join(names, "\n")
|
|
|
|
prompt := fmt.Sprintf(`Given these medical test names from a single patient's records, normalize each to a canonical name, abbreviation, LOINC code, SI unit, conversion factor, and direction.
|
|
|
|
Rules:
|
|
- Use standard medical abbreviations: WBC, RBC, Hgb, Hct, PLT, Na, K, Cl, CO2, BUN, Cr, Ca, Glu, ALT, AST, ALP, Bili, Alb, TP, Mg, Phos, Fe, etc.
|
|
- For tests without standard abbreviations, use a short canonical name as abbreviation
|
|
- Keep abbreviations concise (1-8 chars)
|
|
- If two names are the same test, give them the same canonical name and abbreviation
|
|
- loinc: the most common LOINC code for this test (e.g. "718-7" for Hemoglobin). Use "" if unknown.
|
|
- si_unit: the standard SI unit (e.g. "g/L", "mmol/L", "10^9/L"). Use "" if not numeric.
|
|
- si_factor: multiplier to convert from the most common conventional unit to SI. E.g. Hemoglobin g/dL→g/L = 10.0. Use 1.0 if already SI or unknown.
|
|
- direction: "range" if both high and low are bad (most tests), "lower_better" if low values are healthy (CRP, LDL, triglycerides, glucose), "higher_better" if high values are healthy (HDL). Default to "range".
|
|
|
|
Return a JSON object where each key is the EXACT input name, value is {"name":"Canonical Name","abbr":"Abbreviation","loinc":"CODE","si_unit":"unit","si_factor":1.0,"direction":"range"}.
|
|
|
|
Test names:
|
|
%s`, nameList)
|
|
|
|
maxTokens := 8192
|
|
temp := 0.0
|
|
config := &GeminiConfig{
|
|
Temperature: &temp,
|
|
MaxOutputTokens: &maxTokens,
|
|
}
|
|
|
|
resp, err := CallGeminiMultimodal([]GeminiPart{{Text: prompt}}, config)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Gemini sometimes returns object, sometimes array of objects
|
|
var mapping map[string]normMapping
|
|
if err := json.Unmarshal([]byte(resp), &mapping); err != nil {
|
|
var arr []map[string]normMapping
|
|
if err2 := json.Unmarshal([]byte(resp), &arr); err2 != nil {
|
|
return nil, fmt.Errorf("parse response: %w (first 300 chars: %.300s)", err, resp)
|
|
}
|
|
mapping = make(map[string]normMapping)
|
|
for _, item := range arr {
|
|
for k, v := range item {
|
|
mapping[k] = v
|
|
}
|
|
}
|
|
}
|
|
|
|
return mapping, nil
|
|
}
|