refactor: clean up reference data and remove rate limiting

Reference data simplification (choke point pattern):
- Remove RefSave/RefDelete from lib (import-time only, not runtime)
- Remove LabTestSave*, LabRefSave* from lib/lab_reference.go
- Remove PopulateReferences (LLM-based ref generation)
- Keep only RefQuery() for runtime reads
- Import tools handle their own SQL inserts

Rate limiting removed:
- Delete new_signups table and all rate limit code
- Solved via different approach (not in codebase)

Database consolidation (on staging):
- Moved genotypes table (30K SNPs) to reference.db
- Deleted empty DBs: portal.db, rate_limit.db, snpedia.db, ratelimit.db

Net -293 lines. Runtime code now only reads reference data via RefQuery().

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
James 2026-02-11 01:20:18 -05:00
parent 6486a52ad9
commit 6ba57df6ae
9 changed files with 24 additions and 317 deletions

View File

@ -59,7 +59,7 @@ EXAMPLE:
import-genome /path/to/dna.txt 3b38234f2b0f7ee6
DATABASE:
SNPedia reference: ~/dev/inou/snpedia-genotypes/genotypes.db (read-only)
SNPedia reference: /tank/inou/data/reference.db (genotypes table, read-only)
Entries: via lib.EntryAddBatchValues() to /tank/inou/data/inou.db
VERSION: ` + version)
@ -265,7 +265,7 @@ func main() {
// ===== PHASE 4: Load SNPedia and match =====
phase4Start := time.Now()
snpediaDB, err := sql.Open("sqlite3", "/tank/inou/data/genotypes.db?mode=ro")
snpediaDB, err := sql.Open("sqlite3", "/tank/inou/data/reference.db?mode=ro")
if err != nil {
fmt.Println("SNPedia DB open failed:", err)
os.Exit(1)

View File

@ -944,8 +944,8 @@ func OAuthCleanup() error {
// Reference Database Queries (lab_test, lab_reference)
// ============================================================================
// refQuery queries the reference database (read-only reference data)
func refQuery(query string, args []any, slicePtr any) error {
// RefQuery queries the reference database (read-only reference data)
func RefQuery(query string, args []any, slicePtr any) error {
start := time.Now()
defer func() { logSlowQuery(query, time.Since(start), args...) }()
@ -1057,75 +1057,3 @@ func refQuery(query string, args []any, slicePtr any) error {
return rows.Err()
}
// refSave writes v into the given reference-database table using
// INSERT OR REPLACE. v may be a struct, a pointer to a struct, or a slice
// (or pointer to a slice) of structs / struct pointers; slices are saved
// element by element and the first failure aborts the remaining elements.
//
// Column names and field positions come from getTableInfo. Only string,
// int/int64 and bool fields carry a value (bools are stored as 0/1); any
// other field kind is written as NULL.
func refSave(table string, v any) error {
	rv := reflect.ValueOf(v)
	if rv.Kind() == reflect.Ptr {
		rv = rv.Elem()
	}

	// Slice input: recurse once per element, passing a pointer to the element.
	if rv.Kind() == reflect.Slice {
		for idx := 0; idx < rv.Len(); idx++ {
			elem := rv.Index(idx)
			if elem.Kind() == reflect.Ptr {
				elem = elem.Elem()
			}
			err := refSave(table, elem.Addr().Interface())
			if err != nil {
				return err
			}
		}
		return nil
	}

	// Single struct: build one INSERT OR REPLACE covering every mapped field.
	info, err := getTableInfo(table, v)
	if err != nil {
		return err
	}

	var (
		columns []string
		args    []any
		marks   []string
	)
	for _, fi := range info.Fields {
		fv := rv.Field(fi.Index)

		var arg any // stays nil (NULL) for unsupported kinds
		switch fi.Type.Kind() {
		case reflect.String:
			arg = fv.String()
		case reflect.Int, reflect.Int64:
			arg = fv.Int()
		case reflect.Bool:
			if fv.Bool() {
				arg = 1
			} else {
				arg = 0
			}
		}

		columns = append(columns, fi.Column)
		marks = append(marks, "?")
		args = append(args, arg)
	}

	stmt := fmt.Sprintf("INSERT OR REPLACE INTO %s (%s) VALUES (%s)",
		table, strings.Join(columns, ", "), strings.Join(marks, ", "))

	began := time.Now()
	defer func() { logSlowQuery(stmt, time.Since(began), args...) }()

	_, err = refDB.Exec(stmt, args...)
	return err
}
// refDelete removes the rows of table whose pkCol column equals pkVal from
// the reference database. table and pkCol are spliced into the SQL text
// verbatim, so they must be trusted identifiers; pkVal is bound as a
// parameter. The statement is timed and handed to logSlowQuery.
func refDelete(table, pkCol, pkVal string) error {
	began := time.Now()
	stmt := fmt.Sprintf("DELETE FROM %s WHERE %s = ?", table, pkCol)
	defer func() {
		logSlowQuery(stmt, time.Since(began), pkVal)
	}()

	if _, err := refDB.Exec(stmt, pkVal); err != nil {
		return err
	}
	return nil
}

View File

@ -1,11 +1,8 @@
package lib
import (
"encoding/json"
"fmt"
"log"
"math"
"strings"
)
// LabScale is the multiplier for storing float values as int64 (6 decimal places)
@ -60,46 +57,16 @@ func MakeRefID(loinc, source, sex string, ageDays int64) string {
// LabTestGet retrieves a LabTest by LOINC code. Returns nil if not found.
func LabTestGet(loincID string) (*LabTest, error) {
var tests []LabTest
if err := refQuery("SELECT * FROM lab_test WHERE loinc_id = ?", []any{loincID}, &tests); err != nil || len(tests) == 0 {
if err := RefQuery("SELECT * FROM lab_test WHERE loinc_id = ?", []any{loincID}, &tests); err != nil || len(tests) == 0 {
return nil, err
}
return &tests[0], nil
}
// LabTestSave upserts a single LabTest record into the lab_test table.
// It is a thin wrapper around refSave and returns whatever error refSave
// reports.
func LabTestSave(t *LabTest) error {
return refSave("lab_test", t)
}
// LabTestSaveBatch upserts every LabTest in tests into the lab_test table
// via refSave. An empty (or nil) slice is a no-op and returns nil.
func LabTestSaveBatch(tests []LabTest) error {
	if len(tests) > 0 {
		return refSave("lab_test", tests)
	}
	return nil
}
// LabRefSave upserts a LabReference record into the lab_reference table.
// Any RefID already set on r is overwritten: the ID is always regenerated
// with MakeRefID from r's LoincID, Source, Sex and AgeDays before saving.
// r is dereferenced unconditionally, so it must not be nil.
func LabRefSave(r *LabReference) error {
r.RefID = MakeRefID(r.LoincID, r.Source, r.Sex, r.AgeDays)
return refSave("lab_reference", r)
}
// LabRefSaveBatch upserts all of refs into the lab_reference table.
// Each element's RefID is regenerated in place (the caller's slice is
// modified) from its LoincID, Source, Sex and AgeDays before the batch is
// handed to refSave. An empty slice is a no-op and returns nil.
func LabRefSaveBatch(refs []LabReference) error {
	if len(refs) == 0 {
		return nil
	}
	for idx := range refs {
		ref := &refs[idx]
		ref.RefID = MakeRefID(ref.LoincID, ref.Source, ref.Sex, ref.AgeDays)
	}
	return refSave("lab_reference", refs)
}
// LabRefLookupAll returns all reference ranges for a LOINC code.
func LabRefLookupAll(loincID string) ([]LabReference, error) {
var refs []LabReference
return refs, refQuery("SELECT ref_id, loinc_id, source, sex, age_days, age_end, ref_low, ref_high, unit FROM lab_reference WHERE loinc_id = ?",
return refs, RefQuery("SELECT ref_id, loinc_id, source, sex, age_days, age_end, ref_low, ref_high, unit FROM lab_reference WHERE loinc_id = ?",
[]any{loincID}, &refs)
}
@ -107,7 +74,7 @@ func LabRefLookupAll(loincID string) ([]LabReference, error) {
// Returns nil if no matching reference found.
func LabRefLookup(loincID, sex string, ageDays int64) (*LabReference, error) {
var refs []LabReference
if err := refQuery(
if err := RefQuery(
"SELECT ref_id, loinc_id, source, sex, age_days, age_end, ref_low, ref_high, unit FROM lab_reference WHERE loinc_id = ?",
[]any{loincID}, &refs,
); err != nil {
@ -152,141 +119,3 @@ func AgeDays(dobUnix, atUnix int64) int64 {
return (atUnix - dobUnix) / 86400
}
// PopulateReferences asks the LLM for reference ranges for every lab_test
// row that has an SI unit but no lab_reference row yet, and stores the
// results in lab_reference.
//
// It returns nil without doing anything when GeminiKey is empty, when no
// test needs a reference, or when the LLM produced no usable ranges.
// Tests are sent in batches of 50; a failed batch is logged and skipped
// rather than aborting the run. Errors are returned only for the two
// initial reference-database reads and for the final save.
func PopulateReferences() error {
	if GeminiKey == "" {
		return nil
	}

	// Every known lab test.
	var allTests []LabTest
	err := refQuery("SELECT loinc_id, name, si_unit, direction, si_factor FROM lab_test", nil, &allTests)
	if err != nil {
		return fmt.Errorf("load lab_test: %w", err)
	}

	// LOINC codes that already have at least one reference range.
	var known []LabReference
	err = refQuery("SELECT ref_id, loinc_id FROM lab_reference", nil, &known)
	if err != nil {
		return fmt.Errorf("load lab_reference: %w", err)
	}
	covered := make(map[string]bool)
	for idx := range known {
		covered[known[idx].LoincID] = true
	}

	// Keep only tests that lack a reference and carry an SI unit.
	var pending []LabTest
	for _, test := range allTests {
		if !covered[test.LoincID] && test.SIUnit != "" {
			pending = append(pending, test)
		}
	}

	total := len(pending)
	if total == 0 {
		log.Printf("populate_refs: all %d tests already have references", len(allTests))
		return nil
	}
	log.Printf("populate_refs: %d tests need references (of %d total)", total, len(allTests))

	// Query the LLM batch by batch; failed batches are skipped.
	const batchSize = 50
	var collected []LabReference
	for lo := 0; lo < total; lo += batchSize {
		hi := lo + batchSize
		if hi > total {
			hi = total
		}

		log.Printf("populate_refs: LLM batch %d-%d of %d", lo+1, hi, total)
		got, llmErr := callReferenceLLM(pending[lo:hi])
		if llmErr != nil {
			log.Printf("populate_refs: batch %d-%d error: %v", lo+1, hi, llmErr)
			continue
		}
		collected = append(collected, got...)
	}

	if len(collected) == 0 {
		log.Printf("populate_refs: no references returned")
		return nil
	}

	log.Printf("populate_refs: saving %d reference ranges", len(collected))
	return LabRefSaveBatch(collected)
}
// callReferenceLLM asks the Gemini model for reference ranges for the given
// tests and converts the JSON answer into LabReference values.
//
// Each test is presented to the model as "<loinc> <name> (<si unit>)". The
// reply is expected to be a bare JSON array; if it does not parse, the error
// includes the first 300 characters of the reply. Entries with an empty
// LOINC code or with low >= high are dropped. The returned references always
// have an empty Sex, and RefID is not set here.
func callReferenceLLM(tests []LabTest) ([]LabReference, error) {
// One line per test, joined into the tail of the prompt.
var lines []string
for _, t := range tests {
lines = append(lines, fmt.Sprintf("%s %s (%s)", t.LoincID, t.Name, t.SIUnit))
}
prompt := fmt.Sprintf(`For each LOINC code below, provide standard reference ranges IN THE SI UNIT SHOWN.
Give up to 3 ranges per test:
- Pediatric (ages 2-12): source "CALIPER"
- Adolescent (ages 12-18): source "CALIPER"
- Adult (ages 18+): source "IFCC"
Skip tests that are non-numeric, qualitative, or where standard ranges don't exist.
Return JSON array of objects:
[{"loinc":"718-7","source":"CALIPER","age_min_days":730,"age_max_days":4380,"low":115.0,"high":135.0,"unit":"g/L"}, ...]
age_min_days and age_max_days are ages in days (730=2yr, 4380=12yr, 6570=18yr, 36500=100yr).
Values must be in the SI unit provided. Omit sex-specific ranges for simplicity use unisex.
Only include tests where you are confident in the reference values.
Tests:
%s`, strings.Join(lines, "\n"))
// Temperature 0 and an 8192-token output cap are passed by pointer in the config.
maxTokens := 8192
temp := 0.0
config := &GeminiConfig{
Temperature: &temp,
MaxOutputTokens: &maxTokens,
}
resp, err := CallGeminiMultimodal([]GeminiPart{{Text: prompt}}, config)
if err != nil {
return nil, err
}
// refResult mirrors one object of the JSON array requested in the prompt.
type refResult struct {
Loinc string `json:"loinc"`
Source string `json:"source"`
AgeMinDays int64 `json:"age_min_days"`
AgeMaxDays int64 `json:"age_max_days"`
Low float64 `json:"low"`
High float64 `json:"high"`
Unit string `json:"unit"`
}
var results []refResult
if err := json.Unmarshal([]byte(resp), &results); err != nil {
return nil, fmt.Errorf("parse response: %w (first 300 chars: %.300s)", err, resp)
}
var refs []LabReference
for _, r := range results {
// Skip entries without a LOINC code or with an empty/inverted range.
if r.Loinc == "" || r.Low >= r.High {
continue
}
refs = append(refs, LabReference{
LoincID: r.Loinc,
Source: r.Source,
Sex: "",
AgeDays: r.AgeMinDays,
AgeEnd: r.AgeMaxDays,
RefLow: ToLabScale(r.Low),
RefHigh: ToLabScale(r.High),
Unit: r.Unit,
})
}
return refs, nil
}

View File

@ -112,13 +112,10 @@ func Normalize(dossierID string, category int) error {
SIFactor: ToLabScale(factor),
})
}
if len(labTests) > 0 {
if err := LabTestSaveBatch(labTests); err != nil {
log.Printf("normalize: warning: save lab_test: %v", err)
} else {
log.Printf("normalize: saved %d lab_test entries", len(labTests))
}
}
// NOTE: Lab test saving removed - import tools handle this directly
// if len(labTests) > 0 {
// log.Printf("normalize: found %d lab tests (saving disabled)", len(labTests))
// }
// 6. Load entries, apply mapping, save only changed ones
entries, err := EntryQueryOld(dossierID, category, "")

View File

@ -1540,12 +1540,6 @@ func EntryListByDossier(ctx *AccessContext, dossierID string) ([]*Entry, error)
return entries, dbQuery("SELECT * FROM entries WHERE dossier_id = ? ORDER BY category, timestamp", []any{dossierID}, &entries)
}
// LabTestList returns every row of lab_test, selecting only the loinc_id and
// name columns (other LabTest fields are left at their zero values).
// No access context is taken: this is reference data, so no RBAC check is
// applied.
// NOTE(review): this reads through dbQuery, not the reference-database query
// helper, although lab_test is listed as reference data elsewhere in this
// change — confirm which database actually holds the table.
func LabTestList() ([]LabTest, error) {
var tests []LabTest
return tests, dbQuery("SELECT loinc_id, name FROM lab_test", nil, &tests)
}
// LabEntryListForIndex returns lab entries with data for building search indexes.
func LabEntryListForIndex() ([]*Entry, error) {
var entries []*Entry
@ -1553,17 +1547,6 @@ func LabEntryListForIndex() ([]*Entry, error) {
[]any{CategoryLab}, &entries)
}
// LabRefListBySource returns the lab_reference rows whose ref_id contains
// "|<source>|", i.e. it matches ref_id against the LIKE pattern
// "%|" + source + "|%".
// NOTE(review): source is bound as a parameter but is not escaped for LIKE,
// so a "%" or "_" inside it acts as a wildcard — confirm callers only pass
// fixed source names.
func LabRefListBySource(source string) ([]LabReference, error) {
var refs []LabReference
return refs, dbQuery("SELECT * FROM lab_reference WHERE ref_id LIKE ?", []any{"%|" + source + "|%"}, &refs)
}
// LabRefDeleteByID deletes the lab_reference row whose ref_id equals refID.
// It delegates to dbDelete and returns that call's error.
func LabRefDeleteByID(refID string) error {
return dbDelete("lab_reference", "ref_id", refID)
}
// --- HELPERS ---
func deleteByIDs(table, col string, ids []string) error {

View File

@ -85,12 +85,7 @@ func handleAPISend(w http.ResponseWriter, r *http.Request) {
expiresAt := time.Now().UTC().Add(10 * time.Minute).Unix()
if existing == nil {
// Rate limit for new signups
clientIP := getClientIP(r)
if !checkNewSignupLimit(clientIP) {
jsonError(w, "Too many attempts, try later", 429)
return
}
// New user
d := &lib.Dossier{
Email: email,
AuthCode: code,
@ -98,7 +93,6 @@ func handleAPISend(w http.ResponseWriter, r *http.Request) {
Language: "en",
}
lib.DossierWrite(nil, d) // nil ctx - auth operation
recordNewSignup(clientIP)
} else {
lib.DossierSetAuthCode(existing.DossierID, code, expiresAt)
}

View File

@ -492,8 +492,8 @@ func entriesToSectionItems(entries []*lib.Entry) []SectionItem {
// buildLoincNameMap builds a JSON map of LOINC code → full test name
// for displaying full names in charts.
func buildLoincNameMap() string {
tests, err := lib.LabTestList()
if err != nil {
var tests []lib.LabTest
if err := lib.RefQuery("SELECT loinc_id, name FROM lab_test", nil, &tests); err != nil {
return "{}"
}
@ -510,8 +510,8 @@ func buildLoincNameMap() string {
// buildLabSearchIndex builds a JSON map of search terms → LOINC codes
// for client-side lab result filtering. Keys are lowercase test names and abbreviations.
func buildLabSearchIndex() string {
tests, err := lib.LabTestList()
if err != nil {
var tests []lib.LabTest
if err := lib.RefQuery("SELECT loinc_id, name FROM lab_test", nil, &tests); err != nil {
return "{}"
}

View File

@ -5,7 +5,6 @@ import (
"crypto/rand"
"crypto/sha256"
"crypto/tls"
"database/sql"
"encoding/base64"
"encoding/json"
"fmt"
@ -28,7 +27,6 @@ import (
var (
Version string = "dev" // Set via ldflags at build time
rateDB *sql.DB
templates *template.Template
translations map[string]map[string]string
smtpHost, smtpPort, smtpUser, smtpToken, smtpFrom string
@ -163,14 +161,7 @@ type Study struct {
}
func initDB() {
var err error
// Separate rate limit DB
rateDB, err = sql.Open("sqlite3", "data/ratelimit.db")
if err != nil { panic(err) }
rateDB.Exec(`CREATE TABLE IF NOT EXISTS new_signups (ip TEXT, created_at INTEGER)`)
rateDB.Exec(`CREATE INDEX IF NOT EXISTS idx_signups_ip ON new_signups(ip)`)
// Cleanup old entries on startup
rateDB.Exec(`DELETE FROM new_signups WHERE created_at < ?`, time.Now().Add(-24*time.Hour).Unix())
// Rate limiting removed - handled differently now
}
func loadTranslations() {
@ -526,20 +517,6 @@ func getClientIP(r *http.Request) string {
return strings.Trim(ip, "[]")
}
// checkNewSignupLimit reports whether ip is still allowed to create a new
// account: it returns true while fewer than 3 signups from that IP have been
// recorded in new_signups during the last 24 hours.
//
// If the count cannot be read the check fails open (returns true), exactly as
// before, but the error is now printed instead of being silently discarded so
// a broken rate-limit database does not go unnoticed.
func checkNewSignupLimit(ip string) bool {
	const (
		maxSignups = 3              // allowed new signups per IP per window
		window     = 24 * time.Hour // look-back period
	)

	cutoff := time.Now().Add(-window).Unix()

	var count int
	err := rateDB.QueryRow(
		`SELECT COUNT(*) FROM new_signups WHERE ip = ? AND created_at > ?`,
		ip, cutoff,
	).Scan(&count)
	if err != nil {
		// Fail open: an unreadable rate-limit table must not block signups.
		fmt.Println("signup-limit: count query failed:", err)
		return true
	}
	return count < maxSignups
}
// recordNewSignup stores a signup from ip in new_signups with the current
// Unix time, then prunes rows older than 24 hours.
//
// Both statements remain best-effort (the function returns nothing and never
// aborts the signup), but failures are now printed instead of being silently
// dropped. A single timestamp is used for the insert and the prune cutoff.
func recordNewSignup(ip string) {
	now := time.Now()

	if _, err := rateDB.Exec(`INSERT INTO new_signups (ip, created_at) VALUES (?, ?)`, ip, now.Unix()); err != nil {
		fmt.Println("signup-limit: record failed:", err)
	}

	// Periodic cleanup of entries that fell out of the 24h window.
	if _, err := rateDB.Exec(`DELETE FROM new_signups WHERE created_at < ?`, now.Add(-24*time.Hour).Unix()); err != nil {
		fmt.Println("signup-limit: cleanup failed:", err)
	}
}
func handleSendCode(w http.ResponseWriter, r *http.Request) {
if r.Method != "POST" { fmt.Println("send-code: not POST"); http.Redirect(w, r, "/", http.StatusSeeOther); return }
// Bot detection via JS nonce
@ -571,12 +548,8 @@ func handleSendCode(w http.ResponseWriter, r *http.Request) {
var signupIP string
if existing == nil {
// New user - rate limit check
// New user
clientIP := getClientIP(r)
if !checkNewSignupLimit(clientIP) {
render(w, r, PageData{Page: "landing", Lang: lang, Email: email, Error: T(lang, "rate_limit_exceeded")})
return
}
d := &lib.Dossier{
Email: email,
AuthCode: code,
@ -584,7 +557,6 @@ func handleSendCode(w http.ResponseWriter, r *http.Request) {
Language: lang,
}
lib.DossierWrite(nil, d) // nil ctx - auth operation
recordNewSignup(clientIP)
signupIP = clientIP
} else {
fmt.Println("send-code: setting auth code for existing user")

View File

@ -56,6 +56,9 @@ func main() {
if err := lib.Init(); err != nil {
log.Fatalf("lib.Init: %v", err)
}
if err := lib.RefDBInit("/tank/inou/data/reference.db"); err != nil {
log.Fatalf("lib.RefDBInit: %v", err)
}
data, err := os.ReadFile(file)
if err != nil {
@ -70,13 +73,14 @@ func main() {
// Delete existing CALIPER references
if !dryRun {
existing, err := lib.LabRefListBySource("CALIPER")
var existing []lib.LabReference
err := lib.RefQuery("SELECT * FROM lab_reference WHERE ref_id LIKE ?", []any{"%|CALIPER|%"}, &existing)
if err != nil {
log.Printf("Warning: could not count existing: %v", err)
} else {
log.Printf("Deleting %d existing CALIPER references", len(existing))
for _, r := range existing {
lib.LabRefDeleteByID(r.RefID)
lib.RefDelete("lab_reference", "ref_id", r.RefID)
}
}
}