// inou/import-dicom/main.go
package main
import (
	"bufio"
	"bytes"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"image"
	"image/color"
	"image/png"
	"io"
	"math"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"time"

	"golang.org/x/text/cases"
	"golang.org/x/text/language"

	"inou/lib"
)
// Cache for studies and series (avoids redundant DB queries)
// Both caches live for the duration of one import run; entries are added by
// getOrCreateStudy / getOrCreateSeries and never evicted.
var (
	studyCache  = make(map[string]string) // studyUID -> entryID
	seriesCache = make(map[string]string) // seriesUID|desc -> entryID
)
// ============================================================================
// DICOM TAG READING
// ============================================================================
// findTag locates a DICOM tag in the data, returning its byte position or -1
func findTag(data []byte, group, elem uint16) int {
target := make([]byte, 4)
binary.LittleEndian.PutUint16(target[0:2], group)
binary.LittleEndian.PutUint16(target[2:4], elem)
for i := 0; i < len(data)-4; i++ {
if data[i] == target[0] && data[i+1] == target[1] &&
data[i+2] == target[2] && data[i+3] == target[3] {
return i
}
}
return -1
}
// findLastTag finds the last occurrence of a tag (useful for pixel data),
// returning its byte position or -1. Matching is identical to findTag but
// scans for the final occurrence instead of the first.
func findLastTag(data []byte, group, elem uint16) int {
	if len(data) < 5 {
		return -1
	}
	var pat [4]byte
	binary.LittleEndian.PutUint16(pat[0:2], group)
	binary.LittleEndian.PutUint16(pat[2:4], elem)
	return bytes.LastIndex(data[:len(data)-1], pat[:])
}
// readStringTag extracts a string value from a DICOM tag.
//
// The element is assumed to be explicit-VR little endian: 4-byte tag, 2-byte
// VR, then either a 2-byte length (short VRs) or 2 reserved bytes plus a
// 4-byte length (OB/OW/SQ/UN). Trailing spaces and NUL padding are stripped.
// Returns "" when the tag is absent or the element is truncated.
//
// Fixes over the previous version:
//   - the 32-bit length of long-form VRs was truncated through uint16,
//     corrupting reads of elements longer than 65535 bytes
//   - header reads are now bounds-checked, so a tag pattern found near the
//     end of the buffer can no longer cause a slice-out-of-range panic
func readStringTag(data []byte, group, elem uint16) string {
	pos := findTag(data, group, elem)
	if pos < 0 || pos+8 > len(data) {
		// Tag missing, or too close to the end to hold even a short header.
		return ""
	}
	vr := string(data[pos+4 : pos+6])
	var length int
	var valPos int
	if vr == "OB" || vr == "OW" || vr == "SQ" || vr == "UN" {
		// Long-form VR: 2 reserved bytes, then a full 32-bit length.
		if pos+12 > len(data) {
			return ""
		}
		length = int(binary.LittleEndian.Uint32(data[pos+8 : pos+12]))
		valPos = pos + 12
	} else {
		length = int(binary.LittleEndian.Uint16(data[pos+6 : pos+8]))
		valPos = pos + 8
	}
	if length < 0 || valPos+length > len(data) {
		return ""
	}
	// Trim trailing spaces and NUL padding (DICOM pads values to even length).
	return strings.TrimRight(string(data[valPos:valPos+length]), " \x00")
}
// readIntTag reads a string tag and converts to int.
// Unparsable or missing values yield 0.
func readIntTag(data []byte, group, elem uint16) int {
	raw := strings.TrimSpace(readStringTag(data, group, elem))
	value, _ := strconv.Atoi(raw)
	return value
}
// readIntTagSmart handles both string IS and binary US/SS value representations.
// For US/SS the 16-bit binary value at the element payload is returned; any
// other VR (or a truncated binary element) falls back to decimal-string parsing.
func readIntTagSmart(data []byte, group, elem uint16) int {
	pos := findTag(data, group, elem)
	if pos < 0 {
		return 0
	}
	switch string(data[pos+4 : pos+6]) {
	case "US", "SS":
		if valPos := pos + 8; valPos+2 <= len(data) {
			return int(binary.LittleEndian.Uint16(data[valPos : valPos+2]))
		}
	}
	value, _ := strconv.Atoi(strings.TrimSpace(readStringTag(data, group, elem)))
	return value
}
// readFloatTag reads a string tag and converts to float64.
// Unparsable or missing values yield 0.
func readFloatTag(data []byte, group, elem uint16) float64 {
	value, _ := strconv.ParseFloat(strings.TrimSpace(readStringTag(data, group, elem)), 64)
	return value
}
// readFloatTag2 reads a backslash-separated pair of floats (e.g., window
// center/width). A missing second component yields 0 for the second value.
func readFloatTag2(data []byte, group, elem uint16) (float64, float64) {
	raw := strings.TrimSpace(readStringTag(data, group, elem))
	first, rest, hasMore := strings.Cut(raw, "\\")
	v1, _ := strconv.ParseFloat(strings.TrimSpace(first), 64)
	var v2 float64
	if hasMore {
		second, _, _ := strings.Cut(rest, "\\")
		v2, _ = strconv.ParseFloat(strings.TrimSpace(second), 64)
	}
	return v1, v2
}
// ============================================================================
// PIXEL DATA HANDLING
// ============================================================================
// findPixelData locates the pixel data tag, validating expected size.
//
// A naive scan for the (7FE0,0010) byte pattern can match inside unrelated
// element values, so every candidate is vetted: its declared length must be
// positive and equal to, or within 10% of, expectedBytes
// (rows * cols * bytesPerPixel * numFrames). Returns the byte offset of the
// tag, or -1 when no plausible candidate exists.
func findPixelData(data []byte, expectedBytes int) int {
	// (7FE0,0010) in little-endian byte order.
	target := []byte{0xE0, 0x7F, 0x10, 0x00}
	pos := 0
	for {
		// Scan forward from pos for the next candidate occurrence; the -12
		// bound guarantees the header reads below stay in range.
		idx := -1
		for i := pos; i < len(data)-12; i++ {
			if data[i] == target[0] && data[i+1] == target[1] &&
				data[i+2] == target[2] && data[i+3] == target[3] {
				idx = i
				break
			}
		}
		if idx < 0 {
			return -1
		}
		vr := string(data[idx+4 : idx+6])
		var length int
		if vr == "OW" || vr == "OB" {
			// Long-form VR: 2 reserved bytes, then a 32-bit length.
			length = int(binary.LittleEndian.Uint32(data[idx+8 : idx+12]))
		} else {
			length = int(binary.LittleEndian.Uint16(data[idx+6 : idx+8]))
		}
		// Accept if within 10% of expected size
		if length > 0 && (length == expectedBytes || (length > expectedBytes*9/10 && length < expectedBytes*11/10)) {
			return idx
		}
		// False positive: resume scanning just past this occurrence.
		pos = idx + 1
	}
}
// getTransferSyntax reads the Transfer Syntax UID from the file meta header
// (tag 0002,0010), which tells the caller how the pixel data is encoded.
func getTransferSyntax(data []byte) string {
	return readStringTag(data, 0x0002, 0x0010)
}
// isCompressedTransferSyntax reports whether the transfer syntax stores pixel
// data in a compressed/encapsulated form that must be decompressed (via
// decompressDICOM) before the raw little-endian pixel reader can be used.
//
// Covered families:
//   - 1.2.840.10008.1.2.4.* — JPEG baseline/lossless, JPEG-LS, JPEG 2000
//   - 1.2.840.10008.1.2.5   — RLE Lossless (previously missed, which caused
//     RLE files to be read as raw pixels and imported as garbage)
func isCompressedTransferSyntax(ts string) bool {
	return strings.HasPrefix(ts, "1.2.840.10008.1.2.4") ||
		ts == "1.2.840.10008.1.2.5"
}
// decompressDICOM uses gdcmconv to decompress any compressed DICOM to
// uncompressed DICOM, returning the decompressed file's bytes.
//
// The temp file is created with os.CreateTemp instead of a hand-built
// "/tmp/dcm_<pid>_<nanos>.dcm" name: that avoids predictable-name collisions
// and respects TMPDIR. The file is removed before returning.
func decompressDICOM(dicomPath string) ([]byte, error) {
	tmp, err := os.CreateTemp("", "dcm_*.dcm")
	if err != nil {
		return nil, fmt.Errorf("create temp file: %w", err)
	}
	tmpFile := tmp.Name()
	tmp.Close() // gdcmconv writes the file itself; only the name is needed
	defer os.Remove(tmpFile)
	// -w: write decompressed (raw) output.
	cmd := exec.Command("gdcmconv", "-w", "-i", dicomPath, "-o", tmpFile)
	if output, err := cmd.CombinedOutput(); err != nil {
		return nil, fmt.Errorf("gdcmconv failed: %v - %s", err, string(output))
	}
	data, err := os.ReadFile(tmpFile)
	if err != nil {
		return nil, fmt.Errorf("failed to read decompressed file: %v", err)
	}
	return data, nil
}
// FramePosition holds per-frame position data from enhanced multi-frame DICOM
type FramePosition struct {
	// X, Y, Z are the Image Position (Patient) coordinates parsed from tag
	// (0020,0032) inside the per-frame functional groups; zero when unset.
	X, Y, Z float64
}
// parsePerFramePositions extracts position data for each frame in enhanced
// multi-frame DICOM.
//
// It locates the Per-Frame Functional Groups Sequence (5200,9230), then walks
// successive Plane Position Sequence items (0020,9113), pulling the Image
// Position (Patient) value (0020,0032) out of each. Frames without a parsable
// position keep the zero value. All tag patterns below are the little-endian
// byte encodings of those tags.
func parsePerFramePositions(data []byte, numFrames int) []FramePosition {
	positions := make([]FramePosition, numFrames)
	// (5200,9230) Per-Frame Functional Groups Sequence.
	perFrameTag := []byte{0x00, 0x52, 0x30, 0x92}
	perFramePos := -1
	for i := 0; i < len(data)-4; i++ {
		if data[i] == perFrameTag[0] && data[i+1] == perFrameTag[1] &&
			data[i+2] == perFrameTag[2] && data[i+3] == perFrameTag[3] {
			perFramePos = i
			break
		}
	}
	if perFramePos < 0 {
		// No per-frame sequence: all-zero positions signal "no data" to callers.
		return positions
	}
	// (0020,9113) Plane Position Sequence; (0020,0032) Image Position (Patient).
	planeTag := []byte{0x20, 0x00, 0x13, 0x91}
	imgPosTag := []byte{0x20, 0x00, 0x32, 0x00}
	frameIdx := 0
	searchStart := perFramePos
	for frameIdx < numFrames {
		// Find the next plane-position item after the previous match.
		planePos := -1
		for i := searchStart; i < len(data)-4; i++ {
			if data[i] == planeTag[0] && data[i+1] == planeTag[1] &&
				data[i+2] == planeTag[2] && data[i+3] == planeTag[3] {
				planePos = i
				break
			}
		}
		if planePos < 0 {
			break
		}
		// The position element is expected within ~100 bytes of the item tag.
		for i := planePos; i < planePos+100 && i < len(data)-8; i++ {
			if data[i] == imgPosTag[0] && data[i+1] == imgPosTag[1] &&
				data[i+2] == imgPosTag[2] && data[i+3] == imgPosTag[3] {
				// Only explicit-VR "DS" (decimal string) elements are parsed.
				if data[i+4] == 'D' && data[i+5] == 'S' {
					length := int(binary.LittleEndian.Uint16(data[i+6 : i+8]))
					if i+8+length <= len(data) {
						// Value is "x\y\z" backslash-separated decimal strings.
						value := strings.TrimSpace(string(data[i+8 : i+8+length]))
						parts := strings.Split(value, "\\")
						if len(parts) >= 3 {
							positions[frameIdx].X, _ = strconv.ParseFloat(strings.TrimSpace(parts[0]), 64)
							positions[frameIdx].Y, _ = strconv.ParseFloat(strings.TrimSpace(parts[1]), 64)
							positions[frameIdx].Z, _ = strconv.ParseFloat(strings.TrimSpace(parts[2]), 64)
						}
					}
				}
				break
			}
		}
		searchStart = planePos + 1
		frameIdx++
	}
	return positions
}
// ============================================================================
// IMAGE PROCESSING
// ============================================================================
// Gray16Image implements image.Image for 16-bit grayscale
type Gray16Image struct {
Pixels []uint16
Width, Height int
}
func (g *Gray16Image) ColorModel() color.Model { return color.Gray16Model }
func (g *Gray16Image) Bounds() image.Rectangle { return image.Rect(0, 0, g.Width, g.Height) }
func (g *Gray16Image) At(x, y int) color.Color {
if x < 0 || x >= g.Width || y < 0 || y >= g.Height {
return color.Gray16{0}
}
return color.Gray16{g.Pixels[y*g.Width+x]}
}
// encode16BitPNG encodes 16-bit grayscale pixels to PNG bytes
func encode16BitPNG(pixels []uint16, width, height int) ([]byte, error) {
img := &Gray16Image{Pixels: pixels, Width: width, Height: height}
var buf bytes.Buffer
if err := png.Encode(&buf, img); err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// encodeRGBPNG encodes RGB pixels to PNG bytes
func encodeRGBPNG(pixels []byte, width, height int) ([]byte, error) {
img := image.NewRGBA(image.Rect(0, 0, width, height))
for y := 0; y < height; y++ {
for x := 0; x < width; x++ {
idx := (y*width + x) * 3
img.Set(x, y, color.RGBA{pixels[idx], pixels[idx+1], pixels[idx+2], 255})
}
}
var buf bytes.Buffer
if err := png.Encode(&buf, img); err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// boundingBox finds the content bounding box in a 16-bit grayscale image.
// Returns x1, y1, x2, y2 (inclusive coordinates) of the smallest rectangle
// containing all pixels above the threshold. The threshold is derived from
// the image's min/max values so near-black background is excluded.
func boundingBox(pixels []uint16, width, height int, minVal, maxVal uint16) (x1, y1, x2, y2 int, valid bool) {
	// Cutoff: min + 5% of the dynamic range, floored at an absolute 50 so
	// noise in near-empty slices does not register as content.
	threshold := minVal + uint16(float64(maxVal-minVal)*0.05)
	if threshold < 50 {
		threshold = 50
	}
	x1, y1, x2, y2 = width, height, -1, -1
	for i := 0; i < width*height; i++ {
		if pixels[i] <= threshold {
			continue
		}
		x, y := i%width, i/width
		if x < x1 {
			x1 = x
		}
		if y < y1 {
			y1 = y
		}
		if x > x2 {
			x2 = x
		}
		if y > y2 {
			y2 = y
		}
	}
	if x2 < 0 {
		// Nothing above threshold: no content box.
		return 0, 0, 0, 0, false
	}
	return x1, y1, x2, y2, true
}
// unionBBox computes the smallest box covering both input boxes.
// Boxes are [x1, y1, x2, y2] with inclusive corners.
func unionBBox(a, b [4]int) [4]int {
	u := a
	if b[0] < u[0] {
		u[0] = b[0]
	}
	if b[1] < u[1] {
		u[1] = b[1]
	}
	if b[2] > u[2] {
		u[2] = b[2]
	}
	if b[3] > u[3] {
		u[3] = b[3]
	}
	return u
}
// ============================================================================
// ORIENTATION AND LOCALIZER DETECTION
// ============================================================================
// getOrientationType determines if slice is AX, SAG, or COR based on the
// Image Orientation (Patient) tag (0020,0037). The slice normal is the cross
// product of the row and column direction cosines; its dominant axis picks
// the plane. Returns "" when the tag is missing or malformed.
func getOrientationType(data []byte) string {
	parts := strings.Split(readStringTag(data, 0x0020, 0x0037), "\\")
	if len(parts) < 6 {
		return ""
	}
	var v [6]float64
	for i := range v {
		v[i], _ = strconv.ParseFloat(strings.TrimSpace(parts[i]), 64)
	}
	// Normal = row × col; v[0..2] is the row vector, v[3..5] the column vector.
	nx := v[1]*v[5] - v[2]*v[4]
	ny := v[2]*v[3] - v[0]*v[5]
	nz := v[0]*v[4] - v[1]*v[3]
	ax, ay, az := math.Abs(nx), math.Abs(ny), math.Abs(nz)
	switch {
	case az >= ax && az >= ay:
		return "AX"
	case ax >= ay && ax >= az:
		return "SAG"
	default:
		return "COR"
	}
}
// isLocalizer detects scout/localizer images that should be skipped.
// A slice is treated as a localizer when its Image Type (0008,0008) says so,
// or when its row direction cosine disagrees with the orientation implied by
// the series description (an "AX" series containing a non-axial slice, etc).
func isLocalizer(data []byte, seriesDesc string) bool {
	imgType := strings.ToUpper(readStringTag(data, 0x0008, 0x0008))
	if strings.Contains(imgType, "LOCALIZER") || strings.Contains(imgType, "SCOUT") {
		return true
	}
	parts := strings.Split(readStringTag(data, 0x0020, 0x0037), "\\")
	if len(parts) < 6 {
		// No usable orientation: assume a regular slice.
		return false
	}
	rowX, _ := strconv.ParseFloat(strings.TrimSpace(parts[0]), 64)
	absRowX := math.Abs(rowX)
	desc := strings.ToUpper(seriesDesc)
	// Orientation mismatch heuristics against the series name.
	switch {
	case strings.Contains(desc, "AX") && absRowX < 0.8:
		return true
	case strings.Contains(desc, "SAG") && absRowX > 0.5:
		return true
	}
	return false
}
// ============================================================================
// METADATA HELPERS
// ============================================================================
// formatPatientName converts a DICOM person name
// ("Last^First^Middle^Prefix^Suffix") to display order "First Middle Last",
// title-cased.
//
// Fixes over the previous version:
//   - a single-component name ("DOE") is now title-cased instead of being
//     returned raw
//   - empty components no longer produce leading or doubled spaces
//     (e.g. "Doe^" previously returned " Doe")
func formatPatientName(name string) string {
	titleCase := cases.Title(language.English)
	parts := strings.Split(name, "^")
	for i, p := range parts {
		parts[i] = titleCase.String(strings.ToLower(strings.TrimSpace(p)))
	}
	// Reorder to First Middle Last, dropping empty components.
	ordered := make([]string, 0, 3)
	if len(parts) >= 2 && parts[1] != "" {
		ordered = append(ordered, parts[1]) // first name
	}
	if len(parts) >= 3 && parts[2] != "" {
		ordered = append(ordered, parts[2]) // middle name
	}
	if parts[0] != "" {
		ordered = append(ordered, parts[0]) // last name
	}
	return strings.Join(ordered, " ")
}
// calculateStepSize computes how many slices to skip for ~5mm thumbnail
// spacing. A non-positive thickness (missing tag) falls back to every slice.
func calculateStepSize(requestedSpacingMM, sliceThicknessMM float64) int {
	if sliceThicknessMM <= 0 {
		return 1
	}
	if step := int(math.Round(requestedSpacingMM / sliceThicknessMM)); step > 1 {
		return step
	}
	return 1
}
// ============================================================================
// STUDY/SERIES/SLICE CREATION (using V2 API)
// ============================================================================
// getOrCreateStudy finds existing study or creates new one (child of imaging
// category root). Results are memoized in studyCache keyed by StudyInstanceUID.
func getOrCreateStudy(data []byte, dossierID string) (string, error) {
	uid := readStringTag(data, 0x0020, 0x000D)
	if cached, hit := studyCache[uid]; hit {
		return cached, nil
	}
	// Reuse a previously imported study with the same UID (parent-agnostic).
	if existing, err := lib.EntryQuery(dossierID, lib.CategoryImaging, "study"); err == nil {
		for _, entry := range existing {
			if entry.Value == uid {
				studyCache[uid] = entry.EntryID
				return entry.EntryID, nil
			}
		}
	}
	// All studies hang off the imaging category root; create it on demand.
	rootID, err := lib.EnsureCategoryRoot(dossierID, lib.CategoryImaging)
	if err != nil {
		return "", fmt.Errorf("ensure imaging category root: %w", err)
	}
	// Pull the study-level metadata out of the DICOM header.
	patient := formatPatientName(readStringTag(data, 0x0010, 0x0010))
	desc := readStringTag(data, 0x0008, 0x1030)
	date := readStringTag(data, 0x0008, 0x0020)
	region := readStringTag(data, 0x0018, 0x0015)
	meta, _ := json.Marshal(map[string]interface{}{
		"patient_name":        patient,
		"patient_dob":         readStringTag(data, 0x0010, 0x0030),
		"patient_sex":         readStringTag(data, 0x0010, 0x0040),
		"patient_age":         readStringTag(data, 0x0010, 0x1010),
		"study_date":          date,
		"study_time":          readStringTag(data, 0x0008, 0x0030),
		"study_desc":          desc,
		"institution":         readStringTag(data, 0x0008, 0x0080),
		"accession_number":    readStringTag(data, 0x0008, 0x0050),
		"referring_physician": readStringTag(data, 0x0008, 0x0090),
		"body_part":           region,
	})
	// Summary for progressive disclosure: "<body part> <desc> (<date>)",
	// with a generic fallback when everything is empty.
	summary := desc
	if region != "" && !strings.Contains(strings.ToUpper(desc), strings.ToUpper(region)) {
		summary = region + " " + desc
	}
	if date != "" {
		summary += " (" + date + ")"
	}
	if summary == "" {
		summary = "Imaging Study"
	}
	entry := &lib.Entry{
		EntryID:   lib.NewID(),
		DossierID: dossierID,
		ParentID:  rootID, // child of imaging category root
		Category:  lib.CategoryImaging,
		Type:      "study",
		Value:     uid,
		Summary:   summary,
		Timestamp: time.Now().Unix(),
		Data:      string(meta),
	}
	if err := lib.EntryWrite(nil, entry); err != nil {
		return "", err
	}
	studyCache[uid] = entry.EntryID
	return entry.EntryID, nil
}
// getOrCreateSeries finds existing series or creates new one under studyID.
// Results are memoized in seriesCache keyed by "seriesUID|description".
func getOrCreateSeries(data []byte, dossierID, studyID string) (string, error) {
	uid := readStringTag(data, 0x0020, 0x000E)
	desc := readStringTag(data, 0x0008, 0x103E)
	cacheKey := uid + "|" + desc
	if cached, hit := seriesCache[cacheKey]; hit {
		return cached, nil
	}
	// Reuse an existing child of the study matching both UID and description.
	children, err := lib.EntryList(lib.SystemAccessorID, studyID, lib.CategoryImaging, &lib.EntryFilter{ // nil ctx - import tool
		DossierID: dossierID,
		Type:      "series",
	})
	if err == nil {
		for _, child := range children {
			if child.Value == uid && child.Tags == desc {
				seriesCache[cacheKey] = child.EntryID
				return child.EntryID, nil
			}
		}
	}
	modality := readStringTag(data, 0x0008, 0x0060)
	if modality == "" {
		modality = "UN" // unknown-modality placeholder
	}
	meta, _ := json.Marshal(map[string]interface{}{
		"series_instance_uid":     uid,
		"body_part":               readStringTag(data, 0x0018, 0x0015),
		"protocol_name":           readStringTag(data, 0x0018, 0x1030),
		"manufacturer":            readStringTag(data, 0x0008, 0x0070),
		"model":                   readStringTag(data, 0x0008, 0x1090),
		"magnetic_field_strength": readStringTag(data, 0x0018, 0x0087),
		"modality":                modality,
	})
	// Summary reads like "MR T2 SAG BRAIN" (modality alone when no description).
	summary := modality
	if desc != "" {
		summary = modality + " " + desc
	}
	entry := &lib.Entry{
		EntryID:   lib.NewID(),
		DossierID: dossierID,
		ParentID:  studyID,
		Category:  lib.CategoryImaging,
		Type:      "series",
		Value:     uid,
		Summary:   summary,
		Ordinal:   readIntTag(data, 0x0020, 0x0011), // series number
		Timestamp: time.Now().Unix(),
		Tags:      desc,
		Data:      string(meta),
		SearchKey: modality,
	}
	if err := lib.EntryWrite(nil, entry); err != nil { // nil ctx - import tool
		return "", err
	}
	seriesCache[cacheKey] = entry.EntryID
	return entry.EntryID, nil
}
// insertSlice creates a slice entry with all metadata.
//
// data is the raw DICOM file the metadata is read from; sliceID doubles as
// the entry ID and the stored-object ID; pixelMin/pixelMax are the frame's
// measured intensity range (0,0 for RGB frames); instanceNumber is the
// running frame counter supplied by the caller; framePos, when non-nil,
// carries a per-frame position from an enhanced multi-frame file and switches
// several fallbacks below.
func insertSlice(data []byte, sliceID, dossierID, seriesID string, pixelMin, pixelMax int, orientationType string, instanceNumber int, framePos *FramePosition) error {
	// Object path: <dossier>/<first char of ID>/<ID> — one-character shard dir.
	objPath := filepath.Join(dossierID, sliceID[0:1], sliceID)
	sopUID := readStringTag(data, 0x0008, 0x0018)
	if framePos != nil {
		// Multi-frame: all frames share one SOP UID, so suffix the frame number
		// to keep the Value unique per slice entry.
		sopUID = fmt.Sprintf("%s.%d", sopUID, instanceNumber)
	}
	psRow, psCol := readFloatTag2(data, 0x0028, 0x0030)
	// Image position: prefer a non-zero per-frame value, else tag (0020,0032).
	var posX, posY, posZ float64
	if framePos != nil && (framePos.X != 0 || framePos.Y != 0 || framePos.Z != 0) {
		posX, posY, posZ = framePos.X, framePos.Y, framePos.Z
	} else {
		posStr := readStringTag(data, 0x0020, 0x0032)
		if posStr != "" {
			parts := strings.Split(posStr, "\\")
			if len(parts) >= 3 {
				posX, _ = strconv.ParseFloat(strings.TrimSpace(parts[0]), 64)
				posY, _ = strconv.ParseFloat(strings.TrimSpace(parts[1]), 64)
				posZ, _ = strconv.ParseFloat(strings.TrimSpace(parts[2]), 64)
			}
		}
	}
	// Slice location: multi-frame files derive it from the position component
	// matching the slice orientation; single-frame files read tag (0020,1041).
	var sliceLocation float64
	if framePos != nil {
		switch orientationType {
		case "SAG":
			sliceLocation = posX
		case "COR":
			sliceLocation = posY
		default:
			sliceLocation = posZ
		}
	} else {
		sliceLocation = readFloatTag(data, 0x0020, 0x1041)
	}
	// Window center/width may carry two backslash-separated presets each.
	wc1, wc2 := readFloatTag2(data, 0x0028, 0x1050)
	ww1, ww2 := readFloatTag2(data, 0x0028, 0x1051)
	rows := readIntTagSmart(data, 0x0028, 0x0010)
	cols := readIntTagSmart(data, 0x0028, 0x0011)
	sliceData := map[string]interface{}{
		"sop_instance_uid":           sopUID,
		"slice_location":             sliceLocation,
		"slice_thickness":            readFloatTag(data, 0x0018, 0x0050),
		"rows":                       rows,
		"cols":                       cols,
		"pixel_spacing_row":          psRow,
		"pixel_spacing_col":          psCol,
		"bits_allocated":             readIntTagSmart(data, 0x0028, 0x0100),
		"bits_stored":                readIntTagSmart(data, 0x0028, 0x0101),
		"high_bit":                   readIntTagSmart(data, 0x0028, 0x0102),
		"pixel_representation":       readIntTagSmart(data, 0x0028, 0x0103),
		"window_center":              wc1,
		"window_width":               ww1,
		"window_center_2":            wc2,
		"window_width_2":             ww2,
		"rescale_intercept":          readFloatTag(data, 0x0028, 0x1052),
		"rescale_slope":              readFloatTag(data, 0x0028, 0x1053),
		"image_position_x":           posX,
		"image_position_y":           posY,
		"image_position_z":           posZ,
		"image_orientation":          readStringTag(data, 0x0020, 0x0037),
		"tr":                         readFloatTag(data, 0x0018, 0x0080),
		"te":                         readFloatTag(data, 0x0018, 0x0081),
		"ti":                         readFloatTag(data, 0x0018, 0x0082),
		"flip_angle":                 readFloatTag(data, 0x0018, 0x1314),
		"echo_number":                readIntTag(data, 0x0018, 0x0086),
		"spacing_between_slices":     readFloatTag(data, 0x0018, 0x0088),
		"acquisition_time":           readStringTag(data, 0x0008, 0x0032),
		"content_time":               readStringTag(data, 0x0008, 0x0033),
		"photometric_interpretation": readStringTag(data, 0x0028, 0x0004),
		"samples_per_pixel":          readIntTagSmart(data, 0x0028, 0x0002),
		"pixel_min":                  pixelMin,
		"pixel_max":                  pixelMax,
		"path":                       objPath,
	}
	dataJSON, _ := json.Marshal(sliceData)
	// Summary example: "AX #12 z=34.5mm 512x512".
	summary := fmt.Sprintf("%s #%d z=%.1fmm %dx%d", orientationType, instanceNumber, sliceLocation, cols, rows)
	e := &lib.Entry{
		EntryID:   sliceID,
		DossierID: dossierID,
		ParentID:  seriesID,
		Category:  lib.CategoryImaging,
		Type:      "slice",
		Value:     sopUID,
		Summary:   summary,
		Ordinal:   instanceNumber,
		Timestamp: time.Now().Unix(),
		Data:      string(dataJSON),
	}
	return lib.EntryWrite(nil, e) // nil ctx - import tool
}
// ============================================================================
// FILE SCANNING
// ============================================================================
// Slice holds file path and instance number for sorting
type Slice struct {
	Path        string // path to the DICOM file on disk
	InstanceNum int    // Instance Number (0020,0013); sort key within a series
}
// findDICOMFiles recursively finds all DICOM files in a directory
func findDICOMFiles(root string) ([]string, error) {
var files []string
var nonDicom []string
scanned := 0
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return nil
}
if info.IsDir() {
return nil
}
scanned++
if scanned%100 == 0 {
fmt.Printf("\rScanning... %d files, %d DICOM, %d other", scanned, len(files), len(nonDicom))
}
name := info.Name()
if name == "DICOMDIR" || name == "LOCKFILE" || name == "VERSION" ||
strings.HasSuffix(name, ".jpg") || strings.HasSuffix(name, ".png") ||
strings.HasSuffix(name, ".exe") || strings.HasSuffix(name, ".dll") {
nonDicom = append(nonDicom, path)
return nil
}
data, err := os.ReadFile(path)
if err != nil || len(data) < 132 {
nonDicom = append(nonDicom, path)
return nil
}
if string(data[128:132]) == "DICM" {
files = append(files, path)
} else {
nonDicom = append(nonDicom, path)
}
return nil
})
fmt.Printf("\rScanned %d files, found %d DICOM, %d other\n", scanned, len(files), len(nonDicom))
if len(nonDicom) > 0 {
logPath := "/tmp/dicom_import_other.log"
if f, err := os.Create(logPath); err == nil {
for _, path := range nonDicom {
fmt.Fprintln(f, path)
}
f.Close()
fmt.Printf("Non-DICOM files written to: %s\n", logPath)
}
}
return files, err
}
// ============================================================================
// MAIN IMPORT LOGIC
// ============================================================================
// importFromPath converts all DICOM files in a directory tree into study /
// series / slice entries plus one PNG object per frame.
//
// Pipeline per series:
//  1. group files by "SeriesInstanceUID|description" and sort by instance number
//  2. create (or reuse) the study and series entries
//  3. for each file: skip localizers, decompress via gdcmconv when the
//     transfer syntax is compressed, then write each frame as a 16-bit
//     grayscale (or 8-bit RGB) PNG and insert a slice entry
//  4. accumulate a content bounding box across grayscale slices and store it
//     on the series when cropping would save more than 5% of pixels
//
// seriesFilter, when non-empty, is an upper-cased substring the series
// description must contain; non-matching series are skipped entirely.
// Per-file failures are logged and skipped; only scan failures and an empty
// input tree are returned as errors.
func importFromPath(inputPath string, dossierID string, seriesFilter string) error {
	log("Scanning %s\n", inputPath)
	files, err := findDICOMFiles(inputPath)
	if err != nil {
		return err
	}
	if len(files) == 0 {
		return fmt.Errorf("no DICOM files found in %s", inputPath)
	}
	fmt.Printf("Dossier ID: %s\n", dossierID)
	// Group files by series
	seriesFiles := make(map[string][]Slice)
	seriesData := make(map[string][]byte)
	for _, path := range files {
		data, err := os.ReadFile(path)
		if err != nil {
			continue
		}
		seriesUID := readStringTag(data, 0x0020, 0x000E)
		seriesDesc := readStringTag(data, 0x0008, 0x103E)
		instanceNum := readIntTagSmart(data, 0x0020, 0x0013)
		seriesKey := seriesUID + "|" + seriesDesc
		seriesFiles[seriesKey] = append(seriesFiles[seriesKey], Slice{Path: path, InstanceNum: instanceNum})
		if _, exists := seriesData[seriesKey]; !exists {
			// Keep the first file's bytes as the series' representative header.
			seriesData[seriesKey] = data
		}
	}
	log("Found %d series\n", len(seriesFiles))
	for seriesKey, slices := range seriesFiles {
		// Process slices in instance-number order.
		sort.Slice(slices, func(i, j int) bool {
			return slices[i].InstanceNum < slices[j].InstanceNum
		})
		data := seriesData[seriesKey]
		seriesDesc := readStringTag(data, 0x0008, 0x103E)
		if seriesDesc == "" {
			seriesDesc = "UNKNOWN"
		}
		// Apply series filter if specified
		if seriesFilter != "" && !strings.Contains(strings.ToUpper(seriesDesc), seriesFilter) {
			log("Skipping series (filter): %s\n", seriesDesc)
			continue
		}
		studyID, err := getOrCreateStudy(data, dossierID)
		if err != nil {
			fmt.Printf("Error creating study: %v\n", err)
			continue
		}
		seriesID, err := getOrCreateSeries(data, dossierID, studyID)
		if err != nil {
			fmt.Printf("Error creating series: %v\n", err)
			continue
		}
		// Pre-count total frames (multi-frame files contribute NumberOfFrames
		// each) so progress lines can read "n/total".
		totalFrames := 0
		for _, slice := range slices {
			sliceData, err := os.ReadFile(slice.Path)
			if err != nil {
				continue
			}
			nf := readIntTagSmart(sliceData, 0x0028, 0x0008)
			if nf == 0 {
				nf = 1
			}
			totalFrames += nf
		}
		// Get slice thickness (0018,0050); fall back to spacing between
		// slices (0018,0088) when absent.
		sliceThickness := readFloatTag(data, 0x0018, 0x0050)
		if sliceThickness == 0 {
			sliceThickness = readFloatTag(data, 0x0018, 0x0088)
		}
		log("\nProcessing series: %s (%d frames, ST=%.1fmm)\n", seriesDesc, totalFrames, sliceThickness)
		// Track bounding box union across all slices in this series
		seriesBBox := [4]int{-1, -1, -1, -1} // x1, y1, x2, y2; -1 = none yet
		seriesRows, seriesCols := 0, 0       // Track dimensions for validation
		frameCounter := 0
		for _, slice := range slices {
			data, err := os.ReadFile(slice.Path)
			if err != nil {
				continue
			}
			if isLocalizer(data, seriesDesc) {
				fmt.Printf(" Skipping localizer: %s\n", slice.Path)
				continue
			}
			transferSyntax := getTransferSyntax(data)
			isCompressed := isCompressedTransferSyntax(transferSyntax)
			rows := readIntTagSmart(data, 0x0028, 0x0010)
			cols := readIntTagSmart(data, 0x0028, 0x0011)
			if rows == 0 || cols == 0 {
				// No readable dimensions: cannot interpret pixel data.
				continue
			}
			numFrames := readIntTagSmart(data, 0x0028, 0x0008)
			if numFrames == 0 {
				numFrames = 1
			}
			samplesPerPixel := readIntTagSmart(data, 0x0028, 0x0002)
			if samplesPerPixel == 0 {
				samplesPerPixel = 1
			}
			isRGB := samplesPerPixel == 3
			framePositions := parsePerFramePositions(data, numFrames)
			orientationType := getOrientationType(data)
			// 16-bit grayscale unless RGB (3 bytes per pixel).
			bytesPerPixel := 2
			if isRGB {
				bytesPerPixel = 3
			}
			frameSize := rows * cols * bytesPerPixel
			expectedBytes := frameSize * numFrames
			pixelPos := findPixelData(data, expectedBytes)
			if pixelPos < 0 {
				// Size-validated search failed; fall back to the last
				// (7FE0,0010) occurrence in the file.
				pixelPos = findLastTag(data, 0x7FE0, 0x0010)
			}
			if pixelPos < 0 {
				continue
			}
			// Long-form VRs (OW/OB) carry 2 reserved + 4 length bytes before
			// the payload; short-form VRs only 2 length bytes.
			vr := string(data[pixelPos+4 : pixelPos+6])
			var pixelDataStart int
			if vr == "OW" || vr == "OB" {
				pixelDataStart = pixelPos + 12
			} else {
				pixelDataStart = pixelPos + 8
			}
			// Handle compressed DICOM (JPEG Lossless, JPEG 2000, etc) - decompress first
			if isCompressed {
				decompData, err := decompressDICOM(slice.Path)
				if err != nil {
					fmt.Printf(" Error decompressing: %v\n", err)
					continue
				}
				// Re-read dimensions and find pixel data in decompressed DICOM
				decompRows := readIntTagSmart(decompData, 0x0028, 0x0010)
				decompCols := readIntTagSmart(decompData, 0x0028, 0x0011)
				if decompRows == 0 || decompCols == 0 {
					decompRows, decompCols = rows, cols
				}
				decompExpectedBytes := decompRows * decompCols * bytesPerPixel * numFrames
				decompPixelPos := findPixelData(decompData, decompExpectedBytes)
				if decompPixelPos < 0 {
					decompPixelPos = findLastTag(decompData, 0x7FE0, 0x0010)
				}
				if decompPixelPos < 0 {
					fmt.Printf(" Error: no pixel data in decompressed DICOM\n")
					continue
				}
				decompVR := string(decompData[decompPixelPos+4 : decompPixelPos+6])
				var decompPixelStart int
				if decompVR == "OW" || decompVR == "OB" {
					decompPixelStart = decompPixelPos + 12
				} else {
					decompPixelStart = decompPixelPos + 8
				}
				// Process each frame from decompressed data
				decompFrameSize := decompRows * decompCols * bytesPerPixel
				for frameIdx := 0; frameIdx < numFrames; frameIdx++ {
					frameCounter++
					frameOffset := decompPixelStart + (frameIdx * decompFrameSize)
					var framePos *FramePosition
					if frameIdx < len(framePositions) {
						framePos = &framePositions[frameIdx]
					}
					sliceID := lib.NewID()
					if isRGB {
						// 8-bit RGB path: copy 3 bytes per pixel, zero-pad past EOF.
						numPixels := decompRows * decompCols
						pixels := make([]byte, numPixels*3)
						for i := 0; i < numPixels*3; i++ {
							offset := frameOffset + i
							if offset < len(decompData) {
								pixels[i] = decompData[offset]
							}
						}
						pngData, err := encodeRGBPNG(pixels, decompCols, decompRows)
						if err != nil {
							fmt.Printf(" Error encoding RGB: %v\n", err)
							continue
						}
						if err := lib.ObjectWrite(nil, dossierID, sliceID, pngData); err != nil { // nil ctx - import tool
							fmt.Printf(" Error saving RGB: %v\n", err)
							continue
						}
						if err := insertSlice(data, sliceID, dossierID, seriesID, 0, 0, orientationType, frameCounter, framePos); err != nil {
							fmt.Printf(" Error inserting slice: %v\n", err)
						}
					} else {
						// 16-bit grayscale path: track min/max for windowing.
						numPixels := decompRows * decompCols
						pixels := make([]uint16, numPixels)
						var minVal, maxVal uint16 = 65535, 0
						for i := 0; i < numPixels; i++ {
							offset := frameOffset + i*2
							if offset+2 > len(decompData) {
								break
							}
							v := binary.LittleEndian.Uint16(decompData[offset : offset+2])
							pixels[i] = v
							if v < minVal {
								minVal = v
							}
							if v > maxVal {
								maxVal = v
							}
						}
						// Compute bounding box and union with series
						x1, y1, x2, y2, valid := boundingBox(pixels, decompCols, decompRows, minVal, maxVal)
						if valid {
							sliceBBox := [4]int{x1, y1, x2, y2}
							if seriesBBox[0] < 0 {
								seriesBBox = sliceBBox
								seriesRows, seriesCols = decompRows, decompCols
							} else {
								seriesBBox = unionBBox(seriesBBox, sliceBBox)
							}
						}
						pngData, err := encode16BitPNG(pixels, decompCols, decompRows)
						if err != nil {
							fmt.Printf(" Error encoding 16-bit: %v\n", err)
							continue
						}
						if err := lib.ObjectWrite(nil, dossierID, sliceID, pngData); err != nil { // nil ctx - import tool
							fmt.Printf(" Error saving 16-bit: %v\n", err)
							continue
						}
						if err := insertSlice(data, sliceID, dossierID, seriesID, int(minVal), int(maxVal), orientationType, frameCounter, framePos); err != nil {
							fmt.Printf(" Error inserting slice: %v\n", err)
						}
					}
					fmt.Printf(" %d/%d %s (decompressed) %s\n", frameCounter, totalFrames, sliceID, slice.Path)
				}
				continue
			}
			// Handle uncompressed DICOM (possibly multi-frame)
			for frameIdx := 0; frameIdx < numFrames; frameIdx++ {
				frameCounter++
				frameOffset := pixelDataStart + (frameIdx * frameSize)
				var framePos *FramePosition
				if frameIdx < len(framePositions) {
					framePos = &framePositions[frameIdx]
				}
				sliceID := lib.NewID()
				if isRGB {
					// 8-bit RGB path: copy 3 bytes per pixel, zero-pad past EOF.
					numPixels := rows * cols
					pixels := make([]byte, numPixels*3)
					for i := 0; i < numPixels*3; i++ {
						offset := frameOffset + i
						if offset < len(data) {
							pixels[i] = data[offset]
						}
					}
					pngData, err := encodeRGBPNG(pixels, cols, rows)
					if err != nil {
						fmt.Printf(" Error encoding RGB: %v\n", err)
						continue
					}
					if err := lib.ObjectWrite(nil, dossierID, sliceID, pngData); err != nil { // nil ctx - import tool
						fmt.Printf(" Error saving RGB: %v\n", err)
						continue
					}
					if err := insertSlice(data, sliceID, dossierID, seriesID, 0, 0, orientationType, frameCounter, framePos); err != nil {
						fmt.Printf(" Error inserting slice: %v\n", err)
					}
				} else {
					// 16-bit grayscale path: track min/max for windowing.
					numPixels := rows * cols
					pixels := make([]uint16, numPixels)
					var minVal, maxVal uint16 = 65535, 0
					for i := 0; i < numPixels; i++ {
						offset := frameOffset + i*2
						if offset+2 > len(data) {
							break
						}
						v := binary.LittleEndian.Uint16(data[offset : offset+2])
						pixels[i] = v
						if v < minVal {
							minVal = v
						}
						if v > maxVal {
							maxVal = v
						}
					}
					// Compute bounding box and union with series
					x1, y1, x2, y2, valid := boundingBox(pixels, cols, rows, minVal, maxVal)
					if valid {
						sliceBBox := [4]int{x1, y1, x2, y2}
						if seriesBBox[0] < 0 {
							seriesBBox = sliceBBox
							seriesRows, seriesCols = rows, cols
						} else {
							seriesBBox = unionBBox(seriesBBox, sliceBBox)
						}
					}
					pngData, err := encode16BitPNG(pixels, cols, rows)
					if err != nil {
						fmt.Printf(" Error encoding 16-bit: %v\n", err)
						continue
					}
					if err := lib.ObjectWrite(nil, dossierID, sliceID, pngData); err != nil { // nil ctx - import tool
						fmt.Printf(" Error saving 16-bit: %v\n", err)
						continue
					}
					if err := insertSlice(data, sliceID, dossierID, seriesID, int(minVal), int(maxVal), orientationType, frameCounter, framePos); err != nil {
						fmt.Printf(" Error inserting slice: %v\n", err)
					}
				}
				fmt.Printf(" %d/%d %s %s\n", frameCounter, totalFrames, sliceID, slice.Path)
			}
		}
		// Update series with bounding box if we have valid data
		if seriesBBox[0] >= 0 && seriesRows > 0 && seriesCols > 0 {
			// Only store crop if it actually removes something (at least 5% border)
			cropW := seriesBBox[2] - seriesBBox[0] + 1
			cropH := seriesBBox[3] - seriesBBox[1] + 1
			savedPct := 100.0 * (1.0 - float64(cropW*cropH)/float64(seriesCols*seriesRows))
			if savedPct > 5.0 {
				// Read existing series entry to update its Data field
				series, err := lib.EntryGet(nil, seriesID) // nil ctx - import tool
				if err == nil {
					var seriesDataMap map[string]interface{}
					json.Unmarshal([]byte(series.Data), &seriesDataMap)
					if seriesDataMap == nil {
						seriesDataMap = make(map[string]interface{})
					}
					seriesDataMap["crop_x1"] = seriesBBox[0]
					seriesDataMap["crop_y1"] = seriesBBox[1]
					seriesDataMap["crop_x2"] = seriesBBox[2]
					seriesDataMap["crop_y2"] = seriesBBox[3]
					seriesDataMap["crop_width"] = cropW
					seriesDataMap["crop_height"] = cropH
					updatedData, _ := json.Marshal(seriesDataMap)
					series.Data = string(updatedData)
					lib.EntryWrite(nil, series) // nil ctx - import tool
					log(" Crop: %d,%d → %d,%d (%dx%d, saves %.0f%% pixels)\n",
						seriesBBox[0], seriesBBox[1], seriesBBox[2], seriesBBox[3], cropW, cropH, savedPct)
				}
			} else {
				log(" No significant crop (would only save %.0f%%)\n", savedPct)
			}
		}
	}
	return nil
}
const Version = "2026-01-13a" // Use gdcmconv for all compressed DICOM (JPEG 2000 support)
var logFile *os.File
func log(format string, args ...interface{}) {
msg := fmt.Sprintf(format, args...)
fmt.Print(msg)
if logFile != nil {
logFile.WriteString(msg)
}
}
// main drives the import tool: parse arguments, open a timestamped log file,
// initialize the dossier store, optionally wipe existing imaging data after
// an interactive confirmation, then run importFromPath.
//
// Usage: dicom_import <dossier_id_hex> <path> [series_filter]
func main() {
	fmt.Printf("dicom_import %s\n", Version)
	if len(os.Args) < 3 {
		fmt.Println("Usage: dicom_import <dossier_id_hex> <path> [series_filter]")
		fmt.Println(" series_filter: optional, e.g. 'AX FLAIR' to only import matching series")
		os.Exit(1)
	}
	// Setup log file (a failure here is non-fatal; console output continues).
	logPath := fmt.Sprintf("/tmp/dicom_import_%s.log", time.Now().Format("20060102_150405"))
	var err error
	logFile, err = os.Create(logPath)
	if err != nil {
		fmt.Printf("Warning: could not create log file: %v\n", err)
	} else {
		defer logFile.Close()
		fmt.Printf("Logging to: %s\n", logPath)
	}
	dossierID := os.Args[1]
	// Dossier IDs are exactly 16 hex characters; reject anything else up front.
	if len(dossierID) != 16 {
		fmt.Printf("Invalid dossier ID: %s (must be 16 hex characters)\n", dossierID)
		os.Exit(1)
	}
	inputPath := os.Args[2]
	// Expand a leading "~/" to the user's home directory.
	if strings.HasPrefix(inputPath, "~/") {
		home, _ := os.UserHomeDir()
		inputPath = filepath.Join(home, inputPath[2:])
	}
	seriesFilter := ""
	if len(os.Args) >= 4 {
		// Matching in importFromPath is against upper-cased descriptions.
		seriesFilter = strings.ToUpper(os.Args[3])
		fmt.Printf("Series filter: %s\n", seriesFilter)
	}
	if err := lib.Init(); err != nil {
		fmt.Printf("Error initializing: %v\n", err)
		os.Exit(1)
	}
	defer lib.DBClose()
	fmt.Println("Initialized")
	// Look up dossier
	dossier, err := lib.DossierGet(nil, dossierID) // nil ctx - import tool
	if err != nil {
		fmt.Printf("Error: dossier %s not found\n", dossierID)
		os.Exit(1)
	}
	dob := "unknown"
	if !dossier.DOB.IsZero() {
		dob = dossier.DOB.Format("2006-01-02")
	}
	fmt.Printf("Dossier: %s (DOB: %s)\n", dossier.Name, dob)
	// Check for existing imaging data
	existing, _ := lib.EntryList(lib.SystemAccessorID, "", lib.CategoryImaging, &lib.EntryFilter{DossierID: dossierID, Limit: 1}) // nil ctx - import tool
	if len(existing) > 0 {
		// Destructive reset is offered interactively; only the literal
		// answer "yes" wipes existing entries and objects.
		fmt.Printf("Clean existing imaging data? (yes/no): ")
		reader := bufio.NewReader(os.Stdin)
		answer, _ := reader.ReadString('\n')
		if strings.TrimSpace(answer) == "yes" {
			fmt.Print("Cleaning...")
			lib.EntryRemoveByDossier(nil, dossierID)  // nil ctx - import tool
			lib.ObjectRemoveByDossier(nil, dossierID) // nil ctx - import tool
			fmt.Println(" done")
		}
	}
	if err := importFromPath(inputPath, dossierID, seriesFilter); err != nil {
		fmt.Printf("Error: %v\n", err)
		os.Exit(1)
	}
}