inou/tools/translate/main.go

525 lines
14 KiB
Go

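// translate: LLM draft → optional LLM review → write
// (originally MiniMax M2.5 draft → GLM 5 review; the models are now set by
// translateModel and reviewModel below)
//
// Usage:
//
//	translate --lang de --what faq
//	translate --lang all --what all
//	translate --lang de --what yaml --no-review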
package main
import (
"bufio"
"bytes"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// apiURL is the OpenRouter chat-completions endpoint that callLLM POSTs to.
const apiURL = "https://openrouter.ai/api/v1/chat/completions"
// Model selection and credentials shared by every LLM call in this tool.
var (
// translateModel is the model ID used for the draft translation pass.
translateModel = "anthropic/claude-sonnet-4-6"
// reviewModel is the model ID passed to callLLM by the review pass.
// NOTE(review): it is empty while review is on by default — confirm that
// running without --no-review is expected to work with an empty model ID.
reviewModel = "" // unused with --no-review
// apiKey is sent as the Bearer token; main fills it from --key, the
// OPENROUTER_API_KEY environment variable, or an env file (see envKey).
apiKey string
)
// langName maps a language code to the English name of the language.
// The English name is what gets interpolated into the LLM prompts, and the
// key set doubles as the list of languages selected by "--lang all".
var langName = map[string]string{
	"da": "Danish",
	"de": "German",
	"es": "Spanish",
	"fi": "Finnish",
	"fr": "French",
	"hi": "Hindi",
	"id": "Indonesian",
	"it": "Italian",
	"ja": "Japanese",
	"ko": "Korean",
	"nl": "Dutch",
	"no": "Norwegian",
	"pl": "Polish",
	"pt": "Portuguese",
	"ru": "Russian",
	"sv": "Swedish",
	"th": "Thai",
	"tr": "Turkish",
	"uk": "Ukrainian",
	"vi": "Vietnamese",
	"zh": "Chinese (Simplified)",
}
// langNative maps a language code to the language's name written in that
// language. doYAML_ uses it as the value of the "language_name" key.
var langNative = map[string]string{
	"da": "Dansk",
	"de": "Deutsch",
	"es": "Español",
	"fi": "Suomi",
	"fr": "Français",
	"hi": "हिन्दी",
	"id": "Bahasa Indonesia",
	"it": "Italiano",
	"ja": "日本語",
	"ko": "한국어",
	"nl": "Nederlands",
	"no": "Norsk",
	"pl": "Polski",
	"pt": "Português",
	"ru": "Русский",
	"sv": "Svenska",
	"th": "ไทย",
	"tr": "Türkçe",
	"uk": "Українська",
	"vi": "Tiếng Việt",
	"zh": "中文",
}
// langAddress maps a language code to the form of address the translation
// should use. The value is interpolated verbatim into the prompts
// ("Use %s address forms"), so every code present in langName needs an entry;
// a missing entry yields a prompt with an empty address instruction.
var langAddress = map[string]string{
	"da": "du/din",
	"de": "du/dein (informal)",
	"es": "tú (informal)",
	// Finnish was absent from the table, leaving its prompts without an
	// address instruction.
	"fi": "sinä/sinun (informal)",
	"fr": "tu/ton (informal)",
	"hi": "तुम (informal)",
	"id": "kamu (informal)",
	"it": "tu (informal)",
	"ja": "polite form (です/ます)",
	"ko": "존댓말 (polite)",
	"nl": "je/jouw (informal)",
	"no": "du/din",
	"pl": "ty (informal)",
	"pt": "você",
	"ru": "ты (informal)",
	"sv": "du/din",
	"th": "คุณ (polite)",
	"tr": "sen (informal)",
	"uk": "ти (informal)",
	"vi": "bạn (informal)",
	"zh": "你 (informal)",
}
// knownTemplates lists the template base names processed by "--what all",
// in the order they are translated.
var knownTemplates = []string{
	// Public pages
	"landing",
	"faq",
	"pricing",
	"privacy",
	"security",
	"dpa",
	"terms",
	"connect",
	"docs",
	"consent",

	// App forms
	"upload",
	"audit",
	"invite",
	"add_dossier",
	"onboard",
	"edit_rbac",
	"install_public",
	"api",

	// Email (resolved to lib/templates/)
	"email_invite",
	"email_invite_copy",
	"email_share",
	"email_verify",
}
func main() {
lang := flag.String("lang", "", "language(s) or 'all'")
what := flag.String("what", "", "template name(s), 'yaml', or 'all'")
key := flag.String("key", "", "OpenRouter API key")
portal := flag.String("portal", "", "portal directory")
noReview := flag.Bool("no-review", false, "skip review pass")
flag.Parse()
apiKey = pick(*key, os.Getenv("OPENROUTER_API_KEY"), envKey("OPENROUTER_API_KEY"))
if apiKey == "" {
die("no API key")
}
portalDir := *portal
if portalDir == "" {
portalDir = findPortalDir()
}
if *lang == "" || *what == "" {
die("Usage: translate --lang <code|all> --what <name|yaml|all>")
}
langs := resolveLangs(*lang)
tmpls, doYAML := resolveWhat(*what)
review := !*noReview
total := len(langs) * (len(tmpls) + b2i(doYAML))
n, errors := 0, 0
for _, l := range langs {
for _, t := range tmpls {
n++
fmt.Printf("[%d/%d] %s/%s ", n, total, l, t)
if err := doTemplate(portalDir, l, t, review); err != nil {
fmt.Printf("ERROR: %v\n", err)
errors++
}
}
if doYAML {
n++
fmt.Printf("[%d/%d] %s/yaml ", n, total, l)
if err := doYAML_(portalDir, l, review); err != nil {
fmt.Printf("ERROR: %v\n", err)
errors++
}
}
}
fmt.Printf("\nDone: %d/%d succeeded\n", total-errors, total)
if errors > 0 {
os.Exit(1)
}
}
// --- Template ---
// doTemplate translates one template into lang and writes the result next to
// the source as "<name>_<lang>.tmpl".
//
// portalDir is the portal directory; templates whose name starts with
// "email_" are read from and written to ../lib/templates relative to it, all
// others use portalDir/templates. When review is true the draft is passed
// through reviewContent before being written.
//
// It returns an error if the source cannot be read, the draft LLM call fails,
// or the output file cannot be written. A failed review is not an error: the
// unreviewed draft is written instead.
func doTemplate(portalDir, lang, name string, review bool) error {
	// Email templates live in lib/templates/, everything else in portal/templates/
	tmplDir := filepath.Join(portalDir, "templates")
	if strings.HasPrefix(name, "email_") {
		tmplDir = filepath.Join(portalDir, "..", "lib", "templates")
	}

	src, err := os.ReadFile(filepath.Join(tmplDir, name+".tmpl"))
	if err != nil {
		return err
	}

	ln, addr := langName[lang], langAddress[lang]
	prompt := fmt.Sprintf(`Translate this web page template from English to %s.
RULES:
- Preserve ALL HTML tags, attributes, classes, IDs exactly
- Preserve ALL Go template syntax ({{.T.key}}, {{range}}, {{end}}, {{if}}, etc.)
- Preserve ALL JavaScript and CSS untranslated
- Never translate "inou"
- Keep English loanwords %s speakers actually use
- Use %s address forms
- Keep technical terms: DICOM, MRI, CT, FLAIR, MCP, API, JSON, CSV, HIPAA, GDPR, OCR, PDF
- Rename {{define "X"}} to {{define "X_%s"}}
- Output ONLY the translated template`, ln, ln, addr, lang)

	fmt.Printf("draft... ")
	translated, err := callLLM(translateModel, prompt, string(src), 8192)
	if err != nil {
		return err
	}

	// Models often wrap the output in a code fence and do not reliably rename
	// the define block, so normalise both before review.
	translated = stripFences(translated)
	translated = ensureDefine(translated, name, lang)

	if review {
		fmt.Printf("review... ")
		translated = reviewContent(translated, ln, "template")
	}

	// Report a failed write instead of printing "ok" for a file that was
	// never written.
	outPath := filepath.Join(tmplDir, name+"_"+lang+".tmpl")
	if err := os.WriteFile(outPath, []byte(translated), 0644); err != nil {
		return fmt.Errorf("write translation: %w", err)
	}
	fmt.Println("ok")
	return nil
}
// --- YAML ---
// doYAML_ translates the UI strings in portalDir/lang/en.yaml into lang and
// writes portalDir/lang/<lang>.yaml.
//
// Keys starting with "lang_" are copied through unchanged and "language_name"
// is set from langNative; every other key/value pair is sent to the model as
// one JSON object. Keys the model leaves out keep their English line, because
// rebuildYAML only replaces keys present in the translated map.
//
// It returns an error if en.yaml cannot be read, the LLM call fails, the
// reply is not a JSON object of strings, or the output cannot be written.
func doYAML_(portalDir, lang string, review bool) error {
	langDir := filepath.Join(portalDir, "lang")
	src, err := os.ReadFile(filepath.Join(langDir, "en.yaml"))
	if err != nil {
		return err
	}

	ln, addr := langName[lang], langAddress[lang]

	keys := make(map[string]string)
	passthrough := make(map[string]string)
	for k, v := range extractKeys(string(src)) {
		switch {
		case strings.HasPrefix(k, "lang_"):
			passthrough[k] = v
		case k == "language_name":
			passthrough[k] = langNative[lang]
		default:
			keys[k] = v
		}
	}

	keysJSON, err := json.Marshal(keys)
	if err != nil {
		return fmt.Errorf("encode strings: %w", err)
	}

	prompt := fmt.Sprintf(`Translate these UI strings from English to %s.
Return a JSON object with same keys and translated values.
RULES: keep format placeholders (%%s %%d), never translate "inou", use %s address, keep technical terms.
Return ONLY valid JSON.`, ln, addr)

	fmt.Printf("draft... ")
	result, err := callLLM(translateModel, prompt, string(keysJSON), 16384)
	if err != nil {
		return err
	}

	var translated map[string]string
	if err := json.Unmarshal([]byte(stripFences(result)), &translated); err != nil {
		return fmt.Errorf("bad JSON: %w", err)
	}
	// A literal JSON null decodes without error but leaves the map nil;
	// writing the passthrough keys into it below would panic.
	if translated == nil {
		return fmt.Errorf("bad JSON: no translations in response")
	}

	if review {
		fmt.Printf("GLM review... ")
		reviewYAML_(keys, translated, ln)
	}

	// Passthrough values win over anything the model produced for those keys.
	for k, v := range passthrough {
		translated[k] = v
	}

	out := rebuildYAML(string(src), translated)
	if err := os.WriteFile(filepath.Join(langDir, lang+".yaml"), []byte(out), 0644); err != nil {
		return fmt.Errorf("write translation: %w", err)
	}
	fmt.Println("ok")
	return nil
}
// --- LLM call ---
// callLLM sends one chat-completion request to apiURL and returns the text of
// the first choice.
//
// system and user become the system and user messages. maxTokHint, when
// positive, is sent as max_tokens. Timing and response size are printed to
// stdout as progress output.
//
// An error is returned when the request cannot be built or sent, the body
// cannot be read, the HTTP status is not 200, the body is not valid JSON, the
// API reports an error, there are no choices, the content is empty, or the
// output was cut off at max_tokens (finish_reason "length").
func callLLM(model, system, user string, maxTokHint int) (string, error) {
	body := map[string]any{
		"model": model,
		"messages": []map[string]string{
			{"role": "system", "content": system},
			{"role": "user", "content": user},
		},
		"temperature": 0.1,
	}
	if maxTokHint > 0 {
		body["max_tokens"] = maxTokHint
	}

	data, err := json.Marshal(body)
	if err != nil {
		return "", fmt.Errorf("encode request: %w", err)
	}
	req, err := http.NewRequest(http.MethodPost, apiURL, bytes.NewReader(data))
	if err != nil {
		return "", fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+apiKey)

	// Reasoning models think before generating, so the timeout scales with
	// the input size (one second per 80 bytes plus a minute) and is never
	// below three minutes. Timings noted when this was tuned:
	// pricing (8KB) = 40s, landing (16KB) ~80s, faq (51KB) ~200s
	timeout := time.Duration(len(user)/80+60) * time.Second
	if timeout < 3*time.Minute {
		timeout = 3 * time.Minute
	}

	t0 := time.Now()
	resp, err := (&http.Client{Timeout: timeout}).Do(req)
	if err != nil {
		return "", fmt.Errorf("[%.0fs] %w", time.Since(t0).Seconds(), err)
	}
	defer resp.Body.Close()

	// The client timeout also covers reading the body, so a read error here
	// is usually the timeout firing mid-response; report it as such rather
	// than as malformed JSON from a partial body.
	respData, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("[%.0fs] read response: %w", time.Since(t0).Seconds(), err)
	}
	fmt.Printf("[%.0fs %dKB] ", time.Since(t0).Seconds(), len(respData)/1024)

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d: %.300s", resp.StatusCode, respData)
	}

	var r struct {
		Choices []struct {
			Message struct {
				Content   string `json:"content"`
				Reasoning string `json:"reasoning"`
			} `json:"message"`
			FinishReason string `json:"finish_reason"`
		} `json:"choices"`
		Error *struct {
			Message string `json:"message"`
		} `json:"error"`
	}
	if err := json.Unmarshal(respData, &r); err != nil {
		return "", fmt.Errorf("bad JSON: %.300s", respData)
	}
	if r.Error != nil {
		return "", fmt.Errorf("API: %s", r.Error.Message)
	}
	if len(r.Choices) == 0 {
		return "", fmt.Errorf("no choices: %.300s", respData)
	}

	choice := r.Choices[0]
	if choice.Message.Content == "" {
		return "", fmt.Errorf("empty content (reasoning only): %.200s", choice.Message.Reasoning)
	}
	// A truncated translation must never be written out as if complete.
	if choice.FinishReason == "length" {
		return "", fmt.Errorf("truncated (output hit max_tokens)")
	}
	return choice.Message.Content, nil
}
// --- Review ---
// fix is one correction proposed by the review model, decoded from the JSON
// array it returns (see parseFixes).
type fix struct {
// Old identifies what to change: reviewContent treats it as an exact
// substring of the template, reviewYAML_ treats it as a string key.
Old string `json:"old"`
// New is the replacement text (reviewContent) or corrected value (reviewYAML_).
New string `json:"new"`
// Reason is the reviewer's explanation; it is decoded but not read by the
// code visible in this file.
Reason string `json:"reason"`
}
// reviewContent asks the review model for corrections to a translated
// document and applies them.
//
// content is the translated text and ln the English name of the target
// language. kind is accepted but not used by the body. Each returned fix is
// applied to the first occurrence of its "old" string; fixes with an empty
// "old" or "new", or whose "old" is not found, are skipped. If the review
// call fails the error is printed and content is returned unchanged.
func reviewContent(content, ln, kind string) string {
	prompt := fmt.Sprintf(`Review this %s translation. Fix anything that sounds translated rather than natural — awkward phrasing, overly literal constructions, unidiomatic word choices. Also fix broken HTML/Go template tags.
Return JSON array: [{"old":"exact string","new":"fix","reason":"why"}]. If natural, return [].`, ln)

	reply, err := callLLM(reviewModel, prompt, content, 4096)
	if err != nil {
		fmt.Printf("(review error: %v) ", err)
		return content
	}

	var applied int
	for _, f := range parseFixes(reply) {
		if f.Old == "" || f.New == "" || !strings.Contains(content, f.Old) {
			continue
		}
		content = strings.Replace(content, f.Old, f.New, 1)
		applied++
	}

	if applied != 0 {
		fmt.Printf("(%d fixes) ", applied)
	}
	return content
}
// reviewYAML_ asks the review model to check translated UI strings and
// applies its corrections to translated in place.
//
// english holds the source strings, translated the draft (same keys), and ln
// the English name of the target language. The model is sent a JSON object
// of {"en","tr"} pairs keyed by string key and replies with fixes whose
// "old" field is a key. A fix is applied only when its key exists in
// translated and its "new" value is non-empty, so a malformed fix can never
// blank out a string. If encoding or the review call fails, the error is
// printed and translated is left untouched.
func reviewYAML_(english, translated map[string]string, ln string) {
	type pair struct {
		EN string `json:"en"`
		TR string `json:"tr"`
	}

	combined := make(map[string]pair, len(english))
	for k, v := range english {
		combined[k] = pair{EN: v, TR: translated[k]}
	}
	data, err := json.Marshal(combined)
	if err != nil {
		fmt.Printf("(review error: %v) ", err)
		return
	}

	prompt := fmt.Sprintf(`Review these %s UI string translations. "en" is original, "tr" is translation.
Fix anything that sounds translated rather than natural — awkward phrasing, unidiomatic choices. Preserve format placeholders (%%s %%d).
Return JSON: [{"old":"yaml_key","new":"corrected value","reason":"why"}]. If natural, return [].`, ln)

	result, err := callLLM(reviewModel, prompt, string(data), 4096)
	if err != nil {
		fmt.Printf("(review error: %v) ", err)
		return
	}

	// Count only the fixes actually applied, matching what reviewContent
	// reports, rather than everything the model proposed.
	applied := 0
	for _, f := range parseFixes(result) {
		if f.New == "" {
			continue
		}
		if _, ok := translated[f.Old]; !ok {
			continue
		}
		translated[f.Old] = f.New
		applied++
	}
	if applied > 0 {
		fmt.Printf("(%d fixes) ", applied)
	}
}
// parseFixes decodes the review model's reply into a list of fixes.
//
// The reply is trimmed and stripped of code fences, then decoded as a JSON
// array. If that fails, a second attempt is made on the span from the first
// "[" to the last "]", which tolerates prose around the array. A reply that
// cannot be decoded yields whatever the decoder left in the slice, normally
// nothing.
func parseFixes(raw string) []fix {
	text := stripFences(strings.TrimSpace(raw))

	var parsed []fix
	if err := json.Unmarshal([]byte(text), &parsed); err == nil {
		return parsed
	}

	open := strings.Index(text, "[")
	if open < 0 {
		return parsed
	}
	if end := strings.LastIndex(text, "]"); end > open {
		// Best effort: a failure here simply leaves parsed as it was.
		_ = json.Unmarshal([]byte(text[open:end+1]), &parsed)
	}
	return parsed
}
// --- Helpers ---
// ensureDefine makes sure content declares the template "<name>_<lang>".
//
// If the exact {{define "<name>_<lang>"}} header is already present, content
// is returned unchanged. Otherwise the first {{define "..."}} header found is
// replaced with it; if there is none, the header is prepended on its own line.
func ensureDefine(content, name, lang string) string {
	const (
		opener = `{{define "`
		closer = `"}}`
	)
	header := fmt.Sprintf("%s%s_%s%s", opener, name, lang, closer)
	if strings.Contains(content, header) {
		return content
	}

	prepended := header + "\n" + content

	start := strings.Index(content, opener)
	if start < 0 {
		return prepended
	}
	// end is relative to start and points at the quote that closes the name.
	end := strings.Index(content[start:], closer)
	if end <= 0 {
		return prepended
	}
	return content[:start] + header + content[start+end+len(closer):]
}
// extractKeys reads a flat "key: value" file line by line and returns the
// pairs it finds.
//
// Blank lines and lines whose first non-space character is "#" are skipped.
// A line is split at its first colon; the key and value are trimmed, and one
// pair of surrounding double quotes is removed from the value (escape
// sequences inside are left as written). Lines with no colon, an empty key,
// or an empty value are ignored.
func extractKeys(src string) map[string]string {
	result := map[string]string{}

	sc := bufio.NewScanner(strings.NewReader(src))
	for sc.Scan() {
		raw := sc.Text()

		trimmed := strings.TrimSpace(raw)
		if trimmed == "" || strings.HasPrefix(trimmed, "#") {
			continue
		}

		colon := strings.Index(raw, ":")
		if colon <= 0 {
			continue
		}

		key := strings.TrimSpace(raw[:colon])
		value := strings.TrimSpace(raw[colon+1:])
		if len(value) >= 2 && strings.HasPrefix(value, `"`) && strings.HasSuffix(value, `"`) {
			value = value[1 : len(value)-1]
		}

		if key == "" || value == "" {
			continue
		}
		result[key] = value
	}
	return result
}
// rebuildYAML produces the translated file by walking src line by line and
// substituting values from translated.
//
// Comment lines, blank lines, lines without a colon, and lines whose key has
// no entry in translated are copied through unchanged, so the output keeps
// the source's order and comments. A replaced line is written as
// "key: value" with the key trimmed; the value is double-quoted when
// needsQuote says so or when it contains a newline, carriage return or tab.
// Inside quotes, backslash, double quote and those control characters are
// written as escape sequences, so a multi-line value decoded from the
// model's JSON cannot spill onto a following line and corrupt the file.
func rebuildYAML(src string, translated map[string]string) string {
	// Single-pass replacer: each input character is escaped exactly once.
	escaper := strings.NewReplacer(
		`\`, `\\`,
		`"`, `\"`,
		"\n", `\n`,
		"\r", `\r`,
		"\t", `\t`,
	)

	var out strings.Builder
	out.Grow(len(src))

	scanner := bufio.NewScanner(strings.NewReader(src))
	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)
		idx := strings.Index(line, ":")

		if trimmed == "" || strings.HasPrefix(trimmed, "#") || idx < 0 {
			out.WriteString(line + "\n")
			continue
		}

		key := strings.TrimSpace(line[:idx])
		val, ok := translated[key]
		switch {
		case !ok:
			out.WriteString(line + "\n")
		case needsQuote(val) || strings.ContainsAny(val, "\n\r\t"):
			out.WriteString(key + ": \"" + escaper.Replace(val) + "\"\n")
		default:
			out.WriteString(key + ": " + val + "\n")
		}
	}
	return out.String()
}
// needsQuote reports whether s must be written as a double-quoted YAML
// scalar to be read back as the same string.
//
// It returns true for the empty string, for values containing a character
// with special meaning in YAML, for values with a leading or trailing space
// or a leading "-", and for words that YAML parsers may resolve to a boolean
// or null. The reserved words are matched case-insensitively because
// translations routinely produce capitalised forms such as "No" or "Yes".
// Quoting more than strictly necessary is harmless, so the check errs on the
// side of quoting.
func needsQuote(s string) bool {
	if s == "" {
		return true
	}
	if strings.ContainsAny(s, `:{}[]&*?|>!%@`+"`\"'#") {
		return true
	}
	if s[0] == ' ' || s[len(s)-1] == ' ' || s[0] == '-' {
		return true
	}
	switch strings.ToLower(s) {
	case "true", "false", "null", "yes", "no", "on", "off", "~":
		return true
	}
	return false
}
// stripFences removes a Markdown code fence wrapped around s, if any, and
// trims surrounding whitespace.
//
// When s starts with "```", the opening fence is dropped together with its
// language tag: if the rest of the first line looks like a tag ("json",
// "go", "gotemplate", ...), that whole line goes. For a fence written on one
// line with its content, only the known tags "json", "yaml" and "html" are
// stripped. A trailing "```" is removed as well. Text without fences is
// returned trimmed.
func stripFences(s string) string {
	const fence = "```"

	s = strings.TrimSpace(s)
	if strings.HasPrefix(s, fence) {
		s = s[len(fence):]
		if nl := strings.IndexByte(s, '\n'); nl >= 0 && isFenceTag(strings.TrimSpace(s[:nl])) {
			// Any tag on its own first line is fence metadata, not content.
			s = s[nl+1:]
		} else {
			for _, tag := range []string{"json", "yaml", "html"} {
				if strings.HasPrefix(s, tag) {
					s = s[len(tag):]
					break
				}
			}
		}
	}
	s = strings.TrimSuffix(s, fence)
	return strings.TrimSpace(s)
}

// isFenceTag reports whether s is non-empty and consists only of characters
// used in code-fence language tags (ASCII letters, digits, and "-_+.#").
func isFenceTag(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
		case c == '-', c == '_', c == '+', c == '.', c == '#':
		default:
			return false
		}
	}
	return true
}
// envKey looks up name in a "NAME=value" env file and returns the trimmed
// value of the first matching line.
//
// It tries "anthropic.env" in the working directory, then
// "/tank/inou/anthropic.env". Files that cannot be read are skipped. It
// returns "" when no readable file has a line starting with "name=".
func envKey(name string) string {
	prefix := name + "="
	candidates := [...]string{"anthropic.env", "/tank/inou/anthropic.env"}

	for _, path := range candidates {
		raw, err := os.ReadFile(path)
		if err != nil {
			continue
		}
		for _, entry := range strings.Split(string(raw), "\n") {
			entry = strings.TrimSpace(entry)
			if !strings.HasPrefix(entry, prefix) {
				continue
			}
			return strings.TrimSpace(entry[len(prefix):])
		}
	}
	return ""
}
// findPortalDir walks upward from the working directory looking for a
// directory that contains "portal/templates" and returns the path of that
// "portal" directory. If the filesystem root is reached without a match it
// calls die, which terminates the process.
func findPortalDir() string {
	cur, _ := os.Getwd()
	for {
		candidate := filepath.Join(cur, "portal")
		if _, err := os.Stat(filepath.Join(candidate, "templates")); err == nil {
			return candidate
		}

		parent := filepath.Dir(cur)
		if parent == cur {
			// filepath.Dir is a fixed point at the root: nowhere left to look.
			die("cannot find portal/")
			return ""
		}
		cur = parent
	}
}
// resolveLangs expands the --lang argument into a list of language codes.
// "all" yields every key of langName in sorted order; anything else is split
// on commas and returned as given.
func resolveLangs(spec string) []string {
	if spec != "all" {
		return strings.Split(spec, ",")
	}

	var codes []string
	for code := range langName {
		codes = append(codes, code)
	}
	// Map iteration order is random; sort so runs are reproducible.
	sort.Strings(codes)
	return codes
}
// resolveWhat expands the --what argument into the template names to process
// and a flag saying whether the YAML strings should be processed too.
// "all" selects knownTemplates plus YAML; otherwise the argument is split on
// commas, with the entry "yaml" setting the flag and every other entry taken
// as a template name.
func resolveWhat(spec string) ([]string, bool) {
	if spec == "all" {
		return knownTemplates, true
	}

	var (
		names    []string
		wantYAML bool
	)
	for _, part := range strings.Split(spec, ",") {
		switch part {
		case "yaml":
			wantYAML = true
		default:
			names = append(names, part)
		}
	}
	return names, wantYAML
}
// pick returns the first non-empty string among its arguments, or "" if
// there is none.
func pick(ss ...string) string {
	for i := range ss {
		if ss[i] == "" {
			continue
		}
		return ss[i]
	}
	return ""
}
// b2i converts a bool to an int: 1 for true, 0 for false.
func b2i(b bool) int {
	n := 0
	if b {
		n = 1
	}
	return n
}
// die writes msg followed by a newline to standard error and terminates the
// process with exit status 1. It does not return.
func die(msg string) {
	fmt.Fprintf(os.Stderr, "%s\n", msg)
	os.Exit(1)
}