diff --git a/docproc b/docproc
index 575960e..5e8b18b 100755
Binary files a/docproc and b/docproc differ
diff --git a/processor/processor.go b/processor/processor.go
index af618af..5ce1a81 100644
--- a/processor/processor.go
+++ b/processor/processor.go
@@ -130,12 +130,19 @@ func (p *Processor) ProcessFile(path string) error {
 		mimeType = "image/png"
 	case ".jpg", ".jpeg":
 		mimeType = "image/jpeg"
+	case ".txt", ".md":
+		mimeType = "text/plain"
 	default:
 		return fmt.Errorf("unsupported file type: %s", ext)
 	}
 
 	// Send to Kimi for OCR + classification
-	result, err := p.analyzeWithKimi(data, mimeType, filepath.Base(path))
+	var result *KimiResult
+	if mimeType == "text/plain" {
+		result, err = p.analyzeTextWithKimi(data, filepath.Base(path))
+	} else {
+		result, err = p.analyzeWithKimi(data, mimeType, filepath.Base(path))
+	}
 	if err != nil {
 		return fmt.Errorf("Kimi analysis failed: %w", err)
 	}
@@ -230,6 +237,138 @@ type KimiResult struct {
 	Date      string
 }
 
+// analyzeTextWithKimi asks the model at p.apiURL to classify and summarize a
+// plain-text document (.txt or .md).
+//
+// data is the document text itself, so FullText in the result is always taken
+// from data instead of from the model's reply. filename is currently unused;
+// it keeps the signature parallel to analyzeWithKimi.
+//
+// An error is returned when the request cannot be built or sent, when the API
+// answers with a non-200 status, or when the response envelope cannot be
+// decoded or contains no choices. A reply whose content is not valid JSON is
+// not an error: the document is returned under category "other".
+func (p *Processor) analyzeTextWithKimi(data []byte, filename string) (*KimiResult, error) {
+	text := string(data)
+
+	// The model is not asked to echo the document back: the text is already
+	// known, and repeating it would eat into max_tokens and risk a truncated,
+	// unparseable JSON reply.
+	prompt := fmt.Sprintf(`Analyze this document text and extract:
+
+1. **Category**: One of: tax, expense, bill, invoice, medical, receipt, bank, insurance, legal, correspondence, other
+2. **Summary**: 2-3 sentence summary of the document
+3. **Is Expense**: true/false - is this a business expense, receipt, or invoice?
+4. **Amount**: If expense/invoice, the total amount (e.g., "$123.45")
+5. **Vendor**: If expense/invoice, the vendor/company name
+6. **Date**: Document date in YYYY-MM-DD format if visible
+
+Respond in JSON format:
+{
+  "category": "...",
+  "summary": "...",
+  "is_expense": true/false,
+  "amount": "...",
+  "vendor": "...",
+  "date": "..."
+}
+
+DOCUMENT TEXT:
+%s`, text)
+
+	reqBody := map[string]interface{}{
+		"model": "accounts/fireworks/models/kimi-k2p5",
+		"messages": []map[string]interface{}{
+			{
+				"role":    "user",
+				"content": prompt,
+			},
+		},
+		"max_tokens":  4096,
+		"temperature": 0.1,
+	}
+
+	jsonBody, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("encoding request: %w", err)
+	}
+
+	req, err := http.NewRequest(http.MethodPost, p.apiURL, bytes.NewReader(jsonBody))
+	if err != nil {
+		return nil, fmt.Errorf("building request: %w", err)
+	}
+
+	req.Header.Set("Authorization", "Bearer "+p.apiKey)
+	req.Header.Set("Content-Type", "application/json")
+
+	client := &http.Client{Timeout: 120 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("sending request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body is only used to enrich the error message.
+		body, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, string(body))
+	}
+
+	var apiResp struct {
+		Choices []struct {
+			Message struct {
+				Content string `json:"content"`
+			} `json:"message"`
+		} `json:"choices"`
+	}
+
+	if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
+		return nil, fmt.Errorf("decoding response: %w", err)
+	}
+
+	if len(apiResp.Choices) == 0 {
+		return nil, fmt.Errorf("no response from API")
+	}
+
+	// In case the reply carries text around the JSON object, keep only the
+	// span from the first "{" to the last "}".
+	content := apiResp.Choices[0].Message.Content
+	jsonStart := strings.Index(content, "{")
+	jsonEnd := strings.LastIndex(content, "}")
+	if jsonStart >= 0 && jsonEnd > jsonStart {
+		content = content[jsonStart : jsonEnd+1]
+	}
+
+	var result struct {
+		Category  string `json:"category"`
+		Summary   string `json:"summary"`
+		IsExpense bool   `json:"is_expense"`
+		Amount    string `json:"amount"`
+		Vendor    string `json:"vendor"`
+		Date      string `json:"date"`
+	}
+
+	if err := json.Unmarshal([]byte(content), &result); err != nil {
+		// Deliberate best effort: keep the document even when the model's
+		// reply cannot be parsed.
+		return &KimiResult{
+			Category: "other",
+			Summary:  "Failed to parse text document",
+			FullText: text,
+		}, nil
+	}
+
+	return &KimiResult{
+		Category:  result.Category,
+		Summary:   result.Summary,
+		FullText:  text,
+		IsExpense: result.IsExpense,
+		Amount:    result.Amount,
+		Vendor:    result.Vendor,
+		Date:      result.Date,
+	}, nil
+}
+
 func (p *Processor) analyzeWithKimi(data []byte, mimeType, filename string) (*KimiResult, error) {
 	// Encode file as base64
 	b64Data := base64.StdEncoding.EncodeToString(data)
@@ -282,7 +421,7 @@ Respond in JSON format:
 	})
 
 	reqBody := map[string]interface{}{
-		"model": "accounts/fireworks/models/kimi-k2-5-instruct",
+		"model": "accounts/fireworks/models/kimi-k2p5",
 		"messages": []map[string]interface{}{
 			{
 				"role":    "user",
diff --git a/watcher/watcher.go b/watcher/watcher.go
index 1a5143f..274829e 100644
--- a/watcher/watcher.go
+++ b/watcher/watcher.go
@@ -88,7 +88,7 @@ func (w *Watcher) processExisting() {
 
 func (w *Watcher) processFile(path string) {
 	ext := strings.ToLower(filepath.Ext(path))
-	if ext != ".pdf" && ext != ".png" && ext != ".jpg" && ext != ".jpeg" {
+	if ext != ".pdf" && ext != ".png" && ext != ".jpg" && ext != ".jpeg" && ext != ".md" && ext != ".txt" {
 		return
 	}
 