From b020d2e193b2e00ea25360da315cc688d45f3e41 Mon Sep 17 00:00:00 2001
From: Anders Holck <anders@holck.se>
Date: Wed, 27 May 2026 11:03:30 +0200
Subject: [PATCH] Stream AI responses (SSE) - text appears as it is generated

- Backend streams tokens via Server-Sent Events
- Frontend reads stream with fetch + ReadableStream
- Edit mode: document updates live as tokens arrive
- Chat mode: response text appears progressively
- No more waiting for full generation to complete
---
 frontend/src/App.vue | 44 ++++++++++++++++++------
 internal/api/ai.go   | 82 +++++++++++++++++++++++++++++++-------------
 2 files changed, 93 insertions(+), 33 deletions(-)

diff --git a/frontend/src/App.vue b/frontend/src/App.vue
index 57f6110..b74b2c8 100644
--- a/frontend/src/App.vue
+++ b/frontend/src/App.vue
@@ -1014,21 +1014,45 @@ async function sendAiChat() {
 
   const action = aiChatMode.value === 'edit' ? 'edit' : 'chat'
   try {
-    const res = await api('/api/ai/chat', {
-      path: currentFile.value,
-      content: content.value,
-      message: msg,
-      mode: action,
+    const res = await fetch('/api/ai/chat', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'Authorization': 'Bearer ' + token.value },
+      body: JSON.stringify({ path: currentFile.value, content: content.value, message: msg, mode: action }),
     })
-    if (action === 'edit' && res.content) {
-      content.value = res.content
+
+    const reader = res.body.getReader()
+    const decoder = new TextDecoder()
+    let fullText = ''
+    let buffer = ''
+
+    while (true) {
+      const { done, value } = await reader.read()
+      if (done) break
+      buffer += decoder.decode(value, { stream: true })
+      const lines = buffer.split('\n')
+      buffer = lines.pop()
+      for (const line of lines) {
+        if (!line.startsWith('data: ')) continue
+        const data = line.slice(6)
+        if (data === '[DONE]') break
+        try {
+          const token_text = JSON.parse(data)
+          fullText += token_text
+          if (action === 'edit') {
+            content.value = fullText
+          } else {
+            aiChatResponse.value = fullText
+          }
+        } catch {}
+      }
+    }
+
+    if (action === 'edit') {
       isDirty.value = true
       aiChatResponse.value = 'Document updated.'
-    } else {
-      aiChatResponse.value = res.result || res.content || 'No response'
     }
   } catch (e) {
-    aiChatResponse.value = 'AI request failed. Check MH_AI_ENDPOINT.'
+    aiChatResponse.value = 'AI request failed.'
   }
   aiChatLoading.value = false
 }
diff --git a/internal/api/ai.go b/internal/api/ai.go
index c2bc280..6475933 100644
--- a/internal/api/ai.go
+++ b/internal/api/ai.go
@@ -1,6 +1,7 @@
 package api
 
 import (
+	"bufio"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -136,7 +137,7 @@ func (s *Server) handleAIChat(w http.ResponseWriter, r *http.Request) {
 		Path    string `json:"path"`
 		Content string `json:"content"`
 		Message string `json:"message"`
-		Mode    string `json:"mode"` // "edit" or "chat"
+		Mode    string `json:"mode"`
 	}
 	if err := decodeBody(r, &req); err != nil || req.Message == "" {
 		writeJSON(w, 400, map[string]string{"error": "message required"})
@@ -154,42 +155,77 @@ func (s *Server) handleAIChat(w http.ResponseWriter, r *http.Request) {
 		aiModel = "gpt-4"
 	}
 
-	var systemPrompt string
-	var userMsg string
-
+	var systemPrompt, userMsg string
 	if req.Mode == "edit" {
 		systemPrompt = "You are a document editor. The user will give you a markdown document and an instruction. " +
 			"Apply the instruction and return the COMPLETE updated document. " +
 			"Do not add explanations or wrap in code fences. Return raw markdown only."
 		userMsg = "Document:\n\n" + req.Content + "\n\nInstruction: " + req.Message
 	} else {
-		systemPrompt = `You are a helpful writing assistant. The user has a markdown document open and is asking a question about it.
-Answer concisely in markdown. Reference the document content when relevant.`
+		systemPrompt = "You are a helpful writing assistant. The user has a markdown document open. Answer concisely in markdown."
 		userMsg = "Document:\n\n" + req.Content + "\n\nQuestion: " + req.Message
 	}
 
-	response, err := callLLM(aiEndpoint, aiKey, aiModel, systemPrompt, userMsg)
-	if err != nil {
-		writeJSON(w, 500, map[string]string{"error": "AI call failed: " + err.Error()})
+	// Stream SSE response
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+	flusher, ok := w.(http.Flusher)
+	if !ok {
+		writeJSON(w, 500, map[string]string{"error": "streaming not supported"})
 		return
 	}
 
-	if req.Mode == "edit" {
-		// Strip markdown code fences if AI wrapped the output
-		response = strings.TrimSpace(response)
-		if strings.HasPrefix(response, "```") {
-			lines := strings.Split(response, "\n")
-			if len(lines) > 2 {
-				lines = lines[1:] // remove opening fence
-				if strings.TrimSpace(lines[len(lines)-1]) == "```" {
-					lines = lines[:len(lines)-1] // remove closing fence
-				}
-				response = strings.Join(lines, "\n")
+	payload := map[string]interface{}{
+		"model":       aiModel,
+		"messages":    []map[string]string{{"role": "system", "content": systemPrompt}, {"role": "user", "content": userMsg}},
+		"temperature": 0.3,
+		"stream":      true,
+	}
+	body, _ := json.Marshal(payload)
+	url := strings.TrimRight(aiEndpoint, "/") + "/chat/completions"
+	aiReq, _ := http.NewRequest("POST", url, strings.NewReader(string(body)))
+	aiReq.Header.Set("Content-Type", "application/json")
+	if aiKey != "" {
+		aiReq.Header.Set("Authorization", "Bearer "+aiKey)
+	}
+
+	client := &http.Client{Timeout: 120 * time.Second}
+	resp, err := client.Do(aiReq)
+	if err != nil {
+		fmt.Fprintf(w, "data: {\"error\":\"AI unreachable\"}\n\n")
+		flusher.Flush()
+		return
+	}
+	defer resp.Body.Close()
+
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if !strings.HasPrefix(line, "data: ") {
+			continue
+		}
+		data := line[6:]
+		if data == "[DONE]" {
+			fmt.Fprintf(w, "data: [DONE]\n\n")
+			flusher.Flush()
+			break
+		}
+		var chunk struct {
+			Choices []struct {
+				Delta struct {
+					Content string `json:"content"`
+				} `json:"delta"`
+			} `json:"choices"`
+		}
+		if json.Unmarshal([]byte(data), &chunk) == nil && len(chunk.Choices) > 0 {
+			token := chunk.Choices[0].Delta.Content
+			if token != "" {
+				tokenJSON, _ := json.Marshal(token)
+				fmt.Fprintf(w, "data: %s\n\n", tokenJSON)
+				flusher.Flush()
 			}
 		}
-		writeJSON(w, 200, map[string]string{"content": response})
-	} else {
-		writeJSON(w, 200, map[string]string{"result": response})
 	}
 }