From b020d2e193b2e00ea25360da315cc688d45f3e41 Mon Sep 17 00:00:00 2001 From: Anders Holck Date: Wed, 27 May 2026 11:03:30 +0200 Subject: [PATCH] Stream AI responses (SSE) - text appears as it generates - Backend streams tokens via Server-Sent Events - Frontend reads stream with fetch + ReadableStream - Edit mode: document updates live as tokens arrive - Chat mode: response text appears progressively - No more waiting for full generation to complete --- frontend/src/App.vue | 44 ++++++++++++++++++------ internal/api/ai.go | 82 +++++++++++++++++++++++++++++++------------- 2 files changed, 93 insertions(+), 33 deletions(-) diff --git a/frontend/src/App.vue b/frontend/src/App.vue index 57f6110..b74b2c8 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -1014,21 +1014,45 @@ async function sendAiChat() { const action = aiChatMode.value === 'edit' ? 'edit' : 'chat' try { - const res = await api('/api/ai/chat', { - path: currentFile.value, - content: content.value, - message: msg, - mode: action, + const res = await fetch('/api/ai/chat', { + method: 'POST', + headers: { 'Content-Type': 'application/json', 'Authorization': 'Bearer ' + token.value }, + body: JSON.stringify({ path: currentFile.value, content: content.value, message: msg, mode: action }), }) - if (action === 'edit' && res.content) { - content.value = res.content + + const reader = res.body.getReader() + const decoder = new TextDecoder() + let fullText = '' + let buffer = '' + + while (true) { + const { done, value } = await reader.read() + if (done) break + buffer += decoder.decode(value, { stream: true }) + const lines = buffer.split('\n') + buffer = lines.pop() + for (const line of lines) { + if (!line.startsWith('data: ')) continue + const data = line.slice(6) + if (data === '[DONE]') break + try { + const token_text = JSON.parse(data) + fullText += token_text + if (action === 'edit') { + content.value = fullText + } else { + aiChatResponse.value = fullText + } + } catch {} + } + } + + if (action === 'edit') { isDirty.value = true aiChatResponse.value = 'Document updated.' - } else { - aiChatResponse.value = res.result || res.content || 'No response' } } catch (e) { - aiChatResponse.value = 'AI request failed. Check MH_AI_ENDPOINT.' + aiChatResponse.value = 'AI request failed.' } aiChatLoading.value = false } diff --git a/internal/api/ai.go b/internal/api/ai.go index c2bc280..6475933 100644 --- a/internal/api/ai.go +++ b/internal/api/ai.go @@ -1,6 +1,7 @@ package api import ( + "bufio" "encoding/json" "fmt" "io" @@ -136,7 +137,7 @@ func (s *Server) handleAIChat(w http.ResponseWriter, r *http.Request) { Path string `json:"path"` Content string `json:"content"` Message string `json:"message"` - Mode string `json:"mode"` // "edit" or "chat" + Mode string `json:"mode"` } if err := decodeBody(r, &req); err != nil || req.Message == "" { writeJSON(w, 400, map[string]string{"error": "message required"}) @@ -154,42 +155,77 @@ func (s *Server) handleAIChat(w http.ResponseWriter, r *http.Request) { aiModel = "gpt-4" } - var systemPrompt string - var userMsg string - + var systemPrompt, userMsg string if req.Mode == "edit" { systemPrompt = "You are a document editor. The user will give you a markdown document and an instruction. " + "Apply the instruction and return the COMPLETE updated document. " + "Do not add explanations or wrap in code fences. Return raw markdown only." userMsg = "Document:\n\n" + req.Content + "\n\nInstruction: " + req.Message } else { - systemPrompt = `You are a helpful writing assistant. The user has a markdown document open and is asking a question about it. -Answer concisely in markdown. Reference the document content when relevant.` + systemPrompt = "You are a helpful writing assistant. The user has a markdown document open. Answer concisely in markdown." userMsg = "Document:\n\n" + req.Content + "\n\nQuestion: " + req.Message } - response, err := callLLM(aiEndpoint, aiKey, aiModel, systemPrompt, userMsg) - if err != nil { - writeJSON(w, 500, map[string]string{"error": "AI call failed: " + err.Error()}) + // Stream SSE response + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + flusher, ok := w.(http.Flusher) + if !ok { + writeJSON(w, 500, map[string]string{"error": "streaming not supported"}) return } - if req.Mode == "edit" { - // Strip markdown code fences if AI wrapped the output - response = strings.TrimSpace(response) - if strings.HasPrefix(response, "```") { - lines := strings.Split(response, "\n") - if len(lines) > 2 { - lines = lines[1:] // remove opening fence - if strings.TrimSpace(lines[len(lines)-1]) == "```" { - lines = lines[:len(lines)-1] // remove closing fence - } - response = strings.Join(lines, "\n") + payload := map[string]interface{}{ + "model": aiModel, + "messages": []map[string]string{{"role": "system", "content": systemPrompt}, {"role": "user", "content": userMsg}}, + "temperature": 0.3, + "stream": true, + } + body, _ := json.Marshal(payload) + url := strings.TrimRight(aiEndpoint, "/") + "/chat/completions" + aiReq, _ := http.NewRequest("POST", url, strings.NewReader(string(body))) + aiReq.Header.Set("Content-Type", "application/json") + if aiKey != "" { + aiReq.Header.Set("Authorization", "Bearer "+aiKey) + } + + client := &http.Client{Timeout: 120 * time.Second} + resp, err := client.Do(aiReq) + if err != nil { + fmt.Fprintf(w, "data: {\"error\":\"AI unreachable\"}\n\n") + flusher.Flush() + return + } + defer resp.Body.Close() + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() + if !strings.HasPrefix(line, "data: ") { + continue + } + data := line[6:] + if data == "[DONE]" { + fmt.Fprintf(w, "data: [DONE]\n\n") + flusher.Flush() + break + } + var chunk struct { + Choices []struct { + Delta struct { + Content string `json:"content"` + } `json:"delta"` + } `json:"choices"` + } + if json.Unmarshal([]byte(data), &chunk) == nil && len(chunk.Choices) > 0 { + token := chunk.Choices[0].Delta.Content + if token != "" { + tokenJSON, _ := json.Marshal(token) + fmt.Fprintf(w, "data: %s\n\n", tokenJSON) + flusher.Flush() } } - writeJSON(w, 200, map[string]string{"content": response}) - } else { - writeJSON(w, 200, map[string]string{"result": response}) } }