BareGit

Tavern-translate: handle reasoning-model output

Reasoning / harmony-style models (gpt-oss, DeepSeek-R1, Qwen) emit
their chain of thought alongside the translation. Strip <think> blocks
and harmony channel markers, keeping only the final answer.

Some inference servers fail to parse such output and return an HTTP
error that echoes the raw generation; recover the translation embedded
in those error bodies instead of failing the node.
Author: MetroWind <chris.corsair@gmail.com>
Date: Sun May 31 21:52:33 2026 -0700
Commit: c80994622aa0d1205c31b061761b6e4b5838d1fe

Changes

diff --git a/tavern-translate/app.js b/tavern-translate/app.js
index 624a885..4571c3f 100644
--- a/tavern-translate/app.js
+++ b/tavern-translate/app.js
@@ -160,6 +160,45 @@ function extractApiError(raw)
     }
 }
 
+// Matches a "harmony" channel marker as emitted by reasoning models such
+// as gpt-oss: the canonical <|channel|>, the mangled <|channel> and
+// <channel|> variants seen in the wild, and (as written) a bare <channel>.
+const CHANNEL_RE = /<\|?channel\|?>/;
+
+function hasChannelMarkers(text)
+{
+    return typeof text === 'string' && CHANNEL_RE.test(text);
+}
+
+// Some models emit their chain of thought alongside the final answer.
+// Strip the thinking portion and return only the final response.
+function extractFinalAnswer(text)
+{
+    if(typeof text !== 'string') return text;
+
+    let result = text;
+
+    // <think>...</think> style reasoning (DeepSeek-R1, Qwen, etc.).
+    result = result.replace(/<think>[\s\S]*?<\/think>/gi, '');
+
+    // Harmony channel markers (gpt-oss and similar): the output is split
+    // into channels such as "analysis" and "final"; keep only the last.
+    if(CHANNEL_RE.test(result))
+    {
+        const parts = result.split(/<\|?channel\|?>/g);
+        result = parts[parts.length - 1];
+        // Drop a leading channel label plus message marker, e.g.
+        // "final<|message|>".
+        result = result.replace(
+            /^\s*(final|analysis|commentary|thought)?\s*<\|?message\|?>\s*/i, '');
+    }
+
+    // Strip a single trailing harmony control token, e.g. <|return|>.
+    result = result.replace(/<\|(end|return|endoftext)\|>\s*$/i, '');
+
+    return result.trim();
+}
+
 // Perform a single translation request. Reads the response as text
 // first so that a non-JSON body (an HTML error page from a proxy, an
 // empty body, etc.) produces a meaningful error instead of a cryptic
@@ -188,6 +227,14 @@ async function requestTranslation(text)
     {
         const message = extractApiError(raw) ||
             `HTTP ${response.status} ${response.statusText}`;
+        // Some inference servers fail to parse the output of gpt-oss /
+        // "harmony" models and return an HTTP error whose body echoes the
+        // raw generation. The real translation is still in there, after
+        // the final channel marker -- recover it.
+        if(hasChannelMarkers(message))
+        {
+            return extractFinalAnswer(message);
+        }
         const err = new Error(message);
         // Rate limiting and server errors are usually transient.
         err.retriable = response.status === 429 || response.status >= 500;
@@ -216,7 +263,7 @@ async function requestTranslation(text)
         throw new Error(extractApiError(raw) ||
             'API response did not contain a translation.');
     }
-    return content;
+    return extractFinalAnswer(content);
 }
 
 async function translateNode(node, force = false)