dotfiles/pi/.pi/agent/extensions/chat-claude.ts

/**
 * chat-claude — Distinctive Claude chat MODE inside pi.
 *
 * When chat mode is active, typed user input is routed to a Claude model
 * (haiku/sonnet/opus) via the `claude` CLI — NOT to pi's active LLM.
 *
 * Rendering goals (match pi's native chat UX):
 *   - Text appears as full markdown (no truncated previews, no dim grey).
 *   - Thinking blocks stream live as italic `thinkingText`-coloured markdown
 *     (the `claude` CLI is invoked with --include-partial-messages).
 *   - Tool calls use pi's normal tool-execution look (renderToolBlock).
 *
 * All turns of a single chat-mode session are rendered inside ONE continuous
 * orange border: the top line sits above the first turn, the bottom line
 * below the most recent turn, and the border extends live as new turns
 * (user + assistant) arrive. A new border starts each time the user enters
 * chat mode again via /claude / /claude-new.
 *
 * Commands:
 *   /claude [haiku|sonnet|opus]      — enter chat mode / switch model
 *   /claude-new [haiku|sonnet|opus]  — enter chat mode with a fresh Claude session
 *   /claude-resume                   — pick a past session for the current cwd and resume it
 *   /claude-end                      — exit chat mode
 *   /claude-abort                    — cancel an in-flight Claude response
 */

import { closeSync, openSync, readdirSync, readFileSync, readSync, statSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { copyToClipboard, CustomEditor, getMarkdownTheme } from "@mariozechner/pi-coding-agent";
import type { ExtensionAPI, KeybindingsManager } from "@mariozechner/pi-coding-agent";
import { Box, Container, matchesKey, Markdown, Spacer, Text, truncateToWidth, TUI, visibleWidth, type Component, type EditorTheme } from "@mariozechner/pi-tui";
import {
	formatUsage,
	renderToolBlock,
	runClaude,
	type StreamBlock,
} from "../shared/claude-stream.js";
import { startAskBridge, type AskBridge } from "../shared/pi-ask-bridge.js";
import { askSingleQuestionWithInlineNote } from "./pi-ask-tool/ask-inline-ui.js";

// ---------------------------------------------------------------------------
// Orange styling
// ---------------------------------------------------------------------------
// ANSI SGR sequences used for every orange-tinted element of chat mode.
const ORANGE     = "\x1b[38;5;208m"; // 256-colour foreground 208 — pumpkin / tangerine
const ORANGE_DIM = "\x1b[38;5;94m";  // 256-colour foreground 94 — the dimmed variant
const RESET      = "\x1b[0m";        // reset all attributes
const BOLD       = "\x1b[1m";        // bold

/** Colour `s` orange and reset all attributes afterwards. */
const orange     = (s: string) => `${ORANGE}${s}${RESET}`;
/** Colour `s` orange + bold and reset all attributes afterwards. */
const orangeBold = (s: string) => `${ORANGE}${BOLD}${s}${RESET}`;
/** Colour `s` with the dimmed orange and reset all attributes afterwards. */
const orangeDim  = (s: string) => `${ORANGE_DIM}${s}${RESET}`;

// ---------------------------------------------------------------------------
// Orange border wrapping helper — wraps an array of inner lines in a
// continuous orange box. Applied at the session level so the WHOLE chat
// conversation sits inside ONE box (top above first turn, bottom below
// most recent turn). Pure string→string — no component allocation per frame.
//
// IMPORTANT: `paddedInnerLines` must ALREADY be padded to the inner width
// (`width - 4` visible columns: each row gains "│ " on the left and " │" on
// the right). We don't call visibleWidth() here because that function
// invokes Intl.Segmenter (expensive ICU BreakIterator on every miss) and
// this wrapper runs on every single line of the session on every frame.
// Profile data showed 85% of pi's idle CPU being burned in Segmenter via
// this function. Callers (renderSessionLines) pre-pad inner lines once
// per turn and cache them, so the cost amortises to O(streaming tail).
// ---------------------------------------------------------------------------
/**
 * Frame already-padded lines in a rounded orange box.
 *
 * @param paddedInnerLines  Lines that are each exactly `width - 4` visible
 *                          columns wide (the frame adds "│ " and " │"). This
 *                          function does not measure or re-pad them.
 * @param width             Total outer width of the box, borders included.
 * @returns The top rule, one framed row per input line, then the bottom rule.
 */
function wrapInOrangeBorder(paddedInnerLines: string[], width: number): string[] {
	// String.prototype.repeat throws a RangeError for a negative count, so a
	// degenerate width (< 2) must be clamped rather than passed through.
	const rule = "─".repeat(Math.max(0, width - 2));
	const side = orange("│");

	const framed = paddedInnerLines.map((line) => side + " " + line + " " + side);
	return [orange("╭" + rule + "╮"), ...framed, orange("╰" + rule + "╯")];
}

// Pad a single inner line to exactly `innerWidth` visible columns, OR
// truncate it if it's already over-wide. Uses visibleWidth() — pi-tui's
// grapheme-aware width function (which is what sits on top of the hot
// Intl.Segmenter path). Intended to be called ONCE per line at cache-build
// time, NOT per frame.
//
// Truncation is a defensive safety net: any component that emits a line
// wider than the width it was handed would otherwise crash pi's TUI (see
// tui.js doRender: "Rendered line N exceeds terminal width"). Without this,
// one stray over-wide line (e.g. a long source code line inside a Read
// tool result) takes down the entire session.
/**
 * Force `line` to exactly `innerWidth` visible columns: over-wide lines are
 * cut with a trailing "…", narrower ones are right-padded with spaces.
 */
function padToInnerWidth(line: string, innerWidth: number): string {
	const deficit = innerWidth - visibleWidth(line);

	// Too wide — truncate (the final `true` asks truncateToWidth to pad).
	if (deficit < 0) {
		return truncateToWidth(line, innerWidth, "…", true);
	}
	// Exact fit needs no work; otherwise fill the gap with spaces.
	if (deficit > 0) {
		return line + " ".repeat(deficit);
	}
	return line;
}

// ---------------------------------------------------------------------------
// Read-tool result truncation
//
// `Read` tool calls inside chat mode often dump entire files into the result
// banner — many hundreds of lines, which buries the surrounding conversation.
// We cap the rendered file content at MAX_READ_LINES and append a single
// centered notice line describing how many lines were hidden. This is a
// PRESENTATION-only truncation: `block.result.text` is left untouched, so
// resumed sessions / re-renders still see the full content.
//
// Centering needs render-time width, so we implement a tiny custom Component
// (TruncatedReadResult) and swap it into the Box body produced by the shared
// renderToolBlock helper. The same dim line-number formatting used by
// renderToolResultBox is preserved so the truncated view looks identical to
// the un-truncated one above the notice.
// ---------------------------------------------------------------------------
/** Maximum number of file-content lines shown for a `Read` tool result. */
const MAX_READ_LINES = 40;

/**
 * Component that shows the first MAX_READ_LINES numbered lines of a `Read`
 * result followed by a centred "… N more lines hidden …" notice.
 *
 * Every emitted line is kept within the `width` handed to render(), the
 * notice included, so a narrow terminal cannot produce an over-wide line.
 */
class TruncatedReadResult implements Component {
	/**
	 * @param numbered   Parsed result lines: line-number text plus content.
	 * @param maxNumLen  Width the line numbers are left-padded to.
	 * @param dimFn      Styles the line-number gutter.
	 * @param noticeFn   Styles the "hidden lines" notice.
	 */
	constructor(
		private readonly numbered: { num: string; content: string }[],
		private readonly maxNumLen: number,
		private readonly dimFn: (s: string) => string,
		private readonly noticeFn: (s: string) => string,
	) {}

	invalidate(): void { /* stateless */ }

	render(width: number): string[] {
		const total   = this.numbered.length;
		const visible = Math.min(MAX_READ_LINES, total);

		// Truncate each row to `width` so one long source line (minified JS,
		// a long comment) cannot exceed the width this component was given.
		// `truncateToWidth` is ANSI-aware, so the dim SGR sequences around
		// the line number survive the cut.
		const lines = this.numbered.slice(0, visible).map((entry) => {
			const raw = this.dimFn(entry.num.padStart(this.maxNumLen)) + " " + entry.content;
			return truncateToWidth(raw, width, "…", false);
		});

		if (total > visible) {
			const hidden = total - visible;
			const notice = `… ${hidden} more line${hidden === 1 ? "" : "s"} hidden …`;
			const visLen = visibleWidth(notice);
			if (visLen > width) {
				// Not enough room to centre: the notice itself must be cut,
				// otherwise it would be the one line that exceeds `width`.
				lines.push(truncateToWidth(this.noticeFn(notice), width, "…", false));
			} else {
				const left = Math.floor((width - visLen) / 2);
				lines.push(" ".repeat(left) + this.noticeFn(notice));
			}
		}
		return lines;
	}
}

// Wrap shared renderToolBlock: for `Read` tool blocks whose result exceeds
// MAX_READ_LINES, replace the Box body's child Text with our truncating
// component. All other tool kinds, error results, and short reads pass
// through unchanged.
/**
 * Render a tool block via the shared `renderToolBlock`, then — only for a
 * successful `Read` whose result has more than MAX_READ_LINES non-empty
 * lines — swap the body Box's contents for a TruncatedReadResult.
 *
 * Any other tool, an error result, a short read, or an unexpected container
 * layout returns the shared rendering untouched.
 */
function renderToolBlockTruncated(block: Extract<StreamBlock, { type: "tool" }>, theme: any): Container {
	const rendered = renderToolBlock(block, theme);

	const isRead = block.name.toLowerCase() === "read";
	if (!isRead) return rendered;
	const result = block.result;
	if (!result || result.isError) return rendered;

	const contentLines = result.text.split("\n").filter((l) => l.length > 0);
	if (contentLines.length <= MAX_READ_LINES) return rendered;

	// Each line is "<number>\t<content>"; lines without a tab keep an empty
	// number. Track the widest number so the gutter can be aligned.
	let widestNum = 0;
	const numbered: { num: string; content: string }[] = [];
	for (const l of contentLines) {
		const tabAt = l.indexOf("\t");
		const entry = tabAt >= 0
			? { num: l.slice(0, tabAt), content: l.slice(tabAt + 1) }
			: { num: "", content: l };
		widestNum = Math.max(widestNum, entry.num.length);
		numbered.push(entry);
	}

	// The shared helper's container is expected to be [headerText, bodyBox].
	// If that layout ever changes, keep the untruncated rendering.
	const bodyBox = rendered.children[1];
	if (!(bodyBox instanceof Box)) return rendered;

	const dim = (s: string) => theme.fg("dim", s);
	bodyBox.clear();
	bodyBox.addChild(new TruncatedReadResult(numbered, widestNum, dim, dim));
	return rendered;
}

// ---------------------------------------------------------------------------
// Models / turn types
// ---------------------------------------------------------------------------
/** The model slots the chat-mode commands accept, in display order. */
const MODELS = ["haiku", "sonnet", "opus"] as const;
/** Union of the slot names above. */
type Model = (typeof MODELS)[number];
/** Upper-case the first UTF-16 code unit of `s`; "" stays "". */
const capitalize = (s: string) => `${s.slice(0, 1).toUpperCase()}${s.slice(1)}`;

// UI-facing model slot → the value passed as `claude --model <id>`.
//
// `opus` is deliberately pinned to claude-opus-4-6: Opus 4.7 (what the plain
// `opus` alias currently resolves to) returns thinking as an encrypted
// signature only — no `thinking_delta` events ever stream, so the italic
// thinking-block rendering stays blank for the entire turn. 4.6 streams
// plaintext thinking normally, so pinning it restores the feature for the
// `opus` slot.
//
// `haiku` and `sonnet` map to the CLI aliases of the same name, i.e. whatever
// the CLI currently resolves them to. They are listed explicitly (rather
// than passed through) so that, if a future alias bump lands on a model with
// the same redacted-thinking behaviour, the pin can be changed here without
// touching the rest of the extension.
const CLI_MODEL: Record<Model, string> = {
	haiku:  "haiku",
	sonnet: "sonnet",
	opus:   "claude-opus-4-6",
};

// ---------------------------------------------------------------------------
// Past-session discovery (used by /claude-resume).
//
// Claude CLI persists every session's transcript at:
//   ~/.claude/projects/<mangled-cwd>/<session-uuid>.jsonl
// where the mangling rule (verified empirically) is "replace every '/' and
// '.' with '-'". So /home/jonas/dotfiles/pi/.pi → -home-jonas-dotfiles-pi--pi
// (the leading '-' comes from the leading '/'; '.pi' contributes '--pi'
// because both '/' and '.' map to '-').
//
// We don't need to consult ~/.claude/sessions/ for this picker — that
// directory only contains metadata for currently-running Claude processes.
// The on-disk transcript at projects/<cwd>/<id>.jsonl is the source of
// truth for "past sessions in this directory".
// ---------------------------------------------------------------------------
/**
 * Convert a working directory into the directory name used under
 * ~/.claude/projects: every "/" and every "." becomes "-".
 */
function mangleCwd(cwd: string): string {
	return cwd.split(/[/.]/).join("-");
}

/**
 * Format how long ago the epoch-millisecond timestamp `ms` was, as a compact
 * label: "42s ago", "5m ago", "3h ago", "12d ago", "4mo ago", "2y ago".
 * Timestamps later than now yield "in the future".
 *
 * Months are approximated as 30 days and years as 365 days.
 */
function relativeTime(ms: number): string {
	const elapsed = Date.now() - ms;
	if (elapsed < 0) return "in the future";

	const sec = Math.floor(elapsed / 1000);
	if (sec < 60) return `${sec}s ago`;

	const min = Math.floor(sec / 60);
	if (min < 60) return `${min}m ago`;

	const hr = Math.floor(min / 60);
	if (hr < 24) return `${hr}h ago`;

	const day = Math.floor(hr / 24);
	if (day < 30) return `${day}d ago`;

	// Decide months-vs-years on the DAY count. Testing `months < 12` instead
	// leaves a gap: 360–364 days is 12 thirty-day months but 0 whole years,
	// which would print "0y ago".
	if (day < 365) return `${Math.floor(day / 30)}mo ago`;

	return `${Math.floor(day / 365)}y ago`;
}

/** Map a raw Claude model identifier (e.g. "claude-haiku-4-5-20251001") to
 *  one of our canonical short names. Returns null if no match. */
/**
 * Reduce a raw model identifier (e.g. "claude-haiku-4-5-20251001") to its
 * short slot name by case-insensitive substring match. The families are
 * tried in the order haiku → sonnet → opus; no match yields null.
 */
function normalizeRawModel(raw: string): Model | null {
	const lowered = raw.toLowerCase();
	const families: Model[] = ["haiku", "sonnet", "opus"];
	return families.find((family) => lowered.includes(family)) ?? null;
}

/** One entry in the list of past sessions built by readPastSessions(). */
interface PastSession {
	sessionId: string;        // transcript file name without the ".jsonl" suffix
	mtimeMs: number;          // transcript file's modification time (ms); the list's sort key
	firstUserMessage: string; // whitespace-collapsed first user prompt, "" if not found
	model: Model | null;      // short slot name derived from rawModel; null ⇒ couldn't determine
	rawModel: string;         // raw model string from the JSONL ("" if not found)
}

/** Read the head of a file (avoids slurping multi-MB JSONL transcripts). */
/**
 * Return up to the first `maxBytes` bytes of the file at `path`, decoded as
 * UTF-8. Only the head is read, so multi-MB transcripts are never loaded
 * whole. The descriptor is closed even when the read throws.
 */
function readFileHead(path: string, maxBytes: number): string {
	const handle = openSync(path, "r");
	try {
		const scratch   = Buffer.alloc(maxBytes);
		const bytesRead = readSync(handle, scratch, 0, maxBytes, 0);
		// Decode only the bytes actually read; the file may be shorter.
		return scratch.toString("utf8", 0, bytesRead);
	} finally {
		closeSync(handle);
	}
}

/** Pluck the first user message + first model id from a transcript head. */
/**
 * Pluck the first user message and the first model id from the head of a
 * JSONL transcript.
 *
 * Lines are scanned in order until both values are found. Blank lines,
 * lines that are not valid JSON (e.g. the final line cut off by the head
 * read) and lines whose JSON value is not an object are skipped.
 *
 * @param head  Leading portion of a transcript, one JSON event per line.
 * @returns `firstUserMessage` with whitespace runs collapsed and trimmed
 *          ("" if none was found) and `rawModel` exactly as it appears in
 *          the transcript ("" if none was found).
 */
function extractSessionMeta(head: string): { firstUserMessage: string; rawModel: string } {
	let firstUserMessage = "";
	let rawModel = "";

	for (const line of head.split("\n")) {
		if (firstUserMessage && rawModel) break;
		if (!line.trim()) continue;
		let ev: any;
		try { ev = JSON.parse(line); } catch { continue; }
		// JSON.parse also accepts bare literals. `null` in particular would
		// make the property reads below throw, so only objects are inspected.
		if (ev === null || typeof ev !== "object") continue;

		if (!firstUserMessage) {
			// Two equivalent sources: a queue-operation enqueue carries the raw
			// text the user typed; a `type: "user"` event carries it inside
			// message.content (which is either a string or an array of blocks).
			if (ev.type === "queue-operation" && ev.operation === "enqueue" && typeof ev.content === "string") {
				firstUserMessage = ev.content;
			} else if (ev.type === "user" && ev.message) {
				const c = ev.message.content;
				if (typeof c === "string") {
					firstUserMessage = c;
				} else if (Array.isArray(c)) {
					// Only text blocks count; tool_result blocks are skipped.
					firstUserMessage = c
						.filter((b: any) => b?.type === "text" && typeof b.text === "string")
						.map((b: any) => b.text as string)
						.join(" ");
				}
			}
		}

		if (!rawModel && typeof ev.message?.model === "string") {
			rawModel = ev.message.model;
		}
	}

	return {
		firstUserMessage: firstUserMessage.replace(/\s+/g, " ").trim(),
		rawModel,
	};
}

/**
 * List the Claude transcripts stored for `cwd`, newest first.
 *
 * Looks in ~/.claude/projects/<mangled-cwd> for "*.jsonl" files. A missing
 * or unreadable directory yields an empty list; a file that cannot be
 * stat'ed or read is left out.
 */
function readPastSessions(cwd: string): PastSession[] {
	const projectDir = join(homedir(), ".claude", "projects", mangleCwd(cwd));

	let transcripts: string[];
	try {
		transcripts = readdirSync(projectDir).filter((name) => name.endsWith(".jsonl"));
	} catch {
		return [];
	}

	const sessions: PastSession[] = [];
	for (const name of transcripts) {
		const transcriptPath = join(projectDir, name);

		let mtimeMs: number;
		let head: string;
		try {
			mtimeMs = statSync(transcriptPath).mtimeMs;
			// ~256 KB is enough to reach the first user message and the first
			// assistant turn (which carries the model id) in any reasonable
			// transcript, without paying for multi-MB reads.
			head = readFileHead(transcriptPath, 256 * 1024);
		} catch {
			continue;
		}

		const meta = extractSessionMeta(head);
		sessions.push({
			sessionId:        name.replace(/\.jsonl$/, ""),
			mtimeMs,
			firstUserMessage: meta.firstUserMessage,
			model:            meta.rawModel ? normalizeRawModel(meta.rawModel) : null,
			rawModel:         meta.rawModel,
		});
	}

	// Most recently modified transcript first.
	return sessions.sort((a, b) => b.mtimeMs - a.mtimeMs);
}

/** Truncate a string to `max` chars, appending "…" when cut. */
/**
 * Truncate `s` to at most `max` UTF-16 code units, appending "…" when cut.
 *
 * - Strings that already fit are returned unchanged.
 * - A non-positive `max` yields "" (there is no room even for the ellipsis).
 * - Whitespace just before the cut is trimmed so the result never reads
 *   "word …".
 * - The cut never leaves half of a surrogate pair behind.
 */
function truncate(s: string, max: number): string {
	if (s.length <= max) return s;
	if (max <= 0) return "";

	let head = s.slice(0, max - 1);
	// Cutting between the two halves of an astral character (e.g. an emoji)
	// leaves a lone high surrogate, which renders as garbage — drop it.
	const last = head.charCodeAt(head.length - 1);
	if (last >= 0xd800 && last <= 0xdbff) head = head.slice(0, -1);

	return head.trimEnd() + "…";
}

// ---------------------------------------------------------------------------
// JSONL transcript → ChatTurn[]
//
// Given a sessionId and cwd, load the full transcript at
//   ~/.claude/projects/<mangled-cwd>/<sessionId>.jsonl
// and convert it into the same UserTurn / AssistantTurn shape the live
// runChatTurn() path produces. This lets /claude-resume render the past
// context inside the orange border so the user can SEE what they're
// resuming, not just blindly continue an invisible thread.
//
// JSONL event reference (observed in 2.1.118 transcripts):
//   {type:"user",     message:{role:"user",      content: <string>}}                     ← typed prompt
//   {type:"user",     message:{role:"user",      content: [{type:"tool_result", …}, …]}} ← tool outputs
//   {type:"assistant",message:{role:"assistant", content: [<one of: thinking|text|tool_use>], usage:{…}, model:"claude-sonnet-4-6"}}
// Each assistant content block is emitted as its OWN line, all sharing the
// same usage / model fields (one API call → many lines). We coalesce every
// run of consecutive assistant lines into a single AssistantTurn whose
// `blocks` array preserves the in-order list of thinking/text/tool blocks.
// Tool results that arrive in subsequent user-lines are attached back onto
// the matching tool block by tool_use_id.
//
// Lines we ignore: agent-setting, queue-operation, attachment, last-prompt,
// summary, and anything else without a recognisable role/content shape.
// Tokens/cost are intentionally NOT carried over — the JSONL repeats usage
// per content block so summing naively would over-count, and the user is
// here to see CONTENT, not a token panel for old turns.
// ---------------------------------------------------------------------------
/**
 * Load ~/.claude/projects/<mangled-cwd>/<sessionId>.jsonl and convert it into
 * the UserTurn / AssistantTurn list used for rendering.
 *
 * - A `user` event with string content becomes a UserTurn and closes any
 *   assistant turn being accumulated.
 * - A `user` event with array content either carries tool results (attached
 *   to the matching tool block of the open assistant turn by tool_use_id) or
 *   array-shaped prompt text (treated like a typed prompt).
 * - Consecutive `assistant` events are merged into one AssistantTurn whose
 *   `blocks` keep the original order of thinking / text / tool blocks.
 * - Every other event type, and any line that is blank, is not valid JSON,
 *   or is not a JSON object, is ignored.
 *
 * @param sessionId      Transcript id (file name without ".jsonl").
 * @param cwd            Working directory the session belongs to.
 * @param fallbackModel  Model label for assistant events whose own model
 *                       string is absent or unrecognised.
 * @returns The turns in transcript order; [] if the file cannot be read.
 */
function loadSessionTurns(sessionId: string, cwd: string, fallbackModel: Model): ChatTurn[] {
	const path = join(homedir(), ".claude", "projects", mangleCwd(cwd), `${sessionId}.jsonl`);
	let raw: string;
	try { raw = readFileSync(path, "utf8"); } catch { return []; }

	const turns: ChatTurn[] = [];
	let current: AssistantTurn | null = null;

	// Close the assistant turn being accumulated (if any): derive finalText
	// from its text blocks and append it to the output.
	const flush = () => {
		if (!current) return;
		current.finalText = current.blocks
			.filter((b) => b.type === "text")
			.map((b: any) => b.text as string)
			.join("");
		turns.push(current);
		current = null;
	};

	// Return the open assistant turn, opening one labelled `model` if needed.
	const ensureCurrent = (model: Model): AssistantTurn => {
		if (current) return current;
		current = {
			role:      "assistant",
			model,
			blocks:    [],
			finalText: "",
			sessionId,
			isResume:  false,
			done:      true,
		};
		return current;
	};

	// Flatten a tool_result's `content` (a string, or an array of blocks of
	// which only the text ones are kept) into plain text.
	const toolResultText = (content: any): string => {
		if (typeof content === "string") return content;
		if (!Array.isArray(content)) return "";
		return content
			.filter((b: any) => b?.type === "text" && typeof b.text === "string")
			.map((b: any) => b.text as string)
			.join("\n");
	};

	for (const line of raw.split("\n")) {
		if (!line.trim()) continue;
		let ev: any;
		try { ev = JSON.parse(line); } catch { continue; }
		// JSON.parse also accepts bare literals; `null` would make `ev.type`
		// throw, so anything that is not an object is skipped.
		if (ev === null || typeof ev !== "object") continue;

		if (ev.type === "user") {
			const c = ev.message?.content;
			if (typeof c === "string") {
				// Typed user prompt — closes any in-flight assistant turn.
				flush();
				if (c.trim()) turns.push({ role: "user", text: c });
			} else if (Array.isArray(c)) {
				let sawToolResult = false;
				for (const block of c) {
					if (block?.type === "tool_result") {
						sawToolResult = true;
						const text    = toolResultText(block.content);
						const isError = block.is_error === true;
						// TS 5.x loses narrowing of the `let current` that is
						// reassigned by the `flush` closure — even a `const cur
						// = current` annotation doesn't survive the for-of
						// header re-evaluation. A direct cast on the `.blocks`
						// access is the minimal escape hatch confirmed to work
						// in isolation tests with TS 5.9.
						if (current !== null) {
							const curBlocks = (current as AssistantTurn).blocks;
							for (const tb of curBlocks) {
								if (tb.type === "tool" && tb.id === block.tool_use_id) {
									tb.result = { text, isError };
									break;
								}
							}
						}
					} else if (block?.type === "text" && typeof block.text === "string") {
						// Some clients send array-shaped user prompts. Text
						// that follows a tool_result in the same event is not
						// treated as a prompt.
						if (!sawToolResult) {
							flush();
							if (block.text.trim()) turns.push({ role: "user", text: block.text });
						}
					}
				}
			}
		} else if (ev.type === "assistant") {
			// Content that is not an array is treated as empty rather than
			// iterated (a non-iterable value would throw in the for-of).
			const rawContent = ev.message?.content;
			const content: any[] = Array.isArray(rawContent) ? rawContent : [];
			const rawModel = String(ev.message?.model ?? "");
			const model    = (rawModel ? normalizeRawModel(rawModel) : null) ?? fallbackModel;
			// If the per-line model differs from what the turn was opened
			// with, the first one wins — a coalesced turn inherits the model
			// of its first event. (This is purely for the header label.)
			const a = ensureCurrent(model);
			for (const block of content) {
				if (block?.type === "thinking" && typeof block.thinking === "string") {
					if (block.thinking.trim()) a.blocks.push({ type: "thinking", text: block.thinking });
				} else if (block?.type === "text" && typeof block.text === "string") {
					if (block.text.trim()) a.blocks.push({ type: "text", text: block.text });
				} else if (block?.type === "tool_use") {
					a.blocks.push({
						type:      "tool",
						id:        String(block.id ?? ""),
						name:      String(block.name ?? ""),
						inputJson: JSON.stringify(block.input ?? {}),
					});
				}
			}
		}
		// All other event types (agent-setting, queue-operation, attachment,
		// last-prompt, summary, …) are intentionally ignored.
	}

	flush();
	return turns;
}

// Per-turn render cache: once a turn is "frozen" (user turns are always
// frozen; assistant turns after done=true), its rendered output at a given
// (innerWidth, theme) is invariant. Caching avoids O(turns) rebuild on every
// frame, which otherwise creates quadratic-ish lag during streaming because
// partial-message updates drive tens of renders per second.
/**
 * Memoised render output carried on each turn. All fields are optional, so a
 * turn that has never been rendered has none of them set.
 */
interface TurnRenderCache {
	cachedLines?: string[];   // rendered lines from a previous render
	cachedWidth?: number;     // presumably the inner width cachedLines was built for — confirm in the renderer
	cachedTheme?: unknown;    // presumably the theme cachedLines was built with — confirm in the renderer
}

/** A prompt typed by the user. */
interface UserTurn extends TurnRenderCache {
	role: "user";
	text: string;             // the prompt text
}
/** One assistant response: an ordered list of stream blocks plus status/usage. */
interface AssistantTurn extends TurnRenderCache {
	role: "assistant";
	model: Model;             // model slot this turn is labelled with
	blocks: StreamBlock[];    // thinking / text / tool blocks, in arrival order
	finalText: string;        // for turns loaded from a transcript: the text blocks joined together
	sessionId?: string;       // Claude session id the turn belongs to, when known
	isResume: boolean;        // NOTE(review): set by the live path, not visible here — confirm meaning
	done: boolean;            // true once the turn is complete (turns loaded from disk are always done)
	error?: string;           // NOTE(review): presumably an error message for a failed turn — confirm
	cancelled?: boolean;      // NOTE(review): presumably set when the response was aborted — confirm
	costUsd?: number;         // usage figures; not populated for turns loaded from a transcript
	inputTokens?: number;
	outputTokens?: number;
	cacheReadTokens?: number;
	cacheWriteTokens?: number;
}
/** Any turn of a chat session; discriminated by `role`. */
type ChatTurn = UserTurn | AssistantTurn;

/** The `details` payload of a chat session: its turns, in order. */
interface ChatSessionDetails {
	turns: ChatTurn[];
}

// ---------------------------------------------------------------------------
// Todo extraction — scan the session for the most recent TodoWrite tool call
// and return its todos array. Rendered BETWEEN the orange-bordered
// conversation and the mode banner by the chat-claude widget so the
// current task list is always visible without scrolling through history.
//
// Only the latest TodoWrite wins (earlier ones are superseded); empty or
// malformed inputs are treated as "no todos" and suppress the section.
// ---------------------------------------------------------------------------
/** Status values a todo item can carry. */
type TodoStatus = "completed" | "in_progress" | "pending";
/** One entry of a TodoWrite tool call's `todos` array. */
interface Todo {
	content:    string;
	status:     TodoStatus;
	activeForm: string;
}
/**
 * Find the todo list written by the most recent TodoWrite tool call.
 *
 * Turns are walked newest-first and, inside each assistant turn, blocks are
 * walked last-first. The first TodoWrite encountered decides the result:
 * its non-empty `todos` array, or null when that array is empty or missing
 * or the input JSON does not parse. Older TodoWrite calls are never
 * consulted once a newer one exists.
 */
function getLatestTodos(details: ChatSessionDetails | null): Todo[] | null {
	if (!details) return null;

	const turns = details.turns;
	for (let t = turns.length - 1; t >= 0; t--) {
		const turn = turns[t];
		if (turn.role !== "assistant") continue;

		for (let b = turn.blocks.length - 1; b >= 0; b--) {
			const candidate = turn.blocks[b];
			if (candidate.type !== "tool" || candidate.name !== "TodoWrite") continue;

			// This is the latest TodoWrite: whatever it holds is the answer,
			// even when that answer is "no todos".
			let parsed: any;
			try {
				parsed = JSON.parse(candidate.inputJson);
			} catch {
				return null;
			}
			const todos = parsed?.todos;
			return Array.isArray(todos) && todos.length > 0 ? (todos as Todo[]) : null;
		}
	}
	return null;
}

// Cap so a runaway todo list can't push the editor off-screen. In practice
// lists stay well under this; when they don't, we render the first N-1 items
// plus a "… X more" notice. Non-completed items are prioritised over
// completed ones in the visible slice, since the point of surfacing todos
// on-screen is to show what's left to do.
/** Maximum number of lines the todo section may occupy. */
const MAX_TODO_LINES = 12;
/**
 * Choose which todos to display.
 *
 * Lists of up to MAX_TODO_LINES items are returned as-is. Longer lists are
 * cut to MAX_TODO_LINES - 1 items (one line stays free for the "… more"
 * notice): all non-completed items first, then completed ones, each group
 * in original order. `hidden` is the number of items left out.
 */
function sliceTodosForDisplay(todos: Todo[]): { shown: Todo[]; hidden: number } {
	if (todos.length <= MAX_TODO_LINES) return { shown: todos, hidden: 0 };

	// Partition in a single pass, preserving order inside each group.
	const open: Todo[] = [];
	const finished: Todo[] = [];
	for (const todo of todos) {
		if (todo.status === "completed") finished.push(todo);
		else open.push(todo);
	}

	const budget = MAX_TODO_LINES - 1;
	const shown  = open.concat(finished).slice(0, budget);
	return { shown, hidden: todos.length - shown.length };
}

// ---------------------------------------------------------------------------
// Code block extraction — raw fenced code from the session's text blocks.
//
// Used by the Ctrl+Shift+C shortcut to copy clean, unrendered code directly
// from the parsed JSON stream, avoiding the ANSI escape sequences, stray
// indentation, and line-continuation artefacts that terminal selection gives.
//
// Blocks are returned newest-first (last assistant turn first; within a turn,
// last code fence first) so the most recent snippet is always at index 0.
// ---------------------------------------------------------------------------
/** One fenced code block lifted from an assistant text block. */
interface ExtractedCodeBlock {
	lang:  string;  // language tag after the opening fence ("" when absent)
	code:  string;  // raw content between the fences (no surrounding ```)
	label: string;  // compact one-line description for the picker UI
}

/**
 * Collect every non-empty fenced code block from the assistant text blocks
 * of a session, newest first: the last assistant turn comes first and,
 * within a turn, the last fence comes first.
 *
 * The language tag may contain any non-whitespace characters other than a
 * backtick ("c++", "objective-c", "f#"), and trailing spaces after it are
 * tolerated. Restricting the tag to word characters makes such an opening
 * fence unmatchable; the scan then pairs the CLOSING fence with the next
 * opening one and returns the prose between two code blocks as "code".
 *
 * @param details  The session whose turns are scanned.
 * @returns Extracted blocks with their language, raw code and picker label.
 */
function extractCodeBlocksFromSession(details: ChatSessionDetails): ExtractedCodeBlock[] {
	const out: ExtractedCodeBlock[] = [];
	for (let ti = details.turns.length - 1; ti >= 0; ti--) {
		const turn = details.turns[ti];
		if (turn.role !== "assistant") continue;
		const turnBlocks: ExtractedCodeBlock[] = [];
		for (const block of turn.blocks) {
			if (block.type !== "text") continue;
			// ```lang<optional spaces>\n…content…```  (lang optional)
			//   [^\s`]*      language tag: anything but whitespace/backtick
			//   [^\S\r\n]*   trailing horizontal whitespace on the fence line
			//   \r?\n        LF or CRLF line ending
			//   [\s\S]*?     lazily up to the next ``` (the closing fence)
			// A fresh regex per text block keeps the /g lastIndex state local.
			const fence = /```([^\s`]*)[^\S\r\n]*\r?\n([\s\S]*?)```/g;
			let m: RegExpExecArray | null;
			while ((m = fence.exec(block.text)) !== null) {
				const lang = m[1] ?? "";
				const code = m[2] ?? "";
				if (!code.trim()) continue; // skip empty fences
				// Build a compact one-line label: [lang] first-non-blank-line.
				// Splitting on \r?\n keeps a stray "\r" out of the label.
				const firstLine = code.split(/\r?\n/).find((l) => l.trim()) ?? "";
				const preview   = firstLine.length > 55
					? firstLine.slice(0, 52).trimEnd() + "…"
					: firstLine;
				const langTag = lang ? `[${lang}] ` : "";
				turnBlocks.push({ lang, code, label: `${langTag}${preview}` });
			}
		}
		// Reverse within the turn so the last fence in that turn comes first.
		for (let i = turnBlocks.length - 1; i >= 0; i--) out.push(turnBlocks[i]!);
	}
	return out;
}

// =============================================================================
// Extension entry point
// =============================================================================

// ── Reload-persistent state ─────────────────────────────────────────────────
// pi's `/reload` tears the extension down and re-invokes the default export,
// which resets every closure-local `let`/`const`. The Map of resumable Claude
// session ids (model → sessionId) is the one piece of state we want to
// survive that — otherwise /reload silently orphans the ongoing Claude
// threads, forcing the user to re-pick them via /claude-resume.
//
// Everything else (chatMode, currentDetails, askBridge, tuiRef, isGenerating)
// is intentionally NOT persisted: the bridge/TUI references are bound to the
// torn-down ctx and must be rebuilt on the next enterChatMode(), and any
// in-flight stream is already aborted when the old closure is discarded.
//
// We stash the Map on globalThis behind a namespaced key. globalThis survives
// module re-evaluation (only top-level lexical bindings are reset), and the
// guarded getter keeps initialization idempotent across repeated reloads.

// Valid extended-thinking effort levels accepted by `claude --effort`, plus
// our synthetic "off" sentinel which skips the flag entirely (falling back
// to the CLI's default of no thinking emission in -p mode).
/** Effort levels the extension offers; "off" is the extension's own sentinel. */
const EFFORTS = ["off", "low", "medium", "high", "xhigh", "max"] as const;
type Effort = (typeof EFFORTS)[number];
/** Effort used when nothing has been persisted yet. */
const DEFAULT_EFFORT: Effort = "max";

/** Shape of the state object kept on globalThis. */
interface ChatClaudePersistedState {
	// model slot → resumable Claude session id.
	sessions: Map<Model, string>;
	// Current extended-thinking effort level — kept here so the user's choice
	// survives `/reload` the same way the resumable session ids do.
	effort: Effort;
	// Prompts typed in chat mode, oldest-first. Capped at MAX_PROMPT_HISTORY.
	// Replayed into the editor on every ChatEscEditor creation so up-arrow
	// history is available immediately in any new chat session.
	promptHistory: string[];
}
/** Namespaced globalThis key under which the state object is stored. */
const CHAT_CLAUDE_STATE_KEY = "__pi_chat_claude_persisted__";
// Maximum number of prompts to persist. The Editor caps its own in-memory
// list at 100; more are persisted so the most recent 100 are always
// available even after many reloads.
const MAX_PROMPT_HISTORY = 200;

/**
 * Return the state object stored on globalThis, creating and registering it
 * on first use. Repeated calls hand back the same object. Fields missing
 * from an object stored by an older revision (`effort`, `promptHistory`)
 * are filled in with their defaults.
 */
function getPersistedState(): ChatClaudePersistedState {
	const registry = globalThis as unknown as Record<string, ChatClaudePersistedState>;

	const found = registry[CHAT_CLAUDE_STATE_KEY];
	const state: ChatClaudePersistedState = found
		? found
		: { sessions: new Map<Model, string>(), effort: DEFAULT_EFFORT, promptHistory: [] };
	if (!found) registry[CHAT_CLAUDE_STATE_KEY] = state;

	// Back-fill fields that older revisions of the extension did not write.
	state.effort        = state.effort || DEFAULT_EFFORT;
	state.promptHistory = state.promptHistory || [];
	return state;
}

export default function (pi: ExtensionAPI) {
	// ── Mode state ────────────────────────────────────────────────────────────
	let chatMode: Model | null = null;           // null ⇒ not in chat mode
	// model → resumable claude session id. Pulled from globalThis so the
	// mapping (and the current effort level) survive `/reload` (see
	// getPersistedState above). `persisted` is kept as a handle so
	// `/claude-effort` can mutate `persisted.effort` in place and have
	// the change picked up by subsequent runChatTurn calls.
	const persisted = getPersistedState();
	const { sessions } = persisted;
	// True from the moment runChatTurn launches a Claude request until that
	// request settles (or chat mode is exited). Gates new input and ESC.
	let isGenerating = false;
	// Abort handle for the in-flight request; triggered by ESC,
	// /claude-abort and exitChatMode. Null when nothing is streaming.
	let currentAbort: AbortController | null = null;

	// pi-ask bridge — opens a Unix socket + generates an --mcp-config so
	// Claude (running inside this chat) can ask the user questions through
	// pi's native ask UI. Bound to the chat-mode lifetime: started on
	// enterChatMode, closed on exitChatMode.
	let askBridge: AskBridge | null = null;

	// Live TUI reference captured from the mode-banner widget factory (see
	// syncUI), used to schedule re-renders while a Claude response is
	// streaming into the current chat-claude-session message. It is not
	// cleared when chat mode ends.
	let tuiRef: { requestRender: () => void } | null = null;

	// Reference to the active ChatEscEditor instance so we can call
	// addToHistory() on it after each prompt submission, making the new entry
	// immediately navigable with the up-arrow inside the same session.
	// Assigned by the ChatEscEditor constructor, cleared by exitChatMode.
	let editorRef: ChatEscEditor | null = null;

	// The in-flight chat session's `details` object. Stored by reference so
	// mutations here are reflected in the CustomMessage already displayed
	// in pi's conversation. Null between chat-mode sessions.
	let currentDetails: ChatSessionDetails | null = null;

	// Set of this extension's custom-message types (scoped to this extension
	// instance, not the module) so the `context` event handler can strip
	// them out of pi's LLM context — chat mode is between the user and
	// Claude and has no business in pi's prompt payload.
	const CHAT_CLAUDE_CUSTOM_TYPES = new Set(["chat-claude-session"]);

	// ── Render throttling ────────────────────────────────────────────────────
	// Claude's `--include-partial-messages` fires an onUpdate for every token
	// delta (100+ Hz under a fast stream). Rendering per-token was the second
	// half of the progressive-lag problem — even with per-turn caching, the
	// TUI would be asked to diff+repaint dozens of times per second.
	//
	// scheduleStreamRender coalesces back-to-back requests into a trailing-
	// edge timer at ~30 Hz. The first update within a quiet window waits up
	// to 33 ms before rendering; any further updates in that window are
	// folded into the same render. flushStreamRender cancels the pending
	// timer and renders immediately — used on stream completion, abort, and
	// chat-mode teardown so the user sees the terminal frame right away.
	let streamRenderTimer: ReturnType<typeof setTimeout> | null = null;
	const STREAM_RENDER_INTERVAL_MS = 33; // ~30 Hz
	/**
	 * Ask for a repaint, coalescing bursts of calls: while a timer is
	 * pending, further calls are no-ops and ride along with that render.
	 */
	function scheduleStreamRender() {
		if (!streamRenderTimer) {
			const onTick = () => {
				// Clear the handle first so the next update can arm a new timer.
				streamRenderTimer = null;
				tuiRef?.requestRender();
			};
			streamRenderTimer = setTimeout(onTick, STREAM_RENDER_INTERVAL_MS);
		}
	}
	/** Drop any pending throttled render and repaint immediately. */
	function flushStreamRender() {
		const pending = streamRenderTimer;
		streamRenderTimer = null;
		if (pending) clearTimeout(pending);
		tuiRef?.requestRender();
	}

	// ── Rendering helpers ────────────────────────────────────────────────────
	// Mirrors pi's AssistantMessageComponent conventions (see
	// modes/interactive/components/assistant-message.js): Markdown at
	// paddingX=1, paddingY=0; thinking as italic `thinkingText`-coloured
	// markdown; tool blocks via the shared renderToolBlock (same one
	// ask-claude uses) so bash / read / edit / write all look identical to
	// pi's own tool executions.
	/**
	 * Append the rendered form of one chat turn to `container`.
	 *
	 * User turns: a label row, a blank row, then the trimmed prompt as
	 * markdown. Assistant turns: a status header, every non-empty
	 * thinking / tool / text block in arrival order (one blank row between
	 * consecutive blocks), then a trailing notice — "(Cancelled)", the error
	 * text, or the usage summary once the turn is done.
	 */
	function renderTurnInto(container: Container, turn: ChatTurn, theme: any) {
		const mdTheme = getMarkdownTheme();

		if (turn.role === "user") {
			container.addChild(new Text(orangeBold(" you"), 1, 0));
			container.addChild(new Spacer(1));
			container.addChild(new Markdown(turn.text.trim(), 1, 0, mdTheme));
			return;
		}

		// ── Header: status icon, model name, optional session tag / busy mark ──
		let statusIcon: string;
		if (turn.cancelled) statusIcon = orange("◇ ");
		else if (turn.error) statusIcon = theme.fg("error", "✗ ");
		else if (turn.isResume) statusIcon = orange(" ");
		else statusIcon = orange("◆ ");

		const headerParts: string[] = [statusIcon, orangeBold(`Claude ${capitalize(turn.model)}`)];
		if (turn.sessionId) headerParts.push(theme.fg("dim", `  session:${turn.sessionId.slice(0, 8)}`));
		if (!turn.done) headerParts.push(theme.fg("warning", "  "));
		container.addChild(new Text(headerParts.join(""), 1, 0));
		container.addChild(new Spacer(1));

		// ── Body ──
		// `emitted` counts blocks actually drawn; gap() inserts the blank row
		// that separates a block from the one before it.
		let emitted = 0;
		const gap = () => {
			if (emitted > 0) container.addChild(new Spacer(1));
			emitted++;
		};
		// Safety net: a tool block whose id was already drawn is skipped.
		const drawnToolIds = new Set<string>();
		for (const block of turn.blocks ?? []) {
			switch (block.type) {
				case "tool":
					if (drawnToolIds.has(block.id)) break;
					drawnToolIds.add(block.id);
					gap();
					container.addChild(renderToolBlockTruncated(block, theme));
					break;
				case "thinking":
					if (!block.text.trim()) break;
					gap();
					container.addChild(new Markdown(block.text.trim(), 1, 0, mdTheme, {
						color:  (t: string) => theme.fg("thinkingText", t),
						italic: true,
					}));
					break;
				case "text":
					if (!block.text.trim()) break;
					gap();
					container.addChild(new Markdown(block.text.trim(), 1, 0, mdTheme));
					break;
			}
		}

		// ── Trailing notice ──
		// Drawn after the blocks so partial output that arrived before a
		// cancel / error stays on screen.
		if (turn.cancelled) {
			if (emitted > 0) container.addChild(new Spacer(1));
			container.addChild(new Text(orange("(Cancelled)"), 1, 0));
			return;
		}
		if (turn.error) {
			if (emitted > 0) container.addChild(new Spacer(1));
			container.addChild(new Text(theme.fg("error", `Error: ${turn.error}`), 1, 0));
			return;
		}
		if (!turn.done) return;

		const usage = formatUsage(turn as any);
		if (usage) {
			container.addChild(new Spacer(1));
			container.addChild(new Text(theme.fg("dim", usage), 1, 0));
		}
	}

	// Render one turn in isolation and return its lines PRE-PADDED to
	// `innerWidth` visible columns.
	//
	// Pre-padding here means `visibleWidth()` (which calls `Intl.Segmenter`
	// — the measured hot spot: 85% of pi's CPU in a laggy session) runs
	// exactly ONCE per line per turn, not once per line per frame. For
	// completed turns these padded lines are cached and reused forever at
	// that (width, theme); for the streaming tail turn the work is bounded
	// to just the in-flight turn's lines.
	/**
	 * Render `turn` into a throwaway container and return its lines, each
	 * passed through padToInnerWidth for `innerWidth`.
	 */
	function renderTurnLines(turn: ChatTurn, theme: any, innerWidth: number): string[] {
		const scratch = new Container();
		renderTurnInto(scratch, turn, theme);
		return scratch.render(innerWidth).map((line) => padToInnerWidth(line, innerWidth));
	}

	// Assemble the WHOLE session's inner lines with per-turn caching.
	//
	// Cache invariants:
	//   • User turns are immutable → always cacheable.
	//   • Assistant turns are mutated in-place by runClaude's onUpdate
	//     callback while streaming, and only become stable after
	//     `done: true` is set (see runChatTurn). So we only cache
	//     assistants once they're done.
	//   • Cache keys on (innerWidth, theme) — terminal resize or theme
	//     switch invalidates all per-turn caches transparently by forcing
	//     a rebuild on the next render.
	//
	// With this cache, a streaming frame only rebuilds the one in-flight
	// assistant turn (the tail); all prior turns are an O(1) line-copy.
	// That eliminates the O(turns × blocks) rebuild that previously ran
	// every time a partial Claude message arrived.
	//
	// Returned lines are PRE-PADDED to `innerWidth` visible columns — see
	// renderTurnLines/padToInnerWidth for why. The caller can hand them
	// straight to wrapInOrangeBorder without any further visibleWidth()
	// calls, which is critical: visibleWidth drives Intl.Segmenter, whose
	// 512-entry LRU thrashes when called per-line-per-frame on a long chat.
	/**
	 * Build the padded inner lines for every turn of a session.
	 *
	 * A turn's lines are reused from its cache when the turn is stable (a
	 * user turn, or an assistant turn with `done` set) and the cache was
	 * produced for the same `innerWidth` and `theme`. Otherwise the turn is
	 * re-rendered; stable turns store the result, streaming turns have
	 * their cache fields cleared. Turns are separated by one blank,
	 * full-width row.
	 */
	function renderSessionLines(details: ChatSessionDetails, theme: any, innerWidth: number): string[] {
		const { turns } = details;

		// Nothing submitted yet: show a single placeholder row.
		if (turns.length === 0) {
			const placeholder = new Container();
			placeholder.addChild(new Text(orangeDim("(chat mode — waiting for first message)"), 0, 0));
			return placeholder.render(innerWidth).map((line) => padToInnerWidth(line, innerWidth));
		}

		// The separator row is padded too, so the border interior keeps a
		// constant width.
		const blankRow = " ".repeat(innerWidth);
		const assembled: string[] = [];

		for (const [index, turn] of turns.entries()) {
			if (index > 0) assembled.push(blankRow);

			const stable = turn.role === "user" || (turn.role === "assistant" && turn.done);
			const reusable =
				stable && turn.cachedWidth === innerWidth && turn.cachedTheme === theme
					? turn.cachedLines
					: undefined;
			if (reusable) {
				for (const line of reusable) assembled.push(line);
				continue;
			}

			const fresh = renderTurnLines(turn, theme, innerWidth);
			// Stable turns remember their output; a streaming turn must not
			// keep anything cached from an earlier render.
			turn.cachedLines = stable ? fresh : undefined;
			turn.cachedWidth = stable ? innerWidth : undefined;
			turn.cachedTheme = stable ? theme : undefined;
			for (const line of fresh) assembled.push(line);
		}
		return assembled;
	}

	// Drop every turn's render cache — called from the message renderer's
	// `invalidate()` hook (triggered by pi when theme changes or when a
	// from-scratch re-render is needed).
	/** Clear the cached lines/width/theme on every turn of `details`. */
	function invalidateSessionCache(details: ChatSessionDetails) {
		details.turns.forEach((turn) => {
			turn.cachedTheme = undefined;
			turn.cachedWidth = undefined;
			turn.cachedLines = undefined;
		});
	}

	// ── Mode banner + status ─────────────────────────────────────────────────
	/**
	 * Bring the above-editor widget, the status entry and the terminal
	 * title in line with the current chat-mode state.
	 *
	 * No-op when `ctx` has no UI. Outside chat mode the widget and status
	 * are removed and the title is reset to "pi". Inside chat mode a widget
	 * is installed that shows the latest todos (if any) followed by a
	 * two-line mode banner; the widget factory also captures the TUI handle
	 * into `tuiRef` for streaming re-renders.
	 *
	 * Every row the widget returns — banner rows included — is clipped to
	 * the width passed to render(), so a narrow terminal never receives an
	 * over-wide line.
	 *
	 * @param ctx pi command/event context (expects `hasUI` and `ui`).
	 */
	function syncUI(ctx: any) {
		if (!ctx?.hasUI) return;

		if (!chatMode) {
			ctx.ui.setWidget("chat-claude", undefined);
			ctx.ui.setStatus("chat-claude", undefined);
			ctx.ui.setTitle("pi");
			return;
		}

		const sessionId = sessions.get(chatMode);
		const short = sessionId ? sessionId.slice(0, 8) : "new";
		const modelUp = capitalize(chatMode).toUpperCase();

		ctx.ui.setWidget("chat-claude", (tui: any, theme: any) => {
			tuiRef = tui;  // ← captured for live streaming re-renders
			return {
				invalidate: () => {},
				render: (width: number) => {
					const rail = orange("▌ ");
					// Single choke point for width safety: every emitted row
					// goes through here.
					const fit = (line: string) => truncateToWidth(line, width, "…", false);
					const out: string[] = [];

					// ── Todos (if any) ────────────────────────────────────
					// Sourced from getLatestTodos(currentDetails). Rendered
					// BEFORE the mode banner so the layout reads, top→bottom:
					//   orange-bordered conversation
					//   ▌ ☒ completed todo
					//   ▌ ▸ current in-progress todo (activeForm)
					//   ▌ ☐ pending todo
					//   ▌ ◆ CLAUDE CHAT MODE  …
					//   ▌ Type to chat · …
					const todos = getLatestTodos(currentDetails);
					if (todos && todos.length > 0) {
						const { shown, hidden } = sliceTodosForDisplay(todos);
						for (const todo of shown) {
							let marker: string;
							let text:   string;
							if (todo.status === "completed") {
								marker = theme.fg("success", "☒");
								text   = theme.fg("dim", todo.content);
							} else if (todo.status === "in_progress") {
								marker = orangeBold("▸");
								text   = orangeBold(todo.activeForm || todo.content);
							} else {
								marker = orangeDim("☐");
								text   = todo.content;
							}
							out.push(fit(rail + marker + " " + text));
						}
						if (hidden > 0) {
							const notice = `… ${hidden} more todo${hidden === 1 ? "" : "s"} hidden`;
							out.push(fit(rail + theme.fg("dim", notice)));
						}
					}

					// ── Mode banner ──────────────────────────────────────
					// Values read here (effort, isGenerating) are looked up
					// at render time, so the banner tracks live changes.
					const title      = orangeBold("◆ CLAUDE CHAT MODE");
					const modelLabel = orangeBold(modelUp);
					const sessionTag = orangeDim("session:" + short);
					const effortTag  = orangeDim("effort:" + persisted.effort);
					const running    = isGenerating ? "  " + orange(" streaming…") : "";
					const line1 = rail + title + "  " + modelLabel + "  " + sessionTag + "  " + effortTag + running;
					const line2 = rail + theme.fg("dim",
						"Type to chat · /claude haiku|sonnet|opus · /claude-new · /claude-effort · /claude-end · /claude-abort");
					// The help row alone is ~100 columns; clip both banner
					// rows exactly like the todo rows above.
					out.push(fit(line1), fit(line2));
					return out;
				},
			};
		}, { placement: "aboveEditor" });

		const busy = isGenerating ? " · streaming" : "";
		ctx.ui.setStatus("chat-claude",
			orange(`◆ Claude ${capitalize(chatMode)} · ${short} · effort:${persisted.effort}${busy}`));
		ctx.ui.setTitle(`pi · Claude ${capitalize(chatMode)} Chat`);
	}

	// ── ESC-to-abort editor ──────────────────────────────────────────────────
	// ESC (the "interrupt" action) is on the extension-runner's reserved list
	// (see node_modules/@mariozechner/pi-coding-agent/.../runner.js — any
	// registerShortcut("escape", …) is silently dropped), so a custom editor is
	// the sanctioned way to intercept it. We subclass pi's exported CustomEditor
	// and short-circuit ESC ONLY while a chat-claude response is streaming.
	// For every other case we defer to `super.handleInput`, which runs the
	// app-level keybindings — including pi's own onEscape handler, which
	// setCustomEditorComponent copies onto the custom editor at install time
	// (see interactive-mode.js setCustomEditorComponent, ~line 1258).
	/**
	 * Editor used while chat mode is active. It differs from CustomEditor in
	 * two ways: it preloads the persisted prompt history on construction,
	 * and it turns ESC into "abort the Claude response" while one is
	 * streaming. All other input goes to the base class untouched.
	 */
	class ChatEscEditor extends CustomEditor {
		constructor(tui: TUI, theme: EditorTheme, keybindings: KeybindingsManager) {
			super(tui, theme, keybindings);
			// Publish this instance so runChatTurn can append new prompts to
			// its history as they are submitted.
			editorRef = this;
			// Feed at most the newest 100 persisted prompts, oldest first, so
			// the most recent one is the last added.
			persisted.promptHistory
				.slice(-100)
				.forEach((entry) => this.addToHistory(entry));
		}

		handleInput(data: string): void {
			// Anything other than "ESC during an abortable stream" is ordinary input.
			if (!matchesKey(data, "escape") || !isGenerating || !currentAbort) {
				super.handleInput(data);
				return;
			}
			try {
				currentAbort.abort();
			} catch {
				/* ok */
			}
			// No ctx is available here; drop any throttled render and repaint
			// now. The turn is marked cancelled by runChatTurn once the
			// aborted request settles.
			flushStreamRender();
		}
	}

	// ── Mode transitions ─────────────────────────────────────────────────────
	/**
	 * Enter chat mode for `model`, or switch/re-enter if already active.
	 *
	 * @param model        Claude model slot to chat with.
	 * @param ctx          pi context; UI work is skipped when `ctx.hasUI` is falsy.
	 * @param freshSession when true, forget the stored session id for `model` first.
	 */
	function enterChatMode(model: Model, ctx: any, freshSession: boolean) {
		const previousMode = chatMode;
		const wasActive = previousMode !== null;
		const modelChanged = previousMode !== model;
		const interactive = Boolean(ctx?.hasUI);

		if (freshSession) sessions.delete(model);

		// First entry, a model switch, or an explicit fresh session all begin
		// a new border box: detach from the previous details object (the
		// message already in the conversation keeps its own reference).
		const startsNewBorder = !wasActive || modelChanged || freshSession;
		if (startsNewBorder) currentDetails = null;

		chatMode = model;

		if (interactive) {
			// Replace the pi-ask bridge on every entry so its lifetime is
			// bounded by the chat session.
			askBridge?.close();
			try {
				askBridge = startAskBridge({
					ui: ctx.ui,
					onAsk: () => tuiRef?.requestRender(),
				});
			} catch (err) {
				askBridge = null;
				const reason = err instanceof Error ? err.message : String(err);
				ctx.ui.notify(
					`pi-ask bridge unavailable: ${reason} — Claude won't be able to ask questions.`,
					"warning",
				);
			}

			// (Re)install the ESC-aborts-Claude editor.
			ctx.ui.setEditorComponent(
				(tui: TUI, theme: EditorTheme, keybindings: KeybindingsManager) =>
					new ChatEscEditor(tui, theme, keybindings),
			);
		}

		syncUI(ctx);
		if (!interactive) return;

		// Tell the user what just happened.
		const resumable = sessions.get(model);
		const kind = freshSession || !resumable ? "new session" : `resume ${resumable.slice(0, 8)}`;
		let verb: string;
		if (!wasActive) verb = "Entered chat mode:";
		else if (modelChanged) verb = "Switched to";
		else verb = "Re-entered";
		ctx.ui.notify(`${verb} Claude ${capitalize(model)} · ${kind}`, "info");
	}

	/**
	 * Leave chat mode: abort any in-flight request, reset the mode state,
	 * cancel the throttled render timer, detach from the session details,
	 * close the pi-ask bridge, restore pi's default editor and refresh the UI.
	 */
	function exitChatMode(ctx: any) {
		const interactive = ctx?.hasUI;

		// Stop whatever is streaming, then reset the mode flags.
		try {
			currentAbort?.abort();
		} catch {
			/* ok */
		}
		currentAbort = null;
		isGenerating = false;
		chatMode = null;

		// No point letting a queued stream render fire after teardown.
		const pendingRender = streamRenderTimer;
		streamRenderTimer = null;
		if (pendingRender) clearTimeout(pendingRender);

		// The next entry starts a new border; the existing message keeps its
		// own details reference in the scrollback.
		currentDetails = null;

		askBridge?.close();
		askBridge = null;

		// Passing undefined restores pi's default editor.
		if (interactive) ctx.ui.setEditorComponent(undefined);
		editorRef = null;

		syncUI(ctx);
		if (interactive) ctx.ui.notify("Exited chat mode — back to normal pi.", "info");
	}

	// ── Session / turn management ────────────────────────────────────────────
	/**
	 * Return the details object of the current chat session, creating it —
	 * and posting the "chat-claude-session" custom message that carries it —
	 * if there is none yet.
	 */
	function ensureSessionMessage(): ChatSessionDetails {
		const existing = currentDetails;
		if (existing) return existing;

		const created: ChatSessionDetails = { turns: [] };
		currentDetails = created;
		// `content` is left empty; `details` is what this extension mutates
		// as turns arrive. triggerTurn:false posts without starting a pi turn.
		const message = {
			customType: "chat-claude-session",
			content: "",
			display: true,
			details: created,
		};
		pi.sendMessage(message, { triggerTurn: false });
		return created;
	}

	/**
	 * Send one user prompt to Claude and stream the reply into the current
	 * chat session.
	 *
	 * Steps: record the prompt in persisted + live editor history, append a
	 * user turn and a placeholder assistant turn, invoke runClaude with
	 * streaming updates, then finalise the assistant turn as done,
	 * cancelled or errored.
	 *
	 * The turn owns a private AbortController. `currentAbort` merely
	 * publishes it for ESC / /claude-abort / exitChatMode; this function
	 * never reads the shared slot to learn about its own request. That
	 * matters because exitChatMode nulls the slot right after aborting:
	 *   • the catch block checks the private signal, so an exit-triggered
	 *     abort is reported as "cancelled" rather than as an error;
	 *   • the finally block releases the shared generating state only if no
	 *     newer turn has taken it over in the meantime.
	 *
	 * @param userText raw text submitted by the user.
	 * @param ctx      pi context (`cwd`, `hasUI`, `ui`).
	 */
	async function runChatTurn(userText: string, ctx: any) {
		if (!chatMode) return;
		const model = chatMode;
		const details = ensureSessionMessage();

		// Persist the prompt so it survives /reload and is available in future
		// chat sessions. We record it here — before the async Claude call —
		// so cancellations and errors still land in history.
		// Deduplicate: skip if identical to the most recent persisted entry.
		const trimmedPrompt = userText.trim();
		if (trimmedPrompt && persisted.promptHistory.at(-1) !== trimmedPrompt) {
			persisted.promptHistory.push(trimmedPrompt);
			if (persisted.promptHistory.length > MAX_PROMPT_HISTORY) {
				persisted.promptHistory = persisted.promptHistory.slice(-MAX_PROMPT_HISTORY);
			}
		}
		// Also push into the live editor so the entry is navigable immediately
		// (without requiring a reload to replay from persisted state).
		if (trimmedPrompt) editorRef?.addToHistory(trimmedPrompt);

		// Append user turn + placeholder assistant turn up front so the
		// border extends as soon as the user hits enter.
		details.turns.push({ role: "user", text: userText });
		const existingSession = sessions.get(model);
		const assistantTurn: AssistantTurn = {
			role: "assistant",
			model,
			blocks: [],
			finalText: "",
			isResume: !!existingSession,
			done: false,
		};
		details.turns.push(assistantTurn);
		tuiRef?.requestRender();

		// Private controller for THIS turn; the shared slot just exposes it.
		const abort = new AbortController();
		isGenerating = true;
		currentAbort = abort;
		syncUI(ctx);
		if (ctx?.hasUI) ctx.ui.setWorkingMessage(`Claude ${capitalize(model)} is thinking…`);

		try {
			const r = await runClaude(userText, {
				// Map the UI slot (e.g. "opus") to the CLI model id.
				model: CLI_MODEL[model],
				sessionId: existingSession,
				cwd: ctx.cwd,
				signal: abort.signal,
				// Extended-thinking level; configurable live via
				// /claude-effort and read fresh for every turn.
				effort: persisted.effort,
				// Route AskUserQuestion-style requests through pi's native
				// overlay via the pi-ask-mcp bridge. Disallowing the built-in
				// AskUserQuestion forces Claude to use mcp__pi__ask if it
				// wants to ask a structured question.
				mcpConfigPath:   askBridge?.mcpConfigPath,
				disallowedTools: askBridge ? ["AskUserQuestion"] : undefined,
				onUpdate: (partial) => {
					assistantTurn.blocks    = partial.blocks;
					assistantTurn.finalText = partial.finalText;
					// Throttled: a fast token stream must not cause a
					// render per token.
					scheduleStreamRender();
				},
			});

			if (r.sessionId) sessions.set(model, r.sessionId);
			assistantTurn.blocks           = r.blocks;
			assistantTurn.finalText        = r.finalText;
			assistantTurn.sessionId        = r.sessionId;
			assistantTurn.costUsd          = r.costUsd;
			assistantTurn.inputTokens      = r.inputTokens;
			assistantTurn.outputTokens     = r.outputTokens;
			assistantTurn.cacheReadTokens  = r.cacheReadTokens;
			assistantTurn.cacheWriteTokens = r.cacheWriteTokens;
			assistantTurn.done             = true;
		} catch (err) {
			// Ask our own signal — the shared slot may already be null (exit)
			// or belong to a newer turn.
			const aborted = abort.signal.aborted;
			assistantTurn.done      = true;
			assistantTurn.cancelled = aborted;
			assistantTurn.error     = aborted ? undefined : (err instanceof Error ? err.message : String(err));
		} finally {
			// A different, non-null controller in the slot means a newer turn
			// is running; its generating state and working message are not
			// ours to clear.
			const superseded = currentAbort !== null && currentAbort !== abort;
			if (!superseded) {
				isGenerating = false;
				currentAbort = null;
				if (ctx?.hasUI) ctx.ui.setWorkingMessage(undefined);
			}
			syncUI(ctx);
			// Flush (not schedule): show the final frame immediately and
			// cancel any throttled timer still pending.
			flushStreamRender();
		}
	}

	// ── Input interception ───────────────────────────────────────────────────
	// Registered pi commands (/claude, /claude-end, etc.) dispatch BEFORE this
	// event fires, so they still work normally. Bash via `!` goes through
	// user_bash, not here. Every other text the user submits in chat mode is
	// routed straight to Claude.
	pi.on("input", async (event, ctx) => {
		const passThrough = { action: "continue" } as const;
		const consumed    = { action: "handled" } as const;

		// Leave the input to pi unless we are in chat mode AND it is
		// non-blank, interactive text that is not a `!` bash line.
		const text = event.text ?? "";
		const notForClaude =
			!chatMode
			|| event.source !== "interactive"
			|| !text.trim()
			|| text.trimStart().startsWith("!");
		if (notForClaude) return passThrough;

		// One response at a time: swallow the input and explain why.
		if (isGenerating) {
			ctx.ui.notify(
				"Claude is still responding. Use /claude-abort to cancel, then try again.",
				"warning",
			);
			return consumed;
		}

		// Fire-and-forget: the turn streams on its own; only an unexpected
		// rejection is surfaced here.
		runChatTurn(text, ctx).catch((err) => {
			const reason = err instanceof Error ? err.message : String(err);
			ctx.ui.notify(`Chat error: ${reason}`, "error");
		});
		return consumed;
	});

	// Keep chat-mode custom messages out of pi's LLM context — chat mode is
	// between the user and Claude, not part of pi's conversation.
	pi.on("context", (event) => {
		// A message is ours when it is a custom message of one of our types.
		const isChatClaudeMessage = (m: any) =>
			m.role === "custom" && CHAT_CLAUDE_CUSTOM_TYPES.has(m.customType);
		const messages = event.messages.filter((m: any) => !isChatClaudeMessage(m));
		return { messages };
	});

	// ── Session lifecycle ────────────────────────────────────────────────────
	pi.on("session_start", (_event, ctx) => {
		syncUI(ctx);
	});
	pi.on("session_shutdown", (_event, ctx) => {
		if (chatMode) exitChatMode(ctx);

		// Belt and braces: exitChatMode already handles both of these, but
		// repeat them in case it was not reached (chatMode already null).
		askBridge?.close();
		askBridge = null;

		const pendingRender = streamRenderTimer;
		streamRenderTimer = null;
		if (pendingRender) clearTimeout(pendingRender);
	});

	// ── Commands ─────────────────────────────────────────────────────────────
	/** Completion entries for every model name starting with `prefix` (case-insensitive). */
	const modelCompletions = (prefix: string) => {
		const needle = prefix.toLowerCase();
		return MODELS
			.filter((m) => m.startsWith(needle))
			.map((m) => ({ value: m, label: m }));
	};

	pi.registerCommand("claude", {
		description: [
			"Enter distinct Claude chat mode — typed input bypasses pi's LLM and goes to Claude.",
			"  /claude                    — enter with last/default model (sonnet)",
			"  /claude haiku|sonnet|opus  — enter/switch model",
		].join("\n"),
		getArgumentCompletions: modelCompletions,
		handler: async (args, ctx) => {
			// A recognised model name wins; otherwise keep the active model,
			// or fall back to sonnet when chat mode is not active.
			const requested = (args ?? "").trim().toLowerCase();
			const named = MODELS.find((m) => m === requested);
			enterChatMode(named ?? chatMode ?? "sonnet", ctx, false);
		},
	});

	pi.registerCommand("claude-new", {
		description: "Enter chat mode with a fresh Claude session (discards any resumed session id). Example: /claude-new opus",
		getArgumentCompletions: modelCompletions,
		handler: async (args, ctx) => {
			// Same model resolution as /claude, but always with a fresh session.
			const requested = (args ?? "").trim().toLowerCase();
			const named = MODELS.find((m) => m === requested);
			enterChatMode(named ?? chatMode ?? "sonnet", ctx, true);
		},
	});

	// /claude-effort — set the extended-thinking effort level for subsequent
	// chat turns. Without the flag `claude -p` emits no thinking_delta
	// events at all (the interactive `defaultThinkingLevel` setting is
	// ignored in -p mode); with it, the model decides on-demand whether
	// to actually think. Stored on the persisted state so the choice
	// survives `/reload`.
	//
	//   /claude-effort               — show current value
	//   /claude-effort max           — set to max (default)
	//   /claude-effort off           — disable (skip the --effort flag)
	/** Completion entries for every effort level starting with `prefix` (case-insensitive). */
	const effortCompletions = (prefix: string) => {
		const needle = prefix.toLowerCase();
		return EFFORTS
			.filter((e) => e.startsWith(needle))
			.map((e) => ({ value: e, label: e }));
	};

	pi.registerCommand("claude-effort", {
		description: [
			"Set the extended-thinking effort level for Claude chat turns.",
			"  /claude-effort                          — show current value",
			"  /claude-effort off|low|medium|high|xhigh|max",
			"",
			"Note: without an effort setting, `claude -p` emits no thinking",
			"blocks at all — so lowering this trades thought visibility for speed.",
		].join("\n"),
		getArgumentCompletions: effortCompletions,
		handler: async (args, ctx) => {
			const requested = (args ?? "").trim().toLowerCase();
			const levels = EFFORTS.join(", ");

			// No argument: just report the current level.
			if (requested === "") {
				ctx.ui.notify(`Current Claude effort: ${persisted.effort}. Options: ${levels}.`, "info");
				return;
			}

			const next = EFFORTS.find((e) => e === requested);
			if (next === undefined) {
				ctx.ui.notify(`Unknown effort "${requested}". Valid levels: ${levels}.`, "warning");
				return;
			}

			// Mutate the shared persisted object in place so the change is
			// visible to the banner and to the next runChatTurn call.
			const previous = persisted.effort;
			persisted.effort = next;
			syncUI(ctx);

			const summary =
				next === "off"
					? "thinking disabled — Claude will no longer emit thinking blocks"
					: `thinking effort set to ${next}`;
			ctx.ui.notify(`${summary} (was ${previous}). Applies to the next chat turn.`, "info");
		},
	});

	pi.registerCommand("claude-end", {
		description: "Exit Claude chat mode and resume normal pi operation.",
		handler: async (_args, ctx) => {
			if (chatMode) {
				exitChatMode(ctx);
			} else {
				// Nothing to exit; just say so.
				ctx.ui.notify("Not in chat mode.", "info");
			}
		},
	});

	pi.registerCommand("claude-abort", {
		description: "Cancel the in-flight Claude response (no effect if nothing is generating).",
		handler: async (_args, ctx) => {
			// Only a controller belonging to an active generation is abortable.
			const controller = isGenerating ? currentAbort : null;
			if (!controller) {
				ctx.ui.notify("No active Claude response to cancel.", "info");
				return;
			}
			try {
				controller.abort();
			} catch {
				/* ok */
			}
			ctx.ui.notify("Aborting Claude response…", "info");
		},
	});

	// /claude-resume — present a picker of past Claude sessions whose cwd matches
	// the current project directory, then resume the chosen one in chat mode.
	//
	// The handler sets the session id, starts a fresh orange border, and then
	// replays the historical transcript inside that border so the user can
	// see the context being resumed (see the end of the handler).
	// Independently of what pi displays, `claude --resume <id>` keeps the
	// FULL conversation context alive on the Claude side, so subsequent
	// prompts behave exactly like a continuation.
	pi.registerCommand("claude-resume", {
		description: "Pick a past Claude session for the current project directory and resume it in chat mode.",
		/**
		 * Interactive resume flow:
		 *   1. Bail out when there is no UI or a response is still streaming.
		 *   2. Ask which of the listed past sessions (for ctx.cwd) to resume.
		 *   3. Ask which model the resumed session should be displayed under.
		 *   4. Register the session id for that model, enter chat mode, load
		 *      the historical turns into the session message, and notify.
		 */
		handler: async (_args, ctx) => {
			// The two pickers below need an interactive UI.
			if (!ctx?.hasUI) {
				ctx?.ui?.notify?.("/claude-resume requires interactive mode.", "error");
				return;
			}
			// Do not switch sessions underneath an in-flight response.
			if (isGenerating) {
				ctx.ui.notify(
					"A Claude response is still streaming. Use /claude-abort first, then /claude-resume.",
					"warning",
				);
				return;
			}

			const allSessions = readPastSessions(ctx.cwd);
			if (allSessions.length === 0) {
				ctx.ui.notify(`No past Claude sessions found for ${ctx.cwd}.`, "info");
				return;
			}

			// Only the first 25 entries are offered so the inline-note overlay
			// stays manageable; readPastSessions() already returns them
			// newest-first.
			const MAX_OPTIONS = 25;
			// Maximum length of the first-user-message preview inside a label.
			const PREVIEW_MAX = 60;
			const candidates = allSessions.slice(0, MAX_OPTIONS);

			// Label shape:
			//   <relative time> · <first user message truncated> · (session:<short-id>)
			// Should two sessions ever yield the same label, later ones get a
			// " #<k>" suffix so the picked label maps back to exactly one index.
			const timesSeen = new Map<string, number>();
			const optionLabels: string[] = candidates.map((session: PastSession) => {
				const snippet = session.firstUserMessage
					? truncate(session.firstUserMessage, PREVIEW_MAX)
					: "(no user message)";
				const base = `${relativeTime(session.mtimeMs)} · ${snippet} · (session:${session.sessionId.slice(0, 8)})`;
				const earlier = timesSeen.get(base) ?? 0;
				timesSeen.set(base, earlier + 1);
				return earlier === 0 ? base : `${base} #${earlier + 1}`;
			});

			// ── Picker 1: which session ─────────────────────────────────────
			const sessionAnswer = await askSingleQuestionWithInlineNote(ctx.ui, {
				question: `Resume which past Claude session in ${ctx.cwd}?`,
				options:  optionLabels.map((label) => ({ label })),
			});
			if (sessionAnswer.selectedOptions.length === 0) {
				ctx.ui.notify("Resume cancelled.", "info");
				return;
			}
			const chosenIdx = optionLabels.indexOf(sessionAnswer.selectedOptions[0]);
			if (chosenIdx < 0) {
				ctx.ui.notify("Picked session not found (label mismatch).", "warning");
				return;
			}
			const picked = candidates[chosenIdx];

			// ── Picker 2: which model label ─────────────────────────────────
			// The claude CLI ignores --model when --resume is set, so this
			// choice only affects how pi labels the conversation. The model
			// recorded for the session is tagged and preselected; without one
			// the default is index 1 (sonnet).
			const sessionModel = picked.model;
			const modelOptions = MODELS.map((name) => {
				if (sessionModel === name) return { label: `${name} (used by this session)` };
				return { label: name };
			});
			let recommendedIdx = 1; // default sonnet
			if (sessionModel) recommendedIdx = MODELS.indexOf(sessionModel);

			const modelAnswer = await askSingleQuestionWithInlineNote(ctx.ui, {
				question:    "Display this resumed session under which model in pi's UI?",
				options:     modelOptions,
				recommended: recommendedIdx,
			});
			if (modelAnswer.selectedOptions.length === 0) {
				ctx.ui.notify("Resume cancelled.", "info");
				return;
			}
			// The bare model name is the first whitespace-separated token of
			// the picked label (this drops any "(used by this session)" tag).
			const [firstToken] = modelAnswer.selectedOptions[0].split(/\s+/);
			const bare = firstToken.toLowerCase();
			let targetModel: Model = "sonnet";
			if ((MODELS as readonly string[]).includes(bare)) {
				targetModel = bare as Model;
			}

			// Register the session id first so that the next user turn in
			// chat mode resumes it via --resume <id>.
			sessions.set(targetModel, picked.sessionId);
			enterChatMode(targetModel, ctx, false);

			// Show the prior conversation inside the border: load the stored
			// turns, append them to the session message's turn list, and ask
			// the TUI (if present) for a re-render.
			const historical = loadSessionTurns(picked.sessionId, ctx.cwd, targetModel);
			const details    = ensureSessionMessage();
			details.turns.push(...historical);
			tuiRef?.requestRender();

			// Summary notification.
			const ago = relativeTime(picked.mtimeMs);
			let preview = "";
			if (picked.firstUserMessage) {
				preview = `: "${truncate(picked.firstUserMessage, 50)}"`;
			}
			let histNote = " (transcript empty or unreadable)";
			if (historical.length > 0) {
				histNote = ` (${historical.length} historical turn${historical.length === 1 ? "" : "s"} loaded)`;
			}
			ctx.ui.notify(
				`Resuming session ${picked.sessionId.slice(0, 8)} (${ago})${preview} as Claude ${capitalize(targetModel)}.${histNote}`,
				"info",
			);
		},
	});
	// Note on ESC: pi's extension runner reserves the "interrupt" action, so
	// pi.registerShortcut("escape", …) is silently ignored. ESC-to-abort is
	// wired via the ChatEscEditor custom editor installed in enterChatMode.

	// ── Raw code copy shortcut ───────────────────────────────────────────────
	// Ctrl+Shift+C copies the raw, unrendered content of a fenced code block
	// from the current chat-claude session by reading directly from the parsed
	// JSON stream — bypassing ANSI sequences, stray indentation, and
	// line-continuation garbage that normal terminal selection produces.
	//
	//   0 blocks found → notify; nothing copied
	//   1 block  found → copy immediately + notify
	//   N blocks found → inline picker (newest first) → copy selected + notify
	//
	// Note: most terminal emulators handle Ctrl+Shift+C at the VTE layer
	// (before the app sees it) so this shortcut is only reachable when
	// Kitty keyboard protocol is active and the terminal forwards the combo.
	// It does NOT intercept the terminal's own clipboard mechanism when pi
	// is not the foreground process receiving extended key events.
	pi.registerShortcut("ctrl+shift+c", {
		description: "Copy a raw fenced code block from the current Claude chat session (bypasses ANSI rendering).",
		/**
		 * Copies one fenced code block extracted from the current session.
		 *
		 * - No session or no code blocks: notifies and copies nothing.
		 * - Exactly one block, or no interactive UI: copies blocks[0].
		 * - Several blocks with a UI: shows a picker; cancelling copies nothing.
		 *
		 * The success notification is only shown once the clipboard call has
		 * completed without error; a failure is reported as an error instead.
		 */
		handler: async (ctx) => {
			if (!currentDetails) {
				ctx.ui.notify(
					"No active chat-claude session — start one with /claude first.",
					"info",
				);
				return;
			}
			const blocks = extractCodeBlocksFromSession(currentDetails);
			if (blocks.length === 0) {
				ctx.ui.notify(
					"No fenced code blocks found in the current chat-claude session.",
					"info",
				);
				return;
			}

			let chosen: ExtractedCodeBlock;

			if (blocks.length === 1 || !ctx.hasUI) {
				// Single block or no UI — copy the newest (index 0) directly.
				chosen = blocks[0]!;
			} else {
				// Multiple blocks — present a picker, numbered for uniqueness.
				// Number prefix guarantees distinct labels even when two blocks
				// share the same first line.
				const labels = blocks.map((b, i) => `${i + 1}. ${b.label}`);
				const pick = await askSingleQuestionWithInlineNote(ctx.ui, {
					question:    `${blocks.length} code blocks in this session — pick one to copy:`,
					options:     labels.map((label) => ({ label })),
					recommended: 0, // default: newest block
				});
				if (pick.selectedOptions.length === 0) return; // user cancelled
				const idx = labels.indexOf(pick.selectedOptions[0] ?? "");
				if (idx < 0) return;
				chosen = blocks[idx]!;
			}

			// Await and guard the clipboard call so a failure is surfaced to
			// the user instead of being followed by a false "Copied" notice
			// (or escaping the handler). `await` is a no-op if the helper
			// returns a plain value rather than a promise.
			try {
				await copyToClipboard(chosen.code);
			} catch (err: unknown) {
				const reason = err instanceof Error ? err.message : String(err);
				ctx.ui.notify(`Failed to copy code block to clipboard: ${reason}`, "error");
				return;
			}

			const lines   = chosen.code.split("\n").length;
			const langNote = chosen.lang ? ` (${chosen.lang})` : "";
			ctx.ui.notify(
				`Copied${langNote} · ${lines} line${lines === 1 ? "" : "s"}`,
				"success",
			);
		},
	});

	// ── Message renderer ─────────────────────────────────────────────────────
	// ONE custom message type holds the WHOLE chat-mode session. Returning a
	// live component (render reads `details.turns` on every frame) lets
	// streaming updates appear with a simple `tuiRef.requestRender()` — no
	// full rebuild of pi's chat container required.
	//
	// Performance: each frame now reuses cached per-turn line output for
	// completed turns (see renderSessionLines). Only the in-flight assistant
	// turn (if any) is rebuilt each frame, so long conversations stop driving
	// O(turns × blocks) allocation during Claude streaming.
	pi.registerMessageRenderer("chat-claude-session", (message, _opts, theme) => {
		const session = message.details as ChatSessionDetails | undefined;
		// No component unless the message carries details with a turns array.
		if (!(session && Array.isArray(session.turns))) return undefined;

		// Columns taken by the frame: 2 border chars + 2 padding chars.
		const FRAME_COLUMNS = 4;
		// Narrower than this, the session lines are returned without a border.
		const MIN_FRAMED_WIDTH = 6;

		// Produces the lines for one frame. renderSessionLines returns lines
		// already padded to the inner width, so wrapInOrangeBorder does not
		// need a per-frame visibleWidth() call.
		const renderFrame = (width: number) => {
			if (width < MIN_FRAMED_WIDTH) return renderSessionLines(session, theme, width);
			const paddedBody = renderSessionLines(session, theme, width - FRAME_COLUMNS);
			return wrapInOrangeBorder(paddedBody, width);
		};

		return {
			// Called by pi on theme change or when a from-scratch re-render
			// is needed: drop the per-turn render cache so the next pass
			// rebuilds against the current theme.
			invalidate: () => invalidateSessionCache(session),
			render: renderFrame,
		};
	});
}