// dotfiles/pi/.pi/agent/extensions/chat-claude.ts

/**
 * chat-claude — Distinctive Claude chat MODE inside pi.
 *
 * When chat mode is active, typed user input is routed to a Claude model
 * (haiku/sonnet/opus) via the `claude` CLI — NOT to pi's active LLM.
 *
 * Rendering goals (match pi's native chat UX):
 *   - Text appears as full markdown (no truncated previews, no dim grey).
 *   - Thinking blocks stream live as italic `thinkingText`-coloured markdown
 *     (the `claude` CLI is invoked with --include-partial-messages).
 *   - Tool calls use pi's normal tool-execution look (renderToolBlock).
 *
 * All turns of a single chat-mode session are rendered inside ONE continuous
 * orange border: the top line sits above the first turn, the bottom line
 * below the most recent turn, and the border extends live as new turns
 * (user + assistant) arrive. A new border starts each time the user enters
 * chat mode again via /claude / /claude-new.
 *
 * Commands:
 *   /claude [haiku|sonnet|opus]      — enter chat mode / switch model
 *   /claude-new [haiku|sonnet|opus]  — enter chat mode with a fresh Claude session
 *   /claude-resume                   — pick a past session for the current cwd and resume it
 *   /claude-end                      — exit chat mode
 *   /claude-abort                    — cancel an in-flight Claude response
 *   /claude-effort                   — change the extended-thinking effort level
 */

import { closeSync, openSync, readdirSync, readFileSync, readSync, statSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { CustomEditor, getMarkdownTheme } from "@mariozechner/pi-coding-agent";
import type { ExtensionAPI, KeybindingsManager } from "@mariozechner/pi-coding-agent";
import { Box, Container, matchesKey, Markdown, Spacer, Text, truncateToWidth, TUI, visibleWidth, type Component, type EditorTheme } from "@mariozechner/pi-tui";
import {
	formatUsage,
	renderToolBlock,
	runClaude,
	type StreamBlock,
} from "../shared/claude-stream.js";
import { startAskBridge, type AskBridge } from "../shared/pi-ask-bridge.js";
import { askSingleQuestionWithInlineNote } from "./pi-ask-tool/ask-inline-ui.js";

// ---------------------------------------------------------------------------
// Orange styling
// ---------------------------------------------------------------------------
// Raw SGR escape sequences (256-colour foreground, reset, bold).
const ORANGE     = "\x1b[38;5;208m"; // pumpkin / tangerine
const ORANGE_DIM = "\x1b[38;5;130m";
const RESET      = "\x1b[0m";
const BOLD       = "\x1b[1m";

// Each helper wraps `s` in its colour sequence and always terminates with
// RESET so the styling cannot leak into whatever is printed next.
const orange     = (s: string): string => `${ORANGE}${s}${RESET}`;
const orangeBold = (s: string): string => `${ORANGE}${BOLD}${s}${RESET}`;
const orangeDim  = (s: string): string => `${ORANGE_DIM}${s}${RESET}`;

// ---------------------------------------------------------------------------
// Orange border wrapping helper — wraps an array of inner lines in a
// continuous orange box. Applied at the session level so the WHOLE chat
// conversation sits inside ONE box (top above first turn, bottom below
// most recent turn). Pure string→string — no component allocation per frame.
//
// IMPORTANT: `paddedInnerLines` must ALREADY be padded to `width - 4`
// columns of visible width (the frame adds one border glyph plus one space
// on each side). We don't call visibleWidth() here because that function
// invokes Intl.Segmenter (expensive ICU BreakIterator on every miss) and
// this wrapper runs on every single line of the session on every frame.
// Profile data showed 85% of pi's idle CPU being burned in Segmenter via
// this function. Callers (renderSessionLines) pre-pad inner lines once
// per turn and cache them, so the cost amortises to O(streaming tail).
// ---------------------------------------------------------------------------
/**
 * Frame already-padded lines in one orange rounded box.
 *
 * @param paddedInnerLines Lines already padded to `width - 4` visible columns
 *                         (the frame adds a border glyph and one space on
 *                         each side). They are not measured or re-padded here.
 * @param width            Total outer width of the box, in columns.
 * @returns The top border, one framed row per input line, and the bottom
 *          border — `paddedInnerLines.length + 2` lines in total.
 */
function wrapInOrangeBorder(paddedInnerLines: string[], width: number): string[] {
	// `String.prototype.repeat` throws a RangeError on a negative count, so a
	// degenerate width (< 2) must be clamped rather than passed through.
	const rule   = "─".repeat(Math.max(0, width - 2));
	const v      = orange("│");
	const top    = orange("╭" + rule + "╮");
	const bottom = orange("╰" + rule + "╯");
	const out: string[] = [top];
	for (const line of paddedInnerLines) out.push(v + " " + line + " " + v);
	out.push(bottom);
	return out;
}

// Pad a single inner line to exactly `innerWidth` visible columns, OR
// truncate it if it's already over-wide. Uses visibleWidth() — pi-tui's
// grapheme-aware width function (which is what sits on top of the hot
// Intl.Segmenter path). Intended to be called ONCE per line at cache-build
// time, NOT per frame.
//
// Truncation is a defensive safety net: any component that emits a line
// wider than the width it was handed would otherwise crash pi's TUI (see
// tui.js doRender: "Rendered line N exceeds terminal width"). Without this,
// one stray over-wide line (e.g. a long source code line inside a Read
// tool result) takes down the entire session.
/**
 * Bring one line to `innerWidth` visible columns: over-wide lines are handed
 * to `truncateToWidth`, short lines get trailing spaces, exact-width lines
 * are returned untouched.
 */
function padToInnerWidth(line: string, innerWidth: number): string {
	// Positive ⇒ columns still to fill; negative ⇒ the line overflows.
	const shortfall = innerWidth - visibleWidth(line);
	if (shortfall < 0) return truncateToWidth(line, innerWidth, "…", true);
	if (shortfall === 0) return line;
	return line + " ".repeat(shortfall);
}

// ---------------------------------------------------------------------------
// Read-tool result truncation
//
// `Read` tool calls inside chat mode often dump entire files into the result
// banner — many hundreds of lines, which buries the surrounding conversation.
// We cap the rendered file content at MAX_READ_LINES and append a single
// centered notice line describing how many lines were hidden. This is a
// PRESENTATION-only truncation: `block.result.text` is left untouched, so
// resumed sessions / re-renders still see the full content.
//
// Centering needs render-time width, so we implement a tiny custom Component
// (TruncatedReadResult) and swap it into the Box body produced by the shared
// renderToolBlock helper. The same dim line-number formatting used by
// renderToolResultBox is preserved so the truncated view looks identical to
// the un-truncated one above the notice.
// ---------------------------------------------------------------------------
// Maximum number of `Read` result lines rendered before the remainder is
// collapsed into the "… N more lines hidden …" notice.
const MAX_READ_LINES = 40;

/**
 * Renders at most MAX_READ_LINES numbered rows of a `Read` result, followed
 * by a centred "… N more lines hidden …" notice when rows were left out.
 *
 * Every line it returns — content rows AND the notice — is clipped to the
 * render width, so the component never hands back a line wider than the
 * width it was given.
 */
class TruncatedReadResult implements Component {
	/**
	 * @param numbered  Parsed result rows: line-number label plus line content.
	 * @param maxNumLen Width the line-number labels are left-padded to.
	 * @param dimFn     Styles the line-number label.
	 * @param noticeFn  Styles the "hidden lines" notice.
	 */
	constructor(
		private readonly numbered: { num: string; content: string }[],
		private readonly maxNumLen: number,
		private readonly dimFn: (s: string) => string,
		private readonly noticeFn: (s: string) => string,
	) {}

	invalidate(): void { /* stateless */ }

	render(width: number): string[] {
		const total   = this.numbered.length;
		const visible = Math.min(MAX_READ_LINES, total);
		const lines: string[] = [];
		for (let i = 0; i < visible; i++) {
			const l = this.numbered[i];
			// Clip to `width` so one long source line (minified JS, a long
			// comment) cannot exceed the render width.
			const raw = this.dimFn(l.num.padStart(this.maxNumLen)) + " " + l.content;
			lines.push(truncateToWidth(raw, width, "…", false));
		}
		if (total > visible) {
			const hidden = total - visible;
			const notice = `… ${hidden} more line${hidden === 1 ? "" : "s"} hidden …`;
			const visLen = visibleWidth(notice);
			const left   = Math.max(0, Math.floor((width - visLen) / 2));
			// On a render narrower than the notice itself `left` is 0 and the
			// text would overflow, so clip it exactly like the content rows.
			lines.push(truncateToWidth(" ".repeat(left) + this.noticeFn(notice), width, "…", false));
		}
		return lines;
	}
}

// Wrap shared renderToolBlock: for `Read` tool blocks whose result exceeds
// MAX_READ_LINES, replace the Box body's child Text with our truncating
// component. All other tool kinds, error results, and short reads pass
// through unchanged.
/**
 * Render a tool block through the shared `renderToolBlock`, then — only for
 * successful `Read` results longer than MAX_READ_LINES — replace the body
 * Box's content with a TruncatedReadResult.
 *
 * @returns The container produced by `renderToolBlock` (possibly with its
 *          body swapped in place).
 */
function renderToolBlockTruncated(block: Extract<StreamBlock, { type: "tool" }>, theme: any): Container {
	const rendered = renderToolBlock(block, theme);

	// Everything except a successful Read passes through untouched.
	if (block.name.toLowerCase() !== "read") return rendered;
	const result = block.result;
	if (!result || result.isError) return rendered;

	const nonEmpty = result.text.split("\n").filter((l) => l !== "");
	if (nonEmpty.length <= MAX_READ_LINES) return rendered;

	// Split each row at its first TAB into label and content, tracking the
	// widest label so they can be right-aligned later.
	const rows: { num: string; content: string }[] = [];
	let widestNum = 0;
	for (const l of nonEmpty) {
		const tabAt = l.indexOf("\t");
		const row = tabAt >= 0
			? { num: l.slice(0, tabAt), content: l.slice(tabAt + 1) }
			: { num: "", content: l };
		if (row.num.length > widestNum) widestNum = row.num.length;
		rows.push(row);
	}

	// renderToolBlock's container is expected to be [headerText, bodyBox];
	// if that shape ever changes, leave the original rendering alone.
	const body = rendered.children[1];
	if (body instanceof Box) {
		const dim = (s: string) => theme.fg("dim", s);
		body.clear();
		body.addChild(new TruncatedReadResult(rows, widestNum, dim, dim));
	}
	return rendered;
}

// ---------------------------------------------------------------------------
// Models / turn types
// ---------------------------------------------------------------------------
/** Short model names accepted by the /claude commands. */
const MODELS = ["haiku", "sonnet", "opus"] as const;
/** Union of the literal entries of MODELS. */
type Model = (typeof MODELS)[number];
/** Upper-case the first UTF-16 code unit of `s`; the rest is left as-is. */
const capitalize = (s: string): string => `${s.slice(0, 1).toUpperCase()}${s.slice(1)}`;

// ---------------------------------------------------------------------------
// Past-session discovery (used by /claude-resume).
//
// Claude CLI persists every session's transcript at:
//   ~/.claude/projects/<mangled-cwd>/<session-uuid>.jsonl
// where the mangling rule (verified empirically) is "replace every '/' and
// '.' with '-'". So /home/jonas/dotfiles/pi/.pi → -home-jonas-dotfiles-pi--pi
// (the leading '-' comes from the leading '/'; '.pi' contributes '--pi'
// because both '/' and '.' map to '-').
//
// We don't need to consult ~/.claude/sessions/ for this picker — that
// directory only contains metadata for currently-running Claude processes.
// The on-disk transcript at projects/<cwd>/<id>.jsonl is the source of
// truth for "past sessions in this directory".
// ---------------------------------------------------------------------------
/** Turn a working directory into its projects-folder name: every '/' and '.' becomes '-'. */
function mangleCwd(cwd: string): string {
	return cwd.split(/[/.]/).join("-");
}

/**
 * Format the age of a timestamp as a short relative label.
 *
 * @param ms Epoch milliseconds of the moment being described.
 * @returns "Ns ago", "Nm ago", "Nh ago", "Nd ago", "Nmo ago" (30-day months)
 *          or "Ny ago" (365-day years); "in the future" when `ms` is later
 *          than now.
 */
function relativeTime(ms: number): string {
	const diff = Date.now() - ms;
	if (diff < 0)               return "in the future";
	const sec = Math.floor(diff / 1000);
	if (sec < 60)               return `${sec}s ago`;
	const min = Math.floor(sec / 60);
	if (min < 60)               return `${min}m ago`;
	const hr  = Math.floor(min / 60);
	if (hr  < 24)               return `${hr}h ago`;
	const day = Math.floor(hr / 24);
	if (day < 30)               return `${day}d ago`;
	// Gate on days rather than on the month count: 360–364 days is already
	// twelve 30-day months but not yet one 365-day year, so a `mon < 12`
	// test would fall through and print "0y ago".
	if (day < 365)              return `${Math.floor(day / 30)}mo ago`;
	return `${Math.floor(day / 365)}y ago`;
}

/** Map a raw Claude model identifier (e.g. "claude-haiku-4-5-20251001") to
 *  one of our canonical short names. Returns null if no match. */
function normalizeRawModel(raw: string): Model | null {
	const haystack = raw.toLowerCase();
	// Checked in this order; the first family name found in the id wins.
	const families: readonly Model[] = ["haiku", "sonnet", "opus"];
	return families.find((family) => haystack.includes(family)) ?? null;
}

/** One row of the /claude-resume picker, built from a transcript file on disk. */
interface PastSession {
	// Transcript file name with the ".jsonl" suffix removed.
	sessionId: string;
	// Modification time of the transcript file; used to sort newest-first.
	mtimeMs: number;
	firstUserMessage: string; // truncated/normalised, "" if not found
	model: Model | null;      // null ⇒ couldn't determine
	rawModel: string;         // raw string from JSONL ("" if not found)
}

/** Read the head of a file (avoids slurping multi-MB JSONL transcripts). */
function readFileHead(path: string, maxBytes: number): string {
	const handle = openSync(path, "r");
	try {
		// One positional read from offset 0; the file may be shorter than the
		// buffer, so only the bytes actually read are decoded.
		const chunk     = Buffer.alloc(maxBytes);
		const bytesRead = readSync(handle, chunk, 0, maxBytes, 0);
		return chunk.toString("utf8", 0, bytesRead);
	} finally {
		// Release the descriptor even when the read throws.
		closeSync(handle);
	}
}

/** Pluck the first user message + first model id from a transcript head. */
/**
 * Scan the head of a JSONL transcript for the first user message and the
 * first model id.
 *
 * Lines that are blank, are not valid JSON (including a final line cut off
 * by the head read), or do not parse to an object are skipped.
 *
 * @param head Leading text of a transcript, one JSON event per line.
 * @returns `firstUserMessage` with whitespace runs collapsed and trimmed
 *          ("" if none was found) and `rawModel` exactly as stored ("" if
 *          none was found).
 */
function extractSessionMeta(head: string): { firstUserMessage: string; rawModel: string } {
	let firstUserMessage = "";
	let rawModel = "";

	for (const line of head.split("\n")) {
		if (firstUserMessage && rawModel) break;
		if (!line.trim()) continue;
		let ev: any;
		try { ev = JSON.parse(line); } catch { continue; }
		// `JSON.parse("null")` succeeds and yields null, and reading `.type`
		// off null throws. Primitives carry no event data either, so skip
		// anything that is not an object.
		if (ev === null || typeof ev !== "object") continue;

		if (!firstUserMessage) {
			// Two equivalent sources: a queue-operation enqueue carries the raw
			// text the user typed; a `type: "user"` event carries it inside
			// message.content (which is either a string or an array of blocks).
			if (ev.type === "queue-operation" && ev.operation === "enqueue" && typeof ev.content === "string") {
				firstUserMessage = ev.content;
			} else if (ev.type === "user" && ev.message) {
				const c = ev.message.content;
				if (typeof c === "string") {
					firstUserMessage = c;
				} else if (Array.isArray(c)) {
					// Keep only the text blocks; other block kinds (e.g.
					// tool results) contribute nothing to the preview.
					firstUserMessage = c
						.filter((b: any) => b?.type === "text" && typeof b.text === "string")
						.map((b: any) => b.text as string)
						.join(" ");
				}
			}
		}

		if (!rawModel && typeof ev.message?.model === "string") {
			rawModel = ev.message.model;
		}
	}

	return {
		firstUserMessage: firstUserMessage.replace(/\s+/g, " ").trim(),
		rawModel,
	};
}

/**
 * List the transcripts stored for `cwd`, newest first.
 *
 * A missing or unreadable projects directory yields an empty list; a single
 * transcript that cannot be stat'ed or read is skipped.
 */
function readPastSessions(cwd: string): PastSession[] {
	const projectDir = join(homedir(), ".claude", "projects", mangleCwd(cwd));

	let transcripts: string[];
	try {
		transcripts = readdirSync(projectDir).filter((name) => name.endsWith(".jsonl"));
	} catch {
		return [];
	}

	const sessions: PastSession[] = [];
	for (const name of transcripts) {
		const fullPath = join(projectDir, name);

		let mtimeMs: number;
		let head: string;
		try {
			mtimeMs = statSync(fullPath).mtimeMs;
			// Up to ~256 KB: enough to reach the first user message and the
			// first assistant turn (which carries the model id) without
			// reading multi-MB transcripts in full.
			head = readFileHead(fullPath, 256 * 1024);
		} catch {
			continue;
		}

		const meta = extractSessionMeta(head);
		sessions.push({
			sessionId:        name.replace(/\.jsonl$/, ""),
			mtimeMs,
			firstUserMessage: meta.firstUserMessage,
			model:            meta.rawModel ? normalizeRawModel(meta.rawModel) : null,
			rawModel:         meta.rawModel,
		});
	}

	// Most recently modified transcript first.
	return sessions.sort((a, b) => b.mtimeMs - a.mtimeMs);
}

/** Truncate a string to `max` chars, appending "…" when cut. */
/**
 * Shorten `s` to at most `max` UTF-16 code units, ending in "…" when cut.
 *
 * @param s   Text to shorten.
 * @param max Maximum length in code units, counting the ellipsis.
 * @returns `s` unchanged when it already fits; otherwise the leading part of
 *          `s` with trailing whitespace removed, followed by "…".
 */
function truncate(s: string, max: number): string {
	if (s.length <= max) return s;
	let cut = Math.max(0, max - 1);
	// Slicing is per UTF-16 code unit, so the cut can land between the two
	// halves of a surrogate pair (e.g. an emoji). Back off one unit rather
	// than leave a lone high surrogate in front of the ellipsis.
	if (cut > 0) {
		const last = s.charCodeAt(cut - 1);
		if (last >= 0xd800 && last <= 0xdbff) cut -= 1;
	}
	return s.slice(0, cut).trimEnd() + "…";
}

// ---------------------------------------------------------------------------
// JSONL transcript → ChatTurn[]
//
// Given a sessionId and cwd, load the full transcript at
//   ~/.claude/projects/<mangled-cwd>/<sessionId>.jsonl
// and convert it into the same UserTurn / AssistantTurn shape the live
// runChatTurn() path produces. This lets /claude-resume render the past
// context inside the orange border so the user can SEE what they're
// resuming, not just blindly continue an invisible thread.
//
// JSONL event reference (observed in 2.1.118 transcripts):
//   {type:"user",     message:{role:"user",      content: <string>}}                     ← typed prompt
//   {type:"user",     message:{role:"user",      content: [{type:"tool_result", …}, …]}} ← tool outputs
//   {type:"assistant",message:{role:"assistant", content: [<one of: thinking|text|tool_use>], usage:{…}, model:"claude-sonnet-4-6"}}
// Each assistant content block is emitted as its OWN line, all sharing the
// same usage / model fields (one API call → many lines). We coalesce every
// run of consecutive assistant lines into a single AssistantTurn whose
// `blocks` array preserves the in-order list of thinking/text/tool blocks.
// Tool results that arrive in subsequent user-lines are attached back onto
// the matching tool block by tool_use_id.
//
// Lines we ignore: agent-setting, queue-operation, attachment, last-prompt,
// summary, and anything else without a recognisable role/content shape.
// Tokens/cost are intentionally NOT carried over — the JSONL repeats usage
// per content block so summing naively would over-count, and the user is
// here to see CONTENT, not a token panel for old turns.
// ---------------------------------------------------------------------------
/**
 * Load a stored transcript and convert it into chat turns.
 *
 * Consecutive assistant lines are merged into one assistant turn; tool
 * results found in later user lines are attached to the matching tool block
 * of the turn that is still open. Token and cost figures are not carried over.
 *
 * @param sessionId     Transcript id (file name without ".jsonl").
 * @param cwd           Working directory whose projects folder is searched.
 * @param fallbackModel Model to label an assistant turn with when its line
 *                      carries no recognisable model id.
 * @returns The turns in transcript order; an empty array when the transcript
 *          cannot be read.
 */
function loadSessionTurns(sessionId: string, cwd: string, fallbackModel: Model): ChatTurn[] {
	const path = join(homedir(), ".claude", "projects", mangleCwd(cwd), `${sessionId}.jsonl`);
	let raw: string;
	// An unreadable transcript simply means there is no history to show.
	try { raw = readFileSync(path, "utf8"); } catch { return []; }

	const turns: ChatTurn[] = [];
	// Assistant turn currently being assembled from consecutive assistant lines.
	let current: AssistantTurn | null = null;

	// Close the open assistant turn, if any: derive its finalText from the
	// text blocks and append it to the result.
	const flush = () => {
		if (!current) return;
		current.finalText = current.blocks
			.filter((b) => b.type === "text")
			.map((b: any) => b.text as string)
			.join("");
		turns.push(current);
		current = null;
	};

	// Return the open assistant turn, opening one labelled `model` if needed.
	const ensureCurrent = (model: Model): AssistantTurn => {
		if (current) return current;
		current = {
			role:      "assistant",
			model,
			blocks:    [],
			finalText: "",
			sessionId,
			isResume:  false,
			done:      true,
		};
		return current;
	};

	// Flatten a tool_result `content` (string, or array of blocks) to text.
	const toolResultText = (content: any): string => {
		if (typeof content === "string") return content;
		if (Array.isArray(content)) {
			return content
				.filter((b: any) => b?.type === "text" && typeof b.text === "string")
				.map((b: any) => b.text as string)
				.join("\n");
		}
		return "";
	};

	for (const line of raw.split("\n")) {
		if (!line.trim()) continue;
		let ev: any;
		try { ev = JSON.parse(line); } catch { continue; }
		// `JSON.parse("null")` succeeds and yields null, and reading `.type`
		// off null throws. Primitives carry no event data either, so skip
		// anything that is not an object.
		if (ev === null || typeof ev !== "object") continue;

		if (ev.type === "user") {
			const c = ev.message?.content;
			if (typeof c === "string") {
				// Typed user prompt — closes any in-flight assistant turn.
				flush();
				if (c.trim()) turns.push({ role: "user", text: c });
			} else if (Array.isArray(c)) {
				let sawToolResult = false;
				for (const block of c) {
					if (block?.type === "tool_result") {
						sawToolResult = true;
						const text    = toolResultText(block.content);
						const isError = block.is_error === true;
						// Attach to the first tool block with the same id in
						// the open assistant turn; results with no match are
						// dropped.
						if (current) {
							for (const tb of current.blocks) {
								if (tb.type === "tool" && tb.id === block.tool_use_id) {
									tb.result = { text, isError };
									break;
								}
							}
						}
					} else if (block?.type === "text" && typeof block.text === "string") {
						// Some clients send array-shaped user prompts. Text that
						// follows a tool_result in the same message is not
						// treated as a typed prompt.
						if (!sawToolResult) {
							flush();
							if (block.text.trim()) turns.push({ role: "user", text: block.text });
						}
					}
				}
			}
		} else if (ev.type === "assistant") {
			const content  = (ev.message?.content ?? []) as any[];
			const rawModel = String(ev.message?.model ?? "");
			const model    = (rawModel ? normalizeRawModel(rawModel) : null) ?? fallbackModel;
			// ensureCurrent only uses `model` when it opens a new turn, so a
			// coalesced turn keeps the model of its first line. (This is
			// purely for the header label.)
			const a = ensureCurrent(model);
			for (const block of content) {
				if (block?.type === "thinking" && typeof block.thinking === "string") {
					if (block.thinking.trim()) a.blocks.push({ type: "thinking", text: block.thinking });
				} else if (block?.type === "text" && typeof block.text === "string") {
					if (block.text.trim()) a.blocks.push({ type: "text", text: block.text });
				} else if (block?.type === "tool_use") {
					a.blocks.push({
						type:      "tool",
						id:        String(block.id ?? ""),
						name:      String(block.name ?? ""),
						inputJson: JSON.stringify(block.input ?? {}),
					});
				}
			}
		}
		// All other event types (agent-setting, queue-operation, attachment,
		// last-prompt, summary, …) are intentionally ignored.
	}

	flush();
	return turns;
}

// Per-turn render cache: once a turn is "frozen" (user turns are always
// frozen; assistant turns after done=true), its rendered output at a given
// (innerWidth, theme) is invariant. Caching avoids O(turns) rebuild on every
// frame, which otherwise creates quadratic-ish lag during streaming because
// partial-message updates drive tens of renders per second.
interface TurnRenderCache {
	// Lines from the last render of this turn, already padded to `cachedWidth`.
	cachedLines?: string[];
	// Inner width the cached lines were rendered at.
	cachedWidth?: number;
	// Theme object the cached lines were rendered with (compared by identity).
	cachedTheme?: unknown;
}

/** A prompt typed by the user. */
interface UserTurn extends TurnRenderCache {
	role: "user";
	// Prompt text; rendered as markdown after trimming.
	text: string;
}
/** One Claude response: an ordered list of thinking / text / tool blocks. */
interface AssistantTurn extends TurnRenderCache {
	role: "assistant";
	// Model shown in the turn header.
	model: Model;
	// Blocks in arrival order; rendered one after another.
	blocks: StreamBlock[];
	// Concatenated text of the text blocks when loaded from a transcript.
	// NOTE(review): the live streaming path is not visible here — confirm it
	// fills this field the same way.
	finalText: string;
	// Claude session id; its first 8 characters appear in the header.
	sessionId?: string;
	// Selects the "↩" header icon instead of "◆".
	isResume: boolean;
	// False while streaming (header shows ⏳); the turn is only cached once true.
	done: boolean;
	// When set, the turn renders as "Error: …" instead of its blocks.
	error?: string;
	// When set, the turn renders as "(Cancelled)" instead of its blocks.
	cancelled?: boolean;
	// Usage figures; the whole turn is handed to formatUsage once `done` is true.
	costUsd?: number;
	inputTokens?: number;
	outputTokens?: number;
	cacheReadTokens?: number;
	cacheWriteTokens?: number;
}
/** Any turn of a chat session, discriminated by `role`. */
type ChatTurn = UserTurn | AssistantTurn;

/** Payload of one chat-mode session: every turn rendered inside one border. */
interface ChatSessionDetails {
	// Turns in display order.
	turns: ChatTurn[];
}

// =============================================================================
// Extension entry point
// =============================================================================

// ── Reload-persistent state ─────────────────────────────────────────────────
// pi's `/reload` tears the extension down and re-invokes the default export,
// which resets every closure-local `let`/`const`. Two pieces of state must
// survive that: the Map of resumable Claude session ids (model → sessionId)
// — otherwise /reload silently orphans the ongoing Claude threads, forcing
// the user to re-pick them via /claude-resume — and the selected
// extended-thinking effort level.
//
// Everything else (chatMode, currentDetails, askBridge, tuiRef, isGenerating)
// is intentionally NOT persisted: the bridge/TUI references are bound to the
// torn-down ctx and must be rebuilt on the next enterChatMode(), and any
// in-flight stream is already aborted when the old closure is discarded.
//
// We stash the persisted state (see ChatClaudePersistedState) on globalThis
// behind a namespaced key. globalThis survives module re-evaluation (only
// top-level lexical bindings are reset), and the guarded getter keeps
// initialization idempotent across repeated reloads.
//
// Valid extended-thinking effort levels accepted by `claude --effort`, plus
// our synthetic "off" sentinel which skips the flag entirely (falling back
// to the CLI's default of no thinking emission in -p mode).
// Selectable effort levels. "off" is our own sentinel rather than a value
// passed to the CLI.
const EFFORTS = ["off", "low", "medium", "high", "xhigh", "max"] as const;
// Union of the literal entries of EFFORTS.
type Effort = (typeof EFFORTS)[number];
// Level used for fresh state, and back-filled into state that lacks one.
const DEFAULT_EFFORT: Effort = "max";

/** State kept on globalThis so it outlives an extension reload. */
interface ChatClaudePersistedState {
	// model → resumable Claude session id.
	sessions: Map<Model, string>;
	// Current extended-thinking effort level — persisted across `/reload`
	// so the user's choice survives the extension teardown the same way
	// resumable session ids do.
	effort: Effort;
}
const CHAT_CLAUDE_STATE_KEY = "__pi_chat_claude_persisted__";
/**
 * Return the state object stored on globalThis, creating it on first use.
 * Repeated calls hand back the same object, so in-place mutations are seen
 * by every holder.
 */
function getPersistedState(): ChatClaudePersistedState {
	const registry = globalThis as unknown as Record<string, ChatClaudePersistedState | undefined>;
	const existing = registry[CHAT_CLAUDE_STATE_KEY];
	if (existing) {
		// State written by an older revision of the extension
		// (pre-/claude-effort) has no effort field — back-fill it.
		if (!existing.effort) existing.effort = DEFAULT_EFFORT;
		return existing;
	}

	const fresh: ChatClaudePersistedState = {
		sessions: new Map<Model, string>(),
		effort:   DEFAULT_EFFORT,
	};
	registry[CHAT_CLAUDE_STATE_KEY] = fresh;
	return fresh;
}

export default function (pi: ExtensionAPI) {
	// ── Mode state ────────────────────────────────────────────────────────────
	let chatMode: Model | null = null;           // null ⇒ not in chat mode
	// model → resumable claude session id. Pulled from globalThis so the
	// mapping (and the current effort level) survive `/reload` (see
	// getPersistedState above). `persisted` is kept as a handle so
	// `/claude-effort` can mutate `persisted.effort` in place and have
	// the change picked up by subsequent runChatTurn calls.
	const persisted = getPersistedState();
	const { sessions } = persisted;
	let isGenerating = false;
	let currentAbort: AbortController | null = null;

	// pi-ask bridge — opens a Unix socket + generates an --mcp-config so
	// Claude (running inside this chat) can ask the user questions through
	// pi's native ask UI. Bound to the chat-mode lifetime: started on
	// enterChatMode, closed on exitChatMode.
	let askBridge: AskBridge | null = null;

	// Live TUI reference captured from the mode-banner widget factory, used to
	// schedule re-renders while a Claude response is streaming into the
	// current chat-claude-session message.
	let tuiRef: { requestRender: () => void } | null = null;

	// The in-flight chat session's `details` object. Stored by reference so
	// mutations here are reflected in the CustomMessage already displayed
	// in pi's conversation. Null between chat-mode sessions.
	let currentDetails: ChatSessionDetails | null = null;

	// Keep a module-level set of the extension's custom-message types so the
	// `context` event handler can strip them out of pi's LLM context — chat
	// mode is between the user and Claude and has no business in pi's
	// prompt payload.
	const CHAT_CLAUDE_CUSTOM_TYPES = new Set(["chat-claude-session"]);

	// ── Render throttling ────────────────────────────────────────────────────
	// Claude's `--include-partial-messages` fires an onUpdate for every token
	// delta (100+ Hz under a fast stream). Rendering per-token was the second
	// half of the progressive-lag problem — even with per-turn caching, the
	// TUI would be asked to diff+repaint dozens of times per second.
	//
	// scheduleStreamRender coalesces back-to-back requests into a trailing-
	// edge timer at ~30 Hz. The first update within a quiet window waits up
	// to 33 ms before rendering; any further updates in that window are
	// folded into the same render. flushStreamRender cancels the pending
	// timer and renders immediately — used on stream completion, abort, and
	// chat-mode teardown so the user sees the terminal frame right away.
	let streamRenderTimer: ReturnType<typeof setTimeout> | null = null;
	const STREAM_RENDER_INTERVAL_MS = 33; // ~30 Hz
	// Arm the render timer unless one is already pending; requests made while
	// it is pending are absorbed by the render it will trigger.
	function scheduleStreamRender() {
		if (!streamRenderTimer) {
			streamRenderTimer = setTimeout(() => {
				// Clear the handle first so the next update can arm a new timer.
				streamRenderTimer = null;
				tuiRef?.requestRender();
			}, STREAM_RENDER_INTERVAL_MS);
		}
	}
	// Cancel any pending throttled render and request one right away.
	function flushStreamRender() {
		const pending = streamRenderTimer;
		streamRenderTimer = null;
		if (pending) clearTimeout(pending);
		tuiRef?.requestRender();
	}

	// ── Rendering helpers ────────────────────────────────────────────────────
	// Mirrors pi's AssistantMessageComponent conventions (see
	// modes/interactive/components/assistant-message.js): Markdown at
	// paddingX=1, paddingY=0; thinking as italic `thinkingText`-coloured
	// markdown; tool blocks via the shared renderToolBlock (same one
	// ask-claude uses) so bash / read / edit / write all look identical to
	// pi's own tool executions.
	// Append the components for one turn to `container`: a header, then either
	// the user's markdown, a cancelled / error notice, or the assistant's
	// blocks followed by a usage line once the turn is done.
	function renderTurnInto(container: Container, turn: ChatTurn, theme: any) {
		const mdTheme = getMarkdownTheme();

		if (turn.role === "user") {
			container.addChild(new Text(orangeBold("▶ you"), 1, 0));
			container.addChild(new Spacer(1));
			container.addChild(new Markdown(turn.text.trim(), 1, 0, mdTheme));
			return;
		}

		// Header icon — cancelled takes precedence over error, then resume.
		let icon: string;
		if (turn.cancelled)     icon = orange("◇ ");
		else if (turn.error)    icon = theme.fg("error", "✗ ");
		else if (turn.isResume) icon = orange("↩ ");
		else                    icon = orange("◆ ");

		let header = icon + orangeBold(`Claude ${capitalize(turn.model)}`);
		if (turn.sessionId) header += theme.fg("dim", `  session:${turn.sessionId.slice(0, 8)}`);
		if (!turn.done)     header += theme.fg("warning", "  ⏳");
		container.addChild(new Text(header, 1, 0));
		container.addChild(new Spacer(1));

		// Cancelled and failed turns show a one-line notice instead of blocks.
		if (turn.cancelled) {
			container.addChild(new Text(orange("(Cancelled)"), 1, 0));
			return;
		}
		if (turn.error) {
			container.addChild(new Text(theme.fg("error", `Error: ${turn.error}`), 1, 0));
			return;
		}

		// Safety-net dedup: keep only the first tool block for each tool id.
		const seenToolIds = new Set<string>();
		const blocks: StreamBlock[] = (turn.blocks ?? []).filter((b) => {
			if (b.type !== "tool") return true;
			if (seenToolIds.has(b.id)) return false;
			seenToolIds.add(b.id);
			return true;
		});

		// Insert a blank spacer before every rendered block except the first.
		let renderedAny = false;
		const gap = () => {
			if (renderedAny) container.addChild(new Spacer(1));
			renderedAny = true;
		};

		for (const block of blocks) {
			switch (block.type) {
				case "thinking":
					if (!block.text.trim()) break;
					gap();
					container.addChild(new Markdown(block.text.trim(), 1, 0, mdTheme, {
						color:  (t: string) => theme.fg("thinkingText", t),
						italic: true,
					}));
					break;
				case "tool":
					gap();
					container.addChild(renderToolBlockTruncated(block, theme));
					break;
				case "text":
					if (!block.text.trim()) break;
					gap();
					container.addChild(new Markdown(block.text.trim(), 1, 0, mdTheme));
					break;
			}
		}

		// The usage footer only appears once the turn has finished.
		if (turn.done) {
			const usage = formatUsage(turn as any);
			if (usage) {
				container.addChild(new Spacer(1));
				container.addChild(new Text(theme.fg("dim", usage), 1, 0));
			}
		}
	}

	// Render one turn in isolation and return its lines PRE-PADDED to
	// `innerWidth` visible columns.
	//
	// Pre-padding here means `visibleWidth()` (which calls `Intl.Segmenter`
	// — the measured hot spot: 85% of pi's CPU in a laggy session) runs
	// exactly ONCE per line per turn, not once per line per frame. For
	// completed turns these padded lines are cached and reused forever at
	// that (width, theme); for the streaming tail turn the work is bounded
	// to just the in-flight turn's lines.
	// Render a single turn into a throwaway container and return its lines,
	// each brought to `innerWidth` visible columns.
	function renderTurnLines(turn: ChatTurn, theme: any, innerWidth: number): string[] {
		const scratch = new Container();
		renderTurnInto(scratch, turn, theme);
		return scratch.render(innerWidth).map((raw) => padToInnerWidth(raw, innerWidth));
	}

	// Assemble the WHOLE session's inner lines with per-turn caching.
	//
	// Cache invariants:
	//   • User turns are immutable → always cacheable.
	//   • Assistant turns are mutated in-place by runClaude's onUpdate
	//     callback while streaming, and only become stable after
	//     `done: true` is set (see runChatTurn). So we only cache
	//     assistants once they're done.
	//   • Cache keys on (innerWidth, theme) — terminal resize or theme
	//     switch invalidates all per-turn caches transparently by forcing
	//     a rebuild on the next render.
	//
	// With this cache, a streaming frame only rebuilds the one in-flight
	// assistant turn (the tail); all prior turns are an O(1) line-copy.
	// That eliminates the O(turns × blocks) rebuild that previously ran
	// every time a partial Claude message arrived.
	//
	// Returned lines are PRE-PADDED to `innerWidth` visible columns — see
	// renderTurnLines/padToInnerWidth for why. The caller can hand them
	// straight to wrapInOrangeBorder without any further visibleWidth()
	// calls, which is critical: visibleWidth drives Intl.Segmenter, whose
	// 512-entry LRU thrashes when called per-line-per-frame on a long chat.
	// Build the padded inner lines for the whole session, reusing each frozen
	// turn's cached lines when they were rendered at the same width and theme.
	function renderSessionLines(details: ChatSessionDetails, theme: any, innerWidth: number): string[] {
		const { turns } = details;

		// No turns yet: show a placeholder so the border has something to frame.
		if (turns.length === 0) {
			const placeholder = new Container();
			placeholder.addChild(new Text(orangeDim("(chat mode — waiting for first message)"), 0, 0));
			return placeholder.render(innerWidth).map((raw) => padToInnerWidth(raw, innerWidth));
		}

		// The blank row between turns is full-width too, so the right border
		// stays aligned.
		const blankRow = " ".repeat(innerWidth);
		const assembled: string[] = [];

		for (const [index, turn] of turns.entries()) {
			if (index > 0) assembled.push(blankRow);

			// User turns never change; assistant turns are stable once done.
			const frozen = turn.role === "user" || (turn.role === "assistant" && turn.done);
			const reusable =
				frozen && turn.cachedWidth === innerWidth && turn.cachedTheme === theme
					? turn.cachedLines
					: undefined;

			let lines: string[];
			if (reusable) {
				lines = reusable;
			} else {
				lines = renderTurnLines(turn, theme, innerWidth);
				// Store the fresh render for frozen turns; for a streaming turn
				// clear the slots so no stale output can be picked up later.
				turn.cachedLines = frozen ? lines : undefined;
				turn.cachedWidth = frozen ? innerWidth : undefined;
				turn.cachedTheme = frozen ? theme : undefined;
			}

			for (const line of lines) assembled.push(line);
		}
		return assembled;
	}

	// Drop every turn's render cache — called from the message renderer's
	// `invalidate()` hook (triggered by pi when theme changes or when a
	// from-scratch re-render is needed).
	function invalidateSessionCache(details: ChatSessionDetails) {
		// Clear all three cache fields on every turn so the next render pass
		// rebuilds each one from scratch.
		details.turns.forEach((turn) => {
			turn.cachedLines = undefined;
			turn.cachedWidth = undefined;
			turn.cachedTheme = undefined;
		});
	}

	// ── Mode banner + status ─────────────────────────────────────────────────
	function syncUI(ctx: any) {
		if (!ctx?.hasUI) return;
		const ui = ctx.ui;

		// Outside chat mode: remove the banner and status entry and put the
		// plain title back.
		if (!chatMode) {
			ui.setWidget("chat-claude", undefined);
			ui.setStatus("chat-claude", undefined);
			ui.setTitle("pi");
			return;
		}

		const modelName = capitalize(chatMode);
		const bannerModel = modelName.toUpperCase();
		const knownSession = sessions.get(chatMode);
		let shortId = "new";
		if (knownSession) shortId = knownSession.slice(0, 8);

		// Widget factory: remembers the TUI handle for streaming re-renders
		// and returns a two-line banner component. Effort and streaming state
		// are read at render time; model and session id are fixed per sync.
		const buildBanner = (tui: any, theme: any) => {
			tuiRef = tui;
			const render = () => {
				const rail = orange("▌ ");
				const segments = [
					orangeBold("◆ CLAUDE CHAT MODE"),
					orangeBold(bannerModel),
					orangeDim("session:" + shortId),
					orangeDim("effort:" + persisted.effort),
				];
				const streaming = isGenerating ? "  " + orange("⏳ streaming…") : "";
				const hint = theme.fg("dim",
					"Type to chat · /claude haiku|sonnet|opus · /claude-new · /claude-effort · /claude-end · /claude-abort");
				return [rail + segments.join("  ") + streaming, rail + hint];
			};
			return { invalidate: () => {}, render };
		};
		ui.setWidget("chat-claude", buildBanner, { placement: "aboveEditor" });

		const streamingSuffix = isGenerating ? " · streaming" : "";
		ui.setStatus(
			"chat-claude",
			orange(`◆ Claude ${modelName} · ${shortId} · effort:${persisted.effort}${streamingSuffix}`),
		);
		ui.setTitle(`pi · Claude ${modelName} Chat`);
	}

	// ── ESC-to-abort editor ──────────────────────────────────────────────────
	// ESC (the "interrupt" action) is on the extension-runner's reserved list
	// (see node_modules/@mariozechner/pi-coding-agent/.../runner.js — any
	// registerShortcut("escape", …) is silently dropped), so a custom editor is
	// the sanctioned way to intercept it. We subclass pi's exported CustomEditor
	// and short-circuit ESC ONLY while a chat-claude response is streaming.
	// For every other case we defer to `super.handleInput`, which runs the
	// app-level keybindings — including pi's own onEscape handler, which
	// setCustomEditorComponent copies onto the custom editor at install time
	// (see interactive-mode.js setCustomEditorComponent, ~line 1258).
	class ChatEscEditor extends CustomEditor {
		handleInput(data: string): void {
			const escapePressed = matchesKey(data, "escape");
			const controller = currentAbort;

			// Anything other than "ESC while a response is streaming" goes to
			// the base editor untouched.
			if (!escapePressed || !isGenerating || !controller) {
				super.handleInput(data);
				return;
			}

			try {
				controller.abort();
			} catch {
				/* ok */
			}
			// No ctx is available in the editor, so just cancel any pending
			// throttled render and draw a frame right away.
			flushStreamRender();
		}
	}

	// ── Mode transitions ─────────────────────────────────────────────────────
	function enterChatMode(model: Model, ctx: any, freshSession: boolean) {
		const alreadyInChat = chatMode !== null;
		const switchingModel = chatMode !== model;
		const interactive = Boolean(ctx?.hasUI);

		if (freshSession) sessions.delete(model);

		// First entry, a model switch, or an explicit fresh session all begin
		// a new border: detach from the previous session's details. The old
		// message keeps its own reference and stays in the scrollback.
		const startsNewBorder = freshSession || switchingModel || !alreadyInChat;
		if (startsNewBorder) currentDetails = null;

		chatMode = model;

		if (interactive) {
			// Replace the pi-ask bridge on every entry so its lifetime is
			// bounded by the chat session.
			askBridge?.close();
			try {
				askBridge = startAskBridge({
					ui: ctx.ui,
					onAsk: () => tuiRef?.requestRender(),
				});
			} catch (err) {
				askBridge = null;
				const reason = err instanceof Error ? err.message : String(err);
				ctx.ui.notify(
					`pi-ask bridge unavailable: ${reason} — Claude won't be able to ask questions.`,
					"warning",
				);
			}

			// (Re-)install the editor that lets ESC abort a streaming reply.
			ctx.ui.setEditorComponent(
				(tui: TUI, theme: EditorTheme, keybindings: KeybindingsManager) =>
					new ChatEscEditor(tui, theme, keybindings),
			);
		}

		syncUI(ctx);
		if (!interactive) return;

		// Tell the user what just happened.
		const resumeId = sessions.get(model);
		const kind = !freshSession && resumeId
			? `resume ${resumeId.slice(0, 8)}`
			: "new session";
		let verb = "Entered chat mode:";
		if (alreadyInChat) verb = switchingModel ? "Switched to" : "Re-entered";
		ctx.ui.notify(`${verb} Claude ${capitalize(model)} · ${kind}`, "info");
	}

	function exitChatMode(ctx: any) {
		// Stop any in-flight response first; a throwing abort() is ignored.
		try {
			currentAbort?.abort();
		} catch {
			/* ok */
		}
		currentAbort = null;
		isGenerating = false;
		chatMode = null;

		// Drop a pending throttled render, if one is queued.
		if (streamRenderTimer) {
			clearTimeout(streamRenderTimer);
			streamRenderTimer = null;
		}

		// Detach from the session details so the next entry opens a new
		// border; the already-sent message keeps its own reference.
		currentDetails = null;

		// Shut the pi-ask bridge down.
		askBridge?.close();
		askBridge = null;

		const interactive = Boolean(ctx?.hasUI);
		// Passing undefined hands the editor back to pi's default.
		if (interactive) ctx.ui.setEditorComponent(undefined);
		syncUI(ctx);
		if (interactive) ctx.ui.notify("Exited chat mode — back to normal pi.", "info");
	}

	// ── Session / turn management ────────────────────────────────────────────
	function ensureSessionMessage(): ChatSessionDetails {
		if (!currentDetails) {
			// First turn of this border: create the shared details object and
			// post the single custom message that renders the whole session.
			const fresh: ChatSessionDetails = { turns: [] };
			currentDetails = fresh;
			const message = {
				customType: "chat-claude-session",
				// Left empty; the text only matters when no custom renderer
				// is in play.
				content: "",
				display: true,
				details: fresh,
			};
			pi.sendMessage(message, { triggerTurn: false });
			return fresh;
		}
		return currentDetails;
	}

	/**
	 * Run one chat round-trip with Claude.
	 *
	 * Appends the user turn and a placeholder assistant turn to the session
	 * details, streams partial results from runClaude into the placeholder,
	 * and finally marks the assistant turn as done, cancelled or errored.
	 * Does nothing when chat mode is not active. Failures from runClaude are
	 * recorded on the assistant turn rather than re-thrown.
	 *
	 * @param userText Text the user submitted.
	 * @param ctx      pi context; `cwd`, `hasUI` and `ui` are read from it.
	 */
	async function runChatTurn(userText: string, ctx: any) {
		if (!chatMode) return;
		const model = chatMode;
		const details = ensureSessionMessage();

		// Append user turn + placeholder assistant turn up front so the
		// border extends as soon as the user hits enter.
		details.turns.push({ role: "user", text: userText });
		const existingSession = sessions.get(model);
		const assistantTurn: AssistantTurn = {
			role: "assistant",
			model,
			blocks: [],
			finalText: "",
			isResume: !!existingSession,
			done: false,
		};
		details.turns.push(assistantTurn);
		tuiRef?.requestRender();

		// Keep THIS turn's controller in a local. The shared `currentAbort`
		// can be nulled (exitChatMode) or replaced (a later turn) before the
		// runClaude promise settles, so it must not be consulted afterwards
		// to decide whether this turn was aborted.
		const abort = new AbortController();
		isGenerating = true;
		currentAbort = abort;
		syncUI(ctx);
		if (ctx?.hasUI) ctx.ui.setWorkingMessage(`Claude ${capitalize(model)} is thinking…`);

		try {
			const r = await runClaude(userText, {
				model,
				sessionId: existingSession,
				cwd: ctx.cwd,
				signal: abort.signal,
				// Enable extended thinking — without --effort, `claude -p`
				// NEVER emits thinking_delta events regardless of the user's
				// interactive defaultThinkingLevel setting, and the italic
				// thinking-block rendering sits idle. Configurable live via
				// /claude-effort; the model still decides on-demand whether
				// it actually needs to think.
				effort: persisted.effort,
				// Route AskUserQuestion-style requests through pi's native
				// overlay via the pi-ask-mcp bridge. Disallowing the built-in
				// AskUserQuestion forces Claude to use mcp__pi__ask if it
				// wants to ask a structured question.
				mcpConfigPath:   askBridge?.mcpConfigPath,
				disallowedTools: askBridge ? ["AskUserQuestion"] : undefined,
				onUpdate: (partial) => {
					assistantTurn.blocks    = partial.blocks;
					assistantTurn.finalText = partial.finalText;
					// Throttled so a fast token stream doesn't cause a
					// render-per-token.
					scheduleStreamRender();
				},
			});

			if (r.sessionId) sessions.set(model, r.sessionId);
			assistantTurn.blocks           = r.blocks;
			assistantTurn.finalText        = r.finalText;
			assistantTurn.sessionId        = r.sessionId;
			assistantTurn.costUsd          = r.costUsd;
			assistantTurn.inputTokens      = r.inputTokens;
			assistantTurn.outputTokens     = r.outputTokens;
			assistantTurn.cacheReadTokens  = r.cacheReadTokens;
			assistantTurn.cacheWriteTokens = r.cacheWriteTokens;
			assistantTurn.done             = true;
		} catch (err) {
			// Judge cancellation by this turn's own signal, so an abort issued
			// by exitChatMode (which also nulls `currentAbort`) is still
			// reported as "cancelled" instead of as an error.
			const aborted = abort.signal.aborted;
			assistantTurn.done      = true;
			assistantTurn.cancelled = aborted;
			assistantTurn.error     = aborted ? undefined : (err instanceof Error ? err.message : String(err));
		} finally {
			// Release the shared streaming state only if this turn still owns
			// it; a newer turn may have taken over after an exit + re-entry,
			// and its state must not be clobbered by this stale completion.
			if (currentAbort === abort) {
				isGenerating = false;
				currentAbort = null;
			}
			// Clear the working message unless another turn is now streaming.
			if (!isGenerating && ctx?.hasUI) ctx.ui.setWorkingMessage(undefined);
			syncUI(ctx);
			// Flush (not schedule): the stream just ended or was aborted —
			// we want the final frame on screen immediately. This also
			// cancels any in-flight throttled timer.
			flushStreamRender();
		}
	}

	// ── Input interception ───────────────────────────────────────────────────
	// Registered pi commands (/claude, /claude-end, etc.) dispatch BEFORE this
	// event fires, so they still work normally. Bash via `!` goes through
	// user_bash, not here. Every other text the user submits in chat mode is
	// routed straight to Claude.
	pi.on("input", async (event, ctx) => {
		const text = event.text ?? "";

		// Let pi handle the input itself when chat mode is off, the input is
		// not interactive, it is blank, or it is a `!` bash line.
		const bypass =
			!chatMode
			|| event.source !== "interactive"
			|| !text.trim()
			|| text.trimStart().startsWith("!");
		if (bypass) return { action: "continue" } as const;

		if (!isGenerating) {
			// Fire-and-forget: the turn streams in the background while this
			// handler returns; unexpected failures surface as a notification.
			void runChatTurn(text, ctx).catch((err) => {
				const reason = err instanceof Error ? err.message : String(err);
				ctx.ui.notify(`Chat error: ${reason}`, "error");
			});
		} else {
			// One response at a time — swallow the input and say why.
			ctx.ui.notify(
				"Claude is still responding. Use /claude-abort to cancel, then try again.",
				"warning",
			);
		}
		return { action: "handled" } as const;
	});

	// Keep chat-mode custom messages out of pi's LLM context — chat mode is
	// between the user and Claude, not part of pi's conversation.
	pi.on("context", (event) => {
		// A message is dropped when it is a custom message whose type is one
		// of the chat-claude types; everything else passes through unchanged.
		const isChatClaudeMessage = (m: any) =>
			m.role === "custom" && CHAT_CLAUDE_CUSTOM_TYPES.has(m.customType);
		const messages = event.messages.filter((m: any) => !isChatClaudeMessage(m));
		return { messages };
	});

	// ── Session lifecycle ────────────────────────────────────────────────────
	pi.on("session_start", (_event, ctx) => {
		// Bring banner, status entry and title in line with the current mode.
		syncUI(ctx);
	});

	pi.on("session_shutdown", (_event, ctx) => {
		// Normal path: leaving chat mode tears everything down.
		if (chatMode) {
			exitChatMode(ctx);
		}

		// Belt and braces: close a bridge that outlived chat mode.
		if (askBridge) {
			askBridge.close();
			askBridge = null;
		}

		// Likewise, never leave a throttled render timer pending at shutdown.
		if (streamRenderTimer) {
			clearTimeout(streamRenderTimer);
			streamRenderTimer = null;
		}
	});

	// ── Commands ─────────────────────────────────────────────────────────────
	const modelCompletions = (prefix: string) => {
		// Case-insensitive prefix match against the known model names.
		const needle = prefix.toLowerCase();
		const matches = MODELS.filter((candidate) => candidate.startsWith(needle));
		return matches.map((candidate) => ({ value: candidate, label: candidate }));
	};

	pi.registerCommand("claude", {
		description: [
			"Enter distinct Claude chat mode — typed input bypasses pi's LLM and goes to Claude.",
			"  /claude                    — enter with last/default model (sonnet)",
			"  /claude haiku|sonnet|opus  — enter/switch model",
		].join("\n"),
		getArgumentCompletions: modelCompletions,
		handler: async (args, ctx) => {
			// A recognised model name wins; otherwise keep the active model,
			// falling back to sonnet when chat mode is off.
			const requested = (args ?? "").trim().toLowerCase();
			const recognised = MODELS.find((m) => m === requested);
			const target: Model = recognised ?? chatMode ?? "sonnet";
			enterChatMode(target, ctx, false);
		},
	});

	pi.registerCommand("claude-new", {
		description: "Enter chat mode with a fresh Claude session (discards any resumed session id). Example: /claude-new opus",
		getArgumentCompletions: modelCompletions,
		handler: async (args, ctx) => {
			// Same model resolution as /claude, but the stored session id for
			// the target model is discarded on entry.
			const requested = (args ?? "").trim().toLowerCase();
			const recognised = MODELS.find((m) => m === requested);
			const target: Model = recognised ?? chatMode ?? "sonnet";
			enterChatMode(target, ctx, true);
		},
	});

	// /claude-effort — set the extended-thinking effort level for subsequent
	// chat turns. Without the flag `claude -p` emits no thinking_delta
	// events at all (the interactive `defaultThinkingLevel` setting is
	// ignored in -p mode); with it, the model decides on-demand whether
	// to actually think. Stored on the persisted state so the choice
	// survives `/reload`.
	//
	//   /claude-effort               — show current value
	//   /claude-effort max           — set to max (default)
	//   /claude-effort off           — disable (skip the --effort flag)
	const effortCompletions = (prefix: string) => {
		// Case-insensitive prefix match against the known effort levels.
		const needle = prefix.toLowerCase();
		const matches = EFFORTS.filter((level) => level.startsWith(needle));
		return matches.map((level) => ({ value: level, label: level }));
	};

	pi.registerCommand("claude-effort", {
		description: [
			"Set the extended-thinking effort level for Claude chat turns.",
			"  /claude-effort                          — show current value",
			"  /claude-effort off|low|medium|high|xhigh|max",
			"",
			"Note: without an effort setting, `claude -p` emits no thinking",
			"blocks at all — so lowering this trades thought visibility for speed.",
		].join("\n"),
		getArgumentCompletions: effortCompletions,
		handler: async (args, ctx) => {
			const requested = (args ?? "").trim().toLowerCase();
			const validList = EFFORTS.join(", ");

			// No argument: report the current level and the valid options.
			if (requested === "") {
				ctx.ui.notify(
					`Current Claude effort: ${persisted.effort}. Options: ${validList}.`,
					"info",
				);
				return;
			}

			const level = EFFORTS.find((e) => e === requested);
			if (level === undefined) {
				ctx.ui.notify(
					`Unknown effort "${requested}". Valid levels: ${validList}.`,
					"warning",
				);
				return;
			}

			// Store the new level and refresh the banner/status, which both
			// display it.
			const previous = persisted.effort;
			persisted.effort = level;
			syncUI(ctx);

			let note = `thinking effort set to ${requested}`;
			if (requested === "off") {
				note = "thinking disabled — Claude will no longer emit thinking blocks";
			}
			ctx.ui.notify(
				`${note} (was ${previous}). Applies to the next chat turn.`,
				"info",
			);
		},
	});

	pi.registerCommand("claude-end", {
		description: "Exit Claude chat mode and resume normal pi operation.",
		handler: async (_args, ctx) => {
			if (chatMode) {
				exitChatMode(ctx);
				return;
			}
			// Nothing to leave — just say so.
			ctx.ui.notify("Not in chat mode.", "info");
		},
	});

	pi.registerCommand("claude-abort", {
		description: "Cancel the in-flight Claude response (no effect if nothing is generating).",
		handler: async (_args, ctx) => {
			// Only a controller belonging to a streaming response counts.
			const controller = isGenerating ? currentAbort : null;
			if (!controller) {
				ctx.ui.notify("No active Claude response to cancel.", "info");
				return;
			}
			try {
				controller.abort();
			} catch {
				/* ok */
			}
			ctx.ui.notify("Aborting Claude response…", "info");
		},
	});

	// /claude-resume — present a picker of past Claude sessions whose cwd matches
	// the current project directory, then resume the chosen one in chat mode.
	//
	// After the pick, the session id is wired up for the chosen model and the
	// historical transcript is loaded via loadSessionTurns() and replayed
	// inside the orange border. The replay is for display only:
	// `claude --resume <id>` keeps the FULL conversation context alive on the
	// Claude side, so subsequent prompts behave exactly like a continuation.
	pi.registerCommand("claude-resume", {
		description: "Pick a past Claude session for the current project directory and resume it in chat mode.",
		// Flow: pick a past session → pick the display model → store the
		// session id → enter chat mode → replay the transcript in a new border.
		handler: async (_args, ctx) => {
			if (!ctx?.hasUI) {
				ctx?.ui?.notify?.("/claude-resume requires interactive mode.", "error");
				return;
			}
			if (isGenerating) {
				ctx.ui.notify(
					"A Claude response is still streaming. Use /claude-abort first, then /claude-resume.",
					"warning",
				);
				return;
			}

			const past = readPastSessions(ctx.cwd);
			if (past.length === 0) {
				ctx.ui.notify(
					`No past Claude sessions found for ${ctx.cwd}.`,
					"info",
				);
				return;
			}

			// Cap the picker at the 25 most recent sessions to keep the
			// inline-note overlay tractable. Sessions are already sorted
			// newest-first by readPastSessions().
			const MAX_OPTIONS = 25;
			const choices = past.slice(0, MAX_OPTIONS);

			// Label format (per user spec):
			//   <relative time> · <first user message truncated> · (session:<short-id>)
			const PREVIEW_MAX = 60;
			const buildLabel = (s: PastSession) => {
				const preview = s.firstUserMessage
					? truncate(s.firstUserMessage, PREVIEW_MAX)
					: "(no user message)";
				return `${relativeTime(s.mtimeMs)} · ${preview} · (session:${s.sessionId.slice(0, 8)})`;
			};

			// Disambiguate: in the very unlikely event two sessions produce
			// the same display label, append a counter so the post-pick lookup
			// can match exactly.
			const labels: string[] = [];
			const seen = new Map<string, number>();
			for (const s of choices) {
				const base = buildLabel(s);
				const n    = seen.get(base) ?? 0;
				seen.set(base, n + 1);
				labels.push(n === 0 ? base : `${base} #${n + 1}`);
			}

			const sessionPick = await askSingleQuestionWithInlineNote(ctx.ui, {
				question: `Resume which past Claude session in ${ctx.cwd}?`,
				options:  labels.map((label) => ({ label })),
			});
			if (sessionPick.selectedOptions.length === 0) {
				ctx.ui.notify("Resume cancelled.", "info");
				return;
			}
			const pickedLabel = sessionPick.selectedOptions[0];
			const idx = labels.indexOf(pickedLabel);
			if (idx < 0) {
				ctx.ui.notify("Picked session not found (label mismatch).", "warning");
				return;
			}
			const picked = choices[idx];

			// Second picker: which model to display the resumed conversation
			// under in pi's UI. Note: claude CLI ignores --model when --resume
			// is set, so this is purely a UI/labelling choice. We mark the
			// session's original model with "(used by this session)" and set
			// it as the recommended default so most users can just hit Enter.
			const originalModel = picked.model;
			const modelLabels   = MODELS.map((m) =>
				originalModel === m ? `${m} (used by this session)` : m,
			);
			// Recommend the session's own model when it is one we know;
			// otherwise recommend sonnet. Looking sonnet up by name (instead
			// of a hard-coded position) and rejecting a -1 lookup keeps the
			// index valid whatever MODELS contains.
			const modelNames     = MODELS as readonly string[];
			const sonnetIdx      = Math.max(0, modelNames.indexOf("sonnet"));
			const originalIdx    = originalModel ? modelNames.indexOf(originalModel) : -1;
			const recommendedIdx = originalIdx >= 0 ? originalIdx : sonnetIdx;

			const modelPick = await askSingleQuestionWithInlineNote(ctx.ui, {
				question:    "Display this resumed session under which model in pi's UI?",
				options:     modelLabels.map((label) => ({ label })),
				recommended: recommendedIdx,
			});
			if (modelPick.selectedOptions.length === 0) {
				ctx.ui.notify("Resume cancelled.", "info");
				return;
			}
			// Strip any "(used by this session)" suffix and parse the bare
			// model name (the first whitespace-separated token).
			const bare         = modelPick.selectedOptions[0].split(/\s+/)[0].toLowerCase();
			const targetModel: Model = modelNames.includes(bare)
				? (bare as Model)
				: "sonnet";

			// Wire up the session id BEFORE entering chat mode, so the next
			// turn the user sends triggers --resume <id>.
			sessions.set(targetModel, picked.sessionId);
			enterChatMode(targetModel, ctx, false);

			// enterChatMode keeps the current border when chat mode was
			// already active under the same model. A resumed session must not
			// be appended to another session's turns, so always detach here;
			// ensureSessionMessage() then creates a new, empty session message
			// and the earlier border stays in the scrollback.
			currentDetails = null;

			// Replay the historical transcript inside the new border so the
			// user can SEE the context they're resuming.
			const historical = loadSessionTurns(picked.sessionId, ctx.cwd, targetModel);
			const details    = ensureSessionMessage();
			details.turns.push(...historical);
			tuiRef?.requestRender();

			const ago = relativeTime(picked.mtimeMs);
			const preview = picked.firstUserMessage
				? `: "${truncate(picked.firstUserMessage, 50)}"`
				: "";
			const histNote = historical.length > 0
				? ` (${historical.length} historical turn${historical.length === 1 ? "" : "s"} loaded)`
				: " (transcript empty or unreadable)";
			ctx.ui.notify(
				`Resuming session ${picked.sessionId.slice(0, 8)} (${ago})${preview} as Claude ${capitalize(targetModel)}.${histNote}`,
				"info",
			);
		},
	});
	// Note on ESC: pi's extension runner reserves the "interrupt" action, so
	// pi.registerShortcut("escape", …) is silently ignored. ESC-to-abort is
	// wired via the ChatEscEditor custom editor installed in enterChatMode.

	// ── Message renderer ─────────────────────────────────────────────────────
	// ONE custom message type holds the WHOLE chat-mode session. Returning a
	// live component (render reads `details.turns` on every frame) lets
	// streaming updates appear with a simple `tuiRef.requestRender()` — no
	// full rebuild of pi's chat container required.
	//
	// Performance: each frame now reuses cached per-turn line output for
	// completed turns (see renderSessionLines). Only the in-flight assistant
	// turn (if any) is rebuilt each frame, so long conversations stop driving
	// O(turns × blocks) allocation during Claude streaming.
	pi.registerMessageRenderer("chat-claude-session", (message, _opts, theme) => {
		const session = message.details as ChatSessionDetails | undefined;
		// No usable details: return undefined rather than a component.
		if (!session || !Array.isArray(session.turns)) return undefined;

		// Two border characters plus two padding characters.
		const BORDER_OVERHEAD = 4;

		const render = (width: number) => {
			// Too narrow for a border: emit the bare session lines.
			if (width < 6) return renderSessionLines(session, theme, width);
			// Lines come back already padded to the inner width, so the
			// border wrapper can use them as-is.
			const inner = renderSessionLines(session, theme, width - BORDER_OVERHEAD);
			return wrapInOrangeBorder(inner, width);
		};

		return {
			// Forget every turn's cached lines so the next render rebuilds them.
			invalidate: () => invalidateSessionCache(session),
			render,
		};
	});
}