/** * chat-claude — Distinctive Claude chat MODE inside pi. * * When chat mode is active, typed user input is routed to a Claude model * (haiku/sonnet/opus) via the `claude` CLI — NOT to pi's active LLM. * * Rendering goals (match pi's native chat UX): * - Text appears as full markdown (no truncated previews, no dim grey). * - Thinking blocks stream live as italic `thinkingText`-coloured markdown * (the `claude` CLI is invoked with --include-partial-messages). * - Tool calls use pi's normal tool-execution look (renderToolBlock). * * All turns of a single chat-mode session are rendered inside ONE continuous * orange border: the top line sits above the first turn, the bottom line * below the most recent turn, and the border extends live as new turns * (user + assistant) arrive. A new border starts each time the user enters * chat mode again via /claude / /claude-new. * * Commands: * /claude [haiku|sonnet|opus] — enter chat mode / switch model * /claude-new [haiku|sonnet|opus] — enter chat mode with a fresh Claude session * /claude-resume — pick a past session for the current cwd and resume it * /claude-end — exit chat mode * /claude-abort — cancel an in-flight Claude response */ import { closeSync, openSync, readdirSync, readFileSync, readSync, statSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; import { CustomEditor, getMarkdownTheme } from "@mariozechner/pi-coding-agent"; import type { ExtensionAPI, KeybindingsManager } from "@mariozechner/pi-coding-agent"; import { Box, Container, matchesKey, Markdown, Spacer, Text, truncateToWidth, TUI, visibleWidth, type Component, type EditorTheme } from "@mariozechner/pi-tui"; import { formatUsage, renderToolBlock, runClaude, type StreamBlock, } from "../shared/claude-stream.js"; import { startAskBridge, type AskBridge } from "../shared/pi-ask-bridge.js"; import { askSingleQuestionWithInlineNote } from "./pi-ask-tool/ask-inline-ui.js"; // 
// ---------------------------------------------------------------------------
// Orange styling
// ---------------------------------------------------------------------------
const ORANGE = "\x1b[38;5;208m"; // pumpkin / tangerine
const ORANGE_DIM = "\x1b[38;5;130m";
const RESET = "\x1b[0m";
const BOLD = "\x1b[1m";

const orange = (s: string) => ORANGE + s + RESET;
const orangeBold = (s: string) => ORANGE + BOLD + s + RESET;
const orangeDim = (s: string) => ORANGE_DIM + s + RESET;

// ---------------------------------------------------------------------------
// Orange border wrapping helper — wraps an array of inner lines in a
// continuous orange box. Applied at the session level so the WHOLE chat
// conversation sits inside ONE box (top above first turn, bottom below
// most recent turn). Pure string→string — no component allocation per frame.
//
// IMPORTANT: `paddedInnerLines` must ALREADY be padded to the inner width
// (`width - 4` visible columns: one border glyph plus one space per side).
// We don't call visibleWidth() here because that function invokes
// Intl.Segmenter (expensive ICU BreakIterator on every miss) and this wrapper
// runs on every single line of the session on every frame. Profile data
// showed 85% of pi's idle CPU being burned in Segmenter via this function.
// Callers (renderSessionLines) pre-pad inner lines once per turn and cache
// them, so the cost amortises to O(streaming tail).
// ---------------------------------------------------------------------------

/**
 * Wrap pre-padded lines in an orange box that is `width` columns wide.
 *
 * @param paddedInnerLines Lines already padded to `width - 4` visible columns.
 * @param width Total width of the box, including both border glyphs.
 * @returns The top rule, one bordered row per inner line, then the bottom rule.
 */
function wrapInOrangeBorder(paddedInnerLines: string[], width: number): string[] {
  const v = orange("│");
  // String.prototype.repeat throws RangeError on a negative count, so clamp:
  // a degenerate width (< 2) must not take the renderer down.
  const rule = "─".repeat(Math.max(0, width - 2));
  const out: string[] = [orange("╭" + rule + "╮")];
  for (const line of paddedInnerLines) out.push(v + " " + line + " " + v);
  out.push(orange("╰" + rule + "╯"));
  return out;
}

// Pad a single inner line to exactly `innerWidth` visible columns, OR
// truncate it if it's already over-wide. Uses visibleWidth() — pi-tui's
// grapheme-aware width function (which is what sits on top of the hot
// Intl.Segmenter path). Intended to be called ONCE per line at cache-build
// time, NOT per frame.
//
// Truncation is a defensive safety net: any component that emits a line
// wider than the width it was handed would otherwise crash pi's TUI (see
// tui.js doRender: "Rendered line N exceeds terminal width"). Without this,
// one stray over-wide line (e.g. a long source code line inside a Read
// tool result) takes down the entire session.
function padToInnerWidth(line: string, innerWidth: number): string {
  const w = visibleWidth(line);
  // NOTE(review): the trailing `true` is presumably truncateToWidth's "pad"
  // flag — confirm against pi-tui.
  if (w > innerWidth) return truncateToWidth(line, innerWidth, "…", true);
  const padRight = innerWidth - w;
  return padRight > 0 ? line + " ".repeat(padRight) : line;
}

// ---------------------------------------------------------------------------
// Read-tool result truncation
//
// `Read` tool calls inside chat mode often dump entire files into the result
// banner — many hundreds of lines, which buries the surrounding conversation.
// We cap the rendered file content at MAX_READ_LINES and append a single
// centered notice line describing how many lines were hidden. This is a
// PRESENTATION-only truncation: `block.result.text` is left untouched, so
// resumed sessions / re-renders still see the full content.
//
// Centering needs render-time width, so we implement a tiny custom Component
// (TruncatedReadResult) and swap it into the Box body produced by the shared
// renderToolBlock helper. The same dim line-number formatting used by
// renderToolResultBox is preserved so the truncated view looks identical to
// the un-truncated one above the notice.
// ---------------------------------------------------------------------------
const MAX_READ_LINES = 40;

/**
 * Presentation-only view of a `Read` tool result: renders at most
 * MAX_READ_LINES numbered lines followed by a centered "N more lines hidden"
 * notice. Every emitted line is bounded to the render width.
 */
class TruncatedReadResult implements Component {
  constructor(
    private readonly numbered: { num: string; content: string }[],
    private readonly maxNumLen: number,
    private readonly dimFn: (s: string) => string,
    private readonly noticeFn: (s: string) => string,
  ) {}

  invalidate(): void { /* stateless */ }

  render(width: number): string[] {
    const total = this.numbered.length;
    const visible = Math.min(MAX_READ_LINES, total);
    const lines: string[] = [];
    for (let i = 0; i < visible; i++) {
      const l = this.numbered[i];
      // Truncate to `width` so a single long source-code line (think
      // minified JS or a long comment) can't blow past the TUI's width
      // check and crash the whole session. `truncateToWidth` is
      // ANSI-aware so the dim SGR sequences wrapping the line number
      // survive the cut.
      const raw = this.dimFn(l.num.padStart(this.maxNumLen)) + " " + l.content;
      lines.push(truncateToWidth(raw, width, "…", false));
    }
    if (total > visible) {
      const hidden = total - visible;
      const notice = `… ${hidden} more line${hidden === 1 ? "" : "s"} hidden …`;
      const visLen = visibleWidth(notice);
      if (visLen > width) {
        // The notice is subject to the same width check as the content
        // lines; on a very narrow render cut it instead of overflowing.
        lines.push(truncateToWidth(this.noticeFn(notice), width, "…", false));
      } else {
        const left = Math.floor((width - visLen) / 2);
        lines.push(" ".repeat(left) + this.noticeFn(notice));
      }
    }
    return lines;
  }
}

// Wrap shared renderToolBlock: for `Read` tool blocks whose result exceeds
// MAX_READ_LINES, replace the Box body's child Text with our truncating
// component. All other tool kinds, error results, and short reads pass
// through unchanged.
function renderToolBlockTruncated(block: Extract<StreamBlock, { type: "tool" }>, theme: any): Container {
  const c = renderToolBlock(block, theme);
  if (block.name.toLowerCase() !== "read") return c;
  if (!block.result || block.result.isError) return c;

  const rawLines = block.result.text.split("\n").filter((l) => l.length > 0);
  if (rawLines.length <= MAX_READ_LINES) return c;

  // Each result line is split at its first tab into "<line number>\t<content>";
  // lines without a tab get an empty number column.
  const parsed = rawLines.map((l) => {
    const tab = l.indexOf("\t");
    return tab >= 0
      ? { num: l.slice(0, tab), content: l.slice(tab + 1) }
      : { num: "", content: l };
  });
  const maxNumLen = parsed.reduce((m, l) => Math.max(m, l.num.length), 0);

  // renderToolBlock's container is [headerText, bodyBox]. Bail safely if a
  // future change to that helper alters the structure.
  const body = c.children[1];
  if (!(body instanceof Box)) return c;

  const dim = (s: string) => theme.fg("dim", s);
  body.clear();
  body.addChild(new TruncatedReadResult(parsed, maxNumLen, dim, dim));
  return c;
}

// ---------------------------------------------------------------------------
// Models / turn types
// ---------------------------------------------------------------------------
const MODELS = ["haiku", "sonnet", "opus"] as const;
type Model = (typeof MODELS)[number];

const capitalize = (s: string) => s.charAt(0).toUpperCase() + s.slice(1);

// ---------------------------------------------------------------------------
// Past-session discovery (used by /claude-resume).
//
// Claude CLI persists every session's transcript at:
//   ~/.claude/projects/<mangled-cwd>/<session-id>.jsonl
// where the mangling rule (verified empirically) is "replace every '/' and
// '.' with '-'". So /home/jonas/dotfiles/pi/.pi → -home-jonas-dotfiles-pi--pi
// (the leading '-' comes from the leading '/'; '.pi' contributes '--pi'
// because both '/' and '.' map to '-').
//
// We don't need to consult ~/.claude/sessions/ for this picker — that
// directory only contains metadata for currently-running Claude processes.
// The on-disk transcript at projects/<mangled-cwd>/<session-id>.jsonl is the
// source of truth for "past sessions in this directory".
// ---------------------------------------------------------------------------

/** Apply the Claude CLI project-directory mangling: every '/' and '.' becomes '-'. */
function mangleCwd(cwd: string): string {
  return cwd.replace(/[/.]/g, "-");
}

/**
 * Format a past timestamp (epoch milliseconds) as a coarse "N<unit> ago" label.
 * Months are approximated as 30 days and years as 365 days.
 */
function relativeTime(ms: number): string {
  const diff = Date.now() - ms;
  if (diff < 0) return "in the future";
  const sec = Math.floor(diff / 1000);
  if (sec < 60) return `${sec}s ago`;
  const min = Math.floor(sec / 60);
  if (min < 60) return `${min}m ago`;
  const hr = Math.floor(min / 60);
  if (hr < 24) return `${hr}h ago`;
  const day = Math.floor(hr / 24);
  if (day < 30) return `${day}d ago`;
  const mon = Math.floor(day / 30);
  if (mon < 12) return `${mon}mo ago`;
  // 12 thirty-day "months" is only 360 days, so days 360–364 would floor to
  // 0 years; clamp so the label never reads "0y ago".
  return `${Math.max(1, Math.floor(day / 365))}y ago`;
}

/** Map a raw Claude model identifier (e.g. "claude-haiku-4-5-20251001") to
 *  one of our canonical short names. Returns null if no match. */
function normalizeRawModel(raw: string): Model | null {
  const lc = raw.toLowerCase();
  if (lc.includes("haiku")) return "haiku";
  if (lc.includes("sonnet")) return "sonnet";
  if (lc.includes("opus")) return "opus";
  return null;
}

interface PastSession {
  sessionId: string;
  mtimeMs: number;
  firstUserMessage: string; // truncated/normalised, "" if not found
  model: Model | null; // null ⇒ couldn't determine
  rawModel: string; // raw string from JSONL ("" if not found)
}

/** Read the head of a file (avoids slurping multi-MB JSONL transcripts). */
function readFileHead(path: string, maxBytes: number): string {
  const fd = openSync(path, "r");
  try {
    const buf = Buffer.alloc(maxBytes);
    const n = readSync(fd, buf, 0, maxBytes, 0);
    return buf.subarray(0, n).toString("utf8");
  } finally {
    closeSync(fd);
  }
}

/**
 * Pluck the first user message + first model id from a transcript head.
 *
 * Lines that are blank, fail to parse (e.g. the final line cut off by the
 * byte cap), or parse to a non-object are skipped.
 */
function extractSessionMeta(head: string): { firstUserMessage: string; rawModel: string } {
  let firstUserMessage = "";
  let rawModel = "";
  for (const line of head.split("\n")) {
    if (firstUserMessage && rawModel) break;
    if (!line.trim()) continue;
    let ev: any;
    try {
      ev = JSON.parse(line);
    } catch {
      continue;
    }
    // JSON.parse can legitimately return null or a primitive; property
    // access on null would throw and abort the whole scan.
    if (ev === null || typeof ev !== "object") continue;

    if (!firstUserMessage) {
      // Two equivalent sources: a queue-operation enqueue carries the raw
      // text the user typed; a `type: "user"` event carries it inside
      // message.content (which is either a string or an array of blocks).
      if (ev.type === "queue-operation" && ev.operation === "enqueue" && typeof ev.content === "string") {
        firstUserMessage = ev.content;
      } else if (ev.type === "user" && ev.message) {
        const c = ev.message.content;
        if (typeof c === "string") {
          firstUserMessage = c;
        } else if (Array.isArray(c)) {
          firstUserMessage = c
            .filter((b: any) => b?.type === "text" && typeof b.text === "string")
            .map((b: any) => b.text as string)
            .join(" ");
        }
      }
    }
    if (!rawModel && typeof ev.message?.model === "string") {
      rawModel = ev.message.model;
    }
  }
  return {
    firstUserMessage: firstUserMessage.replace(/\s+/g, " ").trim(),
    rawModel,
  };
}

/**
 * List the transcripts stored for `cwd`, newest first (by file mtime).
 * Returns [] when the project directory cannot be read; individual files
 * that cannot be stat'ed or read are skipped.
 */
function readPastSessions(cwd: string): PastSession[] {
  // Read up to ~256 KB — enough to find the first user message and the
  // first assistant turn (which carries the model id) in any reasonable
  // transcript without paying for multi-MB reads.
  const headBytes = 256 * 1024;

  const dir = join(homedir(), ".claude", "projects", mangleCwd(cwd));
  let entries: string[];
  try {
    entries = readdirSync(dir).filter((f) => f.endsWith(".jsonl"));
  } catch {
    return [];
  }

  const out: PastSession[] = [];
  for (const f of entries) {
    const full = join(dir, f);
    let mtimeMs: number;
    let head: string;
    try {
      mtimeMs = statSync(full).mtimeMs;
      head = readFileHead(full, headBytes);
    } catch {
      continue;
    }
    const { firstUserMessage, rawModel } = extractSessionMeta(head);
    out.push({
      sessionId: f.replace(/\.jsonl$/, ""),
      mtimeMs,
      firstUserMessage,
      model: rawModel ? normalizeRawModel(rawModel) : null,
      rawModel,
    });
  }
  out.sort((a, b) => b.mtimeMs - a.mtimeMs);
  return out;
}

/** Truncate a string to `max` chars, appending "…" when cut. */
function truncate(s: string, max: number): string {
  if (s.length <= max) return s;
  return s.slice(0, Math.max(0, max - 1)).trimEnd() + "…";
}

// ---------------------------------------------------------------------------
// JSONL transcript → ChatTurn[]
//
// Given a sessionId and cwd, load the full transcript at
//   ~/.claude/projects/<mangled-cwd>/<session-id>.jsonl
// and convert it into the same UserTurn / AssistantTurn shape the live
// runChatTurn() path produces. This lets /claude-resume render the past
// context inside the orange border so the user can SEE what they're
// resuming, not just blindly continue an invisible thread.
//
// JSONL event reference (observed in 2.1.118 transcripts):
//   {type:"user", message:{role:"user", content: <string>}}                      ← typed prompt
//   {type:"user", message:{role:"user", content: [{type:"tool_result", …}, …]}}  ← tool outputs
//   {type:"assistant", message:{role:"assistant", content: [<one block>], usage:{…}, model:"claude-sonnet-4-6"}}
// Each assistant content block is emitted as its OWN line, all sharing the
// same usage / model fields (one API call → many lines). We coalesce every
// run of consecutive assistant lines into a single AssistantTurn whose
// `blocks` array preserves the in-order list of thinking/text/tool blocks.
// Tool results that arrive in subsequent user-lines are attached back onto
// the matching tool block by tool_use_id.
//
// Lines we ignore: agent-setting, queue-operation, attachment, last-prompt,
// summary, and anything else without a recognisable role/content shape.
// Tokens/cost are intentionally NOT carried over — the JSONL repeats usage
// per content block so summing naively would over-count, and the user is
// here to see CONTENT, not a token panel for old turns.
// ---------------------------------------------------------------------------

/**
 * Load a session transcript from
 * `~/.claude/projects/<mangled-cwd>/<sessionId>.jsonl` and convert it to turns.
 *
 * Consecutive assistant lines are coalesced into one AssistantTurn; a typed
 * user prompt closes the open assistant turn. Tool results found in user
 * lines are attached to the tool block of the open assistant turn with the
 * same id. Blank, unparseable, and non-object lines are skipped.
 *
 * @param sessionId Transcript file stem; also stored on each AssistantTurn.
 * @param cwd Working directory whose mangled form names the project folder.
 * @param fallbackModel Model label used when a line's model id is missing
 *   or not recognised by normalizeRawModel.
 * @returns The reconstructed turns in file order, or [] if the file cannot
 *   be read.
 */
function loadSessionTurns(sessionId: string, cwd: string, fallbackModel: Model): ChatTurn[] {
  const path = join(homedir(), ".claude", "projects", mangleCwd(cwd), `${sessionId}.jsonl`);
  let raw: string;
  try {
    raw = readFileSync(path, "utf8");
  } catch {
    return [];
  }

  const turns: ChatTurn[] = [];
  let current: AssistantTurn | null = null;

  // Close the open assistant turn (if any): derive finalText from its text
  // blocks and append it to the output.
  const flush = () => {
    if (!current) return;
    current.finalText = current.blocks
      .filter((b) => b.type === "text")
      .map((b: any) => b.text as string)
      .join("");
    turns.push(current);
    current = null;
  };

  // Return the open assistant turn, opening one with `model` if none exists.
  const ensureCurrent = (model: Model): AssistantTurn => {
    if (current) return current;
    current = {
      role: "assistant",
      model,
      blocks: [],
      finalText: "",
      sessionId,
      isResume: false,
      done: true,
    };
    return current;
  };

  // Flatten a tool_result `content` (string, or array of text blocks) to text.
  const toolResultText = (content: unknown): string => {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    return content
      .filter((b: any) => b?.type === "text" && typeof b.text === "string")
      .map((b: any) => b.text as string)
      .join("\n");
  };

  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    let ev: any;
    try {
      ev = JSON.parse(line);
    } catch {
      continue;
    }
    // JSON.parse may return null or a primitive; `ev.type` on null would
    // throw and abort the whole load.
    if (ev === null || typeof ev !== "object") continue;

    if (ev.type === "user") {
      const c = ev.message?.content;
      if (typeof c === "string") {
        // Typed user prompt — closes any in-flight assistant turn.
        flush();
        if (c.trim()) turns.push({ role: "user", text: c });
      } else if (Array.isArray(c)) {
        let sawToolResult = false;
        for (const block of c) {
          if (block?.type === "tool_result") {
            sawToolResult = true;
            const text = toolResultText(block.content);
            const isError = block.is_error === true;
            if (current) {
              for (const tb of current.blocks) {
                if (tb.type === "tool" && tb.id === block.tool_use_id) {
                  tb.result = { text, isError };
                  break;
                }
              }
            }
          } else if (block?.type === "text" && typeof block.text === "string") {
            // Some clients send array-shaped user prompts.
            if (!sawToolResult) {
              flush();
              if (block.text.trim()) turns.push({ role: "user", text: block.text });
            }
          }
        }
      }
    } else if (ev.type === "assistant") {
      // Only iterate array-shaped content: a non-iterable value would throw.
      const rawContent: unknown = ev.message?.content;
      const content: any[] = Array.isArray(rawContent) ? rawContent : [];
      const rawModel = String(ev.message?.model ?? "");
      const model = (rawModel ? normalizeRawModel(rawModel) : null) ?? fallbackModel;
      const a = ensureCurrent(model);
      // If the per-line model differs from what we opened the turn with,
      // keep the first one — a single coalesced "turn" inherits the model
      // of its first API call. (This is purely for the header label.)
      for (const block of content) {
        if (block?.type === "thinking" && typeof block.thinking === "string") {
          if (block.thinking.trim()) a.blocks.push({ type: "thinking", text: block.thinking });
        } else if (block?.type === "text" && typeof block.text === "string") {
          if (block.text.trim()) a.blocks.push({ type: "text", text: block.text });
        } else if (block?.type === "tool_use") {
          a.blocks.push({
            type: "tool",
            id: String(block.id ?? ""),
            name: String(block.name ?? ""),
            inputJson: JSON.stringify(block.input ?? {}),
          });
        }
      }
    }
    // All other event types (agent-setting, queue-operation, attachment,
    // last-prompt, summary, …) are intentionally ignored.
  }
  flush();
  return turns;
}

// Per-turn render cache: once a turn is "frozen" (user turns are always
// frozen; assistant turns after done=true), its rendered output at a given
// (innerWidth, theme) is invariant.
// Caching avoids O(turns) rebuild on every
// frame, which otherwise creates quadratic-ish lag during streaming because
// partial-message updates drive tens of renders per second.

/** Render-cache slots carried by every turn; all three are set or cleared together. */
interface TurnRenderCache {
  // Rendered lines, already padded to `cachedWidth`.
  cachedLines?: string[];
  // Inner width the cached lines were rendered at.
  cachedWidth?: number;
  // Theme object the cached lines were rendered with (compared by identity).
  cachedTheme?: unknown;
}

/** A prompt typed by the user. */
interface UserTurn extends TurnRenderCache {
  role: "user";
  text: string;
}

/** One Claude response: an ordered list of thinking / text / tool blocks plus status. */
interface AssistantTurn extends TurnRenderCache {
  role: "assistant";
  model: Model;
  blocks: StreamBlock[];
  // Concatenation of the text blocks (see loadSessionTurns' flush).
  finalText: string;
  sessionId?: string;
  // Selects the "↩" header icon instead of "◆".
  isResume: boolean;
  // False while streaming; the turn is only render-cached once this is true.
  done: boolean;
  // When set, the turn renders as "Error: <error>" instead of its blocks.
  error?: string;
  // When set, the turn renders as "(Cancelled)" instead of its blocks.
  cancelled?: boolean;
  // Usage figures — presumably consumed by formatUsage(); confirm in claude-stream.
  costUsd?: number;
  inputTokens?: number;
  outputTokens?: number;
  cacheReadTokens?: number;
  cacheWriteTokens?: number;
}

type ChatTurn = UserTurn | AssistantTurn;

/** Payload of a chat-claude-session message: every turn of one chat-mode session. */
interface ChatSessionDetails {
  turns: ChatTurn[];
}

// =============================================================================
// Extension entry point
// =============================================================================

// ── Reload-persistent state ─────────────────────────────────────────────────
// pi's `/reload` tears the extension down and re-invokes the default export,
// which resets every closure-local `let`/`const`. The Map of resumable Claude
// session ids (model → sessionId) is the one piece of state we want to
// survive that — otherwise /reload silently orphans the ongoing Claude
// threads, forcing the user to re-pick them via /claude-resume.
//
// Everything else (chatMode, currentDetails, askBridge, tuiRef, isGenerating)
// is intentionally NOT persisted: the bridge/TUI references are bound to the
// torn-down ctx and must be rebuilt on the next enterChatMode(), and any
// in-flight stream is already aborted when the old closure is discarded.
//
// We stash the Map on globalThis behind a namespaced key. globalThis survives
// module re-evaluation (only top-level lexical bindings are reset), and the
// guarded getter keeps initialization idempotent across repeated reloads.
// Valid extended-thinking effort levels accepted by `claude --effort`, plus
// our synthetic "off" sentinel which skips the flag entirely (falling back
// to the CLI's default of no thinking emission in -p mode).
const EFFORTS = ["off", "low", "medium", "high", "xhigh", "max"] as const;
type Effort = (typeof EFFORTS)[number];
const DEFAULT_EFFORT: Effort = "max";

interface ChatClaudePersistedState {
  // model → resumable Claude session id.
  sessions: Map<Model, string>;
  // Current extended-thinking effort level — persisted across `/reload`
  // so the user's choice survives the extension teardown the same way
  // resumable session ids do.
  effort: Effort;
}

const CHAT_CLAUDE_STATE_KEY = "__pi_chat_claude_persisted__";

/**
 * Return the reload-persistent state stored on globalThis, creating it on
 * first use. Repeated calls return the same object, so callers may mutate
 * it in place.
 */
function getPersistedState(): ChatClaudePersistedState {
  const g = globalThis as unknown as Record<string, ChatClaudePersistedState | undefined>;
  let state = g[CHAT_CLAUDE_STATE_KEY];
  if (!state) {
    state = { sessions: new Map(), effort: DEFAULT_EFFORT };
    g[CHAT_CLAUDE_STATE_KEY] = state;
  }
  // Back-fill for any persisted state written by an older revision of
  // the extension (pre-/claude-effort) that didn't carry an effort field,
  // and reset any value that is not a currently valid level.
  if (!EFFORTS.includes(state.effort)) state.effort = DEFAULT_EFFORT;
  return state;
}

export default function (pi: ExtensionAPI) {
  // ── Mode state ────────────────────────────────────────────────────────────
  let chatMode: Model | null = null; // null ⇒ not in chat mode

  // model → resumable claude session id. Pulled from globalThis so the
  // mapping (and the current effort level) survive `/reload` (see
  // getPersistedState above). `persisted` is kept as a handle so
  // `/claude-effort` can mutate `persisted.effort` in place and have
  // the change picked up by subsequent runChatTurn calls.
  const persisted = getPersistedState();
  const { sessions } = persisted;

  let isGenerating = false;
  let currentAbort: AbortController | null = null;

  // pi-ask bridge — opens a Unix socket + generates an --mcp-config so
  // Claude (running inside this chat) can ask the user questions through
  // pi's native ask UI. Bound to the chat-mode lifetime: started on
  // enterChatMode, closed on exitChatMode.
  let askBridge: AskBridge | null = null;

  // Live TUI reference captured from the mode-banner widget factory, used to
  // schedule re-renders while a Claude response is streaming into the
  // current chat-claude-session message.
  let tuiRef: { requestRender: () => void } | null = null;

  // The in-flight chat session's `details` object. Stored by reference so
  // mutations here are reflected in the CustomMessage already displayed
  // in pi's conversation. Null between chat-mode sessions.
  let currentDetails: ChatSessionDetails | null = null;

  // Keep a set of the extension's custom-message types (local to this
  // extension instance) so the `context` event handler can strip them out of
  // pi's LLM context — chat mode is between the user and Claude and has no
  // business in pi's prompt payload.
  const CHAT_CLAUDE_CUSTOM_TYPES = new Set(["chat-claude-session"]);

  // ── Render throttling ────────────────────────────────────────────────────
  // Claude's `--include-partial-messages` fires an onUpdate for every token
  // delta (100+ Hz under a fast stream). Rendering per-token was the second
  // half of the progressive-lag problem — even with per-turn caching, the
  // TUI would be asked to diff+repaint dozens of times per second.
  //
  // scheduleStreamRender coalesces back-to-back requests into a trailing-
  // edge timer at ~30 Hz. The first update within a quiet window waits up
  // to 33 ms before rendering; any further updates in that window are
  // folded into the same render. flushStreamRender cancels the pending
  // timer and renders immediately — used on stream completion, abort, and
  // chat-mode teardown so the user sees the terminal frame right away.
let streamRenderTimer: ReturnType<typeof setTimeout> | null = null;
const STREAM_RENDER_INTERVAL_MS = 33; // ~30 Hz

/** Request a render within STREAM_RENDER_INTERVAL_MS; no-op if one is already pending. */
function scheduleStreamRender() {
  if (streamRenderTimer) return;
  streamRenderTimer = setTimeout(() => {
    streamRenderTimer = null;
    tuiRef?.requestRender();
  }, STREAM_RENDER_INTERVAL_MS);
}

/** Cancel any pending throttled render and request one immediately. */
function flushStreamRender() {
  if (streamRenderTimer) {
    clearTimeout(streamRenderTimer);
    streamRenderTimer = null;
  }
  tuiRef?.requestRender();
}

// ── Rendering helpers ────────────────────────────────────────────────────
// Mirrors pi's AssistantMessageComponent conventions (see
// modes/interactive/components/assistant-message.js): Markdown at
// paddingX=1, paddingY=0; thinking as italic `thinkingText`-coloured
// markdown; tool blocks via the shared renderToolBlock (same one
// ask-claude uses) so bash / read / edit / write all look identical to
// pi's own tool executions.

/**
 * Append the components for one turn to `container`.
 *
 * User turns render a "▶ you" header plus the prompt as markdown. Assistant
 * turns render a header (status icon, model, short session id, ⏳ while not
 * done) followed by either "(Cancelled)", the error text, or the turn's
 * blocks and — once done — a usage line when formatUsage returns one.
 */
function renderTurnInto(container: Container, turn: ChatTurn, theme: any) {
  const md = getMarkdownTheme();

  if (turn.role === "user") {
    container.addChild(new Text(orangeBold("▶ you"), 1, 0));
    container.addChild(new Spacer(1));
    container.addChild(new Markdown(turn.text.trim(), 1, 0, md));
    return;
  }

  // Assistant turn header
  const icon = turn.cancelled
    ? orange("◇ ")
    : turn.error
      ? theme.fg("error", "✗ ")
      : turn.isResume
        ? orange("↩ ")
        : orange("◆ ");
  const header =
    icon +
    orangeBold(`Claude ${capitalize(turn.model)}`) +
    (turn.sessionId ? theme.fg("dim", ` session:${turn.sessionId.slice(0, 8)}`) : "") +
    (!turn.done ? theme.fg("warning", " ⏳") : "");
  container.addChild(new Text(header, 1, 0));
  container.addChild(new Spacer(1));

  if (turn.cancelled) {
    container.addChild(new Text(orange("(Cancelled)"), 1, 0));
    return;
  }
  if (turn.error) {
    container.addChild(new Text(theme.fg("error", `Error: ${turn.error}`), 1, 0));
    return;
  }

  // Defensive dedup — see claude-stream.ts for the root-cause fix, but
  // keep a safety net here in case a future Claude CLI change re-orders
  // events differently.
  const seenToolIds = new Set<string>();
  const blocks: StreamBlock[] = [];
  for (const b of turn.blocks ?? []) {
    if (b.type === "tool") {
      if (seenToolIds.has(b.id)) continue;
      seenToolIds.add(b.id);
    }
    blocks.push(b);
  }

  // Every rendered block after the first is preceded by a one-line spacer.
  let addedAny = false;
  const addBlock = (child: Component) => {
    if (addedAny) container.addChild(new Spacer(1));
    container.addChild(child);
    addedAny = true;
  };

  for (const block of blocks) {
    if (block.type === "thinking" && block.text.trim()) {
      addBlock(new Markdown(block.text.trim(), 1, 0, md, {
        color: (t: string) => theme.fg("thinkingText", t),
        italic: true,
      }));
    } else if (block.type === "tool") {
      addBlock(renderToolBlockTruncated(block, theme));
    } else if (block.type === "text" && block.text.trim()) {
      addBlock(new Markdown(block.text.trim(), 1, 0, md));
    }
  }

  if (turn.done) {
    const usage = formatUsage(turn as any);
    if (usage) {
      container.addChild(new Spacer(1));
      container.addChild(new Text(theme.fg("dim", usage), 1, 0));
    }
  }
}

// Render one turn in isolation and return its lines PRE-PADDED to
// `innerWidth` visible columns.
//
// Pre-padding here means `visibleWidth()` (which calls `Intl.Segmenter`
// — the measured hot spot: 85% of pi's CPU in a laggy session) runs
// exactly ONCE per line per turn, not once per line per frame. For
// completed turns these padded lines are cached and reused forever at
// that (width, theme); for the streaming tail turn the work is bounded
// to just the in-flight turn's lines.
function renderTurnLines(turn: ChatTurn, theme: any, innerWidth: number): string[] {
  const c = new Container();
  renderTurnInto(c, turn, theme);
  const rawLines = c.render(innerWidth);
  const padded: string[] = new Array(rawLines.length);
  for (let i = 0; i < rawLines.length; i++) {
    padded[i] = padToInnerWidth(rawLines[i], innerWidth);
  }
  return padded;
}

// Assemble the WHOLE session's inner lines with per-turn caching.
//
// Cache invariants:
//   • User turns are immutable → always cacheable.
//   • Assistant turns are mutated in-place by runClaude's onUpdate
//     callback while streaming, and only become stable after
//     `done: true` is set (see runChatTurn). So we only cache
//     assistants once they're done.
//   • Cache keys on (innerWidth, theme) — terminal resize or theme
//     switch invalidates all per-turn caches transparently by forcing
//     a rebuild on the next render.
//
// With this cache, a streaming frame only rebuilds the one in-flight
// assistant turn (the tail); all prior turns are an O(1) line-copy.
// That eliminates the O(turns × blocks) rebuild that previously ran
// every time a partial Claude message arrived.
//
// Returned lines are PRE-PADDED to `innerWidth` visible columns — see
// renderTurnLines/padToInnerWidth for why. The caller can hand them
// straight to wrapInOrangeBorder without any further visibleWidth()
// calls, which is critical: visibleWidth drives Intl.Segmenter, whose
// 512-entry LRU thrashes when called per-line-per-frame on a long chat.
function renderSessionLines(details: ChatSessionDetails, theme: any, innerWidth: number): string[] {
  // Empty session: show a placeholder so the border appears right after
  // the user submits, before any block has arrived from Claude.
  if (details.turns.length === 0) {
    const placeholder = new Container();
    placeholder.addChild(new Text(orangeDim("(chat mode — waiting for first message)"), 0, 0));
    return placeholder.render(innerWidth).map((raw) => padToInnerWidth(raw, innerWidth));
  }

  // The blank row between turns is padded as well; an unpadded one would
  // leave a ragged right edge on the orange border.
  const blankRow = " ".repeat(innerWidth);
  const assembled: string[] = [];

  for (const [index, turn] of details.turns.entries()) {
    if (index > 0) assembled.push(blankRow);

    const frozen = turn.role === "user" || (turn.role === "assistant" && turn.done);
    const cacheMatches = frozen && turn.cachedWidth === innerWidth && turn.cachedTheme === theme;

    let lines = cacheMatches ? turn.cachedLines : undefined;
    if (!lines) {
      lines = renderTurnLines(turn, theme, innerWidth);
      // Frozen turns store the fresh render; a streaming turn has its
      // slots cleared so no stale output survives from a prior life.
      turn.cachedLines = frozen ? lines : undefined;
      turn.cachedWidth = frozen ? innerWidth : undefined;
      turn.cachedTheme = frozen ? theme : undefined;
    }
    for (const line of lines) assembled.push(line);
  }
  return assembled;
}

// Drop every turn's render cache — called from the message renderer's
// `invalidate()` hook (triggered by pi when theme changes or when a
// from-scratch re-render is needed).
function invalidateSessionCache(details: ChatSessionDetails) {
  details.turns.forEach((turn) => {
    turn.cachedLines = undefined;
    turn.cachedWidth = undefined;
    turn.cachedTheme = undefined;
  });
}

// ── Mode banner + status ─────────────────────────────────────────────────
// Push the current mode state into pi's UI: the two-line banner widget above
// the editor, the status entry, and the window title. Outside chat mode all
// three are reset.
function syncUI(ctx: any) {
  if (!ctx?.hasUI) return;

  if (chatMode === null) {
    ctx.ui.setWidget("chat-claude", undefined);
    ctx.ui.setStatus("chat-claude", undefined);
    ctx.ui.setTitle("pi");
    return;
  }

  const activeSession = sessions.get(chatMode);
  const short = activeSession ? activeSession.slice(0, 8) : "new";
  const modelUp = capitalize(chatMode).toUpperCase();
  const modelName = capitalize(chatMode);

  const buildBanner = (tui: any, theme: any) => {
    tuiRef = tui; // ← captured for live streaming re-renders
    return {
      invalidate: () => {},
      // `isGenerating` and `persisted.effort` are read on every render so
      // the banner tracks them live; model and session id are fixed at
      // syncUI time.
      render: () => {
        const rail = orange("▌ ");
        const streamingTag = isGenerating ? " " + orange("⏳ streaming…") : "";
        const headline = [
          orangeBold("◆ CLAUDE CHAT MODE"),
          orangeBold(modelUp),
          orangeDim("session:" + short),
          orangeDim("effort:" + persisted.effort),
        ].join(" ");
        const hint = theme.fg("dim", "Type to chat · /claude haiku|sonnet|opus · /claude-new · /claude-effort · /claude-end · /claude-abort");
        return [rail + headline + streamingTag, rail + hint];
      },
    };
  };
  ctx.ui.setWidget("chat-claude", buildBanner, { placement: "aboveEditor" });

  const busy = isGenerating ? " · streaming" : "";
  ctx.ui.setStatus("chat-claude", orange(`◆ Claude ${modelName} · ${short} · effort:${persisted.effort}${busy}`));
  ctx.ui.setTitle(`pi · Claude ${modelName} Chat`);
}

// ── ESC-to-abort editor ──────────────────────────────────────────────────
// ESC (the "interrupt" action) is on the extension-runner's reserved list
// (see node_modules/@mariozechner/pi-coding-agent/.../runner.js — any
// registerShortcut("escape", …) is silently dropped), so a custom editor is
// the sanctioned way to intercept it. We subclass pi's exported CustomEditor
// and short-circuit ESC ONLY while a chat-claude response is streaming.
// For every other case we defer to `super.handleInput`, which runs the
// app-level keybindings — including pi's own onEscape handler, which
// setCustomEditorComponent copies onto the custom editor at install time
// (see interactive-mode.js setCustomEditorComponent, ~line 1258).
class ChatEscEditor extends CustomEditor {
    /**
     * Intercepts ESC while a Claude response is streaming and aborts it;
     * every other key (and ESC outside of streaming) goes to the base editor.
     */
    override handleInput(data: string): void {
        if (matchesKey(data, "escape") && isGenerating && currentAbort) {
            try { currentAbort.abort(); } catch { /* ok */ }
            // We may not have a direct ctx here, but the UI is live during
            // chat mode, so flush any pending throttled render and force
            // a frame now; the chat-claude-session renderer will show the
            // assistant turn as cancelled once runClaude's promise
            // rejects with AbortError.
            flushStreamRender();
            return;
        }
        super.handleInput(data);
    }
}

// ── Mode transitions ─────────────────────────────────────────────────────
/**
 * Enter chat mode for `model`, or switch to it if chat mode is already on.
 *
 * @param model        Claude model that subsequent typed input is routed to.
 * @param ctx          pi command/event context; UI work is skipped when it has no UI.
 * @param freshSession When true, the stored session id for `model` is dropped first.
 */
function enterChatMode(model: Model, ctx: any, freshSession: boolean) {
    const wasActive = chatMode !== null;
    const modelChanged = chatMode !== model;
    if (freshSession) sessions.delete(model);
    // A new /claude invocation after an exit starts a fresh border box, so
    // drop any reference to the previous session's details. The existing
    // CustomMessage in the conversation keeps its own reference and stays
    // visible in the scrollback.
    if (!wasActive || modelChanged || freshSession) {
        currentDetails = null;
    }
    chatMode = model;
    // Stand up (or refresh) the pi-ask bridge so Claude can ask the user
    // questions through pi's native overlay. Re-create on every entry so
    // the socket+temp dir lifetime is bounded by the chat session.
    if (ctx?.hasUI) {
        askBridge?.close();
        try {
            askBridge = startAskBridge({
                ui: ctx.ui,
                onAsk: () => tuiRef?.requestRender(),
            });
        } catch (err) {
            askBridge = null;
            ctx.ui.notify(
                `pi-ask bridge unavailable: ${err instanceof Error ? err.message : String(err)} — Claude won't be able to ask questions.`,
                "warning",
            );
        }
        // Install the ESC-aborts-Claude custom editor. Idempotent: if chat
        // mode was already active (e.g. /claude haiku → /claude opus), setting
        // it again just re-wires the same class cleanly.
        ctx.ui.setEditorComponent(
            (tui: TUI, theme: EditorTheme, keybindings: KeybindingsManager) =>
                new ChatEscEditor(tui, theme, keybindings),
        );
    }
    syncUI(ctx);
    if (ctx?.hasUI) {
        const sess = sessions.get(model);
        const kind = freshSession || !sess ? "new session" : `resume ${sess.slice(0, 8)}`;
        const verb = wasActive ? (modelChanged ? "Switched to" : "Re-entered") : "Entered chat mode:";
        ctx.ui.notify(`${verb} Claude ${capitalize(model)} · ${kind}`, "info");
    }
}

/**
 * Leave chat mode: abort any in-flight response, cancel the pending
 * throttled render, detach the session details, close the pi-ask bridge,
 * restore pi's default editor and refresh the UI.
 */
function exitChatMode(ctx: any) {
    if (currentAbort) {
        try { currentAbort.abort(); } catch { /* ok */ }
    }
    currentAbort = null;
    isGenerating = false;
    chatMode = null;
    // Cancel any pending throttled stream render so we don't leave a
    // dangling timer firing tuiRef.requestRender() after chat mode ends
    // (tuiRef itself lingers, so the render would be harmless but wasted).
    if (streamRenderTimer) {
        clearTimeout(streamRenderTimer);
        streamRenderTimer = null;
    }
    // Detach from current session details so the next entry starts a new
    // border. The message and its details stay in place in pi's scrollback.
    currentDetails = null;
    // Tear down the pi-ask bridge: close the socket and remove the temp
    // dir holding the socket + generated mcp.json.
    askBridge?.close();
    askBridge = null;
    // Restore pi's default editor (undoes ChatEscEditor from enterChatMode).
    if (ctx?.hasUI) ctx.ui.setEditorComponent(undefined);
    syncUI(ctx);
    if (ctx?.hasUI) ctx.ui.notify("Exited chat mode — back to normal pi.", "info");
}

// ── Session / turn management ────────────────────────────────────────────
/**
 * Return the details object of the current chat-mode session, creating it
 * (and sending the "chat-claude-session" custom message that displays it)
 * when there is none yet.
 */
function ensureSessionMessage(): ChatSessionDetails {
    if (currentDetails) return currentDetails;
    const details: ChatSessionDetails = { turns: [] };
    currentDetails = details;
    pi.sendMessage(
        {
            customType: "chat-claude-session",
            // content is only used if we had no custom renderer; stays
            // hidden from pi's LLM via the context filter below.
            content: "",
            display: true,
            details,
        },
        { triggerTurn: false },
    );
    return details;
}

/**
 * Run one user → Claude exchange: append the user turn and a placeholder
 * assistant turn, stream the response into the placeholder, then mark it
 * done (with usage data), cancelled, or errored.
 *
 * @param userText Text the user submitted.
 * @param ctx      pi context; `ctx.cwd` is passed to the Claude run.
 */
async function runChatTurn(userText: string, ctx: any) {
    if (!chatMode) return;
    const model = chatMode;
    const details = ensureSessionMessage();
    // Append user turn + placeholder assistant turn up front so the
    // border extends as soon as the user hits enter.
    details.turns.push({ role: "user", text: userText });
    const existingSession = sessions.get(model);
    const assistantTurn: AssistantTurn = {
        role: "assistant",
        model,
        blocks: [],
        finalText: "",
        isResume: !!existingSession,
        done: false,
    };
    details.turns.push(assistantTurn);
    tuiRef?.requestRender();
    isGenerating = true;
    // Keep this turn's controller in a local as well: exitChatMode() nulls
    // the shared `currentAbort` right after aborting, so the shared variable
    // cannot be trusted to tell us later whether THIS turn was cancelled.
    const abort = new AbortController();
    currentAbort = abort;
    syncUI(ctx);
    if (ctx?.hasUI) ctx.ui.setWorkingMessage(`Claude ${capitalize(model)} is thinking…`);
    try {
        const r = await runClaude(userText, {
            model,
            sessionId: existingSession,
            cwd: ctx.cwd,
            signal: abort.signal,
            // Enable extended thinking — without --effort, `claude -p`
            // NEVER emits thinking_delta events regardless of the user's
            // interactive defaultThinkingLevel setting, and the italic
            // thinking-block rendering below sits idle. Default is "max"
            // and is configurable live via /claude-effort; the model
            // still decides on-demand whether it actually needs to think.
            effort: persisted.effort,
            // Route AskUserQuestion-style requests through pi's native
            // overlay via the pi-ask-mcp bridge. Disallowing the built-in
            // AskUserQuestion forces Claude to use mcp__pi__ask if it
            // wants to ask a structured question.
            mcpConfigPath: askBridge?.mcpConfigPath,
            disallowedTools: askBridge ? ["AskUserQuestion"] : undefined,
            onUpdate: (partial) => {
                assistantTurn.blocks = partial.blocks;
                assistantTurn.finalText = partial.finalText;
                // Throttle to ~30 Hz so a fast token stream doesn't cause
                // a render-per-token, which compounds with any other
                // extension's per-frame work (footer, widgets, etc.).
                scheduleStreamRender();
            },
        });
        if (r.sessionId) sessions.set(model, r.sessionId);
        assistantTurn.blocks = r.blocks;
        assistantTurn.finalText = r.finalText;
        assistantTurn.sessionId = r.sessionId;
        assistantTurn.costUsd = r.costUsd;
        assistantTurn.inputTokens = r.inputTokens;
        assistantTurn.outputTokens = r.outputTokens;
        assistantTurn.cacheReadTokens = r.cacheReadTokens;
        assistantTurn.cacheWriteTokens = r.cacheWriteTokens;
        assistantTurn.done = true;
    } catch (err) {
        // Read the LOCAL controller: when the abort came from
        // exitChatMode() the shared `currentAbort` is already null, and the
        // turn would otherwise be shown as an error instead of cancelled.
        const aborted = abort.signal.aborted;
        assistantTurn.done = true;
        assistantTurn.cancelled = aborted;
        assistantTurn.error = aborted
            ? undefined
            : (err instanceof Error ? err.message : String(err));
    } finally {
        // If the user left chat mode, came back and already started another
        // turn, the shared generation state belongs to that newer turn —
        // leave it alone. Otherwise (still ours, or already cleared by
        // exitChatMode) reset it.
        const superseded = currentAbort !== null && currentAbort !== abort;
        if (!superseded) {
            isGenerating = false;
            currentAbort = null;
            if (ctx?.hasUI) ctx.ui.setWorkingMessage(undefined);
        }
        syncUI(ctx);
        // Flush (not schedule): the stream just ended or was aborted —
        // we want the final frame on screen immediately, not 33 ms later.
        // Also cancels any in-flight throttled timer so it doesn't fire
        // a stale second render after the assistant turn is already
        // marked done and cached.
        flushStreamRender();
    }
}

// ── Input interception ───────────────────────────────────────────────────
// Registered pi commands (/claude, /claude-end, etc.) dispatch BEFORE this
// event fires, so they still work normally. Bash via `!` goes through
// user_bash, not here. Every other text the user submits in chat mode is
// routed straight to Claude.
pi.on("input", async (event, ctx) => {
    if (!chatMode) return { action: "continue" } as const;
    if (event.source !== "interactive") return { action: "continue" } as const;
    const text = event.text ?? "";
    if (!text.trim()) return { action: "continue" } as const;
    if (text.trimStart().startsWith("!")) return { action: "continue" } as const;
    if (isGenerating) {
        ctx.ui.notify(
            "Claude is still responding. Use /claude-abort to cancel, then try again.",
            "warning",
        );
        return { action: "handled" } as const;
    }
    // Fire-and-forget: the turn streams in the background while this
    // handler returns immediately; failures are surfaced as a notification.
    void runChatTurn(text, ctx).catch((err) => {
        ctx.ui.notify(
            `Chat error: ${err instanceof Error ? err.message : String(err)}`,
            "error",
        );
    });
    return { action: "handled" } as const;
});

// Keep chat-mode custom messages out of pi's LLM context — chat mode is
// between the user and Claude, not part of pi's conversation.
pi.on("context", (event) => {
    const filtered = event.messages.filter((m: any) =>
        !(m.role === "custom" && CHAT_CLAUDE_CUSTOM_TYPES.has(m.customType)),
    );
    return { messages: filtered };
});

// ── Session lifecycle ────────────────────────────────────────────────────
pi.on("session_start", (_event, ctx) => {
    syncUI(ctx);
});

pi.on("session_shutdown", (_event, ctx) => {
    if (chatMode) exitChatMode(ctx);
    // Defensive: if exitChatMode was never reached (chatMode was already
    // null but a bridge somehow lingered), close it directly.
    if (askBridge) {
        askBridge.close();
        askBridge = null;
    }
    // Defensive: same for the throttled render timer — exitChatMode
    // already clears it, but this keeps the Node process clean in the
    // case where chat mode was never entered but some hypothetical
    // future code path scheduled a render anyway.
    if (streamRenderTimer) {
        clearTimeout(streamRenderTimer);
        streamRenderTimer = null;
    }
});

// ── Commands ─────────────────────────────────────────────────────────────
const modelCompletions = (prefix: string) =>
    MODELS.filter((m) => m.startsWith(prefix.toLowerCase()))
        .map((m) => ({ value: m, label: m }));

// Shared by /claude and /claude-new: a recognised model name wins; anything
// else (including no argument) falls back to the active chat model, then to
// "sonnet".
const resolveTargetModel = (args: string | null | undefined): Model => {
    const arg = (args ?? "").trim().toLowerCase();
    return (MODELS as readonly string[]).includes(arg)
        ? (arg as Model)
        : (chatMode ?? "sonnet");
};

pi.registerCommand("claude", {
    description: [
        "Enter distinct Claude chat mode — typed input bypasses pi's LLM and goes to Claude.",
        " /claude — enter with last/default model (sonnet)",
        " /claude haiku|sonnet|opus — enter/switch model",
    ].join("\n"),
    getArgumentCompletions: modelCompletions,
    handler: async (args, ctx) => {
        enterChatMode(resolveTargetModel(args), ctx, false);
    },
});

pi.registerCommand("claude-new", {
    description: "Enter chat mode with a fresh Claude session (discards any resumed session id). Example: /claude-new opus",
    getArgumentCompletions: modelCompletions,
    handler: async (args, ctx) => {
        enterChatMode(resolveTargetModel(args), ctx, true);
    },
});

// /claude-effort — set the extended-thinking effort level for subsequent
// chat turns. Without the flag `claude -p` emits no thinking_delta
// events at all (the interactive `defaultThinkingLevel` setting is
// ignored in -p mode); with it, the model decides on-demand whether
// to actually think. Stored on the persisted state so the choice
// survives `/reload`.
//
// /claude-effort — show current value
// /claude-effort max — set to max (default)
// /claude-effort off — disable (skip the --effort flag)
const effortCompletions = (prefix: string) =>
    EFFORTS.filter((e) => e.startsWith(prefix.toLowerCase()))
        .map((e) => ({ value: e, label: e }));

pi.registerCommand("claude-effort", {
    description: [
        "Set the extended-thinking effort level for Claude chat turns.",
        " /claude-effort — show current value",
        " /claude-effort off|low|medium|high|xhigh|max",
        "",
        "Note: without an effort setting, `claude -p` emits no thinking",
        "blocks at all — so lowering this trades thought visibility for speed.",
    ].join("\n"),
    getArgumentCompletions: effortCompletions,
    handler: async (args, ctx) => {
        const arg = (args ?? "").trim().toLowerCase();
        if (!arg) {
            ctx.ui.notify(
                `Current Claude effort: ${persisted.effort}. Options: ${EFFORTS.join(", ")}.`,
                "info",
            );
            return;
        }
        if (!(EFFORTS as readonly string[]).includes(arg)) {
            ctx.ui.notify(
                `Unknown effort "${arg}". Valid levels: ${EFFORTS.join(", ")}.`,
                "warning",
            );
            return;
        }
        const prev = persisted.effort;
        persisted.effort = arg as Effort;
        syncUI(ctx);
        const note = arg === "off"
            ? "thinking disabled — Claude will no longer emit thinking blocks"
            : `thinking effort set to ${arg}`;
        ctx.ui.notify(
            `${note} (was ${prev}). Applies to the next chat turn.`,
            "info",
        );
    },
});

pi.registerCommand("claude-end", {
    description: "Exit Claude chat mode and resume normal pi operation.",
    handler: async (_args, ctx) => {
        if (!chatMode) {
            ctx.ui.notify("Not in chat mode.", "info");
            return;
        }
        exitChatMode(ctx);
    },
});

pi.registerCommand("claude-abort", {
    description: "Cancel the in-flight Claude response (no effect if nothing is generating).",
    handler: async (_args, ctx) => {
        if (!isGenerating || !currentAbort) {
            ctx.ui.notify("No active Claude response to cancel.", "info");
            return;
        }
        try { currentAbort.abort(); } catch { /* ok */ }
        ctx.ui.notify("Aborting Claude response…", "info");
    },
});

// /claude-resume — present a picker of past Claude sessions whose cwd matches
// the current project directory, then resume the chosen one in chat mode.
//
// Resuming sets the session id for the chosen model, starts a fresh orange
// border and replays the historical transcript inside it (loaded via
// loadSessionTurns). `claude --resume <session-id>` keeps the FULL
// conversation context alive on the Claude side, so subsequent prompts
// behave exactly like a continuation.
pi.registerCommand("claude-resume", {
    description: "Pick a past Claude session for the current project directory and resume it in chat mode.",
    handler: async (_args, ctx) => {
        if (!ctx?.hasUI) {
            ctx?.ui?.notify?.("/claude-resume requires interactive mode.", "error");
            return;
        }
        if (isGenerating) {
            ctx.ui.notify(
                "A Claude response is still streaming. Use /claude-abort first, then /claude-resume.",
                "warning",
            );
            return;
        }
        const past = readPastSessions(ctx.cwd);
        if (past.length === 0) {
            ctx.ui.notify(
                `No past Claude sessions found for ${ctx.cwd}.`,
                "info",
            );
            return;
        }
        // Cap the picker at the 25 most recent sessions to keep the
        // inline-note overlay tractable. Sessions are already sorted
        // newest-first by readPastSessions().
        const MAX_OPTIONS = 25;
        const choices = past.slice(0, MAX_OPTIONS);
        // Label format (per user spec):
        //   <relative time> · <first user message preview> · (session:<id8>)
        const PREVIEW_MAX = 60;
        const buildLabel = (s: PastSession) => {
            const preview = s.firstUserMessage
                ? truncate(s.firstUserMessage, PREVIEW_MAX)
                : "(no user message)";
            return `${relativeTime(s.mtimeMs)} · ${preview} · (session:${s.sessionId.slice(0, 8)})`;
        };
        // Disambiguate: in the very unlikely event two sessions produce
        // the same display label, append a counter so the post-pick lookup
        // can match exactly.
        const labels: string[] = [];
        const seen = new Map<string, number>();
        for (const s of choices) {
            const base = buildLabel(s);
            const n = seen.get(base) ?? 0;
            seen.set(base, n + 1);
            labels.push(n === 0 ? base : `${base} #${n + 1}`);
        }
        const sessionPick = await askSingleQuestionWithInlineNote(ctx.ui, {
            question: `Resume which past Claude session in ${ctx.cwd}?`,
            options: labels.map((label) => ({ label })),
        });
        if (sessionPick.selectedOptions.length === 0) {
            ctx.ui.notify("Resume cancelled.", "info");
            return;
        }
        const pickedLabel = sessionPick.selectedOptions[0];
        const idx = labels.indexOf(pickedLabel);
        if (idx < 0) {
            ctx.ui.notify("Picked session not found (label mismatch).", "warning");
            return;
        }
        const picked = choices[idx];
        // Second picker: which model to display the resumed conversation
        // under in pi's UI. Note: claude CLI ignores --model when --resume
        // is set, so this is purely a UI/labelling choice. We mark the
        // session's original model with "(used by this session)" and set
        // it as the recommended default so most users can just hit Enter.
        const originalModel = picked.model;
        const modelLabels = MODELS.map((m) =>
            originalModel === m ? `${m} (used by this session)` : m,
        );
        // Fall back to index 1 (sonnet) when the session has no recorded
        // model or its model is not one of MODELS.
        const originalIdx = originalModel ? MODELS.indexOf(originalModel) : -1;
        const recommendedIdx = originalIdx >= 0 ? originalIdx : 1; // default sonnet
        const modelPick = await askSingleQuestionWithInlineNote(ctx.ui, {
            question: "Display this resumed session under which model in pi's UI?",
            options: modelLabels.map((label) => ({ label })),
            recommended: recommendedIdx,
        });
        if (modelPick.selectedOptions.length === 0) {
            ctx.ui.notify("Resume cancelled.", "info");
            return;
        }
        // Strip any "(used by this session)" suffix and parse the bare
        // model name (the first whitespace-separated token).
        const bare = modelPick.selectedOptions[0].split(/\s+/)[0].toLowerCase();
        const targetModel: Model = (MODELS as readonly string[]).includes(bare)
            ? (bare as Model)
            : "sonnet";
        // Wire up the session id BEFORE entering chat mode, so the next
        // turn the user sends triggers --resume <session-id>.
        sessions.set(targetModel, picked.sessionId);
        enterChatMode(targetModel, ctx, false);
        // enterChatMode keeps the current border when chat mode was already
        // active on the same model; detach it explicitly so the resumed
        // transcript always lands in its own, initially empty box instead of
        // being appended to the conversation that was on screen.
        currentDetails = null;
        // Replay the historical transcript inside the orange border so the
        // user can SEE the context they're resuming. ensureSessionMessage()
        // creates the (now-empty) session CustomMessage; we then push every
        // past turn into details.turns and ask for a re-render.
        const historical = loadSessionTurns(picked.sessionId, ctx.cwd, targetModel);
        const details = ensureSessionMessage();
        details.turns.push(...historical);
        tuiRef?.requestRender();
        const ago = relativeTime(picked.mtimeMs);
        const preview = picked.firstUserMessage
            ? `: "${truncate(picked.firstUserMessage, 50)}"`
            : "";
        const histNote = historical.length > 0
            ? ` (${historical.length} historical turn${historical.length === 1 ? "" : "s"} loaded)`
            : " (transcript empty or unreadable)";
        ctx.ui.notify(
            `Resuming session ${picked.sessionId.slice(0, 8)} (${ago})${preview} as Claude ${capitalize(targetModel)}.${histNote}`,
            "info",
        );
    },
});

// Note on ESC: pi's extension runner reserves the "interrupt" action, so
// pi.registerShortcut("escape", …) is silently ignored. ESC-to-abort is
// wired via the ChatEscEditor custom editor installed in enterChatMode.

// ── Message renderer ─────────────────────────────────────────────────────
// ONE custom message type holds the WHOLE chat-mode session. Returning a
// live component (render reads `details.turns` on every frame) lets
// streaming updates appear with a simple `tuiRef.requestRender()` — no
// full rebuild of pi's chat container required.
//
// Performance: each frame now reuses cached per-turn line output for
// completed turns (see renderSessionLines). Only the in-flight assistant
// turn (if any) is rebuilt each frame, so long conversations stop driving
// O(turns × blocks) allocation during Claude streaming.
pi.registerMessageRenderer("chat-claude-session", (message, _opts, theme) => {
    const d = message.details as ChatSessionDetails | undefined;
    if (!d || !Array.isArray(d.turns)) return undefined;
    return {
        // pi calls invalidate() when theme changes or a from-scratch
        // re-render is needed — drop every turn's render cache so the
        // next render pass rebuilds against the new theme.
        invalidate: () => invalidateSessionCache(d),
        render: (width: number) => {
            // Too narrow for a border (2 border + 2 padding columns):
            // render the turns without one.
            if (width < 6) return renderSessionLines(d, theme, width);
            const innerWidth = width - 4; // 2 border chars + 2 padding chars
            // renderSessionLines returns lines already padded to
            // `innerWidth` visible columns, so wrapInOrangeBorder does
            // NO visibleWidth() call per frame — the previous hot path
            // (~85% CPU in Intl.Segmenter) is gone.
            const paddedInnerLines = renderSessionLines(d, theme, innerWidth);
            return wrapInOrangeBorder(paddedInnerLines, width);
        },
    };
});
}