Files
dotfiles/pi/.pi/agent/extensions/chat-claude.ts
Jonas H c7edbb1be0 pi
2026-05-10 09:34:33 +02:00

1665 lines
69 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* chat-claude — Distinctive Claude chat MODE inside pi.
*
* When chat mode is active, typed user input is routed to a Claude model
* (haiku/sonnet/opus) via the `claude` CLI — NOT to pi's active LLM.
*
* Rendering goals (match pi's native chat UX):
* - Text appears as full markdown (no truncated previews, no dim grey).
* - Thinking blocks stream live as italic `thinkingText`-coloured markdown
* (the `claude` CLI is invoked with --include-partial-messages).
* - Tool calls use pi's normal tool-execution look (renderToolBlock).
*
* All turns of a single chat-mode session are rendered inside ONE continuous
* orange border: the top line sits above the first turn, the bottom line
* below the most recent turn, and the border extends live as new turns
* (user + assistant) arrive. A new border starts each time the user enters
* chat mode again via /claude / /claude-new.
*
* Commands:
* /claude [haiku|sonnet|opus] — enter chat mode / switch model
* /claude-new [haiku|sonnet|opus] — enter chat mode with a fresh Claude session
* /claude-resume — pick a past session for the current cwd and resume it
* /claude-end — exit chat mode
* /claude-abort — cancel an in-flight Claude response
*/
import { closeSync, openSync, readdirSync, readFileSync, readSync, statSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { copyToClipboard, CustomEditor, getMarkdownTheme } from "@mariozechner/pi-coding-agent";
import type { ExtensionAPI, KeybindingsManager } from "@mariozechner/pi-coding-agent";
import { Box, Container, matchesKey, Markdown, Spacer, Text, truncateToWidth, TUI, visibleWidth, type Component, type EditorTheme } from "@mariozechner/pi-tui";
import {
formatUsage,
renderToolBlock,
runClaude,
type StreamBlock,
} from "../shared/claude-stream.js";
import { startAskBridge, type AskBridge } from "../shared/pi-ask-bridge.js";
import { askSingleQuestionWithInlineNote } from "./pi-ask-tool/ask-inline-ui.js";
// ---------------------------------------------------------------------------
// Orange styling
// ---------------------------------------------------------------------------
// ANSI SGR sequences for the chat-mode palette (256-colour foreground codes).
const ORANGE = "\x1b[38;5;208m"; // pumpkin / tangerine
const ORANGE_DIM = "\x1b[38;5;94m";
const RESET = "\x1b[0m";
const BOLD = "\x1b[1m";

/** Colour `s` orange, resetting all attributes afterwards. */
const orange = (s: string): string => `${ORANGE}${s}${RESET}`;

/** Colour `s` orange and bold, resetting all attributes afterwards. */
const orangeBold = (s: string): string => `${ORANGE}${BOLD}${s}${RESET}`;

/** Colour `s` with the dimmer orange shade, resetting all attributes afterwards. */
const orangeDim = (s: string): string => `${ORANGE_DIM}${s}${RESET}`;
// ---------------------------------------------------------------------------
// Orange border wrapping helper — wraps an array of inner lines in a
// continuous orange box. Applied at the session level so the WHOLE chat
// conversation sits inside ONE box (top above first turn, bottom below
// most recent turn). Pure string→string — no component allocation per frame.
//
// IMPORTANT: `paddedInnerLines` must ALREADY be padded to the inner width
// (`width - 4` visible columns). We don't call visibleWidth() here because that function
// invokes Intl.Segmenter (expensive ICU BreakIterator on every miss) and
// this wrapper runs on every single line of the session on every frame.
// Profile data showed 85% of pi's idle CPU being burned in Segmenter via
// this function. Callers (renderSessionLines) pre-pad inner lines once
// per turn and cache them, so the cost amortises to O(streaming tail).
// ---------------------------------------------------------------------------
/**
 * Frame already-padded lines in a rounded orange box.
 *
 * @param paddedInnerLines Lines that are each exactly `width - 4` visible
 *   columns wide (border + one space of padding on either side). They are
 *   not measured or re-padded here.
 * @param width Total outer width of the box in columns.
 * @returns The top rule, one framed line per input line, then the bottom rule.
 */
function wrapInOrangeBorder(paddedInnerLines: string[], width: number): string[] {
  // String.prototype.repeat throws a RangeError for a negative count, so a
  // degenerate width (< 2 columns) must be clamped instead of crashing the
  // render pass.
  const rule = "─".repeat(Math.max(0, width - 2));
  const side = orange("│");
  const framed = paddedInnerLines.map((line) => side + " " + line + " " + side);
  return [orange("╭" + rule + "╮"), ...framed, orange("╰" + rule + "╯")];
}
// Pad a single inner line to exactly `innerWidth` visible columns, OR
// truncate it if it's already over-wide. Uses visibleWidth() — pi-tui's
// grapheme-aware width function (which is what sits on top of the hot
// Intl.Segmenter path). Intended to be called ONCE per line at cache-build
// time, NOT per frame.
//
// Truncation is a defensive safety net: any component that emits a line
// wider than the width it was handed would otherwise crash pi's TUI (see
// tui.js doRender: "Rendered line N exceeds terminal width"). Without this,
// one stray over-wide line (e.g. a long source code line inside a Read
// tool result) takes down the entire session.
/**
 * Force `line` to exactly `innerWidth` visible columns: over-wide lines are
 * truncated with an ellipsis, shorter ones are right-padded with spaces.
 * Measures with visibleWidth(), so call it when building a cache, not per frame.
 */
function padToInnerWidth(line: string, innerWidth: number): string {
  const missing = innerWidth - visibleWidth(line);
  if (missing < 0) return truncateToWidth(line, innerWidth, "…", true);
  if (missing === 0) return line;
  return line + " ".repeat(missing);
}
// ---------------------------------------------------------------------------
// Read-tool result truncation
//
// `Read` tool calls inside chat mode often dump entire files into the result
// banner — many hundreds of lines, which buries the surrounding conversation.
// We cap the rendered file content at MAX_READ_LINES and append a single
// centered notice line describing how many lines were hidden. This is a
// PRESENTATION-only truncation: `block.result.text` is left untouched, so
// resumed sessions / re-renders still see the full content.
//
// Centering needs render-time width, so we implement a tiny custom Component
// (TruncatedReadResult) and swap it into the Box body produced by the shared
// renderToolBlock helper. The same dim line-number formatting used by
// renderToolResultBox is preserved so the truncated view looks identical to
// the un-truncated one above the notice.
// ---------------------------------------------------------------------------
/** Maximum number of file lines shown for a `Read` tool result. */
const MAX_READ_LINES = 40;

/**
 * Component that renders at most MAX_READ_LINES numbered lines of a `Read`
 * result, followed by a centred "N more lines hidden" notice when lines were
 * dropped. Every emitted line is truncated to the render width.
 */
class TruncatedReadResult implements Component {
  /**
   * @param numbered Parsed result lines (line-number text + content).
   * @param maxNumLen Width the line-number column is left-padded to.
   * @param dimFn Styler applied to the line-number column.
   * @param noticeFn Styler applied to the "hidden lines" notice.
   */
  constructor(
    private readonly numbered: { num: string; content: string }[],
    private readonly maxNumLen: number,
    private readonly dimFn: (s: string) => string,
    private readonly noticeFn: (s: string) => string,
  ) {}

  invalidate(): void { /* stateless */ }

  /** Render the visible slice (and notice, if any) for the given width. */
  render(width: number): string[] {
    const total = this.numbered.length;
    const visible = Math.min(MAX_READ_LINES, total);
    const lines: string[] = [];
    for (let i = 0; i < visible; i++) {
      const entry = this.numbered[i];
      // Truncate to `width` so a single long source-code line (think
      // minified JS or a long comment) can't blow past the TUI's width
      // check and crash the whole session. `truncateToWidth` is
      // ANSI-aware so the dim SGR sequences wrapping the line number
      // survive the cut.
      const raw = this.dimFn(entry.num.padStart(this.maxNumLen)) + " " + entry.content;
      lines.push(truncateToWidth(raw, width, "…", false));
    }
    const hidden = total - visible;
    if (hidden > 0) {
      const notice = `${hidden} more line${hidden === 1 ? "" : "s"} hidden …`;
      const left = Math.max(0, Math.floor((width - visibleWidth(notice)) / 2));
      // The notice must obey the same width limit as the content lines:
      // in a box narrower than the notice text it would otherwise be the
      // one line that exceeds `width`.
      lines.push(truncateToWidth(" ".repeat(left) + this.noticeFn(notice), width, "…", false));
    }
    return lines;
  }
}
// Wrap shared renderToolBlock: for `Read` tool blocks whose result exceeds
// MAX_READ_LINES, replace the Box body's child Text with our truncating
// component. All other tool kinds, error results, and short reads pass
// through unchanged.
/**
 * Render a tool block via the shared helper, then — only for successful
 * `Read` results longer than MAX_READ_LINES — swap the body's content for a
 * TruncatedReadResult. Everything else is returned exactly as rendered.
 */
function renderToolBlockTruncated(block: Extract<StreamBlock, { type: "tool" }>, theme: any): Container {
  const rendered = renderToolBlock(block, theme);
  if (block.name.toLowerCase() !== "read") return rendered;

  const result = block.result;
  if (!result || result.isError) return rendered;

  const nonEmpty = result.text.split("\n").filter((l) => l.length > 0);
  if (nonEmpty.length <= MAX_READ_LINES) return rendered;

  // Each line is "<number>\t<content>"; lines without a tab have no number.
  let maxNumLen = 0;
  const parsed: { num: string; content: string }[] = [];
  for (const l of nonEmpty) {
    const tab = l.indexOf("\t");
    const entry = tab >= 0
      ? { num: l.slice(0, tab), content: l.slice(tab + 1) }
      : { num: "", content: l };
    maxNumLen = Math.max(maxNumLen, entry.num.length);
    parsed.push(entry);
  }

  // renderToolBlock's container is [headerText, bodyBox]. Leave the
  // rendering untouched if that structure is not what we find.
  const body = rendered.children[1];
  if (!(body instanceof Box)) return rendered;

  const dim = (s: string) => theme.fg("dim", s);
  body.clear();
  body.addChild(new TruncatedReadResult(parsed, maxNumLen, dim, dim));
  return rendered;
}
// ---------------------------------------------------------------------------
// Models / turn types
// ---------------------------------------------------------------------------
/** The model slots selectable in chat mode. */
const MODELS = ["haiku", "sonnet", "opus"] as const;

/** One of the literal slot names in MODELS. */
type Model = (typeof MODELS)[number];

/** Upper-case the first UTF-16 code unit of `s`; the rest is unchanged. */
const capitalize = (s: string) => {
  const head = s.charAt(0);
  const tail = s.slice(1);
  return head.toUpperCase() + tail;
};
// UI-facing model slot → actual `claude --model <id>` argument.
//
// `opus` is pinned to claude-opus-4-6 on purpose: Opus 4.7 (what the plain
// `opus` alias currently resolves to) returns thinking as an encrypted
// signature only — no `thinking_delta` events ever stream, so the italic
// thinking-block rendering stays blank the entire turn. 4.6 streams
// plaintext thinking normally, so pinning here restores the feature for
// the `opus` slot. Haiku/Sonnet use the plain alias (newest).
//
// We also pin haiku/sonnet to their CLI aliases for symmetry — if a
// future CLI alias bump lands on a model with the same redacted-thinking
// behaviour, we can downgrade the pin here without touching the rest of
// the extension.
// Lookup table: UI model slot → value passed as `claude --model <id>`.
const CLI_MODEL: Record<Model, string> = {
haiku: "haiku", // plain CLI alias
sonnet: "sonnet", // plain CLI alias
opus: "claude-opus-4-6", // explicit id instead of the `opus` alias (see rationale above)
};
// ---------------------------------------------------------------------------
// Past-session discovery (used by /claude-resume).
//
// Claude CLI persists every session's transcript at:
// ~/.claude/projects/<mangled-cwd>/<session-uuid>.jsonl
// where the mangling rule (verified empirically) is "replace every '/' and
// '.' with '-'". So /home/jonas/dotfiles/pi/.pi → -home-jonas-dotfiles-pi--pi
// (the leading '-' comes from the leading '/'; '.pi' contributes '--pi'
// because both '/' and '.' map to '-').
//
// We don't need to consult ~/.claude/sessions/ for this picker — that
// directory only contains metadata for currently-running Claude processes.
// The on-disk transcript at projects/<cwd>/<id>.jsonl is the source of
// truth for "past sessions in this directory".
// ---------------------------------------------------------------------------
/** Turn a cwd into its transcript directory name by mapping every '/' and '.' to '-'. */
function mangleCwd(cwd: string): string {
  return cwd.split(/[/.]/).join("-");
}
/**
 * Format how long ago the epoch-millisecond timestamp `ms` was, as a compact
 * label ("12s ago", "3h ago", "5mo ago", "2y ago").
 *
 * Units are approximate: a month is 30 days and a year is 365 days.
 *
 * @param ms Timestamp in milliseconds since the Unix epoch.
 * @returns The label, or "in the future" when `ms` is later than now.
 */
function relativeTime(ms: number): string {
  const elapsedMs = Date.now() - ms;
  if (elapsedMs < 0) return "in the future";
  const sec = Math.floor(elapsedMs / 1000);
  if (sec < 60) return `${sec}s ago`;
  const min = Math.floor(sec / 60);
  if (min < 60) return `${min}m ago`;
  const hr = Math.floor(min / 60);
  if (hr < 24) return `${hr}h ago`;
  const day = Math.floor(hr / 24);
  if (day < 30) return `${day}d ago`;
  const mon = Math.floor(day / 30);
  if (mon < 12) return `${mon}mo ago`;
  // 360–364 days is already twelve 30-day months but not yet a 365-day
  // year; clamp to 1 so that window reads "1y ago" instead of "0y ago".
  return `${Math.max(1, Math.floor(day / 365))}y ago`;
}
/** Map a raw Claude model identifier (e.g. "claude-haiku-4-5-20251001") to
* one of our canonical short names. Returns null if no match. */
/**
 * Reduce a raw model identifier to a canonical slot name by case-insensitive
 * substring match, checked in the order haiku → sonnet → opus.
 * Returns null when none of the names occur in `raw`.
 */
function normalizeRawModel(raw: string): Model | null {
  const needle = raw.toLowerCase();
  for (const candidate of ["haiku", "sonnet", "opus"] as const) {
    if (needle.includes(candidate)) return candidate;
  }
  return null;
}
/** Summary of one on-disk Claude transcript, as produced by readPastSessions(). */
interface PastSession {
// Transcript file name with the `.jsonl` suffix removed.
sessionId: string;
// Last-modified time of the transcript file in ms (statSync().mtimeMs); used as the sort key.
mtimeMs: number;
firstUserMessage: string; // truncated/normalised, "" if not found
model: Model | null; // null ⇒ couldn't determine
rawModel: string; // raw string from JSONL ("" if not found)
}
/** Read the head of a file (avoids slurping multi-MB JSONL transcripts). */
/**
 * Return up to the first `maxBytes` bytes of the file at `path`, decoded as
 * UTF-8. The descriptor is always closed, including when the read throws.
 */
function readFileHead(path: string, maxBytes: number): string {
  const handle = openSync(path, "r");
  try {
    const scratch = Buffer.alloc(maxBytes);
    // Position 0: always read from the start of the file.
    const bytesRead = readSync(handle, scratch, 0, maxBytes, 0);
    return scratch.toString("utf8", 0, bytesRead);
  } finally {
    closeSync(handle);
  }
}
/** Pluck the first user message + first model id from a transcript head. */
/**
 * Pluck the first user message and the first model id from the head of a
 * JSONL transcript.
 *
 * Lines that are blank, fail to parse, or parse to something other than a
 * JSON object are skipped. Scanning stops as soon as both values are found.
 *
 * @param head Leading portion of a transcript (one JSON event per line).
 * @returns `firstUserMessage` with whitespace runs collapsed to single
 *   spaces and trimmed, and `rawModel` verbatim; each is "" when not found.
 */
function extractSessionMeta(head: string): { firstUserMessage: string; rawModel: string } {
  let firstUserMessage = "";
  let rawModel = "";
  for (const line of head.split("\n")) {
    if (firstUserMessage && rawModel) break;
    if (!line.trim()) continue;
    let ev: any;
    try { ev = JSON.parse(line); } catch { continue; }
    // JSON.parse happily returns `null` (or a bare number/string) for such
    // a line; reading `.type` off `null` would throw and abort the scan.
    if (ev === null || typeof ev !== "object") continue;
    if (!firstUserMessage) {
      // Two equivalent sources: a queue-operation enqueue carries the raw
      // text the user typed; a `type: "user"` event carries it inside
      // message.content (which is either a string or an array of blocks).
      if (ev.type === "queue-operation" && ev.operation === "enqueue" && typeof ev.content === "string") {
        firstUserMessage = ev.content;
      } else if (ev.type === "user" && ev.message) {
        const c = ev.message.content;
        if (typeof c === "string") {
          firstUserMessage = c;
        } else if (Array.isArray(c)) {
          firstUserMessage = c
            .filter((b: any) => b?.type === "text" && typeof b.text === "string")
            .map((b: any) => b.text as string)
            .join(" ");
        }
      }
    }
    if (!rawModel && typeof ev.message?.model === "string") {
      rawModel = ev.message.model;
    }
  }
  return {
    firstUserMessage: firstUserMessage.replace(/\s+/g, " ").trim(),
    rawModel,
  };
}
/**
 * List the Claude transcripts stored for `cwd`, most recently modified first.
 * Returns [] when the project directory cannot be read; individual files
 * that cannot be stat'ed or read are skipped.
 */
function readPastSessions(cwd: string): PastSession[] {
  // ~256 KB of head — enough to find the first user message and the first
  // assistant turn (which carries the model id) in any reasonable
  // transcript without paying for multi-MB reads.
  const headBytes = 256 * 1024;
  const dir = join(homedir(), ".claude", "projects", mangleCwd(cwd));

  let transcripts: string[];
  try {
    transcripts = readdirSync(dir).filter((name) => name.endsWith(".jsonl"));
  } catch {
    return [];
  }

  const sessions: PastSession[] = [];
  for (const name of transcripts) {
    const full = join(dir, name);
    let mtimeMs: number;
    let head: string;
    try {
      mtimeMs = statSync(full).mtimeMs;
      head = readFileHead(full, headBytes);
    } catch {
      continue;
    }
    const meta = extractSessionMeta(head);
    sessions.push({
      sessionId: name.replace(/\.jsonl$/, ""),
      mtimeMs,
      firstUserMessage: meta.firstUserMessage,
      model: meta.rawModel ? normalizeRawModel(meta.rawModel) : null,
      rawModel: meta.rawModel,
    });
  }
  return sessions.sort((a, b) => b.mtimeMs - a.mtimeMs);
}
/** Truncate a string to `max` chars, appending "…" when cut. */
/**
 * Shorten `s` to at most `max` UTF-16 units. When cut, the kept prefix is
 * `max - 1` units with trailing whitespace removed, followed by "…".
 */
function truncate(s: string, max: number): string {
  const fits = s.length <= max;
  if (fits) return s;
  const keep = Math.max(0, max - 1);
  const prefix = s.slice(0, keep).trimEnd();
  return `${prefix}…`;
}
// ---------------------------------------------------------------------------
// JSONL transcript → ChatTurn[]
//
// Given a sessionId and cwd, load the full transcript at
// ~/.claude/projects/<mangled-cwd>/<sessionId>.jsonl
// and convert it into the same UserTurn / AssistantTurn shape the live
// runChatTurn() path produces. This lets /claude-resume render the past
// context inside the orange border so the user can SEE what they're
// resuming, not just blindly continue an invisible thread.
//
// JSONL event reference (observed in 2.1.118 transcripts):
// {type:"user", message:{role:"user", content: <string>}} ← typed prompt
// {type:"user", message:{role:"user", content: [{type:"tool_result", …}, …]}} ← tool outputs
// {type:"assistant",message:{role:"assistant", content: [<one of: thinking|text|tool_use>], usage:{…}, model:"claude-sonnet-4-6"}}
// Each assistant content block is emitted as its OWN line, all sharing the
// same usage / model fields (one API call → many lines). We coalesce every
// run of consecutive assistant lines into a single AssistantTurn whose
// `blocks` array preserves the in-order list of thinking/text/tool blocks.
// Tool results that arrive in subsequent user-lines are attached back onto
// the matching tool block by tool_use_id.
//
// Lines we ignore: agent-setting, queue-operation, attachment, last-prompt,
// summary, and anything else without a recognisable role/content shape.
// Tokens/cost are intentionally NOT carried over — the JSONL repeats usage
// per content block so summing naively would over-count, and the user is
// here to see CONTENT, not a token panel for old turns.
// ---------------------------------------------------------------------------
/**
 * Load a past Claude session's JSONL transcript and convert it into the
 * UserTurn / AssistantTurn list used for rendering.
 *
 * Consecutive assistant events are coalesced into one AssistantTurn; a typed
 * user prompt closes the open assistant turn. `tool_result` blocks found in
 * user events are attached to the tool block with the matching id in the
 * currently open assistant turn. Lines that are blank, unparseable, or not
 * JSON objects are skipped, as are all other event types.
 *
 * @param sessionId Transcript id (file name without `.jsonl`).
 * @param cwd Working directory whose mangled name selects the project dir.
 * @param fallbackModel Model recorded on a turn when an assistant event
 *   carries no recognisable model id.
 * @returns The turns in transcript order, or [] if the file cannot be read.
 */
function loadSessionTurns(sessionId: string, cwd: string, fallbackModel: Model): ChatTurn[] {
  const path = join(homedir(), ".claude", "projects", mangleCwd(cwd), `${sessionId}.jsonl`);
  let raw: string;
  try { raw = readFileSync(path, "utf8"); } catch { return []; }

  const turns: ChatTurn[] = [];
  let current: AssistantTurn | null = null;

  // Close the open assistant turn (if any) and append it to `turns`.
  const flush = () => {
    if (!current) return;
    current.finalText = current.blocks
      .filter((b) => b.type === "text")
      .map((b: any) => b.text as string)
      .join("");
    turns.push(current);
    current = null;
  };

  // Return the open assistant turn, opening one for `model` if needed.
  const ensureCurrent = (model: Model): AssistantTurn => {
    if (current) return current;
    current = {
      role: "assistant",
      model,
      blocks: [],
      finalText: "",
      sessionId,
      isResume: false,
      done: true,
    };
    return current;
  };

  // Flatten a tool_result `content` (string, or array of text blocks) to text.
  const toolResultText = (content: any): string => {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    return content
      .filter((b: any) => b?.type === "text" && typeof b.text === "string")
      .map((b: any) => b.text as string)
      .join("\n");
  };

  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    let ev: any;
    try { ev = JSON.parse(line); } catch { continue; }
    // A line that parses to `null` or a bare primitive is valid JSON but
    // not an event; reading `.type` off `null` would throw and abort the
    // whole resume.
    if (ev === null || typeof ev !== "object") continue;

    if (ev.type === "user") {
      const c = ev.message?.content;
      if (typeof c === "string") {
        // Typed user prompt — closes any in-flight assistant turn.
        flush();
        if (c.trim()) turns.push({ role: "user", text: c });
      } else if (Array.isArray(c)) {
        let sawToolResult = false;
        for (const block of c) {
          if (block?.type === "tool_result") {
            sawToolResult = true;
            const text = toolResultText(block.content);
            const isError = block.is_error === true;
            // TS 5.x loses narrowing of the `let current` that is
            // reassigned by the `flush` closure — even a `const cur
            // = current` annotation doesn't survive the for-of
            // header re-evaluation. A direct cast on the `.blocks`
            // access is the minimal escape hatch confirmed to work
            // in isolation tests with TS 5.9.
            if (current !== null) {
              const curBlocks = (current as AssistantTurn).blocks;
              for (const tb of curBlocks) {
                if (tb.type === "tool" && tb.id === block.tool_use_id) {
                  tb.result = { text, isError };
                  break;
                }
              }
            }
          } else if (block?.type === "text" && typeof block.text === "string") {
            // Some clients send array-shaped user prompts. Text that
            // follows a tool_result in the same event is not a prompt.
            if (!sawToolResult) {
              flush();
              if (block.text.trim()) turns.push({ role: "user", text: block.text });
            }
          }
        }
      }
    } else if (ev.type === "assistant") {
      // Only an array of content blocks is meaningful; anything else
      // (missing, string, object) contributes no blocks rather than
      // throwing inside for…of.
      const rawContent = ev.message?.content;
      const content: any[] = Array.isArray(rawContent) ? rawContent : [];
      const rawModel = String(ev.message?.model ?? "");
      const model = (rawModel ? normalizeRawModel(rawModel) : null) ?? fallbackModel;
      // If the per-line model differs from what we opened the turn with,
      // keep the first one — a single coalesced "turn" inherits the model
      // of its first API call. (This is purely for the header label.)
      const a = ensureCurrent(model);
      for (const block of content) {
        if (block?.type === "thinking" && typeof block.thinking === "string") {
          if (block.thinking.trim()) a.blocks.push({ type: "thinking", text: block.thinking });
        } else if (block?.type === "text" && typeof block.text === "string") {
          if (block.text.trim()) a.blocks.push({ type: "text", text: block.text });
        } else if (block?.type === "tool_use") {
          a.blocks.push({
            type: "tool",
            id: String(block.id ?? ""),
            name: String(block.name ?? ""),
            inputJson: JSON.stringify(block.input ?? {}),
          });
        }
      }
    }
    // All other event types (agent-setting, queue-operation, attachment,
    // last-prompt, summary, …) are intentionally ignored.
  }
  flush();
  return turns;
}
// Per-turn render cache: once a turn is "frozen" (user turns are always
// frozen; assistant turns after done=true), its rendered output at a given
// (innerWidth, theme) is invariant. Caching avoids O(turns) rebuild on every
// frame, which otherwise creates quadratic-ish lag during streaming because
// partial-message updates drive tens of renders per second.
/**
 * Optional render-cache slots mixed into every turn. All three are unset
 * until a cache is built.
 * NOTE(review): the code that fills and checks these is outside this
 * section; the descriptions below follow the (innerWidth, theme) wording above.
 */
interface TurnRenderCache {
// Cached rendered lines for the turn.
cachedLines?: string[];
// Width the cached lines were rendered for.
cachedWidth?: number;
// Theme the cached lines were rendered with.
cachedTheme?: unknown;
}
/** A prompt typed by the user; `role` is the discriminant of ChatTurn. */
interface UserTurn extends TurnRenderCache {
role: "user";
// The prompt text; rendered as markdown after trimming.
text: string;
}
/** One Claude response; `role` is the discriminant of ChatTurn. */
interface AssistantTurn extends TurnRenderCache {
role: "assistant";
// Model slot shown (capitalised) in the turn header.
model: Model;
// Thinking / text / tool blocks in the order they should be rendered.
blocks: StreamBlock[];
// For turns loaded from a transcript: the concatenation of all text blocks.
// NOTE(review): how the live streaming path fills this is not visible here.
finalText: string;
// Claude session id; its first 8 characters are shown in the header when set.
sessionId?: string;
// Selects the header icon. Presumably true when the turn continued an
// existing session — confirm against the streaming code.
isResume: boolean;
// False while the turn is still in progress; the header shows a marker until it is true.
done: boolean;
// When set, the header uses the error icon.
error?: string;
// When true, the header uses the cancelled icon (takes precedence over `error`).
cancelled?: boolean;
// Usage figures for the turn. Not populated for turns loaded from a
// transcript; presumably filled by the live stream — TODO confirm.
costUsd?: number;
inputTokens?: number;
outputTokens?: number;
cacheReadTokens?: number;
cacheWriteTokens?: number;
}
// A single entry in a chat-mode conversation, discriminated by `role`.
type ChatTurn = UserTurn | AssistantTurn;
// Payload of one chat-mode session: its turns, oldest first.
interface ChatSessionDetails {
turns: ChatTurn[];
}
// ---------------------------------------------------------------------------
// Todo extraction — scan the session for the most recent TodoWrite tool call
// and return its todos array. Rendered BETWEEN the orange-bordered
// conversation and the mode banner by the chat-claude widget so the
// current task list is always visible without scrolling through history.
//
// Only the latest TodoWrite wins (earlier ones are superseded); empty or
// malformed inputs are treated as "no todos" and suppress the section.
// ---------------------------------------------------------------------------
// Status values distinguished when displaying todos; "completed" items are
// de-prioritised when the list has to be cut down.
type TodoStatus = "completed" | "in_progress" | "pending";
// One entry of a TodoWrite tool call's `todos` array. The shape is asserted
// by a cast in getLatestTodos, not validated at runtime.
interface Todo {
content: string;
status: TodoStatus;
// NOTE(review): presumably the present-tense label for an in-progress item — confirm against the TodoWrite schema.
activeForm: string;
}
/**
 * Find the todo list of the most recent TodoWrite tool call in the session.
 *
 * Turns and blocks are scanned newest-first. The first TodoWrite found
 * decides the result: a non-empty `todos` array is returned, anything else
 * (empty, malformed, unparseable input) yields null without consulting
 * older calls. Returns null when there is no session or no TodoWrite.
 */
function getLatestTodos(details: ChatSessionDetails | null): Todo[] | null {
  if (!details) return null;
  const { turns } = details;
  for (let ti = turns.length - 1; ti >= 0; ti--) {
    const turn = turns[ti];
    if (turn.role !== "assistant") continue;
    for (let bi = turn.blocks.length - 1; bi >= 0; bi--) {
      const block = turn.blocks[bi];
      if (block.type !== "tool" || block.name !== "TodoWrite") continue;
      let input: any;
      try {
        input = JSON.parse(block.inputJson);
      } catch {
        return null;
      }
      const todos = input?.todos;
      // The latest TodoWrite wins even when it is empty/malformed — do not
      // fall back to an older one (the user cleared it).
      return Array.isArray(todos) && todos.length > 0 ? (todos as Todo[]) : null;
    }
  }
  return null;
}
// Cap so a runaway todo list can't push the editor off-screen. In practice
// lists stay well under this; when they don't, we render the first N-1 items
// plus a "… X more" notice. Non-completed items are prioritised over
// completed ones in the visible slice, since the point of surfacing todos
// on-screen is to show what's left to do.
/** Maximum number of lines the todo section may occupy. */
const MAX_TODO_LINES = 12;

/**
 * Choose which todos to display. Lists that fit are returned untouched.
 * Longer lists are cut to MAX_TODO_LINES - 1 entries (one line is kept for
 * the "… more" notice): non-completed items first, then completed ones,
 * each group in original order. `hidden` is the number of items left out.
 */
function sliceTodosForDisplay(todos: Todo[]): { shown: Todo[]; hidden: number } {
  if (todos.length <= MAX_TODO_LINES) return { shown: todos, hidden: 0 };
  const budget = MAX_TODO_LINES - 1;
  const isDone = (t: Todo) => t.status === "completed";
  const open = todos.filter((t) => !isDone(t));
  const finished = todos.filter(isDone);
  const shown = [...open, ...finished].slice(0, budget);
  return { shown, hidden: todos.length - shown.length };
}
// ---------------------------------------------------------------------------
// Code block extraction — raw fenced code from the session's text blocks.
//
// Used by the Ctrl+Shift+C shortcut to copy clean, unrendered code directly
// from the parsed JSON stream, avoiding the ANSI escape sequences, stray
// indentation, and line-continuation artefacts that terminal selection gives.
//
// Blocks are returned newest-first (last assistant turn first; within a turn,
// last code fence first) so the most recent snippet is always at index 0.
// ---------------------------------------------------------------------------
/** One fenced code block pulled out of an assistant text block. */
interface ExtractedCodeBlock {
lang: string; // language tag after the opening fence ("" when absent)
code: string; // raw content between the fences (no surrounding ```)
label: string; // compact one-line description for the picker UI
}
/**
 * Collect the fenced code blocks from every assistant text block in the
 * session, newest first (last assistant turn first; within a turn, last
 * fence first).
 *
 * @param details The session whose turns are scanned.
 * @returns One entry per non-empty fence, with its language tag (first word
 *   of the fence's info string, "" when absent), raw code and picker label.
 */
function extractCodeBlocksFromSession(details: ChatSessionDetails): ExtractedCodeBlock[] {
  const out: ExtractedCodeBlock[] = [];
  for (let ti = details.turns.length - 1; ti >= 0; ti--) {
    const turn = details.turns[ti];
    if (turn.role !== "assistant") continue;
    const turnBlocks: ExtractedCodeBlock[] = [];
    for (const block of turn.blocks) {
      if (block.type !== "text") continue;
      // Match fenced code: ```info\n…content…``` (info string optional).
      // Group 1 is the first word of the info string and may contain
      // non-word characters (c++, c#, objective-c); any remaining
      // attributes up to the end of the line are skipped. Backticks are
      // excluded from the info string so an inline ```span``` is never
      // taken for an opening fence. Also covers CRLF line endings.
      // [\s\S]*? is non-greedy: the block ends at the first closing ```.
      // A fresh regex per text block keeps `lastIndex` state isolated.
      const fence = /```([^\s`]*)[^\n`]*\r?\n([\s\S]*?)```/g;
      let m: RegExpExecArray | null;
      while ((m = fence.exec(block.text)) !== null) {
        const lang = m[1] ?? "";
        const code = m[2] ?? "";
        if (!code.trim()) continue; // skip empty fences
        // Build a compact one-line label: [lang] first-non-blank-line
        const firstLine = code.split("\n").find((l) => l.trim()) ?? "";
        const preview = firstLine.length > 55
          ? firstLine.slice(0, 52).trimEnd() + "…"
          : firstLine;
        const langTag = lang ? `[${lang}] ` : "";
        turnBlocks.push({ lang, code, label: `${langTag}${preview}` });
      }
    }
    // Reverse within the turn so the last fence in that turn comes first.
    for (let i = turnBlocks.length - 1; i >= 0; i--) out.push(turnBlocks[i]!);
  }
  return out;
}
// =============================================================================
// Extension entry point
// =============================================================================
// ── Reload-persistent state ─────────────────────────────────────────────────
// pi's `/reload` tears the extension down and re-invokes the default export,
// which resets every closure-local `let`/`const`. The Map of resumable Claude
// session ids (model → sessionId) is the one piece of state we want to
// survive that — otherwise /reload silently orphans the ongoing Claude
// threads, forcing the user to re-pick them via /claude-resume.
//
// Everything else (chatMode, currentDetails, askBridge, tuiRef, isGenerating)
// is intentionally NOT persisted: the bridge/TUI references are bound to the
// torn-down ctx and must be rebuilt on the next enterChatMode(), and any
// in-flight stream is already aborted when the old closure is discarded.
//
// We stash the Map on globalThis behind a namespaced key. globalThis survives
// module re-evaluation (only top-level lexical bindings are reset), and the
// guarded getter keeps initialization idempotent across repeated reloads.
// Valid extended-thinking effort levels accepted by `claude --effort`, plus
// our synthetic "off" sentinel which skips the flag entirely (falling back
// to the CLI's default of no thinking emission in -p mode).
// Every selectable effort level; "off" is the synthetic sentinel, the rest
// are values for `claude --effort`.
const EFFORTS = ["off", "low", "medium", "high", "xhigh", "max"] as const;
// Union of the literal strings in EFFORTS.
type Effort = (typeof EFFORTS)[number];
// Level used when persisted state is first created or lacks an effort field.
const DEFAULT_EFFORT: Effort = "max";
/** State kept on globalThis so it outlives a `/reload` of the extension. */
interface ChatClaudePersistedState {
// Model slot → resumable Claude session id.
sessions: Map<Model, string>;
// Current extended-thinking effort level — persisted across `/reload`
// so the user's choice survives the extension teardown the same way
// resumable session ids do.
effort: Effort;
// Prompts typed in chat mode, oldest-first. Capped at MAX_PROMPT_HISTORY.
// Replayed into the editor on every ChatEscEditor creation so up-arrow
// history is available immediately in any new chat session.
promptHistory: string[];
}
/** Namespaced globalThis property under which the persisted state lives. */
const CHAT_CLAUDE_STATE_KEY = "__pi_chat_claude_persisted__";

// Upper bound on persisted prompts. The Editor caps its own in-memory list
// at 100; persisting more keeps the most recent 100 available even after
// many reloads without hitting the per-instance limit.
const MAX_PROMPT_HISTORY = 200;

/**
 * Return the reload-persistent state object, creating and registering it on
 * globalThis on first use. Fields missing from a state object written by an
 * older revision (`effort`, `promptHistory`) are back-filled in place.
 */
function getPersistedState(): ChatClaudePersistedState {
  const store = globalThis as unknown as Record<string, ChatClaudePersistedState>;
  let state = store[CHAT_CLAUDE_STATE_KEY];
  if (!state) {
    state = {
      sessions: new Map<Model, string>(),
      effort: DEFAULT_EFFORT,
      promptHistory: [],
    };
    store[CHAT_CLAUDE_STATE_KEY] = state;
  }
  // Pre-/claude-effort revisions carried no effort field.
  if (!state.effort) {
    state.effort = DEFAULT_EFFORT;
  }
  // Pre-promptHistory revisions carried no history array.
  if (!state.promptHistory) {
    state.promptHistory = [];
  }
  return state;
}
export default function (pi: ExtensionAPI) {
// ── Mode state ────────────────────────────────────────────────────────────
let chatMode: Model | null = null; // null ⇒ not in chat mode
// model → resumable claude session id. Pulled from globalThis so the
// mapping (and the current effort level) survive `/reload` (see
// getPersistedState above). `persisted` is kept as a handle so
// `/claude-effort` can mutate `persisted.effort` in place and have
// the change picked up by subsequent runChatTurn calls.
const persisted = getPersistedState();
const { sessions } = persisted;
let isGenerating = false;
let currentAbort: AbortController | null = null;
// pi-ask bridge — opens a Unix socket + generates an --mcp-config so
// Claude (running inside this chat) can ask the user questions through
// pi's native ask UI. Bound to the chat-mode lifetime: started on
// enterChatMode, closed on exitChatMode.
let askBridge: AskBridge | null = null;
// Live TUI reference captured from the mode-banner widget factory, used to
// schedule re-renders while a Claude response is streaming into the
// current chat-claude-session message.
let tuiRef: { requestRender: () => void } | null = null;
// Reference to the active ChatEscEditor instance so we can call
// addToHistory() on it after each prompt submission, making the new entry
// immediately navigable with the up-arrow inside the same session.
let editorRef: ChatEscEditor | null = null;
// The in-flight chat session's `details` object. Stored by reference so
// mutations here are reflected in the CustomMessage already displayed
// in pi's conversation. Null between chat-mode sessions.
let currentDetails: ChatSessionDetails | null = null;
// Keep a module-level set of the extension's custom-message types so the
// `context` event handler can strip them out of pi's LLM context — chat
// mode is between the user and Claude and has no business in pi's
// prompt payload.
const CHAT_CLAUDE_CUSTOM_TYPES = new Set(["chat-claude-session"]);
// ── Render throttling ────────────────────────────────────────────────────
// Claude's `--include-partial-messages` fires an onUpdate for every token
// delta (100+ Hz under a fast stream). Rendering per-token was the second
// half of the progressive-lag problem — even with per-turn caching, the
// TUI would be asked to diff+repaint dozens of times per second.
//
// scheduleStreamRender coalesces back-to-back requests into a trailing-
// edge timer at ~30 Hz. The first update within a quiet window waits up
// to 33 ms before rendering; any further updates in that window are
// folded into the same render. flushStreamRender cancels the pending
// timer and renders immediately — used on stream completion, abort, and
// chat-mode teardown so the user sees the terminal frame right away.
let streamRenderTimer: ReturnType<typeof setTimeout> | null = null;
const STREAM_RENDER_INTERVAL_MS = 33; // ~30 Hz
// Queue a render STREAM_RENDER_INTERVAL_MS from now unless one is already
// pending; updates arriving while a timer is armed share its render.
function scheduleStreamRender() {
  if (streamRenderTimer) return;
  const fire = () => {
    // Disarm first so the next update can schedule a new render.
    streamRenderTimer = null;
    tuiRef?.requestRender();
  };
  streamRenderTimer = setTimeout(fire, STREAM_RENDER_INTERVAL_MS);
}
// Drop any pending throttled render and request one right away.
function flushStreamRender() {
  const pending = streamRenderTimer;
  streamRenderTimer = null;
  if (pending) clearTimeout(pending);
  tuiRef?.requestRender();
}
// ── Rendering helpers ────────────────────────────────────────────────────
// Mirrors pi's AssistantMessageComponent conventions (see
// modes/interactive/components/assistant-message.js): Markdown at
// paddingX=1, paddingY=0; thinking as italic `thinkingText`-coloured
// markdown; tool blocks via the shared renderToolBlock (same one
// ask-claude uses) so bash / read / edit / write all look identical to
// pi's own tool executions.
// Append the components for ONE chat turn to `container`.
//
// User turns: a " you" label, a spacer, then the prompt as markdown.
// Assistant turns: a header line (status icon, model, optional short
// session id, in-flight marker), the streamed blocks in arrival order, and
// finally a terminal notice (cancelled / error) or the usage summary.
function renderTurnInto(container: Container, turn: ChatTurn, theme: any) {
    const mdTheme = getMarkdownTheme();

    if (turn.role === "user") {
        container.addChild(new Text(orangeBold(" you"), 1, 0));
        container.addChild(new Spacer(1));
        container.addChild(new Markdown(turn.text.trim(), 1, 0, mdTheme));
        return;
    }

    // ── Assistant header ──────────────────────────────────────────────
    let icon: string;
    if (turn.cancelled) icon = orange("◇ ");
    else if (turn.error) icon = theme.fg("error", "✗ ");
    else if (turn.isResume) icon = orange(" ");
    else icon = orange("◆ ");

    const modelLabel = orangeBold(`Claude ${capitalize(turn.model)}`);
    const sessionTag = turn.sessionId
        ? theme.fg("dim", ` session:${turn.sessionId.slice(0, 8)}`)
        : "";
    const inFlightTag = turn.done ? "" : theme.fg("warning", " ");
    container.addChild(new Text(icon + modelLabel + sessionTag + inFlightTag, 1, 0));
    container.addChild(new Spacer(1));

    // ── Blocks ────────────────────────────────────────────────────────
    // `rendered` tracks whether anything has been emitted after the
    // header, so spacers only ever sit BETWEEN pieces of content.
    let rendered = false;
    const gap = () => {
        if (rendered) container.addChild(new Spacer(1));
        rendered = true;
    };

    // Safety-net dedup: a tool block whose id was already rendered is
    // skipped (first occurrence wins), whatever order events arrive in.
    const seenToolIds = new Set<string>();

    for (const block of turn.blocks ?? []) {
        switch (block.type) {
            case "tool":
                if (seenToolIds.has(block.id)) break;
                seenToolIds.add(block.id);
                gap();
                container.addChild(renderToolBlockTruncated(block, theme));
                break;
            case "thinking":
                if (!block.text.trim()) break;
                gap();
                container.addChild(new Markdown(block.text.trim(), 1, 0, mdTheme, {
                    color: (t: string) => theme.fg("thinkingText", t),
                    italic: true,
                }));
                break;
            case "text":
                if (!block.text.trim()) break;
                gap();
                container.addChild(new Markdown(block.text.trim(), 1, 0, mdTheme));
                break;
        }
    }

    // ── Terminal notice / usage ───────────────────────────────────────
    // Emitted AFTER the blocks so partial output streamed before an
    // abort or failure stays visible.
    if (turn.cancelled || turn.error) {
        if (rendered) container.addChild(new Spacer(1));
        const notice = turn.cancelled
            ? orange("(Cancelled)")
            : theme.fg("error", `Error: ${turn.error}`);
        container.addChild(new Text(notice, 1, 0));
        return;
    }
    if (!turn.done) return;

    const usage = formatUsage(turn as any);
    if (usage) {
        container.addChild(new Spacer(1));
        container.addChild(new Text(theme.fg("dim", usage), 1, 0));
    }
}
// Render one turn in isolation and return its lines PRE-PADDED to
// `innerWidth` visible columns.
//
// Pre-padding here means `visibleWidth()` (which calls `Intl.Segmenter`
// — the measured hot spot: 85% of pi's CPU in a laggy session) runs
// exactly ONCE per line per turn, not once per line per frame. For
// completed turns these padded lines are cached and reused forever at
// that (width, theme); for the streaming tail turn the work is bounded
// to just the in-flight turn's lines.
// Render a single turn into a scratch container and return its lines, each
// one padded to `innerWidth` via padToInnerWidth.
function renderTurnLines(turn: ChatTurn, theme: any, innerWidth: number): string[] {
    const scratch = new Container();
    renderTurnInto(scratch, turn, theme);
    return scratch
        .render(innerWidth)
        .map((line) => padToInnerWidth(line, innerWidth));
}
// Assemble the WHOLE session's inner lines with per-turn caching.
//
// Cache invariants:
// • User turns are immutable → always cacheable.
// • Assistant turns are mutated in-place by runClaude's onUpdate
// callback while streaming, and only become stable after
// `done: true` is set (see runChatTurn). So we only cache
// assistants once they're done.
// • Cache keys on (innerWidth, theme) — terminal resize or theme
// switch invalidates all per-turn caches transparently by forcing
// a rebuild on the next render.
//
// With this cache, a streaming frame only rebuilds the one in-flight
// assistant turn (the tail); all prior turns are an O(1) line-copy.
// That eliminates the O(turns × blocks) rebuild that previously ran
// every time a partial Claude message arrived.
//
// Returned lines are PRE-PADDED to `innerWidth` visible columns — see
// renderTurnLines/padToInnerWidth for why. The caller can hand them
// straight to wrapInOrangeBorder without any further visibleWidth()
// calls, which is critical: visibleWidth drives Intl.Segmenter, whose
// 512-entry LRU thrashes when called per-line-per-frame on a long chat.
// Build the padded inner lines for the whole session, reusing each turn's
// cached lines when the turn is stable (a user turn, or an assistant turn
// with `done` set) and the cache was produced for the same width and theme.
// Turns that are not stable are re-rendered and have their cache fields
// cleared.
function renderSessionLines(details: ChatSessionDetails, theme: any, innerWidth: number): string[] {
    const { turns } = details;

    // No turns yet: show a one-line placeholder so the border is not empty.
    if (turns.length === 0) {
        const placeholder = new Container();
        placeholder.addChild(new Text(orangeDim("(chat mode — waiting for first message)"), 0, 0));
        return placeholder
            .render(innerWidth)
            .map((line) => padToInnerWidth(line, innerWidth));
    }

    // Blank separator between turns, already at full inner width so the
    // right border edge stays straight.
    const blankLine = " ".repeat(innerWidth);
    const assembled: string[] = [];

    turns.forEach((turn, index) => {
        if (index > 0) assembled.push(blankLine);

        const stable = turn.role === "user" || (turn.role === "assistant" && turn.done);
        const cacheMatches = turn.cachedWidth === innerWidth && turn.cachedTheme === theme;

        let lines = stable && cacheMatches ? turn.cachedLines : undefined;
        if (!lines) {
            lines = renderTurnLines(turn, theme, innerWidth);
            // Store the result for stable turns; wipe the cache fields
            // for a streaming turn so nothing stale survives.
            turn.cachedLines = stable ? lines : undefined;
            turn.cachedWidth = stable ? innerWidth : undefined;
            turn.cachedTheme = stable ? theme : undefined;
        }
        for (const line of lines) assembled.push(line);
    });

    return assembled;
}
// Drop every turn's render cache — called from the message renderer's
// `invalidate()` hook (triggered by pi when theme changes or when a
// from-scratch re-render is needed).
// Reset the three render-cache fields on every turn of `details`, forcing
// the next render pass to rebuild each turn's lines.
function invalidateSessionCache(details: ChatSessionDetails) {
    details.turns.forEach((turn) => {
        turn.cachedLines = turn.cachedWidth = turn.cachedTheme = undefined;
    });
}
// ── Mode banner + status ─────────────────────────────────────────────────
// Bring the chat-mode UI surfaces (above-editor widget, status entry and
// window title) in line with the current mode state.
//
//   • No UI on `ctx`  → nothing to do.
//   • chatMode unset  → widget and status are removed, title resets to "pi".
//   • chatMode set    → widget shows the latest todos (if any) followed by
//                       the two-line mode banner; status and title name the
//                       active model.
//
// The widget factory also captures the TUI instance into `tuiRef`, which the
// streaming code uses to request re-renders.
function syncUI(ctx: any) {
    if (!ctx?.hasUI) return;
    if (!chatMode) {
        ctx.ui.setWidget("chat-claude", undefined);
        ctx.ui.setStatus("chat-claude", undefined);
        ctx.ui.setTitle("pi");
        return;
    }
    const sessionId = sessions.get(chatMode);
    const short = sessionId ? sessionId.slice(0, 8) : "new";
    const modelUp = capitalize(chatMode).toUpperCase();
    ctx.ui.setWidget("chat-claude", (tui: any, theme: any) => {
        tuiRef = tui; // ← captured for live streaming re-renders
        return {
            invalidate: () => {},
            render: (width: number) => {
                const rail = orange("▌ ");
                const out: string[] = [];
                // Every line this widget emits is clamped to the available
                // width. The banner lines are long (the hint line alone is
                // 100+ columns), so on a narrow terminal they would
                // otherwise overflow while the todo lines above them were
                // correctly truncated.
                const fit = (line: string) => truncateToWidth(line, width, "…", false);
                // ── Todos (if any) ────────────────────────────────────
                // Sourced via getLatestTodos(currentDetails). Rendered
                // BEFORE the mode banner so the layout reads, top→bottom:
                //   orange-bordered conversation
                //   ▌ ☒ completed todo
                //   ▌ ▸ current in-progress todo (activeForm)
                //   ▌ ☐ pending todo
                //   ▌ ◆ CLAUDE CHAT MODE …
                //   ▌ Type to chat · …
                const todos = getLatestTodos(currentDetails);
                if (todos && todos.length > 0) {
                    const { shown, hidden } = sliceTodosForDisplay(todos);
                    for (const todo of shown) {
                        let marker: string;
                        let text: string;
                        if (todo.status === "completed") {
                            marker = theme.fg("success", "☒");
                            text = theme.fg("dim", todo.content);
                        } else if (todo.status === "in_progress") {
                            marker = orangeBold("▸");
                            text = orangeBold(todo.activeForm || todo.content);
                        } else {
                            marker = orangeDim("☐");
                            text = todo.content;
                        }
                        out.push(fit(rail + marker + " " + text));
                    }
                    if (hidden > 0) {
                        const notice = `${hidden} more todo${hidden === 1 ? "" : "s"} hidden`;
                        out.push(fit(rail + theme.fg("dim", notice)));
                    }
                }
                // ── Mode banner ──────────────────────────────────────
                // effort / isGenerating are read at render time, so the
                // banner reflects changes made after this widget was set.
                const title = orangeBold("◆ CLAUDE CHAT MODE");
                const modelLabel = orangeBold(modelUp);
                const sessionTag = orangeDim("session:" + short);
                const effortTag = orangeDim("effort:" + persisted.effort);
                const running = isGenerating ? " " + orange(" streaming…") : "";
                const line1 = rail + title + " " + modelLabel + " " + sessionTag + " " + effortTag + running;
                const line2 = rail + theme.fg("dim",
                    "Type to chat · /claude haiku|sonnet|opus · /claude-new · /claude-effort · /claude-end · /claude-abort");
                out.push(fit(line1), fit(line2));
                return out;
            },
        };
    }, { placement: "aboveEditor" });
    const busy = isGenerating ? " · streaming" : "";
    ctx.ui.setStatus("chat-claude",
        orange(`◆ Claude ${capitalize(chatMode)} · ${short} · effort:${persisted.effort}${busy}`));
    ctx.ui.setTitle(`pi · Claude ${capitalize(chatMode)} Chat`);
}
// ── ESC-to-abort editor ──────────────────────────────────────────────────
// ESC (the "interrupt" action) is on the extension-runner's reserved list
// (see node_modules/@mariozechner/pi-coding-agent/.../runner.js — any
// registerShortcut("escape", …) is silently dropped), so a custom editor is
// the sanctioned way to intercept it. We subclass pi's exported CustomEditor
// and short-circuit ESC ONLY while a chat-claude response is streaming.
// For every other case we defer to `super.handleInput`, which runs the
// app-level keybindings — including pi's own onEscape handler, which
// setCustomEditorComponent copies onto the custom editor at install time
// (see interactive-mode.js setCustomEditorComponent, ~line 1258).
// Editor used while chat mode is active. It behaves like CustomEditor
// except that ESC pressed during an in-flight Claude response aborts that
// response instead of being passed on.
class ChatEscEditor extends CustomEditor {
    constructor(tui: TUI, theme: EditorTheme, keybindings: KeybindingsManager) {
        super(tui, theme, keybindings);
        // Publish this instance so runChatTurn can add freshly submitted
        // prompts to the live history.
        editorRef = this;
        // Seed history with the last 100 persisted prompts, oldest first,
        // so the most recent prompt ends up as the newest history entry.
        persisted.promptHistory
            .slice(-100)
            .forEach((entry) => this.addToHistory(entry));
    }

    handleInput(data: string): void {
        const controller = currentAbort;
        const abortsStream = matchesKey(data, "escape") && isGenerating && controller;
        if (!abortsStream) {
            // Not an abort request — normal editor / app keybinding path.
            super.handleInput(data);
            return;
        }
        try {
            controller.abort();
        } catch {
            /* ok */
        }
        // Cancel any throttled render and repaint immediately; the turn is
        // marked cancelled by runChatTurn once the aborted call settles.
        flushStreamRender();
    }
}
// ── Mode transitions ─────────────────────────────────────────────────────
// Enter chat mode for `model`, or switch to it if chat mode is already on.
//
// `freshSession` forgets the stored Claude session id for that model first.
// A new border box is started (currentDetails dropped) whenever chat mode
// was previously off, the model changed, or a fresh session was requested.
// With a UI present this also (re)creates the pi-ask bridge, installs the
// ESC-aborting editor and shows a notification describing the transition.
function enterChatMode(model: Model, ctx: any, freshSession: boolean) {
    const previousMode = chatMode;
    const wasActive = previousMode !== null;
    const modelChanged = previousMode !== model;

    if (freshSession) sessions.delete(model);

    // The previous session's CustomMessage keeps its own reference to its
    // details, so it stays visible in scrollback after we let go of it here.
    const startsNewBorder = freshSession || modelChanged || !wasActive;
    if (startsNewBorder) currentDetails = null;

    chatMode = model;

    const hasUI = Boolean(ctx?.hasUI);
    if (hasUI) {
        // Re-create the bridge on every entry so its resources never
        // outlive the chat session they were created for.
        askBridge?.close();
        try {
            askBridge = startAskBridge({
                ui: ctx.ui,
                onAsk: () => tuiRef?.requestRender(),
            });
        } catch (err) {
            askBridge = null;
            const reason = err instanceof Error ? err.message : String(err);
            ctx.ui.notify(
                `pi-ask bridge unavailable: ${reason} — Claude won't be able to ask questions.`,
                "warning",
            );
        }
        // Installing the editor again on a model switch simply replaces it
        // with a new instance of the same class.
        ctx.ui.setEditorComponent(
            (tui: TUI, theme: EditorTheme, keybindings: KeybindingsManager) =>
                new ChatEscEditor(tui, theme, keybindings),
        );
    }

    syncUI(ctx);
    if (!hasUI) return;

    const storedSession = sessions.get(model);
    const kind = freshSession || !storedSession
        ? "new session"
        : `resume ${storedSession.slice(0, 8)}`;
    let verb = "Entered chat mode:";
    if (wasActive) verb = modelChanged ? "Switched to" : "Re-entered";
    ctx.ui.notify(`${verb} Claude ${capitalize(model)} · ${kind}`, "info");
}
// Leave chat mode: abort any in-flight response, reset the mode state,
// cancel a pending throttled render, detach from the current session
// details, tear down the pi-ask bridge and restore pi's default editor.
function exitChatMode(ctx: any) {
    try {
        currentAbort?.abort();
    } catch {
        /* ok */
    }
    currentAbort = null;
    isGenerating = false;
    chatMode = null;

    // No point letting a throttled render fire after chat mode has ended.
    if (streamRenderTimer !== null) {
        clearTimeout(streamRenderTimer);
        streamRenderTimer = null;
    }

    // The next entry starts a new border; the message already shown in
    // pi's scrollback keeps its own details and stays where it is.
    currentDetails = null;

    askBridge?.close();
    askBridge = null;

    const hasUI = Boolean(ctx?.hasUI);
    // Passing undefined puts pi's stock editor back in place.
    if (hasUI) ctx.ui.setEditorComponent(undefined);
    editorRef = null;

    syncUI(ctx);
    if (hasUI) ctx.ui.notify("Exited chat mode — back to normal pi.", "info");
}
// ── Session / turn management ────────────────────────────────────────────
// Return the details object of the current chat-mode session, creating it
// (and posting the "chat-claude-session" custom message that displays it)
// on first use.
function ensureSessionMessage(): ChatSessionDetails {
    if (currentDetails === null) {
        const fresh: ChatSessionDetails = { turns: [] };
        currentDetails = fresh;
        pi.sendMessage(
            {
                customType: "chat-claude-session",
                // Empty on purpose: the custom renderer draws everything
                // from `details`.
                content: "",
                display: true,
                details: fresh,
            },
            { triggerTurn: false },
        );
        return fresh;
    }
    return currentDetails;
}
// Run one user → Claude exchange inside the current chat-mode session.
//
// Steps: record the prompt in persisted + live editor history, append the
// user turn and a placeholder assistant turn, call runClaude with streaming
// updates mirrored into the assistant turn, then mark the turn done
// (success), cancelled (aborted) or errored.
//
// Each turn owns its OWN AbortController. The shared `currentAbort` /
// `isGenerating` are only how other code (ESC, /claude-abort,
// exitChatMode) reaches the active turn; this function never relies on
// them to decide what happened to itself, because exitChatMode nulls
// `currentAbort` right after aborting and a newer turn may have replaced it
// by the time this one settles.
//
// Does nothing when chat mode is not active.
async function runChatTurn(userText: string, ctx: any) {
    if (!chatMode) return;
    const model = chatMode;
    const details = ensureSessionMessage();

    // Persist the prompt BEFORE the async Claude call so cancellations and
    // errors still land in history. Skip if identical to the most recent
    // persisted entry.
    const trimmedPrompt = userText.trim();
    if (trimmedPrompt && persisted.promptHistory.at(-1) !== trimmedPrompt) {
        persisted.promptHistory.push(trimmedPrompt);
        if (persisted.promptHistory.length > MAX_PROMPT_HISTORY) {
            persisted.promptHistory = persisted.promptHistory.slice(-MAX_PROMPT_HISTORY);
        }
    }
    // Also push into the live editor so the entry is navigable immediately.
    if (trimmedPrompt) editorRef?.addToHistory(trimmedPrompt);

    // Append user turn + placeholder assistant turn up front so the border
    // extends as soon as the user hits enter.
    details.turns.push({ role: "user", text: userText });
    const existingSession = sessions.get(model);
    const assistantTurn: AssistantTurn = {
        role: "assistant",
        model,
        blocks: [],
        finalText: "",
        isResume: !!existingSession,
        done: false,
    };
    details.turns.push(assistantTurn);
    tuiRef?.requestRender();

    const abortController = new AbortController();
    isGenerating = true;
    currentAbort = abortController;
    syncUI(ctx);
    if (ctx?.hasUI) ctx.ui.setWorkingMessage(`Claude ${capitalize(model)} is thinking…`);

    try {
        const r = await runClaude(userText, {
            // Map the UI slot (e.g. "opus") to the CLI model id.
            model: CLI_MODEL[model],
            sessionId: existingSession,
            cwd: ctx.cwd,
            signal: abortController.signal,
            // Thinking effort, configurable live via /claude-effort.
            effort: persisted.effort,
            // When the pi-ask bridge is up, hand Claude its MCP config and
            // disallow the built-in AskUserQuestion so structured questions
            // go through the bridge instead.
            mcpConfigPath: askBridge?.mcpConfigPath,
            disallowedTools: askBridge ? ["AskUserQuestion"] : undefined,
            onUpdate: (partial) => {
                assistantTurn.blocks = partial.blocks;
                assistantTurn.finalText = partial.finalText;
                // Throttled: a fast token stream must not cause a
                // render per token.
                scheduleStreamRender();
            },
        });
        if (r.sessionId) sessions.set(model, r.sessionId);
        assistantTurn.blocks = r.blocks;
        assistantTurn.finalText = r.finalText;
        assistantTurn.sessionId = r.sessionId;
        assistantTurn.costUsd = r.costUsd;
        assistantTurn.inputTokens = r.inputTokens;
        assistantTurn.outputTokens = r.outputTokens;
        assistantTurn.cacheReadTokens = r.cacheReadTokens;
        assistantTurn.cacheWriteTokens = r.cacheWriteTokens;
        assistantTurn.done = true;
    } catch (err) {
        // Ask THIS turn's controller, not the shared `currentAbort`:
        // exitChatMode clears the shared reference immediately after
        // aborting, which would otherwise make an exit-triggered abort
        // show up as an error instead of a cancellation.
        const aborted = abortController.signal.aborted;
        assistantTurn.done = true;
        assistantTurn.cancelled = aborted;
        assistantTurn.error = aborted ? undefined : (err instanceof Error ? err.message : String(err));
    } finally {
        // Only release the shared generating state if no NEWER turn has
        // taken it over in the meantime (possible after exit + re-enter
        // while this call was still winding down). Clobbering it would
        // leave that newer turn un-abortable and allow overlapping turns.
        const supersededByNewerTurn = currentAbort !== null && currentAbort !== abortController;
        if (!supersededByNewerTurn) {
            isGenerating = false;
            currentAbort = null;
            if (ctx?.hasUI) ctx.ui.setWorkingMessage(undefined);
            syncUI(ctx);
        }
        // Flush (not schedule): this turn just reached its final state, so
        // put that frame on screen now and cancel any pending throttled
        // render.
        flushStreamRender();
    }
}
// ── Input interception ───────────────────────────────────────────────────
// Registered pi commands (/claude, /claude-end, etc.) dispatch BEFORE this
// event fires, so they still work normally. Bash via `!` goes through
// user_bash, not here. Every other text the user submits in chat mode is
// routed straight to Claude.
pi.on("input", async (event, ctx) => {
    const passThrough = { action: "continue" } as const;
    const consumed = { action: "handled" } as const;

    // Only interactive input typed while chat mode is on is ours.
    if (!chatMode || event.source !== "interactive") return passThrough;

    const text = event.text ?? "";
    const isBlank = !text.trim();
    // Blank submissions and `!`-prefixed lines are left to pi.
    if (isBlank || text.trimStart().startsWith("!")) return passThrough;

    if (isGenerating) {
        // One Claude response at a time.
        ctx.ui.notify(
            "Claude is still responding. Use /claude-abort to cancel, then try again.",
            "warning",
        );
        return consumed;
    }

    // Deliberately not awaited: the turn streams in the background while
    // this handler returns right away. Failures are reported via notify.
    runChatTurn(text, ctx).catch((err) => {
        const reason = err instanceof Error ? err.message : String(err);
        ctx.ui.notify(`Chat error: ${reason}`, "error");
    });
    return consumed;
});
// Keep chat-mode custom messages out of pi's LLM context — chat mode is
// between the user and Claude, not part of pi's conversation.
pi.on("context", (event) => {
    // A message is ours when it is a custom message whose type is listed
    // in CHAT_CLAUDE_CUSTOM_TYPES; everything else passes through.
    const isChatClaudeMessage = (m: any) =>
        m.role === "custom" && CHAT_CLAUDE_CUSTOM_TYPES.has(m.customType);
    return {
        messages: event.messages.filter((m: any) => !isChatClaudeMessage(m)),
    };
});
// ── Session lifecycle ────────────────────────────────────────────────────
// On session start, draw (or clear) the chat-mode UI for the current state.
pi.on("session_start", (_event, ctx) => {
    syncUI(ctx);
});
// On shutdown, leave chat mode if it is active, then release anything that
// could still be lingering.
pi.on("session_shutdown", (_event, ctx) => {
    if (chatMode) exitChatMode(ctx);

    // Belt and braces: exitChatMode already closes the bridge, but close
    // it here too in case one is still around with chat mode off.
    askBridge?.close();
    askBridge = null;

    // Same for the throttled render timer.
    if (streamRenderTimer !== null) {
        clearTimeout(streamRenderTimer);
        streamRenderTimer = null;
    }
});
// ── Commands ─────────────────────────────────────────────────────────────
// Argument completions for the model-taking commands: every entry of
// MODELS that starts with the lower-cased prefix typed so far.
const modelCompletions = (prefix: string) => {
    const needle = prefix.toLowerCase();
    return MODELS
        .filter((candidate) => candidate.startsWith(needle))
        .map((candidate) => ({ value: candidate, label: candidate }));
};
pi.registerCommand("claude", {
    description: [
        "Enter distinct Claude chat mode — typed input bypasses pi's LLM and goes to Claude.",
        " /claude — enter with last/default model (sonnet)",
        " /claude haiku|sonnet|opus — enter/switch model",
    ].join("\n"),
    getArgumentCompletions: modelCompletions,
    // A recognised model name wins; otherwise keep the active model, or
    // fall back to sonnet when chat mode is off. Never starts a fresh
    // Claude session.
    handler: async (args, ctx) => {
        const requested = (args ?? "").trim().toLowerCase();
        const isKnownModel = (MODELS as readonly string[]).includes(requested);
        const target: Model = isKnownModel ? (requested as Model) : (chatMode ?? "sonnet");
        enterChatMode(target, ctx, false);
    },
});
pi.registerCommand("claude-new", {
    description: "Enter chat mode with a fresh Claude session (discards any resumed session id). Example: /claude-new opus",
    getArgumentCompletions: modelCompletions,
    // Same model resolution as /claude, but always requests a fresh
    // session for the chosen model.
    handler: async (args, ctx) => {
        const requested = (args ?? "").trim().toLowerCase();
        const isKnownModel = (MODELS as readonly string[]).includes(requested);
        const target: Model = isKnownModel ? (requested as Model) : (chatMode ?? "sonnet");
        enterChatMode(target, ctx, true);
    },
});
// /claude-effort — set the extended-thinking effort level for subsequent
// chat turns. Without the flag `claude -p` emits no thinking_delta
// events at all (the interactive `defaultThinkingLevel` setting is
// ignored in -p mode); with it, the model decides on-demand whether
// to actually think. Stored on the persisted state so the choice
// survives `/reload`.
//
// /claude-effort — show current value
// /claude-effort max — set to max (default)
// /claude-effort off — disable (skip the --effort flag)
// Argument completions for /claude-effort: every entry of EFFORTS that
// starts with the lower-cased prefix typed so far.
const effortCompletions = (prefix: string) => {
    const needle = prefix.toLowerCase();
    return EFFORTS
        .filter((level) => level.startsWith(needle))
        .map((level) => ({ value: level, label: level }));
};
pi.registerCommand("claude-effort", {
    description: [
        "Set the extended-thinking effort level for Claude chat turns.",
        " /claude-effort — show current value",
        " /claude-effort off|low|medium|high|xhigh|max",
        "",
        "Note: without an effort setting, `claude -p` emits no thinking",
        "blocks at all — so lowering this trades thought visibility for speed.",
    ].join("\n"),
    getArgumentCompletions: effortCompletions,
    // No argument → report the current level. Unknown argument → warn and
    // change nothing. Otherwise store the new level and refresh the UI.
    handler: async (args, ctx) => {
        const requested = (args ?? "").trim().toLowerCase();
        const validLevels = EFFORTS.join(", ");

        if (requested === "") {
            ctx.ui.notify(
                `Current Claude effort: ${persisted.effort}. Options: ${validLevels}.`,
                "info",
            );
            return;
        }

        const isValid = (EFFORTS as readonly string[]).includes(requested);
        if (!isValid) {
            ctx.ui.notify(
                `Unknown effort "${requested}". Valid levels: ${validLevels}.`,
                "warning",
            );
            return;
        }

        const previous = persisted.effort;
        persisted.effort = requested as Effort;
        syncUI(ctx);

        let note: string;
        if (requested === "off") {
            note = "thinking disabled — Claude will no longer emit thinking blocks";
        } else {
            note = `thinking effort set to ${requested}`;
        }
        ctx.ui.notify(
            `${note} (was ${previous}). Applies to the next chat turn.`,
            "info",
        );
    },
});
pi.registerCommand("claude-end", {
    description: "Exit Claude chat mode and resume normal pi operation.",
    // Only tears chat mode down when it is actually on.
    handler: async (_args, ctx) => {
        if (chatMode) {
            exitChatMode(ctx);
            return;
        }
        ctx.ui.notify("Not in chat mode.", "info");
    },
});
pi.registerCommand("claude-abort", {
    description: "Cancel the in-flight Claude response (no effect if nothing is generating).",
    // Signals the active turn's AbortController; runChatTurn then marks
    // the turn as cancelled once the call settles.
    handler: async (_args, ctx) => {
        const controller = isGenerating ? currentAbort : null;
        if (!controller) {
            ctx.ui.notify("No active Claude response to cancel.", "info");
            return;
        }
        try {
            controller.abort();
        } catch {
            /* ok */
        }
        ctx.ui.notify("Aborting Claude response…", "info");
    },
});
// /claude-resume — present a picker of past Claude sessions whose cwd matches
// the current project directory, then resume the chosen one in chat mode.
//
// Besides setting the session id, the handler replays the historical
// transcript (via loadSessionTurns) into the session's orange border so the
// user can see the context being resumed. Independently of what pi shows,
// `claude --resume <id>` keeps the FULL conversation context alive on the
// Claude side, so subsequent prompts behave exactly like a continuation.
pi.registerCommand("claude-resume", {
    description: "Pick a past Claude session for the current project directory and resume it in chat mode.",
    // Flow: guard (UI present, nothing streaming) → pick a past session →
    // pick the model label to show it under → store the session id, enter
    // chat mode, and replay the stored transcript into a FRESH border.
    handler: async (_args, ctx) => {
        if (!ctx?.hasUI) {
            ctx?.ui?.notify?.("/claude-resume requires interactive mode.", "error");
            return;
        }
        if (isGenerating) {
            ctx.ui.notify(
                "A Claude response is still streaming. Use /claude-abort first, then /claude-resume.",
                "warning",
            );
            return;
        }
        const past = readPastSessions(ctx.cwd);
        if (past.length === 0) {
            ctx.ui.notify(
                `No past Claude sessions found for ${ctx.cwd}.`,
                "info",
            );
            return;
        }

        // Cap the picker at the 25 most recent sessions to keep the
        // overlay tractable. Relies on readPastSessions() returning the
        // sessions newest-first.
        const MAX_OPTIONS = 25;
        const choices = past.slice(0, MAX_OPTIONS);

        // Label format:
        //   <relative time> · <first user message truncated> · (session:<short-id>)
        const PREVIEW_MAX = 60;
        const buildLabel = (s: PastSession) => {
            const preview = s.firstUserMessage
                ? truncate(s.firstUserMessage, PREVIEW_MAX)
                : "(no user message)";
            return `${relativeTime(s.mtimeMs)} · ${preview} · (session:${s.sessionId.slice(0, 8)})`;
        };

        // Disambiguate: if two sessions produce the same display label,
        // append a counter so the post-pick lookup matches exactly one.
        const labels: string[] = [];
        const seen = new Map<string, number>();
        for (const s of choices) {
            const base = buildLabel(s);
            const n = seen.get(base) ?? 0;
            seen.set(base, n + 1);
            labels.push(n === 0 ? base : `${base} #${n + 1}`);
        }

        const sessionPick = await askSingleQuestionWithInlineNote(ctx.ui, {
            question: `Resume which past Claude session in ${ctx.cwd}?`,
            options: labels.map((label) => ({ label })),
        });
        if (sessionPick.selectedOptions.length === 0) {
            ctx.ui.notify("Resume cancelled.", "info");
            return;
        }
        const pickedLabel = sessionPick.selectedOptions[0];
        const idx = labels.indexOf(pickedLabel);
        if (idx < 0) {
            ctx.ui.notify("Picked session not found (label mismatch).", "warning");
            return;
        }
        const picked = choices[idx];

        // Second picker: which model to display the resumed conversation
        // under in pi's UI. The session's original model is marked with
        // "(used by this session)" and offered as the recommended default.
        const originalModel = picked.model;
        const modelLabels = MODELS.map((m) =>
            originalModel === m ? `${m} (used by this session)` : m,
        );
        // indexOf yields -1 when the recorded model is not one of MODELS;
        // never hand that to the picker — fall back to the default slot.
        const DEFAULT_MODEL_IDX = 1; // default sonnet
        const originalIdx = originalModel ? MODELS.indexOf(originalModel) : -1;
        const recommendedIdx = originalIdx >= 0 ? originalIdx : DEFAULT_MODEL_IDX;
        const modelPick = await askSingleQuestionWithInlineNote(ctx.ui, {
            question: "Display this resumed session under which model in pi's UI?",
            options: modelLabels.map((label) => ({ label })),
            recommended: recommendedIdx,
        });
        if (modelPick.selectedOptions.length === 0) {
            ctx.ui.notify("Resume cancelled.", "info");
            return;
        }
        // Strip any "(used by this session)" suffix and parse the bare
        // model name (the first whitespace-separated token).
        const bare = modelPick.selectedOptions[0].split(/\s+/)[0].toLowerCase();
        const targetModel: Model = (MODELS as readonly string[]).includes(bare)
            ? (bare as Model)
            : "sonnet";

        // Wire up the session id BEFORE entering chat mode, so the next
        // turn the user sends resumes it.
        sessions.set(targetModel, picked.sessionId);
        enterChatMode(targetModel, ctx, false);

        // Always start a NEW border for the resumed session. enterChatMode
        // keeps the existing details when chat mode was already active on
        // the same model; without this reset the resumed transcript would
        // be appended to the live conversation, mixing two different
        // Claude sessions inside one box. The previous message keeps its
        // own details and stays in scrollback.
        currentDetails = null;

        // Replay the historical transcript inside the new border so the
        // user can SEE the context they're resuming.
        const historical = loadSessionTurns(picked.sessionId, ctx.cwd, targetModel);
        const details = ensureSessionMessage();
        for (const turn of historical) details.turns.push(turn);
        tuiRef?.requestRender();

        const ago = relativeTime(picked.mtimeMs);
        const preview = picked.firstUserMessage
            ? `: "${truncate(picked.firstUserMessage, 50)}"`
            : "";
        const histNote = historical.length > 0
            ? ` (${historical.length} historical turn${historical.length === 1 ? "" : "s"} loaded)`
            : " (transcript empty or unreadable)";
        ctx.ui.notify(
            `Resuming session ${picked.sessionId.slice(0, 8)} (${ago})${preview} as Claude ${capitalize(targetModel)}.${histNote}`,
            "info",
        );
    },
});
// Note on ESC: pi's extension runner reserves the "interrupt" action, so
// pi.registerShortcut("escape", …) is silently ignored. ESC-to-abort is
// wired via the ChatEscEditor custom editor installed in enterChatMode.
// ── Raw code copy shortcut ───────────────────────────────────────────────
// Ctrl+Shift+C copies the raw, unrendered content of a fenced code block
// from the current chat-claude session by reading directly from the parsed
// JSON stream — bypassing ANSI sequences, stray indentation, and
// line-continuation garbage that normal terminal selection produces.
//
// 0 blocks found → notify; nothing copied
// 1 block found → copy immediately + notify
// N blocks found → inline picker (newest first) → copy selected + notify
//
// Note: most terminal emulators handle Ctrl+Shift+C at the VTE layer
// (before the app sees it) so this shortcut is only reachable when
// Kitty keyboard protocol is active and the terminal forwards the combo.
// It does NOT intercept the terminal's own clipboard mechanism when pi
// is not the foreground process receiving extended key events.
pi.registerShortcut("ctrl+shift+c", {
    description: "Copy a raw fenced code block from the current Claude chat session (bypasses ANSI rendering).",
    // Copies one fenced code block from the current session: the only
    // block when there is exactly one, the first block when no UI is
    // available to ask, otherwise whichever block the user picks.
    handler: async (ctx) => {
        const session = currentDetails;
        if (!session) {
            ctx.ui.notify(
                "No active chat-claude session — start one with /claude first.",
                "info",
            );
            return;
        }

        const blocks = extractCodeBlocksFromSession(session);
        if (blocks.length === 0) {
            ctx.ui.notify(
                "No fenced code blocks found in the current chat-claude session.",
                "info",
            );
            return;
        }

        // Default to index 0 (treated as the newest block); only ask when
        // there is a real choice AND a UI to ask with.
        let chosen = blocks[0]!;
        if (blocks.length > 1 && ctx.hasUI) {
            // The numeric prefix keeps labels distinct even when two
            // blocks share the same label text.
            const labels = blocks.map((b, i) => `${i + 1}. ${b.label}`);
            const pick = await askSingleQuestionWithInlineNote(ctx.ui, {
                question: `${blocks.length} code blocks in this session — pick one to copy:`,
                options: labels.map((label) => ({ label })),
                recommended: 0, // default: newest block
            });
            if (pick.selectedOptions.length === 0) return; // user cancelled
            const pickedIdx = labels.indexOf(pick.selectedOptions[0] ?? "");
            if (pickedIdx < 0) return;
            chosen = blocks[pickedIdx]!;
        }

        copyToClipboard(chosen.code);

        const lineCount = chosen.code.split("\n").length;
        const plural = lineCount === 1 ? "" : "s";
        const langNote = chosen.lang ? ` (${chosen.lang})` : "";
        ctx.ui.notify(`Copied${langNote} · ${lineCount} line${plural}`, "success");
    },
});
// ── Message renderer ─────────────────────────────────────────────────────
// ONE custom message type holds the WHOLE chat-mode session. Returning a
// live component (render reads `details.turns` on every frame) lets
// streaming updates appear with a simple `tuiRef.requestRender()` — no
// full rebuild of pi's chat container required.
//
// Performance: each frame now reuses cached per-turn line output for
// completed turns (see renderSessionLines). Only the in-flight assistant
// turn (if any) is rebuilt each frame, so long conversations stop driving
// O(turns × blocks) allocation during Claude streaming.
pi.registerMessageRenderer("chat-claude-session", (message, _opts, theme) => {
    const details = message.details as ChatSessionDetails | undefined;
    // Not one of ours (or malformed) — decline to render.
    if (!details || !Array.isArray(details.turns)) return undefined;

    // Below this width there is no room for a border; emit bare lines.
    const MIN_BORDERED_WIDTH = 6;
    // 2 border chars + 2 padding chars.
    const BORDER_OVERHEAD = 4;

    const render = (width: number) => {
        if (width < MIN_BORDERED_WIDTH) return renderSessionLines(details, theme, width);
        // renderSessionLines hands back lines already padded to the inner
        // width, ready to be wrapped as-is.
        const inner = renderSessionLines(details, theme, width - BORDER_OVERHEAD);
        return wrapInOrangeBorder(inner, width);
    };

    return {
        // Drop every turn's cached lines so the next render rebuilds them.
        invalidate: () => invalidateSessionCache(details),
        render,
    };
});
}