1372 lines
56 KiB
TypeScript
1372 lines
56 KiB
TypeScript
/**
 * chat-claude — Distinctive Claude chat MODE inside pi.
 *
 * When chat mode is active, typed user input is routed to a Claude model
 * (haiku/sonnet/opus) via the `claude` CLI — NOT to pi's active LLM.
 *
 * Rendering goals (match pi's native chat UX):
 *   - Text appears as full markdown (no truncated previews, no dim grey).
 *   - Thinking blocks stream live as italic `thinkingText`-coloured markdown
 *     (the `claude` CLI is invoked with --include-partial-messages).
 *   - Tool calls use pi's normal tool-execution look (renderToolBlock).
 *
 * All turns of a single chat-mode session are rendered inside ONE continuous
 * orange border: the top line sits above the first turn, the bottom line
 * below the most recent turn, and the border extends live as new turns
 * (user + assistant) arrive. A new border starts each time the user enters
 * chat mode again via /claude or /claude-new.
 *
 * Commands:
 *   /claude [haiku|sonnet|opus]      — enter chat mode / switch model
 *   /claude-new [haiku|sonnet|opus]  — enter chat mode with a fresh Claude session
 *   /claude-resume                   — pick a past session for the current cwd and resume it
 *   /claude-end                      — exit chat mode
 *   /claude-abort                    — cancel an in-flight Claude response
 */
|
||
|
||
import { closeSync, openSync, readdirSync, readFileSync, readSync, statSync } from "node:fs";
|
||
import { homedir } from "node:os";
|
||
import { join } from "node:path";
|
||
import { CustomEditor, getMarkdownTheme } from "@mariozechner/pi-coding-agent";
|
||
import type { ExtensionAPI, KeybindingsManager } from "@mariozechner/pi-coding-agent";
|
||
import { Box, Container, matchesKey, Markdown, Spacer, Text, truncateToWidth, TUI, visibleWidth, type Component, type EditorTheme } from "@mariozechner/pi-tui";
|
||
import {
|
||
formatUsage,
|
||
renderToolBlock,
|
||
runClaude,
|
||
type StreamBlock,
|
||
} from "../shared/claude-stream.js";
|
||
import { startAskBridge, type AskBridge } from "../shared/pi-ask-bridge.js";
|
||
import { askSingleQuestionWithInlineNote } from "./pi-ask-tool/ask-inline-ui.js";
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Orange styling
|
||
// ---------------------------------------------------------------------------
|
||
// Raw SGR escape sequences. 208 and 130 are xterm 256-colour palette
// indices; RESET clears every attribute, BOLD switches bold on.
const ORANGE = "\x1b[38;5;208m"; // pumpkin / tangerine
const ORANGE_DIM = "\x1b[38;5;130m";
const RESET = "\x1b[0m";
const BOLD = "\x1b[1m";

/** Wrap `s` in the bright orange foreground, resetting attributes afterwards. */
const orange = (s: string): string => `${ORANGE}${s}${RESET}`;

/** Same as `orange`, with bold switched on as well. */
const orangeBold = (s: string): string => `${ORANGE}${BOLD}${s}${RESET}`;

/** Wrap `s` in the darker orange foreground, resetting attributes afterwards. */
const orangeDim = (s: string): string => `${ORANGE_DIM}${s}${RESET}`;
|
||
|
||
// ---------------------------------------------------------------------------
// Orange border wrapping helper — wraps an array of inner lines in a
// continuous orange box. Applied at the session level so the WHOLE chat
// conversation sits inside ONE box (top above first turn, bottom below
// most recent turn). Pure string→string — no component allocation per frame.
//
// IMPORTANT: every entry of `paddedInnerLines` must ALREADY be padded to the
// inner width (`width - 4`: two border glyphs plus one space of padding on
// each side) in visible columns. We don't call visibleWidth() here because
// that function invokes Intl.Segmenter (expensive ICU BreakIterator on every
// miss) and this wrapper runs on every single line of the session on every
// frame. Profile data showed 85% of pi's idle CPU being burned in Segmenter
// via this function. Callers (renderSessionLines) pre-pad inner lines once
// per turn and cache them, so the cost amortises to O(streaming tail).
// ---------------------------------------------------------------------------
|
||
/**
 * Surround pre-padded lines with an orange box.
 *
 * @param paddedInnerLines Lines already padded to `width - 4` visible columns
 *   (each row is emitted as border + space + line + space + border).
 * @param width Total width of the box including both border glyphs. Must be
 *   at least 2: the horizontal rule is `width - 2` dashes and
 *   `String.prototype.repeat` throws a RangeError on a negative count.
 * @returns The top rule, one bordered row per input line, then the bottom rule.
 */
function wrapInOrangeBorder(paddedInnerLines: string[], width: number): string[] {
  const rule = "─".repeat(width - 2);
  const side = orange("│");
  const rows = paddedInnerLines.map((inner) => `${side} ${inner} ${side}`);
  return [orange(`╭${rule}╮`), ...rows, orange(`╰${rule}╯`)];
}
|
||
|
||
// Pad a single inner line to exactly `innerWidth` visible columns, OR
|
||
// truncate it if it's already over-wide. Uses visibleWidth() — pi-tui's
|
||
// grapheme-aware width function (which is what sits on top of the hot
|
||
// Intl.Segmenter path). Intended to be called ONCE per line at cache-build
|
||
// time, NOT per frame.
|
||
//
|
||
// Truncation is a defensive safety net: any component that emits a line
|
||
// wider than the width it was handed would otherwise crash pi's TUI (see
|
||
// tui.js doRender: "Rendered line N exceeds terminal width"). Without this,
|
||
// one stray over-wide line (e.g. a long source code line inside a Read
|
||
// tool result) takes down the entire session.
|
||
/**
 * Force `line` to occupy exactly `innerWidth` visible columns.
 *
 * Over-wide lines are cut with `truncateToWidth` (ellipsis "…", padding
 * enabled); narrower lines are right-padded with spaces; exact fits are
 * returned untouched. Measures with `visibleWidth`, so call it when building
 * a cache rather than on every frame.
 */
function padToInnerWidth(line: string, innerWidth: number): string {
  const lineWidth = visibleWidth(line);
  if (lineWidth > innerWidth) {
    return truncateToWidth(line, innerWidth, "…", true);
  }

  const missing = innerWidth - lineWidth;
  if (missing > 0) {
    return line + " ".repeat(missing);
  }
  return line;
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Read-tool result truncation
|
||
//
|
||
// `Read` tool calls inside chat mode often dump entire files into the result
|
||
// banner — many hundreds of lines, which buries the surrounding conversation.
|
||
// We cap the rendered file content at MAX_READ_LINES and append a single
|
||
// centered notice line describing how many lines were hidden. This is a
|
||
// PRESENTATION-only truncation: `block.result.text` is left untouched, so
|
||
// resumed sessions / re-renders still see the full content.
|
||
//
|
||
// Centering needs render-time width, so we implement a tiny custom Component
|
||
// (TruncatedReadResult) and swap it into the Box body produced by the shared
|
||
// renderToolBlock helper. The same dim line-number formatting used by
|
||
// renderToolResultBox is preserved so the truncated view looks identical to
|
||
// the un-truncated one above the notice.
|
||
// ---------------------------------------------------------------------------
|
||
/** Maximum number of file lines shown for a `Read` tool result. */
const MAX_READ_LINES = 40;

/**
 * Component that renders the first MAX_READ_LINES numbered lines of a Read
 * result, followed by a centred notice saying how many lines were hidden.
 *
 * Every emitted line is clamped to the render width: a line wider than the
 * width a component was handed is what trips the TUI's "exceeds terminal
 * width" check described in the comments of this file.
 */
class TruncatedReadResult implements Component {
  /**
   * @param numbered  Parsed result lines (`num` is the line-number column, "" when absent).
   * @param maxNumLen Width the line-number column is left-padded to.
   * @param dimFn     Styles the line-number column.
   * @param noticeFn  Styles the "N more lines hidden" notice.
   */
  constructor(
    private readonly numbered: { num: string; content: string }[],
    private readonly maxNumLen: number,
    private readonly dimFn: (s: string) => string,
    private readonly noticeFn: (s: string) => string,
  ) {}

  invalidate(): void {
    /* stateless — nothing is cached between renders */
  }

  /**
   * @param width Visible columns available to this component.
   * @returns At most MAX_READ_LINES content lines plus, when anything was
   *   hidden, one notice line. No returned line is wider than `width`.
   */
  render(width: number): string[] {
    const total = this.numbered.length;
    const visible = Math.min(MAX_READ_LINES, total);
    const lines: string[] = [];

    for (let i = 0; i < visible; i++) {
      const l = this.numbered[i];
      // truncateToWidth is ANSI-aware, so the SGR sequences dimFn wraps
      // around the line number survive the cut on a long source line.
      const raw = this.dimFn(l.num.padStart(this.maxNumLen)) + " " + l.content;
      lines.push(truncateToWidth(raw, width, "…", false));
    }

    if (total > visible) {
      const hidden = total - visible;
      const notice = `… ${hidden} more line${hidden === 1 ? "" : "s"} hidden …`;
      const left = Math.max(0, Math.floor((width - visibleWidth(notice)) / 2));
      // Clamp the notice too: when `width` is smaller than the notice itself
      // the centring offset is 0 and the raw notice would overflow.
      lines.push(truncateToWidth(" ".repeat(left) + this.noticeFn(notice), width, "…", false));
    }

    return lines;
  }
}
|
||
|
||
// Wrap shared renderToolBlock: for `Read` tool blocks whose result exceeds
|
||
// MAX_READ_LINES, replace the Box body's child Text with our truncating
|
||
// component. All other tool kinds, error results, and short reads pass
|
||
// through unchanged.
|
||
/**
 * Render a tool block through the shared `renderToolBlock`, then — only for
 * a successful `Read` whose result has more than MAX_READ_LINES non-empty
 * lines — replace the body Box's content with a TruncatedReadResult.
 *
 * `block.result.text` itself is never modified. If the rendered container's
 * second child is not a Box, the untouched rendering is returned.
 */
function renderToolBlockTruncated(block: Extract<StreamBlock, { type: "tool" }>, theme: any): Container {
  const rendered = renderToolBlock(block, theme);

  const isRead = block.name.toLowerCase() === "read";
  if (!isRead || !block.result || block.result.isError) return rendered;

  const nonEmpty: string[] = [];
  for (const l of block.result.text.split("\n")) {
    if (l.length > 0) nonEmpty.push(l);
  }
  if (nonEmpty.length <= MAX_READ_LINES) return rendered;

  // Each line is "<number>\t<content>"; lines without a tab get an empty
  // number column. Track the widest number column while parsing.
  let maxNumLen = 0;
  const parsed = nonEmpty.map((l) => {
    const tab = l.indexOf("\t");
    const entry = tab < 0
      ? { num: "", content: l }
      : { num: l.slice(0, tab), content: l.slice(tab + 1) };
    if (entry.num.length > maxNumLen) maxNumLen = entry.num.length;
    return entry;
  });

  // Expected layout is [headerText, bodyBox]; anything else is left alone.
  const body = rendered.children[1];
  if (!(body instanceof Box)) return rendered;

  body.clear();
  body.addChild(
    new TruncatedReadResult(
      parsed,
      maxNumLen,
      (s) => theme.fg("dim", s),
      (s) => theme.fg("dim", s),
    ),
  );
  return rendered;
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Models / turn types
|
||
// ---------------------------------------------------------------------------
|
||
/** Short model names accepted by the chat commands. */
const MODELS = ["haiku", "sonnet", "opus"] as const;

/** One of the short model names in MODELS. */
type Model = (typeof MODELS)[number];

/** Upper-case the first UTF-16 code unit of `s`; the rest is left as-is. */
const capitalize = (s: string): string => {
  const head = s.charAt(0).toUpperCase();
  const tail = s.slice(1);
  return `${head}${tail}`;
};
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Past-session discovery (used by /claude-resume).
|
||
//
|
||
// Claude CLI persists every session's transcript at:
|
||
// ~/.claude/projects/<mangled-cwd>/<session-uuid>.jsonl
|
||
// where the mangling rule (verified empirically) is "replace every '/' and
|
||
// '.' with '-'". So /home/jonas/dotfiles/pi/.pi → -home-jonas-dotfiles-pi--pi
|
||
// (the leading '-' comes from the leading '/'; '.pi' contributes '--pi'
|
||
// because both '/' and '.' map to '-').
|
||
//
|
||
// We don't need to consult ~/.claude/sessions/ for this picker — that
|
||
// directory only contains metadata for currently-running Claude processes.
|
||
// The on-disk transcript at projects/<cwd>/<id>.jsonl is the source of
|
||
// truth for "past sessions in this directory".
|
||
// ---------------------------------------------------------------------------
|
||
/**
 * Turn a working directory into the directory name used under
 * ~/.claude/projects: every "/" and every "." becomes "-".
 */
function mangleCwd(cwd: string): string {
  const pieces = cwd.split(/[/.]/);
  return pieces.join("-");
}
|
||
|
||
/**
 * Format how long ago the epoch-millisecond timestamp `ms` was, using the
 * coarsest unit that applies: s, m, h, d, mo (30-day months) or y (365-day
 * years). Timestamps later than now yield "in the future".
 *
 * Months are used for every age below 365 days. Gating the month branch on
 * "fewer than 12 months" instead would leave ages of 360–364 days (twelve
 * 30-day months, zero 365-day years) falling through to "0y ago".
 */
function relativeTime(ms: number): string {
  const diff = Date.now() - ms;
  if (diff < 0) return "in the future";

  const sec = Math.floor(diff / 1000);
  if (sec < 60) return `${sec}s ago`;

  const min = Math.floor(sec / 60);
  if (min < 60) return `${min}m ago`;

  const hr = Math.floor(min / 60);
  if (hr < 24) return `${hr}h ago`;

  const day = Math.floor(hr / 24);
  if (day < 30) return `${day}d ago`;
  if (day < 365) return `${Math.floor(day / 30)}mo ago`;

  return `${Math.floor(day / 365)}y ago`;
}
|
||
|
||
/**
 * Reduce a raw Claude model identifier (e.g. "claude-haiku-4-5-20251001")
 * to one of the canonical short names.
 *
 * Matching is a case-insensitive substring test, tried in the order
 * haiku → sonnet → opus; the first hit wins.
 *
 * @returns The short name, or null when none of them occurs in `raw`.
 */
function normalizeRawModel(raw: string): Model | null {
  const needle = raw.toLowerCase();
  for (const name of ["haiku", "sonnet", "opus"] as const) {
    if (needle.includes(name)) return name;
  }
  return null;
}
|
||
|
||
/** One past Claude session found on disk, as listed by /claude-resume. */
interface PastSession {
  /** Transcript file name without its ".jsonl" extension. */
  sessionId: string;
  /** Modification time of the transcript file, in epoch milliseconds. */
  mtimeMs: number;
  /** First user message with whitespace collapsed; "" if none was found. */
  firstUserMessage: string;
  /** Canonical short model name; null when it couldn't be determined. */
  model: Model | null;
  /** Model string exactly as found in the JSONL; "" if none was found. */
  rawModel: string;
}
|
||
|
||
/**
 * Read at most `maxBytes` from the start of `path` and decode them as UTF-8,
 * so multi-MB JSONL transcripts never have to be loaded whole.
 *
 * Issues a single `readSync` from offset 0; the descriptor is closed even
 * when reading throws. Errors from open/read propagate to the caller.
 */
function readFileHead(path: string, maxBytes: number): string {
  const handle = openSync(path, "r");
  try {
    const scratch = Buffer.alloc(maxBytes);
    const bytesRead = readSync(handle, scratch, 0, maxBytes, 0);
    // Decode only the part that was actually filled.
    const filled = scratch.subarray(0, bytesRead);
    return filled.toString("utf8");
  } finally {
    closeSync(handle);
  }
}
|
||
|
||
/**
 * Pluck the first user message and the first model id from a transcript head.
 *
 * `head` is scanned line by line as JSONL. Lines that are blank, fail to
 * parse, or parse to something other than an object are skipped. Scanning
 * stops as soon as both values have been found.
 *
 * @param head Leading portion of a session transcript (may end mid-line).
 * @returns `firstUserMessage` with every whitespace run collapsed to a single
 *   space and trimmed, and `rawModel` verbatim; each is "" when not found.
 */
function extractSessionMeta(head: string): { firstUserMessage: string; rawModel: string } {
  let firstUserMessage = "";
  let rawModel = "";

  for (const line of head.split("\n")) {
    if (firstUserMessage && rawModel) break;
    if (!line.trim()) continue;

    let ev: any;
    try {
      ev = JSON.parse(line);
    } catch {
      continue;
    }
    // JSON.parse accepts bare literals; a line containing just `null` would
    // otherwise throw on the property reads below and abort the whole scan.
    if (typeof ev !== "object" || ev === null) continue;

    if (!firstUserMessage) {
      // Two equivalent sources: a queue-operation enqueue carries the raw
      // text the user typed; a `type: "user"` event carries it inside
      // message.content (which is either a string or an array of blocks).
      if (ev.type === "queue-operation" && ev.operation === "enqueue" && typeof ev.content === "string") {
        firstUserMessage = ev.content;
      } else if (ev.type === "user" && ev.message) {
        const c = ev.message.content;
        if (typeof c === "string") {
          firstUserMessage = c;
        } else if (Array.isArray(c)) {
          firstUserMessage = c
            .filter((b: any) => b?.type === "text" && typeof b.text === "string")
            .map((b: any) => b.text as string)
            .join(" ");
        }
      }
    }

    if (!rawModel && typeof ev.message?.model === "string") {
      rawModel = ev.message.model;
    }
  }

  return {
    firstUserMessage: firstUserMessage.replace(/\s+/g, " ").trim(),
    rawModel,
  };
}
|
||
|
||
/**
 * List the Claude sessions recorded for `cwd`, newest first.
 *
 * Looks in ~/.claude/projects/<mangled-cwd>/ for `*.jsonl` transcripts. A
 * directory that cannot be listed yields []; a transcript that cannot be
 * stat'ed or read is left out of the result.
 */
function readPastSessions(cwd: string): PastSession[] {
  const projectDir = join(homedir(), ".claude", "projects", mangleCwd(cwd));

  let names: string[];
  try {
    names = readdirSync(projectDir);
  } catch {
    return [];
  }

  const sessions: PastSession[] = [];
  for (const name of names) {
    if (!name.endsWith(".jsonl")) continue;
    const file = join(projectDir, name);

    let mtimeMs: number;
    let head: string;
    try {
      mtimeMs = statSync(file).mtimeMs;
      // ~256 KB is enough to reach the first user message and the first
      // assistant turn (which carries the model id) in any reasonable
      // transcript, without paying for multi-MB reads.
      head = readFileHead(file, 256 * 1024);
    } catch {
      continue;
    }

    const meta = extractSessionMeta(head);
    sessions.push({
      sessionId: name.replace(/\.jsonl$/, ""),
      mtimeMs,
      firstUserMessage: meta.firstUserMessage,
      model: meta.rawModel ? normalizeRawModel(meta.rawModel) : null,
      rawModel: meta.rawModel,
    });
  }

  // Most recently modified transcript first.
  sessions.sort((older, newer) => newer.mtimeMs - older.mtimeMs);
  return sessions;
}
|
||
|
||
/**
 * Shorten `s` to at most `max` UTF-16 code units, ending in "…" when cut.
 *
 * Strings that already fit are returned unchanged. Otherwise the first
 * `max - 1` units (never fewer than zero) are kept, trailing whitespace is
 * dropped, and the ellipsis is appended.
 */
function truncate(s: string, max: number): string {
  if (s.length <= max) return s;
  const keep = Math.max(0, max - 1);
  const kept = s.slice(0, keep).trimEnd();
  return `${kept}…`;
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// JSONL transcript → ChatTurn[]
|
||
//
|
||
// Given a sessionId and cwd, load the full transcript at
|
||
// ~/.claude/projects/<mangled-cwd>/<sessionId>.jsonl
|
||
// and convert it into the same UserTurn / AssistantTurn shape the live
|
||
// runChatTurn() path produces. This lets /claude-resume render the past
|
||
// context inside the orange border so the user can SEE what they're
|
||
// resuming, not just blindly continue an invisible thread.
|
||
//
|
||
// JSONL event reference (observed in 2.1.118 transcripts):
|
||
// {type:"user", message:{role:"user", content: <string>}} ← typed prompt
|
||
// {type:"user", message:{role:"user", content: [{type:"tool_result", …}, …]}} ← tool outputs
|
||
// {type:"assistant",message:{role:"assistant", content: [<one of: thinking|text|tool_use>], usage:{…}, model:"claude-sonnet-4-6"}}
|
||
// Each assistant content block is emitted as its OWN line, all sharing the
|
||
// same usage / model fields (one API call → many lines). We coalesce every
|
||
// run of consecutive assistant lines into a single AssistantTurn whose
|
||
// `blocks` array preserves the in-order list of thinking/text/tool blocks.
|
||
// Tool results that arrive in subsequent user-lines are attached back onto
|
||
// the matching tool block by tool_use_id.
|
||
//
|
||
// Lines we ignore: agent-setting, queue-operation, attachment, last-prompt,
|
||
// summary, and anything else without a recognisable role/content shape.
|
||
// Tokens/cost are intentionally NOT carried over — the JSONL repeats usage
|
||
// per content block so summing naively would over-count, and the user is
|
||
// here to see CONTENT, not a token panel for old turns.
|
||
// ---------------------------------------------------------------------------
|
||
/**
 * Load a past session's transcript and convert it into chat turns.
 *
 * Reads ~/.claude/projects/<mangled-cwd>/<sessionId>.jsonl. Every run of
 * consecutive `assistant` lines is coalesced into one AssistantTurn; a typed
 * user prompt closes the turn in progress. `tool_result` blocks arriving in
 * user lines are attached to the tool block of the turn in progress that has
 * the same tool-use id. Any other event type is ignored, as are lines that
 * are blank, unparseable, or not JSON objects.
 *
 * @param sessionId     Transcript file name without the ".jsonl" extension.
 * @param cwd           Working directory whose project folder is searched.
 * @param fallbackModel Model label used when an assistant line carries no
 *   recognisable model id.
 * @returns Turns in transcript order; [] when the file cannot be read.
 */
function loadSessionTurns(sessionId: string, cwd: string, fallbackModel: Model): ChatTurn[] {
  const path = join(homedir(), ".claude", "projects", mangleCwd(cwd), `${sessionId}.jsonl`);
  let raw: string;
  try {
    raw = readFileSync(path, "utf8");
  } catch {
    return [];
  }

  const turns: ChatTurn[] = [];
  // The assertion keeps the declared union type: with a plain `= null`
  // initialiser the compiler narrows `current` to `null` for the rest of this
  // function, because every reassignment happens inside the closures below.
  let current = null as AssistantTurn | null;

  /** Close the assistant turn in progress (if any) and append it to `turns`. */
  const flush = (): void => {
    if (!current) return;
    current.finalText = current.blocks
      .filter((b) => b.type === "text")
      .map((b: any) => b.text as string)
      .join("");
    turns.push(current);
    current = null;
  };

  /** Return the assistant turn in progress, opening one labelled `model` if needed. */
  const ensureCurrent = (model: Model): AssistantTurn => {
    if (current) return current;
    current = {
      role: "assistant",
      model,
      blocks: [],
      finalText: "",
      sessionId,
      isResume: false,
      done: true,
    };
    return current;
  };

  /** Flatten a tool_result `content` (a string or an array of blocks) to plain text. */
  const toolResultText = (content: any): string => {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    return content
      .filter((b: any) => b?.type === "text" && typeof b.text === "string")
      .map((b: any) => b.text as string)
      .join("\n");
  };

  /** A typed prompt ends the assistant turn in progress; blank prompts add no turn. */
  const pushUserPrompt = (text: string): void => {
    flush();
    if (text.trim()) turns.push({ role: "user", text });
  };

  /** Attach a tool_result block to the first matching tool call of the turn in progress. */
  const attachToolResult = (block: any): void => {
    if (!current) return;
    const text = toolResultText(block.content);
    const isError = block.is_error === true;
    for (const tb of current.blocks) {
      if (tb.type === "tool" && tb.id === block.tool_use_id) {
        tb.result = { text, isError };
        break;
      }
    }
  };

  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;

    let ev: any;
    try {
      ev = JSON.parse(line);
    } catch {
      continue;
    }
    // JSON.parse accepts bare literals; a `null` line would throw on the
    // property reads below and take the whole resume down with it.
    if (typeof ev !== "object" || ev === null) continue;

    if (ev.type === "user") {
      const c = ev.message?.content;
      if (typeof c === "string") {
        pushUserPrompt(c);
      } else if (Array.isArray(c)) {
        let sawToolResult = false;
        for (const block of c) {
          if (block?.type === "tool_result") {
            sawToolResult = true;
            attachToolResult(block);
          } else if (block?.type === "text" && typeof block.text === "string") {
            // Some clients send array-shaped user prompts. Text that follows
            // a tool_result in the same line is not treated as a prompt.
            if (!sawToolResult) pushUserPrompt(block.text);
          }
        }
      }
    } else if (ev.type === "assistant") {
      // Only iterate real arrays: a non-iterable `content` would make the
      // for…of below throw.
      const content: any[] = Array.isArray(ev.message?.content) ? ev.message.content : [];
      const rawModel = String(ev.message?.model ?? "");
      const model = (rawModel ? normalizeRawModel(rawModel) : null) ?? fallbackModel;
      // A coalesced turn keeps the model of its first line; `model` is only
      // used when this line opens a new turn. (Purely for the header label.)
      const a = ensureCurrent(model);
      for (const block of content) {
        if (block?.type === "thinking" && typeof block.thinking === "string") {
          if (block.thinking.trim()) a.blocks.push({ type: "thinking", text: block.thinking });
        } else if (block?.type === "text" && typeof block.text === "string") {
          if (block.text.trim()) a.blocks.push({ type: "text", text: block.text });
        } else if (block?.type === "tool_use") {
          a.blocks.push({
            type: "tool",
            id: String(block.id ?? ""),
            name: String(block.name ?? ""),
            inputJson: JSON.stringify(block.input ?? {}),
          });
        }
      }
    }
    // All other event types (agent-setting, queue-operation, attachment,
    // last-prompt, summary, …) are intentionally ignored.
  }

  flush();
  return turns;
}
|
||
|
||
// Per-turn render cache: once a turn is "frozen" (user turns are always
|
||
// frozen; assistant turns after done=true), its rendered output at a given
|
||
// (innerWidth, theme) is invariant. Caching avoids O(turns) rebuild on every
|
||
// frame, which otherwise creates quadratic-ish lag during streaming because
|
||
// partial-message updates drive tens of renders per second.
|
||
/**
 * Optional render-cache slots carried by every turn. All three are unset
 * until a rendering has been stored.
 */
interface TurnRenderCache {
  /** Rendered lines stored for this turn. */
  cachedLines?: string[];
  /** Inner width the stored lines were rendered at. */
  cachedWidth?: number;
  /** Theme the stored lines were rendered with. */
  cachedTheme?: unknown;
}
|
||
|
||
/** A prompt typed by the user. */
interface UserTurn extends TurnRenderCache {
  role: "user";
  /** The prompt text; rendered as markdown after trimming. */
  text: string;
}
|
||
/** One Claude response: its streamed blocks plus status and usage metadata. */
interface AssistantTurn extends TurnRenderCache {
  role: "assistant";
  /** Model shown in the turn header. */
  model: Model;
  /** Thinking / text / tool blocks, in arrival order. */
  blocks: StreamBlock[];
  /** Text of the response (for transcripts loaded from disk: the text blocks joined). */
  finalText: string;
  /** Claude session id; its first 8 characters are shown in the header. */
  sessionId?: string;
  /** Selects the "↩" header icon instead of "◆". */
  isResume: boolean;
  /** False while the response is still streaming (header shows "⏳"). */
  done: boolean;
  /** When set, the turn renders as an error line instead of its blocks. */
  error?: string;
  /** When set, the turn renders as "(Cancelled)" instead of its blocks. */
  cancelled?: boolean;
  // Usage figures; the whole turn is handed to formatUsage once `done` is set.
  costUsd?: number;
  inputTokens?: number;
  outputTokens?: number;
  cacheReadTokens?: number;
  cacheWriteTokens?: number;
}

/** Any turn of a chat session, discriminated by `role`. */
type ChatTurn = UserTurn | AssistantTurn;
|
||
|
||
/** Payload of one chat-mode session: all of its turns, in order. */
interface ChatSessionDetails {
  turns: ChatTurn[];
}
|
||
|
||
// =============================================================================
|
||
// Extension entry point
|
||
// =============================================================================
|
||
|
||
// ── Reload-persistent state ─────────────────────────────────────────────────
// pi's `/reload` tears the extension down and re-invokes the default export,
// which resets every closure-local `let`/`const`. Two pieces of state are
// meant to survive that: the Map of resumable Claude session ids
// (model → sessionId) and the current extended-thinking effort level.
// Without the Map, /reload would silently orphan the ongoing Claude
// threads, forcing the user to re-pick them via /claude-resume.
//
// Everything else (chatMode, currentDetails, askBridge, tuiRef, isGenerating)
// is intentionally NOT persisted: the bridge/TUI references are bound to the
// torn-down ctx and must be rebuilt on the next enterChatMode(), and any
// in-flight stream is already aborted when the old closure is discarded.
//
// We stash the state object on globalThis behind a namespaced key. globalThis
// survives module re-evaluation (only top-level lexical bindings are reset),
// and the guarded getter keeps initialization idempotent across repeated
// reloads.

// Valid extended-thinking effort levels accepted by `claude --effort`, plus
// our synthetic "off" sentinel which skips the flag entirely (falling back
// to the CLI's default of no thinking emission in -p mode).
|
||
/** Selectable effort levels, "off" first. */
const EFFORTS = ["off", "low", "medium", "high", "xhigh", "max"] as const;

/** One of the entries of EFFORTS. */
type Effort = (typeof EFFORTS)[number];

/** Effort level used until one has been stored in the persisted state. */
const DEFAULT_EFFORT: Effort = "max";
|
||
|
||
/** State kept on globalThis so that it outlives an extension `/reload`. */
interface ChatClaudePersistedState {
  /** model → resumable Claude session id. */
  sessions: Map<Model, string>;
  /**
   * Current extended-thinking effort level. Persisted for the same reason as
   * the session ids: the user's choice should survive the extension teardown.
   */
  effort: Effort;
}
|
||
/** Property name under which the persisted state lives on globalThis. */
const CHAT_CLAUDE_STATE_KEY = "__pi_chat_claude_persisted__";

/**
 * Return the state object stored on globalThis, creating it on first use.
 *
 * Repeated calls return the same object, so callers may mutate it in place.
 * A stored object with a falsy `effort` (as left behind by a revision that
 * predates the effort setting) has it filled in with DEFAULT_EFFORT.
 */
function getPersistedState(): ChatClaudePersistedState {
  const store = globalThis as unknown as Record<string, ChatClaudePersistedState>;

  const existing = store[CHAT_CLAUDE_STATE_KEY];
  const state = existing || { sessions: new Map<Model, string>(), effort: DEFAULT_EFFORT };
  if (!existing) {
    store[CHAT_CLAUDE_STATE_KEY] = state;
  }

  // Back-fill for state written before the effort field existed.
  if (!state.effort) {
    state.effort = DEFAULT_EFFORT;
  }
  return state;
}
|
||
|
||
export default function (pi: ExtensionAPI) {
|
||
// ── Mode state ────────────────────────────────────────────────────────────
|
||
let chatMode: Model | null = null; // null ⇒ not in chat mode
|
||
// model → resumable claude session id. Pulled from globalThis so the
|
||
// mapping (and the current effort level) survive `/reload` (see
|
||
// getPersistedState above). `persisted` is kept as a handle so
|
||
// `/claude-effort` can mutate `persisted.effort` in place and have
|
||
// the change picked up by subsequent runChatTurn calls.
|
||
const persisted = getPersistedState();
|
||
const { sessions } = persisted;
|
||
let isGenerating = false;
|
||
let currentAbort: AbortController | null = null;
|
||
|
||
// pi-ask bridge — opens a Unix socket + generates an --mcp-config so
|
||
// Claude (running inside this chat) can ask the user questions through
|
||
// pi's native ask UI. Bound to the chat-mode lifetime: started on
|
||
// enterChatMode, closed on exitChatMode.
|
||
let askBridge: AskBridge | null = null;
|
||
|
||
// Live TUI reference captured from the mode-banner widget factory, used to
|
||
// schedule re-renders while a Claude response is streaming into the
|
||
// current chat-claude-session message.
|
||
let tuiRef: { requestRender: () => void } | null = null;
|
||
|
||
// The in-flight chat session's `details` object. Stored by reference so
|
||
// mutations here are reflected in the CustomMessage already displayed
|
||
// in pi's conversation. Null between chat-mode sessions.
|
||
let currentDetails: ChatSessionDetails | null = null;
|
||
|
||
// Keep a module-level set of the extension's custom-message types so the
|
||
// `context` event handler can strip them out of pi's LLM context — chat
|
||
// mode is between the user and Claude and has no business in pi's
|
||
// prompt payload.
|
||
const CHAT_CLAUDE_CUSTOM_TYPES = new Set(["chat-claude-session"]);
|
||
|
||
// ── Render throttling ────────────────────────────────────────────────────
|
||
// Claude's `--include-partial-messages` fires an onUpdate for every token
|
||
// delta (100+ Hz under a fast stream). Rendering per-token was the second
|
||
// half of the progressive-lag problem — even with per-turn caching, the
|
||
// TUI would be asked to diff+repaint dozens of times per second.
|
||
//
|
||
// scheduleStreamRender coalesces back-to-back requests into a trailing-
|
||
// edge timer at ~30 Hz. The first update within a quiet window waits up
|
||
// to 33 ms before rendering; any further updates in that window are
|
||
// folded into the same render. flushStreamRender cancels the pending
|
||
// timer and renders immediately — used on stream completion, abort, and
|
||
// chat-mode teardown so the user sees the terminal frame right away.
|
||
let streamRenderTimer: ReturnType<typeof setTimeout> | null = null;
|
||
const STREAM_RENDER_INTERVAL_MS = 33; // ~30 Hz
|
||
/**
 * Request a render at most once per STREAM_RENDER_INTERVAL_MS. While a timer
 * is pending, further calls do nothing; when it fires, the timer slot is
 * cleared and a render is requested on the TUI (if one is attached).
 */
function scheduleStreamRender() {
  if (!streamRenderTimer) {
    const onTick = () => {
      streamRenderTimer = null;
      tuiRef?.requestRender();
    };
    streamRenderTimer = setTimeout(onTick, STREAM_RENDER_INTERVAL_MS);
  }
}
|
||
/**
 * Render right now: cancel a pending throttled render, if there is one, and
 * request a render on the TUI (if one is attached).
 */
function flushStreamRender() {
  const pending = streamRenderTimer;
  if (pending) {
    clearTimeout(pending);
    streamRenderTimer = null;
  }
  tuiRef?.requestRender();
}
|
||
|
||
// ── Rendering helpers ────────────────────────────────────────────────────
|
||
// Mirrors pi's AssistantMessageComponent conventions (see
|
||
// modes/interactive/components/assistant-message.js): Markdown at
|
||
// paddingX=1, paddingY=0; thinking as italic `thinkingText`-coloured
|
||
// markdown; tool blocks via the shared renderToolBlock (same one
|
||
// ask-claude uses) so bash / read / edit / write all look identical to
|
||
// pi's own tool executions.
|
||
/**
 * Append the components for one turn to `container`.
 *
 * User turn: a "▶ you" header, a spacer and the prompt as markdown.
 * Assistant turn: a header (status icon, model, short session id, "⏳" while
 * streaming), a spacer, then either a "(Cancelled)" line, an error line, or
 * the turn's blocks separated by spacers — followed by a dim usage line once
 * the turn is done and formatUsage returns something.
 */
function renderTurnInto(container: Container, turn: ChatTurn, theme: any) {
  const md = getMarkdownTheme();
  const addText = (s: string) => container.addChild(new Text(s, 1, 0));
  const addGap = () => container.addChild(new Spacer(1));

  if (turn.role === "user") {
    addText(orangeBold("▶ you"));
    addGap();
    container.addChild(new Markdown(turn.text.trim(), 1, 0, md));
    return;
  }

  // Header: the status icon takes the first state that applies.
  let icon: string;
  if (turn.cancelled) icon = orange("◇ ");
  else if (turn.error) icon = theme.fg("error", "✗ ");
  else if (turn.isResume) icon = orange("↩ ");
  else icon = orange("◆ ");

  let header = icon + orangeBold(`Claude ${capitalize(turn.model)}`);
  if (turn.sessionId) header += theme.fg("dim", `  session:${turn.sessionId.slice(0, 8)}`);
  if (!turn.done) header += theme.fg("warning", " ⏳");
  addText(header);
  addGap();

  if (turn.cancelled) {
    addText(orange("(Cancelled)"));
    return;
  }
  if (turn.error) {
    addText(theme.fg("error", `Error: ${turn.error}`));
    return;
  }

  // Safety net: show each tool id only once, keeping its first occurrence.
  const seenToolIds = new Set<string>();
  const blocks: StreamBlock[] = (turn.blocks ?? []).filter((b) => {
    if (b.type !== "tool") return true;
    if (seenToolIds.has(b.id)) return false;
    seenToolIds.add(b.id);
    return true;
  });

  // A spacer goes between rendered blocks, never before the first one.
  let addedAny = false;
  const append = (child: Component) => {
    if (addedAny) addGap();
    container.addChild(child);
    addedAny = true;
  };

  for (const block of blocks) {
    switch (block.type) {
      case "thinking":
        if (block.text.trim()) {
          append(new Markdown(block.text.trim(), 1, 0, md, {
            color: (t: string) => theme.fg("thinkingText", t),
            italic: true,
          }));
        }
        break;
      case "tool":
        append(renderToolBlockTruncated(block, theme));
        break;
      case "text":
        if (block.text.trim()) {
          append(new Markdown(block.text.trim(), 1, 0, md));
        }
        break;
    }
  }

  if (!turn.done) return;
  const usage = formatUsage(turn as any);
  if (usage) {
    addGap();
    addText(theme.fg("dim", usage));
  }
}
|
||
|
||
// Render one turn in isolation and return its lines PRE-PADDED to
|
||
// `innerWidth` visible columns.
|
||
//
|
||
// Pre-padding here means `visibleWidth()` (which calls `Intl.Segmenter`
|
||
// — the measured hot spot: 85% of pi's CPU in a laggy session) runs
|
||
// exactly ONCE per line per turn, not once per line per frame. For
|
||
// completed turns these padded lines are cached and reused forever at
|
||
// that (width, theme); for the streaming tail turn the work is bounded
|
||
// to just the in-flight turn's lines.
|
||
/**
 * Render a single turn into a throwaway Container at `innerWidth` and return
 * its lines, each forced to exactly `innerWidth` visible columns by
 * padToInnerWidth.
 */
function renderTurnLines(turn: ChatTurn, theme: any, innerWidth: number): string[] {
  const scratch = new Container();
  renderTurnInto(scratch, turn, theme);
  return scratch.render(innerWidth).map((rendered) => padToInnerWidth(rendered, innerWidth));
}
|
||
|
||
// Builds the inner lines for the ENTIRE session, reusing cached per-turn
// output wherever possible.
//
// Caching rules:
//   • User turns never change, so they are always cacheable.
//   • Assistant turns are mutated in place by runClaude's onUpdate callback
//     while streaming and only settle once `done: true` is set (see
//     runChatTurn), so they are cached only from that point on.
//   • A cached entry is valid only for the (innerWidth, theme) it was built
//     with; a terminal resize or theme switch misses the cache and forces a
//     rebuild on the next render.
//
// The effect is that a streaming frame re-renders only the in-flight
// assistant turn at the tail; earlier turns are copied from their cache
// instead of repeating the O(turns × blocks) rebuild that used to run for
// every partial Claude message.
//
// Every returned line is already padded to `innerWidth` visible columns
// (see renderTurnLines / padToInnerWidth), so the caller can pass the result
// straight to wrapInOrangeBorder with no further visibleWidth() calls. That
// matters because visibleWidth drives Intl.Segmenter, whose 512-entry LRU
// thrashes when invoked per line per frame on a long chat.
function renderSessionLines(details: ChatSessionDetails, theme: any, innerWidth: number): string[] {
  const { turns } = details;

  // No turns yet: emit a placeholder so the border shows up right after the
  // user submits, before any block has arrived from Claude.
  if (turns.length === 0) {
    const placeholder = new Container();
    placeholder.addChild(new Text(orangeDim("(chat mode — waiting for first message)"), 0, 0));
    return placeholder.render(innerWidth).map((line) => padToInnerWidth(line, innerWidth));
  }

  // The blank separator between turns must be full width too; otherwise
  // wrapInOrangeBorder would emit a visibly short interior and the right
  // edge of the orange border would look ragged.
  const blankRow = " ".repeat(innerWidth);
  const assembled: string[] = [];

  turns.forEach((turn, index) => {
    if (index > 0) assembled.push(blankRow);

    const stable = turn.role === "user" || (turn.role === "assistant" && turn.done);
    const cached = turn.cachedLines;

    // Cache hit: the turn is settled and was rendered at this width/theme.
    if (stable && cached && turn.cachedWidth === innerWidth && turn.cachedTheme === theme) {
      for (const line of cached) assembled.push(line);
      return;
    }

    const fresh = renderTurnLines(turn, theme, innerWidth);
    // Remember the output only for settled turns. For a streaming turn,
    // explicitly clear the slots so no stale cached output from an earlier
    // state can be picked up later.
    turn.cachedLines = stable ? fresh : undefined;
    turn.cachedWidth = stable ? innerWidth : undefined;
    turn.cachedTheme = stable ? theme : undefined;
    for (const line of fresh) assembled.push(line);
  });

  return assembled;
}
|
||
|
||
// Clears the cached render output of every turn in the session. Wired to the
// message renderer's `invalidate()` hook, which pi triggers on a theme change
// or when a from-scratch re-render is needed.
function invalidateSessionCache(details: ChatSessionDetails) {
  details.turns.forEach((turn) => {
    turn.cachedLines = undefined;
    turn.cachedWidth = undefined;
    turn.cachedTheme = undefined;
  });
}
|
||
|
||
// ── Mode banner + status ─────────────────────────────────────────────────
|
||
// Brings the chat-mode widget, the status entry and the terminal title in
// line with the current state (chatMode, sessions, isGenerating,
// persisted.effort). Does nothing when `ctx` has no UI.
function syncUI(ctx: any) {
  if (!ctx?.hasUI) return;

  // Outside chat mode: remove our widget and status entry, restore the title.
  if (!chatMode) {
    ctx.ui.setWidget("chat-claude", undefined);
    ctx.ui.setStatus("chat-claude", undefined);
    ctx.ui.setTitle("pi");
    return;
  }

  const sessionId = sessions.get(chatMode);
  const short = sessionId ? sessionId.slice(0, 8) : "new";
  const modelName = capitalize(chatMode);
  const modelUp = modelName.toUpperCase();

  ctx.ui.setWidget("chat-claude", (tui: any, theme: any) => {
    // Keep the TUI handle so streaming updates can request re-renders.
    tuiRef = tui;
    return {
      invalidate: () => {},
      render: () => {
        const rail = orange("▌ ");
        const headline = [
          orangeBold("◆ CLAUDE CHAT MODE"),
          orangeBold(modelUp),
          orangeDim("session:" + short),
          orangeDim("effort:" + persisted.effort),
        ].join(" ");
        const streaming = isGenerating ? " " + orange("⏳ streaming…") : "";
        const hint = theme.fg(
          "dim",
          "Type to chat · /claude haiku|sonnet|opus · /claude-new · /claude-effort · /claude-end · /claude-abort",
        );
        return [rail + headline + streaming, rail + hint];
      },
    };
  }, { placement: "aboveEditor" });

  const busy = isGenerating ? " · streaming" : "";
  ctx.ui.setStatus(
    "chat-claude",
    orange(`◆ Claude ${modelName} · ${short} · effort:${persisted.effort}${busy}`),
  );
  ctx.ui.setTitle(`pi · Claude ${modelName} Chat`);
}
|
||
|
||
// ── ESC-to-abort editor ──────────────────────────────────────────────────
|
||
// ESC (the "interrupt" action) is on the extension-runner's reserved list
// (see node_modules/@mariozechner/pi-coding-agent/.../runner.js — any
// registerShortcut("escape", …) is silently dropped), so a custom editor is
// the sanctioned way to intercept it. This subclass of pi's exported
// CustomEditor swallows ESC ONLY while a chat-claude response is streaming.
// Everything else is passed to `super.handleInput`, which runs the app-level
// keybindings — including pi's own onEscape handler, which
// setCustomEditorComponent copies onto the custom editor at install time
// (see interactive-mode.js setCustomEditorComponent, ~line 1258).
class ChatEscEditor extends CustomEditor {
  handleInput(data: string): void {
    const controller = currentAbort;

    // Not an ESC during an active stream: let the normal editor handle it.
    if (!matchesKey(data, "escape") || !isGenerating || !controller) {
      super.handleInput(data);
      return;
    }

    try {
      controller.abort();
    } catch {
      /* ok */
    }
    // There may be no ctx available here, but the UI is live during chat
    // mode, so flush any pending throttled render and force a frame now;
    // the chat-claude-session renderer shows the assistant turn as
    // cancelled once runClaude's promise rejects with AbortError.
    flushStreamRender();
  }
}
|
||
|
||
// ── Mode transitions ─────────────────────────────────────────────────────
|
||
// Turns chat mode on for `model`, or re-targets it while already active.
//   model        — Claude model that typed input is routed to from now on.
//   ctx          — pi context; all UI work is skipped unless ctx.hasUI is set.
//   freshSession — when true, the stored Claude session id for `model` is
//                  dropped so the next turn starts a new Claude session.
function enterChatMode(model: Model, ctx: any, freshSession: boolean) {
  const wasActive = chatMode !== null;
  const modelChanged = chatMode !== model;

  if (freshSession) sessions.delete(model);

  // Only a plain re-entry into the same, still-active mode keeps writing
  // into the current border box. In every other case drop the reference to
  // the previous session's details so a new box is started; the existing
  // CustomMessage in the conversation keeps its own reference and stays
  // visible in the scrollback.
  const keepsCurrentBorder = wasActive && !modelChanged && !freshSession;
  if (!keepsCurrentBorder) currentDetails = null;

  chatMode = model;

  if (ctx?.hasUI) {
    // Stand up (or refresh) the pi-ask bridge so Claude can ask the user
    // questions through pi's native overlay. It is re-created on every
    // entry so the socket + temp dir lifetime is bounded by the chat
    // session.
    askBridge?.close();
    try {
      askBridge = startAskBridge({
        ui: ctx.ui,
        onAsk: () => tuiRef?.requestRender(),
      });
    } catch (err) {
      askBridge = null;
      const reason = err instanceof Error ? err.message : String(err);
      ctx.ui.notify(
        `pi-ask bridge unavailable: ${reason} — Claude won't be able to ask questions.`,
        "warning",
      );
    }

    // Install the ESC-aborts-Claude custom editor. Doing this again while
    // chat mode is already active (e.g. /claude haiku → /claude opus)
    // simply re-wires the same class.
    ctx.ui.setEditorComponent(
      (tui: TUI, theme: EditorTheme, keybindings: KeybindingsManager) =>
        new ChatEscEditor(tui, theme, keybindings),
    );
  }

  syncUI(ctx);

  if (ctx?.hasUI) {
    const storedSession = sessions.get(model);

    let kind: string;
    if (freshSession || !storedSession) {
      kind = "new session";
    } else {
      kind = `resume ${storedSession.slice(0, 8)}`;
    }

    let verb: string;
    if (!wasActive) {
      verb = "Entered chat mode:";
    } else if (modelChanged) {
      verb = "Switched to";
    } else {
      verb = "Re-entered";
    }

    ctx.ui.notify(`${verb} Claude ${capitalize(model)} · ${kind}`, "info");
  }
}
|
||
|
||
// Leaves chat mode: aborts any in-flight response, resets the streaming
// state, tears down the pi-ask bridge, restores pi's default editor and
// refreshes the UI.
function exitChatMode(ctx: any) {
  try {
    currentAbort?.abort();
  } catch {
    /* ok */
  }
  currentAbort = null;
  isGenerating = false;
  chatMode = null;

  // Cancel a pending throttled stream render so no dangling timer fires
  // tuiRef.requestRender() after chat mode ends (tuiRef itself lingers, so
  // the render would be harmless but wasted).
  if (streamRenderTimer) {
    clearTimeout(streamRenderTimer);
    streamRenderTimer = null;
  }

  // Detach from the current session details so the next entry starts a new
  // border. The message and its details stay in place in pi's scrollback.
  currentDetails = null;

  // Tear down the pi-ask bridge: close the socket and remove the temp dir
  // holding the socket + generated mcp.json.
  askBridge?.close();
  askBridge = null;

  const hasUI = Boolean(ctx?.hasUI);
  // Restore pi's default editor (undoes ChatEscEditor from enterChatMode).
  if (hasUI) ctx.ui.setEditorComponent(undefined);
  syncUI(ctx);
  if (hasUI) ctx.ui.notify("Exited chat mode — back to normal pi.", "info");
}
|
||
|
||
// ── Session / turn management ────────────────────────────────────────────
|
||
// Returns the details object of the current chat-session message. If none
// is attached yet, creates an empty one, remembers it as `currentDetails`
// and posts the "chat-claude-session" custom message that carries it.
function ensureSessionMessage(): ChatSessionDetails {
  if (currentDetails) return currentDetails;

  const fresh: ChatSessionDetails = { turns: [] };
  currentDetails = fresh;

  const sessionMessage = {
    customType: "chat-claude-session",
    // content is only used if we had no custom renderer; it stays hidden
    // from pi's LLM via the "context" filter.
    content: "",
    display: true,
    details: fresh,
  };
  pi.sendMessage(sessionMessage, { triggerTurn: false });

  return fresh;
}
|
||
|
||
// Sends one user message to Claude and streams the reply into the current
// chat-session message.
//   userText — the text the user submitted.
//   ctx      — pi context; `ctx.cwd` is passed to runClaude, and UI calls
//              are skipped unless ctx.hasUI is set.
// Returns immediately when chat mode is not active. Failures from runClaude
// are not rethrown: they are recorded on the assistant turn as either
// `cancelled` (the turn's abort signal fired) or `error` (anything else).
async function runChatTurn(userText: string, ctx: any) {
  if (!chatMode) return;
  const model = chatMode;
  const details = ensureSessionMessage();

  // Append the user turn plus a placeholder assistant turn up front so the
  // border extends as soon as the user hits enter.
  details.turns.push({ role: "user", text: userText });
  const existingSession = sessions.get(model);
  const assistantTurn: AssistantTurn = {
    role: "assistant",
    model,
    blocks: [],
    finalText: "",
    isResume: !!existingSession,
    done: false,
  };
  details.turns.push(assistantTurn);
  tuiRef?.requestRender();

  // Keep a private handle on this turn's controller. exitChatMode() nulls
  // the shared `currentAbort` immediately after calling abort(), so reading
  // the shared variable in catch/finally would (a) misreport a turn that was
  // cancelled via /claude-end as an error and (b) let this turn's cleanup
  // overwrite the state of a newer turn started in the meantime.
  const abort = new AbortController();
  isGenerating = true;
  currentAbort = abort;
  syncUI(ctx);
  if (ctx?.hasUI) ctx.ui.setWorkingMessage(`Claude ${capitalize(model)} is thinking…`);

  try {
    const r = await runClaude(userText, {
      model,
      sessionId: existingSession,
      cwd: ctx.cwd,
      signal: abort.signal,
      // Enable extended thinking — without --effort, `claude -p` NEVER
      // emits thinking_delta events regardless of the user's interactive
      // defaultThinkingLevel setting, and the italic thinking-block
      // rendering sits idle. Default is "max" and is configurable live
      // via /claude-effort; the model still decides on-demand whether it
      // actually needs to think.
      effort: persisted.effort,
      // Route AskUserQuestion-style requests through pi's native overlay
      // via the pi-ask-mcp bridge. Disallowing the built-in
      // AskUserQuestion forces Claude to use mcp__pi__ask if it wants to
      // ask a structured question.
      mcpConfigPath: askBridge?.mcpConfigPath,
      disallowedTools: askBridge ? ["AskUserQuestion"] : undefined,
      onUpdate: (partial) => {
        assistantTurn.blocks = partial.blocks;
        assistantTurn.finalText = partial.finalText;
        // Throttle to ~30 Hz so a fast token stream doesn't cause a
        // render-per-token, which compounds with any other extension's
        // per-frame work (footer, widgets, etc.).
        scheduleStreamRender();
      },
    });

    if (r.sessionId) sessions.set(model, r.sessionId);
    assistantTurn.blocks = r.blocks;
    assistantTurn.finalText = r.finalText;
    assistantTurn.sessionId = r.sessionId;
    assistantTurn.costUsd = r.costUsd;
    assistantTurn.inputTokens = r.inputTokens;
    assistantTurn.outputTokens = r.outputTokens;
    assistantTurn.cacheReadTokens = r.cacheReadTokens;
    assistantTurn.cacheWriteTokens = r.cacheWriteTokens;
    assistantTurn.done = true;
  } catch (err) {
    // Decide cancelled-vs-error from this turn's own signal, not from the
    // shared variable (which may already have been reset or replaced).
    const aborted = abort.signal.aborted;
    assistantTurn.done = true;
    assistantTurn.cancelled = aborted;
    assistantTurn.error = aborted ? undefined : (err instanceof Error ? err.message : String(err));
  } finally {
    // Release the shared streaming state only if it still belongs to this
    // turn; after exitChatMode() it is already cleared, and after a newer
    // turn has started it belongs to that turn.
    if (currentAbort === abort) {
      isGenerating = false;
      currentAbort = null;
    }
    // Clear the working message unless a newer turn is now streaming.
    if (!isGenerating && ctx?.hasUI) ctx.ui.setWorkingMessage(undefined);
    syncUI(ctx);
    // Flush (not schedule): the stream just ended or was aborted — we want
    // the final frame on screen immediately, not 33 ms later. This also
    // cancels any in-flight throttled timer so it doesn't fire a stale
    // second render after the assistant turn is already marked done and
    // cached.
    flushStreamRender();
  }
}
|
||
|
||
// ── Input interception ───────────────────────────────────────────────────
|
||
// Registered pi commands (/claude, /claude-end, etc.) dispatch BEFORE this
// event fires, so they keep working normally. Bash via `!` goes through
// user_bash, not here. Any other text the user submits interactively while
// chat mode is on is routed straight to Claude.
pi.on("input", async (event, ctx) => {
  const passThrough = { action: "continue" } as const;
  const consumed = { action: "handled" } as const;

  if (!chatMode || event.source !== "interactive") return passThrough;

  const text = event.text ?? "";
  if (text.trim() === "") return passThrough;
  if (text.trimStart().startsWith("!")) return passThrough;

  // Only one Claude response at a time: swallow the input and tell the user.
  if (isGenerating) {
    ctx.ui.notify(
      "Claude is still responding. Use /claude-abort to cancel, then try again.",
      "warning",
    );
    return consumed;
  }

  // Fire and forget: the turn streams in the background while the input
  // event is reported as handled right away.
  void runChatTurn(text, ctx).catch((err) => {
    const reason = err instanceof Error ? err.message : String(err);
    ctx.ui.notify(`Chat error: ${reason}`, "error");
  });
  return consumed;
});
|
||
|
||
// Chat mode is a conversation between the user and Claude, not part of pi's
// own conversation, so strip chat-mode custom messages from the context that
// is handed to pi's LLM.
pi.on("context", (event) => {
  const isChatClaudeMessage = (m: any) =>
    m.role === "custom" && CHAT_CLAUDE_CUSTOM_TYPES.has(m.customType);
  return { messages: event.messages.filter((m: any) => !isChatClaudeMessage(m)) };
});
|
||
|
||
// ── Session lifecycle ────────────────────────────────────────────────────
|
||
// Refresh the widget/status/title whenever a pi session starts.
pi.on("session_start", (_event, ctx) => {
  syncUI(ctx);
});

// On shutdown, leave chat mode and release anything that may still be held.
pi.on("session_shutdown", (_event, ctx) => {
  if (chatMode) exitChatMode(ctx);

  // Defensive: if exitChatMode was never reached (chatMode was already null
  // but a bridge somehow lingered), close it directly.
  if (askBridge) {
    askBridge.close();
    askBridge = null;
  }

  // Defensive: same for the throttled render timer — exitChatMode already
  // clears it, but this keeps the Node process clean in the case where chat
  // mode was never entered but some hypothetical future code path scheduled
  // a render anyway.
  if (streamRenderTimer) {
    clearTimeout(streamRenderTimer);
    streamRenderTimer = null;
  }
});
|
||
|
||
// ── Commands ─────────────────────────────────────────────────────────────
|
||
// Argument completions for the model-taking commands: every entry of MODELS
// that starts with the typed prefix (compared in lower case).
const modelCompletions = (prefix: string) => {
  const needle = prefix.toLowerCase();
  return MODELS
    .filter((m) => m.startsWith(needle))
    .map((m) => ({ value: m, label: m }));
};
|
||
|
||
// /claude [model] — enter chat mode, or switch model while already in it.
// An argument that is not one of MODELS falls back to the active chat model,
// or to "sonnet" when chat mode is off.
pi.registerCommand("claude", {
  description: [
    "Enter distinct Claude chat mode — typed input bypasses pi's LLM and goes to Claude.",
    " /claude — enter with last/default model (sonnet)",
    " /claude haiku|sonnet|opus — enter/switch model",
  ].join("\n"),
  getArgumentCompletions: modelCompletions,
  handler: async (args, ctx) => {
    const requested = (args ?? "").trim().toLowerCase();
    const isKnownModel = (MODELS as readonly string[]).includes(requested);
    const target: Model = isKnownModel ? (requested as Model) : (chatMode ?? "sonnet");
    enterChatMode(target, ctx, false);
  },
});
|
||
|
||
// /claude-new [model] — like /claude, but asks enterChatMode for a fresh
// session so the stored Claude session id for that model is discarded.
pi.registerCommand("claude-new", {
  description: "Enter chat mode with a fresh Claude session (discards any resumed session id). Example: /claude-new opus",
  getArgumentCompletions: modelCompletions,
  handler: async (args, ctx) => {
    const requested = (args ?? "").trim().toLowerCase();
    const isKnownModel = (MODELS as readonly string[]).includes(requested);
    const target: Model = isKnownModel ? (requested as Model) : (chatMode ?? "sonnet");
    enterChatMode(target, ctx, true);
  },
});
|
||
|
||
// /claude-effort — set the extended-thinking effort level for subsequent
|
||
// chat turns. Without the flag `claude -p` emits no thinking_delta
|
||
// events at all (the interactive `defaultThinkingLevel` setting is
|
||
// ignored in -p mode); with it, the model decides on-demand whether
|
||
// to actually think. Stored on the persisted state so the choice
|
||
// survives `/reload`.
|
||
//
|
||
// /claude-effort — show current value
|
||
// /claude-effort max — set to max (default)
|
||
// /claude-effort off — disable (skip the --effort flag)
|
||
// Argument completions for /claude-effort: every entry of EFFORTS that
// starts with the typed prefix (compared in lower case).
const effortCompletions = (prefix: string) => {
  const needle = prefix.toLowerCase();
  return EFFORTS
    .filter((e) => e.startsWith(needle))
    .map((e) => ({ value: e, label: e }));
};
|
||
|
||
// /claude-effort [level] — with no argument, report the current effort
// level; with a valid level from EFFORTS, store it on `persisted.effort` and
// refresh the UI; with anything else, warn and change nothing.
pi.registerCommand("claude-effort", {
  description: [
    "Set the extended-thinking effort level for Claude chat turns.",
    " /claude-effort — show current value",
    " /claude-effort off|low|medium|high|xhigh|max",
    "",
    "Note: without an effort setting, `claude -p` emits no thinking",
    "blocks at all — so lowering this trades thought visibility for speed.",
  ].join("\n"),
  getArgumentCompletions: effortCompletions,
  handler: async (args, ctx) => {
    const requested = (args ?? "").trim().toLowerCase();

    // No argument: just report the current setting.
    if (requested === "") {
      ctx.ui.notify(
        `Current Claude effort: ${persisted.effort}. Options: ${EFFORTS.join(", ")}.`,
        "info",
      );
      return;
    }

    const isKnownLevel = (EFFORTS as readonly string[]).includes(requested);
    if (!isKnownLevel) {
      ctx.ui.notify(
        `Unknown effort "${requested}". Valid levels: ${EFFORTS.join(", ")}.`,
        "warning",
      );
      return;
    }

    const previous = persisted.effort;
    persisted.effort = requested as Effort;
    syncUI(ctx);

    let note: string;
    if (requested === "off") {
      note = "thinking disabled — Claude will no longer emit thinking blocks";
    } else {
      note = `thinking effort set to ${requested}`;
    }
    ctx.ui.notify(`${note} (was ${previous}). Applies to the next chat turn.`, "info");
  },
});
|
||
|
||
// /claude-end — leave chat mode; only informs the user when chat mode is
// not active.
pi.registerCommand("claude-end", {
  description: "Exit Claude chat mode and resume normal pi operation.",
  handler: async (_args, ctx) => {
    if (chatMode) {
      exitChatMode(ctx);
      return;
    }
    ctx.ui.notify("Not in chat mode.", "info");
  },
});
|
||
|
||
// /claude-abort — signal the in-flight Claude response to stop. The turn
// itself is finalised by runChatTurn once runClaude settles.
pi.registerCommand("claude-abort", {
  description: "Cancel the in-flight Claude response (no effect if nothing is generating).",
  handler: async (_args, ctx) => {
    const controller = currentAbort;
    if (!isGenerating || !controller) {
      ctx.ui.notify("No active Claude response to cancel.", "info");
      return;
    }

    try {
      controller.abort();
    } catch {
      /* ok */
    }
    ctx.ui.notify("Aborting Claude response…", "info");
  },
});
|
||
|
||
// /claude-resume — present a picker of past Claude sessions whose cwd matches
// the current project directory, then resume the chosen one in chat mode.
//
// The chosen session id is stored for the selected model, a fresh orange
// border is started, and the session's historical turns (as returned by
// loadSessionTurns) are replayed into that border so the user can see the
// context being resumed. `claude --resume <id>` keeps the FULL conversation
// context alive on the Claude side, so subsequent prompts behave exactly like
// a continuation.
pi.registerCommand("claude-resume", {
  description: "Pick a past Claude session for the current project directory and resume it in chat mode.",
  handler: async (_args, ctx) => {
    if (!ctx?.hasUI) {
      ctx?.ui?.notify?.("/claude-resume requires interactive mode.", "error");
      return;
    }
    if (isGenerating) {
      ctx.ui.notify(
        "A Claude response is still streaming. Use /claude-abort first, then /claude-resume.",
        "warning",
      );
      return;
    }

    const past = readPastSessions(ctx.cwd);
    if (past.length === 0) {
      ctx.ui.notify(`No past Claude sessions found for ${ctx.cwd}.`, "info");
      return;
    }

    // Cap the picker at the 25 most recent sessions to keep the inline-note
    // overlay tractable. Sessions are already sorted newest-first by
    // readPastSessions().
    const MAX_OPTIONS = 25;
    const choices = past.slice(0, MAX_OPTIONS);

    // Label format (per user spec):
    //   <relative time> · <first user message truncated> · (session:<short-id>)
    const PREVIEW_MAX = 60;
    const buildLabel = (s: PastSession) => {
      const preview = s.firstUserMessage
        ? truncate(s.firstUserMessage, PREVIEW_MAX)
        : "(no user message)";
      return `${relativeTime(s.mtimeMs)} · ${preview} · (session:${s.sessionId.slice(0, 8)})`;
    };

    // Disambiguate: in the very unlikely event two sessions produce the
    // same display label, append a counter so the post-pick lookup can
    // match exactly.
    const labels: string[] = [];
    const seen = new Map<string, number>();
    for (const s of choices) {
      const base = buildLabel(s);
      const n = seen.get(base) ?? 0;
      seen.set(base, n + 1);
      labels.push(n === 0 ? base : `${base} #${n + 1}`);
    }

    const sessionPick = await askSingleQuestionWithInlineNote(ctx.ui, {
      question: `Resume which past Claude session in ${ctx.cwd}?`,
      options: labels.map((label) => ({ label })),
    });
    if (sessionPick.selectedOptions.length === 0) {
      ctx.ui.notify("Resume cancelled.", "info");
      return;
    }
    const pickedLabel = sessionPick.selectedOptions[0];
    const idx = labels.indexOf(pickedLabel);
    if (idx < 0) {
      ctx.ui.notify("Picked session not found (label mismatch).", "warning");
      return;
    }
    const picked = choices[idx];

    // Second picker: which model to display the resumed conversation under
    // in pi's UI. Note: claude CLI ignores --model when --resume is set, so
    // this is purely a UI/labelling choice. The session's original model is
    // marked with "(used by this session)" and offered as the recommended
    // default so most users can just hit Enter.
    const originalModel = picked.model;
    const modelLabels = MODELS.map((m) =>
      originalModel === m ? `${m} (used by this session)` : m,
    );
    // Fall back to sonnet (index 1) when the session has no recorded model
    // or — defensively — when the recorded value is not one of MODELS, so a
    // -1 index is never handed to the picker.
    const originalIdx = originalModel ? MODELS.indexOf(originalModel) : -1;
    const recommendedIdx = originalIdx >= 0 ? originalIdx : 1;

    const modelPick = await askSingleQuestionWithInlineNote(ctx.ui, {
      question: "Display this resumed session under which model in pi's UI?",
      options: modelLabels.map((label) => ({ label })),
      recommended: recommendedIdx,
    });
    if (modelPick.selectedOptions.length === 0) {
      ctx.ui.notify("Resume cancelled.", "info");
      return;
    }
    // Strip any "(used by this session)" suffix and parse the bare model
    // name (the first whitespace-separated token).
    const bare = modelPick.selectedOptions[0].split(/\s+/)[0].toLowerCase();
    const targetModel: Model = (MODELS as readonly string[]).includes(bare)
      ? (bare as Model)
      : "sonnet";

    // Wire up the session id BEFORE entering chat mode, so the next turn the
    // user sends triggers --resume <id>.
    sessions.set(targetModel, picked.sessionId);
    enterChatMode(targetModel, ctx, false);

    // enterChatMode keeps the current border when chat mode was already
    // active on the same model. A resumed session is a different
    // conversation, so always detach here; otherwise the historical turns
    // would be appended to the live session's box instead of a new one.
    currentDetails = null;

    // Replay the historical transcript inside a new orange border so the
    // user can SEE the context they're resuming. ensureSessionMessage()
    // creates the empty session CustomMessage; every past turn is then
    // pushed into details.turns and a re-render is requested.
    const historical = loadSessionTurns(picked.sessionId, ctx.cwd, targetModel);
    const details = ensureSessionMessage();
    details.turns.push(...historical);
    tuiRef?.requestRender();

    const ago = relativeTime(picked.mtimeMs);
    const preview = picked.firstUserMessage
      ? `: "${truncate(picked.firstUserMessage, 50)}"`
      : "";
    const histNote = historical.length > 0
      ? ` (${historical.length} historical turn${historical.length === 1 ? "" : "s"} loaded)`
      : " (transcript empty or unreadable)";
    ctx.ui.notify(
      `Resuming session ${picked.sessionId.slice(0, 8)} (${ago})${preview} as Claude ${capitalize(targetModel)}.${histNote}`,
      "info",
    );
  },
});
|
||
// Note on ESC: pi's extension runner reserves the "interrupt" action, so
|
||
// pi.registerShortcut("escape", …) is silently ignored. ESC-to-abort is
|
||
// wired via the ChatEscEditor custom editor installed in enterChatMode.
|
||
|
||
// ── Message renderer ─────────────────────────────────────────────────────
|
||
// ONE custom message type holds the WHOLE chat-mode session. The renderer
// returns a live component whose render() reads `details.turns` on every
// frame, so streaming updates show up with a simple
// `tuiRef.requestRender()` — no full rebuild of pi's chat container needed.
//
// Performance: each frame reuses cached per-turn line output for completed
// turns (see renderSessionLines). Only the in-flight assistant turn (if any)
// is rebuilt per frame, so long conversations stop driving
// O(turns × blocks) allocation during Claude streaming.
pi.registerMessageRenderer("chat-claude-session", (message, _opts, theme) => {
  const session = message.details as ChatSessionDetails | undefined;
  if (!session || !Array.isArray(session.turns)) return undefined;

  return {
    // pi calls invalidate() when the theme changes or a from-scratch
    // re-render is needed — drop every turn's render cache so the next
    // render pass rebuilds against the new theme.
    invalidate: () => invalidateSessionCache(session),
    render: (width: number) => {
      // Below 6 columns the border is skipped and the raw session lines
      // are returned at the full width.
      const bordered = width >= 6;
      // 2 border chars + 2 padding chars are reserved when bordered.
      const innerWidth = bordered ? width - 4 : width;
      // renderSessionLines returns lines already padded to `innerWidth`
      // visible columns, so wrapInOrangeBorder makes NO visibleWidth()
      // call per frame — the previous hot path (~85% CPU in
      // Intl.Segmenter) is gone.
      const innerLines = renderSessionLines(session, theme, innerWidth);
      return bordered ? wrapInOrangeBorder(innerLines, width) : innerLines;
    },
  };
});
|
||
}
|