pi extension fixes
This commit is contained in:
@@ -6,7 +6,7 @@
|
|||||||
* have complex union-type parameters represented as `{"description": "..."}` with
|
* have complex union-type parameters represented as `{"description": "..."}` with
|
||||||
* no `type`, which causes llama-server to return a 400 error.
|
* no `type`, which causes llama-server to return a 400 error.
|
||||||
*
|
*
|
||||||
* This extension starts a tiny local HTTP proxy on port 8081 that:
|
* This extension provides an optional tiny local HTTP proxy on port 8081 that:
|
||||||
* 1. Intercepts outgoing OpenAI-compatible API calls
|
* 1. Intercepts outgoing OpenAI-compatible API calls
|
||||||
* 2. Walks tool schemas and adds `"type": "string"` to any schema node
|
* 2. Walks tool schemas and adds `"type": "string"` to any schema node
|
||||||
* that is missing a type declaration
|
* that is missing a type declaration
|
||||||
@@ -15,10 +15,13 @@
|
|||||||
*
|
*
|
||||||
* It also overrides the `llama-cpp` provider's baseUrl to point at the proxy,
|
* It also overrides the `llama-cpp` provider's baseUrl to point at the proxy,
|
||||||
* so no changes to models.json are needed (beyond what's already there).
|
* so no changes to models.json are needed (beyond what's already there).
|
||||||
|
*
|
||||||
|
* Use `/llama-proxy` command to toggle the proxy on/off. Off by default.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||||
import * as http from "http";
|
import * as http from "http";
|
||||||
|
import { execSync } from "child_process";
|
||||||
|
|
||||||
const PROXY_PORT = 8081;
|
const PROXY_PORT = 8081;
|
||||||
const TARGET_HOST = "127.0.0.1";
|
const TARGET_HOST = "127.0.0.1";
|
||||||
@@ -97,6 +100,33 @@ function sanitizeRequestBody(body: Record<string, unknown>): Record<string, unkn
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Process management
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Terminate stale proxy instances that are still listening on PROXY_PORT.
 *
 * This is best-effort: if `lsof` is missing or fails, or a target process has
 * already exited, the error is swallowed and the caller carries on.
 *
 * Only processes that are *listening* on the port are signalled. A bare
 * `lsof -ti:PORT` also reports processes that merely hold a client connection
 * on that port, so it could terminate unrelated clients. The current process
 * is never signalled, even if it is the one holding the port.
 */
function killExistingProxy(): void {
  let output: string;
  try {
    // -t: print PIDs only; -sTCP:LISTEN: listeners only; -nP: skip DNS and
    // service-name lookups. `|| true` keeps execSync from throwing when lsof
    // exits non-zero because nothing matched.
    output = execSync(
      `lsof -nP -iTCP:${PROXY_PORT} -sTCP:LISTEN -t 2>/dev/null || true`,
      { encoding: "utf-8" },
    );
  } catch {
    // lsof not available or other error — continue anyway
    return;
  }

  // De-duplicate and validate before signalling. Non-numeric lines become NaN
  // and blank lines become 0; both are rejected, as is our own PID.
  // process.kill(0, …) would target the whole process group.
  const pids = new Set(
    output
      .split("\n")
      .map((line) => Number(line.trim()))
      .filter((pid) => Number.isInteger(pid) && pid > 0 && pid !== process.pid),
  );

  for (const pid of pids) {
    try {
      process.kill(pid, "SIGTERM");
      console.log(`[llama-proxy] Terminated old instance (PID: ${pid})`);
    } catch {
      // Process may have already exited
    }
  }
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Proxy server
|
// Proxy server
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -165,15 +195,16 @@ function startProxy(): http.Server {
|
|||||||
});
|
});
|
||||||
|
|
||||||
server.listen(PROXY_PORT, "127.0.0.1", () => {
|
server.listen(PROXY_PORT, "127.0.0.1", () => {
|
||||||
// Server is up
|
console.log(`[llama-proxy] Proxy started on port ${PROXY_PORT}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
server.on("error", (err: NodeJS.ErrnoException) => {
|
server.on("error", (err: NodeJS.ErrnoException) => {
|
||||||
if (err.code === "EADDRINUSE") {
|
if (err.code === "EADDRINUSE") {
|
||||||
console.warn(
|
console.error(
|
||||||
`[llama-proxy] Port ${PROXY_PORT} already in use — proxy not started. ` +
|
`[llama-proxy] Port ${PROXY_PORT} already in use. ` +
|
||||||
`If a previous pi session left it running, kill it and reload.`,
|
`Killing old instances and retrying...`,
|
||||||
);
|
);
|
||||||
|
killExistingProxy();
|
||||||
} else {
|
} else {
|
||||||
console.error("[llama-proxy] Server error:", err);
|
console.error("[llama-proxy] Server error:", err);
|
||||||
}
|
}
|
||||||
@@ -187,7 +218,20 @@ function startProxy(): http.Server {
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
export default function (pi: ExtensionAPI) {
|
export default function (pi: ExtensionAPI) {
|
||||||
const server = startProxy();
|
let server: http.Server | null = null;
|
||||||
|
let proxyEnabled = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start the proxy and register the provider override.
|
||||||
|
*/
|
||||||
|
function enableProxy(): void {
|
||||||
|
if (proxyEnabled) {
|
||||||
|
console.log("[llama-proxy] Proxy already enabled");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
killExistingProxy();
|
||||||
|
server = startProxy();
|
||||||
|
|
||||||
// Override the llama-cpp provider's baseUrl to route through our proxy.
|
// Override the llama-cpp provider's baseUrl to route through our proxy.
|
||||||
// models.json model definitions are preserved; only the endpoint changes.
|
// models.json model definitions are preserved; only the endpoint changes.
|
||||||
@@ -195,7 +239,55 @@ export default function (pi: ExtensionAPI) {
|
|||||||
baseUrl: `http://127.0.0.1:${PROXY_PORT}/v1`,
|
baseUrl: `http://127.0.0.1:${PROXY_PORT}/v1`,
|
||||||
});
|
});
|
||||||
|
|
||||||
pi.on("session_end", async () => {
|
proxyEnabled = true;
|
||||||
|
console.log("[llama-proxy] Proxy enabled");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Disable the proxy and restore default provider.
|
||||||
|
*/
|
||||||
|
function disableProxy(): void {
|
||||||
|
if (!proxyEnabled) {
|
||||||
|
console.log("[llama-proxy] Proxy already disabled");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (server) {
|
||||||
server.close();
|
server.close();
|
||||||
|
server = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset provider to default (no baseUrl override)
|
||||||
|
pi.registerProvider("llama-cpp", {});
|
||||||
|
|
||||||
|
proxyEnabled = false;
|
||||||
|
console.log("[llama-proxy] Proxy disabled");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register the /llama-proxy command to toggle the proxy
|
||||||
|
pi.registerCommand("llama-proxy", async (args) => {
|
||||||
|
const action = args[0]?.toLowerCase() || "";
|
||||||
|
|
||||||
|
if (action === "on") {
|
||||||
|
enableProxy();
|
||||||
|
} else if (action === "off") {
|
||||||
|
disableProxy();
|
||||||
|
} else if (action === "status") {
|
||||||
|
console.log(`[llama-proxy] Status: ${proxyEnabled ? "enabled" : "disabled"}`);
|
||||||
|
} else {
|
||||||
|
// Toggle if no argument
|
||||||
|
if (proxyEnabled) {
|
||||||
|
disableProxy();
|
||||||
|
} else {
|
||||||
|
enableProxy();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Clean up on session end
|
||||||
|
pi.on("session_end", async () => {
|
||||||
|
if (server) {
|
||||||
|
server.close();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import {
|
|||||||
clampPercent,
|
clampPercent,
|
||||||
colorForPercent,
|
colorForPercent,
|
||||||
detectProvider,
|
detectProvider,
|
||||||
|
ensureFreshAuthForProviders,
|
||||||
fetchAllUsages,
|
fetchAllUsages,
|
||||||
fetchClaudeUsage,
|
fetchClaudeUsage,
|
||||||
fetchCodexUsage,
|
fetchCodexUsage,
|
||||||
@@ -30,6 +31,7 @@ import {
|
|||||||
readUsageCache,
|
readUsageCache,
|
||||||
resolveUsageEndpoints,
|
resolveUsageEndpoints,
|
||||||
writeUsageCache,
|
writeUsageCache,
|
||||||
|
type OAuthProviderId,
|
||||||
type ProviderKey,
|
type ProviderKey,
|
||||||
type UsageByProvider,
|
type UsageByProvider,
|
||||||
type UsageData,
|
type UsageData,
|
||||||
@@ -443,34 +445,60 @@ export default function (pi: ExtensionAPI) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Actually hit the API ---
|
// --- Proactive token refresh ---
|
||||||
// Skip independent token refresh — pi manages OAuth tokens and refreshes
|
// Before hitting the API, check whether the stored access token is expired.
|
||||||
// them in memory. A parallel refresh here would cause token rotation
|
// This is the main cause of HTTP 401 errors: switching accounts via
|
||||||
// conflicts (Anthropic invalidates the old refresh token on use).
|
// /switch-claude restores a profile whose access token has since expired
|
||||||
|
// (the refresh token is still valid). We use pi's own OAuth resolver so
|
||||||
|
// the new tokens are written back to auth.json and the profile stays in
|
||||||
|
// sync. This is safe at turn_start / session_start because pi hasn't made
|
||||||
|
// any Claude API calls yet, so there's no parallel refresh to conflict with.
|
||||||
|
const oauthId = providerToOAuthProviderId(active);
|
||||||
|
let effectiveAuth = auth;
|
||||||
|
if (oauthId && active !== "zai") {
|
||||||
|
const creds = auth[oauthId as keyof typeof auth] as
|
||||||
|
| { access?: string; refresh?: string; expires?: number }
|
||||||
|
| undefined;
|
||||||
|
const expires = typeof creds?.expires === "number" ? creds.expires : 0;
|
||||||
|
const tokenExpiredOrMissing =
|
||||||
|
!creds?.access || (expires > 0 && Date.now() + 60_000 >= expires);
|
||||||
|
if (tokenExpiredOrMissing && creds?.refresh) {
|
||||||
|
try {
|
||||||
|
const refreshed = await ensureFreshAuthForProviders([oauthId as OAuthProviderId], {
|
||||||
|
auth,
|
||||||
|
persist: true,
|
||||||
|
});
|
||||||
|
if (refreshed.auth) effectiveAuth = refreshed.auth;
|
||||||
|
} catch {
|
||||||
|
// Ignore refresh errors — fall through with existing auth
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let result: UsageData;
|
let result: UsageData;
|
||||||
|
|
||||||
if (active === "codex") {
|
if (active === "codex") {
|
||||||
const access = auth["openai-codex"]?.access;
|
const access = effectiveAuth["openai-codex"]?.access;
|
||||||
result = access
|
result = access
|
||||||
? await fetchCodexUsage(access)
|
? await fetchCodexUsage(access)
|
||||||
: { session: 0, weekly: 0, error: "missing access token (try /login again)" };
|
: { session: 0, weekly: 0, error: "missing access token (try /login again)" };
|
||||||
} else if (active === "claude") {
|
} else if (active === "claude") {
|
||||||
const access = auth.anthropic?.access;
|
const access = effectiveAuth.anthropic?.access;
|
||||||
result = access
|
result = access
|
||||||
? await fetchClaudeUsage(access)
|
? await fetchClaudeUsage(access)
|
||||||
: { session: 0, weekly: 0, error: "missing access token (try /login again)" };
|
: { session: 0, weekly: 0, error: "missing access token (try /login again)" };
|
||||||
} else if (active === "zai") {
|
} else if (active === "zai") {
|
||||||
const token = auth.zai?.access || auth.zai?.key;
|
const token = effectiveAuth.zai?.access || effectiveAuth.zai?.key;
|
||||||
result = token
|
result = token
|
||||||
? await fetchZaiUsage(token, { endpoints })
|
? await fetchZaiUsage(token, { endpoints })
|
||||||
: { session: 0, weekly: 0, error: "missing token (try /login again)" };
|
: { session: 0, weekly: 0, error: "missing token (try /login again)" };
|
||||||
} else if (active === "gemini") {
|
} else if (active === "gemini") {
|
||||||
const creds = auth["google-gemini-cli"];
|
const creds = effectiveAuth["google-gemini-cli"];
|
||||||
result = creds?.access
|
result = creds?.access
|
||||||
? await fetchGoogleUsage(creds.access, endpoints.gemini, creds.projectId, "gemini", { endpoints })
|
? await fetchGoogleUsage(creds.access, endpoints.gemini, creds.projectId, "gemini", { endpoints })
|
||||||
: { session: 0, weekly: 0, error: "missing access token (try /login again)" };
|
: { session: 0, weekly: 0, error: "missing access token (try /login again)" };
|
||||||
} else {
|
} else {
|
||||||
const creds = auth["google-antigravity"];
|
const creds = effectiveAuth["google-antigravity"];
|
||||||
result = creds?.access
|
result = creds?.access
|
||||||
? await fetchGoogleUsage(creds.access, endpoints.antigravity, creds.projectId, "antigravity", { endpoints })
|
? await fetchGoogleUsage(creds.access, endpoints.antigravity, creds.projectId, "antigravity", { endpoints })
|
||||||
: { session: 0, weekly: 0, error: "missing access token (try /login again)" };
|
: { session: 0, weekly: 0, error: "missing access token (try /login again)" };
|
||||||
@@ -479,18 +507,35 @@ export default function (pi: ExtensionAPI) {
|
|||||||
state[active] = result;
|
state[active] = result;
|
||||||
|
|
||||||
// Write result + rate-limit state to shared cache so other sessions
|
// Write result + rate-limit state to shared cache so other sessions
|
||||||
// (and our own next timer tick) don't need to re-hit the API.
|
// don't need to re-hit the API within CACHE_TTL_MS.
|
||||||
|
//
|
||||||
|
// Error results (other than 429) are NOT cached: they should be retried
|
||||||
|
// on the next input instead of being replayed from cache for 15 minutes.
|
||||||
|
// The most common error is HTTP 401 (expired token after an account switch)
|
||||||
|
// which resolves on the very next poll once the token is refreshed above.
|
||||||
|
if (result.error) {
|
||||||
|
if (result.error === "HTTP 429") {
|
||||||
|
// Write rate-limit backoff but preserve the last good data in cache.
|
||||||
|
const nextCache: import("./core").UsageCache = {
|
||||||
|
timestamp: cache?.timestamp ?? now,
|
||||||
|
data: { ...(cache?.data ?? {}) },
|
||||||
|
rateLimitedUntil: {
|
||||||
|
...(cache?.rateLimitedUntil ?? {}),
|
||||||
|
[active]: now + RATE_LIMITED_BACKOFF_MS,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
writeUsageCache(nextCache);
|
||||||
|
}
|
||||||
|
// All other errors: don't update cache — next turn will retry from scratch.
|
||||||
|
} else {
|
||||||
const nextCache: import("./core").UsageCache = {
|
const nextCache: import("./core").UsageCache = {
|
||||||
timestamp: now,
|
timestamp: now,
|
||||||
data: { ...(cache?.data ?? {}), [active]: result },
|
data: { ...(cache?.data ?? {}), [active]: result },
|
||||||
rateLimitedUntil: { ...(cache?.rateLimitedUntil ?? {}) },
|
rateLimitedUntil: { ...(cache?.rateLimitedUntil ?? {}) },
|
||||||
};
|
};
|
||||||
if (result.error === "HTTP 429") {
|
|
||||||
nextCache.rateLimitedUntil![active] = now + RATE_LIMITED_BACKOFF_MS;
|
|
||||||
} else {
|
|
||||||
delete nextCache.rateLimitedUntil![active];
|
delete nextCache.rateLimitedUntil![active];
|
||||||
}
|
|
||||||
writeUsageCache(nextCache);
|
writeUsageCache(nextCache);
|
||||||
|
}
|
||||||
|
|
||||||
state.lastPoll = now;
|
state.lastPoll = now;
|
||||||
updateStatus();
|
updateStatus();
|
||||||
|
|||||||
Reference in New Issue
Block a user