From bea8fa19b396ccc9c6acfabeb0b5156c8a9e0fe8 Mon Sep 17 00:00:00 2001
From: Jonas H <haugesen@lightbrick.com>
Date: Mon, 9 Mar 2026 11:53:09 +0100
Subject: [PATCH] pi extensions: make llama proxy opt-in, refresh usage-bars OAuth tokens

---
 pi/.pi/agent/extensions/llama-schema-proxy.ts | 114 ++++++++++++++++--
 pi/.pi/agent/extensions/usage-bars/index.ts   |  81 ++++++++++---
 2 files changed, 166 insertions(+), 29 deletions(-)

diff --git a/pi/.pi/agent/extensions/llama-schema-proxy.ts b/pi/.pi/agent/extensions/llama-schema-proxy.ts
index 78abaa5..b80111e 100644
--- a/pi/.pi/agent/extensions/llama-schema-proxy.ts
+++ b/pi/.pi/agent/extensions/llama-schema-proxy.ts
@@ -6,7 +6,7 @@
  * have complex union-type parameters represented as `{"description": "..."}` with
  * no `type`, which causes llama-server to return a 400 error.
  *
- * This extension starts a tiny local HTTP proxy on port 8081 that:
+ * This extension provides an optional tiny local HTTP proxy on port 8081 that:
  *   1. Intercepts outgoing OpenAI-compatible API calls
  *   2. Walks tool schemas and adds `"type": "string"` to any schema node
  *      that is missing a type declaration
@@ -15,10 +15,13 @@
  *
  * It also overrides the `llama-cpp` provider's baseUrl to point at the proxy,
  * so no changes to models.json are needed (beyond what's already there).
+ *
+ * Use the `/llama-proxy [on|off|status]` command to control the proxy; with no argument it toggles. Off by default.
  */
 
 import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
 import * as http from "http";
+import { execSync } from "child_process";
 
 const PROXY_PORT = 8081;
 const TARGET_HOST = "127.0.0.1";
@@ -97,6 +100,33 @@ function sanitizeRequestBody(body: Record<string, unknown>): Record<string, unkn
 	};
 }
 
+// ---------------------------------------------------------------------------
+// Process management
+// ---------------------------------------------------------------------------
+
+/**
+ * Best-effort SIGTERM of other LISTENERS on PROXY_PORT; skips clients and self.
+ */
+function killExistingProxy(): void {
+	try {
+		// -iTCP + -sTCP:LISTEN selects only the listening socket, not peers connected to it.
+		const output = execSync(`lsof -t -iTCP:${PROXY_PORT} -sTCP:LISTEN 2>/dev/null || true`, {
+			encoding: "utf-8",
+		});
+		const pids = output.split("\n").map(Number).filter((p) => Number.isInteger(p) && p > 0 && p !== process.pid);
+		for (const pid of pids) {
+			try {
+				process.kill(pid, "SIGTERM");
+				console.log(`[llama-proxy] Terminated old instance (PID: ${pid})`);
+			} catch {
+				// Process may have already exited
+			}
+		}
+	} catch {
+		// lsof not available or other error — continue anyway
+	}
+}
+
 // ---------------------------------------------------------------------------
 // Proxy server
 // ---------------------------------------------------------------------------
@@ -165,15 +195,16 @@ function startProxy(): http.Server {
 	});
 
 	server.listen(PROXY_PORT, "127.0.0.1", () => {
-		// Server is up
+		console.log(`[llama-proxy] Proxy started on port ${PROXY_PORT}`);
 	});
 
 	server.on("error", (err: NodeJS.ErrnoException) => {
 		if (err.code === "EADDRINUSE") {
-			console.warn(
-				`[llama-proxy] Port ${PROXY_PORT} already in use — proxy not started. ` +
-					`If a previous pi session left it running, kill it and reload.`,
+			console.error(
+				`[llama-proxy] Port ${PROXY_PORT} already in use. ` +
+					`Killing old instances and retrying...`,
 			);
+			killExistingProxy();
 		} else {
 			console.error("[llama-proxy] Server error:", err);
 		}
@@ -187,15 +218,76 @@ function startProxy(): http.Server {
 // ---------------------------------------------------------------------------
 
 export default function (pi: ExtensionAPI) {
-	const server = startProxy();
+	let server: http.Server | null = null; // bound, or still binding
+	let proxyEnabled = false; // true once the provider points at the proxy
 
-	// Override the llama-cpp provider's baseUrl to route through our proxy.
-	// models.json model definitions are preserved; only the endpoint changes.
-	pi.registerProvider("llama-cpp", {
-		baseUrl: `http://127.0.0.1:${PROXY_PORT}/v1`,
+	/**
+	 * Start the proxy. The llama-cpp provider is repointed only once the port is
+	 * bound, so a failed listen never leaves pi talking to a dead endpoint.
+	 */
+	function enableProxy(): void {
+		if (server) {
+			console.log("[llama-proxy] Proxy already enabled");
+			return;
+		}
+
+		killExistingProxy();
+		const started = startProxy();
+		server = started;
+
+		started.once("listening", () => {
+			// models.json model definitions are preserved; only the endpoint changes.
+			pi.registerProvider("llama-cpp", {
+				baseUrl: `http://127.0.0.1:${PROXY_PORT}/v1`,
+			});
+			proxyEnabled = true;
+			console.log("[llama-proxy] Proxy enabled");
+		});
+		// startProxy() logs bind failures; drop the dead server so a retry works.
+		started.once("error", () => {
+			if (server === started && !proxyEnabled) server = null;
+		});
+	}
+
+	/**
+	 * Disable the proxy and restore default provider.
+	 */
+	function disableProxy(): void {
+		if (!proxyEnabled) {
+			console.log("[llama-proxy] Proxy already disabled");
+			return;
+		}
+
+		server?.close();
+		server = null;
+
+		// NOTE(review): assumes an empty config clears the baseUrl override — confirm.
+		pi.registerProvider("llama-cpp", {});
+
+		proxyEnabled = false;
+		console.log("[llama-proxy] Proxy disabled");
+	}
+
+	// Register the /llama-proxy command: "on", "off", "status", anything else toggles
+	pi.registerCommand("llama-proxy", async (args) => {
+		const action = args[0]?.toLowerCase() ?? "";
+
+		if (action === "on") {
+			enableProxy();
+		} else if (action === "off") {
+			disableProxy();
+		} else if (action === "status") {
+			console.log(`[llama-proxy] Status: ${proxyEnabled ? "enabled" : "disabled"}`);
+		} else if (proxyEnabled) {
+			// No (or unrecognised) argument: toggle
+			disableProxy();
+		} else {
+			enableProxy();
+		}
 	});
 
+	// Clean up on session end
 	pi.on("session_end", async () => {
-		server.close();
+		server?.close();
 	});
 }
diff --git a/pi/.pi/agent/extensions/usage-bars/index.ts b/pi/.pi/agent/extensions/usage-bars/index.ts
index 9347819..4f9acb1 100644
--- a/pi/.pi/agent/extensions/usage-bars/index.ts
+++ b/pi/.pi/agent/extensions/usage-bars/index.ts
@@ -20,6 +20,7 @@ import {
   clampPercent,
   colorForPercent,
   detectProvider,
+  ensureFreshAuthForProviders,
   fetchAllUsages,
   fetchClaudeUsage,
   fetchCodexUsage,
@@ -30,6 +31,7 @@ import {
   readUsageCache,
   resolveUsageEndpoints,
   writeUsageCache,
+  type OAuthProviderId,
   type ProviderKey,
   type UsageByProvider,
   type UsageData,
@@ -443,34 +445,60 @@ export default function (pi: ExtensionAPI) {
       return;
     }
 
-    // --- Actually hit the API ---
-    // Skip independent token refresh — pi manages OAuth tokens and refreshes
-    // them in memory. A parallel refresh here would cause token rotation
-    // conflicts (Anthropic invalidates the old refresh token on use).
+    // --- Proactive token refresh ---
+    // Before hitting the API, check whether the stored access token is expired.
+    // This is the main cause of HTTP 401 errors: switching accounts via
+    // /switch-claude restores a profile whose access token has since expired
+    // (the refresh token is still valid). We use pi's own OAuth resolver so
+    // the new tokens are written back to auth.json and the profile stays in
+    // sync. This is safe at turn_start / session_start because pi hasn't made
+    // any Claude API calls yet, so there's no parallel refresh to conflict with.
+    const oauthId = providerToOAuthProviderId(active);
+    let effectiveAuth = auth;
+    if (oauthId && active !== "zai") {
+      const creds = auth[oauthId as keyof typeof auth] as
+        | { access?: string; refresh?: string; expires?: number }
+        | undefined;
+      const expires = typeof creds?.expires === "number" ? creds.expires : 0;
+      const tokenExpiredOrMissing =
+        !creds?.access || (expires > 0 && Date.now() + 60_000 >= expires);
+      if (tokenExpiredOrMissing && creds?.refresh) {
+        try {
+          const refreshed = await ensureFreshAuthForProviders([oauthId as OAuthProviderId], {
+            auth,
+            persist: true,
+          });
+          if (refreshed.auth) effectiveAuth = refreshed.auth;
+        } catch {
+          // Ignore refresh errors — fall through with existing auth
+        }
+      }
+    }
+
     let result: UsageData;
 
     if (active === "codex") {
-      const access = auth["openai-codex"]?.access;
+      const access = effectiveAuth["openai-codex"]?.access;
       result = access
         ? await fetchCodexUsage(access)
         : { session: 0, weekly: 0, error: "missing access token (try /login again)" };
     } else if (active === "claude") {
-      const access = auth.anthropic?.access;
+      const access = effectiveAuth.anthropic?.access;
       result = access
         ? await fetchClaudeUsage(access)
         : { session: 0, weekly: 0, error: "missing access token (try /login again)" };
     } else if (active === "zai") {
-      const token = auth.zai?.access || auth.zai?.key;
+      const token = effectiveAuth.zai?.access || effectiveAuth.zai?.key;
       result = token
         ? await fetchZaiUsage(token, { endpoints })
         : { session: 0, weekly: 0, error: "missing token (try /login again)" };
     } else if (active === "gemini") {
-      const creds = auth["google-gemini-cli"];
+      const creds = effectiveAuth["google-gemini-cli"];
       result = creds?.access
         ? await fetchGoogleUsage(creds.access, endpoints.gemini, creds.projectId, "gemini", { endpoints })
         : { session: 0, weekly: 0, error: "missing access token (try /login again)" };
     } else {
-      const creds = auth["google-antigravity"];
+      const creds = effectiveAuth["google-antigravity"];
       result = creds?.access
         ? await fetchGoogleUsage(creds.access, endpoints.antigravity, creds.projectId, "antigravity", { endpoints })
         : { session: 0, weekly: 0, error: "missing access token (try /login again)" };
@@ -479,18 +507,35 @@ export default function (pi: ExtensionAPI) {
     state[active] = result;
 
     // Write result + rate-limit state to shared cache so other sessions
-    // (and our own next timer tick) don't need to re-hit the API.
-    const nextCache: import("./core").UsageCache = {
-      timestamp: now,
-      data: { ...(cache?.data ?? {}), [active]: result },
-      rateLimitedUntil: { ...(cache?.rateLimitedUntil ?? {}) },
-    };
-    if (result.error === "HTTP 429") {
-      nextCache.rateLimitedUntil![active] = now + RATE_LIMITED_BACKOFF_MS;
+    // don't need to re-hit the API within CACHE_TTL_MS.
+    //
+    // Error results (other than 429) are NOT cached: they should be retried
+    // on the next input instead of being replayed from cache for 15 minutes.
+    // The most common error is HTTP 401 (expired token after an account switch)
+    // which resolves on the very next poll once the token is refreshed above.
+    if (result.error) {
+      if (result.error === "HTTP 429") {
+        // Write rate-limit backoff but preserve the last good data in cache.
+        const nextCache: import("./core").UsageCache = {
+          timestamp: cache?.timestamp ?? now,
+          data: { ...(cache?.data ?? {}) },
+          rateLimitedUntil: {
+            ...(cache?.rateLimitedUntil ?? {}),
+            [active]: now + RATE_LIMITED_BACKOFF_MS,
+          },
+        };
+        writeUsageCache(nextCache);
+      }
+      // All other errors: don't update cache — next turn will retry from scratch.
     } else {
+      const nextCache: import("./core").UsageCache = {
+        timestamp: now,
+        data: { ...(cache?.data ?? {}), [active]: result },
+        rateLimitedUntil: { ...(cache?.rateLimitedUntil ?? {}) },
+      };
       delete nextCache.rateLimitedUntil![active];
+      writeUsageCache(nextCache);
     }
-    writeUsageCache(nextCache);
 
     state.lastPoll = now;
     updateStatus();