Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions config.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,25 @@
# Working directory (default: current directory)
# working_dir = "/path/to/project"

# Cache keep-alive interval in seconds (default: 0 = disabled).
# Sends a minimal API request to keep Anthropic's prompt cache warm during
# idle periods, preventing expensive cache re-creation when you return to
# a session after being away.
#
# How it works:
# - Anthropic caches your conversation prefix (system prompt + message history)
# - Default cache TTL is 5 minutes; unused caches expire and must be re-written
# - Keep-alive sends a 1-token request using your existing prefix to refresh the TTL
# - Cache reads cost 10% of normal input pricing (e.g., $0.50/MTok vs $5 for Opus)
# - Without keep-alive, returning after 5+ min idle costs full cache write (1.25x input)
#
# Recommended values:
# 240 = ping every 4 min (safe margin for 5-min default TTL)
# 3300 = ping every 55 min (if using 1-hour extended TTL)
# 0 = disabled (default)
#
# cache_keepalive_secs = 240

[agents.foreground]
# Model to use (default: claude-opus-4-6)
model = "claude-opus-4-6"
Expand Down
29 changes: 29 additions & 0 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,12 @@ impl App {
self.chat.render(&mut self.terminal);
self.draw();

// Cache keep-alive timer: check every 30s if a keep-alive ping is needed.
// The actual interval is configured via `cache_keepalive_secs` in config;
// this tick rate just controls how often we check.
let mut keepalive_tick = tokio::time::interval(Duration::from_secs(30));
keepalive_tick.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);

// CANCEL-SAFETY: When one branch of tokio::select! completes, all other
// futures are dropped (not paused). Any async fn polled here must store
// its state on `self`, not in local variables, so it can resume correctly
Expand Down Expand Up @@ -398,6 +404,29 @@ impl App {
self.draw();
}
}
// Cache keep-alive: send minimal ping to refresh Anthropic prompt cache TTL
_ = keepalive_tick.tick() => {
if let Some(agent_mutex) = self.agents.primary() {
let mut agent = agent_mutex.lock().await;
if agent.needs_cache_keepalive() {
match agent.send_cache_keepalive().await {
Ok(usage) => {
if usage.cache_read_tokens > 0 {
tracing::info!(
"Cache keep-alive refreshed {} cached tokens",
usage.cache_read_tokens
);
} else {
tracing::debug!("Cache keep-alive sent (no cached tokens in response)");
}
}
Err(e) => {
tracing::debug!("Cache keep-alive failed: {}", e);
}
}
}
}
}
}

if self.should_quit {
Expand Down
13 changes: 13 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pub const CORRECTIONS_FILENAME: &str = "corrections.json";
/// max_retries: 5,
/// compaction_thinking_budget: 8_000,
/// fast_mode: false,
/// cache_keepalive_secs: 0,
/// };
/// ```
#[derive(Debug, Clone)]
Expand All @@ -62,6 +63,10 @@ pub struct AgentRuntimeConfig {
/// Enable fast mode (research preview) for lower-latency responses.
/// Only effective with opus-4-6 models.
pub fast_mode: bool,
/// Interval in seconds for cache keep-alive pings. 0 = disabled.
/// Sends a minimal API request to refresh Anthropic's prompt cache TTL
/// before it expires, avoiding expensive cache re-creation on idle sessions.
pub cache_keepalive_secs: u64,
}

impl Default for AgentRuntimeConfig {
Expand All @@ -73,6 +78,7 @@ impl Default for AgentRuntimeConfig {
max_retries: 5,
compaction_thinking_budget: 8_000,
fast_mode: false,
cache_keepalive_secs: 0,
}
}
}
Expand All @@ -92,6 +98,7 @@ impl AgentRuntimeConfig {
max_retries: config.general.max_retries,
compaction_thinking_budget: config.general.compaction_thinking_budget,
fast_mode: config.agents.foreground.fast_mode,
cache_keepalive_secs: config.general.cache_keepalive_secs,
}
}

Expand All @@ -104,6 +111,7 @@ impl AgentRuntimeConfig {
max_retries: config.general.max_retries,
compaction_thinking_budget: config.general.compaction_thinking_budget,
fast_mode: config.agents.background.fast_mode,
cache_keepalive_secs: config.general.cache_keepalive_secs,
}
}
}
Expand Down Expand Up @@ -225,6 +233,10 @@ pub struct GeneralConfig {
pub compaction_threshold: u32,
/// Thinking budget for compaction requests (default: 8,000)
pub compaction_thinking_budget: u32,
/// Interval in seconds for cache keep-alive pings. 0 = disabled (default).
/// Sends a minimal API request to keep Anthropic's prompt cache warm during
/// idle periods. Recommended: 240 (4 min) for 5-min TTL, 3300 (55 min) for 1-hour TTL.
pub cache_keepalive_secs: u64,
}

#[cfg(feature = "cli")]
Expand All @@ -235,6 +247,7 @@ impl Default for GeneralConfig {
max_retries: 5,
compaction_threshold: 192_000,
compaction_thinking_budget: 8_000,
cache_keepalive_secs: 0,
}
}
}
Expand Down
101 changes: 101 additions & 0 deletions src/llm/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ pub struct Agent {
/// Retry attempt counter, persists across calls to exec_chat_with_retry.
/// Reset on successful request or new user message.
retry_attempt: u32,

/// Timestamp of the last successful API request (for cache keep-alive scheduling).
last_request_time: Option<Instant>,
}

impl Agent {
Expand Down Expand Up @@ -233,6 +236,7 @@ impl Agent {

fast_mode_cooldown_until: None,
retry_attempt: 0,
last_request_time: None,
}
}

Expand Down Expand Up @@ -270,6 +274,7 @@ impl Agent {

fast_mode_cooldown_until: None,
retry_attempt: 0,
last_request_time: None,
}
}

Expand Down Expand Up @@ -400,6 +405,101 @@ impl Agent {
self.active_stream = None;
}

/// Returns true if a cache keep-alive ping is needed.
///
/// A ping is warranted only when all of the following hold:
/// - keep-alive is enabled (`cache_keepalive_secs` > 0),
/// - the agent is idle (no active streaming state),
/// - the configured model is not routed through OpenRouter
///   (prompt caching here is Anthropic-specific),
/// - at least one prior request exists (so there is a cached prefix), and
/// - the configured interval has elapsed since the last request.
pub fn needs_cache_keepalive(&self) -> bool {
    let secs = self.config.cache_keepalive_secs;
    // Feature disabled, or the agent is mid-stream: never ping.
    if secs == 0 || self.state.is_some() {
        return false;
    }
    // The Anthropic prompt cache does not apply to OpenRouter-routed models.
    if is_openrouter_model(&self.config.model) {
        return false;
    }
    // Without a prior request there is nothing cached to refresh.
    match self.last_request_time {
        Some(last) => last.elapsed() >= Duration::from_secs(secs),
        None => false,
    }
}

/// Send a minimal API request to keep the Anthropic prompt cache warm.
///
/// Uses the existing message history as the cache prefix, appends a
/// disposable keep-alive message, sends with max_tokens=1 and no tools,
/// then discards the response. The keep-alive message is NOT added to
/// the agent's conversation history.
///
/// Returns the provider-reported [`Usage`] for the ping on success.
/// On both success and failure, `last_request_time` is advanced so a
/// failing ping does not immediately retry on the next scheduler tick.
pub async fn send_cache_keepalive(&mut self) -> Result<Usage> {
    info!("Sending cache keep-alive ping");

    // Build messages: clone current history + cache_control on last real message.
    // Working on a clone keeps `self.messages` (the real history) untouched.
    let mut messages = self.messages.clone();
    if let Some(last_msg) = messages.last_mut() {
        // NOTE(review): this replaces any options already present on the cloned
        // last message — presumably cache_control is the only option that matters
        // for the ping; confirm against how normal requests set message options.
        last_msg.options = Some(CacheControl::Ephemeral.into());
    }
    // Append a disposable keep-alive user message (not stored in self.messages)
    messages.push(ChatMessage::user("[cache-keepalive] Respond with: ack"));

    let request = ChatRequest::new(messages);

    // Build minimal headers (same auth as normal requests)
    let headers = if let Some(ref oauth) = self.oauth {
        Headers::from([
            (
                "authorization".to_string(),
                format!("Bearer {}", oauth.access_token),
            ),
            ("anthropic-beta".to_string(), ANTHROPIC_BETA_HEADER.to_string()),
            ("user-agent".to_string(), ANTHROPIC_USER_AGENT.to_string()),
        ])
    } else {
        // API-key path: only a beta header is added here; the client supplies
        // its own auth. NOTE(review): this hard-coded beta value differs from
        // ANTHROPIC_BETA_HEADER used on the OAuth path — confirm intentional.
        Headers::from([("anthropic-beta".to_string(), "interleaved-thinking-2025-05-14".to_string())])
    };

    // Minimal options: 1 output token, no tools, no thinking
    let chat_options = ChatOptions::default()
        .with_max_tokens(1)
        .with_capture_usage(true)
        .with_extra_headers(headers);

    match self
        .client
        .exec_chat_stream(&self.config.model, request, Some(&chat_options))
        .await
    {
        Ok(resp) => {
            // Drain the stream to complete the request; only the final End
            // event's captured usage is kept — intermediate events are discarded.
            let mut stream = Box::pin(resp.stream);
            let mut turn_usage = Usage::default();
            while let Some(event) = stream.next().await {
                if let Ok(ChatStreamEvent::End(end)) = event {
                    if let Some(ref genai_usage) = end.captured_usage {
                        turn_usage = Self::extract_turn_usage(genai_usage);
                    }
                }
            }
            self.last_request_time = Some(Instant::now());
            info!(
                "Cache keep-alive complete: {} cached tokens refreshed",
                turn_usage.cache_read_tokens
            );
            Ok(turn_usage)
        }
        Err(e) => {
            warn!("Cache keep-alive failed: {:#}", e);
            // Still update timestamp to avoid hammering on repeated failures
            self.last_request_time = Some(Instant::now());
            Err(anyhow::anyhow!("Cache keep-alive failed: {:#}", e))
        }
    }
}

/// Refresh OAuth token if expired. Returns true if refresh was needed and succeeded.
#[allow(dead_code)]
pub async fn refresh_oauth_if_needed(&mut self) -> Result<bool> {
Expand Down Expand Up @@ -626,6 +726,7 @@ impl Agent {
Ok(resp) => {
info!("Chat request successful");
self.retry_attempt = 0;
self.last_request_time = Some(Instant::now());
Ok(resp)
},
Err(e) => {
Expand Down