From 4324a4e776bdfe1c9e05d64eb9fec2f958e22a86 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Tue, 16 Jun 2026 14:32:06 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20fix:=20Address=20Codex?= =?UTF-8?q?=20round=203=20=E2=80=94=20approval=20expiry=20lifecycle=20comp?= =?UTF-8?q?leteness?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three P2 findings on 780833d908, all valid: - H1 status consistency: /chat/status now treats a non-expired requires_action job as active (matching /chat/active), so a client refreshing while an approval is pending resumes/subscribes instead of treating the run as finished and stranding it. - H2 active expiry: cleanup now finalizes past-expiry requires_action jobs (→ aborted) in both stores instead of only filtering them from the active list — an expired prompt no longer lingers resident until key TTL. Redis routes through transitionStatus (terminal content cleanup); in-memory marks terminal + reclaims. - H3 resumed liveness: in-memory stale-running check uses max(lastActivity, lastActiveAt, createdAt), so a just-resumed job isn't reaped on a stale per-chunk lastActivity entry before the next chunk. Test added: in-memory cleanup finalizes + reclaims a past-expiry approval. tsc + lint clean; policy + type-contract specs pass. --- api/server/routes/agents/index.js | 9 ++++++- .../stream/__tests__/pendingAction.spec.ts | 21 +++++++++++++++ .../implementations/InMemoryJobStore.ts | 26 ++++++++++++++++--- .../stream/implementations/RedisJobStore.ts | 17 ++++++++++++ 4 files changed, 68 insertions(+), 5 deletions(-) diff --git a/api/server/routes/agents/index.js b/api/server/routes/agents/index.js index 5684fc9622..7cc11c15bf 100644 --- a/api/server/routes/agents/index.js +++ b/api/server/routes/agents/index.js @@ -202,7 +202,14 @@ router.get('/chat/status/:conversationId', async (req, res) => { // Get resume state which contains aggregatedContent // Avoid calling both getStreamInfo and getResumeState (both fetch content) const resumeState = await GenerationJobManager.getResumeState(conversationId); - const isActive = job.status === 'running'; + // A job paused for human review is still active (consistent with /chat/active), + // so the client resumes/subscribes rather than treating it as finished — unless + // its prompt has already expired (cleanup/expiry will finalize it). + const pendingExpired = + job.metadata.pendingAction?.expiresAt != null && + job.metadata.pendingAction.expiresAt <= Date.now(); + const isActive = + job.status === 'running' || (job.status === 'requires_action' && !pendingExpired); res.json({ active: isActive, diff --git a/packages/api/src/stream/__tests__/pendingAction.spec.ts b/packages/api/src/stream/__tests__/pendingAction.spec.ts index bc764ea95e..de4136a513 100644 --- a/packages/api/src/stream/__tests__/pendingAction.spec.ts +++ b/packages/api/src/stream/__tests__/pendingAction.spec.ts @@ -228,3 +228,24 @@ describe('ApprovalLifecycle via GenerationJobManager.approvals (in-memory)', () }); }); }); + +describe('InMemoryJobStore — approval expiry cleanup', () => { + test('cleanup() finalizes and reclaims a past-expiry pending-approval job', async () => { + const store = new InMemoryJobStore({ ttlAfterComplete: 0 }); + await store.createJob('s1', 'u1'); + + const action = buildPendingAction( + buildToolApprovalPayload([{ name: 'shell', arguments: {}, tool_call_id: 'c1' }]), + { streamId: 's1', ttlMs: -1000 }, + ); + await store.transitionStatus('s1', { + from: 'running', + to: 'requires_action', + patch: { pendingAction: action, pendingActionId: action.actionId }, + }); + + // A past-expiry approval must be finalized + reclaimed, not left resident. + await store.cleanup(); + expect(await store.getJob('s1')).toBeNull(); + }); +}); diff --git a/packages/api/src/stream/implementations/InMemoryJobStore.ts b/packages/api/src/stream/implementations/InMemoryJobStore.ts index 3da1a41fc7..8c52ee829f 100644 --- a/packages/api/src/stream/implementations/InMemoryJobStore.ts +++ b/packages/api/src/stream/implementations/InMemoryJobStore.ts @@ -207,14 +207,32 @@ export class InMemoryJobStore implements IJobStore { if (this.ttlAfterComplete === 0 || now - job.completedAt > this.ttlAfterComplete) { toDelete.push(streamId); } + } else if (job.status === 'requires_action' && isPendingActionExpired(job)) { + // Past-due approval: finalize it (aborted) so it stops occupying the + // user slot and its content state is reclaimed, mirroring + // ApprovalLifecycle.expire(). Skipping it (active-list filter) alone + // would leave the job resident indefinitely. + job.status = 'aborted'; + job.completedAt = now; + job.error = 'Approval expired before a decision was made'; + delete job.pendingAction; + delete job.pendingActionId; + if (this.ttlAfterComplete === 0) { + toDelete.push(streamId); + } } else if (this.staleJobTimeout > 0 && job.status === 'running') { // Failsafe: reap jobs stuck in "running" with no generation activity for // longer than the stale timeout. These are crashed/hung generations that // never reached a terminal state; without this they accumulate their - // content state in memory until the process OOMs. Reaping keys off last - // activity (not creation time) so a long but live stream is never reaped, - // mirroring RedisJobStore refreshing the running TTL on each chunk. - const lastActive = this.lastActivity.get(streamId) ?? job.lastActiveAt ?? job.createdAt; + // content state in memory until the process OOMs. Reaping keys off the + // most recent liveness signal (not creation time) so a long but live + // stream is never reaped, and a just-resumed approval (fresh + // `lastActiveAt`) wins over a stale per-chunk `lastActivity` entry. + const lastActive = Math.max( + this.lastActivity.get(streamId) ?? 0, + job.lastActiveAt ?? 0, + job.createdAt, + ); if (now - lastActive > this.staleJobTimeout) { toDelete.push(streamId); staleRunning++; diff --git a/packages/api/src/stream/implementations/RedisJobStore.ts b/packages/api/src/stream/implementations/RedisJobStore.ts index acc5b27100..d96b835a9e 100644 --- a/packages/api/src/stream/implementations/RedisJobStore.ts +++ b/packages/api/src/stream/implementations/RedisJobStore.ts @@ -675,6 +675,23 @@ export class RedisJobStore implements IJobStore { return 1; } + // Past-due approval: finalize it (aborted) so it stops occupying the + // slot and its stream contents are reclaimed, mirroring + // ApprovalLifecycle.expire(). transitionStatus runs the terminal + // content cleanup (sets, chunks, run-steps, userJobs, completed TTL). + if (isPendingActionExpired(job)) { + await this.transitionStatus(streamId, { + from: 'requires_action', + to: 'aborted', + clear: ['pendingAction', 'pendingActionId'], + patch: { + error: 'Approval expired before a decision was made', + completedAt: Date.now(), + }, + }); + return 1; + } + return 0; }), );