mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-06-28 10:21:39 +00:00
🛡️ fix: Address Codex round 3 — approval expiry lifecycle completeness
Three P2 findings on 780833d908, all valid:
- H1 status consistency: /chat/status now treats a non-expired
requires_action job as active (matching /chat/active), so a client
refreshing while an approval is pending resumes/subscribes instead of
treating the run as finished and stranding it.
- H2 active expiry: cleanup now finalizes past-expiry requires_action jobs
(→ aborted) in both stores instead of only filtering them from the active
list — an expired prompt no longer lingers resident until key TTL. Redis
routes through transitionStatus (terminal content cleanup); in-memory marks
terminal + reclaims.
- H3 resumed liveness: in-memory stale-running check uses
max(lastActivity, lastActiveAt, createdAt), so a just-resumed job isn't
reaped on a stale per-chunk lastActivity entry before the next chunk.
Test added: in-memory cleanup finalizes + reclaims a past-expiry approval.
tsc + lint clean; policy + type-contract specs pass.
This commit is contained in:
parent
780833d908
commit
4324a4e776
4 changed files with 68 additions and 5 deletions
|
|
@ -202,7 +202,14 @@ router.get('/chat/status/:conversationId', async (req, res) => {
|
|||
// Get resume state which contains aggregatedContent
|
||||
// Avoid calling both getStreamInfo and getResumeState (both fetch content)
|
||||
const resumeState = await GenerationJobManager.getResumeState(conversationId);
|
||||
const isActive = job.status === 'running';
|
||||
// A job paused for human review is still active (consistent with /chat/active),
|
||||
// so the client resumes/subscribes rather than treating it as finished — unless
|
||||
// its prompt has already expired (cleanup/expiry will finalize it).
|
||||
const pendingExpired =
|
||||
job.metadata.pendingAction?.expiresAt != null &&
|
||||
job.metadata.pendingAction.expiresAt <= Date.now();
|
||||
const isActive =
|
||||
job.status === 'running' || (job.status === 'requires_action' && !pendingExpired);
|
||||
|
||||
res.json({
|
||||
active: isActive,
|
||||
|
|
|
|||
|
|
@ -228,3 +228,24 @@ describe('ApprovalLifecycle via GenerationJobManager.approvals (in-memory)', ()
|
|||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('InMemoryJobStore — approval expiry cleanup', () => {
|
||||
test('cleanup() finalizes and reclaims a past-expiry pending-approval job', async () => {
|
||||
const store = new InMemoryJobStore({ ttlAfterComplete: 0 });
|
||||
await store.createJob('s1', 'u1');
|
||||
|
||||
const action = buildPendingAction(
|
||||
buildToolApprovalPayload([{ name: 'shell', arguments: {}, tool_call_id: 'c1' }]),
|
||||
{ streamId: 's1', ttlMs: -1000 },
|
||||
);
|
||||
await store.transitionStatus('s1', {
|
||||
from: 'running',
|
||||
to: 'requires_action',
|
||||
patch: { pendingAction: action, pendingActionId: action.actionId },
|
||||
});
|
||||
|
||||
// A past-expiry approval must be finalized + reclaimed, not left resident.
|
||||
await store.cleanup();
|
||||
expect(await store.getJob('s1')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -207,14 +207,32 @@ export class InMemoryJobStore implements IJobStore {
|
|||
if (this.ttlAfterComplete === 0 || now - job.completedAt > this.ttlAfterComplete) {
|
||||
toDelete.push(streamId);
|
||||
}
|
||||
} else if (job.status === 'requires_action' && isPendingActionExpired(job)) {
|
||||
// Past-due approval: finalize it (aborted) so it stops occupying the
|
||||
// user slot and its content state is reclaimed, mirroring
|
||||
// ApprovalLifecycle.expire(). Skipping it (active-list filter) alone
|
||||
// would leave the job resident indefinitely.
|
||||
job.status = 'aborted';
|
||||
job.completedAt = now;
|
||||
job.error = 'Approval expired before a decision was made';
|
||||
delete job.pendingAction;
|
||||
delete job.pendingActionId;
|
||||
if (this.ttlAfterComplete === 0) {
|
||||
toDelete.push(streamId);
|
||||
}
|
||||
} else if (this.staleJobTimeout > 0 && job.status === 'running') {
|
||||
// Failsafe: reap jobs stuck in "running" with no generation activity for
|
||||
// longer than the stale timeout. These are crashed/hung generations that
|
||||
// never reached a terminal state; without this they accumulate their
|
||||
// content state in memory until the process OOMs. Reaping keys off last
|
||||
// activity (not creation time) so a long but live stream is never reaped,
|
||||
// mirroring RedisJobStore refreshing the running TTL on each chunk.
|
||||
const lastActive = this.lastActivity.get(streamId) ?? job.lastActiveAt ?? job.createdAt;
|
||||
// content state in memory until the process OOMs. Reaping keys off the
|
||||
// most recent liveness signal (not creation time) so a long but live
|
||||
// stream is never reaped, and a just-resumed approval (fresh
|
||||
// `lastActiveAt`) wins over a stale per-chunk `lastActivity` entry.
|
||||
const lastActive = Math.max(
|
||||
this.lastActivity.get(streamId) ?? 0,
|
||||
job.lastActiveAt ?? 0,
|
||||
job.createdAt,
|
||||
);
|
||||
if (now - lastActive > this.staleJobTimeout) {
|
||||
toDelete.push(streamId);
|
||||
staleRunning++;
|
||||
|
|
|
|||
|
|
@ -675,6 +675,23 @@ export class RedisJobStore implements IJobStore {
|
|||
return 1;
|
||||
}
|
||||
|
||||
// Past-due approval: finalize it (aborted) so it stops occupying the
|
||||
// slot and its stream contents are reclaimed, mirroring
|
||||
// ApprovalLifecycle.expire(). transitionStatus runs the terminal
|
||||
// content cleanup (sets, chunks, run-steps, userJobs, completed TTL).
|
||||
if (isPendingActionExpired(job)) {
|
||||
await this.transitionStatus(streamId, {
|
||||
from: 'requires_action',
|
||||
to: 'aborted',
|
||||
clear: ['pendingAction', 'pendingActionId'],
|
||||
patch: {
|
||||
error: 'Approval expired before a decision was made',
|
||||
completedAt: Date.now(),
|
||||
},
|
||||
});
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}),
|
||||
);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue