Fix performance issues with SharedTokenManager causing 20-minute delays (#586)

- Optimize lock acquisition strategy with exponential backoff
- Reduce excessive I/O operations by increasing cache check interval
- Add timeout monitoring for token refresh operations
- Add timeout wrappers for file operations to prevent hanging
- Fixes issue #585 where users experienced extreme performance issues with Qwen Code
This commit is contained in:
DS-Controller2
2025-09-12 03:35:22 -05:00
committed by GitHub
parent adabd96a42
commit 67e2e270bd

View File

@@ -26,17 +26,20 @@ const QWEN_LOCK_FILENAME = 'oauth_creds.lock';
// Token and Cache Configuration // Token and Cache Configuration
const TOKEN_REFRESH_BUFFER_MS = 30 * 1000; // 30 seconds const TOKEN_REFRESH_BUFFER_MS = 30 * 1000; // 30 seconds
const LOCK_TIMEOUT_MS = 10000; // 10 seconds lock timeout const LOCK_TIMEOUT_MS = 10000; // 10 seconds lock timeout
const CACHE_CHECK_INTERVAL_MS = 1000; // 1 second cache check interval const CACHE_CHECK_INTERVAL_MS = 5000; // 5 seconds cache check interval (increased from 1 second)
// Lock acquisition configuration (can be overridden for testing) // Lock acquisition configuration (can be overridden for testing)
interface LockConfig { interface LockConfig {
maxAttempts: number; maxAttempts: number;
attemptInterval: number; attemptInterval: number;
// Add exponential backoff parameters
maxInterval: number;
} }
const DEFAULT_LOCK_CONFIG: LockConfig = { const DEFAULT_LOCK_CONFIG: LockConfig = {
maxAttempts: 50, maxAttempts: 20, // Reduced from 50 to prevent excessive waiting
attemptInterval: 200, attemptInterval: 100, // Reduced from 200ms to check more frequently
maxInterval: 2000, // Maximum interval for exponential backoff
}; };
/** /**
@@ -300,7 +303,25 @@ export class SharedTokenManager {
try { try {
const filePath = this.getCredentialFilePath(); const filePath = this.getCredentialFilePath();
const stats = await fs.stat(filePath); // Add timeout to file stat operation
const withTimeout = async <T>(
promise: Promise<T>,
timeoutMs: number,
): Promise<T> =>
Promise.race([
promise,
new Promise<never>((_, reject) =>
setTimeout(
() =>
reject(
new Error(`File operation timed out after ${timeoutMs}ms`),
),
timeoutMs,
),
),
]);
const stats = await withTimeout(fs.stat(filePath), 3000);
const fileModTime = stats.mtimeMs; const fileModTime = stats.mtimeMs;
// Reload credentials if file has been modified since last cache // Reload credentials if file has been modified since last cache
@@ -423,6 +444,7 @@ export class SharedTokenManager {
qwenClient: IQwenOAuth2Client, qwenClient: IQwenOAuth2Client,
forceRefresh = false, forceRefresh = false,
): Promise<QwenCredentials> { ): Promise<QwenCredentials> {
const startTime = Date.now();
const lockPath = this.getLockFilePath(); const lockPath = this.getLockFilePath();
try { try {
@@ -439,6 +461,15 @@ export class SharedTokenManager {
// Acquire distributed file lock // Acquire distributed file lock
await this.acquireLock(lockPath); await this.acquireLock(lockPath);
// Check if the operation is taking too long
const lockAcquisitionTime = Date.now() - startTime;
if (lockAcquisitionTime > 5000) {
// 5 seconds warning threshold
console.warn(
`Token refresh lock acquisition took ${lockAcquisitionTime}ms`,
);
}
// Double-check if another process already refreshed the token (unless force refresh is requested) // Double-check if another process already refreshed the token (unless force refresh is requested)
// Skip the time-based throttling since we're already in a locked refresh operation // Skip the time-based throttling since we're already in a locked refresh operation
await this.forceFileCheck(qwenClient); await this.forceFileCheck(qwenClient);
@@ -456,6 +487,13 @@ export class SharedTokenManager {
// Perform the actual token refresh // Perform the actual token refresh
const response = await qwenClient.refreshAccessToken(); const response = await qwenClient.refreshAccessToken();
// Check if the token refresh is taking too long
const totalOperationTime = Date.now() - startTime;
if (totalOperationTime > 10000) {
// 10 seconds warning threshold
console.warn(`Token refresh operation took ${totalOperationTime}ms`);
}
if (!response || isErrorResponse(response)) { if (!response || isErrorResponse(response)) {
const errorData = response as ErrorData; const errorData = response as ErrorData;
throw new TokenManagerError( throw new TokenManagerError(
@@ -551,9 +589,27 @@ export class SharedTokenManager {
const dirPath = path.dirname(filePath); const dirPath = path.dirname(filePath);
const tempPath = `${filePath}.tmp.${randomUUID()}`; const tempPath = `${filePath}.tmp.${randomUUID()}`;
// Add timeout wrapper for file operations
const withTimeout = async <T>(
promise: Promise<T>,
timeoutMs: number,
): Promise<T> =>
Promise.race([
promise,
new Promise<never>((_, reject) =>
setTimeout(
() => reject(new Error(`Operation timed out after ${timeoutMs}ms`)),
timeoutMs,
),
),
]);
// Create directory with restricted permissions // Create directory with restricted permissions
try { try {
await fs.mkdir(dirPath, { recursive: true, mode: 0o700 }); await withTimeout(
fs.mkdir(dirPath, { recursive: true, mode: 0o700 }),
5000,
);
} catch (error) { } catch (error) {
throw new TokenManagerError( throw new TokenManagerError(
TokenError.FILE_ACCESS_ERROR, TokenError.FILE_ACCESS_ERROR,
@@ -566,18 +622,21 @@ export class SharedTokenManager {
try { try {
// Write to temporary file first with restricted permissions // Write to temporary file first with restricted permissions
await fs.writeFile(tempPath, credString, { mode: 0o600 }); await withTimeout(
fs.writeFile(tempPath, credString, { mode: 0o600 }),
5000,
);
// Atomic move to final location // Atomic move to final location
await fs.rename(tempPath, filePath); await withTimeout(fs.rename(tempPath, filePath), 5000);
// Update cached file modification time atomically after successful write // Update cached file modification time atomically after successful write
const stats = await fs.stat(filePath); const stats = await withTimeout(fs.stat(filePath), 5000);
this.memoryCache.fileModTime = stats.mtimeMs; this.memoryCache.fileModTime = stats.mtimeMs;
} catch (error) { } catch (error) {
// Clean up temp file if it exists // Clean up temp file if it exists
try { try {
await fs.unlink(tempPath); await withTimeout(fs.unlink(tempPath), 1000);
} catch (_cleanupError) { } catch (_cleanupError) {
// Ignore cleanup errors - temp file might not exist // Ignore cleanup errors - temp file might not exist
} }
@@ -628,9 +687,11 @@ export class SharedTokenManager {
* @throws TokenManagerError if lock cannot be acquired within timeout period * @throws TokenManagerError if lock cannot be acquired within timeout period
*/ */
private async acquireLock(lockPath: string): Promise<void> { private async acquireLock(lockPath: string): Promise<void> {
const { maxAttempts, attemptInterval } = this.lockConfig; const { maxAttempts, attemptInterval, maxInterval } = this.lockConfig;
const lockId = randomUUID(); // Use random UUID instead of PID for security const lockId = randomUUID(); // Use random UUID instead of PID for security
let currentInterval = attemptInterval;
for (let attempt = 0; attempt < maxAttempts; attempt++) { for (let attempt = 0; attempt < maxAttempts; attempt++) {
try { try {
// Attempt to create lock file atomically (exclusive mode) // Attempt to create lock file atomically (exclusive mode)
@@ -671,8 +732,10 @@ export class SharedTokenManager {
); );
} }
// Wait before retrying // Wait before retrying with exponential backoff
await new Promise((resolve) => setTimeout(resolve, attemptInterval)); await new Promise((resolve) => setTimeout(resolve, currentInterval));
// Increase interval for next attempt (exponential backoff), but cap at maxInterval
currentInterval = Math.min(currentInterval * 1.5, maxInterval);
} else { } else {
throw new TokenManagerError( throw new TokenManagerError(
TokenError.FILE_ACCESS_ERROR, TokenError.FILE_ACCESS_ERROR,