mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
chore(integration-tests): refactor to typescript (#5645)
This commit is contained in:
642
integration-tests/test-helper.ts
Normal file
642
integration-tests/test-helper.ts
Normal file
@@ -0,0 +1,642 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { execSync, spawn } from 'child_process';
|
||||
import { parse } from 'shell-quote';
|
||||
import { mkdirSync, writeFileSync, readFileSync } from 'fs';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { env } from 'process';
|
||||
import { fileExists } from '../scripts/telemetry_utils.js';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
function sanitizeTestName(name: string) {
|
||||
return name
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]/g, '-')
|
||||
.replace(/-+/g, '-');
|
||||
}
|
||||
|
||||
// Helper to create detailed error messages
|
||||
export function createToolCallErrorMessage(
|
||||
expectedTools: string | string[],
|
||||
foundTools: string[],
|
||||
result: string,
|
||||
) {
|
||||
const expectedStr = Array.isArray(expectedTools)
|
||||
? expectedTools.join(' or ')
|
||||
: expectedTools;
|
||||
return (
|
||||
`Expected to find ${expectedStr} tool call(s). ` +
|
||||
`Found: ${foundTools.length > 0 ? foundTools.join(', ') : 'none'}. ` +
|
||||
`Output preview: ${result ? result.substring(0, 200) + '...' : 'no output'}`
|
||||
);
|
||||
}
|
||||
|
||||
// Helper to print debug information when tests fail
|
||||
export function printDebugInfo(
|
||||
rig: TestRig,
|
||||
result: string,
|
||||
context: Record<string, unknown> = {},
|
||||
) {
|
||||
console.error('Test failed - Debug info:');
|
||||
console.error('Result length:', result.length);
|
||||
console.error('Result (first 500 chars):', result.substring(0, 500));
|
||||
console.error(
|
||||
'Result (last 500 chars):',
|
||||
result.substring(result.length - 500),
|
||||
);
|
||||
|
||||
// Print any additional context provided
|
||||
Object.entries(context).forEach(([key, value]) => {
|
||||
console.error(`${key}:`, value);
|
||||
});
|
||||
|
||||
// Check what tools were actually called
|
||||
const allTools = rig.readToolLogs();
|
||||
console.error(
|
||||
'All tool calls found:',
|
||||
allTools.map((t) => t.toolRequest.name),
|
||||
);
|
||||
|
||||
return allTools;
|
||||
}
|
||||
|
||||
// Helper to validate model output and warn about unexpected content
|
||||
export function validateModelOutput(
|
||||
result: string,
|
||||
expectedContent: string | (string | RegExp)[] | null = null,
|
||||
testName = '',
|
||||
) {
|
||||
// First, check if there's any output at all (this should fail the test if missing)
|
||||
if (!result || result.trim().length === 0) {
|
||||
throw new Error('Expected LLM to return some output');
|
||||
}
|
||||
|
||||
// If expectedContent is provided, check for it and warn if missing
|
||||
if (expectedContent) {
|
||||
const contents = Array.isArray(expectedContent)
|
||||
? expectedContent
|
||||
: [expectedContent];
|
||||
const missingContent = contents.filter((content) => {
|
||||
if (typeof content === 'string') {
|
||||
return !result.toLowerCase().includes(content.toLowerCase());
|
||||
} else if (content instanceof RegExp) {
|
||||
return !content.test(result);
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
if (missingContent.length > 0) {
|
||||
console.warn(
|
||||
`Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`,
|
||||
'This is not ideal but not a test failure.',
|
||||
);
|
||||
console.warn(
|
||||
'The tool was called successfully, which is the main requirement.',
|
||||
);
|
||||
return false;
|
||||
} else if (process.env.VERBOSE === 'true') {
|
||||
console.log(`${testName}: Model output validated successfully.`);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
export class TestRig {
|
||||
bundlePath: string;
|
||||
testDir: string | null;
|
||||
testName?: string;
|
||||
_lastRunStdout?: string;
|
||||
|
||||
constructor() {
|
||||
this.bundlePath = join(__dirname, '..', 'bundle/gemini.js');
|
||||
this.testDir = null;
|
||||
}
|
||||
|
||||
// Get timeout based on environment
|
||||
getDefaultTimeout() {
|
||||
if (env.CI) return 60000; // 1 minute in CI
|
||||
if (env.GEMINI_SANDBOX) return 30000; // 30s in containers
|
||||
return 15000; // 15s locally
|
||||
}
|
||||
|
||||
setup(
|
||||
testName: string,
|
||||
options: { settings?: Record<string, unknown> } = {},
|
||||
) {
|
||||
this.testName = testName;
|
||||
const sanitizedName = sanitizeTestName(testName);
|
||||
this.testDir = join(env.INTEGRATION_TEST_FILE_DIR!, sanitizedName);
|
||||
mkdirSync(this.testDir, { recursive: true });
|
||||
|
||||
// Create a settings file to point the CLI to the local collector
|
||||
const geminiDir = join(this.testDir, '.gemini');
|
||||
mkdirSync(geminiDir, { recursive: true });
|
||||
// In sandbox mode, use an absolute path for telemetry inside the container
|
||||
// The container mounts the test directory at the same path as the host
|
||||
const telemetryPath =
|
||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
||||
? join(this.testDir, 'telemetry.log') // Absolute path in test directory
|
||||
: env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox
|
||||
|
||||
const settings = {
|
||||
telemetry: {
|
||||
enabled: true,
|
||||
target: 'local',
|
||||
otlpEndpoint: '',
|
||||
outfile: telemetryPath,
|
||||
},
|
||||
sandbox: env.GEMINI_SANDBOX !== 'false' ? env.GEMINI_SANDBOX : false,
|
||||
...options.settings, // Allow tests to override/add settings
|
||||
};
|
||||
writeFileSync(
|
||||
join(geminiDir, 'settings.json'),
|
||||
JSON.stringify(settings, null, 2),
|
||||
);
|
||||
}
|
||||
|
||||
createFile(fileName: string, content: string) {
|
||||
const filePath = join(this.testDir!, fileName);
|
||||
writeFileSync(filePath, content);
|
||||
return filePath;
|
||||
}
|
||||
|
||||
mkdir(dir: string) {
|
||||
mkdirSync(join(this.testDir!, dir), { recursive: true });
|
||||
}
|
||||
|
||||
sync() {
|
||||
// ensure file system is done before spawning
|
||||
execSync('sync', { cwd: this.testDir! });
|
||||
}
|
||||
|
||||
run(
|
||||
promptOrOptions: string | { prompt?: string; stdin?: string },
|
||||
...args: string[]
|
||||
): Promise<string> {
|
||||
let command = `node ${this.bundlePath} --yolo`;
|
||||
const execOptions: {
|
||||
cwd: string;
|
||||
encoding: 'utf-8';
|
||||
input?: string;
|
||||
} = {
|
||||
cwd: this.testDir!,
|
||||
encoding: 'utf-8',
|
||||
};
|
||||
|
||||
if (typeof promptOrOptions === 'string') {
|
||||
command += ` --prompt ${JSON.stringify(promptOrOptions)}`;
|
||||
} else if (
|
||||
typeof promptOrOptions === 'object' &&
|
||||
promptOrOptions !== null
|
||||
) {
|
||||
if (promptOrOptions.prompt) {
|
||||
command += ` --prompt ${JSON.stringify(promptOrOptions.prompt)}`;
|
||||
}
|
||||
if (promptOrOptions.stdin) {
|
||||
execOptions.input = promptOrOptions.stdin;
|
||||
}
|
||||
}
|
||||
|
||||
command += ` ${args.join(' ')}`;
|
||||
|
||||
const commandArgs = parse(command);
|
||||
const node = commandArgs.shift() as string;
|
||||
|
||||
const child = spawn(node, commandArgs as string[], {
|
||||
cwd: this.testDir!,
|
||||
stdio: 'pipe',
|
||||
});
|
||||
|
||||
let stdout = '';
|
||||
let stderr = '';
|
||||
|
||||
// Handle stdin if provided
|
||||
if (execOptions.input) {
|
||||
child.stdin!.write(execOptions.input);
|
||||
child.stdin!.end();
|
||||
}
|
||||
|
||||
child.stdout!.on('data', (data: Buffer) => {
|
||||
stdout += data;
|
||||
if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
|
||||
process.stdout.write(data);
|
||||
}
|
||||
});
|
||||
|
||||
child.stderr!.on('data', (data: Buffer) => {
|
||||
stderr += data;
|
||||
if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
|
||||
process.stderr.write(data);
|
||||
}
|
||||
});
|
||||
|
||||
const promise = new Promise<string>((resolve, reject) => {
|
||||
child.on('close', (code: number) => {
|
||||
if (code === 0) {
|
||||
// Store the raw stdout for Podman telemetry parsing
|
||||
this._lastRunStdout = stdout;
|
||||
|
||||
// Filter out telemetry output when running with Podman
|
||||
// Podman seems to output telemetry to stdout even when writing to file
|
||||
let result = stdout;
|
||||
if (env.GEMINI_SANDBOX === 'podman') {
|
||||
// Remove telemetry JSON objects from output
|
||||
// They are multi-line JSON objects that start with { and contain telemetry fields
|
||||
const lines = result.split('\n');
|
||||
const filteredLines = [];
|
||||
let inTelemetryObject = false;
|
||||
let braceDepth = 0;
|
||||
|
||||
for (const line of lines) {
|
||||
if (!inTelemetryObject && line.trim() === '{') {
|
||||
// Check if this might be start of telemetry object
|
||||
inTelemetryObject = true;
|
||||
braceDepth = 1;
|
||||
} else if (inTelemetryObject) {
|
||||
// Count braces to track nesting
|
||||
for (const char of line) {
|
||||
if (char === '{') braceDepth++;
|
||||
else if (char === '}') braceDepth--;
|
||||
}
|
||||
|
||||
// Check if we've closed all braces
|
||||
if (braceDepth === 0) {
|
||||
inTelemetryObject = false;
|
||||
// Skip this line (the closing brace)
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// Not in telemetry object, keep the line
|
||||
filteredLines.push(line);
|
||||
}
|
||||
}
|
||||
|
||||
result = filteredLines.join('\n');
|
||||
}
|
||||
// If we have stderr output, include that also
|
||||
if (stderr) {
|
||||
result += `\n\nStdErr:\n${stderr}`;
|
||||
}
|
||||
|
||||
resolve(result);
|
||||
} else {
|
||||
reject(new Error(`Process exited with code ${code}:\n${stderr}`));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return promise;
|
||||
}
|
||||
|
||||
readFile(fileName: string) {
|
||||
const content = readFileSync(join(this.testDir!, fileName), 'utf-8');
|
||||
if (env.KEEP_OUTPUT === 'true' || env.VERBOSE === 'true') {
|
||||
const testId = `${env.TEST_FILE_NAME!.replace(
|
||||
'.test.js',
|
||||
'',
|
||||
)}:${this.testName!.replace(/ /g, '-')}`;
|
||||
console.log(`--- FILE: ${testId}/${fileName} ---`);
|
||||
console.log(content);
|
||||
console.log(`--- END FILE: ${testId}/${fileName} ---`);
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
async cleanup() {
|
||||
// Clean up test directory
|
||||
if (this.testDir && !env.KEEP_OUTPUT) {
|
||||
try {
|
||||
execSync(`rm -rf ${this.testDir}`);
|
||||
} catch (error) {
|
||||
// Ignore cleanup errors
|
||||
if (env.VERBOSE === 'true') {
|
||||
console.warn('Cleanup warning:', (error as Error).message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async waitForTelemetryReady() {
|
||||
// In sandbox mode, telemetry is written to a relative path in the test directory
|
||||
const logFilePath =
|
||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
||||
? join(this.testDir!, 'telemetry.log')
|
||||
: env.TELEMETRY_LOG_FILE;
|
||||
|
||||
if (!logFilePath) return;
|
||||
|
||||
// Wait for telemetry file to exist and have content
|
||||
await this.poll(
|
||||
() => {
|
||||
if (!fileExists(logFilePath)) return false;
|
||||
try {
|
||||
const content = readFileSync(logFilePath, 'utf-8');
|
||||
// Check if file has meaningful content (at least one complete JSON object)
|
||||
return content.includes('"event.name"');
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
},
|
||||
2000, // 2 seconds max - reduced since telemetry should flush on exit now
|
||||
100, // check every 100ms
|
||||
);
|
||||
}
|
||||
|
||||
async waitForToolCall(toolName: string, timeout?: number) {
|
||||
// Use environment-specific timeout
|
||||
if (!timeout) {
|
||||
timeout = this.getDefaultTimeout();
|
||||
}
|
||||
|
||||
// Wait for telemetry to be ready before polling for tool calls
|
||||
await this.waitForTelemetryReady();
|
||||
|
||||
return this.poll(
|
||||
() => {
|
||||
const toolLogs = this.readToolLogs();
|
||||
return toolLogs.some((log) => log.toolRequest.name === toolName);
|
||||
},
|
||||
timeout,
|
||||
100,
|
||||
);
|
||||
}
|
||||
|
||||
async waitForAnyToolCall(toolNames: string[], timeout?: number) {
|
||||
// Use environment-specific timeout
|
||||
if (!timeout) {
|
||||
timeout = this.getDefaultTimeout();
|
||||
}
|
||||
|
||||
// Wait for telemetry to be ready before polling for tool calls
|
||||
await this.waitForTelemetryReady();
|
||||
|
||||
return this.poll(
|
||||
() => {
|
||||
const toolLogs = this.readToolLogs();
|
||||
return toolNames.some((name) =>
|
||||
toolLogs.some((log) => log.toolRequest.name === name),
|
||||
);
|
||||
},
|
||||
timeout,
|
||||
100,
|
||||
);
|
||||
}
|
||||
|
||||
async poll(
|
||||
predicate: () => boolean,
|
||||
timeout: number,
|
||||
interval: number,
|
||||
): Promise<boolean> {
|
||||
const startTime = Date.now();
|
||||
let attempts = 0;
|
||||
while (Date.now() - startTime < timeout) {
|
||||
attempts++;
|
||||
const result = predicate();
|
||||
if (env.VERBOSE === 'true' && attempts % 5 === 0) {
|
||||
console.log(
|
||||
`Poll attempt ${attempts}: ${result ? 'success' : 'waiting...'}`,
|
||||
);
|
||||
}
|
||||
if (result) {
|
||||
return true;
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, interval));
|
||||
}
|
||||
if (env.VERBOSE === 'true') {
|
||||
console.log(`Poll timed out after ${attempts} attempts`);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
_parseToolLogsFromStdout(stdout: string) {
|
||||
const logs: {
|
||||
timestamp: number;
|
||||
toolRequest: {
|
||||
name: string;
|
||||
args: string;
|
||||
success: boolean;
|
||||
duration_ms: number;
|
||||
};
|
||||
}[] = [];
|
||||
|
||||
// The console output from Podman is JavaScript object notation, not JSON
|
||||
// Look for tool call events in the output
|
||||
// Updated regex to handle tool names with hyphens and underscores
|
||||
const toolCallPattern =
|
||||
/body:\s*'Tool call:\s*([\w-]+)\..*?Success:\s*(\w+)\..*?Duration:\s*(\d+)ms\.'/g;
|
||||
const matches = [...stdout.matchAll(toolCallPattern)];
|
||||
|
||||
for (const match of matches) {
|
||||
const toolName = match[1];
|
||||
const success = match[2] === 'true';
|
||||
const duration = parseInt(match[3], 10);
|
||||
|
||||
// Try to find function_args nearby
|
||||
const matchIndex = match.index || 0;
|
||||
const contextStart = Math.max(0, matchIndex - 500);
|
||||
const contextEnd = Math.min(stdout.length, matchIndex + 500);
|
||||
const context = stdout.substring(contextStart, contextEnd);
|
||||
|
||||
// Look for function_args in the context
|
||||
let args = '{}';
|
||||
const argsMatch = context.match(/function_args:\s*'([^']+)'/);
|
||||
if (argsMatch) {
|
||||
args = argsMatch[1];
|
||||
}
|
||||
|
||||
// Also try to find function_name to double-check
|
||||
// Updated regex to handle tool names with hyphens and underscores
|
||||
const nameMatch = context.match(/function_name:\s*'([\w-]+)'/);
|
||||
const actualToolName = nameMatch ? nameMatch[1] : toolName;
|
||||
|
||||
logs.push({
|
||||
timestamp: Date.now(),
|
||||
toolRequest: {
|
||||
name: actualToolName,
|
||||
args: args,
|
||||
success: success,
|
||||
duration_ms: duration,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// If no matches found with the simple pattern, try the JSON parsing approach
|
||||
// in case the format changes
|
||||
if (logs.length === 0) {
|
||||
const lines = stdout.split('\n');
|
||||
let currentObject = '';
|
||||
let inObject = false;
|
||||
let braceDepth = 0;
|
||||
|
||||
for (const line of lines) {
|
||||
if (!inObject && line.trim() === '{') {
|
||||
inObject = true;
|
||||
braceDepth = 1;
|
||||
currentObject = line + '\n';
|
||||
} else if (inObject) {
|
||||
currentObject += line + '\n';
|
||||
|
||||
// Count braces
|
||||
for (const char of line) {
|
||||
if (char === '{') braceDepth++;
|
||||
else if (char === '}') braceDepth--;
|
||||
}
|
||||
|
||||
// If we've closed all braces, try to parse the object
|
||||
if (braceDepth === 0) {
|
||||
inObject = false;
|
||||
try {
|
||||
const obj = JSON.parse(currentObject);
|
||||
|
||||
// Check for tool call in different formats
|
||||
if (
|
||||
obj.body &&
|
||||
obj.body.includes('Tool call:') &&
|
||||
obj.attributes
|
||||
) {
|
||||
const bodyMatch = obj.body.match(/Tool call: (\w+)\./);
|
||||
if (bodyMatch) {
|
||||
logs.push({
|
||||
timestamp: obj.timestamp || Date.now(),
|
||||
toolRequest: {
|
||||
name: bodyMatch[1],
|
||||
args: obj.attributes.function_args || '{}',
|
||||
success: obj.attributes.success !== false,
|
||||
duration_ms: obj.attributes.duration_ms || 0,
|
||||
},
|
||||
});
|
||||
}
|
||||
} else if (
|
||||
obj.attributes &&
|
||||
obj.attributes['event.name'] === 'gemini_cli.tool_call'
|
||||
) {
|
||||
logs.push({
|
||||
timestamp: obj.attributes['event.timestamp'],
|
||||
toolRequest: {
|
||||
name: obj.attributes.function_name,
|
||||
args: obj.attributes.function_args,
|
||||
success: obj.attributes.success,
|
||||
duration_ms: obj.attributes.duration_ms,
|
||||
},
|
||||
});
|
||||
}
|
||||
} catch {
|
||||
// Not valid JSON
|
||||
}
|
||||
currentObject = '';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return logs;
|
||||
}
|
||||
|
||||
readToolLogs() {
|
||||
// For Podman, first check if telemetry file exists and has content
|
||||
// If not, fall back to parsing from stdout
|
||||
if (env.GEMINI_SANDBOX === 'podman') {
|
||||
// Try reading from file first
|
||||
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||
|
||||
if (fileExists(logFilePath)) {
|
||||
try {
|
||||
const content = readFileSync(logFilePath, 'utf-8');
|
||||
if (content && content.includes('"event.name"')) {
|
||||
// File has content, use normal file parsing
|
||||
// Continue to the normal file parsing logic below
|
||||
} else if (this._lastRunStdout) {
|
||||
// File exists but is empty or doesn't have events, parse from stdout
|
||||
return this._parseToolLogsFromStdout(this._lastRunStdout);
|
||||
}
|
||||
} catch {
|
||||
// Error reading file, fall back to stdout
|
||||
if (this._lastRunStdout) {
|
||||
return this._parseToolLogsFromStdout(this._lastRunStdout);
|
||||
}
|
||||
}
|
||||
} else if (this._lastRunStdout) {
|
||||
// No file exists, parse from stdout
|
||||
return this._parseToolLogsFromStdout(this._lastRunStdout);
|
||||
}
|
||||
}
|
||||
|
||||
// In sandbox mode, telemetry is written to a relative path in the test directory
|
||||
const logFilePath =
|
||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
||||
? join(this.testDir!, 'telemetry.log')
|
||||
: env.TELEMETRY_LOG_FILE;
|
||||
|
||||
if (!logFilePath) {
|
||||
console.warn(`TELEMETRY_LOG_FILE environment variable not set`);
|
||||
return [];
|
||||
}
|
||||
|
||||
// Check if file exists, if not return empty array (file might not be created yet)
|
||||
if (!fileExists(logFilePath)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const content = readFileSync(logFilePath, 'utf-8');
|
||||
|
||||
// Split the content into individual JSON objects
|
||||
// They are separated by "}\n{"
|
||||
const jsonObjects = content
|
||||
.split(/}\s*\n\s*{/)
|
||||
.map((obj, index, array) => {
|
||||
// Add back the braces we removed during split
|
||||
if (index > 0) obj = '{' + obj;
|
||||
if (index < array.length - 1) obj = obj + '}';
|
||||
return obj.trim();
|
||||
})
|
||||
.filter((obj) => obj);
|
||||
|
||||
const logs: {
|
||||
toolRequest: {
|
||||
name: string;
|
||||
args: string;
|
||||
success: boolean;
|
||||
duration_ms: number;
|
||||
};
|
||||
}[] = [];
|
||||
|
||||
for (const jsonStr of jsonObjects) {
|
||||
try {
|
||||
const logData = JSON.parse(jsonStr);
|
||||
// Look for tool call logs
|
||||
if (
|
||||
logData.attributes &&
|
||||
logData.attributes['event.name'] === 'gemini_cli.tool_call'
|
||||
) {
|
||||
const toolName = logData.attributes.function_name;
|
||||
logs.push({
|
||||
toolRequest: {
|
||||
name: toolName,
|
||||
args: logData.attributes.function_args,
|
||||
success: logData.attributes.success,
|
||||
duration_ms: logData.attributes.duration_ms,
|
||||
},
|
||||
});
|
||||
}
|
||||
} catch (e) {
|
||||
// Skip objects that aren't valid JSON
|
||||
if (env.VERBOSE === 'true') {
|
||||
console.error(
|
||||
'Failed to parse telemetry object:',
|
||||
(e as Error).message,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return logs;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user