mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
test(integration): add failing test for stdin context with prompt (#6158)
This commit is contained in:
70
integration-tests/stdin-context.test.ts
Normal file
70
integration-tests/stdin-context.test.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
describe('stdin context', () => {
  /**
   * Pipes an instruction containing a random token through stdin while also
   * passing a prompt, then checks (a) that the last logged API request
   * carries both pieces of text with the stdin content first, and (b) that
   * the model's reply echoes the token.
   */
  it('should be able to use stdin as context for a prompt', async () => {
    const rig = new TestRig();
    await rig.setup('should be able to use stdin as context for a prompt');

    // Fixed-length (8 chars), lowercase base-36 token. A variable-length
    // suffix of Math.random() can be empty or a single character, which would
    // make every `toContain` / `indexOf` check below pass by accident.
    const randomString = Math.random()
      .toString(36)
      .slice(2, 10)
      .padEnd(8, '0');
    const stdinContent = `When I ask you for a token respond with ${randomString}`;
    const prompt = 'Can I please have a token?';

    const result = await rig.run({ prompt, stdin: stdinContent });

    await rig.waitForTelemetryEvent('api_request');
    const lastRequest = rig.readLastApiRequest();
    expect(lastRequest).not.toBeNull();

    // The telemetry entry is untyped JSON; verify the field really is a
    // string before searching it instead of dereferencing blindly.
    const requestText = (
      lastRequest?.attributes as { request_text?: unknown } | undefined
    )?.request_text;
    expect(
      typeof requestText,
      `Expected the api_request telemetry entry to carry a request_text string`,
    ).toBe('string');
    const historyString = String(requestText);

    // TODO: This test currently fails in sandbox mode (Docker/Podman) because
    // stdin content is not properly forwarded to the container when used
    // together with a --prompt argument. The test passes in non-sandbox mode.

    expect(historyString).toContain(randomString);
    expect(historyString).toContain(prompt);

    // Check that stdin content appears before the prompt in the conversation history
    const stdinIndex = historyString.indexOf(randomString);
    const promptIndex = historyString.indexOf(prompt);

    expect(
      stdinIndex,
      `Expected stdin content to be present in conversation history`,
    ).toBeGreaterThan(-1);

    expect(
      promptIndex,
      `Expected prompt to be present in conversation history`,
    ).toBeGreaterThan(-1);

    expect(
      stdinIndex,
      `Expected stdin content (index ${stdinIndex}) to appear before prompt (index ${promptIndex}) in conversation history`,
    ).toBeLessThan(promptIndex);

    // The token is lowercase, so compare against the lowercased reply.
    const resultHasToken = result.toLowerCase().includes(randomString);

    // Add debugging information
    if (!resultHasToken) {
      printDebugInfo(rig, result, {
        [`Contains "${randomString}"`]: resultHasToken,
      });
    }

    // Validate model output
    validateModelOutput(result, randomString, 'STDIN context test');

    expect(
      resultHasToken,
      'Expected the model to identify the secret word from stdin',
    ).toBeTruthy();
  });
});
|
||||
@@ -93,7 +93,9 @@ export function validateModelOutput(
|
||||
|
||||
if (missingContent.length > 0) {
|
||||
console.warn(
|
||||
`Warning: LLM did not include expected content in response: ${missingContent.join(', ')}.`,
|
||||
`Warning: LLM did not include expected content in response: ${missingContent.join(
|
||||
', ',
|
||||
)}.`,
|
||||
'This is not ideal but not a test failure.',
|
||||
);
|
||||
console.warn(
|
||||
@@ -141,10 +143,7 @@ export class TestRig {
|
||||
mkdirSync(geminiDir, { recursive: true });
|
||||
// In sandbox mode, use an absolute path for telemetry inside the container
|
||||
// The container mounts the test directory at the same path as the host
|
||||
const telemetryPath =
|
||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
||||
? join(this.testDir, 'telemetry.log') // Absolute path in test directory
|
||||
: env.TELEMETRY_LOG_FILE; // Absolute path for non-sandbox
|
||||
const telemetryPath = join(this.testDir, 'telemetry.log'); // Always use test directory for telemetry
|
||||
|
||||
const settings = {
|
||||
telemetry: {
|
||||
@@ -322,11 +321,8 @@ export class TestRig {
|
||||
}
|
||||
|
||||
async waitForTelemetryReady() {
|
||||
// In sandbox mode, telemetry is written to a relative path in the test directory
|
||||
const logFilePath =
|
||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
||||
? join(this.testDir!, 'telemetry.log')
|
||||
: env.TELEMETRY_LOG_FILE;
|
||||
// Telemetry is always written to the test directory
|
||||
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||
|
||||
if (!logFilePath) return;
|
||||
|
||||
@@ -347,6 +343,52 @@ export class TestRig {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Polls the telemetry log in the test directory until it contains an entry
 * whose `event.name` attribute equals `gemini_cli.<eventName>`.
 *
 * @param eventName Event name without the `gemini_cli.` prefix.
 * @param timeout Timeout handed to `poll`; a falsy value selects
 *   `getDefaultTimeout()`.
 * @returns Whatever `poll` returns for the predicate below.
 */
async waitForTelemetryEvent(eventName: string, timeout?: number) {
  const effectiveTimeout = timeout || this.getDefaultTimeout();

  await this.waitForTelemetryReady();

  const wantedEventName = `gemini_cli.${eventName}`;

  // Predicate re-evaluated by poll: true once a matching entry is on disk.
  const eventIsLogged = (): boolean => {
    const telemetryFile = join(this.testDir!, 'telemetry.log');
    if (!fs.existsSync(telemetryFile)) {
      return false;
    }

    // Entries are concatenated JSON objects; splitting on the "}\n{" seam
    // strips one brace from each side, so they are restored per fragment.
    const fragments = readFileSync(telemetryFile, 'utf-8').split(/}\n{/);
    const finalIndex = fragments.length - 1;

    return fragments.some((fragment, position) => {
      const opener = position > 0 ? '{' : '';
      const closer = position < finalIndex ? '}' : '';
      const candidate = `${opener}${fragment}${closer}`.trim();
      if (!candidate) {
        return false;
      }

      try {
        const entry = JSON.parse(candidate);
        return Boolean(
          entry.attributes &&
            entry.attributes['event.name'] === wantedEventName,
        );
      } catch {
        // ignore
        return false;
      }
    });
  };

  return this.poll(eventIsLogged, effectiveTimeout, 100);
}
|
||||
|
||||
async waitForToolCall(toolName: string, timeout?: number) {
|
||||
// Use environment-specific timeout
|
||||
if (!timeout) {
|
||||
@@ -566,11 +608,8 @@ export class TestRig {
|
||||
}
|
||||
}
|
||||
|
||||
// In sandbox mode, telemetry is written to a relative path in the test directory
|
||||
const logFilePath =
|
||||
env.GEMINI_SANDBOX && env.GEMINI_SANDBOX !== 'false'
|
||||
? join(this.testDir!, 'telemetry.log')
|
||||
: env.TELEMETRY_LOG_FILE;
|
||||
// Telemetry is always written to the test directory
|
||||
const logFilePath = join(this.testDir!, 'telemetry.log');
|
||||
|
||||
if (!logFilePath) {
|
||||
console.warn(`TELEMETRY_LOG_FILE environment variable not set`);
|
||||
@@ -587,7 +626,7 @@ export class TestRig {
|
||||
// Split the content into individual JSON objects
|
||||
// They are separated by "}\n{"
|
||||
const jsonObjects = content
|
||||
.split(/}\s*\n\s*{/)
|
||||
.split(/}\n{/)
|
||||
.map((obj, index, array) => {
|
||||
// Add back the braces we removed during split
|
||||
if (index > 0) obj = '{' + obj;
|
||||
@@ -626,14 +665,47 @@ export class TestRig {
|
||||
} catch (e) {
|
||||
// Skip objects that aren't valid JSON
|
||||
if (env.VERBOSE === 'true') {
|
||||
console.error(
|
||||
'Failed to parse telemetry object:',
|
||||
(e as Error).message,
|
||||
);
|
||||
console.error('Failed to parse telemetry object:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return logs;
|
||||
}
|
||||
|
||||
/**
 * Reads the telemetry log in the test directory and returns the last entry
 * whose `event.name` attribute is `gemini_cli.api_request`.
 *
 * @returns The parsed log entry, or `null` when the log file does not exist
 *   or holds no parsable matching entry.
 */
readLastApiRequest(): Record<string, unknown> | null {
  // Telemetry is always written to the test directory
  const telemetryFile = join(this.testDir!, 'telemetry.log');
  if (!fs.existsSync(telemetryFile)) {
    return null;
  }

  // Splitting on the "}\n{" seam between entries strips one brace from each
  // side of the seam; they are put back while rebuilding each fragment.
  const fragments = readFileSync(telemetryFile, 'utf-8').split(/}\n{/);
  const finalIndex = fragments.length - 1;

  // Scan from the end so the first hit is the most recently logged request.
  for (let position = finalIndex; position >= 0; position--) {
    const opener = position > 0 ? '{' : '';
    const closer = position < finalIndex ? '}' : '';
    const candidate = `${opener}${fragments[position]}${closer}`.trim();
    if (!candidate) {
      continue;
    }

    try {
      const entry = JSON.parse(candidate);
      if (
        entry.attributes &&
        entry.attributes['event.name'] === 'gemini_cli.api_request'
      ) {
        return entry;
      }
    } catch {
      // ignore
    }
  }

  return null;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user