diff --git a/integration-tests/todo_write.test.ts b/integration-tests/todo_write.test.ts index 5de52811..5c63e3c4 100644 --- a/integration-tests/todo_write.test.ts +++ b/integration-tests/todo_write.test.ts @@ -12,13 +12,12 @@ describe('todo_write', () => { const rig = new TestRig(); await rig.setup('should be able to create and manage a todo list'); - const prompt = `I want to implement a new feature to track user preferences. Here are the tasks: -1. Create a user preferences model -2. Add API endpoints for preferences -3. Implement frontend components -4. Write tests for the new functionality + const prompt = `Please create a todo list with these three simple tasks: +1. Buy milk +2. Walk the dog +3. Read a book -Please create a todo list for these tasks.`; +Use the todo_write tool to create this list.`; const result = await rig.run(prompt); @@ -50,83 +49,21 @@ Please create a todo list for these tasks.`; expect(todoArgs.todos).toBeDefined(); expect(Array.isArray(todoArgs.todos)).toBe(true); - expect(todoArgs.todos.length).toBeGreaterThan(0); + expect(todoArgs.todos.length).toBeGreaterThanOrEqual(3); // Check that all todos have the correct structure for (const todo of todoArgs.todos) { expect(todo.id).toBeDefined(); expect(todo.content).toBeDefined(); - expect(['pending', 'in_progress', 'completed']).toContain(todo.status); + expect(['pending', 'in_progress', 'completed', 'cancelled']).toContain( + todo.status, + ); } // Log success info if verbose if (process.env['VERBOSE'] === 'true') { console.log('Todo list created successfully'); - } - }); - - it('should be able to update todo status', async () => { - const rig = new TestRig(); - await rig.setup('should be able to update todo status'); - - // First create a todo list - const initialPrompt = `Create a todo list with these tasks: -1. Set up project structure -2. Implement authentication -3. Add database migrations`; - - await rig.run(initialPrompt); - await rig.waitForToolCall('todo_write'); - - // Now update the todo list by marking one as in progress - const updatePrompt = `I've started working on implementing authentication. Please update the todo list to reflect that.`; - - const result = await rig.run(updatePrompt); - - const foundToolCall = await rig.waitForToolCall('todo_write'); - - // Add debugging information - if (!foundToolCall) { - printDebugInfo(rig, result); - } - - expect( - foundToolCall, - 'Expected to find a todo_write tool call', - ).toBeTruthy(); - - // Validate model output - will throw if no output - validateModelOutput(result, null, 'Todo update test'); - - // Check that the tool was called with updated parameters - const toolLogs = rig.readToolLogs(); - const todoWriteCalls = toolLogs.filter( - (t) => t.toolRequest.name === 'todo_write', - ); - - expect(todoWriteCalls.length).toBeGreaterThan(0); - - // Parse the arguments to verify the update - const todoArgs = JSON.parse( - todoWriteCalls[todoWriteCalls.length - 1].toolRequest.args, - ); - - expect(todoArgs.todos).toBeDefined(); - expect(Array.isArray(todoArgs.todos)).toBe(true); - // The model might create a new list with just the task it's working on - // or it might update the existing list. Let's check that we have at least one todo - expect(todoArgs.todos.length).toBeGreaterThanOrEqual(1); - - // Check that all todos have the correct structure - for (const todo of todoArgs.todos) { - expect(todo.id).toBeDefined(); - expect(todo.content).toBeDefined(); - expect(['pending', 'in_progress', 'completed']).toContain(todo.status); - } - - // Log success info if verbose - if (process.env['VERBOSE'] === 'true') { - console.log('Todo list updated successfully'); + console.log(`Created ${todoArgs.todos.length} todos`); } }); });