perf(core): implement parallel file processing for 74% performance improvement (#4763)

Co-authored-by: Jacob Richman <jacob314@gmail.com> Co-authored-by: Sandy Tao <sandytao520@icloud.com>
2025-12-19 09:33:53 +00:00 · 2025-08-06 07:47:18 +09:00
parent c402784d97
commit aebe3ace3c
2 changed files with 267 additions and 53 deletions
--- a/packages/core/src/tools/read-many-files.test.ts
+++ b/packages/core/src/tools/read-many-files.test.ts
@@ -477,4 +477,139 @@ describe('ReadManyFilesTool', () => {
      fs.rmSync(tempDir2, { recursive: true, force: true });
    });
  });
+
+  describe('Batch Processing', () => {
+    const createMultipleFiles = (count: number, contentPrefix = 'Content') => {
+      const files: string[] = [];
+      for (let i = 0; i < count; i++) {
+        const fileName = `file${i}.txt`;
+        createFile(fileName, `${contentPrefix} ${i}`);
+        files.push(fileName);
+      }
+      return files;
+    };
+
+    const createFile = (filePath: string, content = '') => {
+      const fullPath = path.join(tempRootDir, filePath);
+      fs.mkdirSync(path.dirname(fullPath), { recursive: true });
+      fs.writeFileSync(fullPath, content);
+    };
+
+    it('should process files in parallel for performance', async () => {
+      // Mock detectFileType to add artificial delay to simulate I/O
+      const detectFileTypeSpy = vi.spyOn(
+        await import('../utils/fileUtils.js'),
+        'detectFileType',
+      );
+
+      // Create files
+      const fileCount = 4;
+      const files = createMultipleFiles(fileCount, 'Batch test');
+
+      // Mock with 100ms delay per file to simulate I/O operations
+      detectFileTypeSpy.mockImplementation(async (_filePath: string) => {
+        await new Promise((resolve) => setTimeout(resolve, 100));
+        return 'text';
+      });
+
+      const startTime = Date.now();
+      const params = { paths: files };
+      const result = await tool.execute(params, new AbortController().signal);
+      const endTime = Date.now();
+
+      const processingTime = endTime - startTime;
+
+      console.log(
+        `Processing time: ${processingTime}ms for ${fileCount} files`,
+      );
+
+      // Verify parallel processing performance improvement
+      // Parallel processing should complete in ~100ms (single file time)
+      // Sequential would take ~400ms (4 files × 100ms each)
+      expect(processingTime).toBeLessThan(200); // Should PASS with parallel implementation
+
+      // Verify all files were processed
+      const content = result.llmContent as string[];
+      expect(content).toHaveLength(fileCount);
+
+      // Cleanup mock
+      detectFileTypeSpy.mockRestore();
+    });
+
+    it('should handle batch processing errors gracefully', async () => {
+      // Create mix of valid and problematic files
+      createFile('valid1.txt', 'Valid content 1');
+      createFile('valid2.txt', 'Valid content 2');
+      createFile('valid3.txt', 'Valid content 3');
+
+      const params = {
+        paths: [
+          'valid1.txt',
+          'valid2.txt',
+          'nonexistent-file.txt', // This will fail
+          'valid3.txt',
+        ],
+      };
+
+      const result = await tool.execute(params, new AbortController().signal);
+      const content = result.llmContent as string[];
+
+      // Should successfully process valid files despite one failure
+      expect(content.length).toBeGreaterThanOrEqual(3);
+      expect(result.returnDisplay).toContain('Successfully read');
+
+      // Verify valid files were processed
+      const expectedPath1 = path.join(tempRootDir, 'valid1.txt');
+      const expectedPath3 = path.join(tempRootDir, 'valid3.txt');
+      expect(content.some((c) => c.includes(expectedPath1))).toBe(true);
+      expect(content.some((c) => c.includes(expectedPath3))).toBe(true);
+    });
+
+    it('should execute file operations concurrently', async () => {
+      // Track execution order to verify concurrency
+      const executionOrder: string[] = [];
+      const detectFileTypeSpy = vi.spyOn(
+        await import('../utils/fileUtils.js'),
+        'detectFileType',
+      );
+
+      const files = ['file1.txt', 'file2.txt', 'file3.txt'];
+      files.forEach((file) => createFile(file, 'test content'));
+
+      // Mock to track concurrent vs sequential execution
+      detectFileTypeSpy.mockImplementation(async (filePath: string) => {
+        const fileName = filePath.split('/').pop() || '';
+        executionOrder.push(`start:${fileName}`);
+
+        // Add delay to make timing differences visible
+        await new Promise((resolve) => setTimeout(resolve, 50));
+
+        executionOrder.push(`end:${fileName}`);
+        return 'text';
+      });
+
+      await tool.execute({ paths: files }, new AbortController().signal);
+
+      console.log('Execution order:', executionOrder);
+
+      // Verify concurrent execution pattern
+      // In parallel execution: all "start:" events should come before all "end:" events
+      // In sequential execution: "start:file1", "end:file1", "start:file2", "end:file2", etc.
+
+      const startEvents = executionOrder.filter((e) =>
+        e.startsWith('start:'),
+      ).length;
+      const firstEndIndex = executionOrder.findIndex((e) =>
+        e.startsWith('end:'),
+      );
+      const startsBeforeFirstEnd = executionOrder
+        .slice(0, firstEndIndex)
+        .filter((e) => e.startsWith('start:')).length;
+
+      // For parallel processing, ALL start events should happen before the first end event
+      expect(startsBeforeFirstEnd).toBe(startEvents); // Should PASS with parallel implementation
+
+      detectFileTypeSpy.mockRestore();
+    });
+  });
 });