mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-22 01:37:50 +00:00
Refactor read-file and support images. (#480)
This commit is contained in:
433
packages/server/src/utils/fileUtils.test.ts
Normal file
433
packages/server/src/utils/fileUtils.test.ts
Normal file
@@ -0,0 +1,433 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import {
|
||||
describe,
|
||||
it,
|
||||
expect,
|
||||
vi,
|
||||
beforeEach,
|
||||
afterEach,
|
||||
type Mock,
|
||||
} from 'vitest';
|
||||
|
||||
import * as actualNodeFs from 'node:fs'; // For setup/teardown
|
||||
import fsPromises from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import os from 'node:os';
|
||||
import mime from 'mime-types';
|
||||
|
||||
import {
|
||||
isWithinRoot,
|
||||
isBinaryFile,
|
||||
detectFileType,
|
||||
processSingleFileContent,
|
||||
} from './fileUtils.js';
|
||||
|
||||
// Swap `mime-types` for a stub so every test decides what `lookup` reports.
// The stub is exposed both on the default export and as a named export.
vi.mock('mime-types', () => {
  const stubbedModule = {
    default: { lookup: vi.fn() },
    lookup: vi.fn(),
  };
  return stubbedModule;
});

// Mock-typed handle on the stubbed `lookup` of the default export.
const mockMimeLookup = mime.lookup as Mock;
|
||||
|
||||
describe('fileUtils', () => {
|
||||
// Scratch directory, created anew for every test.
let tempRootDir: string;
// The real `process.cwd`, kept so it can be put back after each test.
const realProcessCwd = process.cwd;

// Fixture paths under tempRootDir; assigned in beforeEach.
let testTextFilePath: string;
let testImageFilePath: string;
let testPdfFilePath: string;
let testBinaryFilePath: string;
let nonExistentFilePath: string;
let directoryPath: string;

beforeEach(() => {
  vi.resetAllMocks(); // Also clears whatever was configured on mime.lookup

  const tempPrefix = path.join(os.tmpdir(), 'fileUtils-test-');
  tempRootDir = actualNodeFs.mkdtempSync(tempPrefix);
  // Report the scratch directory as the current working directory.
  process.cwd = vi.fn(() => tempRootDir);

  const inTempRoot = (name: string): string => path.join(tempRootDir, name);
  testTextFilePath = inTempRoot('test.txt');
  testImageFilePath = inTempRoot('image.png');
  testPdfFilePath = inTempRoot('document.pdf');
  testBinaryFilePath = inTempRoot('app.exe');
  nonExistentFilePath = inTempRoot('notfound.txt');
  directoryPath = inTempRoot('subdir');

  // Only the sub-directory is created up front; files are written by the tests.
  actualNodeFs.mkdirSync(directoryPath, { recursive: true });
});

afterEach(() => {
  const tempRootStillExists = actualNodeFs.existsSync(tempRootDir);
  if (tempRootStillExists) {
    actualNodeFs.rmSync(tempRootDir, { recursive: true, force: true });
  }
  process.cwd = realProcessCwd;
  vi.restoreAllMocks(); // Undo any spies installed by the test
});
|
||||
|
||||
describe('isWithinRoot', () => {
  // Absolute root used by most cases below.
  const root = path.resolve('/project/root');

  it('should return true for paths directly within the root', () => {
    expect(isWithinRoot(path.join(root, 'file.txt'), root)).toBe(true);
    expect(isWithinRoot(path.join(root, 'subdir', 'file.txt'), root)).toBe(
      true,
    );
  });

  it('should return true for the root path itself', () => {
    expect(isWithinRoot(root, root)).toBe(true);
  });

  it('should return false for paths outside the root', () => {
    expect(
      isWithinRoot(path.resolve('/project/other', 'file.txt'), root),
    ).toBe(false);
    expect(isWithinRoot(path.resolve('/unrelated', 'file.txt'), root)).toBe(
      false,
    );
  });

  it('should return false for paths that only partially match the root prefix', () => {
    // Shares the string prefix "/project/root" but is a sibling directory.
    expect(
      isWithinRoot(
        path.resolve('/project/root-but-actually-different'),
        root,
      ),
    ).toBe(false);
  });

  it('should handle paths with trailing slashes correctly', () => {
    expect(isWithinRoot(path.join(root, 'file.txt') + path.sep, root)).toBe(
      true,
    );
    expect(isWithinRoot(root + path.sep, root)).toBe(true);
  });

  it('should handle different path separators (POSIX vs Windows)', () => {
    // Only forward-slash inputs are exercised here; no backslash-separated
    // path is covered by this test.
    const posixRoot = '/project/root';
    const posixPathInside = '/project/root/file.txt';
    const posixPathOutside = '/project/other/file.txt';
    expect(isWithinRoot(posixPathInside, posixRoot)).toBe(true);
    expect(isWithinRoot(posixPathOutside, posixRoot)).toBe(false);
  });

  // The previous title claimed "should return false ..." although the first
  // assertion expects true; the title now describes both directions.
  it('should return true for a path nested in the root and false when the root is nested in the path', () => {
    const pathToCheck = path.resolve('/project/root/sub');
    const rootSub = path.resolve('/project/root');
    expect(isWithinRoot(pathToCheck, rootSub)).toBe(true);

    const pathToCheckSuper = path.resolve('/project/root');
    const rootSuper = path.resolve('/project/root/sub');
    expect(isWithinRoot(pathToCheckSuper, rootSuper)).toBe(false);
  });
});
|
||||
|
||||
describe('isBinaryFile', () => {
  let filePathForBinaryTest: string;

  // Deletes the probe file when it is present on disk.
  const removeProbeIfPresent = (): void => {
    if (actualNodeFs.existsSync(filePathForBinaryTest)) {
      actualNodeFs.unlinkSync(filePathForBinaryTest);
    }
  };

  beforeEach(() => {
    filePathForBinaryTest = path.join(tempRootDir, 'binaryCheck.tmp');
  });

  afterEach(() => {
    removeProbeIfPresent();
  });

  it('should return false for an empty file', () => {
    actualNodeFs.writeFileSync(filePathForBinaryTest, '');
    const verdict = isBinaryFile(filePathForBinaryTest);
    expect(verdict).toBe(false);
  });

  it('should return false for a typical text file', () => {
    const text = 'Hello, world!\nThis is a test file with normal text content.';
    actualNodeFs.writeFileSync(filePathForBinaryTest, text);
    const verdict = isBinaryFile(filePathForBinaryTest);
    expect(verdict).toBe(false);
  });

  it('should return true for a file with many null bytes', () => {
    // "He\0llo\0\0\0\0\0"
    const bytes = [0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00];
    actualNodeFs.writeFileSync(filePathForBinaryTest, Buffer.from(bytes));
    const verdict = isBinaryFile(filePathForBinaryTest);
    expect(verdict).toBe(true);
  });

  it('should return true for a file with high percentage of non-printable ASCII', () => {
    // AB\x01\x02\x03\x04\x05CD\x06 -- six of the ten bytes are control bytes
    const bytes = [0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06];
    actualNodeFs.writeFileSync(filePathForBinaryTest, Buffer.from(bytes));
    const verdict = isBinaryFile(filePathForBinaryTest);
    expect(verdict).toBe(true);
  });

  it('should return false if file access fails (e.g., ENOENT)', () => {
    // Make sure there is nothing at the probe path.
    removeProbeIfPresent();
    const verdict = isBinaryFile(filePathForBinaryTest);
    expect(verdict).toBe(false);
  });
});
|
||||
|
||||
describe('detectFileType', () => {
  let filePathForDetectTest: string;

  // Queues one mime.lookup result, then checks what detectFileType reports.
  const expectDetectedType = (
    lookupResult: string | false,
    filePath: string,
    expectedType: string,
  ): void => {
    mockMimeLookup.mockReturnValueOnce(lookupResult);
    expect(detectFileType(filePath)).toBe(expectedType);
  };

  beforeEach(() => {
    filePathForDetectTest = path.join(tempRootDir, 'detectType.tmp');
    // Starts out as plain text; tests needing binary content overwrite it.
    actualNodeFs.writeFileSync(filePathForDetectTest, 'Plain text content');
  });

  afterEach(() => {
    const probeExists = actualNodeFs.existsSync(filePathForDetectTest);
    if (probeExists) {
      actualNodeFs.unlinkSync(filePathForDetectTest);
    }
    vi.restoreAllMocks(); // Undo any spies installed by the test
  });

  it('should detect image type by extension (png)', () => {
    expectDetectedType('image/png', 'file.png', 'image');
  });

  it('should detect image type by extension (jpeg)', () => {
    expectDetectedType('image/jpeg', 'file.jpg', 'image');
  });

  it('should detect pdf type by extension', () => {
    expectDetectedType('application/pdf', 'file.pdf', 'pdf');
  });

  it('should detect known binary extensions as binary (e.g. .zip)', () => {
    expectDetectedType('application/zip', 'archive.zip', 'binary');
  });

  it('should detect known binary extensions as binary (e.g. .exe)', () => {
    // application/octet-stream is a common lookup result for .exe
    expectDetectedType('application/octet-stream', 'app.exe', 'binary');
  });

  it('should use isBinaryFile for unknown extensions and detect as binary', () => {
    // Content made almost entirely of control bytes.
    const controlBytes = [
      0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
    ];
    actualNodeFs.writeFileSync(
      filePathForDetectTest,
      Buffer.from(controlBytes),
    );
    // `false` is what the lookup yields for an unknown mime type.
    expectDetectedType(false, filePathForDetectTest, 'binary');
  });

  it('should default to text if mime type is unknown and content is not binary', () => {
    // The probe file still holds the plain text written in beforeEach.
    expectDetectedType(false, filePathForDetectTest, 'text');
  });
});
|
||||
|
||||
describe('processSingleFileContent', () => {
  beforeEach(() => {
    // Start every test without leftover fixture files; each test writes the
    // file it needs before calling processSingleFileContent.
    const fixturePaths = [
      testTextFilePath,
      testImageFilePath,
      testPdfFilePath,
      testBinaryFilePath,
    ];
    for (const fixturePath of fixturePaths) {
      if (actualNodeFs.existsSync(fixturePath)) {
        actualNodeFs.unlinkSync(fixturePath);
      }
    }
  });

  it('should read a text file successfully', async () => {
    // Real newlines (previously '\\n', a literal backslash-n) so that the
    // test covers a genuinely multi-line file.
    const content = 'Line 1\nLine 2\nLine 3';
    actualNodeFs.writeFileSync(testTextFilePath, content);
    const result = await processSingleFileContent(
      testTextFilePath,
      tempRootDir,
    );
    expect(result.llmContent).toBe(content);
    expect(result.returnDisplay).toContain('Read text file: test.txt');
    expect(result.error).toBeUndefined();
  });

  it('should handle file not found', async () => {
    const result = await processSingleFileContent(
      nonExistentFilePath,
      tempRootDir,
    );
    expect(result.error).toContain('File not found');
    expect(result.returnDisplay).toContain('File not found');
  });

  it('should handle read errors for text files', async () => {
    // The file has to exist so the failure comes from the read itself.
    actualNodeFs.writeFileSync(testTextFilePath, 'content');
    const readError = new Error('Simulated read error');
    vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError);

    const result = await processSingleFileContent(
      testTextFilePath,
      tempRootDir,
    );
    expect(result.error).toContain('Simulated read error');
    expect(result.returnDisplay).toContain('Simulated read error');
  });

  it('should handle read errors for image/pdf files', async () => {
    // The file has to exist so the failure comes from the read itself.
    actualNodeFs.writeFileSync(testImageFilePath, 'content');
    mockMimeLookup.mockReturnValue('image/png');
    const readError = new Error('Simulated image read error');
    vi.spyOn(fsPromises, 'readFile').mockRejectedValueOnce(readError);

    const result = await processSingleFileContent(
      testImageFilePath,
      tempRootDir,
    );
    expect(result.error).toContain('Simulated image read error');
    expect(result.returnDisplay).toContain('Simulated image read error');
  });

  it('should process an image file', async () => {
    const fakePngData = Buffer.from('fake png data');
    actualNodeFs.writeFileSync(testImageFilePath, fakePngData);
    mockMimeLookup.mockReturnValue('image/png');
    const result = await processSingleFileContent(
      testImageFilePath,
      tempRootDir,
    );
    expect(
      (result.llmContent as { inlineData: unknown }).inlineData,
    ).toBeDefined();
    expect(
      (result.llmContent as { inlineData: { mimeType: string } }).inlineData
        .mimeType,
    ).toBe('image/png');
    expect(
      (result.llmContent as { inlineData: { data: string } }).inlineData.data,
    ).toBe(fakePngData.toString('base64'));
    expect(result.returnDisplay).toContain('Read image file: image.png');
  });

  it('should process a PDF file', async () => {
    const fakePdfData = Buffer.from('fake pdf data');
    actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
    mockMimeLookup.mockReturnValue('application/pdf');
    const result = await processSingleFileContent(
      testPdfFilePath,
      tempRootDir,
    );
    expect(
      (result.llmContent as { inlineData: unknown }).inlineData,
    ).toBeDefined();
    expect(
      (result.llmContent as { inlineData: { mimeType: string } }).inlineData
        .mimeType,
    ).toBe('application/pdf');
    expect(
      (result.llmContent as { inlineData: { data: string } }).inlineData.data,
    ).toBe(fakePdfData.toString('base64'));
    expect(result.returnDisplay).toContain('Read pdf file: document.pdf');
  });

  it('should skip binary files', async () => {
    actualNodeFs.writeFileSync(
      testBinaryFilePath,
      Buffer.from([0x00, 0x01, 0x02]),
    );
    mockMimeLookup.mockReturnValueOnce('application/octet-stream');
    // The file on disk is real, so nothing else needs to be stubbed.

    const result = await processSingleFileContent(
      testBinaryFilePath,
      tempRootDir,
    );
    expect(result.llmContent).toContain(
      'Cannot display content of binary file',
    );
    expect(result.returnDisplay).toContain('Skipped binary file: app.exe');
  });

  it('should handle path being a directory', async () => {
    const result = await processSingleFileContent(directoryPath, tempRootDir);
    expect(result.error).toContain('Path is a directory');
    expect(result.returnDisplay).toContain('Path is a directory');
  });

  it('should paginate text files correctly (offset and limit)', async () => {
    const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`);
    actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));

    // offset 5 / limit 5 selects lines 6-10 (1-based).
    const result = await processSingleFileContent(
      testTextFilePath,
      tempRootDir,
      5,
      5,
    );
    const expectedContent = lines.slice(5, 10).join('\n');

    expect(result.llmContent).toContain(expectedContent);
    expect(result.llmContent).toContain(
      '[File content truncated: showing lines 6-10 of 20 total lines. Use offset/limit parameters to view more.]',
    );
    expect(result.returnDisplay).toContain(
      'Read text file: test.txt (truncated)',
    );
    expect(result.isTruncated).toBe(true);
    expect(result.originalLineCount).toBe(20);
    expect(result.linesShown).toEqual([6, 10]);
  });

  it('should handle limit exceeding file length', async () => {
    const lines = ['Line 1', 'Line 2'];
    actualNodeFs.writeFileSync(testTextFilePath, lines.join('\n'));

    const result = await processSingleFileContent(
      testTextFilePath,
      tempRootDir,
      0,
      10,
    );
    const expectedContent = lines.join('\n');

    expect(result.llmContent).toBe(expectedContent);
    expect(result.returnDisplay).toContain('Read text file: test.txt');
    expect(result.isTruncated).toBe(false);
    expect(result.originalLineCount).toBe(2);
    expect(result.linesShown).toEqual([1, 2]);
  });

  it('should truncate long lines in text files', async () => {
    const longLine = 'a'.repeat(2500);
    actualNodeFs.writeFileSync(
      testTextFilePath,
      `Short line\n${longLine}\nAnother short line`,
    );

    const result = await processSingleFileContent(
      testTextFilePath,
      tempRootDir,
    );

    expect(result.llmContent).toContain('Short line');
    expect(result.llmContent).toContain(
      longLine.substring(0, 2000) + '... [truncated]',
    );
    expect(result.llmContent).toContain('Another short line');
    expect(result.llmContent).toContain(
      '[File content partially truncated: some lines exceeded maximum length of 2000 characters.]',
    );
    expect(result.isTruncated).toBe(true);
  });
});
|
||||
});
|
||||
280
packages/server/src/utils/fileUtils.ts
Normal file
280
packages/server/src/utils/fileUtils.ts
Normal file
@@ -0,0 +1,280 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { PartUnion } from '@google/genai';
|
||||
import mime from 'mime-types';
|
||||
|
||||
/** Number of lines returned from a text file when the caller passes no limit. */
const DEFAULT_MAX_LINES_TEXT_FILE = 2000;

/** Lines longer than this many characters are cut short when a text file is read. */
const MAX_LINE_LENGTH_TEXT_FILE = 2000;

/** Default text encoding. */
export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
|
||||
|
||||
/**
 * Checks if a path is within a given root directory.
 *
 * Both arguments are passed through `path.normalize`, so `.` / `..` segments
 * and repeated separators are collapsed before comparing. The comparison is
 * purely textual: nothing is resolved against the file system or the current
 * working directory. A trailing separator on either argument does not change
 * the result.
 *
 * @param pathToCheck The absolute path to check.
 * @param rootDirectory The absolute root directory.
 * @returns True if the path is the root directory itself or lies beneath it,
 *   false otherwise.
 */
export function isWithinRoot(
  pathToCheck: string,
  rootDirectory: string,
): boolean {
  const normalizedPathToCheck = path.normalize(pathToCheck);
  const normalizedRootDirectory = path.normalize(rootDirectory);

  // Compare against the root with exactly one trailing separator so that
  // "/project/root-other" is not mistaken for a child of "/project/root".
  // A root that already ends with a separator (including '/' or 'C:\') is
  // used unchanged.
  const rootWithSeparator = normalizedRootDirectory.endsWith(path.sep)
    ? normalizedRootDirectory
    : normalizedRootDirectory + path.sep;

  return (
    normalizedPathToCheck === normalizedRootDirectory ||
    // The root itself, when the root was given with a trailing separator
    // and the path was not.
    normalizedPathToCheck + path.sep === rootWithSeparator ||
    normalizedPathToCheck.startsWith(rootWithSeparator)
  );
}
|
||||
|
||||
/**
 * Determines if a file is likely binary based on content sampling.
 *
 * Reads at most the first 4096 bytes of the file. The sample is classified as
 * binary when it contains a null byte, or when more than 30% of its bytes are
 * control bytes (values below 9, or between 14 and 31 inclusive). An empty
 * file is reported as not binary.
 *
 * Never throws: any failure to open, stat or read the file yields `false`, so
 * callers are expected to detect missing or unreadable files themselves.
 *
 * @param filePath Path to the file.
 * @returns True if the file appears to be binary.
 */
export function isBinaryFile(filePath: string): boolean {
  let fd: number | undefined;
  try {
    fd = fs.openSync(filePath, 'r');

    const fileSize = fs.fstatSync(fd).size;
    if (fileSize === 0) {
      // Empty file is not considered binary for content checking
      return false;
    }

    // Read up to 4KB or file size, whichever is smaller
    const buffer = Buffer.alloc(Math.min(4096, fileSize));
    const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
    if (bytesRead === 0) return false;

    let nonPrintableCount = 0;
    for (const byte of buffer.subarray(0, bytesRead)) {
      if (byte === 0) return true; // Null byte is a strong indicator
      // Bytes 9-13 (tab, LF, VT, FF, CR) are not counted against the file.
      if (byte < 9 || (byte > 13 && byte < 32)) {
        nonPrintableCount++;
      }
    }
    // If >30% non-printable characters, consider it binary
    return nonPrintableCount / bytesRead > 0.3;
  } catch {
    // If any error occurs (e.g. file not found, permissions),
    // treat as not binary here; let higher-level functions handle existence/access errors.
    return false;
  } finally {
    // Release the descriptor on every path, including when fstatSync or
    // readSync throws after the file was opened.
    if (fd !== undefined) {
      try {
        fs.closeSync(fd);
      } catch {
        // A failed close must not replace the result computed above.
      }
    }
  }
}
|
||||
|
||||
/**
 * Classifies a file as text, image, PDF or binary.
 *
 * The checks run in this order: the MIME type looked up from the path
 * (`image/*` or `application/pdf`), a fixed list of extensions that are
 * always treated as binary, and finally a content check via `isBinaryFile`.
 *
 * @param filePath Path to the file.
 * @returns 'text', 'image', 'pdf', or 'binary'.
 */
export function detectFileType(
  filePath: string,
): 'text' | 'image' | 'pdf' | 'binary' {
  // `lookup` yields the MIME type string, or false when it has none.
  const mimeType = mime.lookup(filePath);
  if (mimeType) {
    if (mimeType.startsWith('image/')) {
      return 'image';
    }
    if (mimeType === 'application/pdf') {
      return 'pdf';
    }
  }

  // Extensions treated as binary outright, before any content is read.
  const knownBinaryExtensions = [
    '.zip',
    '.tar',
    '.gz',
    '.exe',
    '.dll',
    '.so',
    '.class',
    '.jar',
    '.war',
    '.7z',
    '.doc',
    '.docx',
    '.xls',
    '.xlsx',
    '.ppt',
    '.pptx',
    '.odt',
    '.ods',
    '.odp',
    '.bin',
    '.dat',
    '.obj',
    '.o',
    '.a',
    '.lib',
    '.wasm',
    '.pyc',
    '.pyo',
  ];
  const extension = path.extname(filePath).toLowerCase();
  if (knownBinaryExtensions.includes(extension)) {
    return 'binary';
  }

  // Neither the MIME type nor the extension decided; sample the content.
  return isBinaryFile(filePath) ? 'binary' : 'text';
}
|
||||
|
||||
/** Outcome of reading a single file with `processSingleFileContent`. */
export interface ProcessedFileReadResult {
  /**
   * Content handed to the LLM: a string for text files and for skip/error
   * messages, or an inline-data part (base64 data plus MIME type) for images
   * and PDFs.
   */
  llmContent: PartUnion;
  /** Short status line describing what was read or why it was skipped. */
  returnDisplay: string;
  /** Optional error message for the LLM if file processing failed. */
  error?: string;
  /** For text files, indicates if content was truncated. */
  isTruncated?: boolean;
  /** For text files, the total number of lines in the file. */
  originalLineCount?: number;
  /** For text files, [startLine, endLine] of the returned range (1-based for display). */
  linesShown?: [number, number];
}
|
||||
|
||||
/**
 * Reads and processes a single file, handling text, images, and PDFs.
 *
 * - Text files are split on '\n', windowed by `offset` / `limit`, and lines
 *   longer than MAX_LINE_LENGTH_TEXT_FILE characters are cut short. When the
 *   returned window does not cover the whole file, or a line was cut, a
 *   bracketed notice is prepended to the content and `isTruncated` is true.
 * - Images and PDFs are returned as base64 inline data with their MIME type.
 * - Binary files are not read; a short message is returned instead.
 *
 * A missing path, a directory, and errors raised while inspecting or reading
 * the file are reported through the `error` field of the result.
 *
 * @param filePath Absolute path to the file.
 * @param rootDirectory Absolute path to the project root for relative path display.
 * @param offset Optional offset for text files (0-based line number). Negative
 *   values are treated as 0.
 * @param limit Optional limit for text files (number of lines to read).
 *   Defaults to DEFAULT_MAX_LINES_TEXT_FILE; negative values are treated as 0.
 * @returns ProcessedFileReadResult object.
 */
export async function processSingleFileContent(
  filePath: string,
  rootDirectory: string,
  offset?: number,
  limit?: number,
): Promise<ProcessedFileReadResult> {
  try {
    if (!fs.existsSync(filePath)) {
      // Sync check is acceptable before async read
      return {
        llmContent: '',
        returnDisplay: 'File not found.',
        error: `File not found: ${filePath}`,
      };
    }
    const stats = fs.statSync(filePath); // Sync check
    if (stats.isDirectory()) {
      return {
        llmContent: '',
        returnDisplay: 'Path is a directory.',
        error: `Path is a directory, not a file: ${filePath}`,
      };
    }

    const fileType = detectFileType(filePath);
    // Always shown with forward slashes, whatever the platform separator is.
    const relativePathForDisplay = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');

    switch (fileType) {
      case 'binary': {
        return {
          llmContent: `Cannot display content of binary file: ${relativePathForDisplay}`,
          returnDisplay: `Skipped binary file: ${relativePathForDisplay}`,
        };
      }
      case 'text': {
        const content = await fs.promises.readFile(filePath, DEFAULT_ENCODING);
        const lines = content.split('\n');
        const originalLineCount = lines.length;

        // `||` (rather than `??`) also maps a NaN offset to 0; a negative
        // offset is clamped so the slice below never counts from the end.
        const startLine = Math.max(0, offset || 0);
        const effectiveLimit =
          limit === undefined
            ? DEFAULT_MAX_LINES_TEXT_FILE
            : Math.max(0, limit);
        // Ensure endLine does not exceed originalLineCount
        const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
        // Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high
        const actualStartLine = Math.min(startLine, originalLineCount);
        const selectedLines = lines.slice(actualStartLine, endLine);

        let linesWereTruncatedInLength = false;
        const formattedLines = selectedLines.map((line) => {
          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
            linesWereTruncatedInLength = true;
            return (
              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
            );
          }
          return line;
        });

        // The window is partial when lines were skipped at the start as well
        // as when lines remain after the end.
        const contentRangeTruncated =
          actualStartLine > 0 || endLine < originalLineCount;
        const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;

        let llmTextContent = '';
        if (contentRangeTruncated) {
          llmTextContent += `[File content truncated: showing lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} total lines. Use offset/limit parameters to view more.]\n`;
        } else if (linesWereTruncatedInLength) {
          llmTextContent += `[File content partially truncated: some lines exceeded maximum length of ${MAX_LINE_LENGTH_TEXT_FILE} characters.]\n`;
        }
        llmTextContent += formattedLines.join('\n');

        return {
          llmContent: llmTextContent,
          returnDisplay: `Read text file: ${relativePathForDisplay}${isTruncated ? ' (truncated)' : ''}`,
          isTruncated,
          originalLineCount,
          linesShown: [actualStartLine + 1, endLine],
        };
      }
      case 'image':
      case 'pdf': {
        const contentBuffer = await fs.promises.readFile(filePath);
        const base64Data = contentBuffer.toString('base64');
        return {
          llmContent: {
            inlineData: {
              data: base64Data,
              // Falls back to a generic type if the lookup yields nothing.
              mimeType: mime.lookup(filePath) || 'application/octet-stream',
            },
          },
          returnDisplay: `Read ${fileType} file: ${relativePathForDisplay}`,
        };
      }
      default: {
        // Should not happen with current detectFileType logic; the `never`
        // assignment makes the compiler flag any unhandled file type.
        const exhaustiveCheck: never = fileType;
        return {
          llmContent: `Unhandled file type: ${exhaustiveCheck}`,
          returnDisplay: `Skipped unhandled file type: ${relativePathForDisplay}`,
          error: `Unhandled file type for ${filePath}`,
        };
      }
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const displayPath = path
      .relative(rootDirectory, filePath)
      .replace(/\\/g, '/');
    return {
      llmContent: `Error reading file ${displayPath}: ${errorMessage}`,
      returnDisplay: `Error reading file ${displayPath}: ${errorMessage}`,
      error: `Error reading file ${filePath}: ${errorMessage}`,
    };
  }
}
|
||||
Reference in New Issue
Block a user