Mirror of https://github.com/QwenLM/qwen-code.git (synced 2025-12-20 16:57:46 +00:00)
Vision model support for Qwen-OAuth (#525)
* refactor: openaiContentGenerator
* refactor: optimize stream handling
* refactor: re-organize refactored files
* fix: unit test cases
* feat: `/model` command for switching to vision model
* fix: lint error
* feat: add image tokenizer to fit vlm context window
* fix: lint and type errors
* feat: add `visionModelPreview` to control default visibility of vision models
* fix: remove deprecated files
* fix: align supported image formats with bailian doc
packages/core/src/utils/request-tokenizer/imageTokenizer.test.ts (new file, 157 lines)
@@ -0,0 +1,157 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect } from 'vitest';
import { ImageTokenizer } from './imageTokenizer.js';

describe('ImageTokenizer', () => {
  const tokenizer = new ImageTokenizer();

  describe('token calculation', () => {
    it('should calculate tokens based on image dimensions with reference logic', () => {
      const metadata = {
        width: 28,
        height: 28,
        mimeType: 'image/png',
        dataSize: 1000,
      };

      const tokens = tokenizer.calculateTokens(metadata);

      // 28x28 = 784 pixels = 1 image token + 2 special tokens = 3 total
      // But minimum scaling may apply for small images
      expect(tokens).toBeGreaterThanOrEqual(6); // Minimum after scaling + special tokens
    });

    it('should calculate tokens for larger images', () => {
      const metadata = {
        width: 512,
        height: 512,
        mimeType: 'image/png',
        dataSize: 10000,
      };

      const tokens = tokenizer.calculateTokens(metadata);

      // 512x512 with reference logic: rounded dimensions + scaling + special tokens
      expect(tokens).toBeGreaterThan(300);
      expect(tokens).toBeLessThan(400); // Should be reasonable for 512x512
    });

    it('should enforce minimum tokens per image with scaling', () => {
      const metadata = {
        width: 1,
        height: 1,
        mimeType: 'image/png',
        dataSize: 100,
      };

      const tokens = tokenizer.calculateTokens(metadata);

      // Tiny images get scaled up to minimum pixels + special tokens
      expect(tokens).toBeGreaterThanOrEqual(6); // 4 image tokens + 2 special tokens
    });

    it('should handle very large images with scaling', () => {
      const metadata = {
        width: 8192,
        height: 8192,
        mimeType: 'image/png',
        dataSize: 100000,
      };

      const tokens = tokenizer.calculateTokens(metadata);

      // Very large images should be scaled down to max limit + special tokens
      expect(tokens).toBeLessThanOrEqual(16386); // 16384 max + 2 special tokens
      expect(tokens).toBeGreaterThan(16000); // Should be close to the limit
    });
  });

  describe('PNG dimension extraction', () => {
    it('should extract dimensions from valid PNG', async () => {
      // 1x1 PNG image in base64
      const pngBase64 =
        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77yQAAAABJRU5ErkJggg==';

      const metadata = await tokenizer.extractImageMetadata(
        pngBase64,
        'image/png',
      );

      expect(metadata.width).toBe(1);
      expect(metadata.height).toBe(1);
      expect(metadata.mimeType).toBe('image/png');
    });

    it('should handle invalid PNG gracefully', async () => {
      const invalidBase64 = 'invalid-png-data';

      const metadata = await tokenizer.extractImageMetadata(
        invalidBase64,
        'image/png',
      );

      // Should return default dimensions
      expect(metadata.width).toBe(512);
      expect(metadata.height).toBe(512);
      expect(metadata.mimeType).toBe('image/png');
    });
  });

  describe('batch processing', () => {
    it('should process multiple images serially', async () => {
      const pngBase64 =
        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77yQAAAABJRU5ErkJggg==';

      const images = [
        { data: pngBase64, mimeType: 'image/png' },
        { data: pngBase64, mimeType: 'image/png' },
        { data: pngBase64, mimeType: 'image/png' },
      ];

      const tokens = await tokenizer.calculateTokensBatch(images);

      expect(tokens).toHaveLength(3);
      expect(tokens.every((t) => t >= 4)).toBe(true); // All should have at least 4 tokens
    });

    it('should handle mixed valid and invalid images', async () => {
      const validPng =
        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77yQAAAABJRU5ErkJggg==';
      const invalidPng = 'invalid-data';

      const images = [
        { data: validPng, mimeType: 'image/png' },
        { data: invalidPng, mimeType: 'image/png' },
      ];

      const tokens = await tokenizer.calculateTokensBatch(images);

      expect(tokens).toHaveLength(2);
      expect(tokens.every((t) => t >= 4)).toBe(true); // All should have at least minimum tokens
    });
  });

  describe('different image formats', () => {
    it('should handle different MIME types', async () => {
      const pngBase64 =
        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77yQAAAABJRU5ErkJggg==';

      const formats = ['image/png', 'image/jpeg', 'image/webp', 'image/gif'];

      for (const mimeType of formats) {
        const metadata = await tokenizer.extractImageMetadata(
          pngBase64,
          mimeType,
        );
        expect(metadata.mimeType).toBe(mimeType);
        expect(metadata.width).toBeGreaterThan(0);
        expect(metadata.height).toBeGreaterThan(0);
      }
    });
  });
});
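
For reference, the 512x512 assertion above follows directly from the 28-pixel rule implemented in imageTokenizer.ts below; a quick sketch of the arithmetic (editorial illustration, not part of the commit):

const PIXELS_PER_TOKEN = 28 * 28; // 784
const wBar = Math.round(512 / 28) * 28; // 504
const hBar = Math.round(512 / 28) * 28; // 504
const imageTokens = Math.floor((wBar * hBar) / PIXELS_PER_TOKEN); // 324
const totalTokens = imageTokens + 2; // 326, inside the asserted 300-400 range
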
packages/core/src/utils/request-tokenizer/imageTokenizer.ts (new file, 505 lines)
@@ -0,0 +1,505 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

import type { ImageMetadata } from './types.js';
import { isSupportedImageMimeType } from './supportedImageFormats.js';

/**
 * Image tokenizer for calculating image tokens based on dimensions
 *
 * Key rules:
 * - 28x28 pixels = 1 token
 * - Minimum: 4 tokens per image
 * - Maximum: 16384 tokens per image
 * - Additional: 2 special tokens (vision_bos + vision_eos)
 * - Supports: PNG, JPEG, WebP, GIF, BMP, TIFF, HEIC formats
 */
export class ImageTokenizer {
  /** 28x28 pixels = 1 token */
  private static readonly PIXELS_PER_TOKEN = 28 * 28;

  /** Minimum tokens per image */
  private static readonly MIN_TOKENS_PER_IMAGE = 4;

  /** Maximum tokens per image */
  private static readonly MAX_TOKENS_PER_IMAGE = 16384;

  /** Special tokens for vision markers */
  private static readonly VISION_SPECIAL_TOKENS = 2;

  /**
   * Extract image metadata from base64 data
   *
   * @param base64Data Base64-encoded image data (with or without data URL prefix)
   * @param mimeType MIME type of the image
   * @returns Promise resolving to ImageMetadata with dimensions and format info
   */
  async extractImageMetadata(
    base64Data: string,
    mimeType: string,
  ): Promise<ImageMetadata> {
    try {
      // Check if the MIME type is supported
      if (!isSupportedImageMimeType(mimeType)) {
        console.warn(`Unsupported image format: ${mimeType}`);
        // Return default metadata for unsupported formats
        return {
          width: 512,
          height: 512,
          mimeType,
          dataSize: Math.floor(base64Data.length * 0.75),
        };
      }

      const cleanBase64 = base64Data.replace(/^data:[^;]+;base64,/, '');
      const buffer = Buffer.from(cleanBase64, 'base64');
      const dimensions = await this.extractDimensions(buffer, mimeType);

      return {
        width: dimensions.width,
        height: dimensions.height,
        mimeType,
        dataSize: buffer.length,
      };
    } catch (error) {
      console.warn('Failed to extract image metadata:', error);
      // Return default metadata for fallback
      return {
        width: 512,
        height: 512,
        mimeType,
        dataSize: Math.floor(base64Data.length * 0.75),
      };
    }
  }

  /**
   * Extract image dimensions from buffer based on format
   *
   * @param buffer Binary image data buffer
   * @param mimeType MIME type to determine parsing strategy
   * @returns Promise resolving to width and height dimensions
   */
  private async extractDimensions(
    buffer: Buffer,
    mimeType: string,
  ): Promise<{ width: number; height: number }> {
    if (mimeType.includes('png')) {
      return this.extractPngDimensions(buffer);
    }

    if (mimeType.includes('jpeg') || mimeType.includes('jpg')) {
      return this.extractJpegDimensions(buffer);
    }

    if (mimeType.includes('webp')) {
      return this.extractWebpDimensions(buffer);
    }

    if (mimeType.includes('gif')) {
      return this.extractGifDimensions(buffer);
    }

    if (mimeType.includes('bmp')) {
      return this.extractBmpDimensions(buffer);
    }

    if (mimeType.includes('tiff')) {
      return this.extractTiffDimensions(buffer);
    }

    if (mimeType.includes('heic')) {
      return this.extractHeicDimensions(buffer);
    }

    return { width: 512, height: 512 };
  }

  /**
   * Extract PNG dimensions from IHDR chunk
   * PNG signature: 89 50 4E 47 0D 0A 1A 0A
   * Width/height at bytes 16-19 and 20-23 (big-endian)
   */
  private extractPngDimensions(buffer: Buffer): {
    width: number;
    height: number;
  } {
    if (buffer.length < 24) {
      throw new Error('Invalid PNG: buffer too short');
    }

    // Verify PNG signature
    const signature = buffer.subarray(0, 8);
    const expectedSignature = Buffer.from([
      0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
    ]);
    if (!signature.equals(expectedSignature)) {
      throw new Error('Invalid PNG signature');
    }

    const width = buffer.readUInt32BE(16);
    const height = buffer.readUInt32BE(20);

    return { width, height };
  }

  /**
   * Extract JPEG dimensions from SOF (Start of Frame) markers
   * JPEG starts with FF D8, SOF markers: 0xC0-0xC3, 0xC5-0xC7, 0xC9-0xCB, 0xCD-0xCF
   * Dimensions at offset +5 (height) and +7 (width) from SOF marker
   */
  private extractJpegDimensions(buffer: Buffer): {
    width: number;
    height: number;
  } {
    if (buffer.length < 4 || buffer[0] !== 0xff || buffer[1] !== 0xd8) {
      throw new Error('Invalid JPEG signature');
    }

    let offset = 2;

    while (offset < buffer.length - 8) {
      if (buffer[offset] !== 0xff) {
        offset++;
        continue;
      }

      const marker = buffer[offset + 1];

      // SOF markers
      if (
        (marker >= 0xc0 && marker <= 0xc3) ||
        (marker >= 0xc5 && marker <= 0xc7) ||
        (marker >= 0xc9 && marker <= 0xcb) ||
        (marker >= 0xcd && marker <= 0xcf)
      ) {
        const height = buffer.readUInt16BE(offset + 5);
        const width = buffer.readUInt16BE(offset + 7);
        return { width, height };
      }

      const segmentLength = buffer.readUInt16BE(offset + 2);
      offset += 2 + segmentLength;
    }

    throw new Error('Could not find JPEG dimensions');
  }

  /**
   * Extract WebP dimensions from RIFF container
   * Supports VP8, VP8L, and VP8X formats
   */
  private extractWebpDimensions(buffer: Buffer): {
    width: number;
    height: number;
  } {
    if (buffer.length < 30) {
      throw new Error('Invalid WebP: too short');
    }

    const riffSignature = buffer.subarray(0, 4).toString('ascii');
    const webpSignature = buffer.subarray(8, 12).toString('ascii');

    if (riffSignature !== 'RIFF' || webpSignature !== 'WEBP') {
      throw new Error('Invalid WebP signature');
    }

    const format = buffer.subarray(12, 16).toString('ascii');

    if (format === 'VP8 ') {
      const width = buffer.readUInt16LE(26) & 0x3fff;
      const height = buffer.readUInt16LE(28) & 0x3fff;
      return { width, height };
    } else if (format === 'VP8L') {
      const bits = buffer.readUInt32LE(21);
      const width = (bits & 0x3fff) + 1;
      const height = ((bits >> 14) & 0x3fff) + 1;
      return { width, height };
    } else if (format === 'VP8X') {
      const width = (buffer.readUInt32LE(24) & 0xffffff) + 1;
      const height = (buffer.readUInt32LE(26) & 0xffffff) + 1;
      return { width, height };
    }

    throw new Error('Unsupported WebP format');
  }

  /**
   * Extract GIF dimensions from header
   * Supports GIF87a and GIF89a formats
   */
  private extractGifDimensions(buffer: Buffer): {
    width: number;
    height: number;
  } {
    if (buffer.length < 10) {
      throw new Error('Invalid GIF: too short');
    }

    const signature = buffer.subarray(0, 6).toString('ascii');
    if (signature !== 'GIF87a' && signature !== 'GIF89a') {
      throw new Error('Invalid GIF signature');
    }

    const width = buffer.readUInt16LE(6);
    const height = buffer.readUInt16LE(8);

    return { width, height };
  }

  /**
   * Calculate tokens for an image based on its metadata
   *
   * @param metadata Image metadata containing width, height, and format info
   * @returns Total token count including base image tokens and special tokens
   */
  calculateTokens(metadata: ImageMetadata): number {
    return this.calculateTokensWithScaling(metadata.width, metadata.height);
  }

  /**
   * Calculate tokens with scaling logic
   *
   * Steps:
   * 1. Normalize to 28-pixel multiples
   * 2. Scale large images down, small images up
   * 3. Calculate tokens: pixels / 784 + 2 special tokens
   *
   * @param width Original image width in pixels
   * @param height Original image height in pixels
   * @returns Total token count for the image
   */
  private calculateTokensWithScaling(width: number, height: number): number {
    // Normalize to 28-pixel multiples
    let hBar = Math.round(height / 28) * 28;
    let wBar = Math.round(width / 28) * 28;

    // Define pixel boundaries
    const minPixels =
      ImageTokenizer.MIN_TOKENS_PER_IMAGE * ImageTokenizer.PIXELS_PER_TOKEN;
    const maxPixels =
      ImageTokenizer.MAX_TOKENS_PER_IMAGE * ImageTokenizer.PIXELS_PER_TOKEN;

    // Apply scaling
    if (hBar * wBar > maxPixels) {
      // Scale down large images
      const beta = Math.sqrt((height * width) / maxPixels);
      hBar = Math.floor(height / beta / 28) * 28;
      wBar = Math.floor(width / beta / 28) * 28;
    } else if (hBar * wBar < minPixels) {
      // Scale up small images
      const beta = Math.sqrt(minPixels / (height * width));
      hBar = Math.ceil((height * beta) / 28) * 28;
      wBar = Math.ceil((width * beta) / 28) * 28;
    }

    // Calculate tokens
    const imageTokens = Math.floor(
      (hBar * wBar) / ImageTokenizer.PIXELS_PER_TOKEN,
    );

    return imageTokens + ImageTokenizer.VISION_SPECIAL_TOKENS;
  }

  /**
   * Calculate tokens for multiple images serially
   *
   * @param base64DataArray Array of image data with MIME type information
   * @returns Promise resolving to array of token counts in same order as input
   */
  async calculateTokensBatch(
    base64DataArray: Array<{ data: string; mimeType: string }>,
  ): Promise<number[]> {
    const results: number[] = [];

    for (const { data, mimeType } of base64DataArray) {
      try {
        const metadata = await this.extractImageMetadata(data, mimeType);
        results.push(this.calculateTokens(metadata));
      } catch (error) {
        console.warn('Error calculating tokens for image:', error);
        // Return minimum tokens as fallback
        results.push(
          ImageTokenizer.MIN_TOKENS_PER_IMAGE +
            ImageTokenizer.VISION_SPECIAL_TOKENS,
        );
      }
    }

    return results;
  }

  /**
   * Extract BMP dimensions from header
   * BMP signature: 42 4D (BM)
   * Width/height at bytes 18-21 and 22-25 (little-endian)
   */
  private extractBmpDimensions(buffer: Buffer): {
    width: number;
    height: number;
  } {
    if (buffer.length < 26) {
      throw new Error('Invalid BMP: buffer too short');
    }

    // Verify BMP signature
    if (buffer[0] !== 0x42 || buffer[1] !== 0x4d) {
      throw new Error('Invalid BMP signature');
    }

    const width = buffer.readUInt32LE(18);
    const height = buffer.readUInt32LE(22);

    return { width, height: Math.abs(height) }; // Height can be negative for top-down BMPs
  }

  /**
   * Extract TIFF dimensions from IFD (Image File Directory)
   * TIFF can be little-endian (II) or big-endian (MM)
   * Width/height are stored in IFD entries with tags 0x0100 and 0x0101
   */
  private extractTiffDimensions(buffer: Buffer): {
    width: number;
    height: number;
  } {
    if (buffer.length < 8) {
      throw new Error('Invalid TIFF: buffer too short');
    }

    // Check byte order
    const byteOrder = buffer.subarray(0, 2).toString('ascii');
    const isLittleEndian = byteOrder === 'II';
    const isBigEndian = byteOrder === 'MM';

    if (!isLittleEndian && !isBigEndian) {
      throw new Error('Invalid TIFF byte order');
    }

    // Read magic number (should be 42)
    const magic = isLittleEndian
      ? buffer.readUInt16LE(2)
      : buffer.readUInt16BE(2);
    if (magic !== 42) {
      throw new Error('Invalid TIFF magic number');
    }

    // Read IFD offset
    const ifdOffset = isLittleEndian
      ? buffer.readUInt32LE(4)
      : buffer.readUInt32BE(4);

    if (ifdOffset >= buffer.length) {
      throw new Error('Invalid TIFF IFD offset');
    }

    // Read number of directory entries
    const numEntries = isLittleEndian
      ? buffer.readUInt16LE(ifdOffset)
      : buffer.readUInt16BE(ifdOffset);

    let width = 0;
    let height = 0;

    // Parse IFD entries
    for (let i = 0; i < numEntries; i++) {
      const entryOffset = ifdOffset + 2 + i * 12;

      if (entryOffset + 12 > buffer.length) break;

      const tag = isLittleEndian
        ? buffer.readUInt16LE(entryOffset)
        : buffer.readUInt16BE(entryOffset);

      const type = isLittleEndian
        ? buffer.readUInt16LE(entryOffset + 2)
        : buffer.readUInt16BE(entryOffset + 2);

      const value = isLittleEndian
        ? buffer.readUInt32LE(entryOffset + 8)
        : buffer.readUInt32BE(entryOffset + 8);

      if (tag === 0x0100) {
        // ImageWidth
        width = type === 3 ? value : value; // SHORT or LONG
      } else if (tag === 0x0101) {
        // ImageLength (height)
        height = type === 3 ? value : value; // SHORT or LONG
      }

      if (width > 0 && height > 0) break;
    }

    if (width === 0 || height === 0) {
      throw new Error('Could not find TIFF dimensions');
    }

    return { width, height };
  }

  /**
   * Extract HEIC dimensions from meta box
   * HEIC is based on ISO Base Media File Format
   * This is a simplified implementation that looks for 'ispe' (Image Spatial Extents) box
   */
  private extractHeicDimensions(buffer: Buffer): {
    width: number;
    height: number;
  } {
    if (buffer.length < 12) {
      throw new Error('Invalid HEIC: buffer too short');
    }

    // Check for ftyp box with HEIC brand
    const ftypBox = buffer.subarray(4, 8).toString('ascii');
    if (ftypBox !== 'ftyp') {
      throw new Error('Invalid HEIC: missing ftyp box');
    }

    const brand = buffer.subarray(8, 12).toString('ascii');
    if (!['heic', 'heix', 'hevc', 'hevx'].includes(brand)) {
      throw new Error('Invalid HEIC brand');
    }

    // Look for meta box and then ispe box
    let offset = 0;
    while (offset < buffer.length - 8) {
      const boxSize = buffer.readUInt32BE(offset);
      const boxType = buffer.subarray(offset + 4, offset + 8).toString('ascii');

      if (boxType === 'meta') {
        // Look for ispe box inside meta box
        const metaOffset = offset + 8;
        let innerOffset = metaOffset + 4; // Skip version and flags

        while (innerOffset < offset + boxSize - 8) {
          const innerBoxSize = buffer.readUInt32BE(innerOffset);
          const innerBoxType = buffer
            .subarray(innerOffset + 4, innerOffset + 8)
            .toString('ascii');

          if (innerBoxType === 'ispe') {
            // Found Image Spatial Extents box
            if (innerOffset + 20 <= buffer.length) {
              const width = buffer.readUInt32BE(innerOffset + 12);
              const height = buffer.readUInt32BE(innerOffset + 16);
              return { width, height };
            }
          }

          if (innerBoxSize === 0) break;
          innerOffset += innerBoxSize;
        }
      }

      if (boxSize === 0) break;
      offset += boxSize;
    }

    // Fallback: return default dimensions if we can't parse the structure
    console.warn('Could not extract HEIC dimensions, using default');
    return { width: 512, height: 512 };
  }
}
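
A minimal usage sketch of the class above (illustrative only; pngBase64 is assumed to be a base64-encoded PNG string supplied by the caller):

import { ImageTokenizer } from './imageTokenizer.js';

const imageTokenizer = new ImageTokenizer();
// extractImageMetadata parses the image header bytes to get width/height,
// falling back to 512x512 when the data cannot be parsed.
const metadata = await imageTokenizer.extractImageMetadata(pngBase64, 'image/png');
// calculateTokens applies the 28x28-pixel rule plus the two vision special tokens.
const tokens = imageTokenizer.calculateTokens(metadata);
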
packages/core/src/utils/request-tokenizer/index.ts (new file, 40 lines)
@@ -0,0 +1,40 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

export { DefaultRequestTokenizer } from './requestTokenizer.js';
import { DefaultRequestTokenizer } from './requestTokenizer.js';
export { TextTokenizer } from './textTokenizer.js';
export { ImageTokenizer } from './imageTokenizer.js';

export type {
  RequestTokenizer,
  TokenizerConfig,
  TokenCalculationResult,
  ImageMetadata,
} from './types.js';

// Singleton instance for convenient usage
let defaultTokenizer: DefaultRequestTokenizer | null = null;

/**
 * Get the default request tokenizer instance
 */
export function getDefaultTokenizer(): DefaultRequestTokenizer {
  if (!defaultTokenizer) {
    defaultTokenizer = new DefaultRequestTokenizer();
  }
  return defaultTokenizer;
}

/**
 * Dispose of the default tokenizer instance
 */
export async function disposeDefaultTokenizer(): Promise<void> {
  if (defaultTokenizer) {
    await defaultTokenizer.dispose();
    defaultTokenizer = null;
  }
}
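
A sketch of how a caller would use the singleton exported above (the request variable is assumed to be a CountTokensParameters value as in the tests below):

import { getDefaultTokenizer, disposeDefaultTokenizer } from './index.js';

// request: CountTokensParameters, assumed to be built by the caller
const result = await getDefaultTokenizer().calculateTokens(request);
console.log(result.totalTokens, result.breakdown.imageTokens);
// Later, e.g. on shutdown, release the cached tiktoken encoding:
await disposeDefaultTokenizer();
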
packages/core/src/utils/request-tokenizer/requestTokenizer.test.ts (new file, 293 lines)
@@ -0,0 +1,293 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { DefaultRequestTokenizer } from './requestTokenizer.js';
import type { CountTokensParameters } from '@google/genai';

describe('DefaultRequestTokenizer', () => {
  let tokenizer: DefaultRequestTokenizer;

  beforeEach(() => {
    tokenizer = new DefaultRequestTokenizer();
  });

  afterEach(async () => {
    await tokenizer.dispose();
  });

  describe('text token calculation', () => {
    it('should calculate tokens for simple text content', async () => {
      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [
          {
            role: 'user',
            parts: [{ text: 'Hello, world!' }],
          },
        ],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBeGreaterThan(0);
      expect(result.breakdown.textTokens).toBeGreaterThan(0);
      expect(result.breakdown.imageTokens).toBe(0);
      expect(result.processingTime).toBeGreaterThan(0);
    });

    it('should handle multiple text parts', async () => {
      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [
          {
            role: 'user',
            parts: [
              { text: 'First part' },
              { text: 'Second part' },
              { text: 'Third part' },
            ],
          },
        ],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBeGreaterThan(0);
      expect(result.breakdown.textTokens).toBeGreaterThan(0);
    });

    it('should handle string content', async () => {
      const request: CountTokensParameters = {
        model: 'test-model',
        contents: ['Simple string content'],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBeGreaterThan(0);
      expect(result.breakdown.textTokens).toBeGreaterThan(0);
    });
  });

  describe('image token calculation', () => {
    it('should calculate tokens for image content', async () => {
      // Create a simple 1x1 PNG image in base64
      const pngBase64 =
        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77yQAAAABJRU5ErkJggg==';

      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [
          {
            role: 'user',
            parts: [
              {
                inlineData: {
                  mimeType: 'image/png',
                  data: pngBase64,
                },
              },
            ],
          },
        ],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBeGreaterThanOrEqual(4); // Minimum 4 tokens per image
      expect(result.breakdown.imageTokens).toBeGreaterThanOrEqual(4);
      expect(result.breakdown.textTokens).toBe(0);
    });

    it('should handle multiple images', async () => {
      const pngBase64 =
        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77yQAAAABJRU5ErkJggg==';

      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [
          {
            role: 'user',
            parts: [
              {
                inlineData: {
                  mimeType: 'image/png',
                  data: pngBase64,
                },
              },
              {
                inlineData: {
                  mimeType: 'image/png',
                  data: pngBase64,
                },
              },
            ],
          },
        ],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBeGreaterThanOrEqual(8); // At least 4 tokens per image
      expect(result.breakdown.imageTokens).toBeGreaterThanOrEqual(8);
    });
  });

  describe('mixed content', () => {
    it('should handle text and image content together', async () => {
      const pngBase64 =
        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77yQAAAABJRU5ErkJggg==';

      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [
          {
            role: 'user',
            parts: [
              { text: 'Here is an image:' },
              {
                inlineData: {
                  mimeType: 'image/png',
                  data: pngBase64,
                },
              },
              { text: 'What do you see?' },
            ],
          },
        ],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBeGreaterThan(4);
      expect(result.breakdown.textTokens).toBeGreaterThan(0);
      expect(result.breakdown.imageTokens).toBeGreaterThanOrEqual(4);
    });
  });

  describe('function content', () => {
    it('should handle function calls', async () => {
      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [
          {
            role: 'user',
            parts: [
              {
                functionCall: {
                  name: 'test_function',
                  args: { param1: 'value1', param2: 42 },
                },
              },
            ],
          },
        ],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBeGreaterThan(0);
      expect(result.breakdown.otherTokens).toBeGreaterThan(0);
    });
  });

  describe('empty content', () => {
    it('should handle empty request', async () => {
      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBe(0);
      expect(result.breakdown.textTokens).toBe(0);
      expect(result.breakdown.imageTokens).toBe(0);
    });

    it('should handle undefined contents', async () => {
      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBe(0);
    });
  });

  describe('configuration', () => {
    it('should use custom text encoding', async () => {
      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [
          {
            role: 'user',
            parts: [{ text: 'Test text for encoding' }],
          },
        ],
      };

      const result = await tokenizer.calculateTokens(request, {
        textEncoding: 'cl100k_base',
      });

      expect(result.totalTokens).toBeGreaterThan(0);
    });

    it('should process multiple images serially', async () => {
      const pngBase64 =
        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77yQAAAABJRU5ErkJggg==';

      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [
          {
            role: 'user',
            parts: Array(10).fill({
              inlineData: {
                mimeType: 'image/png',
                data: pngBase64,
              },
            }),
          },
        ],
      };

      const result = await tokenizer.calculateTokens(request);

      expect(result.totalTokens).toBeGreaterThanOrEqual(60); // At least 6 tokens per image * 10 images
    });
  });

  describe('error handling', () => {
    it('should handle malformed image data gracefully', async () => {
      const request: CountTokensParameters = {
        model: 'test-model',
        contents: [
          {
            role: 'user',
            parts: [
              {
                inlineData: {
                  mimeType: 'image/png',
                  data: 'invalid-base64-data',
                },
              },
            ],
          },
        ],
      };

      const result = await tokenizer.calculateTokens(request);

      // Should still return some tokens (fallback to minimum)
      expect(result.totalTokens).toBeGreaterThanOrEqual(4);
    });
  });
});
packages/core/src/utils/request-tokenizer/requestTokenizer.ts (new file, 341 lines)
@@ -0,0 +1,341 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

import type {
  CountTokensParameters,
  Content,
  Part,
  PartUnion,
} from '@google/genai';
import type {
  RequestTokenizer,
  TokenizerConfig,
  TokenCalculationResult,
} from './types.js';
import { TextTokenizer } from './textTokenizer.js';
import { ImageTokenizer } from './imageTokenizer.js';

/**
 * Simple request tokenizer that handles text and image content serially
 */
export class DefaultRequestTokenizer implements RequestTokenizer {
  private textTokenizer: TextTokenizer;
  private imageTokenizer: ImageTokenizer;

  constructor() {
    this.textTokenizer = new TextTokenizer();
    this.imageTokenizer = new ImageTokenizer();
  }

  /**
   * Calculate tokens for a request using serial processing
   */
  async calculateTokens(
    request: CountTokensParameters,
    config: TokenizerConfig = {},
  ): Promise<TokenCalculationResult> {
    const startTime = performance.now();

    // Apply configuration
    if (config.textEncoding) {
      this.textTokenizer = new TextTokenizer(config.textEncoding);
    }

    try {
      // Process request content and group by type
      const { textContents, imageContents, audioContents, otherContents } =
        this.processAndGroupContents(request);

      if (
        textContents.length === 0 &&
        imageContents.length === 0 &&
        audioContents.length === 0 &&
        otherContents.length === 0
      ) {
        return {
          totalTokens: 0,
          breakdown: {
            textTokens: 0,
            imageTokens: 0,
            audioTokens: 0,
            otherTokens: 0,
          },
          processingTime: performance.now() - startTime,
        };
      }

      // Calculate tokens for each content type serially
      const textTokens = await this.calculateTextTokens(textContents);
      const imageTokens = await this.calculateImageTokens(imageContents);
      const audioTokens = await this.calculateAudioTokens(audioContents);
      const otherTokens = await this.calculateOtherTokens(otherContents);

      const totalTokens = textTokens + imageTokens + audioTokens + otherTokens;
      const processingTime = performance.now() - startTime;

      return {
        totalTokens,
        breakdown: {
          textTokens,
          imageTokens,
          audioTokens,
          otherTokens,
        },
        processingTime,
      };
    } catch (error) {
      console.error('Error calculating tokens:', error);

      // Fallback calculation
      const fallbackTokens = this.calculateFallbackTokens(request);

      return {
        totalTokens: fallbackTokens,
        breakdown: {
          textTokens: fallbackTokens,
          imageTokens: 0,
          audioTokens: 0,
          otherTokens: 0,
        },
        processingTime: performance.now() - startTime,
      };
    }
  }

  /**
   * Calculate tokens for text contents
   */
  private async calculateTextTokens(textContents: string[]): Promise<number> {
    if (textContents.length === 0) return 0;

    try {
      const tokenCounts =
        await this.textTokenizer.calculateTokensBatch(textContents);
      return tokenCounts.reduce((sum, count) => sum + count, 0);
    } catch (error) {
      console.warn('Error calculating text tokens:', error);
      // Fallback: character-based estimation
      const totalChars = textContents.join('').length;
      return Math.ceil(totalChars / 4);
    }
  }

  /**
   * Calculate tokens for image contents using serial processing
   */
  private async calculateImageTokens(
    imageContents: Array<{ data: string; mimeType: string }>,
  ): Promise<number> {
    if (imageContents.length === 0) return 0;

    try {
      const tokenCounts =
        await this.imageTokenizer.calculateTokensBatch(imageContents);
      return tokenCounts.reduce((sum, count) => sum + count, 0);
    } catch (error) {
      console.warn('Error calculating image tokens:', error);
      // Fallback: minimum tokens per image
      return imageContents.length * 6; // 4 image tokens + 2 special tokens as minimum
    }
  }

  /**
   * Calculate tokens for audio contents
   * TODO: Implement proper audio token calculation
   */
  private async calculateAudioTokens(
    audioContents: Array<{ data: string; mimeType: string }>,
  ): Promise<number> {
    if (audioContents.length === 0) return 0;

    // Placeholder implementation - audio token calculation would depend on
    // the specific model's audio processing capabilities
    // For now, estimate based on data size
    let totalTokens = 0;

    for (const audioContent of audioContents) {
      try {
        const dataSize = Math.floor(audioContent.data.length * 0.75); // Approximate binary size
        // Rough estimate: 1 token per 100 bytes of audio data
        totalTokens += Math.max(Math.ceil(dataSize / 100), 10); // Minimum 10 tokens per audio
      } catch (error) {
        console.warn('Error calculating audio tokens:', error);
        totalTokens += 10; // Fallback minimum
      }
    }

    return totalTokens;
  }

  /**
   * Calculate tokens for other content types (functions, files, etc.)
   */
  private async calculateOtherTokens(otherContents: string[]): Promise<number> {
    if (otherContents.length === 0) return 0;

    try {
      // Treat other content as text for token calculation
      const tokenCounts =
        await this.textTokenizer.calculateTokensBatch(otherContents);
      return tokenCounts.reduce((sum, count) => sum + count, 0);
    } catch (error) {
      console.warn('Error calculating other content tokens:', error);
      // Fallback: character-based estimation
      const totalChars = otherContents.join('').length;
      return Math.ceil(totalChars / 4);
    }
  }

  /**
   * Fallback token calculation using simple string serialization
   */
  private calculateFallbackTokens(request: CountTokensParameters): number {
    try {
      const content = JSON.stringify(request.contents);
      return Math.ceil(content.length / 4); // Rough estimate: 1 token ≈ 4 characters
    } catch (error) {
      console.warn('Error in fallback token calculation:', error);
      return 100; // Conservative fallback
    }
  }

  /**
   * Process request contents and group by type
   */
  private processAndGroupContents(request: CountTokensParameters): {
    textContents: string[];
    imageContents: Array<{ data: string; mimeType: string }>;
    audioContents: Array<{ data: string; mimeType: string }>;
    otherContents: string[];
  } {
    const textContents: string[] = [];
    const imageContents: Array<{ data: string; mimeType: string }> = [];
    const audioContents: Array<{ data: string; mimeType: string }> = [];
    const otherContents: string[] = [];

    if (!request.contents) {
      return { textContents, imageContents, audioContents, otherContents };
    }

    const contents = Array.isArray(request.contents)
      ? request.contents
      : [request.contents];

    for (const content of contents) {
      this.processContent(
        content,
        textContents,
        imageContents,
        audioContents,
        otherContents,
      );
    }

    return { textContents, imageContents, audioContents, otherContents };
  }

  /**
   * Process a single content item and add to appropriate arrays
   */
  private processContent(
    content: Content | string | PartUnion,
    textContents: string[],
    imageContents: Array<{ data: string; mimeType: string }>,
    audioContents: Array<{ data: string; mimeType: string }>,
    otherContents: string[],
  ): void {
    if (typeof content === 'string') {
      if (content.trim()) {
        textContents.push(content);
      }
      return;
    }

    if ('parts' in content && content.parts) {
      for (const part of content.parts) {
        this.processPart(
          part,
          textContents,
          imageContents,
          audioContents,
          otherContents,
        );
      }
    }
  }

  /**
   * Process a single part and add to appropriate arrays
   */
  private processPart(
    part: Part | string,
    textContents: string[],
    imageContents: Array<{ data: string; mimeType: string }>,
    audioContents: Array<{ data: string; mimeType: string }>,
    otherContents: string[],
  ): void {
    if (typeof part === 'string') {
      if (part.trim()) {
        textContents.push(part);
      }
      return;
    }

    if ('text' in part && part.text) {
      textContents.push(part.text);
      return;
    }

    if ('inlineData' in part && part.inlineData) {
      const { data, mimeType } = part.inlineData;
      if (mimeType && mimeType.startsWith('image/')) {
        imageContents.push({ data: data || '', mimeType });
        return;
      }
      if (mimeType && mimeType.startsWith('audio/')) {
        audioContents.push({ data: data || '', mimeType });
        return;
      }
    }

    if ('fileData' in part && part.fileData) {
      otherContents.push(JSON.stringify(part.fileData));
      return;
    }

    if ('functionCall' in part && part.functionCall) {
      otherContents.push(JSON.stringify(part.functionCall));
      return;
    }

    if ('functionResponse' in part && part.functionResponse) {
      otherContents.push(JSON.stringify(part.functionResponse));
      return;
    }

    // Unknown part type - try to serialize
    try {
      const serialized = JSON.stringify(part);
      if (serialized && serialized !== '{}') {
        otherContents.push(serialized);
      }
    } catch (error) {
      console.warn('Failed to serialize unknown part type:', error);
    }
  }

  /**
   * Dispose of resources
   */
  async dispose(): Promise<void> {
    try {
      // Dispose of tokenizers
      this.textTokenizer.dispose();
    } catch (error) {
      console.warn('Error disposing request tokenizer:', error);
    }
  }
}
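
The commit message mentions adding the image tokenizer "to fit vlm context window"; a hypothetical guard along those lines might look like the sketch below (VISION_CONTEXT_WINDOW and the trimming step are assumptions for illustration, not part of this commit):

const tokenizer = new DefaultRequestTokenizer();
const { totalTokens, breakdown } = await tokenizer.calculateTokens(request);
if (totalTokens > VISION_CONTEXT_WINDOW) {
  // e.g. drop or downscale images, or trim history, before sending the request
  console.warn(
    `Request needs ${totalTokens} tokens (${breakdown.imageTokens} from images); trimming to fit.`,
  );
}
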
packages/core/src/utils/request-tokenizer/supportedImageFormats.ts (new file, 56 lines)
@@ -0,0 +1,56 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * Supported image MIME types for vision models
 * These formats are supported by the vision model and can be processed by the image tokenizer
 */
export const SUPPORTED_IMAGE_MIME_TYPES = [
  'image/bmp',
  'image/jpeg',
  'image/jpg', // Alternative MIME type for JPEG
  'image/png',
  'image/tiff',
  'image/webp',
  'image/heic',
] as const;

/**
 * Type for supported image MIME types
 */
export type SupportedImageMimeType =
  (typeof SUPPORTED_IMAGE_MIME_TYPES)[number];

/**
 * Check if a MIME type is supported for vision processing
 * @param mimeType The MIME type to check
 * @returns True if the MIME type is supported
 */
export function isSupportedImageMimeType(
  mimeType: string,
): mimeType is SupportedImageMimeType {
  return SUPPORTED_IMAGE_MIME_TYPES.includes(
    mimeType as SupportedImageMimeType,
  );
}

/**
 * Get a human-readable list of supported image formats
 * @returns Comma-separated string of supported formats
 */
export function getSupportedImageFormatsString(): string {
  return SUPPORTED_IMAGE_MIME_TYPES.map((type) =>
    type.replace('image/', '').toUpperCase(),
  ).join(', ');
}

/**
 * Get warning message for unsupported image formats
 * @returns Warning message string
 */
export function getUnsupportedImageFormatWarning(): string {
  return `Only the following image formats are supported: ${getSupportedImageFormatsString()}. Other formats may not work as expected.`;
}
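
A short sketch of how these helpers combine when validating an attachment (the mimeType variable is assumed to come from the caller):

if (!isSupportedImageMimeType(mimeType)) {
  // Surface the Bailian-aligned format list to the user
  console.warn(getUnsupportedImageFormatWarning());
}

Note that GIF has a dimension parser in imageTokenizer.ts but is intentionally absent from this list (per the "align supported image formats with bailian doc" item in the commit message), so GIF inputs fall back to the default 512x512 metadata.
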
packages/core/src/utils/request-tokenizer/textTokenizer.test.ts (new file, 347 lines)
@@ -0,0 +1,347 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { TextTokenizer } from './textTokenizer.js';

// Mock tiktoken at the top level with hoisted functions
const mockEncode = vi.hoisted(() => vi.fn());
const mockFree = vi.hoisted(() => vi.fn());
const mockGetEncoding = vi.hoisted(() => vi.fn());

vi.mock('tiktoken', () => ({
  get_encoding: mockGetEncoding,
}));

describe('TextTokenizer', () => {
  let tokenizer: TextTokenizer;
  let consoleWarnSpy: ReturnType<typeof vi.spyOn>;

  beforeEach(() => {
    vi.resetAllMocks();
    consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});

    // Default mock implementation
    mockGetEncoding.mockReturnValue({
      encode: mockEncode,
      free: mockFree,
    });
  });

  afterEach(() => {
    vi.restoreAllMocks();
    tokenizer?.dispose();
  });

  describe('constructor', () => {
    it('should create tokenizer with default encoding', () => {
      tokenizer = new TextTokenizer();
      expect(tokenizer).toBeInstanceOf(TextTokenizer);
    });

    it('should create tokenizer with custom encoding', () => {
      tokenizer = new TextTokenizer('gpt2');
      expect(tokenizer).toBeInstanceOf(TextTokenizer);
    });
  });

  describe('calculateTokens', () => {
    beforeEach(() => {
      tokenizer = new TextTokenizer();
    });

    it('should return 0 for empty text', async () => {
      const result = await tokenizer.calculateTokens('');
      expect(result).toBe(0);
    });

    it('should return 0 for null/undefined text', async () => {
      const result1 = await tokenizer.calculateTokens(
        null as unknown as string,
      );
      const result2 = await tokenizer.calculateTokens(
        undefined as unknown as string,
      );
      expect(result1).toBe(0);
      expect(result2).toBe(0);
    });

    it('should calculate tokens using tiktoken when available', async () => {
      const testText = 'Hello, world!';
      const mockTokens = [1, 2, 3, 4, 5]; // 5 tokens
      mockEncode.mockReturnValue(mockTokens);

      const result = await tokenizer.calculateTokens(testText);

      expect(mockGetEncoding).toHaveBeenCalledWith('cl100k_base');
      expect(mockEncode).toHaveBeenCalledWith(testText);
      expect(result).toBe(5);
    });

    it('should use fallback calculation when tiktoken fails to load', async () => {
      mockGetEncoding.mockImplementation(() => {
        throw new Error('Failed to load tiktoken');
      });

      const testText = 'Hello, world!'; // 13 characters
      const result = await tokenizer.calculateTokens(testText);

      expect(consoleWarnSpy).toHaveBeenCalledWith(
        'Failed to load tiktoken with encoding cl100k_base:',
        expect.any(Error),
      );
      // Fallback: Math.ceil(13 / 4) = 4
      expect(result).toBe(4);
    });

    it('should use fallback calculation when encoding fails', async () => {
      mockEncode.mockImplementation(() => {
        throw new Error('Encoding failed');
      });

      const testText = 'Hello, world!'; // 13 characters
      const result = await tokenizer.calculateTokens(testText);

      expect(consoleWarnSpy).toHaveBeenCalledWith(
        'Error encoding text with tiktoken:',
        expect.any(Error),
      );
      // Fallback: Math.ceil(13 / 4) = 4
      expect(result).toBe(4);
    });

    it('should handle very long text', async () => {
      const longText = 'a'.repeat(10000);
      const mockTokens = new Array(2500); // 2500 tokens
      mockEncode.mockReturnValue(mockTokens);

      const result = await tokenizer.calculateTokens(longText);

      expect(result).toBe(2500);
    });

    it('should handle unicode characters', async () => {
      const unicodeText = '你好世界 🌍';
      const mockTokens = [1, 2, 3, 4, 5, 6];
      mockEncode.mockReturnValue(mockTokens);

      const result = await tokenizer.calculateTokens(unicodeText);

      expect(result).toBe(6);
    });

    it('should use custom encoding when specified', async () => {
      tokenizer = new TextTokenizer('gpt2');
      const testText = 'Hello, world!';
      const mockTokens = [1, 2, 3];
      mockEncode.mockReturnValue(mockTokens);

      const result = await tokenizer.calculateTokens(testText);

      expect(mockGetEncoding).toHaveBeenCalledWith('gpt2');
      expect(result).toBe(3);
    });
  });

  describe('calculateTokensBatch', () => {
    beforeEach(() => {
      tokenizer = new TextTokenizer();
    });

    it('should process multiple texts and return token counts', async () => {
      const texts = ['Hello', 'world', 'test'];
      mockEncode
        .mockReturnValueOnce([1, 2]) // 2 tokens for 'Hello'
        .mockReturnValueOnce([3, 4, 5]) // 3 tokens for 'world'
        .mockReturnValueOnce([6]); // 1 token for 'test'

      const result = await tokenizer.calculateTokensBatch(texts);

      expect(result).toEqual([2, 3, 1]);
      expect(mockEncode).toHaveBeenCalledTimes(3);
    });

    it('should handle empty array', async () => {
      const result = await tokenizer.calculateTokensBatch([]);
      expect(result).toEqual([]);
    });

    it('should handle array with empty strings', async () => {
      const texts = ['', 'hello', ''];
      mockEncode.mockReturnValue([1, 2, 3]); // Only called for 'hello'

      const result = await tokenizer.calculateTokensBatch(texts);

      expect(result).toEqual([0, 3, 0]);
      expect(mockEncode).toHaveBeenCalledTimes(1);
      expect(mockEncode).toHaveBeenCalledWith('hello');
    });

    it('should use fallback calculation when tiktoken fails to load', async () => {
      mockGetEncoding.mockImplementation(() => {
        throw new Error('Failed to load tiktoken');
      });

      const texts = ['Hello', 'world']; // 5 and 5 characters
      const result = await tokenizer.calculateTokensBatch(texts);

      expect(consoleWarnSpy).toHaveBeenCalledWith(
        'Failed to load tiktoken with encoding cl100k_base:',
        expect.any(Error),
      );
      // Fallback: Math.ceil(5/4) = 2 for both
      expect(result).toEqual([2, 2]);
    });

    it('should use fallback calculation when encoding fails during batch processing', async () => {
      mockEncode.mockImplementation(() => {
        throw new Error('Encoding failed');
      });

      const texts = ['Hello', 'world']; // 5 and 5 characters
      const result = await tokenizer.calculateTokensBatch(texts);

      expect(consoleWarnSpy).toHaveBeenCalledWith(
        'Error encoding texts with tiktoken:',
        expect.any(Error),
      );
      // Fallback: Math.ceil(5/4) = 2 for both
      expect(result).toEqual([2, 2]);
    });

    it('should handle null and undefined values in batch', async () => {
      const texts = [null, 'hello', undefined, 'world'] as unknown as string[];
      mockEncode
        .mockReturnValueOnce([1, 2, 3]) // 3 tokens for 'hello'
        .mockReturnValueOnce([4, 5]); // 2 tokens for 'world'

      const result = await tokenizer.calculateTokensBatch(texts);

      expect(result).toEqual([0, 3, 0, 2]);
    });
  });

  describe('dispose', () => {
    beforeEach(() => {
      tokenizer = new TextTokenizer();
    });

    it('should free tiktoken encoding when disposing', async () => {
      // Initialize the encoding by calling calculateTokens
      await tokenizer.calculateTokens('test');

      tokenizer.dispose();

      expect(mockFree).toHaveBeenCalled();
    });

    it('should handle disposal when encoding is not initialized', () => {
      expect(() => tokenizer.dispose()).not.toThrow();
      expect(mockFree).not.toHaveBeenCalled();
    });

    it('should handle disposal when encoding is null', async () => {
      // Force encoding to be null by making tiktoken fail
      mockGetEncoding.mockImplementation(() => {
        throw new Error('Failed to load');
      });

      await tokenizer.calculateTokens('test');

      expect(() => tokenizer.dispose()).not.toThrow();
      expect(mockFree).not.toHaveBeenCalled();
    });

    it('should handle errors during disposal gracefully', async () => {
      await tokenizer.calculateTokens('test');

      mockFree.mockImplementation(() => {
        throw new Error('Free failed');
      });

      tokenizer.dispose();

      expect(consoleWarnSpy).toHaveBeenCalledWith(
        'Error freeing tiktoken encoding:',
        expect.any(Error),
      );
    });

    it('should allow multiple calls to dispose', async () => {
      await tokenizer.calculateTokens('test');

      tokenizer.dispose();
      tokenizer.dispose(); // Second call should not throw

      expect(mockFree).toHaveBeenCalledTimes(1);
    });
  });

  describe('lazy initialization', () => {
    beforeEach(() => {
      tokenizer = new TextTokenizer();
    });

    it('should not initialize tiktoken until first use', () => {
      expect(mockGetEncoding).not.toHaveBeenCalled();
    });

    it('should initialize tiktoken on first calculateTokens call', async () => {
      await tokenizer.calculateTokens('test');
      expect(mockGetEncoding).toHaveBeenCalledTimes(1);
    });

    it('should not reinitialize tiktoken on subsequent calls', async () => {
      await tokenizer.calculateTokens('test1');
      await tokenizer.calculateTokens('test2');

      expect(mockGetEncoding).toHaveBeenCalledTimes(1);
    });

    it('should initialize tiktoken on first calculateTokensBatch call', async () => {
      await tokenizer.calculateTokensBatch(['test']);
      expect(mockGetEncoding).toHaveBeenCalledTimes(1);
    });
  });

  describe('edge cases', () => {
    beforeEach(() => {
      tokenizer = new TextTokenizer();
    });

    it('should handle very short text', async () => {
      const result = await tokenizer.calculateTokens('a');

      if (mockGetEncoding.mock.calls.length > 0) {
        // If tiktoken was called, use its result
        expect(mockEncode).toHaveBeenCalledWith('a');
      } else {
        // If tiktoken failed, should use fallback: Math.ceil(1/4) = 1
        expect(result).toBe(1);
      }
    });

    it('should handle text with only whitespace', async () => {
      const whitespaceText = ' \n\t ';
      const mockTokens = [1];
      mockEncode.mockReturnValue(mockTokens);

      const result = await tokenizer.calculateTokens(whitespaceText);

      expect(result).toBe(1);
    });

    it('should handle special characters and symbols', async () => {
      const specialText = '!@#$%^&*()_+-=[]{}|;:,.<>?';
      const mockTokens = new Array(10);
      mockEncode.mockReturnValue(mockTokens);

      const result = await tokenizer.calculateTokens(specialText);

      expect(result).toBe(10);
    });
  });
});
packages/core/src/utils/request-tokenizer/textTokenizer.ts (new file, 97 lines)
@@ -0,0 +1,97 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

import type { TiktokenEncoding, Tiktoken } from 'tiktoken';
import { get_encoding } from 'tiktoken';

/**
 * Text tokenizer for calculating text tokens using tiktoken
 */
export class TextTokenizer {
  private encoding: Tiktoken | null = null;
  private encodingName: string;

  constructor(encodingName: string = 'cl100k_base') {
    this.encodingName = encodingName;
  }

  /**
   * Initialize the tokenizer (lazy loading)
   */
  private async ensureEncoding(): Promise<void> {
    if (this.encoding) return;

    try {
      // Use type assertion since we know the encoding name is valid
      this.encoding = get_encoding(this.encodingName as TiktokenEncoding);
    } catch (error) {
      console.warn(
        `Failed to load tiktoken with encoding ${this.encodingName}:`,
        error,
      );
      this.encoding = null;
    }
  }

  /**
   * Calculate tokens for text content
   */
  async calculateTokens(text: string): Promise<number> {
    if (!text) return 0;

    await this.ensureEncoding();

    if (this.encoding) {
      try {
        return this.encoding.encode(text).length;
      } catch (error) {
        console.warn('Error encoding text with tiktoken:', error);
      }
    }

    // Fallback: rough approximation using character count
    // This is a conservative estimate: 1 token ≈ 4 characters for most languages
    return Math.ceil(text.length / 4);
  }

  /**
   * Calculate tokens for multiple text strings in a single batch
   */
  async calculateTokensBatch(texts: string[]): Promise<number[]> {
    await this.ensureEncoding();

    if (this.encoding) {
      try {
        return texts.map((text) => {
          if (!text) return 0;
          // this.encoding may be null, add a null check to satisfy lint
          return this.encoding ? this.encoding.encode(text).length : 0;
        });
      } catch (error) {
        console.warn('Error encoding texts with tiktoken:', error);
        // In case of error, return fallback estimation for all texts
        return texts.map((text) => Math.ceil((text || '').length / 4));
      }
    }

    // Fallback for batch processing
    return texts.map((text) => Math.ceil((text || '').length / 4));
  }

  /**
   * Dispose of resources
   */
  dispose(): void {
    if (this.encoding) {
      try {
        this.encoding.free();
      } catch (error) {
        console.warn('Error freeing tiktoken encoding:', error);
      }
      this.encoding = null;
    }
  }
}
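
Behavior sketch for the class above: with tiktoken available the count is the exact BPE length, and when tiktoken cannot be loaded the estimate degrades to roughly one token per four characters (illustrative):

const textTokenizer = new TextTokenizer(); // defaults to cl100k_base
const n = await textTokenizer.calculateTokens('Hello, world!');
// With tiktoken loaded, n is the encoded length; without it,
// the fallback gives Math.ceil(13 / 4) = 4.
textTokenizer.dispose(); // frees the underlying encoding
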
packages/core/src/utils/request-tokenizer/types.ts (new file, 64 lines)
@@ -0,0 +1,64 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

import type { CountTokensParameters } from '@google/genai';

/**
 * Token calculation result for different content types
 */
export interface TokenCalculationResult {
  /** Total tokens calculated */
  totalTokens: number;
  /** Breakdown by content type */
  breakdown: {
    textTokens: number;
    imageTokens: number;
    audioTokens: number;
    otherTokens: number;
  };
  /** Processing time in milliseconds */
  processingTime: number;
}

/**
 * Configuration for token calculation
 */
export interface TokenizerConfig {
  /** Custom text tokenizer encoding (defaults to cl100k_base) */
  textEncoding?: string;
}

/**
 * Image metadata extracted from base64 data
 */
export interface ImageMetadata {
  /** Image width in pixels */
  width: number;
  /** Image height in pixels */
  height: number;
  /** MIME type of the image */
  mimeType: string;
  /** Size of the base64 data in bytes */
  dataSize: number;
}

/**
 * Request tokenizer interface
 */
export interface RequestTokenizer {
  /**
   * Calculate tokens for a request
   */
  calculateTokens(
    request: CountTokensParameters,
    config?: TokenizerConfig,
  ): Promise<TokenCalculationResult>;

  /**
   * Dispose of resources (worker threads, etc.)
   */
  dispose(): Promise<void>;
}