From bf206237fa757f93d4d70842f606c79f0a63ca1d Mon Sep 17 00:00:00 2001
From: "mingholy.lmh"
Date: Thu, 18 Sep 2025 11:12:51 +0800
Subject: [PATCH] fix: align supported image formats with bailian doc

---
Review notes (text here is ignored by `git am` / `git apply`):
- This patch was restored from a copy whose line breaks and indentation had
  been collapsed. Added-line counts match the original hunk headers, but
  context-line indentation and the `Promise<ImageMetadata>` return type in
  the imageTokenizer.ts context are reconstructed guesses; use
  `git apply --ignore-whitespace` if context does not match.
- The author e-mail is absent from the `From:` header in that copy.
- imageTokenizer.ts changes relative to the original patch: BMP height is
  read as a signed value, TIFF SHORT values are read as 16 bits, the IFD
  entry count is bounds-checked, HEIC 'ispe' is looked up under
  meta/iprp/ipco, and the class comment no longer lists GIF (it is not in
  SUPPORTED_IMAGE_MIME_TYPES).

 .../cli/src/ui/hooks/useVisionAutoSwitch.ts   |  73 +++++++
 packages/core/index.ts                        |   1 +
 .../utils/request-tokenizer/imageTokenizer.ts | 217 +++++++++++++++++-
 .../supportedImageFormats.ts                  |  56 +++++
 4 files changed, 346 insertions(+), 1 deletion(-)
 create mode 100644 packages/core/src/utils/request-tokenizer/supportedImageFormats.ts

diff --git a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
index 7b839c08..d4b9629c 100644
--- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
+++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
@@ -14,6 +14,10 @@ import {
 } from '../models/availableModels.js';
 import { MessageType } from '../types.js';
 import type { UseHistoryManagerReturn } from './useHistoryManager.js';
+import {
+  isSupportedImageMimeType,
+  getUnsupportedImageFormatWarning,
+} from '@qwen-code/qwen-code-core';
 
 /**
  * Checks if a PartListUnion contains image parts
@@ -56,6 +60,60 @@ function isImagePart(part: Part): boolean {
   return false;
 }
 
+/**
+ * Checks if image parts have supported formats and returns unsupported ones
+ */
+function checkImageFormatsSupport(parts: PartListUnion): {
+  hasImages: boolean;
+  hasUnsupportedFormats: boolean;
+  unsupportedMimeTypes: string[];
+} {
+  const unsupportedMimeTypes: string[] = [];
+  let hasImages = false;
+
+  if (typeof parts === 'string') {
+    return {
+      hasImages: false,
+      hasUnsupportedFormats: false,
+      unsupportedMimeTypes: [],
+    };
+  }
+
+  const partsArray = Array.isArray(parts) ? parts : [parts];
+
+  for (const part of partsArray) {
+    if (typeof part === 'string') continue;
+
+    let mimeType: string | undefined;
+
+    // Check inlineData
+    if (
+      'inlineData' in part &&
+      part.inlineData?.mimeType?.startsWith('image/')
+    ) {
+      hasImages = true;
+      mimeType = part.inlineData.mimeType;
+    }
+
+    // Check fileData
+    if ('fileData' in part && part.fileData?.mimeType?.startsWith('image/')) {
+      hasImages = true;
+      mimeType = part.fileData.mimeType;
+    }
+
+    // Check if the mime type is supported
+    if (mimeType && !isSupportedImageMimeType(mimeType)) {
+      unsupportedMimeTypes.push(mimeType);
+    }
+  }
+
+  return {
+    hasImages,
+    hasUnsupportedFormats: unsupportedMimeTypes.length > 0,
+    unsupportedMimeTypes,
+  };
+}
+
 /**
  * Determines if we should offer vision switch for the given parts, auth type, and current model
  */
@@ -167,6 +225,21 @@ export function useVisionAutoSwitch(
         return { shouldProceed: true };
       }
 
+      // Check image format support first
+      const formatCheck = checkImageFormatsSupport(query);
+
+      // If there are unsupported image formats, show warning
+      if (formatCheck.hasUnsupportedFormats) {
+        addItem(
+          {
+            type: MessageType.INFO,
+            text: getUnsupportedImageFormatWarning(),
+          },
+          userMessageTimestamp,
+        );
+        // Continue processing but with warning shown
+      }
+
       // Check if vision switch is needed
       if (
         !shouldOfferVisionSwitch(
diff --git a/packages/core/index.ts b/packages/core/index.ts
index 447560d4..3cc271d0 100644
--- a/packages/core/index.ts
+++ b/packages/core/index.ts
@@ -19,3 +19,4 @@ export {
 } from './src/telemetry/types.js';
 export { makeFakeConfig } from './src/test-utils/config.js';
 export * from './src/utils/pathReader.js';
+export * from './src/utils/request-tokenizer/supportedImageFormats.js';
diff --git a/packages/core/src/utils/request-tokenizer/imageTokenizer.ts b/packages/core/src/utils/request-tokenizer/imageTokenizer.ts
index 5fbeb128..b55c6b9e 100644
--- a/packages/core/src/utils/request-tokenizer/imageTokenizer.ts
+++ b/packages/core/src/utils/request-tokenizer/imageTokenizer.ts
@@ -5,6 +5,7 @@
  */
 
 import type { ImageMetadata } from './types.js';
+import { isSupportedImageMimeType } from './supportedImageFormats.js';
 
 /**
  * Image tokenizer for calculating image tokens based on dimensions
@@ -14,7 +15,7 @@ import type { ImageMetadata } from './types.js';
  * - Minimum: 4 tokens per image
  * - Maximum: 16384 tokens per image
  * - Additional: 2 special tokens (vision_bos + vision_eos)
- * - Supports: PNG, JPEG, WebP, GIF formats
+ * - Supports: PNG, JPEG, WebP, BMP, TIFF, HEIC formats
  */
 export class ImageTokenizer {
   /** 28x28 pixels = 1 token */
@@ -41,6 +42,18 @@ export class ImageTokenizer {
     mimeType: string,
   ): Promise<ImageMetadata> {
     try {
+      // Check if the MIME type is supported
+      if (!isSupportedImageMimeType(mimeType)) {
+        console.warn(`Unsupported image format: ${mimeType}`);
+        // Return default metadata for unsupported formats
+        return {
+          width: 512,
+          height: 512,
+          mimeType,
+          dataSize: Math.floor(base64Data.length * 0.75),
+        };
+      }
+
       const cleanBase64 = base64Data.replace(/^data:[^;]+;base64,/, '');
       const buffer = Buffer.from(cleanBase64, 'base64');
       const dimensions = await this.extractDimensions(buffer, mimeType);
@@ -90,6 +103,18 @@ export class ImageTokenizer {
       return this.extractGifDimensions(buffer);
     }
 
+    if (mimeType.includes('bmp')) {
+      return this.extractBmpDimensions(buffer);
+    }
+
+    if (mimeType.includes('tiff')) {
+      return this.extractTiffDimensions(buffer);
+    }
+
+    if (mimeType.includes('heic')) {
+      return this.extractHeicDimensions(buffer);
+    }
+
     return { width: 512, height: 512 };
   }
 
@@ -306,4 +331,194 @@ export class ImageTokenizer {
 
     return results;
   }
+
+  /**
+   * Extract BMP dimensions from header
+   * BMP signature: 42 4D (BM)
+   * Width/height at bytes 18-21 and 22-25 (little-endian)
+   */
+  private extractBmpDimensions(buffer: Buffer): {
+    width: number;
+    height: number;
+  } {
+    if (buffer.length < 26) {
+      throw new Error('Invalid BMP: buffer too short');
+    }
+
+    // Verify BMP signature
+    if (buffer[0] !== 0x42 || buffer[1] !== 0x4d) {
+      throw new Error('Invalid BMP signature');
+    }
+
+    const width = buffer.readUInt32LE(18);
+    // Height is a signed 32-bit value: negative means a top-down bitmap
+    const height = buffer.readInt32LE(22);
+
+    return { width, height: Math.abs(height) };
+  }
+
+  /**
+   * Extract TIFF dimensions from IFD (Image File Directory)
+   * TIFF can be little-endian (II) or big-endian (MM)
+   * Width/height are stored in IFD entries with tags 0x0100 and 0x0101
+   */
+  private extractTiffDimensions(buffer: Buffer): {
+    width: number;
+    height: number;
+  } {
+    if (buffer.length < 8) {
+      throw new Error('Invalid TIFF: buffer too short');
+    }
+
+    // Check byte order
+    const byteOrder = buffer.subarray(0, 2).toString('ascii');
+    const isLittleEndian = byteOrder === 'II';
+    const isBigEndian = byteOrder === 'MM';
+
+    if (!isLittleEndian && !isBigEndian) {
+      throw new Error('Invalid TIFF byte order');
+    }
+
+    // Read magic number (should be 42)
+    const magic = isLittleEndian
+      ? buffer.readUInt16LE(2)
+      : buffer.readUInt16BE(2);
+    if (magic !== 42) {
+      throw new Error('Invalid TIFF magic number');
+    }
+
+    // Read IFD offset
+    const ifdOffset = isLittleEndian
+      ? buffer.readUInt32LE(4)
+      : buffer.readUInt32BE(4);
+
+    // The 2-byte entry count at ifdOffset must lie inside the buffer
+    if (ifdOffset + 2 > buffer.length) {
+      throw new Error('Invalid TIFF IFD offset');
+    }
+
+    // Read number of directory entries
+    const numEntries = isLittleEndian
+      ? buffer.readUInt16LE(ifdOffset)
+      : buffer.readUInt16BE(ifdOffset);
+
+    let width = 0;
+    let height = 0;
+
+    // Parse IFD entries
+    for (let i = 0; i < numEntries; i++) {
+      const entryOffset = ifdOffset + 2 + i * 12;
+
+      if (entryOffset + 12 > buffer.length) break;
+
+      const tag = isLittleEndian
+        ? buffer.readUInt16LE(entryOffset)
+        : buffer.readUInt16BE(entryOffset);
+
+      const type = isLittleEndian
+        ? buffer.readUInt16LE(entryOffset + 2)
+        : buffer.readUInt16BE(entryOffset + 2);
+
+      // SHORT (type 3) values are left-justified in the 4-byte value field,
+      // so read 16 bits for them; a 32-bit read corrupts big-endian files
+      const isShort = type === 3;
+      const value = isLittleEndian
+        ? isShort
+          ? buffer.readUInt16LE(entryOffset + 8)
+          : buffer.readUInt32LE(entryOffset + 8)
+        : isShort
+          ? buffer.readUInt16BE(entryOffset + 8)
+          : buffer.readUInt32BE(entryOffset + 8);
+
+      if (tag === 0x0100) {
+        // ImageWidth
+        width = value;
+      } else if (tag === 0x0101) {
+        // ImageLength (height)
+        height = value;
+      }
+
+      if (width > 0 && height > 0) break;
+    }
+
+    if (width === 0 || height === 0) {
+      throw new Error('Could not find TIFF dimensions');
+    }
+
+    return { width, height };
+  }
+
+  /**
+   * Extract HEIC dimensions from the 'ispe' (Image Spatial Extents) property
+   * HEIC is based on ISO Base Media File Format, where 'ispe' boxes are stored
+   * at meta -> iprp -> ipco -> ispe rather than directly inside 'meta'
+   * A file may carry several 'ispe' boxes (tiles, thumbnail, full image); the
+   * largest one is used as the best estimate of the image size
+   */
+  private extractHeicDimensions(buffer: Buffer): {
+    width: number;
+    height: number;
+  } {
+    if (buffer.length < 12) {
+      throw new Error('Invalid HEIC: buffer too short');
+    }
+
+    // Check for ftyp box with HEIC brand
+    const ftypBox = buffer.subarray(4, 8).toString('ascii');
+    if (ftypBox !== 'ftyp') {
+      throw new Error('Invalid HEIC: missing ftyp box');
+    }
+
+    // 'mif1'/'msf1' are generic HEIF major brands that some encoders write
+    const brand = buffer.subarray(8, 12).toString('ascii');
+    if (!['heic', 'heix', 'hevc', 'hevx', 'mif1', 'msf1'].includes(brand)) {
+      throw new Error('Invalid HEIC brand');
+    }
+
+    // List the sibling boxes found in [start, end) of the buffer
+    const listBoxes = (start: number, end: number) => {
+      const boxes: Array<{ type: string; start: number; end: number }> = [];
+      let offset = start;
+      while (offset + 8 <= end) {
+        const size = buffer.readUInt32BE(offset);
+        // size 0 means the box runs to the end of its parent; sizes 1-7
+        // (1 marks a 64-bit size, unsupported here) cannot be walked safely
+        if (size !== 0 && size < 8) break;
+        const boxEnd = size === 0 ? end : Math.min(offset + size, end);
+        boxes.push({
+          type: buffer.subarray(offset + 4, offset + 8).toString('ascii'),
+          start: offset + 8, // first payload byte
+          end: boxEnd,
+        });
+        offset = boxEnd;
+      }
+      return boxes;
+    };
+
+    const meta = listBoxes(0, buffer.length).find((b) => b.type === 'meta');
+    // 'meta' is a FullBox: skip its 4 bytes of version and flags
+    const iprp =
+      meta && listBoxes(meta.start + 4, meta.end).find((b) => b.type === 'iprp');
+    const ipco =
+      iprp && listBoxes(iprp.start, iprp.end).find((b) => b.type === 'ipco');
+
+    let best: { width: number; height: number } | undefined;
+    for (const box of ipco ? listBoxes(ipco.start, ipco.end) : []) {
+      // ispe payload: 4 bytes version/flags, 4 bytes width, 4 bytes height
+      if (box.type !== 'ispe' || box.start + 12 > box.end) continue;
+      const width = buffer.readUInt32BE(box.start + 4);
+      const height = buffer.readUInt32BE(box.start + 8);
+      if (!best || width * height > best.width * best.height) {
+        best = { width, height };
+      }
+    }
+
+    if (best && best.width > 0 && best.height > 0) {
+      return best;
+    }
+
+    // Fallback: return default dimensions if we can't parse the structure
+    console.warn('Could not extract HEIC dimensions, using default');
+    return { width: 512, height: 512 };
+  }
 }
diff --git a/packages/core/src/utils/request-tokenizer/supportedImageFormats.ts b/packages/core/src/utils/request-tokenizer/supportedImageFormats.ts
new file mode 100644
index 00000000..fce679d7
--- /dev/null
+++ b/packages/core/src/utils/request-tokenizer/supportedImageFormats.ts
@@ -0,0 +1,56 @@
+/**
+ * @license
+ * Copyright 2025 Qwen
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Supported image MIME types for vision models
+ * These formats are supported by the vision model and can be processed by the image tokenizer
+ */
+export const SUPPORTED_IMAGE_MIME_TYPES = [
+  'image/bmp',
+  'image/jpeg',
+  'image/jpg', // Alternative MIME type for JPEG
+  'image/png',
+  'image/tiff',
+  'image/webp',
+  'image/heic',
+] as const;
+
+/**
+ * Type for supported image MIME types
+ */
+export type SupportedImageMimeType =
+  (typeof SUPPORTED_IMAGE_MIME_TYPES)[number];
+
+/**
+ * Check if a MIME type is supported for vision processing
+ * @param mimeType The MIME type to check
+ * @returns True if the MIME type is supported
+ */
+export function isSupportedImageMimeType(
+  mimeType: string,
+): mimeType is SupportedImageMimeType {
+  return SUPPORTED_IMAGE_MIME_TYPES.includes(
+    mimeType as SupportedImageMimeType,
+  );
+}
+
+/**
+ * Get a human-readable list of supported image formats
+ * @returns Comma-separated string of supported formats
+ */
+export function getSupportedImageFormatsString(): string {
+  return SUPPORTED_IMAGE_MIME_TYPES.map((type) =>
+    type.replace('image/', '').toUpperCase(),
+  ).join(', ');
+}
+
+/**
+ * Get warning message for unsupported image formats
+ * @returns Warning message string
+ */
+export function getUnsupportedImageFormatWarning(): string {
+  return `Only the following image formats are supported: ${getSupportedImageFormatsString()}. Other formats may not work as expected.`;
+}