mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-20 16:57:46 +00:00
fix: align supported image formats with bailian doc
This commit is contained in:
@@ -14,6 +14,10 @@ import {
|
||||
} from '../models/availableModels.js';
|
||||
import { MessageType } from '../types.js';
|
||||
import type { UseHistoryManagerReturn } from './useHistoryManager.js';
|
||||
import {
|
||||
isSupportedImageMimeType,
|
||||
getUnsupportedImageFormatWarning,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
|
||||
/**
|
||||
* Checks if a PartListUnion contains image parts
|
||||
@@ -56,6 +60,60 @@ function isImagePart(part: Part): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Scans a query for image parts and reports which image MIME types are not
 * accepted by isSupportedImageMimeType.
 *
 * A plain-string query, and string entries inside an array, never count as
 * images. For every other part, the MIME type of `inlineData` and `fileData`
 * is inspected; when both carry an `image/*` type, the `fileData` one is the
 * one that gets validated.
 *
 * @param parts The query to inspect (string, single part, or list of parts)
 * @returns Whether any image part was seen, whether any of them has an
 *   unsupported format, and the unsupported MIME types in encounter order
 */
function checkImageFormatsSupport(parts: PartListUnion): {
  hasImages: boolean;
  hasUnsupportedFormats: boolean;
  unsupportedMimeTypes: string[];
} {
  const rejected: string[] = [];
  let sawImage = false;

  if (typeof parts !== 'string') {
    const candidates = Array.isArray(parts) ? parts : [parts];

    for (const entry of candidates) {
      if (typeof entry === 'string') continue;

      const inlineMime =
        'inlineData' in entry ? entry.inlineData?.mimeType : undefined;
      const fileMime =
        'fileData' in entry ? entry.fileData?.mimeType : undefined;

      // fileData is evaluated last so it takes precedence over inlineData.
      let imageMime: string | undefined;
      if (inlineMime?.startsWith('image/')) {
        imageMime = inlineMime;
      }
      if (fileMime?.startsWith('image/')) {
        imageMime = fileMime;
      }

      if (!imageMime) continue;

      sawImage = true;
      if (!isSupportedImageMimeType(imageMime)) {
        rejected.push(imageMime);
      }
    }
  }

  return {
    hasImages: sawImage,
    hasUnsupportedFormats: rejected.length > 0,
    unsupportedMimeTypes: rejected,
  };
}
|
||||
|
||||
/**
|
||||
* Determines if we should offer vision switch for the given parts, auth type, and current model
|
||||
*/
|
||||
@@ -167,6 +225,21 @@ export function useVisionAutoSwitch(
|
||||
return { shouldProceed: true };
|
||||
}
|
||||
|
||||
// Check image format support first
|
||||
const formatCheck = checkImageFormatsSupport(query);
|
||||
|
||||
// If there are unsupported image formats, show warning
|
||||
if (formatCheck.hasUnsupportedFormats) {
|
||||
addItem(
|
||||
{
|
||||
type: MessageType.INFO,
|
||||
text: getUnsupportedImageFormatWarning(),
|
||||
},
|
||||
userMessageTimestamp,
|
||||
);
|
||||
// Continue processing but with warning shown
|
||||
}
|
||||
|
||||
// Check if vision switch is needed
|
||||
if (
|
||||
!shouldOfferVisionSwitch(
|
||||
|
||||
@@ -19,3 +19,4 @@ export {
|
||||
} from './src/telemetry/types.js';
|
||||
export { makeFakeConfig } from './src/test-utils/config.js';
|
||||
export * from './src/utils/pathReader.js';
|
||||
export * from './src/utils/request-tokenizer/supportedImageFormats.js';
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*/
|
||||
|
||||
import type { ImageMetadata } from './types.js';
|
||||
import { isSupportedImageMimeType } from './supportedImageFormats.js';
|
||||
|
||||
/**
|
||||
* Image tokenizer for calculating image tokens based on dimensions
|
||||
@@ -14,7 +15,7 @@ import type { ImageMetadata } from './types.js';
|
||||
* - Minimum: 4 tokens per image
|
||||
* - Maximum: 16384 tokens per image
|
||||
* - Additional: 2 special tokens (vision_bos + vision_eos)
|
||||
* - Supports: PNG, JPEG, WebP, GIF formats
|
||||
* - Supports: PNG, JPEG, WebP, BMP, TIFF, HEIC formats (GIF is not in SUPPORTED_IMAGE_MIME_TYPES and falls back to default 512x512 metadata)
|
||||
*/
|
||||
export class ImageTokenizer {
|
||||
/** 28x28 pixels = 1 token */
|
||||
@@ -41,6 +42,18 @@ export class ImageTokenizer {
|
||||
mimeType: string,
|
||||
): Promise<ImageMetadata> {
|
||||
try {
|
||||
// Check if the MIME type is supported
|
||||
if (!isSupportedImageMimeType(mimeType)) {
|
||||
console.warn(`Unsupported image format: ${mimeType}`);
|
||||
// Return default metadata for unsupported formats
|
||||
return {
|
||||
width: 512,
|
||||
height: 512,
|
||||
mimeType,
|
||||
dataSize: Math.floor(base64Data.length * 0.75),
|
||||
};
|
||||
}
|
||||
|
||||
const cleanBase64 = base64Data.replace(/^data:[^;]+;base64,/, '');
|
||||
const buffer = Buffer.from(cleanBase64, 'base64');
|
||||
const dimensions = await this.extractDimensions(buffer, mimeType);
|
||||
@@ -90,6 +103,18 @@ export class ImageTokenizer {
|
||||
return this.extractGifDimensions(buffer);
|
||||
}
|
||||
|
||||
if (mimeType.includes('bmp')) {
|
||||
return this.extractBmpDimensions(buffer);
|
||||
}
|
||||
|
||||
if (mimeType.includes('tiff')) {
|
||||
return this.extractTiffDimensions(buffer);
|
||||
}
|
||||
|
||||
if (mimeType.includes('heic')) {
|
||||
return this.extractHeicDimensions(buffer);
|
||||
}
|
||||
|
||||
return { width: 512, height: 512 };
|
||||
}
|
||||
|
||||
@@ -306,4 +331,175 @@ export class ImageTokenizer {
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Extract BMP dimensions from the file header.
 *
 * Layout: bytes 0-1 hold the signature 42 4D ("BM"); the DIB header starts at
 * byte 14 with its own size. For the usual BITMAPINFOHEADER family the width
 * and height are 32-bit little-endian values at bytes 18-21 and 22-25. The
 * legacy 12-byte BITMAPCOREHEADER stores them as 16-bit values at bytes 18-19
 * and 20-21 instead.
 *
 * @param buffer Raw BMP file bytes
 * @returns Width and height in pixels (height is always non-negative)
 * @throws Error if the buffer is shorter than 26 bytes or the signature is wrong
 */
private extractBmpDimensions(buffer: Buffer): {
  width: number;
  height: number;
} {
  if (buffer.length < 26) {
    throw new Error('Invalid BMP: buffer too short');
  }

  // Verify BMP signature
  if (buffer[0] !== 0x42 || buffer[1] !== 0x4d) {
    throw new Error('Invalid BMP signature');
  }

  // BITMAPCOREHEADER (header size 12) uses unsigned 16-bit dimensions.
  const dibHeaderSize = buffer.readUInt32LE(14);
  if (dibHeaderSize === 12) {
    return {
      width: buffer.readUInt16LE(18),
      height: buffer.readUInt16LE(20),
    };
  }

  const width = buffer.readUInt32LE(18);
  // Height is a SIGNED 32-bit field: a negative value marks a top-down bitmap.
  // It must be read as signed, otherwise Math.abs() has nothing to undo and a
  // top-down image reports a height close to 2^32.
  const height = buffer.readInt32LE(22);

  return { width, height: Math.abs(height) };
}
|
||||
|
||||
/**
 * Extract TIFF dimensions from the first IFD (Image File Directory).
 *
 * The 8-byte header holds the byte order ("II" little-endian, "MM"
 * big-endian), the magic number 42 and the offset of the first IFD. Each
 * 12-byte IFD entry is: tag (2), type (2), count (4), value/offset (4).
 * Width and height are the entries tagged 0x0100 (ImageWidth) and
 * 0x0101 (ImageLength).
 *
 * @param buffer Raw TIFF file bytes
 * @returns Width and height in pixels
 * @throws Error if the header is malformed, the IFD offset is out of range,
 *   or either dimension tag is missing/zero
 */
private extractTiffDimensions(buffer: Buffer): {
  width: number;
  height: number;
} {
  if (buffer.length < 8) {
    throw new Error('Invalid TIFF: buffer too short');
  }

  // Check byte order
  const byteOrder = buffer.toString('ascii', 0, 2);
  const isLittleEndian = byteOrder === 'II';
  if (!isLittleEndian && byteOrder !== 'MM') {
    throw new Error('Invalid TIFF byte order');
  }

  // Endian-aware readers so the byte-order decision lives in one place.
  const readU16 = (offset: number): number =>
    isLittleEndian ? buffer.readUInt16LE(offset) : buffer.readUInt16BE(offset);
  const readU32 = (offset: number): number =>
    isLittleEndian ? buffer.readUInt32LE(offset) : buffer.readUInt32BE(offset);

  // Read magic number (should be 42)
  if (readU16(2) !== 42) {
    throw new Error('Invalid TIFF magic number');
  }

  // The IFD starts with a 2-byte entry count, so both bytes must be in range.
  const ifdOffset = readU32(4);
  if (ifdOffset + 2 > buffer.length) {
    throw new Error('Invalid TIFF IFD offset');
  }

  const numEntries = readU16(ifdOffset);

  let width = 0;
  let height = 0;

  // Parse IFD entries
  for (let i = 0; i < numEntries; i++) {
    const entryOffset = ifdOffset + 2 + i * 12;

    // Truncated directory: stop and let the zero check below report it.
    if (entryOffset + 12 > buffer.length) break;

    const tag = readU16(entryOffset);
    if (tag !== 0x0100 && tag !== 0x0101) continue;

    const type = readU16(entryOffset + 2);

    // A SHORT (type 3) is left-justified in the 4-byte value field, so it
    // occupies the FIRST two bytes regardless of byte order. Reading all four
    // bytes would yield value << 16 in big-endian files. Anything else is
    // read as a 32-bit LONG.
    const value =
      type === 3 ? readU16(entryOffset + 8) : readU32(entryOffset + 8);

    if (tag === 0x0100) {
      width = value; // ImageWidth
    } else {
      height = value; // ImageLength (height)
    }

    if (width > 0 && height > 0) break;
  }

  if (width === 0 || height === 0) {
    throw new Error('Could not find TIFF dimensions');
  }

  return { width, height };
}
|
||||
|
||||
/**
 * Extract HEIC dimensions from the 'ispe' (Image Spatial Extents) box.
 *
 * HEIC is based on the ISO Base Media File Format: the file is a sequence of
 * boxes, each starting with a 4-byte big-endian size and a 4-byte type. The
 * 'ispe' property is normally nested as meta -> iprp -> ipco -> ispe, so the
 * search descends through those containers. 'meta' and 'ispe' are full boxes
 * that carry 4 bytes of version/flags after the 8-byte header.
 *
 * This is a simplified implementation: the first 'ispe' box encountered wins,
 * and 64-bit box sizes are not followed.
 *
 * @param buffer Raw HEIC file bytes
 * @returns Width and height in pixels, or 512x512 when no 'ispe' box is found
 * @throws Error if the buffer is too short, the ftyp box is missing, or the
 *   major brand is not one of heic/heix/hevc/hevx
 */
private extractHeicDimensions(buffer: Buffer): {
  width: number;
  height: number;
} {
  if (buffer.length < 12) {
    throw new Error('Invalid HEIC: buffer too short');
  }

  // Check for ftyp box with HEIC brand
  if (buffer.toString('ascii', 4, 8) !== 'ftyp') {
    throw new Error('Invalid HEIC: missing ftyp box');
  }

  const brand = buffer.toString('ascii', 8, 12);
  if (!['heic', 'heix', 'hevc', 'hevx'].includes(brand)) {
    throw new Error('Invalid HEIC brand');
  }

  // meta (1) -> iprp (2) -> ipco (3); the cap also bounds recursion on
  // maliciously nested input.
  const maxDepth = 3;

  // Walks the boxes in [start, end) and returns the first 'ispe' extents found.
  const findIspe = (
    start: number,
    end: number,
    depth: number,
  ): { width: number; height: number } | undefined => {
    let pos = start;

    while (pos + 8 <= end) {
      const size = buffer.readUInt32BE(pos);
      const type = buffer.toString('ascii', pos + 4, pos + 8);

      // Sizes 1-7 cannot cover the 8-byte header (1 announces a 64-bit size,
      // which is not supported here); stepping by them would misalign the scan.
      if (size !== 0 && size < 8) break;

      // Size 0 means "extends to the end of the enclosing container".
      // Clamp so a lying size can never push reads past the parent/buffer.
      const boxEnd = size === 0 ? end : Math.min(pos + size, end);

      if (type === 'ispe') {
        // header (8) + version/flags (4) + width (4) + height (4)
        if (pos + 20 <= boxEnd) {
          return {
            width: buffer.readUInt32BE(pos + 12),
            height: buffer.readUInt32BE(pos + 16),
          };
        }
      } else if (
        depth < maxDepth &&
        (type === 'meta' || type === 'iprp' || type === 'ipco')
      ) {
        // 'meta' is a full box: skip its version and flags before children.
        const childStart = type === 'meta' ? pos + 12 : pos + 8;
        const found = findIspe(childStart, boxEnd, depth + 1);
        if (found) return found;
      }

      pos = boxEnd;
    }

    return undefined;
  };

  const dimensions = findIspe(0, buffer.length, 0);
  if (dimensions) {
    return dimensions;
  }

  // Fallback: return default dimensions if we can't parse the structure
  console.warn('Could not extract HEIC dimensions, using default');
  return { width: 512, height: 512 };
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
 * Supported image MIME types for vision models
 * These formats are supported by the vision model and can be processed by the image tokenizer
 *
 * isSupportedImageMimeType() matches a MIME type against this list, and
 * getSupportedImageFormatsString() builds its labels from it, so the order of
 * the entries here is the order shown in that string.
 *
 * NOTE(review): 'image/gif' is not listed here — confirm this is intentional.
 */
export const SUPPORTED_IMAGE_MIME_TYPES = [
  'image/bmp',
  'image/jpeg',
  'image/jpg', // Alternative MIME type for JPEG
  'image/png',
  'image/tiff',
  'image/webp',
  'image/heic',
] as const;

/**
 * Type for supported image MIME types
 * (the union of the string literals in SUPPORTED_IMAGE_MIME_TYPES)
 */
export type SupportedImageMimeType =
  (typeof SUPPORTED_IMAGE_MIME_TYPES)[number];
|
||||
|
||||
/**
 * Type guard: tells whether a MIME type is one of SUPPORTED_IMAGE_MIME_TYPES.
 * The comparison is an exact string match against each entry.
 * @param mimeType The MIME type to check
 * @returns True if the MIME type is supported
 */
export function isSupportedImageMimeType(
  mimeType: string,
): mimeType is SupportedImageMimeType {
  // Widen the literal tuple to plain strings so no cast of the input is needed.
  const allowed: readonly string[] = SUPPORTED_IMAGE_MIME_TYPES;
  return allowed.some((candidate) => candidate === mimeType);
}
|
||||
|
||||
/**
 * Build a human-readable list of the supported image formats.
 * Each entry of SUPPORTED_IMAGE_MIME_TYPES has its 'image/' prefix removed
 * and is upper-cased; entries keep the order of that list.
 * @returns Comma-separated string of supported formats
 */
export function getSupportedImageFormatsString(): string {
  const labels: string[] = [];
  for (const mime of SUPPORTED_IMAGE_MIME_TYPES) {
    const subtype = mime.replace('image/', '');
    labels.push(subtype.toUpperCase());
  }
  return labels.join(', ');
}
|
||||
|
||||
/**
 * Compose the warning shown when an image has an unsupported format.
 * The list of formats comes from getSupportedImageFormatsString().
 * @returns Warning message string
 */
export function getUnsupportedImageFormatWarning(): string {
  const formats = getSupportedImageFormatsString();
  return (
    'Only the following image formats are supported: ' +
    formats +
    '. Other formats may not work as expected.'
  );
}
|
||||
Reference in New Issue
Block a user