import { HttpError } from "../utils/httpError.js";
import { imageMimeType } from "../utils/mediaTypes.js";
import { isDataUrl, isLikelyBase64, parseDataUrl, stripDataUrl } from "../utils/dataUrl.js";

/** Audio formats accepted for inline (non-URL) audio input. */
const SUPPORTED_AUDIO_FORMATS = new Set(["mp3", "wav", "m4a"]);

/**
 * Reports whether `value` is a string that starts with `http://` or `https://`.
 * This is a case-sensitive prefix check only; the URL is not parsed or validated.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isHttpUrl(value) {
  return typeof value === "string" && (value.startsWith("http://") || value.startsWith("https://"));
}

/**
 * Turns an image source into something usable as an `image_url.url` value.
 *
 * http(s) URLs and data URLs are returned unchanged. A string accepted by
 * `isLikelyBase64` is wrapped into a `data:` URL using `mimeType`, or
 * `imageMimeType("png")` when no MIME type was supplied.
 *
 * @param {unknown} source - URL, data URL, or raw base64 payload.
 * @param {string} [mimeType] - MIME type used only when wrapping raw base64.
 * @returns {string}
 * @throws {HttpError} 400 when `source` is not a non-empty string or is none of the accepted forms.
 */
function normalizeImageSource(source, mimeType) {
  if (typeof source !== "string" || source.length === 0) {
    throw new HttpError(400, "Image input must include a non-empty URL or base64 payload.");
  }

  if (isHttpUrl(source) || isDataUrl(source)) {
    return source;
  }

  if (isLikelyBase64(source)) {
    return `data:${mimeType ?? imageMimeType("png")};base64,${source}`;
  }

  throw new HttpError(400, "Image input must be an http(s) URL, data URL, or raw base64 string.");
}

/**
 * Converts an `image_url` / `input_image` content part into an `image_url` part.
 *
 * The source is read from, in order: `image_url` (a string, or an object's
 * `url`), `input_image.url`, `input_image.data`. Extra fields on an object
 * `image_url` are carried over; `url` is replaced by the normalized source.
 *
 * @param {object} part - Content part whose `type` is `image_url` or `input_image`.
 * @returns {{ type: "image_url", image_url: object }}
 * @throws {HttpError} 400 when no usable image source is found.
 */
function normalizeImagePart(part) {
  // Accept both the shorthand string form and the object form of `image_url`.
  const image = typeof part.image_url === "string" ? { url: part.image_url } : part.image_url;
  const source = image?.url ?? part.input_image?.url ?? part.input_image?.data;
  const mimeType = part.mime_type ?? part.input_image?.mime_type;

  return {
    type: "image_url",
    image_url: {
      ...image,
      url: normalizeImageSource(source, mimeType),
    },
  };
}

/**
 * Validates an inline audio payload and reduces it to `{ data, format }`.
 *
 * `format` must be declared and supported even when `data` is a data URL. When
 * `parseDataUrl` yields a recognised audio MIME type, that type overrides the
 * declared format in the result.
 *
 * @param {{ format?: unknown, data?: unknown }} audio
 * @returns {{ data: string, format: string }} `data` is whatever `stripDataUrl` returns
 *   for the input (presumably the bare base64 payload — confirm against utils/dataUrl.js).
 * @throws {HttpError} 400 when the format is missing, not a string, or unsupported,
 *   or when `data` is not a non-empty string.
 */
function normalizeAudioBase64(audio) {
  // `?.` only guards null/undefined: a numeric or object `format` would make
  // `.toLowerCase()` throw a TypeError instead of the intended 400.
  const format = typeof audio.format === "string" ? audio.format.toLowerCase() : undefined;
  if (!SUPPORTED_AUDIO_FORMATS.has(format)) {
    throw new HttpError(400, "Audio input format must be mp3, wav, or m4a.");
  }

  if (typeof audio.data !== "string" || audio.data.length === 0) {
    throw new HttpError(400, "Audio input must include base64 data.");
  }

  const parsed = parseDataUrl(audio.data);
  return {
    data: stripDataUrl(audio.data),
    format: inferAudioFormat(parsed?.mimeType, format),
  };
}

/**
 * Maps an audio MIME type to one of the supported format names.
 *
 * MIME parameters (anything after `;`), surrounding whitespace, and letter case
 * are ignored. Unrecognised or missing MIME types yield `fallbackFormat`.
 *
 * @param {string | null | undefined} mimeType
 * @param {string} fallbackFormat
 * @returns {string}
 */
function inferAudioFormat(mimeType, fallbackFormat) {
  const normalizedMimeType = String(mimeType || "").split(";")[0].trim().toLowerCase();

  if (normalizedMimeType === "audio/wav" || normalizedMimeType === "audio/x-wav") {
    return "wav";
  }
  if (normalizedMimeType === "audio/mp4" || normalizedMimeType === "audio/x-m4a") {
    return "m4a";
  }
  if (normalizedMimeType === "audio/mpeg" || normalizedMimeType === "audio/mp3") {
    return "mp3";
  }
  return fallbackFormat;
}

/**
 * Normalizes an `input_audio` content part through the conversion service.
 *
 * A truthy `input_audio.url`, or an http(s) URL found in `input_audio.data`,
 * is downloaded and converted by the service; anything else is treated as
 * inline audio and validated first.
 */
async function normalizeAudioPart(part, audioConversionService) {
  const audio = part.input_audio ?? {};

  // NOTE(review): `audio.url` is forwarded as-is, without an http(s) check.
  // Confirm the conversion service restricts the schemes it will fetch.
  const audioUrl = audio.url || (isHttpUrl(audio.data) ? audio.data : null);

  const inputAudio = audioUrl
    ? await audioConversionService.downloadAndConvertToMp3Base64(audioUrl)
    : await audioConversionService.normalizeBase64Audio(normalizeAudioBase64(audio));

  return { type: "input_audio", input_audio: inputAudio };
}

/** Normalizes one content part; parts of any other type are returned untouched. */
async function normalizeContentPart(part, audioConversionService) {
  // `null` is valid JSON inside a content array; pass it through the same way
  // other non-object entries (e.g. strings) already fall through.
  if (part === null || typeof part !== "object") {
    return part;
  }

  if (part.type === "image_url" || part.type === "input_image") {
    return normalizeImagePart(part);
  }
  if (part.type === "input_audio") {
    return normalizeAudioPart(part, audioConversionService);
  }
  return part;
}

/**
 * Creates the service that rewrites multimodal request bodies before forwarding.
 *
 * @param {object} deps
 * @param {object} deps.audioConversionService - Must provide
 *   `downloadAndConvertToMp3Base64(url)` and `normalizeBase64Audio({ data, format })`;
 *   both results are awaited and used as the part's `input_audio` value.
 * @returns {{ normalize(body: object): Promise<{ normalizedBody: object, responseContext: object }> }}
 */
export function createRequestNormalizationService({ audioConversionService }) {
  return {
    /**
     * Deep-clones `body`, removes its `proxy` options, and normalizes image and
     * audio parts of every message whose `content` is an array. The caller's
     * `body` is not mutated.
     *
     * @param {object} body - Request body; must contain a `messages` array.
     * @returns {Promise<{
     *   normalizedBody: object,
     *   responseContext: { audioFormat: string, exposeMediaUrls: boolean }
     * }>} `audioFormat` defaults to "mp3"; `exposeMediaUrls` is true unless
     *   `proxy.expose_media_urls` is exactly `false`.
     * @throws {HttpError} 400 for a missing `messages` array or invalid media parts.
     */
    async normalize(body) {
      if (!body || !Array.isArray(body.messages)) {
        throw new HttpError(400, "Request body must include a messages array.");
      }

      const normalized = structuredClone(body);
      const proxyOptions = normalized.proxy ?? {};
      delete normalized.proxy;

      for (const message of normalized.messages) {
        // Skip string-content messages, and null/invalid entries, untouched.
        if (!Array.isArray(message?.content)) {
          continue;
        }

        // Parts are processed one at a time, in order, as before.
        const nextParts = [];
        for (const part of message.content) {
          nextParts.push(await normalizeContentPart(part, audioConversionService));
        }
        message.content = nextParts;
      }

      return {
        normalizedBody: normalized,
        responseContext: {
          audioFormat: normalized.audio?.format ?? "mp3",
          exposeMediaUrls: proxyOptions.expose_media_urls !== false,
        },
      };
    },
  };
}