// oapix/src/services/requestNormalizationService.js
// Last commit: 8b5482f "fix audio convert" (woiceatus)
import { HttpError } from "../utils/httpError.js";
import { imageMimeType } from "../utils/mediaTypes.js";
import { isDataUrl, isLikelyBase64, parseDataUrl, stripDataUrl } from "../utils/dataUrl.js";
// Audio formats accepted for base64 `input_audio` payloads; normalizeAudioBase64
// checks the lower-cased `format` field against this set.
const SUPPORTED_AUDIO_FORMATS = new Set(["mp3", "wav", "m4a"]);
/**
 * Reports whether `value` is a string starting with `http://` or `https://`.
 * The check is a case-sensitive prefix match; it does not validate the rest
 * of the URL.
 *
 * @param {unknown} value - Candidate value; non-strings are never URLs.
 * @returns {boolean} True only for strings with an http(s) scheme prefix.
 */
function isHttpUrl(value) {
  if (typeof value !== "string") {
    return false;
  }
  const schemePrefixes = ["http://", "https://"];
  return schemePrefixes.some((prefix) => value.startsWith(prefix));
}
/**
 * Turns an image source into a value usable as an `image_url.url`.
 *
 * http(s) URLs and data URLs are returned untouched. A string that
 * `isLikelyBase64` accepts is wrapped into a `data:<mime>;base64,<payload>`
 * URL, using `mimeType` when it is not null/undefined and otherwise the
 * result of `imageMimeType("png")`.
 *
 * @param {unknown} source - URL, data URL, or raw base64 payload.
 * @param {string} [mimeType] - MIME type to use when wrapping raw base64.
 * @returns {string} The original URL / data URL, or a newly built data URL.
 * @throws {HttpError} 400 when `source` is not a non-empty string, or is a
 *   string that is none of the accepted forms.
 */
function normalizeImageSource(source, mimeType) {
  const hasPayload = typeof source === "string" && source.length > 0;
  if (!hasPayload) {
    throw new HttpError(400, "Image input must include a non-empty URL or base64 payload.");
  }

  // Already addressable as-is: remote URL or inline data URL.
  const isRemoteUrl = /^https?:\/\//.test(source);
  if (isRemoteUrl || isDataUrl(source)) {
    return source;
  }

  if (!isLikelyBase64(source)) {
    throw new HttpError(400, "Image input must be an http(s) URL, data URL, or raw base64 string.");
  }

  // Only null/undefined trigger the fallback; any other mimeType is used verbatim.
  const resolvedMimeType = mimeType ?? imageMimeType("png");
  return `data:${resolvedMimeType};base64,${source}`;
}
/**
 * Rewrites an image content part into the `{ type: "image_url", image_url }`
 * shape.
 *
 * The source is taken, in order of preference, from `image_url` (either a
 * bare string or an object with `url`), then `input_image.url`, then
 * `input_image.data`. The MIME type hint comes from `part.mime_type`, falling
 * back to `input_image.mime_type`. Any extra fields on an object-form
 * `image_url` are carried over; only `url` is replaced.
 *
 * @param {object} part - A content part of type `image_url` or `input_image`.
 * @returns {{type: "image_url", image_url: object}} The rewritten part.
 * @throws {HttpError} Whatever normalizeImageSource throws for a bad source.
 */
function normalizeImagePart(part) {
  const { image_url: rawImage, input_image: inputImage, mime_type: partMimeType } = part;

  // A bare string is shorthand for `{ url: <string> }`.
  const imageFields = typeof rawImage === "string" ? { url: rawImage } : rawImage;

  const source = imageFields?.url ?? inputImage?.url ?? inputImage?.data;
  const mimeType = partMimeType ?? inputImage?.mime_type;
  const url = normalizeImageSource(source, mimeType);

  return {
    type: "image_url",
    image_url: { ...imageFields, url }
  };
}
/**
 * Validates an inline (base64) audio payload and returns it as bare base64
 * plus a format name.
 *
 * `audio.format` is lower-cased and must be one of SUPPORTED_AUDIO_FORMATS.
 * `audio.data` must be a non-empty string; it is passed through
 * `stripDataUrl`, and if `parseDataUrl` reports a MIME type that
 * `inferAudioFormat` recognises, that format overrides the declared one.
 *
 * @param {{format?: unknown, data?: unknown}} audio - The `input_audio` object.
 * @returns {{data: string, format: string}} Stripped payload and resolved format.
 * @throws {HttpError} 400 when `format` is missing, not a string, or not
 *   supported, or when `data` is not a non-empty string.
 */
function normalizeAudioBase64(audio) {
  const { format: declaredFormat, data } = audio ?? {};

  // `?.` only guards null/undefined, so calling toLowerCase() on a number or
  // object would raise a TypeError instead of the intended 400. Treat any
  // non-string format as unsupported.
  const format = typeof declaredFormat === "string" ? declaredFormat.toLowerCase() : undefined;
  if (!SUPPORTED_AUDIO_FORMATS.has(format)) {
    throw new HttpError(400, "Audio input format must be mp3, wav, or m4a.");
  }

  if (typeof data !== "string" || data.length === 0) {
    throw new HttpError(400, "Audio input must include base64 data.");
  }

  // parseDataUrl's result is only used for its optional `mimeType`.
  const parsed = parseDataUrl(data);
  return {
    data: stripDataUrl(data),
    format: inferAudioFormat(parsed?.mimeType, format)
  };
}
/**
 * Maps an audio MIME type to one of the format names "wav", "m4a" or "mp3".
 *
 * Only the part before the first `;` is considered, after trimming and
 * lower-casing. Unrecognised or missing MIME types yield `fallbackFormat`
 * unchanged.
 *
 * @param {unknown} mimeType - MIME type, possibly with parameters; falsy
 *   values are treated as an empty string.
 * @param {*} fallbackFormat - Value returned when the MIME type is not recognised.
 * @returns {*} "wav", "m4a", "mp3", or `fallbackFormat`.
 */
function inferAudioFormat(mimeType, fallbackFormat) {
  // split() always yields at least one element, even for an empty string.
  const [essence] = String(mimeType || "").split(";");

  switch (essence.trim().toLowerCase()) {
    case "audio/wav":
    case "audio/x-wav":
      return "wav";
    case "audio/mp4":
    case "audio/x-m4a":
      return "m4a";
    case "audio/mpeg":
    case "audio/mp3":
      return "mp3";
    default:
      return fallbackFormat;
  }
}
/**
 * Normalizes a single `input_audio` content part.
 *
 * When the part carries an http(s) URL (in `input_audio.url`, or in
 * `input_audio.data`), the audio is handed to
 * `audioConversionService.downloadAndConvertToMp3Base64`. Otherwise the inline
 * payload is validated by `normalizeAudioBase64` and passed to
 * `audioConversionService.normalizeBase64Audio`.
 *
 * @param {object} part - Content part with `type === "input_audio"`.
 * @param {object} audioConversionService - Service providing the two methods above.
 * @returns {Promise<{type: "input_audio", input_audio: *}>} The rewritten part.
 */
async function normalizeAudioPart(part, audioConversionService) {
  const audio = part.input_audio ?? {};

  // `||` rather than `??`: an empty-string `url` falls through to `data`.
  const audioUrl = audio.url || (isHttpUrl(audio.data) ? audio.data : null);

  const inputAudio = audioUrl
    ? await audioConversionService.downloadAndConvertToMp3Base64(audioUrl)
    : await audioConversionService.normalizeBase64Audio(normalizeAudioBase64(audio));

  return { type: "input_audio", input_audio: inputAudio };
}

/**
 * Builds the request normalization service.
 *
 * @param {object} deps
 * @param {object} deps.audioConversionService - Must provide
 *   `downloadAndConvertToMp3Base64(url)` and `normalizeBase64Audio(audio)`.
 * @returns {{normalize: (body: object) => Promise<{normalizedBody: object, responseContext: object}>}}
 */
export function createRequestNormalizationService({ audioConversionService }) {
  return {
    /**
     * Produces a normalized deep copy of a request body; the input object is
     * not modified.
     *
     * For every message whose `content` is an array, image parts
     * (`image_url` / `input_image`) and `input_audio` parts are rewritten and
     * all other parts are kept as-is. The top-level `proxy` key is removed
     * from the returned body and its `expose_media_urls` flag is reported in
     * `responseContext`.
     *
     * @param {object} body - Request body; must contain a `messages` array.
     * @returns {Promise<{normalizedBody: object, responseContext: {audioFormat: *, exposeMediaUrls: boolean}}>}
     * @throws {HttpError} 400 when `body` is falsy or `body.messages` is not
     *   an array, plus any error raised while normalizing a part.
     */
    async normalize(body) {
      if (!body || !Array.isArray(body.messages)) {
        throw new HttpError(400, "Request body must include a messages array.");
      }

      const normalized = structuredClone(body);
      const proxyOptions = normalized.proxy ?? {};
      delete normalized.proxy;

      for (const message of normalized.messages) {
        // Optional access: a null/primitive message is left untouched instead
        // of crashing with a TypeError.
        if (!Array.isArray(message?.content)) {
          continue;
        }

        const nextParts = [];
        // Parts are processed one at a time, in order, so audio conversions
        // run sequentially.
        for (const part of message.content) {
          // Null/primitive parts have no `type` and are passed through.
          const partType = part?.type;

          if (partType === "image_url" || partType === "input_image") {
            nextParts.push(normalizeImagePart(part));
          } else if (partType === "input_audio") {
            nextParts.push(await normalizeAudioPart(part, audioConversionService));
          } else {
            nextParts.push(part);
          }
        }
        message.content = nextParts;
      }

      return {
        normalizedBody: normalized,
        responseContext: {
          audioFormat: normalized.audio?.format ?? "mp3",
          // Media URLs are exposed unless explicitly disabled with `false`.
          exposeMediaUrls: proxyOptions.expose_media_urls !== false
        }
      };
    }
  };
}