File size: 4,125 Bytes
e43a4a9
 
 
 
8b5482f
 
 
 
 
e43a4a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b5482f
e43a4a9
 
 
 
 
 
 
 
 
8b5482f
e43a4a9
 
 
8b5482f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e43a4a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b5482f
e43a4a9
8b5482f
 
e43a4a9
 
 
 
 
 
 
 
 
740e55f
e43a4a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import { HttpError } from "../utils/httpError.js";
import { imageMimeType } from "../utils/mediaTypes.js";
import { isDataUrl, isLikelyBase64, parseDataUrl, stripDataUrl } from "../utils/dataUrl.js";

// Audio format names accepted for base64 audio input. Membership is checked
// against the lowercased client-supplied `format` value.
const SUPPORTED_AUDIO_FORMATS = new Set(["mp3", "wav", "m4a"]);

/**
 * Reports whether a value is a string that begins with an http:// or https:// scheme.
 *
 * The check is a case-sensitive prefix match only; the rest of the URL is not validated.
 *
 * @param {unknown} value - Candidate value; anything that is not a string yields false.
 * @returns {boolean} True when the value is a string with a lowercase http(s) scheme prefix.
 */
function isHttpUrl(value) {
  if (typeof value !== "string") {
    return false;
  }

  return /^https?:\/\//.test(value);
}

/**
 * Validates an image source and returns it in URL form.
 *
 * http(s) URLs and values recognised by `isDataUrl` are returned untouched.
 * Values recognised by `isLikelyBase64` are wrapped in a `data:` URL, using the
 * supplied MIME type or, when that is null/undefined, `imageMimeType("png")`.
 *
 * @param {unknown} source - Image URL, data URL, or raw base64 payload.
 * @param {string} [mimeType] - MIME type used when wrapping a raw base64 payload.
 * @returns {string} The original URL/data URL, or a newly built base64 data URL.
 * @throws {HttpError} 400 when the source is not a non-empty string, or is a
 *   string in none of the accepted forms.
 */
function normalizeImageSource(source, mimeType) {
  const isNonEmptyString = typeof source === "string" && source.length > 0;
  if (!isNonEmptyString) {
    throw new HttpError(400, "Image input must include a non-empty URL or base64 payload.");
  }

  // Already addressable as-is: remote URL or an existing data URL.
  const hasHttpScheme = ["http://", "https://"].some((scheme) => source.startsWith(scheme));
  if (hasHttpScheme || isDataUrl(source)) {
    return source;
  }

  if (!isLikelyBase64(source)) {
    throw new HttpError(400, "Image input must be an http(s) URL, data URL, or raw base64 string.");
  }

  const resolvedMimeType = mimeType ?? imageMimeType("png");
  return `data:${resolvedMimeType};base64,${source}`;
}

/**
 * Converts an image content part into the `image_url` part shape.
 *
 * The source is taken from the first non-nullish of `image_url.url` (or
 * `image_url` itself when it is a plain string), `input_image.url`, then
 * `input_image.data`. The MIME type comes from `part.mime_type`, falling back
 * to `input_image.mime_type`. Any other fields on an object-form `image_url`
 * are carried over to the result.
 *
 * @param {object} part - Content part whose type is an image variant.
 * @returns {{type: "image_url", image_url: object}} Part with a normalized `url`.
 * @throws Whatever `normalizeImageSource` throws for an unusable source.
 */
function normalizeImagePart(part) {
  const { image_url: rawImageUrl, input_image: inputImage, mime_type: partMimeType } = part;

  // A bare string is shorthand for `{ url: <string> }`.
  let imageFields = rawImageUrl;
  if (typeof rawImageUrl === "string") {
    imageFields = { url: rawImageUrl };
  }

  const source = imageFields?.url ?? inputImage?.url ?? inputImage?.data;
  const mimeType = partMimeType ?? inputImage?.mime_type;
  const url = normalizeImageSource(source, mimeType);

  return {
    type: "image_url",
    image_url: { ...imageFields, url }
  };
}

/**
 * Validates an inline (base64) audio input and returns its payload and format.
 *
 * The declared `format` is lowercased and must be one of
 * `SUPPORTED_AUDIO_FORMATS`. The returned format is chosen by
 * `inferAudioFormat` from the MIME type reported by `parseDataUrl` (when the
 * data parses as a data URL), falling back to the declared format.
 *
 * @param {{format?: unknown, data?: unknown}} audio - Client-supplied audio descriptor.
 * @returns {{data: string, format: string}} `data` is the result of
 *   `stripDataUrl(audio.data)` (presumably the payload without any `data:`
 *   prefix; confirm in utils/dataUrl.js); `format` is the resolved format name.
 * @throws {HttpError} 400 when `format` is missing, not a string, or
 *   unsupported, or when `data` is not a non-empty string.
 */
function normalizeAudioBase64(audio) {
  // Only a string can name a format. Type-check before lowercasing so that a
  // malformed value (number, object, ...) is reported as a 400 below instead
  // of crashing with "toLowerCase is not a function".
  const format = typeof audio.format === "string" ? audio.format.toLowerCase() : undefined;
  if (!SUPPORTED_AUDIO_FORMATS.has(format)) {
    throw new HttpError(400, "Audio input format must be mp3, wav, or m4a.");
  }

  if (typeof audio.data !== "string" || audio.data.length === 0) {
    throw new HttpError(400, "Audio input must include base64 data.");
  }

  // `parsed` may be nullish when the data is not a data URL, hence `?.`.
  const parsed = parseDataUrl(audio.data);
  return {
    data: stripDataUrl(audio.data),
    format: inferAudioFormat(parsed?.mimeType, format)
  };
}

/**
 * Maps an audio MIME type to one of the format names "wav", "m4a" or "mp3".
 *
 * Anything after the first ";" is ignored, and the remaining type is trimmed
 * and compared case-insensitively. Unrecognised or missing MIME types yield
 * the fallback unchanged.
 *
 * @param {unknown} mimeType - MIME type string; falsy values are treated as empty.
 * @param {string} [fallbackFormat] - Value returned when the MIME type is not recognised.
 * @returns {string|undefined} The inferred format name, or `fallbackFormat`.
 */
function inferAudioFormat(mimeType, fallbackFormat) {
  // Drop any parameters (e.g. "; codecs=...") before matching the bare type.
  const [essence] = String(mimeType || "").split(";");

  switch (essence.trim().toLowerCase()) {
    case "audio/wav":
    case "audio/x-wav":
      return "wav";

    case "audio/mp4":
    case "audio/x-m4a":
      return "m4a";

    case "audio/mpeg":
    case "audio/mp3":
      return "mp3";

    default:
      return fallbackFormat;
  }
}

/**
 * Builds the request-normalization service.
 *
 * @param {object} deps
 * @param {object} deps.audioConversionService - Must provide
 *   `downloadAndConvertToMp3Base64(url)` and `normalizeBase64Audio(audio)`;
 *   both are awaited and their results become the part's `input_audio` value.
 * @returns {{normalize: (body: object) => Promise<{normalizedBody: object, responseContext: {audioFormat: string, exposeMediaUrls: boolean}}>}}
 */
export function createRequestNormalizationService({ audioConversionService }) {
  return {
    /**
     * Returns a normalized deep copy of a request body plus response options.
     *
     * The input is never mutated. The `proxy` key is removed from the copy and
     * only consulted for `expose_media_urls`. For every message whose `content`
     * is an array, image parts are rewritten via `normalizeImagePart` and
     * `input_audio` parts are replaced by the audio conversion service's
     * output; all other parts are kept as-is.
     *
     * @param {object} body - Request body; must contain a `messages` array.
     * @returns {Promise<object>} `normalizedBody` and `responseContext`.
     * @throws {HttpError} 400 when `body.messages` is not an array, plus any
     *   error raised while normalizing an individual part.
     */
    async normalize(body) {
      if (!body || !Array.isArray(body.messages)) {
        throw new HttpError(400, "Request body must include a messages array.");
      }

      const normalized = structuredClone(body);
      const proxyOptions = normalized.proxy ?? {};
      delete normalized.proxy;

      for (const message of normalized.messages) {
        // `?.` keeps a null/primitive entry in `messages` from crashing the
        // request; such entries are passed through untouched, like messages
        // whose content is not an array.
        if (!Array.isArray(message?.content)) {
          continue;
        }

        const nextParts = [];
        // Parts are processed one at a time so output order matches input order.
        for (const part of message.content) {
          // A null or primitive part has no type; it falls through to the
          // pass-through branch instead of throwing on property access.
          const partType = part?.type;

          if (partType === "image_url" || partType === "input_image") {
            nextParts.push(normalizeImagePart(part));
            continue;
          }

          if (partType === "input_audio") {
            const audio = part.input_audio ?? {};
            // A remote source may be given as `url`, or as an http(s) URL
            // placed in `data`.
            const audioUrl = audio.url || (isHttpUrl(audio.data) ? audio.data : null);

            if (audioUrl) {
              const converted = await audioConversionService.downloadAndConvertToMp3Base64(audioUrl);
              nextParts.push({
                type: "input_audio",
                input_audio: converted
              });
              continue;
            }

            nextParts.push({
              type: "input_audio",
              input_audio: await audioConversionService.normalizeBase64Audio(normalizeAudioBase64(audio))
            });
            continue;
          }

          nextParts.push(part);
        }

        message.content = nextParts;
      }

      return {
        normalizedBody: normalized,
        responseContext: {
          audioFormat: normalized.audio?.format ?? "mp3",
          // Media URLs are exposed unless the caller explicitly opts out.
          exposeMediaUrls: proxyOptions.expose_media_urls !== false
        }
      };
    }
  };
}