Spaces:

bluewinliang
/

zai2api

Paused

App Files Files Community

bluewinliang committed on Oct 6, 2025

Commit

53299b7

verified ·

1 Parent(s): 963cb7e

Update proxy_handler.py

Browse files

Files changed (1) hide show

proxy_handler.py +160 -194

proxy_handler.py CHANGED Viewed

@@ -1,8 +1,9 @@
 """
-Proxy handler for Z.AI API requests
 """
-import json, logging, re, time, uuid, base64, datetime, hashlib, hmac, urllib.parse
 from typing import AsyncGenerator, Dict, Any, Tuple, List
 import httpx
 from fastapi import HTTPException
 from fastapi.responses import StreamingResponse
@@ -21,133 +22,86 @@ class ProxyHandler:
             limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
             http2=True,
         )
     async def aclose(self):
         if not self.client.is_closed:
             await self.client.aclose()
     def _parse_jwt_token(self, token: str) -> Dict[str, str]:
-        """A simple, dependency-free JWT parser to get the user_id."""
         try:
             parts = token.split('.')
-            if len(parts) != 3:
-                return {"user_id": ""}
             payload_b64 = parts[1]
-            # Add padding if necessary for base64 decoding
-            payload_b64 += '=' * (-len(payload_b64) % 4)
-            payload_bytes = base64.urlsafe_b64decode(payload_b64)
-            payload = json.loads(payload_bytes)
-            user_id = payload.get("sub", "")
-            return {"user_id": user_id}
         except Exception:
-            logger.warning("Failed to parse JWT token, continuing without user_id.", exc_info=False)
             return {"user_id": ""}
-    def _construct_payload(self, token: str, user_id: str, chat_id: str, request_id: str) -> Tuple[str, str]:
-        """Constructs the sorted payload string (vl) and URL parameters for signature."""
-        timestamp_ms = str(int(time.time() * 1000))
-        # Hardcoding is fine for these fingerprinting values, mimicking the JS logic
-        data = {
-            'timestamp': timestamp_ms,
-            'requestId': request_id,
-            'user_id': user_id,
-            'token': token,
-            'user_agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
-            'current_url': f'https://chat.z.ai/c/{chat_id}',
-            'pathname': f'/c/{chat_id}',
-            'timezone': 'Asia/Shanghai',  # Hardcoded for simplicity
-            'timezone_offset': '-480',  # Hardcoded for simplicity (UTC+8)
-            'local_time': datetime.datetime.now().isoformat(),
-            'utc_time': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
-            'version': '0.0.1', 'platform': 'web', 'language': 'zh-CN', 'languages': 'zh-CN,en',
-            'cookie_enabled': 'true', 'screen_width': '2560', 'screen_height': '1440',
-            'screen_resolution': '2560x1440', 'viewport_height': '1328', 'viewport_width': '1342',
-            'viewport_size': '1342x1328', 'color_depth': '24', 'pixel_ratio': '2',
-            'search': '', 'hash': '', 'host': 'chat.z.ai', 'hostname': 'chat.z.ai',
-            'protocol': 'https:', 'referrer': '', 'title': 'Chat with Z.ai - Free AI Chatbot powered by GLM-4.5',
-            'is_mobile': 'false', 'is_touch': 'false', 'max_touch_points': '0',
-            'browser_name': 'Chrome', 'os_name': 'Mac OS'
-        }
-        # Sort keys and create the required string formats
-        sorted_items = sorted(data.items())
-        sorted_payload_str = ','.join([f"{k},{v}" for k, v in sorted_items])
-        url_params_str = urllib.parse.urlencode(dict(sorted_items))
-        return sorted_payload_str, url_params_str
-    def _generate_signature(self, vl: str, mt: str, token: str) -> Dict[str, Any]:
-        """Generates the signature based on the provided JS logic."""
-        primary_secret = "junjie"
-        timestamp_ms = int(time.time() * 1000)
-        # Use 1-minute buckets as in the JS code (60 seconds * 1000 ms)
-        minute_bucket = timestamp_ms // 60000
-        # Level 1 HMAC to derive key
-        level1_data = f"{token}|{minute_bucket}"
-        mac1 = hmac.new(primary_secret.encode('utf-8'), level1_data.encode('utf-8'), hashlib.sha256)
-        derived_key_hex = mac1.hexdigest()
-        # Level 2 HMAC for the final signature
-        level2_data = f"{vl}|{mt}|{timestamp_ms}"
-        mac2 = hmac.new(derived_key_hex.encode('utf-8'), level2_data.encode('utf-8'), hashlib.sha256)
-        signature = mac2.hexdigest()
-        return {"signature": signature, "timestamp": timestamp_ms}
-    def _clean_thinking_content(self, text: str) -> str:
-        """
-        Aggressively cleans raw thinking content strings based on observed patterns
-        from the Z.AI API.
-        """
-        if not text:
-            return ""
-        cleaned_text = text
-        # 1. Remove specific unwanted blocks like tool calls and summaries.
-        cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
-        cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', cleaned_text, flags=re.DOTALL)
-        # 2. **FIX**: Remove tag-like metadata containing `duration` attribute.
-        # This handles the reported issue: `true" duration="0" ... >`
-        cleaned_text = re.sub(r'<[^>]*duration="[^"]*"[^>]*>', '', cleaned_text)
-        # 3. Remove specific structural tags, but keep the content between them.
-        cleaned_text = cleaned_text.replace("</thinking>", "")
-        cleaned_text = cleaned_text.replace("<Full>", "")
-        cleaned_text = cleaned_text.replace("</Full>", "")
-        # This regex handles <details>, <details open>, and </details>
         cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
-        # 4. Handle markdown blockquotes, preserving multi-level ones.
         cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
-        # 5. Remove other known text artifacts.
         cleaned_text = cleaned_text.replace("Thinking…", "")
-        # 6. Final strip to clean up residual whitespace.
         return cleaned_text.strip()
     def _clean_answer_content(self, text: str) -> str:
-        """
-        Cleans unwanted tags from answer content.
-        Does NOT strip whitespace to preserve markdown in streams.
-        """
-        if not text:
-            return ""
-        # Remove tool call blocks
-        cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
-        # Remove any residual details/summary blocks that might leak into the answer
-        cleaned_text = re.sub(r'<details[^>]*>.*?</details>', '', cleaned_text, flags=re.DOTALL)
-        cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
         return cleaned_text
     def _serialize_msgs(self, msgs) -> list:
-        """Converts message objects to a list of dictionaries."""
         out = []
         for m in msgs:
             if hasattr(m, "dict"): out.append(m.dict())
             elif hasattr(m, "model_dump"): out.append(m.model_dump())
             elif isinstance(m, dict): out.append(m)
@@ -155,40 +109,86 @@ class ProxyHandler:
         return out
     async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str, str]:
-        """Prepares the request body, headers, URL, and cookie for the upstream API."""
         ck = await cookie_manager.get_next_cookie()
         if not ck: raise HTTPException(503, "No available cookies")
-        # 1. Extract necessary info for signature
         chat_id = str(uuid.uuid4())
         request_id = str(uuid.uuid4())
-        user_id = self._parse_jwt_token(ck).get("user_id", "")
-        last_message = req.messages[-1] if req.messages else None
-        # 'mt' is the content of the last message
-        mt = last_message.content if last_message and isinstance(last_message.content, str) else ""
-        # 2. Generate signature components
-        # 'vl' is the sorted payload string
-        vl, url_params = self._construct_payload(ck, user_id, chat_id, request_id)
-        sig_data = self._generate_signature(vl, mt, ck)
-        signature = sig_data["signature"]
-        timestamp = sig_data["timestamp"]
-        # 3. Construct the final dynamic URL
-        final_url = f"{settings.UPSTREAM_URL}?{url_params}&signature_timestamp={timestamp}"
-        # 4. Prepare body and headers
-        model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
-        body = { "stream": True, "model": model, "messages": self._serialize_msgs(req.messages), "background_tasks": {"title_generation": True, "tags_generation": True}, "chat_id": chat_id, "features": {"image_generation": False, "code_interpreter": False, "web_search": False, "auto_web_search": False, "enable_thinking": True,}, "id": request_id, "mcp_servers": ["deep-web-search"], "model_item": {"id": model, "name": "GLM-4.6", "owned_by": "openai"}, "params": {}, "tool_servers": [], "variables": {"{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown", "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),},}
-        headers = { "Content-Type": "application/json", "Authorization": f"Bearer {ck}", "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"), "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN", "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="141", "Google Chrome";v="141"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.79", "X-Signature": signature, "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",}
-        return body, headers, final_url, ck
     async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
         ck = None
         try:
-            body, headers, url, ck = await self._prep_upstream(req)
             comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
             think_open = False
             yielded_think_buffer = ""
@@ -201,19 +201,15 @@ class ProxyHandler:
                     if not think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = True
                     cleaned_full_text = self._clean_thinking_content(text)
-                    delta_to_send = cleaned_full_text[len(yielded_think_buffer):] if cleaned_full_text.startswith(yielded_think_buffer) else cleaned_full_text
                     if delta_to_send:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': delta_to_send}, 'finish_reason': None}]})}\n\n"
                     yielded_think_buffer = cleaned_full_text
                 elif content_type == "answer":
                     if think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = False
                     cleaned_text = self._clean_answer_content(text)
                     if cleaned_text:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
@@ -222,6 +218,7 @@ class ProxyHandler:
                 if resp.status_code != 200:
                     await cookie_manager.mark_cookie_failed(ck); err_body = await resp.aread()
                     err_msg = f"Error: {resp.status_code} - {err_body.decode(errors='ignore')}"
                     err = {"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": req.model, "choices": [{"index": 0, "delta": {"content": err_msg}, "finish_reason": "stop"}],}
                     yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"; return
                 await cookie_manager.mark_cookie_success(ck)
@@ -230,8 +227,8 @@ class ProxyHandler:
                     for line in raw.strip().split('\n'):
                         line = line.strip()
                         if not line.startswith('data: '): continue
                         payload_str = line[6:]
                         if payload_str == '[DONE]':
                             if think_open:
                                 yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
@@ -240,23 +237,21 @@ class ProxyHandler:
                             return
                         try:
                             dat = json.loads(payload_str).get("data", {})
-                        except (json.JSONDecodeError, AttributeError):
-                            continue
                         phase = dat.get("phase")
                         content_chunk = dat.get("delta_content") or dat.get("edit_content")
                         if not content_chunk:
-                            continue
                         if phase == "thinking":
-                            if dat.get("edit_content") is not None:
-                                current_raw_thinking = content_chunk
-                            else:
-                                current_raw_thinking += content_chunk
                             async for item in yield_delta("thinking", current_raw_thinking):
                                 yield item
                         elif phase == "answer":
                             content_to_process = content_chunk
                             if is_first_answer_chunk:
@@ -264,7 +259,6 @@ class ProxyHandler:
                                     parts = content_to_process.split('</details>', 1)
                                     content_to_process = parts[1] if len(parts) > 1 else ""
                                 is_first_answer_chunk = False
                             if content_to_process:
                                 async for item in yield_delta("answer", content_to_process):
                                     yield item
@@ -272,70 +266,42 @@ class ProxyHandler:
             logger.exception("Stream error"); raise
     async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
         ck = None
         try:
-            body, headers, url, ck = await self._prep_upstream(req)
-            last_thinking_content = ""
-            raw_answer_parts = []
-            async with self.client.stream("POST", url, json=body, headers=headers) as resp:
                 if resp.status_code != 200:
                     await cookie_manager.mark_cookie_failed(ck); error_detail = await resp.text()
                     raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
                 await cookie_manager.mark_cookie_success(ck)
-                current_raw_thinking = ""
-                is_first_answer_chunk = True
-                async for raw in resp.aiter_text():
-                    for line in raw.strip().split('\n'):
-                        line = line.strip()
-                        if not line.startswith('data: '): continue
-                        payload_str = line[6:]
-                        if payload_str == '[DONE]': break
-                        try:
-                            dat = json.loads(payload_str).get("data", {})
-                        except (json.JSONDecodeError, AttributeError): continue
-                        phase = dat.get("phase")
-                        content_chunk = dat.get("delta_content") or dat.get("edit_content")
-                        if not content_chunk:
-                            continue
-                        if phase == "thinking":
-                            if dat.get("edit_content") is not None:
-                                current_raw_thinking = content_chunk
-                            else:
-                                current_raw_thinking += content_chunk
-                            last_thinking_content = current_raw_thinking
-                        elif phase == "answer":
-                            content_to_process = content_chunk
-                            if is_first_answer_chunk:
-                                if '</details>' in content_to_process:
-                                    parts = content_to_process.split('</details>', 1)
-                                    content_to_process = parts[1] if len(parts) > 1 else ""
-                                is_first_answer_chunk = False
-                            if content_to_process:
-                                raw_answer_parts.append(content_to_process)
-                    else:
-                        continue
-                    break
-            full_answer = ''.join(raw_answer_parts)
-            cleaned_ans_text = self._clean_answer_content(full_answer).strip()
-            final_content = cleaned_ans_text
-            if settings.SHOW_THINK_TAGS and last_thinking_content:
-                cleaned_think_text = self._clean_thinking_content(last_thinking_content)
-                if cleaned_think_text:
-                    final_content = f"<think>{cleaned_think_text}</think>{cleaned_ans_text}"
-            return ChatCompletionResponse(
-                id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
-                choices=[{"index": 0, "message": {"role": "assistant", "content": final_content}, "finish_reason": "stop"}],
-            )
         except Exception:
             logger.exception("Non-stream processing failed"); raise

 """
+Proxy handler for Z.AI API requests, updated with simplified signature logic.
 """
+import json, logging, re, time, uuid, base64, hashlib, hmac
 from typing import AsyncGenerator, Dict, Any, Tuple, List
 import httpx
 from fastapi import HTTPException
 from fastapi.responses import StreamingResponse
             limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
             http2=True,
         )
+        # The primary secret key from the reference code.
+        self.primary_secret = "junjie".encode('utf-8')
     async def aclose(self):
+        """Close the shared httpx client unless it has already been closed."""
         if not self.client.is_closed:
             await self.client.aclose()
+    def _get_timestamp_millis(self) -> int:
+        """Return the current time.time() value as whole milliseconds."""
+        return int(time.time() * 1000)
     def _parse_jwt_token(self, token: str) -> Dict[str, str]:
+        """
+        Decode the payload segment of a JWT and return its 'sub' claim as the user ID.
+        The token's signature is NOT verified; the payload is only base64url-decoded
+        and parsed as JSON.
+        Args:
+            token: The JWT string (three dot-separated segments).
+        Returns:
+            {"user_id": <sub claim>}; the value is "" when the token does not have
+            exactly three segments, has no 'sub' claim, or cannot be decoded.
+        """
         try:
             parts = token.split('.')
+            # Anything other than header.payload.signature is not treated as a JWT.
+            if len(parts) != 3: return {"user_id": ""}
             payload_b64 = parts[1]
+            # Restore the '=' padding so the length is a multiple of 4 before decoding.
+            payload_b64 += '=' * (-len(payload_b64) % 4)
+            payload_json = base64.urlsafe_b64decode(payload_b64).decode('utf-8')
+            payload = json.loads(payload_json)
+            return {"user_id": payload.get("sub", "")}
         except Exception:
+            # Best effort: any decoding or parsing failure yields an empty user_id.
             return {"user_id": ""}
+    def _generate_signature(self, e_payload: str, t_payload: str) -> Dict[str, Any]:
+        """
+        Compute the request signature with a two-level HMAC-SHA256.
+        Level 1 derives an intermediate key: HMAC(self.primary_secret, str(bucket)),
+        where bucket is the current millisecond timestamp floor-divided into
+        5-minute windows. Level 2 signs "<e_payload>|<t_payload>|<timestamp_ms>"
+        using the UTF-8 encoded hex digest of level 1 as the key.
+        Args:
+            e_payload (str): The simplified payload string (e.g., "requestId,...,timestamp,...").
+            t_payload (str): The last message content.
+        Returns:
+            A dictionary with 'signature' (hex digest string) and 'timestamp'
+            (the millisecond timestamp that was included in the signed message).
+        """
+        # NOTE(review): the two reference implementations disagree on the time bucket.
+        # The Python snippet uses a 5-minute bucket: `n = timestamp_ms // (5 * 60 * 1000)`.
+        # The JS snippet uses a 1-minute bucket: `minuteBucket = Math.floor(timestampMs / 60000)`.
+        # This method follows the Python snippet; confirm which one the upstream accepts.
+        timestamp_ms = self._get_timestamp_millis()
+        message_string = f"{e_payload}|{t_payload}|{timestamp_ms}"
+        # n identifies the current 5-minute window.
+        n = timestamp_ms // (5 * 60 * 1000)
+        # Level 1: derive the intermediate key from the secret and the window number.
+        msg1 = str(n).encode("utf-8")
+        intermediate_key = hmac.new(self.primary_secret, msg1, hashlib.sha256).hexdigest()
+        # Level 2: sign the message with the hex text of the intermediate key.
+        msg2 = message_string.encode("utf-8")
+        final_signature = hmac.new(intermediate_key.encode("utf-8"), msg2, hashlib.sha256).hexdigest()
+        return {"signature": final_signature, "timestamp": timestamp_ms}
+    def _clean_thinking_content(self, text: str) -> str:
+        """
+        Remove markup from raw "thinking" text and return the result stripped of
+        leading/trailing whitespace. Returns "" for empty input.
+        """
+        if not text: return ""
+        # Drop whole <summary>...</summary> and <glm_block...</glm_block> spans, plus any
+        # single tag that carries a duration="..." attribute.
+        cleaned_text = re.sub(r'<summary>.*?</summary>|<glm_block.*?</glm_block>|<[^>]*duration="[^"]*"[^>]*>', '', text, flags=re.DOTALL)
+        # Remove these wrapper tags but keep the text around them.
+        cleaned_text = cleaned_text.replace("</thinking>", "").replace("<Full>", "").replace("</Full>", "")
+        # Remove opening and closing <details ...> tags, keeping their content.
         cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
+        # Remove a line-leading '>' quote marker unless another '>' follows it directly.
         cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
+        # Remove the literal "Thinking…" placeholder text.
         cleaned_text = cleaned_text.replace("Thinking…", "")
         return cleaned_text.strip()
     def _clean_answer_content(self, text: str) -> str:
+        """
+        Remove <glm_block>, <details> and <summary> spans (tags and their content)
+        from answer text. The result is not stripped, so surrounding whitespace is
+        kept as-is. Returns "" for empty input.
+        """
+        if not text: return ""
+        cleaned_text = re.sub(r'<glm_block.*?</glm_block>|<details[^>]*>.*?</details>|<summary>.*?</summary>', '', text, flags=re.DOTALL)
         return cleaned_text
     def _serialize_msgs(self, msgs) -> list:
         out = []
         for m in msgs:
+            # Adapting to Pydantic v1/v2 and dicts
             if hasattr(m, "dict"): out.append(m.dict())
             elif hasattr(m, "model_dump"): out.append(m.model_dump())
             elif isinstance(m, dict): out.append(m)
         return out
     async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str, str]:
+        """
+        Build everything needed for one upstream chat-completion call.
+        Takes the next cookie from cookie_manager, signs a small payload
+        (requestId / timestamp / user_id plus the last message text when it is a
+        string), puts those values in the query string and the signature in the
+        X-Signature header.
+        Args:
+            req: The incoming chat completion request.
+        Returns:
+            A tuple (body, headers, cookie, url). Note that the cookie comes
+            before the URL.
+        Raises:
+            HTTPException: 503 when cookie_manager returns no cookie.
+        """
         ck = await cookie_manager.get_next_cookie()
         if not ck: raise HTTPException(503, "No available cookies")
+        # Map the public model alias to the upstream model; other names pass through.
+        model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
         chat_id = str(uuid.uuid4())
         request_id = str(uuid.uuid4())
+        # --- Simplified signature payload ---
+        user_info = self._parse_jwt_token(ck)
+        user_id = user_info.get("user_id", "")
+        # NOTE(review): user_id decoded from the cookie above is never used below.
+        # The signed payload carries freshly generated UUIDs instead, which the original
+        # author says mirrors the reference code -- confirm this is intended.
+        # NOTE(review): payload_request_id is also distinct from request_id sent as body "id".
+        payload_user_id = str(uuid.uuid4())
+        payload_request_id = str(uuid.uuid4())
+        payload_timestamp = str(self._get_timestamp_millis())
+        # e: comma-joined key,value pairs that form the first part of the signed message
+        e_payload = f"requestId,{payload_request_id},timestamp,{payload_timestamp},user_id,{payload_user_id}"
+        # t: the last message content; stays "" when there are no messages or the
+        # content is not a plain string
+        t_payload = ""
+        if req.messages:
+            last_message = req.messages[-1]
+            if isinstance(last_message.content, str):
+                t_payload = last_message.content
+        # Generate the signature
+        signature_data = self._generate_signature(e_payload, t_payload)
+        signature = signature_data["signature"]
+        signature_timestamp = signature_data["timestamp"]
+        # The signed values travel as URL query parameters, not in the JSON body.
+        url_params = {
+            "requestId": payload_request_id,
+            "timestamp": payload_timestamp,
+            "user_id": payload_user_id,
+            "signature_timestamp": str(signature_timestamp)
+        }
+        # Build the final URL from settings.UPSTREAM_URL with the query parameters above.
+        # NOTE(review): the original author flagged the reference code's
+        # `f"{BASE_URL}/api/chat/completions"` URL as a typo ("it should be `z.ai`");
+        # verify that settings.UPSTREAM_URL points at the intended host.
+        final_url = httpx.URL(settings.UPSTREAM_URL).copy_with(params=url_params)
+        body = {
+            "stream": True,
+            "model": model,
+            "messages": self._serialize_msgs(req.messages),
+            "chat_id": chat_id,
+            "id": request_id,
+            "features": {
+                "image_generation": False,
+                "web_search": False,
+                "auto_web_search": False,
+                "preview_mode": False,
+                "flags": [],
+                "enable_thinking": True,
+            }
+        }
+        headers = {
+            "Accept": "*/*",
+            "Accept-Language": "zh-CN",
+            "Authorization": f"Bearer {ck}",
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Content-Type": "application/json",
+            "Origin": "https://chat.z.ai",
+            "Referer": "https://chat.z.ai/",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
+            "X-FE-Version": "prod-fe-1.0.95",
+            "X-Signature": signature,
+        }
+        return body, headers, ck, str(final_url)
     async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
         ck = None
         try:
+            body, headers, ck, url = await self._prep_upstream(req)
             comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
             think_open = False
             yielded_think_buffer = ""
                     if not think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = True
                     cleaned_full_text = self._clean_thinking_content(text)
+                    delta_to_send = cleaned_full_text[len(yielded_think_buffer):]
                     if delta_to_send:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': delta_to_send}, 'finish_reason': None}]})}\n\n"
                     yielded_think_buffer = cleaned_full_text
                 elif content_type == "answer":
                     if think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = False
                     cleaned_text = self._clean_answer_content(text)
                     if cleaned_text:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
                 if resp.status_code != 200:
                     await cookie_manager.mark_cookie_failed(ck); err_body = await resp.aread()
                     err_msg = f"Error: {resp.status_code} - {err_body.decode(errors='ignore')}"
+                    logger.error(f"Upstream error: {err_msg}")
                     err = {"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": req.model, "choices": [{"index": 0, "delta": {"content": err_msg}, "finish_reason": "stop"}],}
                     yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"; return
                 await cookie_manager.mark_cookie_success(ck)
                     for line in raw.strip().split('\n'):
                         line = line.strip()
                         if not line.startswith('data: '): continue
                         payload_str = line[6:]
+                        # The reference code has a special 'done' phase, but the original Z.AI uses [DONE]
                         if payload_str == '[DONE]':
                             if think_open:
                                 yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
                             return
                         try:
                             dat = json.loads(payload_str).get("data", {})
+                        except (json.JSONDecodeError, AttributeError): continue
                         phase = dat.get("phase")
                         content_chunk = dat.get("delta_content") or dat.get("edit_content")
                         if not content_chunk:
+                            # Handle case where chunk is just usage info, etc.
+                            if phase == 'other' and dat.get('usage'):
+                                pass # In streaming, usage might come with the final chunk
+                            else:
+                                continue
                         if phase == "thinking":
+                            current_raw_thinking = content_chunk if dat.get("edit_content") is not None else current_raw_thinking + content_chunk
                             async for item in yield_delta("thinking", current_raw_thinking):
                                 yield item
                         elif phase == "answer":
                             content_to_process = content_chunk
                             if is_first_answer_chunk:
                                     parts = content_to_process.split('</details>', 1)
                                     content_to_process = parts[1] if len(parts) > 1 else ""
                                 is_first_answer_chunk = False
                             if content_to_process:
                                 async for item in yield_delta("answer", content_to_process):
                                     yield item
             logger.exception("Stream error"); raise
     async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
+        """
+        Send the request upstream with "stream": False and adapt the JSON reply.
+        The first choice's message content and finish_reason are copied into a
+        ChatCompletionResponse. The content is passed through as received; the
+        thinking/answer cleaning helpers are not applied on this path.
+        Args:
+            req: The incoming chat completion request.
+        Returns:
+            A ChatCompletionResponse with a single assistant choice.
+        Raises:
+            HTTPException: with the upstream status code when it is not 200
+                (the cookie is marked failed first). Any other exception is
+                logged and re-raised.
+        """
         ck = None
         try:
+            body, headers, ck, url = await self._prep_upstream(req)
+            # For non-stream, set stream to False in the body
+            body["stream"] = False
+            # AsyncClient.post() is a coroutine, not an async context manager, so it
+            # must be awaited; the response body is fully read when it returns.
+            resp = await self.client.post(url, json=body, headers=headers)
+            if resp.status_code != 200:
+                await cookie_manager.mark_cookie_failed(ck)
+                # Response.text is a property on a read response, not an awaitable method.
+                error_detail = resp.text
+                logger.error(f"Upstream error: {resp.status_code} - {error_detail}")
+                raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
+            await cookie_manager.mark_cookie_success(ck)
+            # Assumes the upstream non-stream reply is a single JSON object in an
+            # OpenAI-like shape (choices[0].message.content) -- TODO confirm.
+            response_data = resp.json()
+            final_content = ""
+            finish_reason = "stop" # Default
+            if "choices" in response_data and response_data["choices"]:
+                first_choice = response_data["choices"][0]
+                if "message" in first_choice and "content" in first_choice["message"]:
+                    final_content = first_choice["message"]["content"]
+                if "finish_reason" in first_choice:
+                    finish_reason = first_choice["finish_reason"]
+            return ChatCompletionResponse(
+                id=response_data.get("id", f"chatcmpl-{uuid.uuid4().hex[:29]}"),
+                created=int(time.time()),
+                model=req.model,
+                choices=[{"index": 0, "message": {"role": "assistant", "content": final_content}, "finish_reason": finish_reason}],
+            )
         except Exception:
             logger.exception("Non-stream processing failed"); raise