Spaces:
Runtime error
Runtime error
| from openai import OpenAI | |
| import os | |
| import time | |
# OpenAI-compatible client aimed at the existing Hugging Face endpoint.
# The API key is taken from the HF_TOKEN environment variable.
client = OpenAI(
    api_key=os.getenv("HF_TOKEN"),
    base_url="https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
)
def _describe_error(exc):
    """Map an exception from the model call to the message shown to the user.

    Args:
        exc: The exception raised while requesting or streaming the completion.

    Returns:
        A string starting with "Error during analysis: ". Recognised failure
        kinds (503, timeout, connection, rate limit) get a retry hint followed
        by the raw exception text; anything else is reported verbatim.
    """
    error_msg = str(exc)
    lowered = error_msg.lower()
    # Use the HTTP status when the exception exposes one; the substring checks
    # stay as a fallback for exceptions that only carry the code in their text.
    status = getattr(exc, "status_code", None)

    if status == 503 or "503" in error_msg:
        return (
            "Error during analysis: Service temporarily unavailable (503). "
            "The model server is likely initializing. "
            f"Please wait 5 minutes and try again. Details: {error_msg}"
        )
    # Timeout messages are commonly phrased "timed out" rather than "timeout",
    # so both spellings (and the builtin TimeoutError type) are accepted.
    if (
        isinstance(exc, TimeoutError)
        or "timeout" in lowered
        or "timed out" in lowered
    ):
        return (
            "Error during analysis: Request timed out. "
            "The model server may be initializing. "
            f"Please wait 5 minutes and try again. Details: {error_msg}"
        )
    if "connection" in lowered:
        return (
            "Error during analysis: Connection error. "
            "The model server may be initializing. "
            f"Please wait 5 minutes and try again. Details: {error_msg}"
        )
    if status == 429 or "limit" in lowered:
        return (
            "Error during analysis: Rate limit exceeded. "
            f"Please wait a moment and try again. Details: {error_msg}"
        )
    return f"Error during analysis: {error_msg}"


def analyze_with_model(prompt):
    """Stream the model's reply to ``prompt`` as text fragments.

    Sends ``prompt`` as a single user message through the module-level
    ``client`` with streaming enabled and yields each non-empty piece of
    content as it arrives.

    Args:
        prompt: Text sent as the content of the user message.

    Yields:
        Non-empty content strings from the streamed response. If any exception
        occurs while requesting or reading the stream, one final string
        starting with "Error during analysis: " is yielded instead of the
        exception being raised (text already yielded is not retracted).
    """
    try:
        response = client.chat.completions.create(
            model="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
            messages=[{"role": "user", "content": prompt}],
            stream=True,  # receive the reply incrementally
            temperature=0.7,
            max_tokens=8192,
            timeout=120,  # seconds; long replies need a generous limit
        )
        for chunk in response:
            # A streamed chunk may carry no choices at all (e.g. a usage-only
            # chunk); indexing [0] on it would abort an otherwise good stream.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                yield content
                time.sleep(0.01)  # brief pause to smooth out the stream
    except Exception as e:
        # Boundary of the generator: report the failure as text so the
        # consumer of the stream can display it.
        yield _describe_error(e)