import base64
from typing import Optional, List
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from dotenv import load_dotenv
import uvicorn
from ollama import chat, ChatResponse

load_dotenv()

# Alternative backend (currently unused): a local Mistral GGUF model via llama.cpp.
# The app talks to a model served by Ollama instead (see the chat() calls below).
# llm = Llama(
#     model_path=MODEL_PATH,  # path to your local mistral model (gguf)
#     n_ctx=4096,             # context size
#     n_threads=8,            # adjust per CPU
#     n_batch=512,            # batching
#     verbose=False
# )

# Initialize FastAPI app
app = FastAPI(
    title="Ollama Multimodal Chat API",
    description="A FastAPI backend for a local multimodal model served through Ollama, with text and image support",
    version="1.0.0"
)

# Add CORS middleware for the React frontend
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Pydantic models
class TextChatRequest(BaseModel):
    message: str
    conversation_history: Optional[List[dict]] = []


class TextChatResponse(BaseModel):
    response: str
    conversation_history: List[dict]


class ImageAnalysisRequest(BaseModel):
    image_base64: str
    prompt: Optional[str] = "Analyze this image"
    preset_action: Optional[str] = None


class ImageAnalysisResponse(BaseModel):
    response: str
    analysis_type: str


# Helpers
def encode_image_to_base64(image_file: UploadFile) -> str:
    """Read an uploaded image file and return its contents as a base64 string."""
    try:
        image_data = image_file.file.read()
        return base64.b64encode(image_data).decode('utf-8')
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Error processing image: {str(e)}")


def get_preset_prompt(action: str) -> str:
    """Map a preset action key to the prompt that is sent to the model."""
    presets = {
        "analyze": "Analyze this image in detail. Describe what you see, identify key elements, colors, composition, and any notable features.",
        "summarize": "Provide a concise summary of what's shown in this image in 2-3 sentences.",
        "describe": "Describe this image as if you're explaining it to someone who cannot see it. Be detailed and specific.",
        "extract_text": "Extract and transcribe any text visible in this image. If no text is present, say 'No text detected'.",
        "identify_objects": "Identify and list all the objects, people, or items you can see in this image.",
        "explain_context": "Explain the context and setting of this image. What's happening? Where might this be taken?"
    }
    return presets.get(action, "Analyze this image")
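
# Note on message format (illustrative only): the endpoints below pass plain dicts to
# ollama.chat(). A text turn looks like {"role": "user", "content": "..."}; image
# requests attach an extra "images" list, assumed here to accept the base64 strings
# produced by encode_image_to_base64() above, e.g.:
#
#   {
#       "role": "user",
#       "content": "Summarize this picture",
#       "images": ["<base64-encoded image data>"],
#   }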

# ================= API Endpoints ================= #

@app.get("/")
async def root():
    return {
        "message": "Ollama Multimodal Chat API",
        "version": "1.0.0",
        "endpoints": {
            "/chat/text": "Text-only chat with the model",
            "/chat/image-upload": "Upload an image and ask the model about it",
            "/chat/image-base64": "Send a base64-encoded image to the model",
            "/presets": "Available preset actions"
        }
    }


@app.get("/presets")
async def get_presets():
    return {
        "presets": [
            {"key": "analyze", "label": "Analyze Image", "description": "Detailed analysis of the image"},
            {"key": "summarize", "label": "Summarize", "description": "Quick summary of image content"},
            {"key": "describe", "label": "Describe", "description": "Detailed description for accessibility"},
            {"key": "extract_text", "label": "Extract Text", "description": "Extract any text from the image"},
            {"key": "identify_objects", "label": "Identify Objects", "description": "List objects and items in the image"},
            {"key": "explain_context", "label": "Explain Context", "description": "Explain the setting and context"}
        ]
    }


@app.post("/chat/text", response_model=TextChatResponse)
async def text_chat(request: TextChatRequest):
    """Text-only chat that keeps the running conversation history."""
    try:
        # Build the message list: previous history plus the new user message
        messages = request.conversation_history.copy() if request.conversation_history else []
        messages.append({"role": "user", "content": request.message})

        # Call the model through Ollama
        response: ChatResponse = chat(model='gemma3:4b', messages=messages)
        response_text = response['message']['content']

        # Append the assistant reply so the client can send the history back next turn
        messages.append({"role": "assistant", "content": response_text})

        return TextChatResponse(
            response=response_text,
            conversation_history=messages
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")


@app.post("/chat/image-upload", response_model=ImageAnalysisResponse)
async def image_upload_chat(
    image: UploadFile = File(...),
    prompt: Optional[str] = Form(None),
    preset_action: Optional[str] = Form(None)
):
    """Upload an image file and ask the model about it."""
    try:
        # Validate image file
        if not image.content_type or not image.content_type.startswith('image/'):
            raise HTTPException(status_code=400, detail="File must be an image")

        # Convert image to base64
        base64_image = encode_image_to_base64(image)

        # Determine the prompt to use
        if preset_action:
            final_prompt = get_preset_prompt(preset_action)
            analysis_type = preset_action
        elif prompt:
            final_prompt = prompt
            analysis_type = "custom"
        else:
            final_prompt = "Analyze this image"
            analysis_type = "default"

        # Prepare the multimodal message (base64 image attached via the "images" field)
        messages = [
            {
                "role": "user",
                "content": final_prompt,
                "images": [base64_image]
            }
        ]

        # Call the model through Ollama
        response: ChatResponse = chat(model='gemma3:4b', messages=messages)
        assistant_response = response['message']['content']

        return ImageAnalysisResponse(
            response=assistant_response,
            analysis_type=analysis_type
        )
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
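
# Example client call for the upload endpoint (illustrative sketch, not part of this
# module): a script or frontend could post multipart form data, e.g. with the
# `requests` package (an assumed extra dependency) against the default port below:
#
#   import requests
#   with open("photo.jpg", "rb") as f:
#       r = requests.post(
#           "http://localhost:8000/chat/image-upload",
#           files={"image": ("photo.jpg", f, "image/jpeg")},
#           data={"preset_action": "summarize"},
#       )
#   print(r.json()["response"])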

@app.post("/chat/image-base64", response_model=ImageAnalysisResponse)
async def image_base64_chat(request: ImageAnalysisRequest):
    """Send a base64-encoded image (e.g. from the frontend) and ask the model about it."""
    try:
        # Determine the prompt to use
        final_prompt = (
            get_preset_prompt(request.preset_action)
            if request.preset_action
            else request.prompt or "Analyze this image"
        )

        # Attach the base64 image to the message so the vision model can see it
        messages = [
            {
                "role": "user",
                "content": final_prompt,
                "images": [request.image_base64]
            }
        ]

        response: ChatResponse = chat(model='gemma3:4b', messages=messages)
        response_text = response['message']['content']

        return ImageAnalysisResponse(
            response=response_text,
            analysis_type=request.preset_action or "custom"
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error with image analysis: {str(e)}")


if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
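
# Example text-chat round trip (illustrative sketch): the client is expected to echo
# back the `conversation_history` it received so the model keeps context across turns.
# Again using the `requests` package as an assumed client-side dependency:
#
#   import requests
#   history = []
#   for question in ["Hello!", "What did I just say?"]:
#       r = requests.post(
#           "http://localhost:8000/chat/text",
#           json={"message": question, "conversation_history": history},
#       )
#       body = r.json()
#       history = body["conversation_history"]
#       print(body["response"])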