import asyncio
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import quote_plus

import httpx
import streamlit as st
import torch
from bs4 import BeautifulSoup
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Load the model & tokenizer from Hugging Face once and cache them across Streamlit reruns
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Use float16 on GPU; fall back to float32 on CPU, where half precision is poorly supported
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=dtype)
    model.to("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    return tokenizer, model

tokenizer, model = load_model()

# Constants
HEADERS = {"User-Agent": "Mozilla/5.0"}
MAX_CASE_TEXT_LENGTH = 9500
MAX_CONCURRENT_REQUESTS = 100

executor = ThreadPoolExecutor(max_workers=10)
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
async def fetch_url(url, client):
    """Fetch a URL asynchronously with concurrency control."""
    async with semaphore:
        response = await client.get(url, headers=HEADERS)
        return response


async def search_indiankanoon(query, client):
    """Search Indian Kanoon for cases."""
    # URL-encode the user query so spaces and special characters survive the request
    search_url = f"https://indiankanoon.org/search/?formInput={quote_plus(query)}"
    response = await fetch_url(search_url, client)
    if response.status_code != 200:
        return []
    soup = BeautifulSoup(response.text, "lxml")
    return [
        {"title": link.text.strip(), "url": "https://indiankanoon.org" + link["href"]}
        for link in soup.select(".result_title a")[:10]
    ]
async def scrape_case(url, client):
    """Scrape case details from Indian Kanoon."""
    response = await fetch_url(url, client)
    if response.status_code != 200:
        return {"title": "Unknown", "text": "Failed to fetch case details.", "url": url}
    soup = BeautifulSoup(response.text, "lxml")
    paragraphs = [
        p.get_text(separator=" ", strip=True)
        for fragment in soup.select(".expanded_headline .fragment")
        for p in fragment.find_all("p")
    ]
    case_text = " ".join(paragraphs)[:MAX_CASE_TEXT_LENGTH] if paragraphs else "No case text found."
    return {"title": "Unknown", "text": case_text, "url": url}
def summarize_text(text):
    """Summarize case text using the Hugging Face model."""
    prompt = f"""
You are an Indian legal AI assistant. Summarize the following court case with high accuracy, using only the provided text.
Do NOT add assumptions or external knowledge.
Include:
- Case Title (if available)
- Key Dates (filing, arrest, hearings, judgement, and any other dated events)
- Laws, Acts, or Articles cited (verbatim)
- Main Legal Issue (in brief)
- Court's Decision & Reasoning (without opinion)
- Precedent or Impact (if mentioned in the case text)
Ensure the summary remains neutral, concise, and fact-based.
Case Text: {text}...
"""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    with torch.no_grad():
        # max_new_tokens bounds only the generated summary; max_length would also count the long prompt
        output = model.generate(**inputs, max_new_tokens=500)
    # Decode only the newly generated tokens, not the echoed prompt
    return tokenizer.decode(output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
async def process_case(case, client):
    """Process a single case asynchronously."""
    case_data = await scrape_case(case["url"], client)
    if not case_data["text"] or "Failed to fetch" in case_data["text"]:
        return {"title": case["title"], "summary": "Summary unavailable due to missing case text."}
    loop = asyncio.get_running_loop()
    summary = await loop.run_in_executor(executor, summarize_text, case_data["text"])
    return {"title": case["title"], "summary": summary}


async def fetch_and_process_cases(query):
    """Fetch case references and summarize them asynchronously."""
    async with httpx.AsyncClient(timeout=10) as client:
        cases = await search_indiankanoon(query, client)
        if not cases:
            return None
        tasks = [process_case(case, client) for case in cases]
        return await asyncio.gather(*tasks)


def run_async_task(query):
    """Run an async task inside a synchronous function for Streamlit."""
    return asyncio.run(fetch_and_process_cases(query))
# Streamlit UI
st.title("Indian Legal AI - Case Reference & Summarization")
query = st.text_input("Enter your legal query:")

if st.button("Search"):
    if not query.strip():
        st.warning("Please enter a query before searching.")
    else:
        with st.spinner("Fetching results..."):
            results = run_async_task(query)
        if not results:
            st.error("No results found.")
        else:
            summaries = [f"### {case['title']}\n\n{case['summary']}\n\n" for case in results]
            st.write("### Overall Insights for Lawyers:")
            st.info("\n".join(summaries))