# Hugging Face Spaces file-view residue (not code) — kept as comments so the file parses:
# yoSabareesh's picture
# Update app.py
# 50b6a81 verified
import asyncio
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import quote_plus

import httpx
import streamlit as st
import torch
from bs4 import BeautifulSoup
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face model id for a distilled DeepSeek-R1 1.5B causal LM.
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Load the model & tokenizer from Hugging Face.
# NOTE(review): this runs at import time and downloads weights on first run;
# fp16 halves memory but assumes the host supports half-precision — confirm
# on CPU-only deployments.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)
# Constants
HEADERS = {"User-Agent": "Mozilla/5.0"}  # minimal UA so the site serves normal HTML
MAX_CASE_TEXT_LENGTH = 9500              # character cap on scraped case text fed to the LM
MAX_CONCURRENT_REQUESTS = 100            # cap on simultaneous in-flight HTTP requests
# Thread pool used to run blocking model inference off the event loop.
executor = ThreadPoolExecutor(max_workers=10)
# Module-wide throttle shared by all fetches.
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
async def fetch_url(url, client):
    """GET *url* through the shared client, throttled by the global semaphore.

    Args:
        url: Absolute URL to request.
        client: Shared ``httpx.AsyncClient``.

    Returns:
        The ``httpx.Response`` (status is NOT checked here; callers decide).
    """
    async with semaphore:
        return await client.get(url, headers=HEADERS)
async def search_indiankanoon(query, client):
    """Search Indian Kanoon and return up to 10 result links.

    Args:
        query: Free-text legal query from the user.
        client: Shared ``httpx.AsyncClient``.

    Returns:
        list[dict]: Dicts with ``"title"`` and absolute ``"url"`` keys;
        an empty list when the search request does not return HTTP 200.
    """
    # Bug fix: the raw query was interpolated into the URL unescaped, so
    # spaces, '&', '?', '#' etc. in user input produced a malformed request.
    search_url = f"https://indiankanoon.org/search/?formInput={quote_plus(query)}"
    response = await fetch_url(search_url, client)
    if response.status_code != 200:
        return []
    soup = BeautifulSoup(response.text, "lxml")
    return [
        {"title": link.text.strip(), "url": "https://indiankanoon.org" + link["href"]}
        for link in soup.select(".result_title a")[:10]
    ]
async def scrape_case(url, client):
    """Download one case page and extract its body text.

    Args:
        url: Case page URL on Indian Kanoon.
        client: Shared ``httpx.AsyncClient``.

    Returns:
        dict: Keys ``"title"``, ``"text"``, ``"url"``. The title is not
        parsed from the page, so it is always ``"Unknown"``; callers use
        the search-result title instead.
    """
    response = await fetch_url(url, client)
    if response.status_code != 200:
        return {"title": "Unknown", "text": "Failed to fetch case details.", "url": url}
    soup = BeautifulSoup(response.text, "lxml")
    # Collect the text of every <p> inside the expanded-headline fragments.
    chunks = []
    for fragment in soup.select(".expanded_headline .fragment"):
        for para in fragment.find_all("p"):
            chunks.append(para.get_text(separator=" ", strip=True))
    if chunks:
        # Cap length so downstream summarization gets a bounded prompt.
        case_text = " ".join(chunks)[:MAX_CASE_TEXT_LENGTH]
    else:
        case_text = "No case text found."
    return {"title": "Unknown", "text": case_text, "url": url}
def summarize_text(text):
    """Summarize case text using the Hugging Face model.

    Args:
        text: Case text to summarize (already capped at
            MAX_CASE_TEXT_LENGTH characters by the scraper).

    Returns:
        str: Only the newly generated summary, with special tokens and the
        echoed prompt removed.
    """
    prompt = f"""
You are an Indian legal AI assistant. Summarize the following court case with high accuracy, using only the provided text.
Do NOT add assumptions or external knowledge.
Include:
- Case Title (if available)
- Key Dates (case, judgement, arrested, seen, call, evidence, person, action time and date.)
- Laws, Acts, or Articles cited (verbatim)
- Main Legal Issue (in brief)
- Court's Decision & Reasoning (without opinion)
- Precedent or Impact (if mentioned in the case text)
Ensure the summary remains neutral, concise, and fact-based.
Case Text: {text}...
"""
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    with torch.no_grad():
        # Bug fix: max_length=500 counted *prompt* tokens too — a 9.5k-char
        # case text alone exceeds 500 tokens, so generate() could emit
        # nothing new. max_new_tokens bounds only the continuation.
        output = model.generate(input_ids, max_new_tokens=500)
    # Bug fix: decode only the generated continuation; decoding output[0]
    # whole returned the entire prompt as part of the "summary".
    new_tokens = output[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
async def process_case(case, client):
    """Scrape one case and turn it into a ``{"title", "summary"}`` dict.

    Args:
        case: Search-result dict with ``"title"`` and ``"url"`` keys.
        client: Shared ``httpx.AsyncClient``.

    Returns:
        dict: ``"title"`` from the search result plus either a model
        summary or a placeholder when the case text is unavailable.
    """
    details = await scrape_case(case["url"], client)
    text = details["text"]
    if not text or "Failed to fetch" in text:
        return {"title": case["title"], "summary": "Summary unavailable due to missing case text."}
    # Run blocking model inference in the thread pool to keep the loop free.
    loop = asyncio.get_running_loop()
    summary = await loop.run_in_executor(executor, summarize_text, text)
    return {"title": case["title"], "summary": summary}
async def fetch_and_process_cases(query):
    """Search, scrape, and summarize all matching cases concurrently.

    Args:
        query: Free-text legal query.

    Returns:
        list[dict] | None: One summary dict per case, or ``None`` when the
        search returned no results.
    """
    async with httpx.AsyncClient(timeout=10) as client:
        found = await search_indiankanoon(query, client)
        if not found:
            return None
        # Fan out: every case is scraped and summarized in parallel.
        return await asyncio.gather(*(process_case(c, client) for c in found))
def run_async_task(query):
    """Bridge Streamlit's synchronous callbacks into the async pipeline.

    Args:
        query: Free-text legal query.

    Returns:
        Whatever ``fetch_and_process_cases`` yields (list of summaries or None).
    """
    pipeline = fetch_and_process_cases(query)
    return asyncio.run(pipeline)
# Streamlit UI
st.title("Indian Legal AI - Case Reference & Summarization")
query = st.text_input("Enter your legal query:")
if st.button("Search"):
    # Bug fix: previously a blank query still fired the full search +
    # 10 case scrapes; guard it before spending network round-trips.
    if not query.strip():
        st.warning("Please enter a legal query before searching.")
    else:
        with st.spinner("Fetching results..."):
            results = run_async_task(query)
        if not results:
            st.error("No results found.")
        else:
            # One markdown section per case, joined into a single info box.
            summaries = [f"### {case['title']}\n\n{case['summary']}\n\n" for case in results]
            st.write("### Overall Insights for Lawyers:")
            st.info("\n".join(summaries))