Alamgirapi committed on
Commit
e5ec5b1
·
verified ·
1 Parent(s): 9ea1183

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -155
app.py CHANGED
@@ -1,156 +1,180 @@
1
- import os
2
- import tempfile
3
- import shutil
4
- from flask import Flask, request, jsonify, render_template, redirect, url_for, flash
5
- import asyncio
6
- from retriever.document_store import DocumentStore
7
- from retriever.rag_pipeline import RAGPipeline
8
- from models.model_loader import load_llm
9
- from config import Config
10
-
11
- app = Flask(__name__)
12
- app.config.from_object(Config)
13
- app.secret_key = os.getenv('SECRET_KEY', 'your-secret-key-here')
14
-
15
- # Initialize components
16
- print("Initializing document store...")
17
- document_store = DocumentStore()
18
- print("Document store initialized")
19
-
20
- print("Loading LLM...")
21
- llm = load_llm(api_key=app.config["LLAMA_API_KEY"])
22
- print("LLM loaded")
23
-
24
- print("Initializing RAG pipeline...")
25
- rag_pipeline = RAGPipeline(document_store, llm)
26
- print("RAG pipeline initialized")
27
-
28
- @app.route('/')
29
- def index():
30
- """Home page"""
31
- return render_template('index.html')
32
-
33
- @app.route('/add_data', methods=['GET', 'POST'])
34
- def add_data():
35
- """Add data to the document store"""
36
- if request.method == 'POST':
37
- content = request.form.get('content')
38
- title = request.form.get('title', 'Untitled')
39
-
40
- if content:
41
- try:
42
- document_store.add_text(content=content, title=title)
43
- flash('Data added successfully!', 'success')
44
- return redirect(url_for('index'))
45
- except Exception as e:
46
- flash(f'Error adding data: {str(e)}', 'error')
47
- else:
48
- flash('Content is required', 'error')
49
-
50
- return render_template('add_data.html')
51
-
52
- @app.route('/upload_file', methods=['POST'])
53
- def upload_file():
54
- """Upload and process a file (PDF, TXT, etc.)"""
55
- if 'file' not in request.files:
56
- flash('No file selected', 'error')
57
- return redirect(url_for('add_data'))
58
-
59
- file = request.files['file']
60
- if file.filename == '':
61
- flash('No file selected', 'error')
62
- return redirect(url_for('add_data'))
63
-
64
- if file:
65
- try:
66
- # Create a temporary file
67
- with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as tmp_file:
68
- file.save(tmp_file.name)
69
-
70
- # Process the file and add to document store
71
- document_store.add_document(tmp_file.name)
72
-
73
- # Clean up temporary file
74
- os.unlink(tmp_file.name)
75
-
76
- flash('File uploaded and processed successfully!', 'success')
77
- except Exception as e:
78
- flash(f'Error processing file: {str(e)}', 'error')
79
-
80
- return redirect(url_for('add_data'))
81
-
82
- @app.route('/api/generate', methods=['POST'])
83
- async def api_generate():
84
- """API endpoint to generate text based on stored data"""
85
- data = request.json
86
- query = data.get('query', '')
87
- gen_type = data.get('type', 'bio') # bio, cover_letter, general
88
-
89
- if not query:
90
- return jsonify({"error": "Query is required"}), 400
91
-
92
- try:
93
- # Generate response using RAG pipeline
94
- response = await rag_pipeline.generate(query, gen_type)
95
- return jsonify({"response": response})
96
- except Exception as e:
97
- return jsonify({"error": f"Error generating response: {str(e)}"}), 500
98
-
99
- @app.route('/generate', methods=['GET', 'POST'])
100
- def generate():
101
- """Generate text based on a query and display results"""
102
- if request.method == 'POST':
103
- query = request.form.get('query', '')
104
- gen_type = request.form.get('type', 'bio')
105
-
106
- if query:
107
- try:
108
- # Run the async function using asyncio
109
- response = asyncio.run(rag_pipeline.generate(query, gen_type))
110
- return render_template('generate.html', query=query, response=response, gen_type=gen_type)
111
- except Exception as e:
112
- flash(f'Error generating response: {str(e)}', 'error')
113
- return render_template('generate.html', query=query, error=str(e))
114
- else:
115
- flash('Query is required', 'error')
116
-
117
- return render_template('generate.html')
118
-
119
- @app.route('/debug/documents', methods=['GET'])
120
- def debug_documents():
121
- """Debug endpoint to view stored documents"""
122
- try:
123
- doc_count = len(document_store.documents)
124
- chunk_count = sum(len(doc.get('chunks', [])) for doc in document_store.documents.values())
125
-
126
- docs_summary = []
127
- for doc_id, doc in document_store.documents.items():
128
- docs_summary.append({
129
- "id": doc_id,
130
- "title": doc.get("title", "Untitled"),
131
- "chunks": len(doc.get("chunks", [])),
132
- "first_chunk_preview": doc.get("chunks", [""])[0][:100] + "..." if doc.get("chunks") else ""
133
- })
134
-
135
- return render_template(
136
- 'debug.html',
137
- doc_count=doc_count,
138
- chunk_count=chunk_count,
139
- docs=docs_summary
140
- )
141
- except Exception as e:
142
- return f"Error in debug endpoint: {str(e)}", 500
143
-
144
- @app.route('/health')
145
- def health_check():
146
- """Health check endpoint"""
147
- return jsonify({"status": "healthy", "message": "RAG application is running"})
148
-
149
- if __name__ == '__main__':
150
- # Ensure data directory exists
151
- os.makedirs("data", exist_ok=True)
152
-
153
- # Get port from environment variable (Hugging Face Spaces uses PORT)
154
- port = int(os.environ.get('PORT', 7860))
155
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  app.run(host='0.0.0.0', port=port, debug=False)
 
1
+ import os
2
+ import sys
3
+ import tempfile
4
+ import shutil
5
+ from datetime import datetime
6
+
7
+ # CRITICAL: Set cache directories BEFORE importing any HuggingFace libraries
8
+ os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers'
9
+ os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/tmp/sentence_transformers'
10
+ os.environ['HF_HOME'] = '/tmp/huggingface'
11
+ os.environ['HF_DATASETS_CACHE'] = '/tmp/datasets'
12
+
13
+ # Create cache directories
14
+ cache_dirs = ['/tmp/transformers', '/tmp/sentence_transformers', '/tmp/huggingface', '/tmp/datasets']
15
+ for cache_dir in cache_dirs:
16
+ try:
17
+ os.makedirs(cache_dir, exist_ok=True)
18
+ print(f" Created cache directory: {cache_dir}")
19
+ except PermissionError as e:
20
+ print(f" Failed to create {cache_dir}: {e}")
21
+ sys.exit(1)
22
+
23
+ # Now import other modules
24
+ from flask import Flask, request, jsonify, render_template, redirect, url_for, flash
25
+ import asyncio
26
+ from retriever.document_store import DocumentStore
27
+ from retriever.rag_pipeline import RAGPipeline
28
+ from models.model_loader import load_llm
29
+ from config import Config
30
+
31
+ app = Flask(__name__)
32
+ app.config.from_object(Config)
33
+ app.secret_key = os.getenv('SECRET_KEY', 'your-secret-key-here')
34
+
35
+ # Initialize components
36
+ print("===== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
37
+ print("Initializing document store...")
38
+ print(f"Using vector DB path: {os.getenv('VECTOR_DB_PATH', 'data/vector_store')}")
39
+
40
+ document_store = DocumentStore()
41
+ print("Document store initialized")
42
+
43
+ print("Loading LLM...")
44
+ llm = load_llm(api_key=app.config["LLAMA_API_KEY"])
45
+ print("LLM loaded")
46
+
47
+ print("Initializing RAG pipeline...")
48
+ rag_pipeline = RAGPipeline(document_store, llm)
49
+ print("RAG pipeline initialized")
50
+
51
+ @app.route('/')
52
+ def index():
53
+ """Home page"""
54
+ return render_template('index.html')
55
+
56
+ @app.route('/add_data', methods=['GET', 'POST'])
57
+ def add_data():
58
+ """Add data to the document store"""
59
+ if request.method == 'POST':
60
+ content = request.form.get('content')
61
+ title = request.form.get('title', 'Untitled')
62
+
63
+ if content:
64
+ try:
65
+ document_store.add_text(content=content, title=title)
66
+ flash('Data added successfully!', 'success')
67
+ return redirect(url_for('index'))
68
+ except Exception as e:
69
+ flash(f'Error adding data: {str(e)}', 'error')
70
+ else:
71
+ flash('Content is required', 'error')
72
+
73
+ return render_template('add_data.html')
74
+
75
+ @app.route('/upload_file', methods=['POST'])
76
+ def upload_file():
77
+ """Upload and process a file (PDF, TXT, etc.)"""
78
+ if 'file' not in request.files:
79
+ flash('No file selected', 'error')
80
+ return redirect(url_for('add_data'))
81
+
82
+ file = request.files['file']
83
+ if file.filename == '':
84
+ flash('No file selected', 'error')
85
+ return redirect(url_for('add_data'))
86
+
87
+ if file:
88
+ try:
89
+ # Create a temporary file
90
+ with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as tmp_file:
91
+ file.save(tmp_file.name)
92
+
93
+ # Process the file and add to document store
94
+ document_store.add_document(tmp_file.name)
95
+
96
+ # Clean up temporary file
97
+ os.unlink(tmp_file.name)
98
+
99
+ flash('File uploaded and processed successfully!', 'success')
100
+ except Exception as e:
101
+ flash(f'Error processing file: {str(e)}', 'error')
102
+
103
+ return redirect(url_for('add_data'))
104
+
105
+ @app.route('/api/generate', methods=['POST'])
106
+ async def api_generate():
107
+ """API endpoint to generate text based on stored data"""
108
+ data = request.json
109
+ query = data.get('query', '')
110
+ gen_type = data.get('type', 'bio') # bio, cover_letter, general
111
+
112
+ if not query:
113
+ return jsonify({"error": "Query is required"}), 400
114
+
115
+ try:
116
+ # Generate response using RAG pipeline
117
+ response = await rag_pipeline.generate(query, gen_type)
118
+ return jsonify({"response": response})
119
+ except Exception as e:
120
+ return jsonify({"error": f"Error generating response: {str(e)}"}), 500
121
+
122
+ @app.route('/generate', methods=['GET', 'POST'])
123
+ def generate():
124
+ """Generate text based on a query and display results"""
125
+ if request.method == 'POST':
126
+ query = request.form.get('query', '')
127
+ gen_type = request.form.get('type', 'bio')
128
+
129
+ if query:
130
+ try:
131
+ # Run the async function using asyncio
132
+ response = asyncio.run(rag_pipeline.generate(query, gen_type))
133
+ return render_template('generate.html', query=query, response=response, gen_type=gen_type)
134
+ except Exception as e:
135
+ flash(f'Error generating response: {str(e)}', 'error')
136
+ return render_template('generate.html', query=query, error=str(e))
137
+ else:
138
+ flash('Query is required', 'error')
139
+
140
+ return render_template('generate.html')
141
+
142
+ @app.route('/debug/documents', methods=['GET'])
143
+ def debug_documents():
144
+ """Debug endpoint to view stored documents"""
145
+ try:
146
+ doc_count = len(document_store.documents)
147
+ chunk_count = sum(len(doc.get('chunks', [])) for doc in document_store.documents.values())
148
+
149
+ docs_summary = []
150
+ for doc_id, doc in document_store.documents.items():
151
+ docs_summary.append({
152
+ "id": doc_id,
153
+ "title": doc.get("title", "Untitled"),
154
+ "chunks": len(doc.get("chunks", [])),
155
+ "first_chunk_preview": doc.get("chunks", [""])[0][:100] + "..." if doc.get("chunks") else ""
156
+ })
157
+
158
+ return render_template(
159
+ 'debug.html',
160
+ doc_count=doc_count,
161
+ chunk_count=chunk_count,
162
+ docs=docs_summary
163
+ )
164
+ except Exception as e:
165
+ return f"Error in debug endpoint: {str(e)}", 500
166
+
167
+ @app.route('/health')
168
+ def health_check():
169
+ """Health check endpoint"""
170
+ return jsonify({"status": "healthy", "message": "RAG application is running"})
171
+
172
+ if __name__ == '__main__':
173
+ # Ensure data directory exists
174
+ os.makedirs("data", exist_ok=True)
175
+
176
+ # Get port from environment variable (Hugging Face Spaces uses PORT)
177
+ port = int(os.environ.get('PORT', 7860))
178
+
179
+ print(f"Starting Flask app on port {port}")
180
  app.run(host='0.0.0.0', port=port, debug=False)