Spaces:

Alamgirapi
/

NoCodeTextClassifier

Sleeping

App Files Files Community

Alamgirapi committed on Aug 6, 2025

Commit

e6c2921

verified ·

1 Parent(s): 6b934fc

Update app.py

Browse files

Files changed (1) hide show

app.py +302 -359

app.py CHANGED Viewed

@@ -2,441 +2,384 @@ import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
-from NoCodeTextClassifier.EDA import Informations, Visualizations
-from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-from NoCodeTextClassifier.preprocessing import process, TextCleaner, Vectorization
-from NoCodeTextClassifier.models import Models
 import os
 import pickle
-from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
 import io
 # Set page config
-st.set_page_config(page_title="Text Classification App", page_icon="📝", layout="wide")
-# Utility functions
 def save_artifacts(obj, folder_name, file_name):
-    """Save artifacts like encoders and vectorizers"""
     try:
         os.makedirs(folder_name, exist_ok=True)
-        with open(os.path.join(folder_name, file_name), 'wb') as f:
             pickle.dump(obj, f)
         return True
     except Exception as e:
-        st.error(f"Error saving {file_name}: {str(e)}")
         return False
 def load_artifacts(folder_name, file_name):
-    """Load saved artifacts"""
     try:
-        with open(os.path.join(folder_name, file_name), 'rb') as f:
-            return pickle.load(f)
-    except FileNotFoundError:
-        st.error(f"File {file_name} not found in {folder_name} folder")
-        return None
     except Exception as e:
-        st.error(f"Error loading {file_name}: {str(e)}")
         return None
 def load_model(model_name):
-    """Load trained model"""
-    try:
-        with open(os.path.join('models', model_name), 'rb') as f:
-            return pickle.load(f)
-    except FileNotFoundError:
-        st.error(f"Model {model_name} not found. Please train a model first.")
-        return None
-    except Exception as e:
-        st.error(f"Error loading model {model_name}: {str(e)}")
-        return None
-def safe_read_csv(uploaded_file, encoding_options=['utf-8', 'latin1', 'iso-8859-1', 'cp1252']):
-    """Safely read CSV with multiple encoding options"""
-    for encoding in encoding_options:
-        try:
-            # Reset file pointer
-            uploaded_file.seek(0)
-            # Read as bytes first, then decode
-            content = uploaded_file.read()
-            if isinstance(content, bytes):
-                content = content.decode(encoding)
-            # Use StringIO to create a file-like object
-            df = pd.read_csv(io.StringIO(content))
-            st.success(f"File loaded successfully with {encoding} encoding")
-            return df
-        except UnicodeDecodeError:
-            continue
-        except Exception as e:
-            st.warning(f"Failed to read with {encoding} encoding: {str(e)}")
-            continue
-    # If all encodings fail, try pandas default
-    try:
-        uploaded_file.seek(0)
-        df = pd.read_csv(uploaded_file)
-        st.success("File loaded with default encoding")
-        return df
-    except Exception as e:
-        st.error(f"All encoding attempts failed. Error: {str(e)}")
-        return None
 def predict_text(model_name, text, vectorizer_type="tfidf"):
-    """Make prediction on new text"""
     try:
-        # Load model
         model = load_model(model_name)
         if model is None:
             return None, None
-        # Load vectorizer
         vectorizer_file = f"{vectorizer_type}_vectorizer.pkl"
         vectorizer = load_artifacts("artifacts", vectorizer_file)
         if vectorizer is None:
             return None, None
-        # Load label encoder
         encoder = load_artifacts("artifacts", "encoder.pkl")
         if encoder is None:
             return None, None
-        # Clean and vectorize text
         text_cleaner = TextCleaner()
         clean_text = text_cleaner.clean_text(text)
-        # Transform text using the same vectorizer used during training
         text_vector = vectorizer.transform([clean_text])
-        # Make prediction
         prediction = model.predict(text_vector)
         prediction_proba = None
-        # Get prediction probabilities if available
         if hasattr(model, 'predict_proba'):
             try:
                 prediction_proba = model.predict_proba(text_vector)[0]
             except:
-                pass
-        # Decode prediction
         predicted_label = encoder.inverse_transform(prediction)[0]
         return predicted_label, prediction_proba
     except Exception as e:
-        st.error(f"Error during prediction: {str(e)}")
         return None, None
-# Streamlit App
-st.title('📝 No Code Text Classification App')
-st.write('Understand the behavior of your text data and train a model to classify the text data')
-# Sidebar
-st.sidebar.title("Navigation")
-section = st.sidebar.radio("Choose Section", ["Data Analysis", "Train Model", "Predictions"])
-# Upload Data
-st.sidebar.subheader("📁 Upload Your Dataset")
-train_data = st.sidebar.file_uploader("Upload training data", type=["csv"], key="train_upload")
-test_data = st.sidebar.file_uploader("Upload test data (optional)", type=["csv"], key="test_upload")
-# Global variables to store data and settings
 if 'vectorizer_type' not in st.session_state:
     st.session_state.vectorizer_type = "tfidf"
 if 'train_df' not in st.session_state:
     st.session_state.train_df = None
-if 'info' not in st.session_state:
-    st.session_state.info = None
-# Process uploaded data
-if train_data is not None:
-    try:
-        # Use safe CSV reading function
-        train_df = safe_read_csv(train_data)
-        if train_df is not None:
-            st.session_state.train_df = train_df
-            if test_data is not None:
-                test_df = safe_read_csv(test_data)
-                st.session_state.test_df = test_df
-            else:
-                st.session_state.test_df = None
-            st.sidebar.success("✅ Data loaded successfully!")
-            st.write("Training Data Preview:")
-            st.write(train_df.head(3))
-            columns = train_df.columns.tolist()
-            text_data = st.sidebar.selectbox("Choose the text column:", columns, key="text_col")
-            target = st.sidebar.selectbox("Choose the target column:", columns, key="target_col")
-            if text_data and target:
-                try:
-                    # Process data
-                    info = Informations(train_df, text_data, target)
-                    train_df['clean_text'] = info.clean_text()
-                    train_df['text_length'] = info.text_length()
-                    # Handle label encoding manually
-                    from sklearn.preprocessing import LabelEncoder
-                    label_encoder = LabelEncoder()
-                    train_df['target'] = label_encoder.fit_transform(train_df[target])
-                    # Save label encoder for later use
-                    if save_artifacts(label_encoder, "artifacts", "encoder.pkl"):
-                        st.sidebar.success("✅ Data processed successfully!")
-                    st.session_state.train_df = train_df
-                    st.session_state.info = info
-                except Exception as e:
-                    st.error(f"Error processing data: {str(e)}")
-                    st.session_state.train_df = None
-                    st.session_state.info = None
-    except Exception as e:
-        st.error(f"Error loading data: {str(e)}")
-        st.session_state.train_df = None
-        st.session_state.info = None
-# Get data from session state
-train_df = st.session_state.get('train_df')
-info = st.session_state.get('info')
-# Data Analysis Section
-if section == "Data Analysis":
-    if train_data is not None and train_df is not None:
-        try:
-            st.subheader("📊 Get Insights from the Data")
-            col1, col2, col3 = st.columns(3)
-            with col1:
-                st.metric("Data Shape", f"{info.shape()[0]} rows × {info.shape()[1]} cols")
-            with col2:
-                st.metric("Classes", len(train_df['target'].unique()))
-            with col3:
-                st.metric("Missing Values", info.missing_values())
-            st.write("**Class Distribution:**", info.class_imbalanced())
-            st.write("**Processed Data Preview:**")
-            st.write(train_df[['clean_text', 'text_length', 'target']].head(3))
-            st.markdown("**Text Length Analysis**")
-            st.write(info.analysis_text_length('text_length'))
-            # Calculate correlation manually
-            correlation = train_df[['text_length', 'target']].corr().iloc[0, 1]
-            st.write(f"**Correlation between Text Length and Target:** {correlation:.4f}")
-            st.subheader("📈 Visualizations")
-            try:
-                columns = train_df.columns.tolist()
-                text_col = next((col for col in columns if 'text' in col.lower() or col in ['message', 'content', 'review']), columns[0])
-                target_col = next((col for col in columns if col in ['label', 'target', 'class', 'category']), columns[-1])
-                vis = Visualizations(train_df, text_col, target_col)
-                vis.class_distribution()
-                vis.text_length_distribution()
-            except Exception as e:
-                st.error(f"Error generating visualizations: {str(e)}")
-        except Exception as e:
-            st.error(f"Error in data analysis: {str(e)}")
     else:
-        st.warning("⚠️ Please upload training data to get insights")
-# Train Model Section
 elif section == "Train Model":
-    if train_data is not None and train_df is not None:
-        try:
-            st.subheader("🤖 Train a Model")
-            # Create two columns for model selection
-            col1, col2 = st.columns(2)
-            with col1:
-                st.markdown("**Select Model:**")
-                model = st.radio("Choose the Model", [
-                    "Logistic Regression", "Decision Tree",
-                    "Random Forest", "Linear SVC", "SVC",
-                    "Multinomial Naive Bayes", "Gaussian Naive Bayes"
-                ])
-            with col2:
-                st.markdown("**Select Vectorizer:**")
-                vectorizer_choice = st.radio("Choose Vectorizer", ["Tfidf Vectorizer", "Count Vectorizer"])
-            # Initialize vectorizer
-            if vectorizer_choice == "Tfidf Vectorizer":
-                vectorizer = TfidfVectorizer(max_features=10000, stop_words='english')
-                st.session_state.vectorizer_type = "tfidf"
-            else:
-                vectorizer = CountVectorizer(max_features=10000, stop_words='english')
-                st.session_state.vectorizer_type = "count"
-            st.write("**Training Data Preview:**")
-            st.write(train_df[['clean_text', 'target']].head(3))
-            # Vectorize text data
-            with st.spinner("Vectorizing text data..."):
-                X = vectorizer.fit_transform(train_df['clean_text'])
-                y = train_df['target']
-            # Split data
-            X_train, X_test, y_train, y_test = process.split_data(X, y)
-            st.write(f"**Data split** - Train: {X_train.shape}, Test: {X_test.shape}")
-            # Save vectorizer for later use
-            vectorizer_filename = f"{st.session_state.vectorizer_type}_vectorizer.pkl"
-            save_artifacts(vectorizer, "artifacts", vectorizer_filename)
-            if st.button("🚀 Start Training", type="primary"):
-                with st.spinner("Training model..."):
-                    try:
-                        models = Models(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
-                        # Train selected model
-                        if model == "Logistic Regression":
-                            models.LogisticRegression()
-                        elif model == "Decision Tree":
-                            models.DecisionTree()
-                        elif model == "Linear SVC":
-                            models.LinearSVC()
-                        elif model == "SVC":
-                            models.SVC()
-                        elif model == "Multinomial Naive Bayes":
-                            models.MultinomialNB()
-                        elif model == "Random Forest":
-                            models.RandomForestClassifier()
-                        elif model == "Gaussian Naive Bayes":
-                            models.GaussianNB()
-                        st.success("🎉 Model training completed!")
-                        st.info("You can now use the 'Predictions' section to classify new text.")
-                    except Exception as e:
-                        st.error(f"Error during model training: {str(e)}")
-        except Exception as e:
-            st.error(f"Error in model training: {str(e)}")
     else:
-        st.warning("⚠️ Please upload training data to train a model")
-# Predictions Section
 elif section == "Predictions":
-    st.subheader("🔮 Perform Predictions on New Text")
-    # Check if models exist
-    if os.path.exists("models") and os.listdir("models"):
-        # Text input for prediction
-        text_input = st.text_area("Enter the text to classify:", height=100, placeholder="Type your text here...")
-        # Model selection
-        available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
-        if available_models:
-            selected_model = st.selectbox("Choose the trained model:", available_models)
-            # Prediction button
-            if st.button("🎯 Predict", key="single_predict", type="primary"):
-                if text_input.strip():
-                    with st.spinner("Making prediction..."):
-                        predicted_label, prediction_proba = predict_text(
-                            selected_model,
-                            text_input,
-                            st.session_state.get('vectorizer_type', 'tfidf')
-                        )
-                        if predicted_label is not None:
-                            st.success("✅ Prediction completed!")
-                            # Display results
-                            st.markdown("### 📊 Prediction Results")
-                            col1, col2 = st.columns([2, 1])
-                            with col1:
-                                st.markdown(f"**Input Text:** {text_input}")
-                            with col2:
-                                st.markdown(f"**Predicted Class:** `{predicted_label}`")
-                            # Display probabilities if available
-                            if prediction_proba is not None:
-                                st.markdown("**Class Probabilities:**")
-                                # Load encoder to get class names
-                                encoder = load_artifacts("artifacts", "encoder.pkl")
-                                if encoder is not None:
-                                    classes = encoder.classes_
-                                    prob_df = pd.DataFrame({
-                                        'Class': classes,
-                                        'Probability': prediction_proba
-                                    }).sort_values('Probability', ascending=False)
-                                    col1, col2 = st.columns(2)
-                                    with col1:
-                                        st.bar_chart(prob_df.set_index('Class'))
-                                    with col2:
-                                        st.dataframe(prob_df, use_container_width=True)
-                else:
-                    st.warning("⚠️ Please enter some text to classify")
         else:
-            st.warning("⚠️ No trained models found. Please train a model first.")
     else:
-        st.warning("⚠️ No trained models found. Please go to 'Train Model' section to train a model first.")
-    # Option to classify multiple texts
-    st.markdown("---")
-    st.subheader("📊 Batch Predictions")
-    uploaded_file = st.file_uploader("Upload a CSV file with text to classify", type=['csv'], key="batch_upload")
-    if uploaded_file is not None:
-        try:
-            batch_df = safe_read_csv(uploaded_file)
-            if batch_df is not None:
-                st.write("**Uploaded data preview:**")
-                st.write(batch_df.head())
-                # Select text column
-                text_column = st.selectbox("Select the text column:", batch_df.columns.tolist())
-                if os.path.exists("models") and os.listdir("models"):
-                    available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
-                    batch_model = st.selectbox("Choose model for batch prediction:", available_models, key="batch_model")
-                    if st.button("🚀 Run Batch Predictions", key="batch_predict", type="primary"):
-                        with st.spinner("Processing batch predictions..."):
-                            predictions = []
-                            progress_bar = st.progress(0)
-                            for idx, text in enumerate(batch_df[text_column]):
-                                pred, _ = predict_text(
-                                    batch_model,
-                                    str(text),
-                                    st.session_state.get('vectorizer_type', 'tfidf')
-                                )
-                                predictions.append(pred if pred is not None else "Error")
-                                progress_bar.progress((idx + 1) / len(batch_df))
-                            batch_df['Predicted_Class'] = predictions
-                            st.success("✅ Batch predictions completed!")
-                            st.write("**Results:**")
-                            st.write(batch_df[[text_column, 'Predicted_Class']])
-                            # Download results
-                            csv = batch_df.to_csv(index=False)
-                            st.download_button(
-                                label="📥 Download predictions as CSV",
-                                data=csv,
-                                file_name="batch_predictions.csv",
-                                mime="text/csv"
-                            )
-        except Exception as e:
-            st.error(f"Error in batch prediction: {str(e)}")

 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 import os
 import pickle
 import io
+import traceback
+import sys
+from datetime import datetime
+# Import ML libraries with error handling
+try:
+    from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
+    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+    from sklearn.preprocessing import LabelEncoder
+    st.success("✅ Sklearn imported successfully")
+except ImportError as e:
+    st.error(f"❌ Sklearn import error: {e}")
+# Import custom modules with error handling
+try:
+    from NoCodeTextClassifier.EDA import Informations, Visualizations
+    from NoCodeTextClassifier.preprocessing import process, TextCleaner, Vectorization
+    from NoCodeTextClassifier.models import Models
+    st.success("✅ NoCodeTextClassifier imported successfully")
+except ImportError as e:
+    st.error(f"❌ NoCodeTextClassifier import error: {e}")
+    st.info("Please ensure NoCodeTextClassifier package is installed")
 # Set page config
+st.set_page_config(page_title="Debug Text Classification", page_icon="🔍", layout="wide")
+# Debug section
+st.sidebar.header("🔍 Debug Information")
+debug_mode = st.sidebar.checkbox("Enable Debug Mode", value=True)
+def debug_log(message, level="INFO"):
+    """Debug logging function"""
+    if debug_mode:
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        st.sidebar.write(f"**{timestamp} [{level}]:** {message}")
+def detailed_error_info(e):
+    """Get detailed error information"""
+    error_type = type(e).__name__
+    error_message = str(e)
+    error_traceback = traceback.format_exc()
+    return {
+        'type': error_type,
+        'message': error_message,
+        'traceback': error_traceback
+    }
+def inspect_uploaded_file(uploaded_file):
+    """Inspect uploaded file properties"""
+    debug_log("🔍 Inspecting uploaded file...")
+    try:
+        file_info = {
+            'name': uploaded_file.name,
+            'type': uploaded_file.type,
+            'size': uploaded_file.size,
+            'file_id': getattr(uploaded_file, 'file_id', 'Not available')
+        }
+        debug_log(f"File name: {file_info['name']}")
+        debug_log(f"File type: {file_info['type']}")
+        debug_log(f"File size: {file_info['size']} bytes")
+        debug_log(f"File ID: {file_info['file_id']}")
+        # Try to read first few bytes
+        uploaded_file.seek(0)
+        first_bytes = uploaded_file.read(100)
+        debug_log(f"First 100 bytes type: {type(first_bytes)}")
+        debug_log(f"First 100 bytes preview: {first_bytes[:50]}...")
+        # Reset file pointer
+        uploaded_file.seek(0)
+        return file_info
+    except Exception as e:
+        error_info = detailed_error_info(e)
+        debug_log(f"❌ Error inspecting file: {error_info['type']}: {error_info['message']}", "ERROR")
+        st.sidebar.error(f"File inspection error: {error_info['message']}")
+        return None
+def safe_read_csv_debug(uploaded_file, encoding_options=['utf-8', 'latin1', 'iso-8859-1', 'cp1252']):
+    """Safely read CSV with extensive debugging"""
+    debug_log("🔄 Starting CSV read process...")
+    # Inspect file first
+    file_info = inspect_uploaded_file(uploaded_file)
+    if file_info is None:
+        return None
+    # Try different reading methods
+    methods = [
+        ("Direct pandas read", lambda f: pd.read_csv(f)),
+        ("BytesIO method", lambda f: pd.read_csv(io.BytesIO(f.read()))),
+        ("StringIO method", lambda f: pd.read_csv(io.StringIO(f.read().decode('utf-8')))),
+    ]
+    for method_name, method_func in methods:
+        debug_log(f"🔄 Trying method: {method_name}")
+        for encoding in encoding_options:
+            try:
+                debug_log(f"  - Attempting encoding: {encoding}")
+                uploaded_file.seek(0)
+                if method_name == "Direct pandas read":
+                    df = pd.read_csv(uploaded_file, encoding=encoding)
+                elif method_name == "BytesIO method":
+                    uploaded_file.seek(0)
+                    content = uploaded_file.read()
+                    df = pd.read_csv(io.BytesIO(content), encoding=encoding)
+                elif method_name == "StringIO method":
+                    uploaded_file.seek(0)
+                    content = uploaded_file.read()
+                    if isinstance(content, bytes):
+                        content = content.decode(encoding)
+                    df = pd.read_csv(io.StringIO(content))
+                debug_log(f"✅ Success with {method_name} + {encoding}")
+                debug_log(f"DataFrame shape: {df.shape}")
+                debug_log(f"Columns: {list(df.columns)}")
+                st.success(f"File loaded successfully using {method_name} with {encoding} encoding")
+                return df
+            except UnicodeDecodeError as e:
+                debug_log(f"  - Unicode error with {encoding}: {str(e)}", "WARNING")
+                continue
+            except Exception as e:
+                error_info = detailed_error_info(e)
+                debug_log(f"  - Error with {method_name} + {encoding}: {error_info['type']}: {error_info['message']}", "ERROR")
+                # Show detailed error for 403 or permission errors
+                if "403" in str(e) or "permission" in str(e).lower():
+                    st.error("🚨 PERMISSION ERROR DETECTED!")
+                    st.error(f"Method: {method_name}, Encoding: {encoding}")
+                    st.error(f"Error type: {error_info['type']}")
+                    st.error(f"Error message: {error_info['message']}")
+                    st.code(error_info['traceback'])
+                continue
+    debug_log("❌ All reading methods failed", "ERROR")
+    st.error("All CSV reading methods failed. Check debug log for details.")
+    return None
+# Utility functions with debugging
 def save_artifacts(obj, folder_name, file_name):
+    """Save artifacts with debugging"""
+    debug_log(f"💾 Saving {file_name} to {folder_name}")
     try:
         os.makedirs(folder_name, exist_ok=True)
+        full_path = os.path.join(folder_name, file_name)
+        with open(full_path, 'wb') as f:
             pickle.dump(obj, f)
+        debug_log(f"✅ Successfully saved {file_name}")
         return True
     except Exception as e:
+        error_info = detailed_error_info(e)
+        debug_log(f"❌ Error saving {file_name}: {error_info['message']}", "ERROR")
+        st.error(f"Save error: {error_info['message']}")
         return False
 def load_artifacts(folder_name, file_name):
+    """Load artifacts with debugging"""
+    debug_log(f"📂 Loading {file_name} from {folder_name}")
     try:
+        full_path = os.path.join(folder_name, file_name)
+        if not os.path.exists(full_path):
+            debug_log(f"❌ File not found: {full_path}", "ERROR")
+            return None
+        with open(full_path, 'rb') as f:
+            obj = pickle.load(f)
+        debug_log(f"✅ Successfully loaded {file_name}")
+        return obj
     except Exception as e:
+        error_info = detailed_error_info(e)
+        debug_log(f"❌ Error loading {file_name}: {error_info['message']}", "ERROR")
+        st.error(f"Load error: {error_info['message']}")
         return None
 def load_model(model_name):
+    """Load model with debugging"""
+    debug_log(f"🤖 Loading model: {model_name}")
+    return load_artifacts("models", model_name)
 def predict_text(model_name, text, vectorizer_type="tfidf"):
+    """Make prediction with debugging"""
+    debug_log(f"🔮 Starting prediction with {model_name}")
     try:
+        # Load components
         model = load_model(model_name)
         if model is None:
             return None, None
         vectorizer_file = f"{vectorizer_type}_vectorizer.pkl"
         vectorizer = load_artifacts("artifacts", vectorizer_file)
         if vectorizer is None:
             return None, None
         encoder = load_artifacts("artifacts", "encoder.pkl")
         if encoder is None:
             return None, None
+        debug_log("🧹 Cleaning text...")
         text_cleaner = TextCleaner()
         clean_text = text_cleaner.clean_text(text)
+        debug_log(f"Cleaned text preview: {clean_text[:50]}...")
+        debug_log("🔢 Vectorizing text...")
         text_vector = vectorizer.transform([clean_text])
+        debug_log(f"Vector shape: {text_vector.shape}")
+        debug_log("🎯 Making prediction...")
         prediction = model.predict(text_vector)
         prediction_proba = None
         if hasattr(model, 'predict_proba'):
             try:
                 prediction_proba = model.predict_proba(text_vector)[0]
+                debug_log(f"Prediction probabilities: {prediction_proba}")
             except:
+                debug_log("No prediction probabilities available", "WARNING")
         predicted_label = encoder.inverse_transform(prediction)[0]
+        debug_log(f"✅ Prediction complete: {predicted_label}")
         return predicted_label, prediction_proba
     except Exception as e:
+        error_info = detailed_error_info(e)
+        debug_log(f"❌ Prediction error: {error_info['message']}", "ERROR")
+        st.error(f"Prediction error: {error_info['message']}")
+        if debug_mode:
+            st.code(error_info['traceback'])
         return None, None
+# Main App
+st.title('🔍 Debug Text Classification App')
+st.write('Debug version to identify and fix issues')
+# Environment info
+if debug_mode:
+    st.sidebar.subheader("🖥️ Environment Info")
+    st.sidebar.write(f"Python version: {sys.version}")
+    st.sidebar.write(f"Streamlit version: {st.__version__}")
+    st.sidebar.write(f"Pandas version: {pd.__version__}")
+    st.sidebar.write(f"Current working directory: {os.getcwd()}")
+    # Check directory permissions
+    try:
+        test_dir = "test_permissions"
+        os.makedirs(test_dir, exist_ok=True)
+        test_file = os.path.join(test_dir, "test.txt")
+        with open(test_file, 'w') as f:
+            f.write("test")
+        os.remove(test_file)
+        os.rmdir(test_dir)
+        st.sidebar.success("✅ File system permissions OK")
+    except Exception as e:
+        st.sidebar.error(f"❌ File system permission issue: {e}")
+# Sidebar navigation
+section = st.sidebar.radio("Choose Section", ["File Upload Debug", "Data Analysis", "Train Model", "Predictions"])
+# Session state initialization
 if 'vectorizer_type' not in st.session_state:
     st.session_state.vectorizer_type = "tfidf"
 if 'train_df' not in st.session_state:
     st.session_state.train_df = None
+# File Upload Debug Section
+if section == "File Upload Debug":
+    st.subheader("🔍 File Upload Debugging")
+    st.info("This section helps debug file upload issues. Upload your file and see detailed error information.")
+    train_data = st.file_uploader("Upload training data (DEBUG MODE)", type=["csv"], key="debug_upload")
+    if train_data is not None:
+        st.write("### File Upload Detected!")
+        # Show raw file info
+        st.write("**Raw File Information:**")
+        st.json({
+            "name": train_data.name,
+            "type": train_data.type if hasattr(train_data, 'type') else "Unknown",
+            "size": train_data.size if hasattr(train_data, 'size') else "Unknown"
+        })
+        # Try to read the file
+        st.write("### Attempting to Read File...")
+        with st.spinner("Reading file with debug mode..."):
+            df = safe_read_csv_debug(train_data)
+        if df is not None:
+            st.success("🎉 File successfully loaded!")
+            st.write("**Data Preview:**")
+            st.dataframe(df.head())
+            st.write(f"**Shape:** {df.shape}")
+            st.write(f"**Columns:** {list(df.columns)}")
+            st.write(f"**Data Types:**")
+            st.write(df.dtypes)
+            # Store in session state
+            st.session_state.train_df = df
+        else:
+            st.error("❌ Failed to load file. Check the debug log for details.")
+            # Additional troubleshooting
+            st.write("### 🔧 Troubleshooting Steps:")
+            st.write("1. Check if your file is a valid CSV")
+            st.write("2. Try saving your CSV with different encoding (UTF-8 recommended)")
+            st.write("3. Check if file size is within limits")
+            st.write("4. Ensure no special characters in filename")
+            st.write("5. Try uploading from a different location")
+# Other sections (simplified for debugging)
+elif section == "Data Analysis":
+    st.subheader("📊 Data Analysis")
+    if st.session_state.train_df is not None:
+        df = st.session_state.train_df
+        st.write("Using loaded data from debug session:")
+        st.dataframe(df.head())
+        # Basic analysis without custom modules if they fail
+        st.write(f"**Shape:** {df.shape}")
+        st.write(f"**Columns:** {list(df.columns)}")
+        st.write(f"**Missing values:**")
+        st.write(df.isnull().sum())
     else:
+        st.warning("No data loaded. Please use 'File Upload Debug' section first.")
 elif section == "Train Model":
+    st.subheader("🤖 Train Model")
+    st.info("Use this section after successfully loading data in debug mode.")
+    if st.session_state.train_df is not None:
+        st.success("Data available for training!")
+        # Add your training logic here
     else:
+        st.warning("No data loaded. Please use 'File Upload Debug' section first.")
 elif section == "Predictions":
+    st.subheader("🔮 Predictions")
+    st.info("Use this section after training a model.")
+    # Check for trained models
+    if os.path.exists("models"):
+        models = [f for f in os.listdir("models") if f.endswith('.pkl')]
+        if models:
+            st.write(f"Available models: {models}")
         else:
+            st.info("No trained models found.")
     else:
+        st.info("Models directory not found.")
+# Debug summary
+if debug_mode:
+    st.sidebar.markdown("---")
+    st.sidebar.subheader("📋 Debug Summary")
+    if st.session_state.train_df is not None:
+        st.sidebar.success("✅ Data loaded successfully")
+    else:
+        st.sidebar.warning("⚠️ No data loaded")