# NoCodeTextClassifier / Inference.py
# Alamgirapi's picture
# Upload Inference.py
# 7830025 verified
from NoCodeTextClassifier.preprocessing import *
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from pathlib import Path
import joblib
# Inference script: read one e-mail from stdin, clean it, vectorize it with a
# TF-IDF vocabulary fitted on the training CSV, and print the saved model's
# verdict ("spam" / "not_spam").
#
# NOTE(review): the TF-IDF vocabulary is rebuilt from the training CSV on every
# run. The resulting features only line up with the saved model if training
# used the same cleaning and the same vectorizer settings -- confirm against
# the training script, and consider persisting the fitted vectorizer alongside
# the model instead of refitting here.

# Input the email
text = input("Enter the Email: \n")

# Load the training data (path is relative to the current working directory;
# the CSV must contain an 'email' column).
train_path = Path("./ML Engineer/train.csv")
df = pd.read_csv(train_path)

# Clean the training text.
# TextCleaner is presumably supplied by the star import from
# NoCodeTextClassifier.preprocessing -- verify.
currency_symbols = r'[\$\£\€\¥\₹\¢\₽\₩\₪]'
text_cleaner = TextCleaner(currency_symbols)
df['clean_text'] = df['email'].apply(text_cleaner.clean_text)

# Fit the TfidfVectorizer on the training data. fit() returns the vectorizer
# itself (not a feature matrix), so its result is intentionally not bound.
vectorizer = TfidfVectorizer(max_features=10000)
vectorizer.fit(df['clean_text'])

# Clean the input email with the same cleaner used for the training text
clean_text = str(text_cleaner.clean_text(text))
print(f"\nThe clean text is : {clean_text}")

# Vectorize the cleaned email; transform() expects an iterable of documents,
# hence the single-element list.
features = vectorizer.transform([clean_text])

# Load the model from the file.
# NOTE: joblib.load unpickles arbitrary Python objects -- only load model
# files that come from a trusted source.
loaded_model = joblib.load('email_detection_model.pkl')

# Predict the class of the single input row; class 1 is reported as spam,
# anything else as not spam.
predicted_class = int(loaded_model.predict(features)[0])
predictions = "spam" if predicted_class == 1 else "not_spam"

# Print the prediction
print(f"\nThe prediction is : {predictions}")