Spaces:

Alamgirapi
/

NoCodeTextClassifier

Sleeping

NoCodeTextClassifier / Inference.py

Upload Inference.py

7830025 verified 5 months ago

1.15 kB

	from NoCodeTextClassifier.preprocessing import *
	from sklearn.feature_extraction.text import TfidfVectorizer
	import pandas as pd
	from pathlib import Path
	import joblib

	# Inference script: classify one e-mail typed on stdin as "spam" or "not_spam".
	#
	# Steps: read the e-mail, fit a TF-IDF vectorizer on the cleaned training
	# CSV, vectorize the cleaned e-mail, and run the saved classifier on it.
	# Both file paths below are relative, so they resolve against the current
	# working directory.

	# Input the email
	text = input("Enter the Email: \n")

	# Load the training data; only its 'email' column is read here.
	train_path = Path("./ML Engineer/train.csv")
	df = pd.read_csv(train_path)

	# Clean the training text. The same cleaner instance is reused for the
	# input e-mail further down so both go through identical preprocessing.
	currency_symbols = r'[\$\£\€\¥\₹\¢\₽\₩\₪]'
	text_cleaner = TextCleaner(currency_symbols)
	df['clean_text'] = df['email'].apply(text_cleaner.clean_text)

	# Fit the TfidfVectorizer on the training data. fit() returns the vectorizer
	# itself (not a feature matrix), so its return value is not bound.
	# NOTE(review): presumably this has to reproduce the vocabulary the saved
	# model was trained with - confirm. Persisting the fitted vectorizer next to
	# the model would avoid refitting on every run.
	vectorizer = TfidfVectorizer(max_features=10000)
	vectorizer.fit(df['clean_text'])

	# Clean the input email
	clean_text = str(text_cleaner.clean_text(text))
	print(f"\nThe clean text is : {clean_text}")

	# Vectorize the cleaned email as a one-document batch.
	features = vectorizer.transform([clean_text])

	# Load the model from the file.
	# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
	# only load model files that come from a trusted source.
	loaded_model = joblib.load('email_detection_model.pkl')

	# Predict the class of the single e-mail. Label 1 is reported as "spam",
	# any other label as "not_spam".
	predicted_label = int(loaded_model.predict(features)[0])
	predictions = "spam" if predicted_label == 1 else "not_spam"

	# Print the prediction
	print(f"\nThe prediction is : {predictions}")