Code for Python Email Spam Classifier

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import numpy as np
# Load the raw dataset from the working directory and preview the label column.
# 'v1' holds the class label for each row (see the printed output below).
data = pd.read_csv('spam.csv')
labels = data['v1']
print(labels)
0        ham
1        ham
2       spam
3        ham
4        ham
        ... 
5567    spam
5568     ham
5569     ham
5570     ham
5571     ham
Name: v1, Length: 5572, dtype: object
# Data preprocessing
# Drop every column whose name contains 'Unnamed' (presumably the empty,
# header-less columns that pandas auto-names 'Unnamed: N' -- confirm against
# the CSV).
columnsdrop = [col for col in data.columns if 'Unnamed' in col]
data.drop(columns=columnsdrop, inplace=True)

# Label mapping (not one-hot encoding): ham -> 1, spam -> 0.
# Any label other than 'ham'/'spam' maps to NaN and is removed by dropna().
data['v1'] = data['v1'].map({'ham': 1, 'spam': 0})
cv = CountVectorizer()
data = data.dropna()
print(data)

# Hold out 30% of the rows for testing. A fixed random_state makes the split
# (and therefore every score reported later) reproducible, and stratify keeps
# the ham/spam ratio the same in the train and test partitions.
x_train, x_test, y_train, y_test = train_test_split(
    data['v2'],
    data['v1'],
    test_size=0.3,
    random_state=42,
    stratify=data['v1'],
)

# Bag of words: learn the vocabulary on the training messages only, so no
# information from the test set leaks into the features. x_test stays as raw
# text and must go through cv.transform() before prediction.
x_train = cv.fit_transform(x_train)
      v1                                                 v2
0      1  Go until jurong point, crazy.. Available only ...
1      1                      Ok lar... Joking wif u oni...
2      0  Free entry in 2 a wkly comp to win FA Cup fina...
3      1  U dun say so early hor... U c already then say...
4      1  Nah I don't think he goes to usf, he lives aro...
...   ..                                                ...
5567   0  This is the 2nd time we have tried 2 contact u...
5568   1              Will Ì_ b going to esplanade fr home?
5569   1  Pity, * was in mood for that. So...any other s...
5570   1  The guy did some bitching but I acted like i'd...
5571   1                         Rofl. Its true to its name

[5572 rows x 2 columns]
from sklearn.linear_model import LogisticRegression

# NOTE: despite its name, MNB is a logistic-regression classifier, not a
# multinomial naive Bayes model. The name is kept because later cells use it.
lr_params = {'C': 1.0, 'solver': 'lbfgs', 'max_iter': 100}
MNB = LogisticRegression(**lr_params)

# Train on the bag-of-words features; the bare call also lets a notebook
# display the fitted estimator.
MNB.fit(x_train, y_train)


LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# r2_score stays imported in case later cells still reference it; it is a
# regression metric and is no longer used to score this classifier.
from sklearn.metrics import accuracy_score, r2_score

print(type(x_test))

# x_test is still raw text, so vectorize it with the vocabulary fitted on the
# training set. The sparse matrices are passed straight to predict(); no
# .toarray() densification is needed.
predicted = MNB.predict(cv.transform(x_test))
p2 = MNB.predict(x_train)
print(predicted)

# Fraction of correctly classified messages: held-out set first, then the
# training set (a large gap between the two suggests overfitting).
print(accuracy_score(y_test, predicted))
print(accuracy_score(y_train, p2))
print(y_test)
print()
<class 'pandas.core.series.Series'>
[1 1 0 ... 1 1 1]
0.8271387955544068
0.9848356501090111
319     1
3471    1
1021    0
5299    1
1092    1
       ..
1636    1
4517    1
4431    1
3666    1
866     1
Name: v1, Length: 1672, dtype: int64
# Spot-check the trained model on a single hand-written message.
text = ["click on my link thing.com"]
# Vectorize with the already-fitted CountVectorizer, then classify.
text_features = cv.transform(text)
testingvalue = MNB.predict(text_features)
print(testingvalue)
[1]


# Inspect the fitted model's parameters: the per-feature coefficients and
# the intercept term.
coefficients, intercept = MNB.coef_, MNB.intercept_
print(coefficients, intercept)
[[-0.42117837 -0.40637999 -0.00323472 ...  0.1307255   0.00748295
   0.00334214]] [4.65871166]