import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib
class aienglish:
    """Train a TF-IDF + logistic-regression classifier on a labelled text CSV.

    The CSV must have a ``Text`` column (the input strings) and an ``IsEnc``
    column (the label). NOTE(review): ``IsEnc`` presumably means "is still
    encrypted" (0 = readable English) -- confirm against the dataset.

    Typical use: ``preprocess()`` -> ``train()`` -> ``export()``.
    """

    def __init__(self, filename='datatrainisenglish.csv'):
        """Load the training data.

        Args:
            filename: Path to the training CSV. Defaults to the dataset the
                class was originally hard-wired to.
        """
        self.df = pd.read_csv(filename)

    def preprocess(self):
        """Split the data 80/20 and build TF-IDF features for both parts."""
        self.X = self.df['Text']
        self.y = self.df['IsEnc']
        # Fixed random_state keeps the split reproducible between runs.
        (
            self.X_train,
            self.X_test,
            self.y_train,
            self.y_test,
        ) = train_test_split(self.X, self.y, test_size=0.2, random_state=42)

        # Fit the vocabulary on the training part only, then reuse it for the
        # test part so no test information leaks into the features.
        self.tfidf_vectorizer = TfidfVectorizer()
        self.X_train_tfidf = self.tfidf_vectorizer.fit_transform(self.X_train)
        self.X_test_tfidf = self.tfidf_vectorizer.transform(self.X_test)

    def train(self):
        """Fit the model and score it on the held-out test split.

        Requires ``preprocess()`` to have been called. The accuracy is
        printed; accuracy and the full classification report are also kept on
        the instance as ``self.accuracy`` and ``self.report``.
        """
        self.model = LogisticRegression()
        self.model.fit(self.X_train_tfidf, self.y_train)

        y_pred = self.model.predict(self.X_test_tfidf)

        # Keep both metrics on the instance instead of discarding the report.
        self.accuracy = accuracy_score(self.y_test, y_pred)
        self.report = classification_report(self.y_test, y_pred)
        print("Accuracy:", self.accuracy)

    def export(self):
        """Write the trained model and the fitted vectorizer to joblib files."""
        joblib.dump(self.model, "Checkingenglishmodel.joblib")
        # The vectorizer must be saved too: predictions need the same vocabulary.
        joblib.dump(self.tfidf_vectorizer, "vectorizerenglish.joblib")
First Key Commit
This commit adds an AI English-detection class that uses logistic regression to tell whether encrypted text, once decrypted with a given key, has actually been decrypted. It is an essential part of decrypting the key-based ciphers, since it lets us brute-force all of the keys. This AI worked extremely well, producing accuracy rates of around 90% in testing.
# imports
from sklearn.feature_extraction.text import CountVectorizer # converting text to numbers
from sklearn.naive_bayes import MultinomialNB #Naive Bayes Multinomial Algorthim AI model
from sklearn.model_selection import train_test_split # Data Formatting
from sklearn.metrics import accuracy_score, classification_report # scoring the model
import pandas as pd
import numpy as np
from sklearn.svm import SVC # Support Vector Machine
from sklearn.ensemble import RandomForestClassifier # Random Forest
import keras
import joblib
class ai:
    """Neural-network classifier that predicts which cipher produced a text.

    The training CSV must have a ``Text`` column and a ``Cipher`` column whose
    values are names from ``self.ciphers``. Class ids are positions in that
    list, so its order must not change once a model has been exported.

    Typical use: ``preprocess()`` -> ``train()`` -> ``evaluate()`` / ``export()``.
    """

    def __init__(self, filename):
        """Load the dataset and set up the bag-of-words vectorizer.

        Args:
            filename: Path to the training CSV.
        """
        self.data = pd.read_csv(filename)  # reading input in
        # Index 0 ('Cipher') is the column-header placeholder; it is kept so
        # the remaining class ids stay where they are.
        self.ciphers = ['Cipher', 'ceaser', 'morse', 'substitution', 'hexadecimal', 'binary']
        self.vectorizer = CountVectorizer()  # creating the bag-of-words

    def _labelled_rows(self):
        """Return (texts, encoded labels) for rows whose label is not 'Cipher'."""
        labels = self.data['Cipher']
        # Rows labelled with the literal header name are skipped. The same
        # mask is applied to texts and labels so the two always line up.
        keep = labels != 'Cipher'
        texts = self.data.loc[keep, 'Text']
        # Categorical encoding: cipher name -> its index in self.ciphers.
        # An unknown name raises ValueError from list.index.
        encoded = np.array([self.ciphers.index(name) for name in labels[keep]])
        return texts, encoded

    def preprocess(self):
        """Vectorize the texts, encode the labels and make a 70/30 split.

        Raises:
            ValueError: If a label is not one of ``self.ciphers``.
        """
        texts, self.Y = self._labelled_rows()
        self.vectorizer.fit(texts)
        # Dense array because the Keras Dense layers are fed a plain ndarray.
        self.X = self.vectorizer.transform(texts).toarray()
        (
            self.X_train,
            self.X_test,
            self.Y_train,
            self.Y_test,
        ) = train_test_split(self.X, self.Y, test_size=0.3, random_state=42)

    def train(self):
        """Build and fit the network. Requires ``preprocess()`` first."""
        model = keras.Sequential([
            keras.layers.Dense(128, activation='relu', input_shape=(self.X.shape[1],)),
            keras.layers.Dropout(0.5),  # drop half the units to limit overfitting
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dropout(0.5),  # drop half the units to limit overfitting
            # One output per entry in self.ciphers (labels are integer ids).
            keras.layers.Dense(len(self.ciphers), activation='softmax'),
        ])
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        self.model = model
        self.model.fit(self.X_train, self.Y_train, epochs=5, validation_split=0.2)

    def export(self):
        """Save the architecture to model.json and the weights to an .h5 file."""
        model_json = self.model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # Saving the weights lets later runs predict without retraining.
        self.model.save_weights("Decryptionclassify.h5")
        print("Checkpoint:THEE Model is Saved")  # checkpoint to display that the file is updated

    def evaluate(self):
        """Print accuracy on the test and training splits.

        Comparing the two numbers shows over- or under-fitting.
        """
        print("EVALUATION REPORT", "-" * 50)
        Y_pred = self.model.predict(self.X_test)
        Y_train_pred = self.model.predict(self.X_train)
        # The softmax output is one probability per class; take the largest.
        Y_pred_classes = np.argmax(Y_pred, axis=1)
        Y_train_pred_classes = np.argmax(Y_train_pred, axis=1)
        test_accuracy = accuracy_score(self.Y_test, Y_pred_classes)
        train_accuracy = accuracy_score(self.Y_train, Y_train_pred_classes)
        print("Testing Accuracy ", test_accuracy)
        print("Training Accuracy ", train_accuracy)
2nd Key commit
This commit is the second AI class, which focuses on classifying each encrypted text into a type of encryption such as Caesar, substitution, binary, etc. This commit is important for the decryption part of our website, as it helps us decide which decryption algorithm to run and whether we need to use the other AI model for key-based ciphers. This AI model also uses a deep learning network to classify, as I found it produced slightly better results, at 70% testing accuracy.
import threading
# import "packages" from flask
from flask import render_template # import render_template from "public" flask libraries
from aienglishprediction import aienglishprediction
from flask_cors import CORS
# import "packages" from "this" project
from __init__ import app,db # Definitions initialization
from rsa import RSA # import RSA cipher
from flask import Flask, render_template, request, jsonify
from urllib.parse import quote as url_quote
import subprocess
import os
from caesar import caesar as c1 # first cipher ceasar
from substitution import substitution as c2 # second cipher subtitution
from generate import generate as gn # importing the generator api
from morse import morse # importing morse cipher
from binary import binary # importing binary cipher
from hex import hexadecimal # importing hexadecimal cipher
from aiprediction import aiprediction # importing the ai prediciton class
# Create the Flask application and enable CORS on it.
# NOTE(review): `app` is also imported from `__init__` above; this assignment
# rebinds the name to a fresh Flask instance, so anything configured on the
# imported app is presumably not used by the routes below -- confirm intended.
app = Flask(__name__)
CORS(app)
# Initialize the SQLAlchemy object to work with the Flask app instance
# db.init_app(app)
@app.errorhandler(404)  # catch for URL not found
def page_not_found(e):
    """Render the 404 template for unknown URLs.

    Args:
        e: The error passed in by Flask (unused).

    Returns:
        A ``(body, status)`` tuple; the 404 status is set explicitly.
    """
    body = render_template('404.html')
    return body, 404
@app.route('/')  # default URL
def index():
    """Serve the rendered ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route('/table/')  # /table/ URL
def table():
    """Serve the rendered ``table.html`` template."""
    page = render_template("table.html")
    return page
@app.route("/caesarencrypt", methods=["POST"])
def caesarencrypt():
    """Encrypt the posted ``text`` with the caesar cipher.

    Reads ``text`` from the JSON body, takes the first value returned by the
    generator's ``getrandom(1)`` as the key, and returns the encrypted result
    as a JSON string. Input and output are printed for debugging.
    """
    plaintext = request.json.get("text")
    print(plaintext)
    # The generator value is cast to int before being used as the key.
    shift = int(gn().getrandom(1)[0])
    ciphertext = c1(shift, plaintext).encrypt()
    print(ciphertext)
    return jsonify(str(ciphertext))
@app.route("/morseencrypt", methods=["POST"])
def morseencrypt():
    """Encrypt the posted ``text`` with the morse cipher.

    Reads ``text`` from the JSON body and returns the encrypted result as a
    JSON string. The result is printed for debugging.
    """
    plaintext = request.json.get("text")
    ciphertext = morse(plaintext).encrypt()
    print(ciphertext)
    return jsonify(str(ciphertext))
@app.route("/binaryencrypt", methods=["POST"])
def binaryencrypt():
    """Encrypt the posted ``text`` with the binary cipher.

    Reads ``text`` from the JSON body and returns the encrypted result as a
    JSON string. The result is printed for debugging.
    """
    plaintext = request.json.get("text")
    ciphertext = binary(plaintext).encrypt()
    print(ciphertext)
    return jsonify(str(ciphertext))
@app.route("/hexencrypt", methods=["POST"])
def hexencrypt():
    """Encrypt the posted ``text`` with the hexadecimal cipher.

    Reads ``text`` from the JSON body and returns the encrypted result as a
    JSON string. The result is printed for debugging.
    """
    plaintext = request.json.get("text")
    ciphertext = hexadecimal(plaintext).encrypt()
    print(ciphertext)
    return jsonify(str(ciphertext))
@app.route("/subencrypt", methods=["POST"])
def subencrypt():
    """Encrypt the posted ``text`` with the substitution cipher.

    Reads ``text`` from the JSON body, takes the first value returned by the
    generator's ``getrandom(1)`` as the key, and returns the encrypted result
    as a JSON string. The result is printed for debugging.
    """
    text = request.json.get("text")
    gen = gn()
    value = gen.getrandom(1)[0]
    # Use the substitution cipher class (c2). The handler previously called
    # itself here, which raised TypeError on every request. The key is cast
    # to int, as the caesar route does and as decrypt() passes to c2.
    sub = c2(int(value), text)
    encrypted = sub.encrypt()
    print(encrypted)
    return jsonify(str(encrypted))
@app.route("/rsaencrypt", methods=["POST"])
def rsa():
    """Encrypt the posted ``text`` with RSA.

    Reads ``text`` from the JSON body, converts its encoded bytes to one
    big-endian integer, encrypts that integer with a new 2048-bit ``RSA``
    object, and returns the ciphertext as a JSON string.
    """
    text = request.json.get("text")
    print('here')
    # One RSA object per request. An extra default-constructed instance that
    # was never used has been removed.
    cipher = RSA(bits=2048)
    print("Created Object")
    # rsa_encrypt is given an integer, so pack the encoded text into one.
    plaintext = int.from_bytes(text.encode(), byteorder='big')
    ciphertext = cipher.rsa_encrypt(plaintext)
    print(ciphertext)
    return jsonify(str(ciphertext))
@app.route("/decrypt", methods=["POST"])
def decrypt():
    """Detect the cipher used on the posted ``text`` and decrypt it.

    ``aiprediction().pred`` names the cipher. Binary, hexadecimal and morse
    are decrypted directly. Anything else is treated as key-based: keys 1..26
    are tried, and a candidate is accepted when the English checker's
    ``predict`` returns 0.

    Returns:
        The decrypted text as a JSON string, or an empty JSON string when no
        key produced an accepted candidate.
    """
    text = request.json.get("text")
    value = aiprediction().pred(text)  # name of the predicted cipher

    # Keyless ciphers: decrypt directly.
    if value == "binary":
        return jsonify(str(binary(text).decrypt()))
    if value == "hexadecimal":
        return jsonify(str(hexadecimal(text).decrypt()))
    if value == "morse":
        return jsonify(str(morse(text).decrypt()))

    # Key-based ciphers: brute-force every key.
    print(value)
    # Substitution uses c2; every other label falls back to caesar (c1).
    cipher_cls = c2 if value == "substitution" else c1
    # Build the English checker once instead of once per key.
    eng = aienglishprediction()
    output = ""
    for key in range(1, 27):
        candidate = cipher_cls(key, text).decrypt()  # decrypt once per key
        if eng.predict(candidate) == 0:
            # No break on purpose: when several keys are accepted, the last
            # accepted candidate is returned.
            output = candidate
    return jsonify(str(output))
if __name__ == "__main__":
    # CORS is already enabled on `app` right after it is created, so it is
    # not imported and applied a second time here.
    # NOTE(review): debug=True with host 0.0.0.0 exposes the Werkzeug
    # debugger on every network interface -- use for local testing only.
    app.run(debug=True, host="0.0.0.0", port=8080)
3rd Key Commit
I worked on most of this file, but the commit I will focus on is the decrypt function, in which I first use the AI to classify the algorithm used for encryption. Then, if it is a key-based cipher, I brute-force all of the keys, use the AI to check whether each key is the correct one, and if so return the decrypted text. This was essential for the decryption functionality of our website.