import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

class aienglish:
    """Train and export a TF-IDF + logistic-regression text classifier.

    The training CSV must provide a ``Text`` column (model input) and an
    ``IsEnc`` column (target label). Call ``preprocess``, ``train`` and
    ``export`` in that order; each step stores its results on the instance.
    """

    def __init__(self, dataset_path='datatrainisenglish.csv'):
        """Load the training data.

        Args:
            dataset_path: CSV file to read. Defaults to the file name that
                was previously hard-coded, so existing callers are unaffected.
        """
        self.df = pd.read_csv(dataset_path)

    def preprocess(self):
        """Split the data 80/20 and build TF-IDF features for both parts."""
        self.X = self.df['Text']
        self.y = self.df['IsEnc']
        # Fixed random_state keeps the split reproducible between runs.
        (self.X_train, self.X_test,
         self.y_train, self.y_test) = train_test_split(
            self.X, self.y, test_size=0.2, random_state=42)

        # The vectorizer is fitted on the training split only; the test split
        # is merely transformed with the fitted vectorizer.
        self.tfidf_vectorizer = TfidfVectorizer()
        self.X_train_tfidf = self.tfidf_vectorizer.fit_transform(self.X_train)
        self.X_test_tfidf = self.tfidf_vectorizer.transform(self.X_test)

    def train(self):
        """Fit the model on the training split and print test-split metrics.

        Requires ``preprocess`` to have been called first.
        """
        self.model = LogisticRegression()
        self.model.fit(self.X_train_tfidf, self.y_train)

        y_pred = self.model.predict(self.X_test_tfidf)

        accuracy = accuracy_score(self.y_test, y_pred)
        report = classification_report(self.y_test, y_pred)
        print("Accuracy:", accuracy)
        # The report used to be computed and then discarded; show it.
        print(report)

    def export(self, model_path="Checkingenglishmodel.joblib",
               vectorizer_path="vectorizerenglish.joblib"):
        """Persist the trained model and its fitted vectorizer with joblib.

        Args:
            model_path: Destination file for the fitted model.
            vectorizer_path: Destination file for the fitted TF-IDF
                vectorizer.
        """
        joblib.dump(self.model, model_path)
        joblib.dump(self.tfidf_vectorizer, vectorizer_path)

First Key Commit

This commit is an AI English class that uses logistic regression to tell whether the text produced by decrypting with a given key is actually decrypted (readable English) or not. It is an essential part of decrypting the key-based ciphers, since the decryption brute-forces all the keys. This AI worked extremely well, producing accuracy rates of around 90% in testing.

# imports
from sklearn.feature_extraction.text import CountVectorizer # converting text to numbers
from sklearn.naive_bayes import MultinomialNB #Naive Bayes Multinomial Algorthim AI model
from sklearn.model_selection import train_test_split # Data Formatting
from sklearn.metrics import accuracy_score, classification_report # scoring the model
import pandas as pd
import numpy as np
from sklearn.svm import SVC # Support Vector Machine
from sklearn.ensemble import RandomForestClassifier # Random Forest
import keras
import joblib
class ai:
    """Dense neural network that predicts which cipher produced a text.

    The training CSV must provide a ``Text`` column (model input) and a
    ``Cipher`` column (label, one of the names in ``self.ciphers``). Call
    ``preprocess`` before ``train``; ``export`` and ``evaluate`` need a
    trained model.
    """

    def __init__(self,filename):
        """Read the training CSV and set up the label list and vectorizer.

        Args:
            filename: Path of the CSV file holding the training data.
        """
        self.data = pd.read_csv(filename)
        # A label's position in this list is its class id. 'Cipher' (the
        # column name) is skipped when labels are encoded but still occupies
        # index 0, so the output layer has len(self.ciphers) units.
        self.ciphers = ['Cipher','ceaser','morse','substitution','hexadecimal','binary']
        self.vectorizer = CountVectorizer()  # bag-of-words features

    def _labelled_rows(self):
        """Return (texts, class ids) for the rows that carry a real label.

        Rows whose label is the literal 'Cipher' are dropped from BOTH the
        texts and the labels so the two stay aligned.

        Raises:
            ValueError: If a label is not listed in ``self.ciphers``.
        """
        labels = self.data['Cipher']
        keep = labels != 'Cipher'
        texts = self.data.loc[keep, 'Text']
        class_id = {name: pos for pos, name in enumerate(self.ciphers)}
        try:
            encoded = [class_id[label] for label in labels[keep]]
        except KeyError as err:
            raise ValueError(
                f"unknown cipher label in training data: {err.args[0]!r}"
            ) from err
        return texts, np.array(encoded)

    def preprocess(self): # preprocessing for training
        """Vectorize the texts, encode the labels and split 70/30."""
        # Previously only the labels were filtered, leaving X longer than Y
        # whenever a 'Cipher' row was present, which made the split fail.
        texts, self.Y = self._labelled_rows()
        self.vectorizer.fit(texts)
        # Converted to a dense array before being handed to the network.
        self.X = self.vectorizer.transform(texts).toarray()
        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(
            self.X, self.Y, test_size=0.3, random_state=42)

    def train(self):
        """Build, compile and fit the network on the training split."""
        model = keras.Sequential([
            keras.layers.Dense(128, activation='relu', input_shape=(self.X.shape[1],)),
            keras.layers.Dropout(0.5),  # dropout to limit overfitting
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dropout(0.5),  # dropout to limit overfitting
            keras.layers.Dense(len(self.ciphers), activation='softmax')
        ])
        # Sparse loss: the targets are integer class ids, not one-hot rows.
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        self.model = model
        self.model.fit(self.X_train, self.Y_train, epochs=5, validation_split=0.2)

    def export(self):
        """Write the architecture to model.json and the weights to an .h5 file."""
        model_json = self.model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # Saved weights allow later prediction without retraining.
        self.model.save_weights("Decryptionclassify.h5")
        print("Checkpoint:THEE Model is Saved") # confirms the files were written

    def evaluate(self):
        """Print accuracy on the test and training splits.

        Showing both figures side by side makes over- or under-fitting
        visible.
        """
        print("EVALUATION REPORT","-"*50)
        Y_pred = self.model.predict(self.X_test)
        Y_train_pred = self.model.predict(self.X_train)
        # The network outputs one score per class; take the highest.
        Y_pred_classes = np.argmax(Y_pred, axis=1)
        Y_train_pred_classes = np.argmax(Y_train_pred, axis=1)
        test_accuracy = accuracy_score(self.Y_test, Y_pred_classes)
        train_accuracy = accuracy_score(self.Y_train, Y_train_pred_classes)
        print("Testing Accuracy ", test_accuracy)
        print("Training Accuracy ", train_accuracy)
        
        
        
       
        
        
        
    

2nd Key commit

This commit is the second AI class, which focuses on classifying each encrypted text by the type of encryption used, such as Caesar, substitution, binary, etc. This commit is important for the decryption part of our website, as it helps us classify which decryption algorithm to run and whether we need to use the other AI model for key-based ciphers. This AI model also uses a deep learning network to classify, as I found it produced slightly better results at 70% testing accuracy.

import threading

# import "packages" from flask
from flask import render_template  # import render_template from "public" flask libraries
from aienglishprediction import aienglishprediction
from flask_cors import CORS
# import "packages" from "this" project
from __init__ import app,db  # Definitions initialization
from rsa import RSA # import RSA cipher

from flask import Flask, render_template, request, jsonify
from urllib.parse import quote as url_quote
import subprocess
import os
from caesar import caesar as c1 # first cipher ceasar
from substitution import substitution as c2 # second cipher subtitution
from generate import generate as gn # importing the generator api
from morse import morse # importing morse cipher
from binary import binary # importing binary cipher
from hex import hexadecimal # importing hexadecimal cipher
from aiprediction import aiprediction # importing the ai prediciton class
# NOTE(review): this rebinds `app`, replacing the instance imported from
# `__init__` above — confirm that a fresh Flask object is what is intended.
app = Flask(__name__)
CORS(app)  # apply the flask_cors extension to this app


# Initialize the SQLAlchemy object to work with the Flask app instance
# db.init_app(app)

@app.errorhandler(404)  # handler for "URL not found"
def page_not_found(e):
    """Render the custom 404 page and send it with an explicit 404 status."""
    not_found_page = render_template('404.html')
    return not_found_page, 404

@app.route('/')  # the default URL is served by index()
def index():
    """Render the index.html template."""
    home_page = render_template("index.html")
    return home_page

@app.route('/table/')  # the /table/ URL is served by table()
def table():
    """Render the table.html template."""
    table_page = render_template("table.html")
    return table_page

@app.route("/caesarencrypt", methods=["POST"])
def caesarencrypt():
    """Caesar-encrypt the posted "text" using a key from the generator.

    Returns the encrypted value, converted to a string, as JSON.
    """
    plaintext = request.json.get("text")
    print(plaintext)
    # The first value returned by the generator is used as the key.
    key = gn().getrandom(1)[0]
    ciphertext = c1(int(key), plaintext).encrypt()
    print(ciphertext)
    return jsonify(str(ciphertext))

@app.route("/morseencrypt", methods=["POST"])
def morseencrypt():
    """Encode the posted "text" with the morse cipher and return it as JSON."""
    plaintext = request.json.get("text")
    ciphertext = morse(plaintext).encrypt()
    print(ciphertext)
    return jsonify(str(ciphertext))

@app.route("/binaryencrypt", methods=["POST"])
def binaryencrypt():
    """Encode the posted "text" with the binary cipher and return it as JSON."""
    plaintext = request.json.get("text")
    ciphertext = binary(plaintext).encrypt()
    print(ciphertext)
    return jsonify(str(ciphertext))

@app.route("/hexencrypt", methods=["POST"])
def hexencrypt():
    """Encode the posted "text" with the hexadecimal cipher and return it as JSON."""
    plaintext = request.json.get("text")
    ciphertext = hexadecimal(plaintext).encrypt()
    print(ciphertext)
    return jsonify(str(ciphertext))

@app.route("/subencrypt", methods=["POST"])
def subencrypt():
    """Substitution-encrypt the posted "text" using a key from the generator.

    Returns the encrypted value, converted to a string, as JSON.
    """
    text = request.json.get("text")
    gen = gn()
    value = gen.getrandom(1)[0]  # first value returned by the generator
    # Build the substitution cipher (c2). The handler used to call itself
    # here, which raised TypeError on every request. The key is cast to int
    # to match how the Caesar route and the decrypt route pass keys.
    sub = c2(int(value), text)
    encrypted = sub.encrypt()
    print(encrypted)
    return jsonify(str(encrypted))

@app.route("/rsaencrypt", methods=["POST"])
def rsa():
    """RSA-encrypt the posted "text" and return the ciphertext as JSON.

    The text is encoded to bytes and read as one big-endian integer before
    being passed to ``rsa_encrypt``.
    """
    text = request.json.get("text")
    print('here')
    # Only one RSA object is needed; the extra, unused default-constructed
    # instance was dropped. The local is not called `rsa` so it no longer
    # shadows this function's own name.
    # NOTE(review): the object lives only for this request — confirm how the
    # matching key is meant to be kept for later decryption.
    cipher = RSA(bits=2048)
    print("Created Object")
    message = int.from_bytes(text.encode(), byteorder='big')

    ciphertext = cipher.rsa_encrypt(message)

    print(ciphertext)

    return jsonify(str(ciphertext))

@app.route("/decrypt", methods=["POST"])
def decrypt():
    """Guess the cipher behind the posted "text" and return the decryption.

    The cipher classifier picks the scheme. Binary, hexadecimal and morse
    are decoded directly. Anything else is treated as key-based: keys 1..26
    are tried in turn and a candidate is kept when the English checker
    returns 0 for it. If several keys pass, the last one wins; if none
    does, an empty string is returned.
    """
    text = request.json.get("text")
    output = ""
    value = aiprediction().pred(text)  # predicted cipher name

    if value == "binary":
        output = binary(text).decrypt()
    elif value == "hexadecimal":
        output = hexadecimal(text).decrypt()
    elif value == "morse":
        output = morse(text).decrypt()
    else:
        # As before, every prediction other than "substitution" falls back
        # to the Caesar cipher.
        cipher_cls = c2 if value == "substitution" else c1
        # Build the English checker once, and only on this branch; it used
        # to be rebuilt for each of the 26 keys and for keyless ciphers too.
        eng = aienglishprediction()
        for key in range(1, 27):  # brute-force every key
            candidate = cipher_cls(key, text).decrypt()  # decrypt once per key
            # NOTE(review): 0 presumably means "not encrypted" (readable
            # English) — confirm against the checker's label convention.
            if eng.predict(candidate) == 0:
                output = candidate
    return jsonify(str(output))


if __name__ == "__main__":

    # change name for testing
    # CORS is already imported and applied to `app` where the app is created,
    # so it is not imported and applied a second time here.
    # NOTE(review): debug=True together with host="0.0.0.0" exposes the
    # interactive debugger on every network interface — local testing only.
    app.run(debug=True, host="0.0.0.0", port=8080)  # port given as an int

3rd Key Commit

I worked on most of this file, but the commit I will focus on is the decrypt function, in which I first use the AI to classify the algorithm used for encryption. Then, if it is a key-based cipher, I brute-force all of the keys and use the AI to check whether each key is the correct one; if so, I return the decrypted text. This was essential for the decryption functionality of our website.

Key Commits for Passion Project • 19 min read

Description

First Key Commit

2nd Key commit

3rd Key Commit