forked from JuJu2181/Document_Classification_System
-
Notifications
You must be signed in to change notification settings - Fork 0
/
classifier.py
40 lines (36 loc) · 1.2 KB
/
classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import nltk
import pickle
import fitz
from sklearn.feature_extraction.text import CountVectorizer
from train_model import input_process
def load_model_and_vectorizer():
    """
    Load the pickled classifier and vectorizer from the working directory.

    Reads "classifier.model" and "vectorizer.pickle" (paths relative to the
    current working directory) and unpickles one object from each.

    Returns:
        tuple: (model, vectorizer) -- the objects stored in the two pickle
        files, in that order.

    Raises:
        FileNotFoundError: if either pickle file does not exist.
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling; only
    # load files that were produced by a trusted training run.
    # ? Load the model and vectorizer from pickle files; the context
    # ? managers make sure both file handles are closed again
    with open("classifier.model", "rb") as model_file:
        model = pickle.load(model_file)
    with open("vectorizer.pickle", "rb") as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)
    return model, vectorizer
# Script entry point: classify a single document given by the user.
# Loads the pickled model/vectorizer, extracts the text of the document with
# PyMuPDF (fitz), preprocesses and vectorizes it, then prints the predicted topic.
if __name__ == "__main__":
    model, vectorizer = load_model_and_vectorizer()
    print("Model Loaded Sucessfully")
    # ? ask the user for the path of the document to classify
    path = input("Enter the path of the file: ")
    # ? open the document; the context manager closes it even if text
    # ? extraction raises
    with fitz.open(path) as doc:
        # ? concatenate the text of every page, in page order
        content = "".join(page.get_text() for page in doc)
    # ? preprocess the extracted text with the helper imported from train_model
    content = input_process(content)
    # ? transform the text to a feature vector (single-document batch)
    features = vectorizer.transform([content])
    # ? predict the label of the document
    pred = model.predict(features)
    # ? label 1 is reported as AI, any other label as WEB
    if pred[0] == 1:
        print("The document is about AI")
    else:
        print("The document is about WEB")