-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathflask_app.py
103 lines (75 loc) · 3.21 KB
/
flask_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import pandas as pd
import numpy as np
import re
import os
import PyPDF2
from nltk.stem.snowball import SnowballStemmer
from nltk.corpus import stopwords
from flask import render_template, Flask, request
import flask
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
# Module-wide English Snowball stemmer, built once at import time and used by
# pdf_to_text for every document.
stemmer = SnowballStemmer("english")
def pdf_to_text(pdf):
    """Return the text of a PDF as a space-separated string of word stems.

    The text of every page is extracted in document order, newlines are
    stripped, English stop words are dropped (case-insensitively) and each
    remaining word is replaced by its Snowball stem.

    Args:
        pdf: Binary file-like object holding the PDF, or anything else
            ``PyPDF2.PdfFileReader`` accepts.

    Returns:
        str: The stems of the remaining words joined by single spaces; an
        empty string when no words are left.
    """
    # NOTE(review): PdfFileReader / numPages / getPage / extractText are the
    # legacy PyPDF2 names (deprecated in 2.x, removed in 3.0) -- pin
    # PyPDF2 < 3 or migrate to PdfReader / pages / extract_text.
    reader = PyPDF2.PdfFileReader(pdf)

    # Collect the pages in document order and join them once, instead of
    # rebuilding the whole string for every page.
    page_texts = [
        reader.getPage(page_number).extractText()
        for page_number in range(reader.numPages)
    ]
    # Newlines are removed outright (not replaced by a space).
    all_text = re.sub('[\n]', '', " ".join(page_texts))

    # Stop words have to be matched on the raw word: stemming first turns
    # e.g. "because" into "becaus" and "very" into "veri", which are not in
    # the stop-word list and would therefore be kept.
    stop = set(stopwords.words('english'))

    # split() without an argument also discards the empty tokens that runs of
    # spaces would otherwise produce.
    return " ".join(
        stemmer.stem(word)
        for word in all_text.split()
        if word.lower() not in stop
    )
# Absolute path of the directory that contains this file.
THIS_FOLDER = os.path.dirname(os.path.abspath(__file__))

## Getting the training data ready
# NOTE(review): the CSV is opened relative to the current working directory,
# while the PDFs it lists are resolved against THIS_FOLDER -- confirm that
# both are meant to live next to this file.
data = pd.read_csv('resumes.csv')
X_train = []
for i in data['path']:
    my_file = os.path.join(THIS_FOLDER, i)
    # Close each resume as soon as its text has been extracted so that no
    # file handles stay open after the training set is built.
    with open(my_file, 'rb') as pdf:
        X_train.append(pdf_to_text(pdf))

# Bag-of-words counts followed by TF-IDF weighting. The vectorizer does not
# lowercase its input and ignores terms whose document frequency exceeds 0.6.
count_vect = CountVectorizer(lowercase=False, max_df=.6)
tfidf_transformer = TfidfTransformer()
X_train_counts = count_vect.fit_transform(X_train)
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

## classifier
from sklearn.linear_model import LogisticRegression

# Fitted once at import time on the labels stored in the 'succes' column.
clf = LogisticRegression()
clf.fit(X_train_tfidf, data['succes'])
def getPredictions(clf, count_vect, tfidf_transformer, X_test):
    """Vectorize ``X_test`` with the given transformers and classify it.

    No fitting happens here: both transformers are only applied via
    ``transform`` and the classifier is only queried.

    Args:
        clf: Object exposing ``predict`` and ``predict_proba``.
        count_vect: Object whose ``transform`` maps the documents to counts.
        tfidf_transformer: Object whose ``transform`` re-weights those counts.
        X_test: The documents to classify.

    Returns:
        tuple: ``(clf.predict(features), clf.predict_proba(features))``.
    """
    features = tfidf_transformer.transform(count_vect.transform(X_test))
    labels = clf.predict(features)
    probabilities = clf.predict_proba(features)
    return labels, probabilities
# Flask application object; the route decorators in this file register on it.
app = Flask(__name__)

# Directory part of the absolute form of the relative path "app.py".
# NOTE(review): a relative name is resolved against the working directory at
# import time, not against this file's location -- confirm this is intended.
APP_ROOT = os.path.dirname(
    os.path.abspath("app.py")
)
@app.route("/")
def index():
    """Render the ``index.html`` template for the site root."""
    page = render_template("index.html")
    return page
@app.route("/upload", methods=['POST'])
def upload():
    """Classify the uploaded resume(s) and render the matching feedback page.

    Every file posted under the form field ``file`` is converted to text and
    classified. The first file whose predicted label is 1 or 2 decides the
    response.

    Returns:
        The rendered ``result.html`` (with ``prob1``, ``prob2`` and the
        feedback text ``sonuc``) for the first file labelled 1 or 2;
        otherwise the rendered ``index.html``.
    """
    target = os.path.join(APP_ROOT, "testresumes/")
    # makedirs(exist_ok=True) cannot fail when a concurrent request creates
    # the directory between an isdir() check and a mkdir() call.
    os.makedirs(target, exist_ok=True)

    for file in request.files.getlist("file"):
        # A form submitted without a selected file still carries a part with
        # an empty filename; skip it instead of handing an empty stream to
        # the PDF reader.
        if not file.filename:
            continue

        result, probs = getPredictions(
            clf, count_vect, tfidf_transformer, [pdf_to_text(file)]
        )
        label = result[0]
        if label == 1:
            message = "Your resume looks great! Here are some companies, You may want to consider: Apple, Hewlett-Packard, IBM, Amazon, Microsoft, Google, Intel, Cisco Systems, Oracle, Qualcomm, EMC, Xerox, Danaher, eBay, Uber, Plantair, Snapchat, Github, HackerRank, Twitter, Facebook, Texas Instruments, Quora, Intuit, Infosys, LinkedIn, Yahoo!, Kaspersky, Nvidia, AMD"
        elif label == 2:
            # waiting for tiers
            message = "Your resume looks good but you need some improvement. Get experience from this companies: BMC Software, Pros Holding Inc., NetIQ, Quorum Business Solutions Inc, Alert Logic Inc., HCSS."
        else:
            # No feedback text exists for any other label; try the next file.
            continue

        return render_template(
            "result.html",
            prob1=probs[0][0],
            prob2=probs[0][1],
            sonuc=message,
        )

    # Nothing usable was uploaded (or no file got label 1 or 2).
    return render_template("index.html")
# Start the server via app.run() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app.run()