Skip to content

Commit

Permalink
Added PyOCR!
Browse files Browse the repository at this point in the history
  • Loading branch information
luccadimario committed Feb 21, 2024
1 parent 98360cf commit 46765b8
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

/node_modules/
API_KEYS.txt
apple_auth.p8
.DS_Store
33 changes: 33 additions & 0 deletions PyOCR/flask_ocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from flask import Flask
from flask import request
from ocr_handwritten import *
from ocr_compgen import *
# Flask application object; the @app.route decorators below register their
# view functions on it, and app.run() at the end of the file starts it.
app = Flask(__name__)

@app.route('/<int:number>/')
def incrementer(number):
    """Respond with the integer captured from the URL path, plus one.

    Args:
        number: Integer parsed from the path by the ``int`` URL converter.

    Returns:
        A plain-text message containing ``number + 1``.
    """
    successor = number + 1
    return f"Incremented number is {successor}"


@app.route('/ocr/handwritten/', methods=['POST'])
def upload_handwritten():
    """Run handwriting OCR on a base64-encoded image posted as JSON.

    The request body is expected to be a JSON object whose ``b64`` key holds
    the base64-encoded image; that value is passed straight to ``runOcr``.

    Returns:
        The value returned by ``runOcr`` on success, or an error message with
        HTTP status 400 when the body is not a JSON object containing ``b64``.
    """
    # The route is registered for POST only, so no method check is needed.
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, which lets every bad-payload case share one 400 response.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'b64' not in payload:
        return "Request body must be a JSON object with a 'b64' field", 400
    return runOcr(payload['b64'])

@app.route('/ocr/compgen/', methods=['POST'])
def upload_compgen():
    """Run printed-text OCR on a base64-encoded image posted as JSON.

    The request body is expected to be a JSON object whose ``b64`` key holds
    the base64-encoded image; that value is passed straight to
    ``run_ocr_compgen``.

    Returns:
        The value returned by ``run_ocr_compgen`` on success, or an error
        message with HTTP status 400 when the body is not a JSON object
        containing ``b64``.
    """
    # The route is registered for POST only, so no method check is needed.
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, which lets every bad-payload case share one 400 response.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'b64' not in payload:
        return "Request body must be a JSON object with a 'b64' field", 400
    return run_ocr_compgen(payload['b64'])




# Start the application with run()'s default arguments.
# NOTE(review): this call is not behind an `if __name__ == "__main__":` guard,
# so it executes whenever this module is loaded, not only when it is run as a
# script -- confirm that is intended.
app.run()
14 changes: 14 additions & 0 deletions PyOCR/ocr_compgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pytesseract
from pytesseract import Output
from PIL import Image
import cv2
from io import BytesIO
import base64

def run_ocr_compgen(base64String):
    """Extract text from a base64-encoded image using pytesseract.

    Args:
        base64String: Base64 encoding of the image file's bytes.

    Returns:
        The result of ``pytesseract.image_to_string`` for the decoded image,
        run with the ``eng`` language setting.
    """
    raw_bytes = base64.b64decode(base64String)
    # Normalise to RGB before handing the image to the OCR call.
    rgb_image = Image.open(BytesIO(raw_bytes)).convert("RGB")
    return pytesseract.image_to_string(rgb_image, lang='eng')


29 changes: 29 additions & 0 deletions PyOCR/ocr_handwritten.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import requests
from IPython.display import display
from PIL import Image
from io import BytesIO
import base64

_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"

# (processor, model) pair, populated on first use so the checkpoint is loaded
# once per process instead of once per call.
_trocr_components = None


def _load_trocr():
    """Return the cached TrOCR (processor, model) pair, loading it if needed."""
    global _trocr_components
    if _trocr_components is None:
        processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
        model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
        # Publish both objects in one assignment so a concurrent caller never
        # observes a half-initialised pair.
        _trocr_components = (processor, model)
    return _trocr_components


def runOcr(base64String):
    """Transcribe handwriting in a base64-encoded image with TrOCR.

    Args:
        base64String: Base64 encoding of the image file's bytes.

    Returns:
        The first decoded string produced by the model for the image, with
        special tokens stripped.
    """
    processor, model = _load_trocr()

    image = Image.open(BytesIO(base64.b64decode(base64String))).convert("RGB")

    pixel_values = processor(image, return_tensors="pt").pixel_values
    # Cap generation length; 255 new tokens is the limit the original code used.
    generated_ids = model.generate(pixel_values, max_new_tokens=255)

    # batch_decode returns one string per input; exactly one image was passed.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]




0 comments on commit 46765b8

Please sign in to comment.