环境准备:https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.7/deploy/hubserving#24-%E5%90%AF%E5%8A%A8%E6%9C%8D%E5%8A%A1
服务器启动命令
hub serving start -c deploy/hubserving/ocr_system/config.json
客户端请求
python tools/test_hubserving.py --server_url=server_url --image_dir=image_path
import asyncio
import base64
import json
import sys
import time
from io import BytesIO

import aiohttp
import cv2
import numpy as np
import pandas as pd
import requests
from PIL import Image
from sqlalchemy import create_engine, text
import io

# Endpoint of the PaddleOCR hub-serving ``ocr_system`` module.
OCR_URL = "http://192.168.0.189:8868/predict/ocr_system"
JSON_HEADERS = {'Content-Type': 'application/json'}
# Placeholder connection string; substitute real credentials before running.
DB_URL = 'mysql+pymysql://xxx:xxx@xxx.xxx.xx.xx:3306/x?charset=uxxxtf8'

# Script start time; the timing report at the end of the script reads it.
time1 = time.time()


def read_data(page=0, page_size=100):
    """Fetch one page of base64-encoded images from ``xj_zsjh_png``.

    Args:
        page: Zero-based page index.
        page_size: Number of rows per page.

    Returns:
        A list with the ``bas64_str`` column value of every selected row.
    """
    limit = int(page_size)
    # The offset advances by whole pages so consecutive pages never overlap.
    offset = int(page) * limit
    # Both values are coerced to int above, so interpolating them is safe.
    sql_select = f"SELECT * from xj_zsjh_png LIMIT {offset}, {limit}"  # ~100 s for 1000 rows
    engine = create_engine(DB_URL)
    try:
        # The context manager returns the connection to the pool on exit.
        with engine.connect() as connection:
            results = pd.read_sql(sql=text(sql_select), con=connection)
    finally:
        engine.dispose()
    return results['bas64_str'].tolist()


def cv2_to_base64(image):
    """Return the base64 text (UTF-8 ``str``) of the bytes-like ``image``."""
    return base64.b64encode(image).decode('utf8')


def save_data(results, file_name):
    """Write the recognised text of each image as one CSV row.

    Args:
        results: One entry per image; each entry is an iterable of dicts
            carrying a ``'text'`` key, or ``None`` when the OCR request for
            that image failed.
        file_name: Destination CSV path.

    A failed image (``None``) becomes an empty row, so row ``i`` of the CSV
    always corresponds to input image ``i``.
    """
    rows = [[item['text'] for item in (result or [])] for result in results]
    pd.DataFrame(rows).to_csv(file_name, index=False)


def process_image_(img_str, left=535, width=240):
    """Decode a base64 image, flatten it onto white, crop it, encode as JPEG.

    Args:
        img_str: Base64-encoded image data.
        left: X coordinate of the left edge of the crop window.
        width: Width of the crop window in pixels.

    Returns:
        An ``io.BytesIO`` holding the JPEG bytes of the cropped image
        (full image height, ``width`` pixels wide starting at ``left``).
    """
    image = Image.open(BytesIO(base64.b64decode(img_str)))
    # paste() needs a mask with an alpha band; normalising to RGBA keeps
    # RGBA input unchanged and lets images without alpha paste opaquely.
    if image.mode != 'RGBA':
        image = image.convert('RGBA')
    # White RGB canvas: transparent pixels end up white instead of black.
    rgb_image = Image.new('RGB', image.size, (255, 255, 255))
    rgb_image.paste(image, (0, 0), mask=image)
    cropped_image = rgb_image.crop((left, 0, left + width, image.size[1]))
    image_bytes = io.BytesIO()
    cropped_image.save(image_bytes, format='JPEG')
    return image_bytes


def main2():
    """Synchronous pipeline: OCR every image in turn and save ``normal.csv``.

    Images whose request does not return HTTP 200 are reported on stdout
    and left out of the CSV.
    """
    results = []
    # One session for the whole run so the HTTP connection is reused.
    with requests.Session() as session:
        for img_str in read_data():
            image_bytes = process_image_(img_str)
            payload = {'images': [cv2_to_base64(image_bytes.getvalue())]}
            response = session.post(OCR_URL, data=json.dumps(payload), headers=JSON_HEADERS)
            if response.status_code == 200:
                results.append(response.json()["results"][0])
            else:
                print('Error:', response.status_code)
    save_data(results, 'normal.csv')


async def process_image(img_str, session=None):
    """Send one image to the OCR service.

    Args:
        img_str: Base64-encoded source image.
        session: Optional shared ``aiohttp.ClientSession``. When omitted, a
            temporary session is created for this single request.

    Returns:
        The first element of the response's ``"results"`` list, or ``None``
        when the service answers with a non-200 status.
    """
    if session is None:
        async with aiohttp.ClientSession() as own_session:
            return await process_image(img_str, session=own_session)

    image_bytes = process_image_(img_str)
    # Build the OCR request payload.
    payload = {'images': [cv2_to_base64(image_bytes.getvalue())]}
    async with session.post(OCR_URL, data=json.dumps(payload), headers=JSON_HEADERS) as response:
        if response.status == 200:
            return (await response.json())["results"][0]
        print(f'Error: {response.status}')
        return None


async def process_images(img_strs_list, max_concurrency=5):
    """OCR all images concurrently, at most ``max_concurrency`` at a time.

    Args:
        img_strs_list: Base64-encoded source images.
        max_concurrency: Upper bound on simultaneously running requests.

    Returns:
        A list with one result per input image, in input order; failed
        requests yield ``None``.
    """
    if not img_strs_list:
        return []

    sem = asyncio.Semaphore(max_concurrency)

    # A single shared session avoids the errors caused by opening one
    # session per image.
    async with aiohttp.ClientSession() as session:

        async def _bounded(img_str):
            # The semaphore is held for the duration of the request itself;
            # holding it only while creating tasks limits nothing.
            async with sem:
                return await process_image(img_str, session=session)

        tasks = [asyncio.create_task(_bounded(img_str)) for img_str in img_strs_list]
        results = await asyncio.gather(*tasks)
    return results


# The async pipeline is driven from the main coroutine.
async def main():
    """Asynchronous pipeline: load the images, OCR them, save ``async.csv``."""
    encoded_images = read_data()
    ocr_results = await process_images(encoded_images)
    save_data(ocr_results, 'async.csv')


# asyncio.run(main())  # 100 rows: 8.666
# Run the synchronous pipeline. Recorded timings: 100 rows -> 9.667; 96.832 was also noted.
main2()
# Report the wall-clock time elapsed since ``time1`` was taken.
elapsed = round(time.time() - time1, 3)
print(f'当前页 共花费--> ', elapsed, '\n')  # 1.813
(异步与非异步的耗时差不多:100 条数据分别约为 8.7 秒和 9.7 秒)
结果