效果:1:
效果图2:
一、安装tesseract.js
npm i tesseract.js
二、静态页面实现
<template><div><div style="marginTop:100px"><input @change="handleChage" type="file" id="image-input" accept="image/*"><br /><button @click="processImage">提取文字</button><div id="show-picture"></div></div><div><p style="color:red">提取到的内容:</p><span id="result"></span></div></div>
</template>
三、选择图片显示在页面上
<script setup>import { createWorker } from 'tesseract.js'; //将选择的图片显示在页面上const handleChage = () => {document.getElementById("result").innerText = ""let getUserPhoto = document.getElementById("image-input");//创建一个FileReader对象,用于读取图像文件let reader = new FileReader();//读取第一个文件,并转为base64格式reader.readAsDataURL(getUserPhoto.files[0]);//只显示第一个图片reader.onload = function () {let image = document.createElement("img");image.width = "400";//设置图片image.src = reader.result;let showPicture = document.getElementById("show-picture");while (showPicture.firstChild) {showPicture.removeChild(showPicture.firstChild);}showPicture.appendChild(image)};}<script>
四、核心代码,功能实现
const processImage = () => {let worker;let input = document.getElementById('image-input');if (input.files && input.files[0]) {let reader = new FileReader();reader.onload = async function (e) {//创建一个Worker线程,参数为需要识别的语言, chi_sim代表简体中文worker = await createWorker('chi_sim')worker.recognize(e.target.result).then(result => {// 提取出的文字,给元素赋值let extractedText = result.data.text;document.getElementById('result').innerText = extractedText;}).catch(error => {console.error('Error:', error);}).finally(() => {if (worker)// 清除当前Worker线程worker.terminate();})};reader.readAsDataURL(input.files[0]);}
}