EXCEL VBA调用adobe的api识别电子PDF发票里内容并登记台账
代码如下
使用须知:
1、工具--引用里勾选[Adobe Acrobat 10.0 Type Library]
2、安装Adobe Acrobat pro软件Dim sht As Worksheet
Function BrowseFolders() As String '浏览目录Dim objshell As ObjectDim objFolder As ObjectSet objshell = CreateObject("Shell.Application")Set objFolder = objshell.BrowseForFolder(0, "请指定发票文件所在的文件夹", 0, 0)BrowseFolders = ""If Not objFolder Is Nothing ThenBrowseFolders = objFolder.Self.PathEnd IfSet objFolder = NothingSet objshell = Nothing
End FunctionSub cmd_getpdf_Click()Dim Pth As String '文件路径Dim PDFName As String, Wapp As Object, Mstr As StringApplication.ScreenUpdating = False'============================================Pth = BrowseFoldersIf Pth = "" ThenPth = Sheet1.Range("A9").TextEnd IfIf Pth = "" ThenPth = ThisWorkbook.PathEnd IfIf Right(Pth, 1) <> "\" Then Pth = Pth & "\"Sheet1.Range("A8") = "上次路径:"Sheet1.Range("A9") = PthSheet1.Range("a15:a10000") = ""If Dir(Pth & "*.pdf") = "" ThenMsgBox "指定目录没有找到发票PDF文件!"Sheet1.Range("A9") = ""Exit SubEnd If'Debug.Print Pth'============================================For Each sht In ThisWorkbook.SheetsApplication.DisplayAlerts = FalseIf sht.Name = "发票资料读取到Excel" Then sht.DeleteApplication.DisplayAlerts = TrueNextSet sht = Worksheets.Add(, Worksheets(Sheets.Count))sht.Name = "发票资料读取到Excel"sht.Range("A1:J1") = Array("发票号码", "发票日期", "货物或*名称", "规格型号", "单位", "数量", "单价", "金额", "税率", "税额")'============================================定义表头字段PDFName = Dir(Pth & "*.pdf")Do While PDFName <> ""Call Imp_Into_XL(Pth & PDFName)PDFName = DirLoopsht.Columns.AutoFitMsgBox "操作完成!"'============================================Application.ScreenUpdating = True
End SubSub Imp_Into_XL(PDF_File As String)Dim AC_PD As Acrobat.AcroPDDocDim AC_Hi As Acrobat.AcroHiliteListDim AC_PG As Acrobat.AcroPDPageDim AC_PGTxt As Acrobat.AcroPDTextSelectDim Yes_Fir As BooleanDim Ct_Page As LongDim i As Long, j As Long, k As Long, m As IntegerDim T_Str As StringDim Hld, XL, Brr(), RowNo%, Arr As Variant, sss%Dim Hld_Txt As VariantDim FPHM As String '发票号码Dim FPRQ As String '发票日期Dim GGXH As String '规格型号Dim HWMC As String '货物名称Dim SL_SV As String '数量-税率Dim SL_SV_Temp As String '数量-税率的临时存变量Dim HWDW As String '货物单位Dim SL As String '数量Dim DW As String '单位Dim XH As String '型号'====================================================定义字段类型Set AC_PD = New Acrobat.AcroPDDocSet AC_Hi = New Acrobat.AcroHiliteListAC_Hi.Add 0, 32767With AC_PD.Open PDF_FileCt_Page = .GetNumPagesIf Ct_Page = -1 ThenMsgBox "请确认发票文件 '" & PDF_File & "'".CloseGoTo h_endEnd IfFor i = 1 To 1 ' Ct_Page '只考虑一个文档有一张发票的情形T_Str = ""Set AC_PG = .AcquirePage(i - 1)Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)If Not AC_PGTxt Is Nothing ThenWith AC_PGTxtFor j = 0 To .GetNumText - 1T_Str = T_Str & .GetText(j)Next jEnd WithEnd If'==========================================================If T_Str <> "" ThenHld_Txt = Split(T_Str, vbCrLf)FPHM = "": FPRQ = "":: GGXH = "": HWMC = ""For j = 0 To UBound(Hld_Txt)If InStr(Hld_Txt(j), "年月日") = 0 ThenIf InStr(Hld_Txt(j), "年") > 0 And InStr(Hld_Txt(j), "月") > 0 And InStr(Hld_Txt(j), "日") > 0 Then '当字符串里含有年月日时Hld_Txt(j) = Repce2(Hld_Txt(j))Hld_Txt(j) = Trim(Replace(Hld_Txt(j), "开票日期:", "")) '如果有"开票日期:"几个字,将其替换掉FPRQ = Left(Hld_Txt(j), 4) & "-" & Mid(Hld_Txt(j), 6, 2) & "-" & Mid(Hld_Txt(j), 9, 2)Exit ForEnd IfEnd IfNext jFor j = 0 To UBound(Hld_Txt)If TestNumber(Hld_Txt(j)) Then '测试是否含有数字并以数字结尾的类型,加以判断If Len(Hld_Txt(j)) = 10 And TestCH(Hld_Txt(j)) = False Then '当字符串里没有年月日,但是以"2023 06 30"有空格,共有10个字符串位置形式存在时取得发票日期If InStr(Hld_Txt(j), " ") > 0 And UBound(Split(Hld_Txt(j), " ")) > 0 ThenFPRQ = "'" & RegR(Hld_Txt(j)) '取得发票日期Exit ForEnd IfEnd IfEnd IfNext jFor j = 0 To UBound(Hld_Txt)If TestNumber(Hld_Txt(j)) Then '测试是否含有数字并以数字结尾的类型,加以判断Hld_Txt(j) = Trim(Replace(Hld_Txt(j), "发票号码:", "")) '如果有"发票号码:"几个字,将其替换掉If Len(Hld_Txt(j)) = 8 Or Len(Hld_Txt(j)) = 20 Then '//***限定要取出的发票号码为8位或者20位数字,否则发票号码取不出来If IsNumeric(Hld_Txt(j)) ThenIf InStr(Hld_Txt(j), ".") = 0 And InStr(Hld_Txt(j), ChrW(165)) = 0 ThenFPHM = Regs(Hld_Txt(j)) '取得8位或者20位的发票号码Exit ForEnd IfEnd IfEnd IfEnd IfNext jk = 0For j = 0 To UBound(Hld_Txt)If Len(Trim(Hld_Txt(j))) > 2 Then '//***当字符数大于2时,有的只有一个*号,这种情形需要排除If Left(Trim(Hld_Txt(j)), 1) = "*" Or InStr(Hld_Txt(j), "详见") > 0 Then '////当货物名称前面第一个字符是*号或者含有(详见)时Arr = Array("+", "<", ">") '/***密码区有许多有这几个符号,遇到了就避开它sss = 0For m = LBound(Arr) To UBound(Arr) '//***避免遇到密码区以*号开头,并且有Arr数组里符号的情形If InStr(Hld_Txt(j), Arr(m)) > 0 Then sss = sss + 1Next mIf sss = 0 ThenHld_Txt(j) = Trim(Hld_Txt(j)) '清除前后空格Hld_Txt(j) = StrConv(Hld_Txt(j), vbNarrow) '全角转为半角Hld_Txt(j) = Repce(Hld_Txt(j)) '将字符串中多个空格变成一个If InStr(Hld_Txt(j), "%") > 0 Or Right(Trim(Hld_Txt(j)), 1) = "*" ThenFor m = UBound(Split(Hld_Txt(j), " ")) To 0 Step -1If TestCHNum(Split(Hld_Txt(j), " ")(m)) = False Or Trim(Split(Hld_Txt(j), " ")(m)) = "*" Then '循环判定,取出有数字的数量-税额部分//有部分的金额和税额是*号If TestCH(Split(Hld_Txt(j), " ")(m)) = True And InStr(Hld_Txt(j), "不征税") = 0 Then Exit ForSL_SV = Split(Hld_Txt(j), " ")(m) & " " & SL_SVSL_SV_Temp = Split(Hld_Txt(j), " ")(m) & " " & SL_SV_Temp '增加这个变量,存下原始的数量金额部分If InStr(SL_SV, "不征税") > 0 And Len(SL_SV) > 3 Then SL_SV = Left(SL_SV, InStr(SL_SV, "税")) & " " & Right(SL_SV, Len(SL_SV) - InStr(SL_SV, "税"))SL_SV = Trim(SL_SV)SL_SV_Temp = Trim(SL_SV_Temp)If m < UBound(Split(Hld_Txt(j), " ")) And Split(Hld_Txt(j), " ")(m) < 0 Then Exit ForElseIf TestCHNum(Split(Hld_Txt(j), " ")(m)) = True Then '循环判定,如含有中文+数字,则需拆分SL_SV = RegSL(Split(Hld_Txt(j), " ")(m)) & " " & SL_SVSL_SV_Temp = RegSL(Split(Hld_Txt(j), " ")(m)) & " " & SL_SV_TempSL_SV = Trim(SL_SV)SL_SV_Temp = Trim(SL_SV_Temp)Exit ForEnd IfNext mSL_SV = Repce(SL_SV): SL_SV_Temp = Repce(SL_SV_Temp) '用原始的数量金额部分来方便取出GGXHGGXH = Trim(Replace(Hld_Txt(j), SL_SV_Temp, "")) '去掉数量-税额部分,下余的是规格型号 ////***前面做过变动后,这里用replace取不出余下的规格型号SL_SV_Temp = ""SL_SV = SL_JE(SL_SV) '数量-税额部分,不能用trim去掉前面空格If InStr(GGXH, "费") > 0 ThenHWMC = Left(GGXH, InStr(GGXH, "费")) '货物名称,有费字的取费字前面字符(含费字)作为货物名称GGXH = Trim(Replace(GGXH, HWMC, "")) '费字后面的是规格型号+单位ElseIf InStr(GGXH, " ") = 0 ThenHWMC = GGXH: GGXH = "" '规格型号没有包含空格时,货物名称就取ggxh,将原来的ggxh置空ElseHWMC = Split(GGXH, " ")(0) '规格型号有包含空格时,货物名称取ggxh的第一个空格前的字符GGXH = Trim(Replace(GGXH, HWMC, "")) '规格型号取除了货物名称后的余下的值End IfEnd IfIf InStr(GGXH, " ") = 0 Then '当规格型号没有空格时********Select Case Len(GGXH)Case Is = 0 '当费后面的字符数量为0时If Split(SL_SV, " ")(0) = "" Then '当数据部分第一个字符为空时,货物名称就只为货物名称HWMC = HWMC & " " & " "Else '当数据部分第一个字符不为空时,货物名称取最后一个值为单位,次一个值为规格型号If Mid(HWMC, Len(HWMC) - 1, 2) = "服务" Or InStr(HWMC, "费") > 0 ThenHWMC = HWMC & " " & " " '当货物名称最后两个字是"服务"时或含有"费"时,已经不能拆开了.ElseIf InStr(HWMC, "费") = 0 ThenDW = Right(HWMC, 1) '取右边一位做单位*****XH = Mid(HWMC, Len(HWMC) - 1, 1)HWMC = Left(HWMC, Len(HWMC) - 2)If InStr(HWMC, XH & DW) > 0 Or InStr(HWMC, XH) > 0 Or InStr(HWMC, DW) > 0 ThenHWMC = HWMC & XH & DW & " " & " "ElseHWMC = HWMC & " " & XH & " " & DWEnd IfEnd IfEnd IfCase Is >= 1 '当费后面的字符数量为1或者大于1时DW = Right(GGXH, 1) '取右边一位做单位XH = Replace(GGXH, DW, "") '余下的是型号If Split(SL_SV, " ")(0) = "" ThenHWMC = HWMC & " " & " "ElseIf XH <> "" ThenHWMC = HWMC & " " & XH & " " & DWElseHWMC = HWMC & " " & " " & DWEnd IfEnd IfEnd SelectElseIf InStr(GGXH, " ") > 0 Then '当规格型号有空格时If Split(SL_SV, " ")(0) <> "" ThenHWDW = Split(GGXH, " ")(UBound(Split(GGXH, " "))) '单位If Len(HWDW) > 1 ThenHWDW = Right(HWDW, 1)GGXH = Replace(GGXH, HWDW, "")GGXH = Replace(GGXH, " ", "_")HWMC = HWMC & " " & GGXH & " " & HWDWElseXH = Trim(Replace(GGXH, HWDW, "")) '规格型号If XH = "" ThenIf Len(HWDW) > 1 ThenDW = Right(HWDW, 1)XH = Replace(HWDW, DW, "")HWMC = HWMC & " " & XH & " " & DWElseIf Len(HWDW) = 1 ThenHWMC = HWMC & " " & " " & DWEnd IfElseDW = HWDWXH = Trim(Replace(XH, " ", "_")) '去掉规格型号中的空格,用下横线连接HWMC = HWMC & " " & XH & " " & DWEnd IfEnd IfElseIf Split(SL_SV, " ")(0) = "" ThenXH = Replace(GGXH, " ", "_") '去掉规格型号中的空格,用下横线连接HWMC = HWMC & " " & XH & " " '没有单位,要加上表示单位的空格End IfEnd IfElseIf UBound(Split(Hld_Txt(j), " ")) <= 2 And InStr(Hld_Txt(j), "%") = 0 Then '当品名与数量金额等不在同一行时HWMC = Hld_Txt(j)For m = j To UBound(Hld_Txt)If InStr(Hld_Txt(m), "%") > 0 Then SL_SV_Temp = Hld_Txt(m): Exit ForNext mFor m = UBound(Split(SL_SV_Temp, " ")) To 0 Step -1If TestCHNum(Split(SL_SV_Temp, " ")(m)) = False Or Trim(Split(SL_SV_Temp, " ")(m)) = "*" Then '循环判定,取出有数字的数量-税额部分//有部分的金额和税额是*号If TestCH(Split(SL_SV_Temp, " ")(m)) = True And InStr(SL_SV_Temp, "不征税") = 0 Then Exit ForSL_SV = Split(SL_SV_Temp, " ")(m) & " " & SL_SV '增加这个变量,存下原始的数量金额部分If InStr(SL_SV, "不征税") > 0 And Len(SL_SV) > 3 Then SL_SV = Left(SL_SV, InStr(SL_SV, "税")) & " " & Right(SL_SV, Len(SL_SV) - InStr(SL_SV, "税"))SL_SV = Trim(SL_SV)If m < UBound(Split(SL_SV_Temp, " ")) And Split(SL_SV_Temp, " ")(m) < 0 Then Exit ForElseIf TestCHNum(Split(SL_SV_Temp, " ")(m)) = True Then '循环判定,如含有中文+数字,则需拆分SL_SV = RegSL(Split(SL_SV_Temp, " ")(m)) & " " & SL_SVSL_SV = Trim(SL_SV)Exit ForEnd IfNext mSL_SV_Temp = Replace(SL_SV_Temp, SL_SV, "")SL_SV = Repce(SL_SV) '用原始的数量金额部分来方便留下GGXHGGXH = Trim(Replace(SL_SV_Temp, SL_SV, "")) '去掉数量-税额部分,下余的是规格型号 ////***前面做过变动后,这里用replace取不出余下的规格型号If Len(GGXH) = 0 Then '当规格型号为空时DW = Split(HWMC, " ")(UBound(Split(HWMC, " ")))HWMC = Trim(Replace(HWMC, DW, ""))XH = Trim(Replace(HWMC, Split(HWMC, " ")(0), " "))HWMC = Trim(Replace(HWMC, XH, ""))If InStr(HWMC, " ") > 0 Then HWMC = Replace(HWMC, " ", "_") '将货物名称里原有的空格用下划线代替If InStr(XH, " ") > 0 Then XH = Replace(XH, " ", "_") '将型号里原有的空格用下划线代替If Len(XH) > 0 ThenHWMC = HWMC & " " & XH & " " & DWElseHWMC = HWMC & " " & " " & DWEnd IfElseIf Len(GGXH) > 0 Then '当规格型号不为空时If InStr(HWMC, " ") > 0 Then HWMC = Replace(HWMC, " ", "_") '将货物名称里原有的空格用下划线代替If InStr(GGXH, " ") > 0 ThenDW = Split(GGXH, " ")(UBound(Split(GGXH, " "))) '单位XH = Trim(Replace(SL_SV_Temp, DW, "")) '型号If InStr(XH, " ") > 0 Then XH = Replace(XH, " ", "_") '将型号里原有的空格用下划线代替If Len(XH) > 0 ThenHWMC = HWMC & " " & XH & " " & DWElseHWMC = HWMC & " " & " " & DWEnd IfElseDW = Right(GGXH, 1) '取右边一位做单位*****XH = Replace(GGXH, DW, "")HWMC = HWMC & " " & XH & " " & DWEnd IfEnd IfEnd IfIf Split(SL_SV, " ")(0) = "" Then '///*****************Hld_Txt(j) = HWMC & SL_SVElseHld_Txt(j) = HWMC & " " & SL_SVEnd IfHWMC = "": SL_SV = "": SL = "": DW = "": XH = "": GGXH = "": HWDW = "": SL_SV_Temp = ""If UBound(Split(Hld_Txt(j), " ")) = 7 Thenk = k + 1ReDim Preserve Brr(1 To 10, 1 To k)Brr(1, k) = "'" & FPHM: Brr(2, k) = FPRQ '编号及日期For m = 0 To UBound(Split(Hld_Txt(j), " "))Brr(3 + m, k) = Split(Hld_Txt(j), " ")(m)Next mElseGoTo 0End IfEnd IfEnd IfEnd IfNext jWith shtIf k = 0 Then GoTo 0RowNo = .Cells(65536, 1).End(3).Row + 1.Cells(RowNo, 1).Resize(UBound(Brr, 2), UBound(Brr)) = Application.Transpose(Brr)' .Cells(RowNo, 11) = PDF_File '将文件名称放在最后一列Erase BrrEnd WithElseIf T_Str = "" Then
0MsgBox PDF_File & "文件没有取到数据,请检查!", vbOKOnly, "ExcelHome"Sheet1.Cells(Sheet1.Cells(65536, 1).End(3).Row + 1, 1) = PDF_File '将有问题的文件名称放在sheet1表中,方便查验Exit ForEnd If'===========================================================Next i.CloseEnd Withh_end:Set AC_PGTxt = NothingSet AC_PG = NothingSet AC_Hi = NothingSet AC_PD = Nothing
End Sub
Function Regs(STR) '取发票号码
Dim reg As Object, mh As VariantSet reg = CreateObject("VBScript.RegExp")With reg.Global = True.Pattern = "(^\d{8}$|^\d{20}$)" '是8位或者是20位Set mh = .Execute(STR)Regs = mh.Item(0).SubMatches.Item(0)End With
End Function
Function RegR(STR) '取发票日期
Dim reg As Object, mh As VariantSet reg = CreateObject("VBScript.RegExp")With reg.Global = True.Pattern = "(^\d{4} \d{2} \d{2}$)" '前四位年,中两位月,后两位日Set mh = .Execute(STR)RegR = Replace(mh.Item(0).SubMatches.Item(0), " ", "-")End With
End Function
Function RegSL(STR) '取数量
Dim reg As Object, mh As VariantSet reg = CreateObject("VBScript.RegExp")With reg.Global = True'.Pattern = "[\u4e00-\u9fff](\d+\.\d+|\d+)" '中文后面跟的数量为小数或整数.Pattern = "[\u4e00-\u9fff]([-]?\d+\.\d+$|[-]?\d+$)" '中文后面跟的数量为小数或整数Set mh = .Execute(STR)RegSL = mh.Item(0).SubMatches.Item(0)End With
End Function
Function TestNumber(STR) '测试是否最后是数字
Dim reg As ObjectSet reg = CreateObject("VBScript.RegExp")With reg.Global = True.Pattern = "^\d+\.\d+$|\d+$"TestNumber = .test(STR)End With
End Function
Function TestCH(STR) '测试是否以中文开始
Dim reg As ObjectSet reg = CreateObject("VBScript.RegExp")With reg.Global = True.Pattern = "[\u4e00-\u9fff]+"TestCH = .test(STR)End With
End Function
Function TestCHNum(STR) '测试是否以中文后跟随数字
Dim reg As ObjectSet reg = CreateObject("VBScript.RegExp")With reg.Global = True.Pattern = "[\u4e00-\u9fff]([-]?\d+\.\d+$|[-]?\d+$)"TestCHNum = .test(STR)End With
End Function
Public Function Repce(STR) '多个空格变成一个
With CreateObject("VBSCRIPT.REGEXP").Global = True.Pattern = "\s+"Repce = .Replace(STR, " ")
End With
End Function
Public Function Repce2(STR) '去掉中间空格
With CreateObject("VBSCRIPT.REGEXP").Global = True.Pattern = "\s+"Repce2 = .Replace(STR, "")
End With
End Function
Public Function SL_JE(STR) '处理数量金额这部分Dim i%, str_tempSelect Case UBound(Split(STR, " "))Case Is >= 5For i = UBound(Split(STR, " ")) To UBound(Split(STR, " ")) - 4 Step -1str_temp = Split(STR, " ")(i) & " " & str_tempNext iSL_JE = Trim(str_temp)Case 4SL_JE = STRCase 2SL_JE = " " & STREnd Select
End Function