闲得无聊,趁着休息研究了一下极验4滑块验证码的安全性,看看它是否存在被机器识别、自动化拖拽的可能性。首先看一下效果:
如何识别验证码
1、下载图片
下载图片可以参考博客《采集极验4滑块验证码图片数据》
2、标记图片
3、标记滑动距离
实现代码
__author__ = "dengxinyan"

import base64
import io
import json
import os
import random
import re
import time
import urllib
from io import BytesIO

import requests
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver import FirefoxOptions
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait


# Encode a PIL image as a base64 data URI
def PIL_base64(img, coding='utf-8'):
    """Encode a PIL image as a ``data:image/<type>;base64,...`` URI string.

    PNG and GIF sources keep their format; every other source format (or an
    image with no recorded format) is re-encoded as JPEG. Palette images are
    converted to RGB before saving, and RGBA images are always written as
    PNG because JPEG cannot store an alpha channel.

    Args:
        img: Image object providing ``format``, ``mode``, ``convert`` and
            ``save`` (a ``PIL.Image.Image``).
        coding: Codec used to decode the base64 bytes into ``str``.

    Returns:
        The data URI. Its MIME subtype always names the format the bytes
        were actually encoded in.
    """
    source_format = (img.format or 'JPEG').lower()
    target_format = source_format.upper() if source_format in ('png', 'gif') else 'JPEG'

    if img.mode == "P":
        img = img.convert('RGB')
    if img.mode == "RGBA":
        target_format = 'PNG'

    output_buffer = BytesIO()
    img.save(output_buffer, quality=100, format=target_format)
    encoded = base64.b64encode(output_buffer.getvalue()).decode(coding)
    # Label the payload with the format it was saved in, not the source
    # format: a BMP/WEBP source is re-encoded as JPEG above and must not be
    # advertised as image/bmp or image/webp.
    return 'data:image/' + target_format.lower() + ';base64,' + encoded


# Captcha recognition API
def shibie(img, timeout=30):
    """Send a captcha image to the recognition API and return its JSON reply.

    Args:
        img: PIL image of the captcha; it is encoded with ``PIL_base64``
            before being posted.
        timeout: Seconds ``requests`` waits for the API before raising a
            timeout error, so a stalled server cannot hang the script.

    Returns:
        The JSON-decoded response body. ``run`` reads ``code`` and
        ``data.res_str`` from it.
    """
    url = "http://www.detayun.cn/openapi/verify_code_identify/"
    data = {
        # The caller's API key.
        "key": "nWrzPFUgFuqXQrCJJUM6",
        # Captcha type identifier.
        "verify_idf_id": "23",
        # The captcha image as a base64 data URI.
        "img_base64": PIL_base64(img),
        "img_byte": None,
        # Text prompt for click / spatial-semantic captchas; left empty here.
        "words": "",
    }
    header = {"Content-Type": "application/json"}
    response = requests.post(url=url, json=data, headers=header, timeout=timeout)
    print(response.text)
    return response.json()


def _click_demo_tab(driver, label):
    """Wait for the demo button containing *label*, click its parent, pause 1s."""
    xpath = '//button[contains(text(),"{}")]'.format(label)
    WebDriverWait(driver, 20).until(lambda x: x.find_element_by_xpath(xpath))
    driver.find_element_by_xpath(xpath + '/..').click()
    time.sleep(1)


def _wait_for_background_url(driver, timeout=20):
    """Poll the captcha background element until its style exposes an image URL."""

    def _extract(drv):
        # Any lookup failure or missing url(...) just means "not ready yet";
        # WebDriverWait keeps polling and raises TimeoutException once the
        # deadline passes, instead of spinning forever.
        try:
            element = drv.find_element_by_xpath('//div[contains(@class,"geetest_bg")]')
            style = element.get_attribute('style') or ''
        except Exception:
            return False
        found = re.findall('url[(]"(.+?)"[)]', style)
        return found[0] if found else False

    return WebDriverWait(driver, timeout).until(_extract)


def _drag_slider(driver, distance):
    """Press the slider button, drag it *distance* pixels right, and release."""
    WebDriverWait(driver, 20).until(
        lambda x: x.find_element_by_xpath('//div[contains(@class,"geetest_btn")]'))
    # The second element whose class contains "geetest_btn" is used as the handle.
    slider = driver.find_elements_by_xpath('//div[contains(@class,"geetest_btn")]')[1]
    print(slider)
    # Build the whole gesture as a single chain and perform it once, so the
    # press is not replayed by a second perform() on the same chain.
    ActionChains(driver).click_and_hold(slider).move_by_offset(distance, 0).release().perform()


def run(headless=False):
    """Open the GeeTest demo page in Firefox and attempt the slider captcha five times.

    Each round selects the slider demo, downloads the background image,
    saves a copy under ``temp_img/``, asks ``shibie`` for the slide distance
    and drags the slider. The function stops early if the recognition API
    reports a ``code`` other than 200. The browser is always closed on exit.

    Args:
        headless: When true, start Firefox with ``--headless``.
    """
    option = FirefoxOptions()
    if headless:
        option.add_argument('--headless')
    else:
        option.add_argument('--window-size=100,100')
    # NOTE(review): executable_path and find_element(s)_by_xpath are the
    # Selenium 3 API; confirm the pinned selenium version before upgrading.
    driver = webdriver.Firefox(executable_path=r'webdriver\geckodriver.exe', options=option)
    try:
        # Disguise the automated browser.
        # NOTE(review): these overrides run before driver.get(); confirm they
        # are still in effect on the page that is loaded afterwards.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => false,});")
        driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5],});")

        driver.get('https://www.geetest.com/adaptive-captcha-demo')
        WebDriverWait(driver, 20).until(
            lambda x: x.find_element_by_xpath('//button[contains(text(),"一点即过验证")]'))

        # Scroll down with six DOWN key presses. send_keys() treats every
        # argument as keys to type, so no pixel count is passed.
        ActionChains(driver).send_keys(*([Keys.DOWN] * 6)).perform()

        # Make sure the folder for saved captcha images exists.
        os.makedirs('temp_img', exist_ok=True)

        for _ in range(5):
            # Switch to the one-click demo and back to the slider demo.
            _click_demo_tab(driver, "一点即过验证")
            _click_demo_tab(driver, "滑动拼图验证")

            # Click the "click to start verification" control.
            WebDriverWait(driver, 20).until(
                lambda x: x.find_element_by_xpath('//div[@aria-label="点击按钮开始验证"]'))
            driver.find_element_by_xpath('//div[@aria-label="点击按钮开始验证"]').click()

            # Download the captcha background and keep a timestamped copy.
            img_url = _wait_for_background_url(driver)
            response = requests.get(url=img_url, timeout=30)
            img = Image.open(BytesIO(response.content))
            img.save('temp_img/{}.jpg'.format(int(time.time() * 1000)))

            result = shibie(img)
            if result['code'] != 200:
                print(result)
                return
            move_x = int(result['data']['res_str'].replace('滑动', '').replace('px', ''))
            print('结果:', move_x)
            time.sleep(2)

            # The reported pixel distance is reduced by 15 before dragging
            # (presumably to compensate for blank space at the left edge —
            # TODO confirm).
            _drag_slider(driver, move_x - 15)

            # Give the page time to show the verification result.
            time.sleep(2)
    finally:
        driver.quit()


if __name__ == '__main__':
    run(headless=False)
极验4滑块验证码识别我也封装成了API接口,可以提供给大家免费调用:得塔云
总结
1、和极验3相比,极验4大图的反爬能力变弱了,因为极验3的图片链接很快就会失效。
2、和极验3相比,极验4图片的解析难度变小了。因为极验3的图片是打乱的,还需要拼图。
3、极验4的滑动过程也没有验证滑动轨迹。