通过redfish协议实现服务器固件升级、从虚拟光驱启动自检盘并等待完成,最后截图保存
- 版本信息
- 代码
新开发的PCIE设备在做服务器适配时,有时需要服务器厂家更新BMC或BIOS固件。同时,我们也希望对PCIE设备做一些检测,最后收集一些信息存档。如果需要处理的服务器很多,通过BMC的界面进行人工操作就会比较麻烦。以下提供了一个脚本,供参考。
主要思路:
- 采用haneWIN NFS Server搭建一个NFS服务,目录为nfs,里面存放着boot.iso(设备检测镜像)
- 通过redfish协议登录BMC,获取PCIE设备信息,服务器信息,升级固件,重启服务器,挂iso,设置启动方式
- 通过KVM截屏获取屏幕内容,并用图片相似度(SSIM)与预存的目标图片(target_image.jpg)比对,判断ISO里的检测程序是否运行完成。
版本信息
属性 | 值 |
---|---|
NFS服务器 | haneWIN NFS Server |
服务器型号 | NF5270M6 |
代码
# -*- coding: utf-8 -*-
"""Batch self-test driver for servers managed through a Redfish BMC.

For every server listed in a CSV file the script logs in to the BMC over
Redfish and, depending on ``-checkonly``, either reports the PCIe device
summary or applies a boot override and restarts the host.  The class
:class:`InspurVA1Query` also contains the full sequence (``run``): mount
``boot.iso`` from an NFS share as virtual CD, boot from it, poll KVM
screenshots until the self-test banner appears and archive the screenshot.

CSV layout (first line is a header and is skipped; lines whose first column
starts with ``#`` are ignored)::

    index, bmc_addr, username, password, try_count
"""
from queue import Queue
from skimage.metrics import structural_similarity
import traceback
import cv2
import codecs
import csv
import argparse
import shutil
import json
import time
import redfish
import sys
import os
import uuid
import threading
import warnings
warnings.filterwarnings("ignore")
import logging

parser = argparse.ArgumentParser()
parser.add_argument('-server_list', type=str, required=True, help="server_list")
parser.add_argument('-nfs_server', type=str, required=True, help="nfs server")
parser.add_argument('-threads', type=int, required=True, help="worker thread count")
parser.add_argument('-checkonly', type=int, required=True, help="check only")
args = parser.parse_args()

# BIOS attributes that hold the UEFI boot order, highest priority first.
BOOT_ORDER_NAMES = ('UefiBootOrder1', 'UefiBootOrder2', 'UefiBootOrder3', 'UefiBootOrder4')


def _json_body(response):
    """Decode the raw body of a Redfish response as JSON."""
    return json.loads(response._read.decode())


def _find_cd_boot_entry(attributes):
    """Return the name of the first boot-order attribute that points at a
    CD/DVD device, or ``""`` when none does."""
    for name in BOOT_ORDER_NAMES:
        value = attributes.get(name)
        if isinstance(value, str) and 'CD/DVD' in value:
            return name
    return ""


class TimeSpan:
    """Context manager that logs the wall-clock time spent in its body."""

    def __init__(self, logger, prefix=""):
        self.prefix = prefix
        self.logger = logger

    def __enter__(self):
        self.end = None
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time.time()
        interval = self.end - self.start
        unit = "sec"
        if interval > 60:
            unit = "min"
            interval = interval / 60
        self.logger.info('%-64s:%.3f(%s)' % (self.prefix, interval, unit))


def isSimilarity(filename, target_image='target_image.jpg', threshold=0.5):
    """Tell whether the self-test program has finished.

    A template-sized window is slid down the left edge of the screenshot
    (row offsets 280, 283, ... below 320) and compared with the template
    using SSIM.

    Args:
        filename (str): path of the KVM screenshot.
        target_image (str): path of the template image to look for.
        threshold (float): SSIM value above which a window counts as a match.

    Returns:
        bool: True when any window matches the template.

    Raises:
        FileNotFoundError: the template image cannot be read.
    """
    template = cv2.imread(target_image, cv2.IMREAD_GRAYSCALE)
    if template is None:
        raise FileNotFoundError(target_image)
    shot = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if shot is None:
        # Unreadable or truncated screenshot: treat as "not finished yet".
        return False
    cell_h, cell_w = template.shape
    for hoff in range(280, 320, 3):
        window = shot[hoff:hoff + cell_h, 0:cell_w]
        if window.shape != template.shape:
            # Screenshot too small for the window; larger offsets only shrink it.
            break
        if structural_similarity(template, window) > threshold:
            return True
    return False


class RedFishProxy:
    """Thin wrapper around a redfish client that retries on HTTP 500."""

    def __init__(self, handle, retry_count=3):
        self.handle = handle
        self.retry_count = retry_count

    def _call(self, method, *call_args):
        """Invoke ``method`` and repeat it (2 s apart) while it answers 500,
        at most ``retry_count`` extra times."""
        attempts = 0
        while True:
            response = method(*call_args)
            if response._status == 500 and attempts < self.retry_count:
                time.sleep(2)
                attempts += 1
                continue
            return response

    def post(self, path, args=None, body=None, headers=None):
        return self._call(self.handle.post, path, args, body, headers)

    def get(self, path, args=None, headers=None):
        return self._call(self.handle.get, path, args, headers)

    def delete(self, path, args=None, headers=None):
        return self._call(self.handle.delete, path, args, headers)

    def patch(self, path, args=None, body=None, headers=None):
        return self._call(self.handle.patch, path, args, body, headers)


class InspurVA1Query:
    """Redfish session against one BMC plus the self-test steps built on it."""

    def __init__(self, logger, index, bmc_host, username, password, nfs_server, try_count):
        self.logger = logger
        self.nfs_server = nfs_server
        self.username = username
        self.password = password
        self.try_count = try_count
        self.bmc_host = bmc_host
        self.seq = 0
        self.token = None
        self.Id = None            # session id, set by Login()
        self.result_image = None  # final screenshot path, set by run()
        self.index = index
        self.redfish_client = RedFishProxy(redfish.redfish_client(
            base_url=self.bmc_host, username=self.username, password=self.password))

    def _auth_headers(self):
        """Headers carrying the current session token."""
        return {"X-Auth-Token": self.token}

    def Login(self):
        """Create a Redfish session and remember its token and id."""
        url = '/redfish/v1/SessionService/Sessions'
        req_body = {"UserName": self.username, "Password": self.password, "SessionTimeOut": 300}
        req_headers = {"Content-Type": "application/json"}
        response = self.redfish_client.post(url, headers=req_headers, body=req_body)
        if response._status == 201:
            session = _json_body(response)
            self.token = session["Oem"]['Public']['X-Auth-Token']
            self.Id = session["Id"]
            return True
        self.logger.error("Login:{}".format(response))
        return False

    def Logout(self):
        """Delete the current session; returns False when there is none."""
        if not self.token:
            return False
        url = '/redfish/v1/SessionService/Sessions/{}'.format(self.Id)
        response = self.redfish_client.delete(url, headers=self._auth_headers())
        self.token = None
        if response._status == 200:
            return True
        self.logger.error("Logout:{}".format(response))
        return False

    def QueryMedia(self):
        """Log the virtual CD resource as returned by the BMC."""
        url = '/redfish/v1/Managers/1/VirtualMedia/CD'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        self.logger.info("QueryMedia:{}".format(response))
        return True

    def IsMounted(self):
        """Return the ``Inserted`` flag of the virtual CD (False on error)."""
        url = '/redfish/v1/Managers/1/VirtualMedia/CD'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        if response._status == 200:
            body = _json_body(response)
            if 'Inserted' in body:
                return body['Inserted']
            return False
        self.logger.error("IsMounted:{}".format(response))
        return False

    def _wait_mounted(self, expected, timeout):
        """Poll IsMounted() once per second until it equals ``expected``;
        return False when ``timeout`` seconds pass first."""
        deadline = time.monotonic() + timeout
        while True:
            if bool(self.IsMounted()) == expected:
                return True
            if time.monotonic() >= deadline:
                return False
            time.sleep(1)

    def InsertMedia(self, timeout=120):
        """Mount ``<nfs_server>/nfs/boot.iso`` as virtual CD and wait until
        the BMC reports it inserted (at most ``timeout`` seconds)."""
        url = '/redfish/v1/Managers/1/VirtualMedia/CD/Actions/VirtualMedia.InsertMedia'
        req_body = {"TransferProtocolType": 'NFS', "Image": '{}/nfs/boot.iso'.format(self.nfs_server)}
        response = self.redfish_client.post(url, headers=self._auth_headers(), body=req_body)
        if response._status == 200:
            if self._wait_mounted(True, timeout):
                return True
            self.logger.error("InsertMedia Timeout:{}".format(timeout))
            return False
        self.logger.error("InsertMedia:{}".format(response))
        return False

    def EnableVirtualCDBoot(self, timeout=400):
        """Make sure the BIOS allows booting from the virtual CD.

        When the fixed boot order is already disabled and a CD/DVD entry is
        present nothing is changed.  Otherwise the BIOS settings are patched,
        the host is force-restarted and the BIOS resource is polled until the
        new settings are visible or ``timeout`` seconds have passed.
        """
        url = '/redfish/v1/Systems/1/Bios'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        if response._status != 200:
            self.logger.error("QueryBiosSetting1:{}".format(response))
            return False

        ETag = response.getheader("ETag")
        attributes = _json_body(response)['Attributes']
        cureOrder = _find_cd_boot_entry(attributes)
        if attributes.get('FixedBootOrderEn') == 'Disabled' and cureOrder != "":
            self.logger.info("CDBoot Already Enable:{} {} {}".format(
                attributes['FixedBootOrderEn'], cureOrder, attributes[cureOrder]))
            return True

        url = '/redfish/v1/Systems/1/Bios/Settings'
        req_headers = {"X-Auth-Token": self.token, "If-Match": ETag}
        req_body = {'Attributes': {'FixedBootOrderEn': 'Disabled'}}
        if cureOrder == "":
            # Use the last boot slot for the CD so the normal boot order is unaffected.
            req_body['Attributes']['UefiBootOrder4'] = "CD/DVD:UEFI: AMI Virtual CDROM0 1.00"
        response = self.redfish_client.patch(url, headers=req_headers, body=req_body)
        if response._status not in [200]:
            self.logger.error("BiosSetting:{}".format(response))
            return False

        if not self.ComputerSystemReset("ForceRestart"):
            return False
        beg = time.time()
        while True:
            time.sleep(2)
            cur = time.time()
            if cur - beg > timeout:
                self.logger.error("EnableVirtualCDBoot Timeout:{}".format(cur - beg))
                return False
            url = '/redfish/v1/Systems/1/Bios'
            response = self.redfish_client.get(url, headers=self._auth_headers())
            if response._status == 200:
                attributes = _json_body(response)['Attributes']
                cureOrder = _find_cd_boot_entry(attributes)
                if attributes.get('FixedBootOrderEn') == 'Disabled' and cureOrder != "":
                    self.logger.info("EnableVirtualCDBoot Finished:{} {} {}".format(
                        cur - beg, cureOrder, attributes[cureOrder]))
                    return True
            else:
                self.logger.error("QueryBiosSetting2:{}".format(response))

    def _apply_boot_override(self, label, target, enabled, mode, verify_key, verify_value):
        """Patch the boot override of Systems/1 and confirm it took effect.

        The resource is re-read up to ten times, one second apart, until
        ``Boot[verify_key]`` equals ``verify_value``.  ``label`` prefixes the
        error log lines.
        """
        url = '/redfish/v1/Systems/1'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        if response._status != 200:
            self.logger.error("{}:{}".format(label, response))
            return False

        req_headers = {"X-Auth-Token": self.token, "If-Match": response.getheader("ETag")}
        req_body = {'Boot': {
            'BootSourceOverrideTarget': target,
            'BootSourceOverrideEnabled': enabled,
            'BootSourceOverrideMode': mode,
        }}
        response = self.redfish_client.patch(url, headers=req_headers, body=req_body)
        if response._status != 200:
            self.logger.error("{}:{}".format(label, response))
            return False

        for _ in range(10):
            response = self.redfish_client.get(url, headers=self._auth_headers())
            if response._status == 200 and _json_body(response)['Boot'][verify_key] == verify_value:
                return True
            time.sleep(1)
        self.logger.error("{}:{}".format(label, response))
        return False

    def SetBootOrder(self):
        """Boot once from CD (UEFI) on the next start."""
        return self._apply_boot_override(
            "SetBootOrder", 'Cd', 'Once', 'UEFI', 'BootSourceOverrideTarget', "Cd")

    def SetBootOrderLegacy(self):
        """Set a continuous boot override to Hdd in UEFI mode and confirm the mode."""
        return self._apply_boot_override(
            "SetBootOrderLegacy", 'Hdd', 'Continuous', 'UEFI', 'BootSourceOverrideMode', "UEFI")

    def EjectMedia(self, timeout=120):
        """Eject the virtual CD if one is inserted and wait until the BMC
        reports it removed (at most ``timeout`` seconds)."""
        if not self.IsMounted():
            return True
        url = '/redfish/v1/Managers/1/VirtualMedia/CD/Actions/VirtualMedia.EjectMedia'
        req_body = {"TransferProtocolType": 'NFS', "ImageName": 'boot.iso'}
        response = self.redfish_client.post(url, headers=self._auth_headers(), body=req_body)
        # A 500 answer is accepted here as well; the mount state decides.
        if response._status in [200, 500]:
            if self._wait_mounted(False, timeout):
                return True
            self.logger.error("EjectMedia Timeout:{}".format(timeout))
            return False
        self.logger.error("EjectMedia:{}".format(response))
        return False

    def ComputerSystemReset(self, ResetType):
        """Send ComputerSystem.Reset with the given ResetType."""
        url = '/redfish/v1/Systems/1/Actions/ComputerSystem.Reset'
        req_body = {"ResetType": ResetType}
        response = self.redfish_client.post(url, headers=self._auth_headers(), body=req_body)
        if response._status == 200:
            return True
        self.logger.error("ComputerSystemReset:{}".format(response))
        return False

    def ChassisReset(self, ResetType, retry_count=3):
        """Send Chassis.Reset, retrying up to ``retry_count`` times 2 s apart."""
        url = '/redfish/v1/Chassis/1/Actions/Chassis.Reset'
        req_body = {"ResetType": ResetType}
        for i in range(retry_count):
            response = self.redfish_client.post(url, headers=self._auth_headers(), body=req_body)
            if response._status == 200:
                return True
            self.logger.error("ChassisReset[{}-{}]:{}".format(i, ResetType, response))
            time.sleep(2)
        return False

    def WaitFinished(self, timeout=5 * 60):
        """Poll KVM screenshots until the self-test banner is detected.

        Each round triggers a screenshot, downloads it and checks it with
        isSimilarity().  On a match the image is moved to
        ``self.result_image`` and True is returned; False on timeout.
        """
        beg = time.time()
        while True:
            # Do not poll too fast, otherwise the system tends to run into problems.
            time.sleep(3)
            if time.time() - beg > timeout:
                self.logger.error("WaitFinished,Timeout")
                return False

            url = '/redfish/v1/Managers/1/Actions/Oem/Public/KVM/Screenshot'
            response = self.redfish_client.post(url, headers=self._auth_headers())
            if response._status != 200:
                self.logger.error("Screenshot:{}".format(response))
                continue

            # While the picture is still being generated do not trigger another
            # capture; wait for it to complete, otherwise a black screen results.
            retry_count = 10
            while retry_count > 0:
                if time.time() - beg > timeout:
                    self.logger.error("WaitFinished,Timeout")
                    return False
                url = '/redfish/v1/Managers/1/Actions/Oem/Public/KVM/ScreenshotDownload'
                req_body = {"PictureAttributes": 'manual'}
                response = self.redfish_client.post(url, headers=self._auth_headers(), body=req_body)
                status = response._status
                if status == 404:  # The file is being generated
                    time.sleep(2)
                    retry_count -= 1
                    continue
                if status == 200:
                    image_path = "{}-{}-{}-ing.jpg".format(self.index, self.bmc_host, self.seq)
                    with open(image_path, "wb") as f:
                        f.write(response.read)
                    if isSimilarity(image_path):
                        shutil.move(image_path, self.result_image)
                        return True
                elif status not in (500, 401):
                    # 500: There are no manual pictures at present
                    # 401: Invalid Authentication
                    self.logger.error("ScreenshotDownload:{}".format(response))
                break

    def _log_pcie_device(self, dev_index, retries=30):
        """Log state and link information of one PCIe device if it is an
        NVIDIA device; retry once per second while the BMC answers non-200."""
        for _ in range(retries):
            url = '/redfish/v1/Chassis/1/PCIeDevices/{}'.format(dev_index)
            response = self.redfish_client.get(url, headers=self._auth_headers())
            if response._status != 200:
                time.sleep(1)
                continue
            body = _json_body(response)
            State = body['Status']['State']
            Health = body['Status']['Health']
            SlotNumber = body['Oem']['Public']['SlotNumber']
            Manufacturer = body['Manufacturer']
            if Manufacturer != "NVIDIA":
                # Not a device of interest: go on to the next one instead of
                # querying this one again.
                return
            url = '/redfish/v1/Chassis/1/PCIeDevices/{}/PCIeFunctions/1'.format(dev_index)
            response = self.redfish_client.get(url, headers=self._auth_headers())
            if response._status == 200:
                body = _json_body(response)
                LinkWidth = body['Oem']['Public']['LinkWidth']
                LinkSpeed = body['Oem']['Public']['LinkSpeed']
                self.logger.info("{} {} {} {} {} {} {}".format(
                    dev_index, Manufacturer, State, Health, SlotNumber, LinkWidth, LinkSpeed))
                return
            time.sleep(1)

    def PCIEDeviceSummary(self, target_dev_count=3):
        """Collect PCIe link information.

        Returns False when a required resource cannot be read or fewer than
        ``target_dev_count`` devices are reported; True otherwise (including
        when no device is reported, which is logged as powered off).
        """
        for url, label in (('/redfish/v1/Systems/1', "Systems"),
                           ('/redfish/v1/Systems/1/Bios', "Bios")):
            response = self.redfish_client.get(url, headers=self._auth_headers())
            if response._status != 200:
                self.logger.error("{}:{}".format(label, response))
                return False

        url = '/redfish/v1/Chassis/1/PCIeDevices'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        if response._status != 200:
            self.logger.error("PCIeDevices1:{}".format(response))
            return False

        count = _json_body(response)['Members@odata.count']
        # Host not powered on: skip.
        if count == 0:
            self.logger.info("{} PowerStatus=Off".format(self.bmc_host))
            return True
        # A card dropped off the bus: report failure.
        if count < target_dev_count:
            self.logger.error("VA1 Lost,Current:{}".format(count))
            return False
        for i in range(count):
            self._log_pcie_device(i)
        return True

    def run(self):
        """Run the test sequence up to ``try_count`` times.

        Returns True after the first attempt that completes every step, and
        False when an attempt fails at the restart step or all attempts are
        used up.  Attempts whose result image already exists are skipped.
        """
        def step(number, name):
            return TimeSpan(self.logger, "*{}.{}-{}-{}".format(number, self.bmc_host, self.seq, name))

        for i in range(self.try_count):
            self.seq = i
            self.result_image = "{}-{}-{}-done.jpg".format(self.index, self.bmc_host, self.seq)
            # Already done in an earlier run: skip.
            if os.path.exists(self.result_image):
                continue
            try:
                # Total elapsed time of this attempt.
                with TimeSpan(self.logger, "{}-{} InspurVA1QueryE2E:".format(self.bmc_host, self.seq)):
                    # Create the Redfish session and obtain the token.
                    with step(1, "Login"):
                        self.Logout()
                        if not self.Login():
                            continue
                    # Check the card count through the BMC; a lost card fails the attempt.
                    with step(2, "PCIEDeviceSummary"):
                        if not self.PCIEDeviceSummary():
                            self.logger.error("ERROR,{}".format(self.bmc_host))
                            continue
                    # Power the server off so the virtual CD is not in use
                    # when it is (re)mounted.
                    with step(3, "PowerDown"):
                        if not self.ChassisReset("ForceOff"):
                            continue
                    # Eject the virtual CD.
                    with step(4, "EjectMedia"):
                        if not self.EjectMedia():
                            continue
                    # Mount the ISO from NFS as virtual CD.
                    with step(5, "InsertMedia"):
                        if not self.InsertMedia():
                            continue
                    # Power the server on.
                    with step(6, "PowerOn"):
                        if not self.ChassisReset("On"):
                            continue
                    # Make sure booting from the virtual CD is enabled.
                    with step(7, "EnableVirtualCDBoot"):
                        if not self.EnableVirtualCDBoot():
                            continue
                    # Boot from the virtual CD on the next start.
                    with step(8, "SetBootOrder"):
                        if not self.SetBootOrder():
                            continue
                    # Restart and boot from the CD.
                    with step(9, "ForceRestart"):
                        if not self.ComputerSystemReset("ForceRestart"):
                            return False
                        # NOTE(review): the first attempt never waits for the
                        # result; it sleeps 120 s and moves on to the next
                        # attempt - presumably a warm-up pass, confirm.
                        if i == 0:
                            time.sleep(120)
                            continue
                    # Poll KVM screenshots.  When the check program in the ISO
                    # finishes it prints "Please press Enter to activate this
                    # console"; the SSIM of the screenshot against a template
                    # detects that line.  Gives up after five minutes.
                    with step(10, "WaitFinished"):
                        if not self.WaitFinished():
                            continue
                    # Eject the virtual CD.
                    with step(11, "EjectMedia"):
                        if not self.EjectMedia():
                            continue
                    # Power-cycle the server.
                    with step(12, "PowerCycle"):
                        if not self.ChassisReset('PowerCycle'):
                            continue
                    # Close the Redfish session.
                    with step(13, "Logout"):
                        if not self.Logout():
                            continue
                    return True
            except Exception:
                self.logger.error("{}-{} Failed:".format(self.bmc_host, self.seq))
                with open('traceback_info.txt', 'a+') as tb_file:
                    traceback.print_exc(file=tb_file)
        return False


def _build_thread_logger(index):
    """Create the per-thread logger writing to a log file and the console."""
    logger = logging.getLogger("FetchThread:{}".format(index))
    logger.setLevel(level=logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    for handler in (logging.FileHandler("nvidia_aic_check_inspur_{}.log".format(index)),
                    logging.StreamHandler()):
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger


def FetchThread(checkonly, index, q, nfs_server=None):
    """Worker thread: process server rows from ``q`` until ``None`` arrives.

    Args:
        checkonly (int): 1 to only log in and report the PCIe summary.
        index (int): worker number, used for the logger and log file name.
        q (Queue): rows ``index, bmc_addr, username, password, try_count``.
        nfs_server (str): NFS server prefix; defaults to the ``-nfs_server``
            command line value.
    """
    if nfs_server is None:
        nfs_server = args.nfs_server.strip()
    logger = None
    while True:
        row = q.get()  # blocks until a row (or the None sentinel) is available
        if row is None:
            break
        if logger is None:
            logger = _build_thread_logger(index)
        try:
            row_index, bmc_addr, username, password, try_count = row
            # Never write the BMC password to the log.
            logger.info([row_index, bmc_addr, username, "******", try_count])
            t = InspurVA1Query(logger, row_index, bmc_addr, username, password,
                               nfs_server, int(try_count))
            try:
                if checkonly == 1:
                    if t.Login() and t.PCIEDeviceSummary():
                        print("{} True".format(bmc_addr))
                    else:
                        print("{} False".format(bmc_addr))
                elif t.Login():
                    # NOTE(review): the full sequence `t.run()` was disabled
                    # here in favour of a boot override plus restart; confirm
                    # which one is intended.
                    t.SetBootOrderLegacy()
                    t.ComputerSystemReset("ForceRestart")
            finally:
                t.Logout()
        except Exception:
            # Keep the worker alive; a dead worker would stall the bounded queue.
            logger.exception("Task failed:{}".format(row[:2]))


if __name__ == '__main__':
    nfs_server = args.nfs_server.strip()
    server_list = args.server_list.strip()
    threads = args.threads
    checkonly = args.checkonly
    if threads < 1:
        parser.error("-threads must be at least 1")

    request_queue = Queue(threads)
    tasks = []
    for i in range(threads):
        t = threading.Thread(target=FetchThread, args=(checkonly, i, request_queue, nfs_server))
        t.start()
        tasks.append(t)
    try:
        with codecs.open(server_list, "r", 'utf-8') as csvfile:
            csvreader = csv.reader(csvfile)
            next(csvreader, None)  # skip the header line, tolerate an empty file
            for row in csvreader:
                if not row or row[0].startswith("#"):
                    continue
                request_queue.put(row)
    finally:
        # Always release the workers, even when the list could not be read.
        for _ in range(threads):
            request_queue.put(None)
        for t in tasks:
            t.join()
'''
重启BMC
curl -X POST https://192.168.1.100/redfish/v1/Systems/1/Actions/ComputerSystem.Reset -d '{"ResetType": "ForceRestart"}' -H "Content-Type: application/json" -k -u admin:admin
curl -X POST https://192.168.1.100/redfish/v1/Managers/1/Actions/Manager.Reset -d '{"ResetType": "ForceRestart"}' -H "Content-Type: application/json" -k -u admin:admin
curl -X POST https://192.168.1.100/redfish/v1/Managers/1/Actions/Manager.Reset -d '{"ResetType": "ForceRestart"}' -H "Content-Type: application/json" -k -u admin:admin
'''