- 安装
pip install nvidia-ml-py
from pynvml import *
def nvidia_info():
    """Collect driver and per-GPU status through NVML.

    Requires the ``pynvml`` module (``pip install nvidia-ml-py``).

    Returns:
        dict: A dictionary with the keys

        * ``state`` -- ``True`` when every query succeeded, ``False`` when
          ``pynvml`` could not be imported or any NVML call raised.
        * ``nvidia_version`` -- driver version reported by NVML, or ``""``
          when it could not be read.
        * ``nvidia_count`` -- number of devices reported by NVML.
        * ``gpus`` -- one dict per device with ``gpu_name``, ``total``,
          ``free``, ``used`` (fields of the NVML memory info; NVML documents
          them as byte counts), ``temperature`` (a string suffixed with
          ``℃``) and ``powerStatus``.

        When a query fails part-way, the values gathered so far are kept and
        ``state`` is ``False``.
    """
    nvidia_dict = {
        "state": True,
        "nvidia_version": "",
        "nvidia_count": 0,
        "gpus": [],
    }

    # Import locally and by module name so this function does not depend on
    # a star import elsewhere in the file, and so a missing package is
    # reported through "state" like any other NVML failure.
    try:
        import pynvml
    except ImportError:
        nvidia_dict["state"] = False
        return nvidia_dict

    try:
        pynvml.nvmlInit()
        nvidia_dict["nvidia_version"] = pynvml.nvmlSystemGetDriverVersion()
        nvidia_dict["nvidia_count"] = pynvml.nvmlDeviceGetCount()
        for i in range(nvidia_dict["nvidia_count"]):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            # NVML_TEMPERATURE_GPU is the named form of the sensor id 0.
            temperature = pynvml.nvmlDeviceGetTemperature(
                handle, pynvml.NVML_TEMPERATURE_GPU
            )
            nvidia_dict["gpus"].append(
                {
                    "gpu_name": pynvml.nvmlDeviceGetName(handle),
                    "total": memory_info.total,
                    "free": memory_info.free,
                    "used": memory_info.used,
                    "temperature": f"{temperature}℃",
                    "powerStatus": pynvml.nvmlDeviceGetPowerState(handle),
                }
            )
    except Exception:
        # Deliberately broad: this is a best-effort probe, and any failure
        # (NVMLError or otherwise) is reported through the "state" flag.
        nvidia_dict["state"] = False
    finally:
        try:
            pynvml.nvmlShutdown()
        except Exception:
            # Shutdown fails when nvmlInit() never succeeded; there is
            # nothing left to release in that case.
            pass
    return nvidia_dict


def check_gpu_mem_usedRate(interval=0.5, gpu_index=0, max_iterations=None):
    """Poll one GPU's memory usage and track the highest used/total ratio.

    Each poll prints the current usage and the maximum ratio seen so far.
    Run it in a separate process while a job is running to learn the job's
    peak GPU memory usage.

    Args:
        interval: Seconds to sleep between polls. ``0`` polls back-to-back
            without pausing.
        gpu_index: Index into the ``gpus`` list returned by
            ``nvidia_info()``.
        max_iterations: Stop after this many polls. ``None`` (the default)
            polls until the process is interrupted.

    Returns:
        float: The highest used/total ratio observed, or ``0.0`` when no
        sample could be taken.
    """
    import time

    max_rate = 0.0
    polls = 0
    while max_iterations is None or polls < max_iterations:
        polls += 1
        gpus = nvidia_info()["gpus"]

        # nvidia_info() returns an empty or short list when NVML fails;
        # stop cleanly instead of raising IndexError.
        if not 0 <= gpu_index < len(gpus):
            print(f"GPU{gpu_index} info unavailable, stop monitoring")
            break

        used = gpus[gpu_index]["used"]
        tot = gpus[gpu_index]["total"]
        # Guard against a zero total so the ratio cannot divide by zero.
        rate = used / tot if tot else 0.0
        print(f"GPU{gpu_index} used: {used}, tot: {tot}, 使用率:{rate}")
        if rate > max_rate:
            max_rate = rate
        print(f"GPU{gpu_index} 最大使用率:", max_rate)

        if interval > 0:
            time.sleep(interval)
    return max_rate
在跑任务的同时,另外运行一个脚本调用 check_gpu_mem_usedRate,
就可以得到 GPU 显存的最大使用率。线上服务的显存不要用得太满,最大使用率以 80% 左右为宜,以防极端情况下 GPU 显存溢出。
参考:
python获取GPU,CPU,硬盘,内存,系统,用户使用情况信息
【Python管理GPU】pynvml工具的安装与使用