RK3568温控
cat /sys/class/thermal/thermal_zone0/temp
cat /sys/class/thermal/thermal_zone1/temp
cat /sys/class/thermal/cooling_device0/cur_state
cat /sys/class/thermal/cooling_device1/cur_state
cat /sys/class/thermal/cooling_device2/cur_state
thermal_zone0:是soc的温度;
thermal_zone1: 是gpu的温度。
冷却设备有三个:
功能介绍
Linux的Thermal机制是基于Zone为单位的热管理机制,核心包括三个部分:获取区域温度的设备thermal_zone_device、区域降温的设备thermal_cooling_device、温控策略thermal_governor。thermal_governor从thermal_zone_device获取区域温度,然后根据当前温度,决定调用哪个降温设备来为该区域降温。
(1)Thermal sensor driver:SoC内部CPU和GPU的旁边通常会有用于获取它们温度的传感器,比如 tsadc(Temperature Sensor ADC)。
注:
ADC,即Analog-to-Digital Converter(模拟数字转换器)
-
ADC的作用:将连续变化的模拟信号转换为离散的数字信号的器件
-
常见的模拟信号:温度、压力、声音
-
AD转换步骤:采样、量化、编码
(2)Thermal cooling device:降温设备,比如风扇。这里有点特殊的是,CPU和GPU不仅是发热设备(即需要实施温控策略的设备),也可以是降温设备。当我们降低CPU/GPU的运行频率的时候,它们就在充当降温设备(降低产热量即是在降温)。
(3)Thermal governor:温控策略,Linux内核中的温控策略要比上面的空调控制精细得多,而且也提供了多种策略。
(4)Thermal core:组织并管理上面三个组件,并通过sysfs和用户空间交互。
归纳一下:核心为thermal_core;可以获取温度的设备抽象为thermal_zone_device,如Temp Sensor、NTC(板上的热敏电阻)等;控制温度的设备抽象为thermal_cooling_device,如风扇、CPU、DDR、GPU等;温控策略抽象为thermal_governor,如step_wise、bang_bang等。
linux thermal框架
Linux Thermal框架可以分为Thermal Core、Thermal Governor、Thermal Cooling、Thermal Driver以及Thermal Device Tree五大部分。
Thermal Core:用于和user space、Thermal Governor、Thermal Driver交互。
Thermal Governor:主要包括gov_bang_bang、gov_fair_share、gov_power_allocator、gov_step_wise、gov_user_space等,最常用的为gov_power_allocator.
Thermal Cooling:主要包括cpufreq_cooling、cpuidle_cooling、devfreq_cooling等。
thermal core
内核将采集区域温度的设备抽象为结构体struct thermal_zone_device,主要成员包括char type[]设备名称;int temperature当前温度;int last_temperature上次采集的温度;struct thermal_governor *governor对应governor; int polling_delay温度采集时间间隔等等。其中struct thermal_zone_device_ops *ops是采集区域温度设备的操作抽象,包括绑定降温设备,获取设备温度等。
内核源码的include/linux/thermal.h中定义了thermal_zone_device & thermal_zone_device_ops、thermal_governor、thermal_cooling_device & thermal_cooling_device_ops结构体。
struct thermal_zone_device {int id; // 设备的唯一标识符char type[THERMAL_NAME_LENGTH]; // 设备名称struct device device; // 设备相关联的struct device结构体struct thermal_attr *trip_temp_attrs; // 温度触发器(trip)的温度属性链表struct thermal_attr *trip_type_attrs; // 温度触发器的触发类型属性链表struct thermal_attr *trip_hyst_attrs; // 温度触发器的滞后属性链表void *devdata;int trips;unsigned long trips_disabled; /* bitmap for disabled trips */int passive_delay;int polling_delay; // 采集温度的时间间隔int temperature; // 当前采集的温度int last_temperature; // 上次采集的温度int emul_temperature;int passive;unsigned int forced_passive; // 强制进入被动散热模式的标志atomic_t need_update;struct thermal_zone_device_ops *ops; // 区域温度设备的操作struct thermal_zone_params *tzp; // 记录一些信息,如governor namestruct thermal_governor *governor; // 温控策略void *governor_data; struct list_head thermal_instances; // 降温设备struct idr idr; // 管理热区设备实例的IDstruct mutex lock;struct list_head node; // 热区设备的链表节点struct delayed_work poll_queue; // 用于轮询区域温度
};struct thermal_zone_params {char governor_name[THERMAL_NAME_LENGTH];/** a boolean to indicate if the thermal to hwmon sysfs interface* is required. when no_hwmon == false, a hwmon sysfs interface* will be created. when no_hwmon == true, nothing will be done*/bool no_hwmon;int num_tbps; /* Number of tbp entries */struct thermal_bind_params *tbp;/** Sustainable power (heat) that this thermal zone can dissipate in* mW*/u32 sustainable_power;/** Proportional parameter of the PID controller when* overshooting (i.e., when temperature is below the target)*/s32 k_po;/** Proportional parameter of the PID controller when* undershooting*/s32 k_pu;/* Integral parameter of the PID controller */s32 k_i;/* Derivative parameter of the PID controller */s32 k_d;/* threshold below which the error is no longer accumulated */s32 integral_cutoff;/** @slope: slope of a linear temperature adjustment curve.* Used by thermal zone drivers.*/int slope;/** @offset: offset of a linear temperature adjustment curve.* Used by thermal zone drivers (default 0).*/int offset;
};struct thermal_zone_device_ops {// 绑定一个降温设备到该热区设备int (*bind) (struct thermal_zone_device *,struct thermal_cooling_device *);// 解绑一个降温设备从该热区设备int (*unbind) (struct thermal_zone_device *,struct thermal_cooling_device *);// 获取当前热区设备的温度int (*get_temp) (struct thermal_zone_device *, int *);// 获取当前热区设备的工作模式int (*get_mode) (struct thermal_zone_device *,enum thermal_device_mode *);// 设置当前热区设备的工作模式 int (*set_mode) (struct thermal_zone_device *,enum thermal_device_mode);// 获取指定温度触发器的触发类型 int (*get_trip_type) (struct thermal_zone_device *, int,enum thermal_trip_type *);// 获取触发等级对应的温度 int (*get_trip_temp) (struct thermal_zone_device *, int, int *);// 设置触发等级对应的温度int (*set_trip_temp) (struct thermal_zone_device *, int, int);int (*get_trip_hyst) (struct thermal_zone_device *, int, int *);int (*set_trip_hyst) (struct thermal_zone_device *, int, int);int (*get_crit_temp) (struct thermal_zone_device *, int *);int (*set_emul_temp) (struct thermal_zone_device *, int);// 获取温度的变化趋势int (*get_trend) (struct thermal_zone_device *, int,enum thermal_trend *);int (*notify) (struct thermal_zone_device *, int,enum thermal_trip_type);
};// 内核将温控策略抽象为结构体struct thermal_governor,
//主要成员包括:char name[THERMAL_NAME_LENGTH]策略名称;int (*throttle)()温控决策等等。
struct thermal_governor {char name[THERMAL_NAME_LENGTH];int (*bind_to_tz)(struct thermal_zone_device *tz);void (*unbind_from_tz)(struct thermal_zone_device *tz);int (*throttle)(struct thermal_zone_device *tz, int trip);struct list_head governor_list;
};
// 执行温控策略的设备称为区域降温设备,
//内核抽象为结构体struct thermal_cooling_device,struct thermal_cooling_device_ops是区域降温设备的操作集合。
struct thermal_cooling_device {int id; //每个thermal_cooling_device有独立的idchar type[THERMAL_NAME_LENGTH]; // 名称struct device device;struct device_node *np;void *devdata;const struct thermal_cooling_device_ops *ops;bool updated; /* true if the cooling device does not need update */struct mutex lock; /* protect thermal_instances list */struct list_head thermal_instances;struct list_head node;
};struct thermal_cooling_device_ops {//获取总的状态数,相当于降温等级int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); //获取当前状态 int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); //设置状态 int (*set_cur_state) (struct thermal_cooling_device *, unsigned long); // 获取所请求的功率 int (*get_requested_power)(struct thermal_cooling_device *,struct thermal_zone_device *, u32 *);// 将指定状态(降温等级)转换为对应的功率 int (*state2power)(struct thermal_cooling_device *,struct thermal_zone_device *, unsigned long, u32 *);// 将指定功率转换为对应的状态(降温等级) int (*power2state)(struct thermal_cooling_device *,struct thermal_zone_device *, u32, unsigned long *);
};
初始化
thermal_governor注册
以step_wise governor为例:
int thermal_gov_step_wise_register(void)
{// 调用thermal_core.c中的方法return thermal_register_governor(&thermal_gov_step_wise);
}
static int __init thermal_init(void)
{int result;// 注册所有的governors result = thermal_register_governors();if (result)goto error;result = class_register(&thermal_class);if (result)goto unregister_governors;result = genetlink_init();if (result)goto unregister_class;result = of_parse_thermal_zones();if (result)goto exit_netlink;result = register_pm_notifier(&thermal_pm_nb);if (result)pr_warn("Thermal: Can not register suspend notifier, return %d\n",result);return 0;exit_netlink:genetlink_exit();
unregister_class:class_unregister(&thermal_class);
unregister_governors:thermal_unregister_governors();
error:idr_destroy(&thermal_tz_idr);idr_destroy(&thermal_cdev_idr);mutex_destroy(&thermal_idr_lock);mutex_destroy(&thermal_list_lock);mutex_destroy(&thermal_governor_lock);return result;
}static int __init thermal_register_governors(void)
{int result;// 调用step_wise governor中的方法,为系统默认的govresult = thermal_gov_step_wise_register();if (result)return result;result = thermal_gov_fair_share_register();if (result)return result;result = thermal_gov_bang_bang_register();if (result)return result;result = thermal_gov_user_space_register();if (result)return result;// 注册IPA governorreturn thermal_gov_power_allocator_register();
}// 将第一个注册的governor设置为系统默认governor,即step_wise governor
int thermal_register_governor(struct thermal_governor *governor)
{int err;const char *name;struct thermal_zone_device *pos;if (!governor)return -EINVAL;mutex_lock(&thermal_governor_lock);err = -EBUSY;if (__find_governor(governor->name) == NULL) {err = 0;//链接到thermal_governor_listlist_add(&governor->governor_list, &thermal_governor_list);if (!def_governor && !strncmp(governor->name,DEFAULT_THERMAL_GOVERNOR, THERMAL_NAME_LENGTH))def_governor = governor; //第一个设置为def_governor}.......
}
thermal_zone_device注册
struct thermal_zone_device *thermal_zone_device_register(const char *type,int trips, int mask, void *devdata,struct thermal_zone_device_ops *ops,struct thermal_zone_params *tzp,int passive_delay, int polling_delay)
{struct thermal_zone_device *tz;enum thermal_trip_type trip_type;int trip_temp;int result;int count;int passive = 0;struct thermal_governor *governor;.........................................................................//分配内存tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL);.........................................................................//初始化idr,并获取ididr_init(&tz->idr);mutex_init(&tz->lock);result = get_idr(&thermal_tz_idr, &thermal_idr_lock, &tz->id);..........................................................................strlcpy(tz->type, type ? : "", sizeof(tz->type)); //设置名称tz->ops = ops; //操作集合tz->tzp = tzp; //参数tz->device.class = &thermal_class;tz->devdata = devdata;tz->trips = trips;tz->passive_delay = passive_delay;tz->polling_delay = polling_delay; //采集时间间隔/* A new thermal zone needs to be updated anyway. */atomic_set(&tz->need_update, 1);........................................................................//根据governor name,设置降温策略if (tz->tzp)governor = __find_governor(tz->tzp->governor_name);elsegovernor = def_governor;.........//链接到thermal_tz_listmutex_lock(&thermal_list_lock);list_add_tail(&tz->node, &thermal_tz_list);mutex_unlock(&thermal_list_lock);/* 尝试绑定已注册的降温设备 */bind_tz(tz);thermal_zone_device_reset(tz);/* Update the new thermal zone and mark it as already updated. */if (atomic_cmpxchg(&tz->need_update, 1, 0))thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);return tz;..........
}
thermal_cooling_device注册
struct thermal_cooling_device *
thermal_cooling_device_register(char *type, void *devdata,const struct thermal_cooling_device_ops *ops)
{return __thermal_cooling_device_register(NULL, type, devdata, ops);
}static struct thermal_cooling_device *
__thermal_cooling_device_register(struct device_node *np,char *type, void *devdata,const struct thermal_cooling_device_ops *ops)
{struct thermal_cooling_device *cdev;struct thermal_zone_device *pos = NULL;int result;if (type && strlen(type) >= THERMAL_NAME_LENGTH)return ERR_PTR(-EINVAL);if (!ops || !ops->get_max_state || !ops->get_cur_state ||!ops->set_cur_state)return ERR_PTR(-EINVAL);// 分配内存cdev = kzalloc(sizeof(struct thermal_cooling_device), GFP_KERNEL);if (!cdev)return ERR_PTR(-ENOMEM);result = get_idr(&thermal_cdev_idr, &thermal_idr_lock, &cdev->id);if (result) {kfree(cdev);return ERR_PTR(result);}strlcpy(cdev->type, type ? : "", sizeof(cdev->type));mutex_init(&cdev->lock);INIT_LIST_HEAD(&cdev->thermal_instances);// 初始化成员,将mtk的ops和devdata赋值给thermal_cooling_device cdev->np = np;cdev->ops = ops;cdev->updated = false;cdev->device.class = &thermal_class;cdev->device.groups = cooling_device_attr_groups;cdev->devdata = devdata;dev_set_name(&cdev->device, "cooling_device%d", cdev->id);// 注册deviceresult = device_register(&cdev->device);if (result) {release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id);kfree(cdev);return ERR_PTR(result);}/* Add 'this' new cdev to the global cdev list */// 新的thermal_cooling_device加入到thermal_cdev_list链表mutex_lock(&thermal_list_lock);list_add(&cdev->node, &thermal_cdev_list);mutex_unlock(&thermal_list_lock);/* Update binding information for 'this' new cdev */// 尝试绑定到已注册的温度采集设备thermal_zone_device bind_cdev(cdev);mutex_lock(&thermal_list_lock);list_for_each_entry(pos, &thermal_tz_list, node)if (atomic_cmpxchg(&pos->need_update, 1, 0))thermal_zone_device_update(pos);mutex_unlock(&thermal_list_lock);return cdev;
}static void bind_cdev(struct thermal_cooling_device *cdev)
{int i, ret;const struct thermal_zone_params *tzp;struct thermal_zone_device *pos = NULL;mutex_lock(&thermal_list_lock);// 遍历thermal_zone_device list,逐个绑定thermal_cooling_device list_for_each_entry(pos, &thermal_tz_list, node) {if (!pos->tzp && !pos->ops->bind)continue;if (pos->ops->bind) {// 调用 thermal_zone_device中thermal_zone_device_ops成员中的bind方法ret = pos->ops->bind(pos, cdev);if (ret)print_bind_err_msg(pos, cdev, ret);continue;}tzp = pos->tzp;if (!tzp || !tzp->tbp)continue;for (i = 0; i < tzp->num_tbps; i++) {if (tzp->tbp[i].cdev || !tzp->tbp[i].match)continue;if (tzp->tbp[i].match(pos, cdev))continue;tzp->tbp[i].cdev = cdev;__bind(pos, tzp->tbp[i].trip_mask, cdev,tzp->tbp[i].binding_limits,tzp->tbp[i].weight);}}mutex_unlock(&thermal_list_lock);
}
温度采集设备与降温设备的联系
同一个温度采集设备可以对应多个降温设备,结构体struct thermal_instance用于连接温度采集设备与降温设备,成员struct thermal_zone_device *tz是对应的温度采集设备,struct thermal_cooling_device *cdev是对应的降温设备,int trip是触发等级(对应一个温度),当温度采集设备采集的温度达到一定值时,调用对应trip等级的降温设备。
struct thermal_instance {.................................................................struct thermal_zone_device *tz; //对应温度采集设备struct thermal_cooling_device *cdev; //对应降温设备int trip; //触发等级struct list_head tz_node; //链接到温度采集设备struct list_head cdev_node; //链接到降温设备.................................................................
};
以温度采集设备绑定降温设备为例,当温度采集设备注册时会尝试绑定所有已经注册的降温设备。以CPU为例,bind接口对应的是tscpu_bind(),从代码中可以看出如果降温设备的名称为g_bind0--g_bind9中的一个将会绑定CPU温度采集设备和降温设备。tscpu_bind()接口中也定义了各种名称降温设备对应的触发等级。
static void bind_tz(struct thermal_zone_device *tz)
{int i, ret;struct thermal_cooling_device *pos = NULL;const struct thermal_zone_params *tzp = tz->tzp;if (!tzp && !tz->ops->bind)return;mutex_lock(&thermal_list_lock);if (tz->ops->bind) {//尝试绑定所有的已经注册的降温设备list_for_each_entry(pos, &thermal_cdev_list, node) {ret = tz->ops->bind(tz, pos);if (ret)print_bind_err_msg(tz, pos, ret);}goto exit;}...........................................................
}static int tscpu_bind(struct thermal_zone_device *thermal, struct thermal_cooling_device *cdev)
{int table_val = 0;if (!strcmp(cdev->type, g_bind0)) {table_val = 0;tscpu_config_all_tc_hw_protect(trip_temp[0], tc_mid_trip);} else if (!strcmp(cdev->type, g_bind1)) {table_val = 1;tc_mid_trip = trip_temp[1];tscpu_config_all_tc_hw_protect(trip_temp[0], tc_mid_trip);} else if (!strcmp(cdev->type, g_bind2)) {table_val = 2;} else if (!strcmp(cdev->type, g_bind3)) {table_val = 3;} else if (!strcmp(cdev->type, g_bind4)) {.....................................................} else {return 0;}//以table_val为触发等级绑定发热设备和降温设备if (mtk_thermal_zone_bind_cooling_device(thermal, table_val, cdev)) {tscpu_warn("tscpu_bind error binding cooling dev\n");return -EINVAL;}tscpu_printk("tscpu_bind binding OK, %d\n", table_val);return 0;
}
温度采集设备知道了触发等级和降温设备,还需要知道触发等级对应的温度。thermal_zone_device_ops的get_trip_temp()用于查询触发等级对应的温度,以mtkcpu为例,所有降温设备的触发温度保存在数组中,触发等级就是该数组的下标。
/*
 * get_trip_temp() callback of the MTK CPU thermal zone: report the
 * temperature stored in trip_temp[] for the given trip level.
 *
 * @thermal: the thermal zone (not used here)
 * @trip:    trip level, used directly as the index into trip_temp[]
 * @temp:    receives the trip temperature
 *
 * Always returns 0.
 */
static int tscpu_get_trip_temp(struct thermal_zone_device *thermal, int trip,
			       int *temp)
{
	const int threshold = trip_temp[trip];

	*temp = threshold;

	return 0;
}
cooling device
以cpu cooling为例:
cpufreq_state2power:根据cpu cooling state换算cpu power。
/*
 * Convert a CPU cooling state into the power the CPUs consume in that state.
 *
 * @cdev:  the cooling device; its devdata is the cpufreq_cooling_device
 * @tz:    the thermal zone, passed on to get_static_power()
 * @state: cooling state, used as the index into freq_table[]
 * @power: receives static power + dynamic power
 *
 * Returns 0 on success (with *power == 0 when none of the allowed CPUs is
 * online), -EINVAL when the state maps to frequency 0, or the error from
 * get_static_power().
 */
static int cpufreq_state2power(struct thermal_cooling_device *cdev,
			       struct thermal_zone_device *tz,
			       unsigned long state, u32 *power)
{
	unsigned int freq, num_cpus;
	cpumask_t cpumask;
	u32 static_power, dynamic_power;
	int ret;
	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;

	cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
	/* number of online CPUs covered by this cooling device */
	num_cpus = cpumask_weight(&cpumask);

	/* None of our cpus are online, so no power */
	if (num_cpus == 0) {
		*power = 0;
		return 0;
	}

	/* frequency that corresponds to the requested cooling state */
	freq = cpufreq_device->freq_table[state];
	if (!freq)
		return -EINVAL;

	/* dynamic power at that frequency, for all online CPUs */
	dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
	/* static power at that frequency */
	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
	if (ret)
		return ret;

	/* total CPU power at that frequency */
	*power = static_power + dynamic_power;
	return 0;
}
cpufreq_power2state:根据cpu power换算cpu cooling state.
/*
 * Convert a power budget into the CPU cooling state that satisfies it.
 *
 * @cdev:  the cooling device; its devdata is the cpufreq_cooling_device
 * @tz:    the thermal zone, passed on to get_static_power()
 * @power: power budget for the CPUs
 * @state: receives the cooling state for the computed target frequency
 *
 * Returns 0 on success, -ENODEV when none of the allowed CPUs is online,
 * -EINVAL when the target frequency has no matching state, or the error
 * from get_static_power().
 */
static int cpufreq_power2state(struct thermal_cooling_device *cdev,
			       struct thermal_zone_device *tz, u32 power,
			       unsigned long *state)
{
	unsigned int cpu, cur_freq, target_freq;
	int ret;
	s32 dyn_power;
	u32 last_load, normalised_power, static_power;
	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;

	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);

	/* None of our cpus are online */
	if (cpu >= nr_cpu_ids)
		return -ENODEV;

	/* current CPU frequency */
	cur_freq = cpufreq_quick_get(cpu);
	/* static power at the current frequency */
	ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
	if (ret)
		return ret;

	/* what is left of the budget for dynamic power, clamped at 0 */
	dyn_power = power - static_power;
	dyn_power = dyn_power > 0 ? dyn_power : 0;
	/* treat a recorded load of 0 as 1 so the division below is defined */
	last_load = cpufreq_device->last_load ?: 1;
	/* normalise the dynamic power by the last load */
	normalised_power = (dyn_power * 100) / last_load;
	/* target frequency for the normalised power */
	target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);

	/* cooling state for the target frequency */
	*state = cpufreq_cooling_get_level(cpu, target_freq);
	if (*state == THERMAL_CSTATE_INVALID) {
		dev_warn_ratelimited(&cdev->device,
				     "Failed to convert %dKHz for cpu %d into a cdev state\n",
				     target_freq, cpu);
		return -EINVAL;
	}

	trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
				      target_freq, *state, power);
	return 0;
}

/*
 * Look up the power that corresponds to a frequency in em->table.
 *
 * Walks the table downwards from index max_level - 1, stops at the first
 * entry whose frequency is below @freq, and returns the power of the entry
 * one index above it.
 */
static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 freq)
{
	int i;

	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
		if (freq > cpufreq_cdev->em->table[i].frequency)
			break;
	}

	/* table lookup; i is -1 here when no entry is below freq */
	return cpufreq_cdev->em->table[i + 1].power;
}
遍历了一下cpufreq_cdev里的em->table,这个table蕴含了freq和power的对应关系,这个table是跟芯片密切相关的,往往在出厂的时候就已经预制好了。
Linux Thermal框架-CSDN博客
模拟量转数字量原理
模拟量转数字量的过程通常涉及以下几个关键步骤:
采样:该步骤将连续的模拟信号转换为时间上离散变化的信号。
保持:该步骤存储采样结果,直到下一次采样。
量化:该步骤将采样电平转换为与之最接近的离散数字电平;
编码:该步骤将量化后的结果表示为特定的数制形式。
模拟信号转化为数字信号的过程中,会使用到模数转换器(ADC),这种转换通常需要一个参考模拟量作为转换的标准,常见的参考标准为ADC芯片最大的可转换信号大小。
ADC的分辨率用输出数字量的位数来表示,提高分辨率可以更准确地复现模拟信号并降低量化误差,但这也可能增加成本。例如一个8位的ADC可以将5V的模拟量分为256等分,从而得出相应的数字量。
此外,还有间接比较型的模数转换,该转换中输入模拟量不是直接与参考电压比较,而是将二者变为中间的某种物理量再进行比较,然后将比较所得的结果进行数字编码。