本篇文章主要是自己的学习笔记,主要内容是分析linux系统中设备的Suspend和Resume流程,用到的内核版本为 linux-4.14。
目录
1、Linux 内核的Suspend方法
2、__device_suspend 函数
3、pm_op 函数
4、suspend_enter 函数
5、resume流程
1、Linux 内核的Suspend方法
在 Linux 内核中有三种 Suspend 的方法,分别是 Freeze、Standby、Suspend to RAM。在用户空间向 /sys/power/state 文件写入 "freeze"、"standby"、"mem" 就可以触发相应的 Suspend,如下所示。关于这几种 Suspend 的区别,如果大家感兴趣可以自行查阅资料,这里就不再展开了。
echo "freeze" > /sys/power/state
echo "standby" > /sys/power/state
echo "mem" > /sys/power/state
当执行上面命令会通过 sysfs 陷入到内核,并触发 Suspend ,相应的处理代码如下:
static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,const char *buf, size_t n)
{suspend_state_t state;int error;error = pm_autosleep_lock();if (error)return error;if (pm_autosleep_state() > PM_SUSPEND_ON) {error = -EBUSY;goto out;}state = decode_state(buf, n);if (state < PM_SUSPEND_MAX) {if (state == PM_SUSPEND_MEM)state = mem_sleep_current;error = pm_suspend(state);} else if (state == PM_SUSPEND_MAX) {error = hibernate();} else {error = -EINVAL;}out:pm_autosleep_unlock();return error ? error : n;
}
decode_state 函数主要功能是 根据输入的字符串进行相应的匹配,返回值 state 的定义如下:
/*
 * System sleep state identifiers.
 *
 * pm_suspend() only accepts values strictly between PM_SUSPEND_ON and
 * PM_SUSPEND_MAX; state_store() treats PM_SUSPEND_MAX itself as a request
 * for hibernation.
 */
typedef int __bitwise suspend_state_t;

#define PM_SUSPEND_ON		((__force suspend_state_t) 0)
#define PM_SUSPEND_TO_IDLE	((__force suspend_state_t) 1)
#define PM_SUSPEND_STANDBY	((__force suspend_state_t) 2)
#define PM_SUSPEND_MEM		((__force suspend_state_t) 3)
#define PM_SUSPEND_MIN		PM_SUSPEND_TO_IDLE
#define PM_SUSPEND_MAX		((__force suspend_state_t) 4)
如果 state 满足相关条件就会进入 pm_suspend 函数,该函数定义如下:
/*
 * pm_suspend - Validate @state and run the suspend sequence for it.
 * @state: System sleep state to enter.
 *
 * Returns -EINVAL for a state outside (PM_SUSPEND_ON, PM_SUSPEND_MAX),
 * otherwise the result of enter_state(). The success/failure counters in
 * suspend_stats are updated accordingly.
 */
int pm_suspend(suspend_state_t state)
{
	int ret;

	if (!(state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX))
		return -EINVAL;

	pr_info("suspend entry (%s)\n", mem_sleep_labels[state]);

	ret = enter_state(state);
	if (!ret) {
		suspend_stats.success++;
	} else {
		suspend_stats.fail++;
		dpm_save_failed_errno(ret);
	}

	pr_info("suspend exit\n");
	return ret;
}
然后再进入 enter_state 函数,定义如下:
/*
 * enter_state - Do the common work needed to enter a system sleep state.
 * @state: System sleep state to enter.
 *
 * Checks that @state is usable, takes pm_mutex (failing with -EBUSY if it is
 * already held), optionally syncs filesystems, prepares the system via
 * suspend_prepare() and then suspends devices and enters the state via
 * suspend_devices_and_enter(). Returns 0 or a negative error code.
 */
static int enter_state(suspend_state_t state)
{
	int error;

	trace_suspend_resume(TPS("suspend_enter"), state, true);
	if (state == PM_SUSPEND_TO_IDLE) {
#ifdef CONFIG_PM_DEBUG
		if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
			pr_warn("Unsupported test mode for suspend to idle, please choose none/freezer/devices/platform.\n");
			return -EAGAIN;
		}
#endif
	} else if (!valid_state(state)) {
		return -EINVAL;
	}

	/* Only one sleep transition may be in progress at a time. */
	if (!mutex_trylock(&pm_mutex))
		return -EBUSY;

	if (state == PM_SUSPEND_TO_IDLE)
		s2idle_begin();

#ifndef CONFIG_SUSPEND_SKIP_SYNC
	trace_suspend_resume(TPS("sync_filesystems"), 0, true);
	pr_info("Syncing filesystems ... ");
	sys_sync();
	pr_cont("done.\n");
	trace_suspend_resume(TPS("sync_filesystems"), 0, false);
#endif

	pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
	pm_suspend_clear_flags();
	error = suspend_prepare(state);
	if (error)
		goto Unlock;

	/* In the "freezer" test mode stop here and undo the preparation. */
	if (suspend_test(TEST_FREEZER))
		goto Finish;

	trace_suspend_resume(TPS("suspend_enter"), state, false);
	pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]);
	pm_restrict_gfp_mask();
	error = suspend_devices_and_enter(state);
	pm_restore_gfp_mask();

 Finish:
	events_check_enabled = false;
	pm_pr_dbg("Finishing wakeup.\n");
	suspend_finish();
 Unlock:
	mutex_unlock(&pm_mutex);
	return error;
}
(1)valid_state 函数主要是用来检查平台是否支持该电源状态,该函数的定义如下:
/*
 * valid_state - Ask the platform whether it supports @state.
 *
 * PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states need low level support and
 * need to be valid to the low level implementation; when no ->valid()
 * callback is registered, no state is considered valid.
 */
static bool valid_state(suspend_state_t state)
{
	if (!suspend_ops || !suspend_ops->valid)
		return false;

	return suspend_ops->valid(state);
}
通过注释也能够大概知道该函数的作用,当 state 等于 standby 或者 mem时,则需要调用suspend_ops 中的 valid 回调,通过底层平台代码判断是否支持,关于 valid 回调的实现后续会介绍。
(2)suspend_prepare 函数主要进行 suspend 前的准备,比如 switch console 和 thread freezing,如果失败,则终止 suspend。函数定义如下:
/**
 * suspend_prepare - Prepare for entering system sleep state.
 * @state: System sleep state about to be entered.
 *
 * Common code run for every system sleep state that can be entered (except for
 * hibernation). Run suspend notifiers, allocate the "suspend" console and
 * freeze processes.
 *
 * Returns 0 on success, -EPERM if @state is not supported, or the error from
 * the notifier chain / process freezing. On failure the PM_POST_SUSPEND
 * notification is sent and the console is restored before returning.
 */
static int suspend_prepare(suspend_state_t state)
{
	int error, nr_calls = 0;

	if (!sleep_state_supported(state))
		return -EPERM;

	pm_prepare_console();

	error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls);
	if (error) {
		/*
		 * NOTE(review): presumably excludes the notifier that failed
		 * from the PM_POST_SUSPEND round below -- confirm against
		 * __pm_notifier_call_chain().
		 */
		nr_calls--;
		goto Finish;
	}

	trace_suspend_resume(TPS("freeze_processes"), 0, true);
	error = suspend_freeze_processes();
	trace_suspend_resume(TPS("freeze_processes"), 0, false);
	if (!error)
		return 0;

	/* Freezing failed: record it and fall through to the unwind path. */
	suspend_stats.failed_freeze++;
	dpm_save_failed_step(SUSPEND_FREEZE);
 Finish:
	__pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL);
	pm_restore_console();
	return error;
}
sleep_state_supported :当 state 为 PM_SUSPEND_TO_IDLE 时直接返回 true;否则检查 suspend_ops 是否提供了 .enter 回调,该回调会在后面使用到。
/*
 * sleep_state_supported - Can @state be entered at all?
 *
 * Suspend-to-idle is always accepted here; every other state requires
 * registered suspend_ops with an ->enter() callback.
 */
static bool sleep_state_supported(suspend_state_t state)
{
	if (state == PM_SUSPEND_TO_IDLE)
		return true;

	return suspend_ops && suspend_ops->enter;
}
pm_prepare_console :将当前console切换到一个虚拟console。
__pm_notifier_call_chain :发送开始 suspend 的消息。
suspend_freeze_processes :freeze用户空间进程和一些内核线程。
(3)suspend_devices_and_enter 函数的实现如下:
/**
 * suspend_devices_and_enter - Suspend devices and enter system sleep state.
 * @state: System sleep state to enter.
 *
 * Returns -ENOSYS if @state is not supported, otherwise the error (or 0)
 * produced by the begin/suspend/enter steps. Devices and the console are
 * resumed again before returning once they have been suspended.
 */
int suspend_devices_and_enter(suspend_state_t state)
{
	int error;
	bool wakeup = false;

	if (!sleep_state_supported(state))
		return -ENOSYS;

	pm_suspend_target_state = state;

	error = platform_suspend_begin(state);
	if (error)
		goto Close;

	suspend_console();
	suspend_test_start();
	error = dpm_suspend_start(PMSG_SUSPEND);
	if (error) {
		pr_err("Some devices failed to suspend, or early wake event detected\n");
		goto Recover_platform;
	}
	suspend_test_finish("suspend devices");
	if (suspend_test(TEST_DEVICES))
		goto Recover_platform;

	/*
	 * Keep re-entering the sleep state as long as entering it succeeded,
	 * no wakeup was reported and the platform asks to suspend again.
	 */
	do {
		error = suspend_enter(state, &wakeup);
	} while (!error && !wakeup && platform_suspend_again(state));

 Resume_devices:
	suspend_test_start();
	dpm_resume_end(PMSG_RESUME);
	suspend_test_finish("resume devices");
	trace_suspend_resume(TPS("resume_console"), state, true);
	resume_console();
	trace_suspend_resume(TPS("resume_console"), state, false);

 Close:
	platform_resume_end(state);
	pm_suspend_target_state = PM_SUSPEND_ON;
	return error;

 Recover_platform:
	/* Let the platform recover, then take the normal device resume path. */
	platform_recover(state);
	goto Resume_devices;
}
sleep_state_supported : 再次检查suspend_ops是否有提供.enter回调。
platform_suspend_begin :如果 suspend_ops 提供了 begin 回调则调用它,通知平台代码,让其作相应的处理。
suspend_console :挂起console。
suspend_test_start :记录系统挂起的开始时间点。
dpm_suspend_start:调用所有设备的->prepare和->suspend回调函数
函数的实现如下:
/**
 * dpm_suspend_start - Prepare devices for PM transition and suspend them.
 * @state: PM transition of the system being carried out.
 *
 * Prepare all non-sysdev devices for system PM transition and execute "suspend"
 * callbacks for them.
 *
 * Returns the error from dpm_prepare() if preparation fails (recording the
 * failed step in suspend_stats), otherwise the result of dpm_suspend().
 */
int dpm_suspend_start(pm_message_t state)
{
	int error;

	error = dpm_prepare(state);
	if (error) {
		suspend_stats.failed_prepare++;
		dpm_save_failed_step(SUSPEND_PREPARE);
	} else {
		error = dpm_suspend(state);
	}
	return error;
}
dpm_prepare :对所有非系统设备调用 ->prepare 回调,在这个函数之后,就不应再为这些设备注册新的子设备了。
dpm_suspend :调用所有非系统设备的 suspend 函数,设备将停止操作。
dpm_suspend 函数的调用流程如下:
int dpm_suspend(pm_message_t state)
==> error = device_suspend(dev);
==> return __device_suspend(dev, pm_transition, false);
2、__device_suspend 函数
dpm_suspend函数最后是调用到了__device_suspend 函数,该函数的实现如下:
/**
 * __device_suspend - Execute "suspend" callbacks for given device.
 * @dev: Device to handle.
 * @state: PM transition of the system being carried out.
 * @async: If true, the device is being suspended asynchronously.
 *
 * The callback is looked up in this order: power domain, device type, class,
 * bus; the driver's own dev_pm_ops is used only when none of those supplied
 * a callback. Legacy class/bus ->suspend() hooks are run via legacy_suspend().
 *
 * Returns 0 on success or the error from the callback; a non-zero error is
 * also stored in async_error.
 */
static int __device_suspend(struct device *dev, pm_message_t state, bool async)
{
	pm_callback_t callback = NULL;
	const char *info = NULL;
	int error = 0;
	DECLARE_DPM_WATCHDOG_ON_STACK(wd);

	TRACE_DEVICE(dev);
	TRACE_SUSPEND(0);

	dpm_wait_for_subordinate(dev, async);

	/* Another device already failed: skip the callbacks for this one. */
	if (async_error) {
		dev->power.direct_complete = false;
		goto Complete;
	}

	/*
	 * If a device configured to wake up the system from sleep states
	 * has been suspended at run time and there's a resume request pending
	 * for it, this is equivalent to the device signaling wakeup, so the
	 * system suspend operation should be aborted.
	 */
	if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
		pm_wakeup_event(dev, 0);

	if (pm_wakeup_pending()) {
		dev->power.direct_complete = false;
		async_error = -EBUSY;
		goto Complete;
	}

	if (dev->power.syscore)
		goto Complete;

	/* Avoid direct_complete to let wakeup_path propagate. */
	if (device_may_wakeup(dev) || dev->power.wakeup_path)
		dev->power.direct_complete = false;

	if (dev->power.direct_complete) {
		if (pm_runtime_status_suspended(dev)) {
			pm_runtime_disable(dev);
			/* Re-check after disabling runtime PM. */
			if (pm_runtime_status_suspended(dev))
				goto Complete;

			pm_runtime_enable(dev);
		}
		dev->power.direct_complete = false;
	}

	dpm_watchdog_set(&wd, dev);
	device_lock(dev);

	if (dev->pm_domain) {
		info = "power domain ";
		callback = pm_op(&dev->pm_domain->ops, state);
		goto Run;
	}

	if (dev->type && dev->type->pm) {
		info = "type ";
		callback = pm_op(dev->type->pm, state);
		goto Run;
	}

	if (dev->class) {
		if (dev->class->pm) {
			info = "class ";
			callback = pm_op(dev->class->pm, state);
			goto Run;
		} else if (dev->class->suspend) {
			pm_dev_dbg(dev, state, "legacy class ");
			error = legacy_suspend(dev, state, dev->class->suspend,
						"legacy class ");
			goto End;
		}
	}

	if (dev->bus) {
		if (dev->bus->pm) {
			info = "bus ";
			callback = pm_op(dev->bus->pm, state);
		} else if (dev->bus->suspend) {
			pm_dev_dbg(dev, state, "legacy bus ");
			error = legacy_suspend(dev, state, dev->bus->suspend,
						"legacy bus ");
			goto End;
		}
	}

 Run:
	/* Fall back to the driver's callback if no subsystem provided one. */
	if (!callback && dev->driver && dev->driver->pm) {
		info = "driver ";
		callback = pm_op(dev->driver->pm, state);
	}

#ifdef CONFIG_MTK_RAM_CONSOLE
	if (async)
		aee_rr_rec_last_async_func((unsigned long int)callback);
	else
		aee_rr_rec_last_sync_func((unsigned long int)callback);
#endif

	error = dpm_run_callback(callback, dev, state, info);

 End:
	if (!error) {
		struct device *parent = dev->parent;

		dev->power.is_suspended = true;
		if (parent) {
			spin_lock_irq(&parent->power.lock);

			dev->parent->power.direct_complete = false;
			if (dev->power.wakeup_path
			    && !dev->parent->power.ignore_children)
				dev->parent->power.wakeup_path = true;

			spin_unlock_irq(&parent->power.lock);
		}
		dpm_clear_suppliers_direct_complete(dev);
	} else {
		log_suspend_abort_reason("Callback failed on %s in %pS returned %d",
			dev_name(dev), callback, error);
	}

	device_unlock(dev);
	dpm_watchdog_clear(&wd);

 Complete:
	if (error)
		async_error = error;

	complete_all(&dev->power.completion);
	TRACE_SUSPEND(error);
	return error;
}
通过注释就可以看出这个函数是执行系统中给定设备的 suspend 回调函数。
在旧版本的 linux 内核中,这些 callbacks 是放在设备模型已有的结构体中,比如 struct bus_type、struct device/driver、struct class 等这些数据结构中都会有 suspend/resume 函数的身影,但这样做不具备良好的封装性和实用性。
后来就将这些Callbacks封装为一个统一的数据结构,也就是 struct dev_pm_ops ,上层的数据结构只需要包含这个结构即可。该结构体的定义如下:
struct dev_pm_ops {int (*prepare)(struct device *dev);void (*complete)(struct device *dev);int (*suspend)(struct device *dev);int (*resume)(struct device *dev);int (*freeze)(struct device *dev);int (*thaw)(struct device *dev);int (*poweroff)(struct device *dev);int (*restore)(struct device *dev);int (*suspend_late)(struct device *dev);int (*resume_early)(struct device *dev);int (*freeze_late)(struct device *dev);int (*thaw_early)(struct device *dev);int (*poweroff_late)(struct device *dev);int (*restore_early)(struct device *dev);int (*suspend_noirq)(struct device *dev);int (*resume_noirq)(struct device *dev);int (*freeze_noirq)(struct device *dev);int (*thaw_noirq)(struct device *dev);int (*poweroff_noirq)(struct device *dev);int (*restore_noirq)(struct device *dev);int (*runtime_suspend)(struct device *dev);int (*runtime_resume)(struct device *dev);int (*runtime_idle)(struct device *dev);
};
这里面的 callbacks 都是和具体设备挂钩的,比如 suspend / resume ,callbacks 的实现和具体的设备有很大关系,这就需要工程师在设计Driver的时候,知道这些 callbacks 的使用场景,根据具体的需求进行分析。
回到 __device_suspend 函数中,callback = pm_op() 函数就是用来获取设备相应的回调函数,保存在callback 变量中,调用顺序为
-> struct dev_pm_domain *pm_domain
-> struct device_type *type;
-> struct class *class;
-> struct bus_type *bus;
3、pm_op 函数
通过上面的分析可以知道__device_suspend函数最后是调用了pm_op函数,函数实现如下:
/**
 * pm_op - Return the PM operation appropriate for given PM event.
 * @ops: PM operations to choose from.
 * @state: PM transition of the system being carried out.
 *
 * Returns the matching callback from @ops, or NULL when the event is not
 * handled (including when the corresponding config option is disabled).
 */
static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state)
{
	switch (state.event) {
#ifdef CONFIG_SUSPEND
	case PM_EVENT_SUSPEND:
		return ops->suspend;
	case PM_EVENT_RESUME:
		return ops->resume;
#endif /* CONFIG_SUSPEND */
#ifdef CONFIG_HIBERNATE_CALLBACKS
	case PM_EVENT_FREEZE:
	case PM_EVENT_QUIESCE:
		return ops->freeze;
	case PM_EVENT_HIBERNATE:
		return ops->poweroff;
	case PM_EVENT_THAW:
	case PM_EVENT_RECOVER:
		return ops->thaw;
		break;
	case PM_EVENT_RESTORE:
		return ops->restore;
#endif /* CONFIG_HIBERNATE_CALLBACKS */
	}

	return NULL;
}
这样便得到设备的 suspend callback 函数,然后继续往下可以看到 dpm_run_callback(callback, dev, state, info) 函数,该函数就是运行前面获取的 callback,函数的定义如下:
/*
 * dpm_run_callback - Invoke a previously selected PM callback for a device.
 * @cb: Callback to run; may be NULL, in which case nothing is done.
 * @dev: Device passed to the callback.
 * @state: PM transition of the system being carried out.
 * @info: Short label describing where the callback came from; const because
 *        __device_suspend() passes a const string here.
 *
 * This is an abridged excerpt: the code before and after the call to @cb is
 * omitted. Returns 0 if @cb is NULL, otherwise the callback's return value.
 */
static int dpm_run_callback(pm_callback_t cb, struct device *dev,
			    pm_message_t state, const char *info)
{
	ktime_t calltime;
	int error;

	if (!cb)
		return 0;

	/* ... code omitted from this excerpt ... */

	error = cb(dev);

	/* ... code omitted from this excerpt ... */

	return error;
}
这样就能够执行到 driver 中的 suspend 回调函数。
系统在 suspend/resume 的过程中,会依次调用 prepare —> suspend —> suspend_late —> suspend_noirq —> wakeup —> resume_noirq —> resume_early —> resume —> complete。目前就是调用到 suspend 函数,其它的调用流程会在后面体现。
4、suspend_enter 函数
好了,接下来分析其它代码,现在回到 suspend_devices_and_enter 函数中,dpm_suspend_start 函数已经分析完了,现在分析 suspend_enter 函数,该函数的定义如下:
/**
 * suspend_enter - Make the system enter the given sleep state.
 * @state: System sleep state to enter.
 * @wakeup: Returns information that the sleep state should not be re-entered.
 *
 * This function should be called after devices have been suspended.
 *
 * The labels at the end form an unwind ladder: each failure jumps to the
 * label that undoes exactly the steps completed so far, and a successful
 * pass falls through all of them in order on the way back up.
 */
static int suspend_enter(suspend_state_t state, bool *wakeup)
{
	int error, last_dev;

	error = platform_suspend_prepare(state);
	if (error)
		goto Platform_finish;

	error = dpm_suspend_late(PMSG_SUSPEND);
	if (error) {
		/* Index of the most recently recorded failing device. */
		last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
		last_dev %= REC_FAILED_NUM;
		pr_err("late suspend of devices failed\n");
		log_suspend_abort_reason("late suspend of %s device failed",
					 suspend_stats.failed_devs[last_dev]);
		goto Platform_finish;
	}
	error = platform_suspend_prepare_late(state);
	if (error)
		goto Devices_early_resume;

	/* Suspend-to-idle takes its own path and skips the steps below. */
	if (state == PM_SUSPEND_TO_IDLE && pm_test_level != TEST_PLATFORM) {
		s2idle_loop();
		goto Platform_early_resume;
	}

	error = dpm_suspend_noirq(PMSG_SUSPEND);
	if (error) {
		last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
		last_dev %= REC_FAILED_NUM;
		pr_err("noirq suspend of devices failed\n");
		log_suspend_abort_reason("noirq suspend of %s device failed",
					 suspend_stats.failed_devs[last_dev]);
		goto Platform_early_resume;
	}
	error = platform_suspend_prepare_noirq(state);
	if (error)
		goto Platform_wake;

	if (suspend_test(TEST_PLATFORM))
		goto Platform_wake;

	error = disable_nonboot_cpus();
	if (error || suspend_test(TEST_CPUS)) {
		log_suspend_abort_reason("Disabling non-boot cpus failed");
		goto Enable_cpus;
	}

	arch_suspend_disable_irqs();
	BUG_ON(!irqs_disabled());

	error = syscore_suspend();
	if (!error) {
		/* Last check for a wakeup event before entering the state. */
		*wakeup = pm_wakeup_pending();
		if (!(suspend_test(TEST_CORE) || *wakeup)) {
			trace_suspend_resume(TPS("machine_suspend"),
				state, true);
			error = suspend_ops->enter(state);
			trace_suspend_resume(TPS("machine_suspend"),
				state, false);
		} else if (*wakeup) {
			error = -EBUSY;
		}
		syscore_resume();
	}

	arch_suspend_enable_irqs();
	BUG_ON(irqs_disabled());

 Enable_cpus:
	enable_nonboot_cpus();

 Platform_wake:
	platform_resume_noirq(state);
	dpm_resume_noirq(PMSG_RESUME);

 Platform_early_resume:
	platform_resume_early(state);

 Devices_early_resume:
	dpm_resume_early(PMSG_RESUME);

 Platform_finish:
	platform_resume_finish(state);
	return error;
}
platform_suspend_prepare :如果平台提供了 suspend_ops->prepare() 回调则调用它,让平台做进入睡眠前的准备。
dpm_suspend_late:延迟挂起设备,在最后阶段挂起设备。
platform_suspend_prepare_late:准备进入睡眠状态的延迟阶段,执行平台相关的准备操作。
dpm_suspend_noirq:在设备中断被关闭的情况下,调用所有设备的 suspend_noirq 回调来挂起设备。
platform_suspend_prepare_noirq:准备进入睡眠状态的无中断阶段,执行平台相关的准备操作。
disable_nonboot_cpus:关闭所有非 boot CPU 。
arch_suspend_disable_irqs:关闭全局中断
syscore_suspend:执行系统核心的挂起操作。
pm_wakeup_pending:检查在这段时间内是否有唤醒事件的发生,如果有就要终止suspend
如果前面阶段都一切顺利,则调用suspend_ops->enter(state)回调进行 suspend ,这时系统已经睡过去了,完成系统的suspend。
5、resume流程
系统的 resume 过程和 suspend 的流程恰好相反,这里就不展开分析了:
syscore_resume()
:恢复系统核心。
arch_suspend_enable_irqs()
:使能中断。
enable_nonboot_cpus()
:使能非启动的 CPU。
platform_resume_noirq(state)
:在没有中断的情况下恢复平台。
dpm_resume_noirq(PMSG_RESUME)
:在没有中断的情况下恢复设备。
platform_resume_early(state)
:早期恢复平台。
dpm_resume_early(PMSG_RESUME)
:早期恢复设备。
platform_resume_finish(state)
:完成平台的恢复操作。
整个系统的 suspend/resume 流程就分析到这里,如果大家想了解 suspend_ops->enter 函数到底做了什么,可以看我的另外一篇文章。