进程和进程的生命周期
- 进程:指计算机中已运行的程序。进程本身不是基本的运行单位,而是线程的容器。程序是指令、数据及其组织形式的描述,进程才是程序的真正运行实例。
- Linux内核把进程叫做Task,进程的虚拟地址空间可分为用户虚拟地址空间和内核虚拟地址空间,所有进程共享内核虚拟地址空间,每个进程有独立的用户虚拟地址空间。
进程的两种特殊形式
- 没有用户虚拟地址空间的进程叫做内核线程
- 共享用户虚拟地址空间的进程叫做用户线程
- 共享同一个用户虚拟地址空间的所有用户线程叫做线程组
C语言标准库进程 | Linux内核进程 |
---|---|
包括多个线程的进程 | 线程组 |
一个线程的进程 | 进程或者任务 |
线程 | 共享用户虚拟地址空间的进程 |
Linux通过:ps输出当前系统的进程状态。显示瞬间进程状态,不是动态连续;如果想动态连续,使用top命令。
- USER:使用者
- PID:进程编号
- VSZ:进程占用的虚拟内存容量(KB)
- RSS:进程占用的常驻物理内存量(KB)
- TTY:在哪个终端运行
- STAT:程序目前状态
- S:睡眠状态,但是可以被唤醒
- R:在运行
进程的生命周期
-
进程状态
- 创建状态
- 就绪状态
- 执行状态
- 阻塞状态
- 终止状态
- Linux内核提供API设置进程状态
- TASK_RUNNING:运行态或者就绪态,在内核中是运行态和就绪态的集合;
- TASK_INTERRUPTIBLE:可中断睡眠状态(又叫浅睡眠状态),进程阻塞时如果条件满足,内核就将进程状态改为TASK_RUNNING状态,加入就绪队列;
- TASK_UNINTERRUPTIBLE:不可中断睡眠状态(又叫深度睡眠状态),进程在睡眠时不被干扰;通过ps命令查看时,被标记为 D 的进程就是处于不可中断状态的进程;
- __TASK_STOPPED:停止状态(进程被暂停执行,而不是已经终止);
- EXIT_ZOMBIE:僵尸状态
task_struct数据结构分析
将进程抽象为进程控制块(PCB,Process Control Block),Linux内核中使用task_struct结构描述进程控制块。
Linux内核进程描述符(控制块)task_struct核心成员分析
// 进程控制块
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK/** For reasons of header soup (see current_thread_info()), this* must be the first element of task_struct.*/struct thread_info thread_info;
#endif/* -1 unrunnable, 0 runnable, >0 stopped: */volatile long state; // 进程状态标志/** This begins the randomizable portion of task_struct. Only* scheduling-critical items should be added above here.*/randomized_struct_fields_startvoid *stack; // 纸箱内核栈refcount_t usage;/* Per task flags (PF_*), defined further below: */unsigned int flags;unsigned int ptrace;#ifdef CONFIG_SMPstruct llist_node wake_entry;int on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK/* Current CPU: */unsigned int cpu;
#endifunsigned int wakee_flips;unsigned long wakee_flip_decay_ts;struct task_struct *last_wakee;/** recent_used_cpu is initially set as the last CPU used by a task* that wakes affine another task. Waker/wakee relationships can* push tasks around a CPU where each wakeup moves to the next one.* Tracking a recently used CPU allows a quick search for a recently* used CPU that may be idle.*/int recent_used_cpu;int wake_cpu;
#endifint on_rq;/*调度策略和优先级*/int prio;int static_prio;int normal_prio;unsigned int rt_priority;const struct sched_class *sched_class;struct sched_entity se;struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHEDstruct task_group *sched_task_group;
#endifstruct sched_dl_entity dl;#ifdef CONFIG_UCLAMP_TASK/* Clamp values requested for a scheduling entity */struct uclamp_se uclamp_req[UCLAMP_CNT];/* Effective clamp values used for a scheduling entity */struct uclamp_se uclamp[UCLAMP_CNT];
#endif#ifdef CONFIG_PREEMPT_NOTIFIERS/* List of struct preempt_notifier: */struct hlist_head preempt_notifiers;
#endif#ifdef CONFIG_BLK_DEV_IO_TRACEunsigned int btrace_seq;
#endifunsigned int policy;int nr_cpus_allowed; // const cpumask_t *cpus_ptr; // 此成员允许进程在哪个cpu上运行cpumask_t cpus_mask;#ifdef CONFIG_PREEMPT_RCUint rcu_read_lock_nesting;union rcu_special rcu_read_unlock_special;struct list_head rcu_node_entry;struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_PREEMPT_RCU */#ifdef CONFIG_TASKS_RCUunsigned long rcu_tasks_nvcsw;u8 rcu_tasks_holdout;u8 rcu_tasks_idx;int rcu_tasks_idle_cpu;struct list_head rcu_tasks_holdout_list;
#endif /* #ifdef CONFIG_TASKS_RCU */struct sched_info sched_info;struct list_head tasks;
#ifdef CONFIG_SMPstruct plist_node pushable_tasks;struct rb_node pushable_dl_tasks;
#endif// 指向内存描述符。进程:mm和active_mm指向同一个内存描述符。内核线程:mm是空指针// 当内核线程运行时,active_mm指向从进程借用内存描述符struct mm_struct *mm;struct mm_struct *active_mm; /* Per-thread vma caching: */struct vmacache vmacache;#ifdef SPLIT_RSS_COUNTINGstruct task_rss_stat rss_stat;
#endifint exit_state;int exit_code;int exit_signal;/* The signal sent when the parent dies: */int pdeath_signal;/* JOBCTL_*, siglock protected: */unsigned long jobctl;/* Used for emulating ABI behavior of previous Linux versions: */unsigned int personality;/* Scheduler bits, serialized by scheduler locks: */unsigned sched_reset_on_fork:1;unsigned sched_contributes_to_load:1;unsigned sched_migrated:1;unsigned sched_remote_wakeup:1;
#ifdef CONFIG_PSIunsigned sched_psi_wake_requeue:1;
#endif/* Force alignment to the next boundary: */unsigned :0;/* Unserialized, strictly 'current' *//* Bit to tell LSMs we're in execve(): */unsigned in_execve:1;unsigned in_iowait:1;
#ifndef TIF_RESTORE_SIGMASKunsigned restore_sigmask:1;
#endif
#ifdef CONFIG_MEMCGunsigned in_user_fault:1;
#endif
#ifdef CONFIG_COMPAT_BRKunsigned brk_randomized:1;
#endif
#ifdef CONFIG_CGROUPS/* disallow userland-initiated cgroup migration */unsigned no_cgroup_migration:1;/* task is frozen/stopped (used by the cgroup freezer) */unsigned frozen:1;
#endif
#ifdef CONFIG_BLK_CGROUP/* to be used once the psi infrastructure lands upstream. */unsigned use_memdelay:1;
#endifunsigned long atomic_flags; /* Flags requiring atomic access. */struct restart_block restart_block;pid_t pid; // 全局进程号pid_t tgid; // 全局线程组的标识符#ifdef CONFIG_STACKPROTECTOR/* Canary value for the -fstack-protector GCC feature: */unsigned long stack_canary;
#endif/** Pointers to the (original) parent process, youngest child, younger sibling,* older sibling, respectively. (p->father can be replaced with* p->real_parent->pid)*//* Real parent process: */struct task_struct __rcu *real_parent; // 指向真实父进程/* Recipient of SIGCHLD, wait4() reports: */struct task_struct __rcu *parent; // 指向父进程 如果使用系统调用跟踪进程,这个是跟踪进程,否则和real_parent是相同的/** Children/sibling form the list of natural children:*/struct list_head children;struct list_head sibling;struct task_struct *group_leader; // 指向线程组的组长/** 'ptraced' is the list of tasks this task is using ptrace() on.** This includes both natural children and PTRACE_ATTACH targets.* 'ptrace_entry' is this task's link on the p->parent->ptraced list.*/struct list_head ptraced;struct list_head ptrace_entry;/* PID/PID hash table linkage. */struct pid *thread_pid;struct hlist_node pid_links[PIDTYPE_MAX]; // 进程号,进程组标识符和会话标识符struct list_head thread_group;struct list_head thread_node;struct completion *vfork_done;/* CLONE_CHILD_SETTID: */int __user *set_child_tid;/* CLONE_CHILD_CLEARTID: */int __user *clear_child_tid;u64 utime;u64 stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIMEu64 utimescaled;u64 stimescaled;
#endifu64 gtime;struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GENstruct vtime vtime;
#endif#ifdef CONFIG_NO_HZ_FULLatomic_t tick_dep_mask;
#endif/* Context switch counts: */unsigned long nvcsw;unsigned long nivcsw;/* Monotonic time in nsecs: */u64 start_time;/* Boot based time in nsecs: */u64 start_boottime;/* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */unsigned long min_flt;unsigned long maj_flt;/* Empty if CONFIG_POSIX_CPUTIMERS=n */struct posix_cputimers posix_cputimers;/* Process credentials: *//* Tracer's credentials at attach: */const struct cred __rcu *ptracer_cred;/* Objective and real subjective task credentials (COW): */const struct cred __rcu *real_cred; // 此成员指向主体和真实客体证书/* Effective (overridable) subjective task credentials (COW): */const struct cred __rcu *cred; // 指向有效证书 但是可以临时改变#ifdef CONFIG_KEYS/* Cached requested key. */struct key *cached_requested_key;
#endif/** executable name, excluding path.** - normally initialized setup_new_exec()* - access it with [gs]et_task_comm()* - lock it with task_lock()*/char comm[TASK_COMM_LEN]; // 进程名称struct nameidata *nameidata;// 下面这两个成员用于UNIX系统,型号量和共享内存
#ifdef CONFIG_SYSVIPCstruct sysv_sem sysvsem;struct sysv_shm sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASKunsigned long last_switch_count;unsigned long last_switch_time;
#endif/* Filesystem information: */struct fs_struct *fs; // 文件系统信息,主要是进程根目录和当前工作目录/* Open file information: */struct files_struct *files; // 打开文件表/* Namespaces: */struct nsproxy *nsproxy; // 命名空间// 下面这快成员用于信号处理/* Signal handlers: */struct signal_struct *signal;struct sighand_struct __rcu *sighand;sigset_t blocked;sigset_t real_blocked;/* Restored if set_restore_sigmask() was used: */sigset_t saved_sigmask;struct sigpending pending;unsigned long sas_ss_sp;size_t sas_ss_size;unsigned int sas_ss_flags;struct callback_head *task_works;#ifdef CONFIG_AUDIT
#ifdef CONFIG_AUDITSYSCALLstruct audit_context *audit_context;
#endifkuid_t loginuid;unsigned int sessionid;
#endifstruct seccomp seccomp;/* Thread group tracking: */u64 parent_exec_id;u64 self_exec_id;/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */spinlock_t alloc_lock;/* Protection of the PI data structures: */raw_spinlock_t pi_lock;struct wake_q_node wake_q;#ifdef CONFIG_RT_MUTEXES/* PI waiters blocked on a rt_mutex held by this task: */struct rb_root_cached pi_waiters;/* Updated under owner's pi_lock and rq lock */struct task_struct *pi_top_task;/* Deadlock detection and priority inheritance handling: */struct rt_mutex_waiter *pi_blocked_on;
#endif#ifdef CONFIG_DEBUG_MUTEXES/* Mutex deadlock detection: */struct mutex_waiter *blocked_on;
#endif#ifdef CONFIG_DEBUG_ATOMIC_SLEEPint non_block_count;
#endif#ifdef CONFIG_TRACE_IRQFLAGSunsigned int irq_events;unsigned long hardirq_enable_ip;unsigned long hardirq_disable_ip;unsigned int hardirq_enable_event;unsigned int hardirq_disable_event;int hardirqs_enabled;int hardirq_context;unsigned long softirq_disable_ip;unsigned long softirq_enable_ip;unsigned int softirq_disable_event;unsigned int softirq_enable_event;int softirqs_enabled;int softirq_context;
#endif#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48ULu64 curr_chain_key;int lockdep_depth;unsigned int lockdep_recursion;struct held_lock held_locks[MAX_LOCK_DEPTH];
#endif#ifdef CONFIG_UBSANunsigned int in_ubsan;
#endif/* Journalling filesystem info: */void *journal_info;/* Stacked block device info: */struct bio_list *bio_list;#ifdef CONFIG_BLOCK/* Stack plugging: */struct blk_plug *plug;
#endif/* VM state: */struct reclaim_state *reclaim_state;struct backing_dev_info *backing_dev_info;struct io_context *io_context;#ifdef CONFIG_COMPACTIONstruct capture_control *capture_control;
#endif/* Ptrace state: */unsigned long ptrace_message;kernel_siginfo_t *last_siginfo;struct task_io_accounting ioac;
#ifdef CONFIG_PSI/* Pressure stall state */unsigned int psi_flags;
#endif
#ifdef CONFIG_TASK_XACCT/* Accumulated RSS usage: */u64 acct_rss_mem1;/* Accumulated virtual memory usage: */u64 acct_vm_mem1;/* stime + utime since last update: */u64 acct_timexpd;
#endif
#ifdef CONFIG_CPUSETS/* Protected by ->alloc_lock: */nodemask_t mems_allowed;/* Seqence number to catch updates: */seqcount_t mems_allowed_seq;int cpuset_mem_spread_rotor;int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS/* Control Group info protected by css_set_lock: */struct css_set __rcu *cgroups;/* cg_list protected by css_set_lock and tsk->alloc_lock: */struct list_head cg_list;
#endif
#ifdef CONFIG_X86_CPU_RESCTRLu32 closid;u32 rmid;
#endif
#ifdef CONFIG_FUTEXstruct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPATstruct compat_robust_list_head __user *compat_robust_list;
#endifstruct list_head pi_state_list;struct futex_pi_state *pi_state_cache;struct mutex futex_exit_mutex;unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTSstruct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];struct mutex perf_event_mutex;struct list_head perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPTunsigned long preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA/* Protected by alloc_lock: */struct mempolicy *mempolicy;short il_prev;short pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCINGint numa_scan_seq;unsigned int numa_scan_period;unsigned int numa_scan_period_max;int numa_preferred_nid;unsigned long numa_migrate_retry;/* Migration stamp: */u64 node_stamp;u64 last_task_numa_placement;u64 last_sum_exec_runtime;struct callback_head numa_work;/** This pointer is only modified for current in syscall and* pagefault context (and for tasks being destroyed), so it can be read* from any of the following contexts:* - RCU read-side critical section* - current->numa_group from everywhere* - task's runqueue locked, task not running*/struct numa_group __rcu *numa_group;/** numa_faults is an array split into four regions:* faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer* in this precise order.** faults_memory: Exponential decaying average of faults on a per-node* basis. Scheduling placement decisions are made based on these* counts. The values remain static for the duration of a PTE scan.* faults_cpu: Track the nodes the process was running on when a NUMA* hinting fault was incurred.* faults_memory_buffer and faults_cpu_buffer: Record faults per node* during the current scan window. When the scan completes, the counts* in faults_memory and faults_cpu decay and these values are copied.*/unsigned long *numa_faults;unsigned long total_numa_faults;/** numa_faults_locality tracks if faults recorded during the last* scan window were remote/local or failed to migrate. The task scan* period is adapted based on the locality of the faults with different* weights depending on whether they were shared or private faults*/unsigned long numa_faults_locality[3];unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */#ifdef CONFIG_RSEQstruct rseq __user *rseq;u32 rseq_sig;/** RmW on rseq_event_mask must be performed atomically* with respect to preemption.*/unsigned long rseq_event_mask;
#endifstruct tlbflush_unmap_batch tlb_ubc;union {refcount_t rcu_users;struct rcu_head rcu;};/* Cache last used pipe for splice(): */struct pipe_inode_info *splice_pipe;struct page_frag task_frag;#ifdef CONFIG_TASK_DELAY_ACCTstruct task_delay_info *delays;
#endif#ifdef CONFIG_FAULT_INJECTIONint make_it_fail;unsigned int fail_nth;
#endif/** When (nr_dirtied >= nr_dirtied_pause), it's time to call* balance_dirty_pages() for a dirty throttling pause:*/int nr_dirtied;int nr_dirtied_pause;/* Start of a write-and-pause period: */unsigned long dirty_paused_when;#ifdef CONFIG_LATENCYTOPint latency_record_count;struct latency_record latency_record[LT_SAVECOUNT];
#endif/** Time slack values; these are used to round up poll() and* select() etc timeout values. These are in nanoseconds.*/u64 timer_slack_ns;u64 default_timer_slack_ns;#ifdef CONFIG_KASANunsigned int kasan_depth;
#endif#ifdef CONFIG_FUNCTION_GRAPH_TRACER/* Index of current stored address in ret_stack: */int curr_ret_stack;int curr_ret_depth;/* Stack of return addresses for return function tracing: */struct ftrace_ret_stack *ret_stack;/* Timestamp for last schedule: */unsigned long long ftrace_timestamp;/** Number of functions that haven't been traced* because of depth overrun:*/atomic_t trace_overrun;/* Pause tracing: */atomic_t tracing_graph_pause;
#endif#ifdef CONFIG_TRACING/* State flags for use by tracers: */unsigned long trace;/* Bitmask and counter of trace recursion: */unsigned long trace_recursion;
#endif /* CONFIG_TRACING */#ifdef CONFIG_KCOV/* See kernel/kcov.c for more details. *//* Coverage collection mode enabled for this task (0 if disabled): */unsigned int kcov_mode;/* Size of the kcov_area: */unsigned int kcov_size;/* Buffer for coverage collection: */void *kcov_area;/* KCOV descriptor wired with this task or NULL: */struct kcov *kcov;/* KCOV common handle for remote coverage collection: */u64 kcov_handle;/* KCOV sequence number: */int kcov_sequence;
#endif#ifdef CONFIG_MEMCGstruct mem_cgroup *memcg_in_oom;gfp_t memcg_oom_gfp_mask;int memcg_oom_order;/* Number of pages to reclaim on returning to userland: */unsigned int memcg_nr_pages_over_high;/* Used by memcontrol for targeted memcg charge: */struct mem_cgroup *active_memcg;
#endif#ifdef CONFIG_BLK_CGROUPstruct request_queue *throttle_queue;
#endif#ifdef CONFIG_UPROBESstruct uprobe_task *utask;
#endif
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)unsigned int sequential_io;unsigned int sequential_io_avg;
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEPunsigned long task_state_change;
#endifint pagefault_disabled;
#ifdef CONFIG_MMUstruct task_struct *oom_reaper_list;
#endif
#ifdef CONFIG_VMAP_STACKstruct vm_struct *stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK/* A live task holds one reference: */refcount_t stack_refcount;
#endif
#ifdef CONFIG_LIVEPATCHint patch_state;
#endif
#ifdef CONFIG_SECURITY/* Used by LSM modules for access restriction: */void *security;
#endif#ifdef CONFIG_GCC_PLUGIN_STACKLEAKunsigned long lowest_stack;unsigned long prev_lowest_stack;
#endif/** New fields for task_struct should be added above here, so that* they are included in the randomized portion of task_struct.*/randomized_struct_fields_end/* CPU-specific state of this task: */struct thread_struct thread;/** WARNING: on x86, 'thread_struct' contains a variable-sized* structure. It *MUST* be at the end of 'task_struct'.** Do not put anything below here!*/
};
进程优先级和系统调用
进程优先级
- 限期进程的优先级是-1;
- 实时进程的优先级是1-99,优先级数值越大,表示优先级越高;
- 普通进程的静态优先级为100-139,优先级数值越小,优先级越高,可以通过修改nice值改变普通进程的优先级,优先级等于120+nice值。
系统调用
运行应用程序时,调用fork()/vfork()/clone()函数就是系统调用。系统调用就是应用程序进入内核空间执行任务,比如:创建进程、文件IO等等。具体如图:
-
如何研究系统调用,举个例子:
内核线程
它是独立运行在内核中的进程,与普通用户进程的区别在于内核线程没有独立的进程地址空间:task_struct结构里的成员指针mm被设置为NULL。它只能运行在内核空间,通常被称为守护线程。一般用于执行以下任务:
- 周期性地将修改过的内存页与页来源块设备同步;
- 如果内存页很少使用,写入交换区;
- 管理延时动作(deferred action);
- 实现文件系统的事务日志。
退出进程
- 主动终止:从main函数返回(链接程序会自动添加对exit系统调用的调用),或者主动调用exit函数
- 被动终止:接收到SIGKILL等杀死信号或异常被终止