【数据结构】【rq】【sched_entity】

技术2023-08-05 80

rq

每个cpu都有自己的struct rq结构，rq描述的是该cpu上运行的所有进程，其中包括实时进程和一个根cfs运行队列。因为调度类的优先级为dl>rt>fair，所以调度器选择进程的先后顺序也为dl>rt>fair

/* * This is the main, per-CPU runqueue data structure. * * Locking rule: those places that want to lock multiple runqueues * (such as the load balancing or the thread migration code), lock * acquire operations must be ordered by ascending &runqueue. */ struct rq { /* runqueue lock: */ raw_spinlock_t lock; /* * nr_running and cpu_load should be in the same cacheline because * remote CPUs use both these fields when doing load calculation. */ /*这个rq里面存在多少个running task，包括RT，fair，DL sched class的task*/ unsigned int nr_running; #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; unsigned int nr_preferred_running; #endif /*用以表示处理器的负载，在每个处理器的rq中都会有对应到该处理器的cpu_load参数配置，在每次处理器触发scheduler tick时，都会呼叫函数 update_cpu_load_active,进行cpu_load的更新。在系统初始化的时候会呼叫函数sched_init把rq的cpu_load array初始化为0. 了解他的更新方式最好的方式是通过函数update_cpu_load,公式如下澹? cpu_load[0]会直接等待rq中load.weight的值。 cpu_load[1]=(cpu_load[1]*(2-1)+cpu_load[0])/2 cpu_load[2]=(cpu_load[2]*(4-1)+cpu_load[0])/4 cpu_load[3]=(cpu_load[3]*(8-1)+cpu_load[0])/8 cpu_load[4]=(cpu_load[4]*(16-1)+cpu_load[0]/16 呼叫函数this_cpu_load时，所返回的cpu load值是cpu_load[0] 而在进行cpu blance或migration时，就会呼叫函数 source_load target_load取得对该处理器cpu_load index值，来进行计算*/ #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX];//表示该rq所在cpu的历史load，一般有5个 unsigned long last_load_update_tick; /*在选择下一个调度实体的时候，需要判断此task是否是misfit task，是否做的决策是 ● 不相同的，比如会强制balance等等*/ unsigned int misfit_task; #ifdef CONFIG_NO_HZ_COMMON u64 nohz_stamp; unsigned long nohz_flags; #endif #ifdef CONFIG_NO_HZ_FULL unsigned long last_sched_tick; #endif #ifdef CONFIG_CPU_QUIET /* time-based average load */ u64 nr_last_stamp; u64 nr_running_integral; seqcount_t ave_seqcnt; #endif /* capture load from *all* tasks on this cpu: */ /*load->weight值，会是目前所执行的schedule entity的 load->weight的总和，也就是说rq的load->weight越高，也表示所负责的排程单元load->weight总和越高表示处理器所负荷的执行单元也越重*/ /*在rq里面的可运行的所有task的总的load，当nr_running数量发生变化时也会更新*/ struct load_weight load;//表示当前cpu的load，这个load是它所有就绪进程的load之和（同样包括cfs,rq及正在运行的） /*在每次scheduler 
tick中呼叫update_cpu_load时，这个值就增加一，可以用来反馈目前cpu load更新的次数*/ /*在rq里面有多少个task的load需要更新*/ unsigned long nr_load_updates; /*用来累加处理器进行context switch的次数，会在函数schedule呼叫时进行累加，并可以通过函数 nr_context_switches统计目前所有处理器总共的context switch 次数，或是可以透过查看档案/proc/stat中的ctxt位得知目前整个系统触发context switch的次数*/ /*进程发生上下文切换的次数，只有proc 文件系统里面会导出这个统计数值*/ u64 nr_switches; struct cfs_rq cfs;//该rq所包括的cfs_rq运行队列，这个是所有cfs_rq的root struct rt_rq rt; struct dl_rq dl; /*用以支援可以group cfs tasks的机制*/ #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this cpu: */ /*在有设置fair group scheduling 的环境下，会基于原本cfs rq中包含有若干task的group 所成的排程集合，也就是说当有一个group a 就会有自己的cfs rq用来排程自己所属的tasks, 而属于这group a的tasks所使用到的处理器时间就会以这group a总共所分的的时间为上限。基于cgroup的fair group scheduling 架构，可以创造出有阶层性的task组织，根据不同task的功能群组化在配置给该群主对应的处理器资源，让属于该群主下的task可以透过rq机制排程。使用属于该群主下的资源。这个变数主要是管理CFS RQ list，操作上可以透过函数 list_add_leaf_cfs_rq把一个group cfs rq加入到list中，或透过函数list_del_leaf_cfs_rq把一个group cfs rq移除，并可以透过for_each_leaf_cfs_rq把一个rq上得所有leaf cfs_rq走一遍*/ /* list of leaf cfs_rq on this cpu: */ struct list_head leaf_cfs_rq_list;//如果使用的cgroup来创建嵌套的group，那么这个group的cfs_rq通过该变量组织，注：每个cgroup都有一个cfs_rq struct list_head *tmp_alone_branch; #endif /* CONFIG_FAIR_GROUP_SCHED */ /* * This is part of a global counter where only the total sum * over all CPUs matters. A task can increase this counter on * one CPU and if it got migrated afterwards it may decrease * it on another CPU. Always updated under the runqueue lock: */ /*一般来说，linux kernel 的task状态可以为TASK_RUNNING TASK_INTERRUPTIBLE(sleep), TASK_UNINTERRUPTIBLE(Deactivate Task,此时Task会从rq中移除)或TASK_STOPPED. 
透过这个变数会统计目前rq中有多少task属于 TASK_UNINTERRUPTIBLE的状态。当呼叫函数 active_task时，会把nr_uninterruptible值减一，并透过该函数 enqueue_task把对应的task依据所在的scheduling class 放在对应的rq中，并把目前rq中nr_running值加一*/ unsigned long nr_uninterruptible; idle:指向属于idle-task scheduling class 的idle task; stop:指向目前最高等级属于stop-task scheduling class 的task;*/ struct task_struct *curr, *idle, *stop; /*基于处理器的jiffies值，用以记录下次进行处理器 balancing 的时间点*/ unsigned long next_balance; /*用以存储context-switch发生时，前一个task的memory management 结构并可用在函数finish_task_switch中，透过函数mmdrop释放前一个 task的记忆体资源*/ struct mm_struct *prev_mm; unsigned int clock_skip_update; /* 用以记录目前rq的clock值/ /*用以记录目前rq的clock值，基本上该值会等于透过sched_clock_cpu (cpu_of(rq))的回传值，并会在每次呼叫scheduler_tick时透过函数update_rq_clock更新目前rq clock值。在实作部分，函数sched_clock_cpu会透过sched_clock_local或 ched_clock_remote取得对应的sched_clock_data,而处理的sched_clock_data 值，会透过函数sched_clock_tick在每次呼叫scheduler_tick时进行更新；*/ u64 clock;//运行队列的时钟，这个时钟是后面cfs,rq所使用的时钟，也是大多数proc显然的时间相关的时钟 u64 clock_task; /*用以记录目前rq中有多少task处于等待i/o的sleep状态在实际的使用上，例如当driver接受来自task的调用，但处于等待i/o 回复的阶段时，为了充分利用处理器的执行资源，这时就可以在driver中呼叫函数io_schedule，此时就会把目前rq中的nr_iowait加一，并设定目前task的io_wait为1 然后触发scheduling 让其他task有机会可以得到处理器执行时间*/ atomic_t nr_iowait; #ifdef CONFIG_SMP /*root domain是基于多核心架构下的机制，会由rq结构记住目前采用的root domain，其中包括了目前的cpu mask(包括span,online rt overload), reference count 跟cpupri 当root domain有被rq参考到时，refcount 就加一，反之就减一。而cpu mask span表示rq可挂上的cpu mask,noline为rq目前已经排程的 cpu mask cpu上执行real-time task.可以参考函数pull_rt_task，当一个rq中属于 real-time的task已经执行完毕，就会透过函数pull_rt_task从该 rq中属于rto_mask cpu mask 可以执行的处理器上，找出是否有一个处理器有大于一个以上的real-time task，若有就会转到目前这个执行完成 real-time task 的处理器上而cpupri不同于Task本身有区分140個(0-139) Task Priority (0-99為RT Priority 而 100-139為Nice值 -20-19). CPU Priority本身有102個Priority (包括,-1 為Invalid, 0為Idle,1為Normal,2-101對應到Real-Time Priority 0-99). 參考函式convert_prio, Task Priority如果是 140就會對應到 CPU Idle,如果是大於等於100就會對應到CPU Normal, 若是Task Priority介於0-99之間,就會對應到CPU Real-Time Priority 101-2之間.) 
在實際的操作上,例如可以透過函式cpupri_find 帶入一個要插入的Real-Time Task,此時就會依據cpupri中 pri_to_cpu選擇一個目前執行Real-Time Task且該Task 的優先級比目前要插入的Task更低的處理器, 並透過CPU Mask(lowest_mask)返回目前可以選擇的處理器Mask. 實作的部份可以參考檔案kernel/sched_cpupri.c. 在初始化的過程中,會透過函式sched_init呼叫函式init_defrootdomain, 對Root Domain與 CPU Priority機制進行初始化.*/ struct root_domain *rd; /*Schedule Domain是基於多核心架構下的機制. 每個處理器都會有一個基礎的Scheduling Domain, Scheduling Domain可以有階層性的架構,透過parent 可以找到上一層的Domain,或是透過child找到下一層的 Domain (NULL表示結尾.).並可透過span 栏位,表示這個Domain所能涵蓋的處理器範圍. 通常Base Domain會涵蓋系統中所有處理器的個數, 而Child Domain所能涵蓋的處理器個數不超過它的 Parent Domain. 而當在進行Scheduling Domain 中的Task Balance 時,就會以該Domain所能涵蓋的處理器為最大範圍. 同時,每個Schedule Domain都會包括一個或一個以上的 CPU Groups (結構為struct sched_group),並透過next變數把 CPU Groups串連在一起(成為一個單向的Circular linked list), 每個CPU Group都會有變數cpumask來定义這個CPU Group 所涵蓋的處理器範圍.並且CPU Group所包括的處理器範圍,必需涵蓋在所屬的Schedule Domain處理器範圍中. 當進行Scheduling Domain的Balancing時,會以其下的CPU Groups 為單位,根據cpu_power （會是該Group所涵蓋的處理器 Tasks Loading的總和）來比較不同的CPU Groups的負荷, 以進行Tasks的移動,達到Balancing的目的. 在有支援SMP的架構下,會在函式sched_init中,呼叫open_softirq, 註冊 SCHED_SOFTIRQ Software IRQ与其对应的 Callback函式 run_rebalance_domains. 並會在每次呼叫函式scheduler_tick時, 透過函式trigger_load_balance确认是否目前的jiffies值已經大於RunQueue下一次要觸發Load Balance的next_balance時間值, 並透過函式raise_softirq觸發SCHED_SOFTIRQ Software IRQ. 在Software IRQ觸發後,就會呼叫函式run_rebalance_domains, 並在函式rebalance_domains中,進行后续處理器上的 Scheduling Domain Load Balance動作. 
有關Scheduling Domain進一步的內容,也可以參考 Linux Kernel文件 Documentation/scheduler/sched-domains.txt.*/ /* 当前CPU所在基本调度域，每个调度域包含一个或多个CPU组，每个CPU组包含该调度 ● 域中一个或多个CPU子集，负载均衡都是在调度域中的组之间完成的，不能跨域进行负载均衡 */ struct sched_domain *sd; unsigned long cpu_capacity; unsigned long cpu_capacity_orig; struct callback_head *balance_callback; unsigned char idle_balance; /* For active balancing */ /*當RunQueue中此值為1,表示這個RunQueue正在進行 Fair Scheduling的Load Balance,此時會呼叫stop_one_cpu_nowait 暫停該RunQueue所屬處理器的排程,並透過函式 active_load_balance_cpu_stop,把Tasks從最忙碌的處理器, 移到Idle的處理器上執行.*/ int active_balance;/* 如果需要把进程迁移到其他运行队列，就需要设置这个位 */ int push_cpu; struct cpu_stop_work active_balance_work; /* cpu of this runqueue: */ int cpu; int online; struct list_head cfs_tasks; #ifdef CONFIG_INTEL_DWS struct intel_dws dws; #endif u64 rt_avg; u64 age_stamp; u64 idle_stamp; u64 avg_idle; /* This is used to determine avg_idle's max value */ u64 max_idle_balance_cost; #endif #ifdef CONFIG_SCHED_WALT /* * max_freq = user or thermal defined maximum * max_possible_freq = maximum supported by hardware */ unsigned int cur_freq, max_freq, min_freq, max_possible_freq; struct cpumask freq_domain_cpumask; u64 cumulative_runnable_avg; int efficiency; /* Differentiate cpus with different IPC capability */ int load_scale_factor; int capacity; int max_possible_capacity; u64 window_start; u64 curr_runnable_sum; u64 prev_runnable_sum; u64 nt_curr_runnable_sum; u64 nt_prev_runnable_sum; u64 cur_irqload; u64 avg_irqload; u64 irqload_ts; #endif /* CONFIG_SCHED_WALT */ #ifdef CONFIG_IRQ_TIME_ACCOUNTING // 上次中断发生的时间 u64 prev_irq_time; #endif #ifdef CONFIG_PARAVIRT u64 prev_steal_time; #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING u64 prev_steal_time_rq; #endif /* calc_load related fields */ /*用以記錄下一次計算CPU Load的時間,初始值為目前的jiffies加上五秒與1次的Scheduling Tick的間隔 (=jiffies + LOAD_FREQ,且LOAD_FREQ=(5*HZ+1))*/ /*负载均衡相关*/ unsigned long calc_load_update; long calc_load_active; #ifdef CONFIG_SCHED_HRTICK #ifdef CONFIG_SMP int hrtick_csd_pending; struct 
call_single_data hrtick_csd; #endif struct hrtimer hrtick_timer; #endif /*统计调度信息使用*/ #ifdef CONFIG_SCHEDSTATS /* latency stats */ struct sched_info rq_sched_info; unsigned long long rq_cpu_time; /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ /* sys_sched_yield() stats */ unsigned int yld_count; /* schedule() stats */ unsigned int sched_count; unsigned int sched_goidle; /* try_to_wake_up() stats */ unsigned int ttwu_count; unsigned int ttwu_local; #ifdef CONFIG_SMP struct eas_stats eas_stats;/*统计eas状态信息，由于看的arm新的调度器是基于EAS实现的*/ #endif #endif #ifdef CONFIG_SMP struct llist_head wake_list; #endif #ifdef CONFIG_CPU_IDLE /* Must be inspected within a rcu lock section */ /* Must be inspected within a rcu lock section */ /*在cpuidle_enter_state中设置，即cpu进入相应的idle state时候才会设置这两个参数*/ struct cpuidle_state *idle_state; int idle_state_idx; #endif }; 这个是另一篇博客上看到的： https://blog.csdn.net/wukongmingjing/article/details/82024371 /* * This is the main, per-CPU runqueue data structure. * * Locking rule: those places that want to lock multiple runqueues * (such as the load balancing or the thread migration code), lock * acquire operations must be ordered by ascending &runqueue. */ struct rq { /* runqueue lock: */ raw_spinlock_t lock; /* * nr_running and cpu_load should be in the same cacheline because * remote CPUs use both these fields when doing load calculation. 
*/ /*这个rq里面存在多少个running task，包括RT，fair，DL sched class的task*/ unsigned int nr_running; #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; unsigned int nr_preferred_running; #endif #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; unsigned long last_load_update_tick; /*在选择下一个调度实体的时候，需要判断此task是否是misfit task，是否做的决策是不相同的，比如会强制balance等等*/ unsigned int misfit_task; #ifdef CONFIG_NO_HZ_COMMON u64 nohz_stamp; unsigned long nohz_flags; #endif #ifdef CONFIG_NO_HZ_FULL unsigned long last_sched_tick; #endif #ifdef CONFIG_CPU_QUIET /* time-based average load */ u64 nr_last_stamp; u64 nr_running_integral; seqcount_t ave_seqcnt; #endif /* capture load from *all* tasks on this cpu: */ /*在rq里面的可运行的所有task的总的load，当nr_running数量发生变化时也会更新*/ struct load_weight load; /*在rq里面有多少个task的load需要更新*/ unsigned long nr_load_updates; /*进程发生上下文切换的次数，只有proc 文件系统里面会导出这个统计数值*/ u64 nr_switches; /*每个cpu上的rq，都包含了cfs_rq,rt_rq和dl_rq调度队列，包含红黑树的根*/ struct cfs_rq cfs; struct rt_rq rt; struct dl_rq dl; #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this cpu: */ struct list_head leaf_cfs_rq_list; struct list_head *tmp_alone_branch; #endif /* CONFIG_FAIR_GROUP_SCHED */ /* * This is part of a global counter where only the total sum * over all CPUs matters. A task can increase this counter on * one CPU and if it got migrated afterwards it may decrease * it on another CPU. 
Always updated under the runqueue lock: *//* 曾经处于队列但现在处于TASK_UNINTERRUPTIBLE状态的进程数量 */ unsigned long nr_uninterruptible; /*curr指针表示当前运行的task指针，idle表示rq里面没有其他进程可以运行，最后执行 idle task，此cpu进入idle状态，stop表示当前task sched_class stop调度类*/ struct task_struct *curr, *idle, *stop; /*下一次balance的时间，系统会周期性的做balance动作。*/ unsigned long next_balance; struct mm_struct *prev_mm; unsigned int clock_skip_update; /*rq运行时间，是一个累加值*/ u64 clock; u64 clock_task; /*当前rq里面有多少iowait数量*/ atomic_t nr_iowait; #ifdef CONFIG_SMP struct root_domain *rd; /* 当前CPU所在基本调度域，每个调度域包含一个或多个CPU组，每个CPU组包含该调度域中一个或多个CPU子集，负载均衡都是在调度域中的组之间完成的，不能跨域进行负载均衡 */ struct sched_domain *sd; /*cpu_capacity：此cpu的实际capacity，会随着系统运行变化而变化，初始值为capacity_orig，cpu_capacity_orig：是dts配置的各个cpu的capacity，是一个常量*/ unsigned long cpu_capacity; unsigned long cpu_capacity_orig; struct callback_head *balance_callback; unsigned char idle_balance; /* For active balancing */ /* 如果需要把进程迁移到其他运行队列，就需要设置这个位 */ int active_balance; int push_cpu; struct task_struct *push_task; struct cpu_stop_work active_balance_work; /* cpu of this runqueue: */ /* 该运行队列所属CPU */ int cpu; int online; struct list_head cfs_tasks; #ifdef CONFIG_INTEL_DWS struct intel_dws dws; #endif /*rt task的负载，随着sched period周期衰减一半。看这个函数：sched_avg_update*/ u64 rt_avg; /* 该运行队列存活时间,区别于rq运行时间，数值update在一个cpu启动的时候和调度器初始化的时候*/ u64 age_stamp; /*在某个cpu变成idle的时候，标记rq idle的时间戳*/ u64 idle_stamp; /*rq平均idle的时间*/ u64 avg_idle; /* This is used to determine avg_idle's max value */ u64 max_idle_balance_cost; #endif /*在WALT window assist load tracing的文章中详细的讲解了这几个参数怎么计算，怎么实现的*/ #ifdef CONFIG_SCHED_WALT u64 cumulative_runnable_avg; u64 window_start; u64 curr_runnable_sum; u64 prev_runnable_sum; u64 nt_curr_runnable_sum; u64 nt_prev_runnable_sum; u64 cur_irqload; u64 avg_irqload; u64 irqload_ts; u64 cum_window_demand; /*为了解决某一个问题，我们自己添加的几个flag，目的是针对高负载（两个窗口都是高负载）情况下使用前一个窗口此task的running time来计算此task在当前窗口的util 数值（cpu_util_freq此函数计算）。现在的系统使用的是此task在当前窗口的比重来计算util的，可能会存在频率不稳定的情况*/ enum { CPU_BUSY_CLR = 0, 
CPU_BUSY_PREPARE, CPU_BUSY_SET, } is_busy; #endif /* CONFIG_SCHED_WALT */ #ifdef CONFIG_IRQ_TIME_ACCOUNTING /*计算irq起来的时间戳*/ u64 prev_irq_time; #endif #ifdef CONFIG_PARAVIRT u64 prev_steal_time; #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING u64 prev_steal_time_rq; #endif /* calc_load related fields */ /*负载均衡相关*/ unsigned long calc_load_update; long calc_load_active; #ifdef CONFIG_SCHED_HRTICK #ifdef CONFIG_SMP int hrtick_csd_pending; struct call_single_data hrtick_csd; #endif struct hrtimer hrtick_timer; #endif /*统计调度信息使用*/ #ifdef CONFIG_SCHEDSTATS /* latency stats */ struct sched_info rq_sched_info; unsigned long long rq_cpu_time; /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ /* sys_sched_yield() stats */ unsigned int yld_count; /* schedule() stats */ unsigned int sched_count; unsigned int sched_goidle; /* try_to_wake_up() stats */ unsigned int ttwu_count; unsigned int ttwu_local; #ifdef CONFIG_SMP /*统计eas状态信息，由于看的arm新的调度器是基于EAS实现的*/ struct eas_stats eas_stats; #endif #endif #ifdef CONFIG_SMP struct llist_head wake_list; #endif #ifdef CONFIG_CPU_IDLE /* Must be inspected within a rcu lock section */ /*在cpuidle_enter_state中设置，即cpu进入相应的idle state时候才会设置这两个参数*/ struct cpuidle_state *idle_state; int idle_state_idx; #endif };

sched_entity

se就是一个调度实体，可以将它看作是一个进程或者是一个进程组

struct util_est

struct sched_entity { /* for load-balancing负荷权重，这个决定了进程在CPU上的运行时间和被调度次数 */ struct load_weight load; /* for load-balancing */ struct rb_node run_node; struct list_head group_node;/* 实体所在的进程组 */ //表示是否在运行队列或正在执行（当前执行的进程是不保存在红黑树里，但它的on_rq还是标志着的，记住这个也很关键） /* 表明是否处于CFS红黑树运行队列中，需要明确一个观点就是，CFS运行队列里面包含有一个红黑树，但这个红黑树并不是CFS运行队列的全部，因为红黑树仅仅是用于选择出下一个调度程序的算法。很简单的一个例子，普通程序运行时，其并不在红黑树中，但是还是处于CFS运行队列中，其on_rq为真。只有准备退出、即将睡眠等待和转为实时进程的进程其CFS运行队列的on_rq为假。 */ unsigned int on_rq;/* 是否在就绪队列上 */ //这个并不是表示进程开始执行的时候，而是每次update_curr都会更新该时钟为当前rq的clock，它主要用于计算上次执行update_curr到这次再执行，总共发的cpu clock，然后再把这个差值加到sum_exec_runtime u64 exec_start;/* 上次启动的时间*//* 开始运行时间 */ u64 sum_exec_runtime;/* 总运行时间 */ u64 vruntime;是本进程生命周期中在CPU上运行的虚拟时钟。那么何时应该更新这些时间呢?这是通过调用update_curr实现的, 该函数在多处调用. //进程总共执行的cpu clock（占用cpu的物理时间） //上次该进程被调度时已经占用的cpu时间（每次在调度一个新的进程时都会把它的se->prev_sum_exec_runtime = se->sum_exec_runtime），所以sum_exec_runtime- prev_sum_exec_runtime就是这次调度占用cpu的clock ///* 进程在切换进CPU时的sum_exec_runtime值 */ u64 prev_sum_exec_runtime;是用于记录该进程的CPU消耗时间，这个是真实的CPU消耗时间。在进程撤销时会将sum_exec_runtime保存到prev_sum_exec_runtime中 u64 nr_migrations;/* 此调度实体中进程移到其他CPU组的数量 */ #ifdef CONFIG_SCHEDSTATS struct sched_statistics statistics; #endif #ifdef CONFIG_FAIR_GROUP_SCHED int depth; /* 代表此进程组的深度，每个进程组都比其parent调度组深度大1 */ /* 父亲调度实体指针，如果是进程则指向其运行队列的调度实体，如果是进程组则指向其上一个进程组的调度实体 * 在 set_task_rq 函数中设置 */ struct sched_entity *parent;//该se的上级se（只对组调度有用） /* rq on which this entity is (to be) queued: */ struct cfs_rq *cfs_rq;//该se就绪时所属的cfs_rq，在不同cpu上该值是不一样的（即cfs_rq_of(se)找到的是se所在的父se的my_q） /* rq "owned" by this entity/group: */ /* 实体的红黑树运行队列，如果为NULL表明其是一个进程，若非NULL表明其是调度组 */ struct cfs_rq *my_q;//该se下面的管理的se组成的cfs_rq（只对组调度有用），这个与cfs_rq的区别很重要，它是该se本身所管理的所有下级se所组成的运行队列，而cfs_rq则是该se所属的父级运行队列 #endif #ifdef CONFIG_SMP /* Per entity load average tracking */ struct sched_avg avg; #endif };

/**

* struct util_est - Estimation（估算） utilization of FAIR tasks

* @enqueued: instantaneous（瞬时） estimated utilization of a task/cpu

* @ewma: the Exponential（指数） Weighted Moving Average (EWMA)

* utilization of a task

* Support data structure to track an Exponential Weighted Moving Average

* (EWMA) of a FAIR task's utilization. New samples are added to the moving

* average each time a task completes an activation（激活）. Sample's weight is chosen

* so that the EWMA will be relatively insensitive（不敏感） to transient（瞬时） changes to the

* task's workload.

* The enqueued attribute has a slightly different meaning for tasks and cpus:

* - task: the task's util_avg at last task dequeue time

* - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU

* Thus, the util_est.enqueued of a task represents the contribution on the

* estimated utilization of the CPU where that task is currently enqueued.

* Only for tasks we track a moving average of the past(过去的) instantaneous

* estimated utilization. This allows to absorb(吸收) sporadic(零星) drops in utilization

* of an otherwise almost periodic task.

struct util_est {

unsigned int enqueued;入队列时估算的task的效力

unsigned int ewma;指数加权动态的平均task的效力

#define UTIL_EST_WEIGHT_SHIFT 2

} __attribute__((__aligned__(sizeof(u64))));

Processed: 0.015, SQL: 9