// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/sched/core.c
*
* Core kernel scheduler code and related syscalls
*
* Copyright (C) 1991-2002 Linus Torvalds
*/
#include "sched.h"
#include <linux/nospec.h>
#include <linux/kcov.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
#include "../workqueue_internal.h"
#include "../../fs/io-wq.h"
#include "../smpboot.h"
#include "pelt.h"
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
/*
* Export tracepoints that act as bare tracehooks (i.e. have no trace event
* associated with them) to allow external modules to probe them.
*/
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
/*
* Debugging: various feature bits
*
* If SCHED_DEBUG is disabled, each compilation unit has its own copy of
* sysctl_sched_features, defined in sched.h, to allow constant propagation
* at compile time and compiler optimization based on the default feature values.
*/
#define SCHED_FEAT(name, enabled) \
(1UL << __SCHED_FEAT_##name) * enabled |
const_debug unsigned int sysctl_sched_features =
#include "features.h"
0;
#undef SCHED_FEAT
#endif
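/*
 * For illustration (a sketch using just two of the entries in features.h):
 * if features.h contained
 *
 *     SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
 *     SCHED_FEAT(START_DEBIT, true)
 *
 * the initializer above would expand to roughly:
 *
 *     const_debug unsigned int sysctl_sched_features =
 *         (1UL << __SCHED_FEAT_GENTLE_FAIR_SLEEPERS) * true |
 *         (1UL << __SCHED_FEAT_START_DEBIT) * true |
 *         0;
 */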
/*
* Number of tasks to iterate in a single balance run.
* Limited because this is done with IRQs disabled.
*/
const_debug unsigned int sysctl_sched_nr_migrate = 32;
/*
* period over which we measure -rt task CPU usage in us.
* default: 1s
*/
unsigned int sysctl_sched_rt_period = 1000000;
__read_mostly int scheduler_running;
/*
* part of the period that we allow rt tasks to run in us.
* default: 0.95s
*/
int sysctl_sched_rt_runtime = 950000;
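/*
 * Worked example with the defaults above: RT tasks may consume at most
 * 950000us of every 1000000us period, i.e. 95% of CPU time, leaving 5%
 * for non-RT tasks. Writing -1 to sysctl_sched_rt_runtime disables this
 * throttling.
 */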
/*
* __task_rq_lock - lock the rq @p resides on.
*/
struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(rq->lock)
{
struct rq *rq;
lockdep_assert_held(&p->pi_lock);
for (;;) {
rq = task_rq(p);
raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
rq_pin_lock(rq, rf);
return rq;
}
raw_spin_unlock(&rq->lock);
while (unlikely(task_on_rq_migrating(p)))
cpu_relax();
}
}
/*
* task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
*/
struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(p->pi_lock)
__acquires(rq->lock)
{
struct rq *rq;
for (;;) {
raw_spin_lock_irqsave(&p->pi_lock, rf->flags);
rq = task_rq(p);
raw_spin_lock(&rq->lock);
/*
* move_queued_task() task_rq_lock()
*
* ACQUIRE (rq->lock)
* [S] ->on_rq = MIGRATING [L] rq = task_rq()
* WMB (__set_task_cpu()) ACQUIRE (rq->lock);
* [S] ->cpu = new_cpu [L] task_rq()
* [L] ->on_rq
* RELEASE (rq->lock)
*
* If we observe the old CPU in task_rq_lock(), the acquire of
* the old rq->lock will fully serialize against the stores.
*
* If we observe the new CPU in task_rq_lock(), the address
* dependency headed by '[L] rq = task_rq()' and the acquire
* will pair with the WMB to ensure we then also see migrating.
*/
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
rq_pin_lock(rq, rf);
return rq;
}
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
while (unlikely(task_on_rq_migrating(p)))
cpu_relax();
}
}
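/*
 * Typical usage (a sketch): pair with task_rq_unlock() and only rely on
 * the returned rq while both locks are held:
 *
 *     struct rq_flags rf;
 *     struct rq *rq;
 *
 *     rq = task_rq_lock(p, &rf);
 *     ... p cannot be migrated here; rq->lock and p->pi_lock are held ...
 *     task_rq_unlock(rq, p, &rf);
 */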
/*
* RQ-clock updating methods:
*/
static void update_rq_clock_task(struct rq *rq, s64 delta)
{
/*
* In theory, the compiler should just see 0 here, and optimize out the call
* to update_irq_load_avg(). But I don't trust it...
*/
s64 __maybe_unused steal = 0, irq_delta = 0;
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
/*
* Since irq_time is only updated on {soft,}irq_exit, we might run into
* this case when a previous update_rq_clock() happened inside a
* {soft,}irq region.
*
* When this happens, we stop ->clock_task and only update the
* prev_irq_time stamp to account for the part that fit, so that a next
* update will consume the rest. This ensures ->clock_task is
* monotonic.
*
* It does however cause some slight mis-attribution of {soft,}irq
* time; a more accurate solution would be to update the irq_time using
* the current rq->clock timestamp, except that would require using
* atomic ops.
*/
if (irq_delta > delta)
irq_delta = delta;
rq->prev_irq_time += irq_delta;
delta -= irq_delta;
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
if (static_key_false((&paravirt_steal_rq_enabled))) {
steal = paravirt_steal_clock(cpu_of(rq));
steal -= rq->prev_steal_time_rq;
if (unlikely(steal > delta))
steal = delta;
rq->prev_steal_time_rq += steal;
delta -= steal;
}
#endif
rq->clock_task += delta;
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
update_irq_load_avg(rq, irq_delta + steal);
#endif
update_rq_clock_pelt(rq, delta);
}
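/*
 * Worked example (illustrative numbers): for a raw delta of 1000ns with
 * 300ns of irq time and 200ns of steal time since the last update, only
 * the remaining 500ns advance rq->clock_task; the other 500ns feed the
 * irq PELT signal via update_irq_load_avg() when CONFIG_HAVE_SCHED_AVG_IRQ
 * and the NONTASK_CAPACITY feature are enabled.
 */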
void update_rq_clock(struct rq *rq)
{
s64 delta;
lockdep_assert_held(&rq->lock);
if (rq->clock_update_flags & RQCF_ACT_SKIP)
return;
#ifdef CONFIG_SCHED_DEBUG
if (sched_feat(WARN_DOUBLE_CLOCK))
SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED);
rq->clock_update_flags |= RQCF_UPDATED;
#endif
delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
if (delta < 0)
return;
rq->clock += delta;
update_rq_clock_task(rq, delta);
}
#ifdef CONFIG_SCHED_HRTICK
/*
* Use HR-timers to deliver accurate preemption points.
*/
static void hrtick_clear(struct rq *rq)
{
if (hrtimer_active(&rq->hrtick_timer))
hrtimer_cancel(&rq->hrtick_timer);
}
/*
* High-resolution timer tick.
* Runs from hardirq context with interrupts disabled.
*/
static enum hrtimer_restart hrtick(struct hrtimer *timer)
{
struct rq *rq = container_of(timer, struct rq, hrtick_timer);
struct rq_flags rf;
WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
rq_lock(rq, &rf);
update_rq_clock(rq);
rq->curr->sched_class->task_tick(rq, rq->curr, 1);
rq_unlock(rq, &rf);
return HRTIMER_NORESTART;
}
#ifdef CONFIG_SMP
static void __hrtick_restart(struct rq *rq)
{
struct hrtimer *timer = &rq->hrtick_timer;
hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
}
/*
* called from hardirq (IPI) context
*/
static void __hrtick_start(void *arg)
{
struct rq *rq = arg;
struct rq_flags rf;
rq_lock(rq, &rf);
__hrtick_restart(rq);
rq_unlock(rq, &rf);
}
/*
* Called to set the hrtick timer state.
*
* called with rq->lock held and irqs disabled
*/
void hrtick_start(struct rq *rq, u64 delay)
{
struct hrtimer *timer = &rq->hrtick_timer;
ktime_t time;
s64 delta;
/*
* Don't schedule slices shorter than 10000ns; that just
* doesn't make sense and can cause timer DoS.
*/
delta = max_t(s64, delay, 10000LL);
time = ktime_add_ns(timer->base->get_time(), delta);
hrtimer_set_expires(timer, time);
if (rq == this_rq())
__hrtick_restart(rq);
else
smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd);
}
#else
/*
* Called to set the hrtick timer state.
*
* called with rq->lock held and irqs disabled
*/
void hrtick_start(struct rq *rq, u64 delay)
{
/*
* Don't schedule slices shorter than 10000ns; that just
* doesn't make sense. Rely on vruntime for fairness.
*/
delay = max_t(u64, delay, 10000LL);
hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
HRTIMER_MODE_REL_PINNED_HARD);
}
#endif /* CONFIG_SMP */
static void hrtick_rq_init(struct rq *rq)
{
#ifdef CONFIG_SMP
rq->hrtick_csd.flags = 0;
rq->hrtick_csd.func = __hrtick_start;
rq->hrtick_csd.info = rq;
#endif
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
rq->hrtick_timer.function = hrtick;
}
#else /* CONFIG_SCHED_HRTICK */
static inline void hrtick_clear(struct rq *rq)
{
}
static inline void hrtick_rq_init(struct rq *rq)
{
}
#endif /* CONFIG_SCHED_HRTICK */
/*
* cmpxchg based fetch_or, macro so it works for different integer types
*/
#define fetch_or(ptr, mask) \
({ \
typeof(ptr) _ptr = (ptr); \
typeof(mask) _mask = (mask); \
typeof(*_ptr) _old, _val = *_ptr; \
\
for (;;) { \
_old = cmpxchg(_ptr, _val, _val | _mask); \
if (_old == _val) \
break; \
_val = _old; \
} \
_old; \
})
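/*
 * Example (a sketch): atomically OR a bit into a flags word and inspect
 * the value it had before:
 *
 *     unsigned long flags = 0x2;
 *     unsigned long old = fetch_or(&flags, 0x1);
 *
 * afterwards old == 0x2 and flags == 0x3.
 */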
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
/*
* Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
* this avoids any races wrt polling state changes and thereby avoids
* spurious IPIs.
*/
static bool set_nr_and_not_polling(struct task_struct *p)
{
struct thread_info *ti = task_thread_info(p);
return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
}
/*
* Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set.
*
* If this returns true, then the idle task promises to call
* sched_ttwu_pending() and reschedule soon.
*/
static bool set_nr_if_polling(struct task_struct *p)
{
struct thread_info *ti = task_thread_info(p);
typeof(ti->flags) old, val = READ_ONCE(ti->flags);
for (;;) {
if (!(val & _TIF_POLLING_NRFLAG))
return false;
if (val & _TIF_NEED_RESCHED)
return true;
old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
if (old == val)
break;
val = old;
}
return true;
}
#else
static bool set_nr_and_not_polling(struct task_struct *p)
{
set_tsk_need_resched(p);
return true;
}
#ifdef CONFIG_SMP
static bool set_nr_if_polling(struct task_struct *p)
{
return false;
}
#endif
#endif
static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
struct wake_q_node *node = &task->wake_q;
/*
* Atomically grab the task; if ->wake_q is already non-NULL it means
* it's already queued (either by us or someone else) and will get the
* wakeup due to that.
*
* In order to ensure that a pending wakeup will observe our pending
* state, even in the failed case, an explicit smp_mb() must be used.
*/
smp_mb__before_atomic();
if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)))
return false;
/*
* The head is context local, there can be no concurrency.
*/
*head->lastp = node;
head->lastp = &node->next;
return true;
}
/**
* wake_q_add() - queue a wakeup for 'later' waking.
* @head: the wake_q_head to add @task to
* @task: the task to queue for 'later' wakeup
*
* Queue a task for later wakeup, most likely by the wake_up_q() call in the
* same context, _HOWEVER_ this is not guaranteed; the wakeup can come
* instantly.
*
* This function must be used as if it were wake_up_process(); IOW the task
* must be ready to be woken at this location.
*/
void wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
if (__wake_q_add(head, task))
get_task_struct(task);
}
/**
* wake_q_add_safe() - safely queue a wakeup for 'later' waking.
* @head: the wake_q_head to add @task to
* @task: the task to queue for 'later' wakeup
*
* Queue a task for later wakeup, most likely by the wake_up_q() call in the
* same context, _HOWEVER_ this is not guaranteed; the wakeup can come
* instantly.
*
* This function must be used as if it were wake_up_process(); IOW the task
* must be ready to be woken at this location.
*
* This function is essentially a task-safe equivalent to wake_q_add(). Callers
* that already hold a reference to @task can call the 'safe' version and trust
* wake_q to do the right thing depending on whether or not @task is already
* queued for wakeup.
*/
void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
{
if (!__wake_q_add(head, task))
put_task_struct(task);
}
void wake_up_q(struct wake_q_head *head)
{
struct wake_q_node *node = head->first;
while (node != WAKE_Q_TAIL) {
struct task_struct *task;
task = container_of(node, struct task_struct, wake_q);
BUG_ON(!task);
/* Task can safely be re-inserted now: */
node = node->next;
task->wake_q.next = NULL;
/*
* wake_up_process() executes a full barrier, which pairs with
* the queueing in wake_q_add() so as not to miss wakeups.
*/
wake_up_process(task);
put_task_struct(task);
}
}
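/*
 * Usage pattern (a sketch; 'lock' is a placeholder for whatever lock the
 * caller holds): queue wakeups while holding the lock, then issue them
 * after dropping it so the woken tasks don't immediately block on it:
 *
 *     DEFINE_WAKE_Q(wake_q);
 *
 *     raw_spin_lock(&lock);
 *     wake_q_add(&wake_q, task);
 *     raw_spin_unlock(&lock);
 *
 *     wake_up_q(&wake_q);
 */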
/*
* resched_curr - mark rq's current task 'to be rescheduled now'.
*
* On UP this means the setting of the need_resched flag, on SMP it
* might also involve a cross-CPU call to trigger the scheduler on
* the target CPU.
*/
void resched_curr(struct rq *rq)
{
struct task_struct *curr = rq->curr;
int cpu;
lockdep_assert_held(&rq->lock);
if (test_tsk_need_resched(curr))
return;
cpu = cpu_of(rq);
if (cpu == smp_processor_id()) {
set_tsk_need_resched(curr);
set_preempt_need_resched();
return;
}
if (set_nr_and_not_polling(curr))
smp_send_reschedule(cpu);
else
trace_sched_wake_idle_without_ipi(cpu);
}
void resched_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
if (cpu_online(cpu) || cpu == smp_processor_id())
resched_curr(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
#ifdef CONFIG_SMP
#ifdef CONFIG_NO_HZ_COMMON
/*
* In the semi-idle case, use the nearest busy CPU for migrating timers
* from an idle CPU. This is good for power savings.
*
* We don't do a similar optimization for a completely idle system, as
* selecting an idle CPU will add more delays to the timers than intended
* (as that CPU's timer base may not be up to date wrt jiffies etc).
*/
int get_nohz_timer_target(void)
{
int i, cpu = smp_processor_id(), default_cpu = -1;
struct sched_domain *sd;
if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
if (!idle_cpu(cpu))
return cpu;
default_cpu = cpu;
}
rcu_read_lock();
for_each_domain(cpu, sd) {
for_each_cpu_and(i, sched_domain_span(sd),
housekeeping_cpumask(HK_FLAG_TIMER)) {
if (cpu == i)
continue;
if (!idle_cpu(i)) {
cpu = i;
goto unlock;
}
}
}
if (default_cpu == -1)
default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
cpu = default_cpu;
unlock:
rcu_read_unlock();
return cpu;
}
/*
* When add_timer_on() enqueues a timer into the timer wheel of an
* idle CPU then this timer might expire before the next timer event
* which is scheduled to wake up that CPU. In case of a completely
* idle system the next event might even be infinite time into the
* future. wake_up_idle_cpu() ensures that the CPU is woken up and
* leaves the inner idle loop so the newly added timer is taken into
* account when the CPU goes back to idle and evaluates the timer
* wheel for the next timer event.
*/
static void wake_up_idle_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
if (cpu == smp_processor_id())
return;
if (set_nr_and_not_polling(rq->idle))
smp_send_reschedule(cpu);
else
trace_sched_wake_idle_without_ipi(cpu);
}
static bool wake_up_full_nohz_cpu(int cpu)
{
/*
* We just need the target to call irq_exit() and re-evaluate
* the next tick. The nohz full kick at least implies that.
* If needed we can still optimize that later with an
* empty IRQ.
*/
if (cpu_is_offline(cpu))
return true; /* Don't try to wake offline CPUs. */
if (tick_nohz_full_cpu(cpu)) {
if (cpu != smp_processor_id() ||
tick_nohz_tick_stopped())
tick_nohz_full_kick_cpu(cpu);
return true;
}
return false;
}
/*
* Wake up the specified CPU. If the CPU is going offline, it is the
* caller's responsibility to deal with the lost wakeup, for example,
* by hooking into the CPU_DEAD notifier like timers and hrtimers do.
*/
void wake_up_nohz_cpu(int cpu)
{
if (!wake_up_full_nohz_cpu(cpu))
wake_up_idle_cpu(cpu);
}
static inline bool got_nohz_idle_kick(void)
{
int cpu = smp_processor_id();
if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK))
return false;
if (idle_cpu(cpu) && !need_resched())
return true;
/*
* We can't run the idle load balance on this CPU at this time, so we
* cancel it and clear NOHZ_BALANCE_KICK.
*/
atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
return false;
}
#else /* CONFIG_NO_HZ_COMMON */
static inline bool got_nohz_idle_kick(void)
{
return false;
}
#endif /* CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL
bool sched_can_stop_tick(struct rq *rq)
{
int fifo_nr_running;
/* Deadline tasks, even if single, need the tick */
if (rq->dl.dl_nr_running)
return false;
/*
* If there is more than one RR task, we need the tick to effect the
* actual RR behaviour.
*/
if (rq->rt.rr_nr_running) {
if (rq->rt.rr_nr_running == 1)
return true;
else
return false;
}
/*
* If there are no RR tasks but there are FIFO tasks, we can skip the
* tick: there is no forced preemption between FIFO tasks.
*/
fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
if (fifo_nr_running)
return true;
/*
* If there are no DL, RR or FIFO tasks, only CFS tasks can be left;
* if there is more than one we need the tick for involuntary
* preemption.
*/
if (rq->nr_running > 1)
return false;
return true;
}
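/*
 * Decision summary of the checks above:
 *
 *     any DL task                        -> keep the tick
 *     more than one RR task              -> keep the tick
 *     exactly one RR task                -> tick can stop
 *     only FIFO (no RR) RT tasks         -> tick can stop
 *     more than one runnable task (CFS)  -> keep the tick
 *     otherwise                          -> tick can stop
 */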
#endif /* CONFIG_NO_HZ_FULL */
#endif /* CONFIG_SMP */
#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
(defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
/*
* Iterate task_group tree rooted at *from, calling @down when first entering a
* node and @up when leaving it for the final time.
*
* Caller must hold rcu_lock or sufficient equivalent.
*/
int walk_tg_tree_from(struct task_group *from,
tg_visitor down, tg_visitor up, void *data)
{
struct task_group *parent, *child;
int ret;
parent = from;
down:
ret = (*down)(parent, data);
if (ret)
goto out;
list_for_each_entry_rcu(child, &parent->children, siblings) {
parent = child;
goto down;
up:
continue;
}
ret = (*up)(parent, data);
if (ret || parent == from)
goto out;
child = parent;
parent = parent->parent;
if (parent)
goto up;
out:
return ret;
}
int tg_nop(struct task_group *tg, void *data)
{
return 0;
}
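/*
 * Example (a sketch; my_down is a hypothetical tg_visitor): visit every
 * task_group under the root, doing work only on the way down:
 *
 *     rcu_read_lock();
 *     walk_tg_tree_from(&root_task_group, my_down, tg_nop, NULL);
 *     rcu_read_unlock();
 */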
#endif
static void set_load_weight(struct task_struct *p, bool update_load)
{
int prio = p->static_prio - MAX_RT_PRIO;
struct load_weight *load = &p->se.load;
/*
* SCHED_IDLE tasks get minimal weight:
*/
if (task_has_idle_policy(p)) {
load->weight = scale_load(WEIGHT_IDLEPRIO);
load->inv_weight = WMULT_IDLEPRIO;
return;
}
/*
* SCHED_OTHER tasks have to update their load when changing their
* weight
*/
if (update_load && p->sched_class == &fair_sched_class) {
reweight_task(p, prio);
} else {
load->weight = scale_load(sched_prio_to_weight[prio]);
load->inv_weight = sched_prio_to_wmult[prio];
}
}
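/*
 * Worked example: a nice-0 SCHED_OTHER task has static_prio == 120; with
 * MAX_RT_PRIO == 100 that gives prio == 20, and sched_prio_to_weight[20]
 * == 1024, the nice-0 baseline weight. Each nice level changes the weight
 * by roughly 1.25x, so that one nice level is worth about 10% of CPU time
 * relative to its neighbour.
 */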
#ifdef CONFIG_UCLAMP_TASK
/*
* Serializes updates of utilization clamp values
*
* User-space (the slow path) triggers utilization clamp value updates which
* can require updates to the (fast-path) scheduler data structures used to
* support enqueue/dequeue operations.
* While the per-CPU rq lock protects fast-path update operations, user-space
* requests are serialized using a mutex to reduce the risk of conflicting
* updates or API abuses.
*/
static DEFINE_MUTEX(uclamp_mutex);
/* Max allowed minimum utilization */
unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
/* Max allowed maximum utilization */
unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
/* All clamps are required to be less than or equal to these values */
static struct uclamp_se uclamp_default[UCLAMP_CNT];
/* Integer rounded range for each bucket */
#define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
#define for_each_clamp_id(clamp_id) \
for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++)
static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
{
return clamp_value / UCLAMP_BUCKET_DELTA;
}
static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
{
return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
}
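/*
 * Worked example, assuming SCHED_CAPACITY_SCALE == 1024 and
 * UCLAMP_BUCKETS == 5 (so UCLAMP_BUCKET_DELTA == 205): a clamp value of
 * 300 maps to bucket_id 300/205 == 1, whose base value is 205 * 1 == 205.
 */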
static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
{
if (clamp_id == UCLAMP_MIN)
return 0;
return SCHED_CAPACITY_SCALE;
}
static inline void uclamp_se_set(struct uclamp_se *uc_se,
unsigned int value, bool user_defined)
{
uc_se->value = value;
uc_se->bucket_id = uclamp_bucket_id(value);
uc_se->user_defined = user_defined;
}
static inline unsigned int
uclamp_idle_value(struct rq *rq, enum uclamp_id clamp_id,
unsigned int clamp_value)
{
/*
* Avoid blocked utilization pushing up the frequency when we go
* idle (which drops the max-clamp) by retaining the last known
* max-clamp.
*/
if (clamp_id == UCLAMP_MAX) {
rq->uclamp_flags |= UCLAMP_FLAG_IDLE;
return clamp_value;
}
return uclamp_none(UCLAMP_MIN);
}
static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id,
unsigned int clamp_value)
{
/* Reset max-clamp retention only on idle exit */
if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
return;
WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
}
static inline
unsigned int uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
unsigned int clamp_value)
{
struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
int bucket_id = UCLAMP_BUCKETS - 1;
/*
* Since both min and max clamps are max-aggregated, find the
* topmost bucket with tasks in it.
*/
for ( ; bucket_id >= 0; bucket_id--) {
if (!bucket[bucket_id].tasks)
continue;
return