From 71f9b17d0b1121b8219bd2752056ca1b00fa7db1 Mon Sep 17 00:00:00 2001
From: Haris Okanovic <haris.okanovic@ni.com>
Date: Fri, 3 Feb 2017 17:26:44 +0100
Subject: [PATCH 161/352] timers: Don't wake ktimersoftd on every tick

We recently upgraded from 4.1 to 4.6 and noticed a minor latency
regression caused by an additional thread wakeup (ktimersoftd) in
interrupt context on every tick. The wakeups come from
run_local_timers() raising TIMER_SOFTIRQ. Both the TIMER and SCHED
softirqs coalesced into one ksoftirqd wakeup prior to Sebastian's
change to split timers into their own thread.

There's already logic in run_local_timers() to avoid some unnecessary
wakeups of ksoftirqd, but it doesn't seem to catch them all. In
particular, I've seen many unnecessary wakeups when jiffies increments
prior to run_local_timers().

Change the way timers are collected per Julia and Thomas'
recommendation: Expired timers are now collected in interrupt context
and fired in ktimersoftd to avoid a double walk of `pending_map`.

Collect expired timers in interrupt context to avoid the overhead of
waking ktimersoftd on every tick. ktimersoftd now wakes only when one
or more timers are ready, which yields a minor reduction in small
latency spikes.

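As a rough userspace illustration of the new wakeup decision (not
kernel code: the *_sim names and the `pending` flag are invented here,
with a boolean standing in for the real pending_map scan):

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified stand-in for struct timer_base. */
    struct base_sim {
        unsigned long clk;  /* next jiffy to service */
        int expired_count;  /* levels collected; 0 = nothing to fire */
    };

    /* Models find_expired_timers(): advance clk, collecting work. */
    static int find_expired_sim(struct base_sim *b, unsigned long jiffies,
                                bool pending)
    {
        while (!b->expired_count && jiffies >= b->clk) {
            if (pending)
                b->expired_count++;  /* hlist_move_list() in the patch */
            b->clk++;
        }
        return b->expired_count;
    }

    /* Models the new check in run_local_timers(). */
    static void tick_sim(struct base_sim *b, unsigned long jiffies,
                         bool pending)
    {
        /* The old code raised TIMER_SOFTIRQ whenever jiffies >= clk. */
        if (jiffies < b->clk || !find_expired_sim(b, jiffies, pending))
            return;  /* nothing expired: ktimersoftd stays asleep */
        printf("jiffy %lu: raise TIMER_SOFTIRQ\n", jiffies);
    }

    int main(void)
    {
        struct base_sim b = { 0, 0 };

        tick_sim(&b, 1, false);  /* empty tick: no wakeup (the fix) */
        tick_sim(&b, 2, true);   /* an expired timer: wakeup happens */
        return 0;
    }

The first tick advances clk past jiffies without waking anything; only
the tick that actually collects an expired timer raises the softirq.
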
This is implemented by storing lists of expired timers in timer_base,
updated on each tick. Any addition to the lists wakes ktimersoftd
(softirq) to process those timers.

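As a minimal self-contained sketch of that storage scheme (illustration
only: a plain singly-linked list stands in for the kernel's hlist, the
sim_* names are invented, and the real patch keeps one list per wheel
level and drains them from the TIMER_SOFTIRQ handler):

    #include <stdio.h>

    #define LVL_DEPTH 9  /* wheel depth used by the 4.x timer code */

    struct timer_sim {
        void (*fn)(void);
        struct timer_sim *next;
    };

    /* Per-CPU base: the tick parks expired timers here. */
    struct base_sim {
        struct timer_sim *expired_lists[LVL_DEPTH];
        int expired_count;
    };

    /* Hard-irq side: park an expired timer; no callbacks run here. */
    static void collect_sim(struct base_sim *b, struct timer_sim *t)
    {
        t->next = b->expired_lists[0];
        b->expired_lists[0] = t;
        if (!b->expired_count)
            b->expired_count = 1;  /* one level now occupied */
    }

    /* Softirq side (ktimersoftd): drain the lists, run callbacks. */
    static void expire_sim(struct base_sim *b)
    {
        while (b->expired_count--) {
            struct timer_sim *t = b->expired_lists[b->expired_count];

            b->expired_lists[b->expired_count] = NULL;
            while (t) {
                struct timer_sim *next = t->next;

                t->fn();  /* call_timer_fn() in the real code */
                t = next;
            }
        }
        b->expired_count = 0;  /* the loop exits at -1, as in the patch */
    }

    static void ping(void) { puts("fired in ktimersoftd context"); }

    int main(void)
    {
        struct base_sim b = { { 0 }, 0 };
        struct timer_sim t = { ping, 0 };

        collect_sim(&b, &t);  /* from the tick: a cheap list splice */
        if (b.expired_count)  /* only now would ktimersoftd be woken */
            expire_sim(&b);
        return 0;
    }
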
Signed-off-by: Haris Okanovic <haris.okanovic@ni.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/time/timer.c | 96 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 67 insertions(+), 29 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index a416cee..c1b88e0 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -207,6 +207,8 @@ struct timer_base {
 	bool must_forward_clk;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
 	struct hlist_head vectors[WHEEL_SIZE];
+	struct hlist_head expired_lists[LVL_DEPTH];
+	int expired_count;
 } ____cacheline_aligned;
 
 static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
@@ -1364,7 +1366,8 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
 	}
 }
 
-static void expire_timers(struct timer_base *base, struct hlist_head *head)
+static inline void __expire_timers(struct timer_base *base,
+				   struct hlist_head *head)
 {
 	while (!hlist_empty(head)) {
 		struct timer_list *timer;
@@ -1395,21 +1398,38 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 	}
 }
 
-static int __collect_expired_timers(struct timer_base *base,
-				    struct hlist_head *heads)
+static void expire_timers(struct timer_base *base)
+{
+	struct hlist_head *head;
+
+	while (base->expired_count--) {
+		head = base->expired_lists + base->expired_count;
+		__expire_timers(base, head);
+	}
+	base->expired_count = 0;
+}
+
+static void __collect_expired_timers(struct timer_base *base)
 {
 	unsigned long clk = base->clk;
 	struct hlist_head *vec;
-	int i, levels = 0;
+	int i;
 	unsigned int idx;
 
+	/*
+	 * expire_timers() must be called at least once before we can
+	 * collect more timers
+	 */
+	if (WARN_ON(base->expired_count))
+		return;
+
 	for (i = 0; i < LVL_DEPTH; i++) {
 		idx = (clk & LVL_MASK) + i * LVL_SIZE;
 
 		if (__test_and_clear_bit(idx, base->pending_map)) {
 			vec = base->vectors + idx;
-			hlist_move_list(vec, heads++);
-			levels++;
+			hlist_move_list(vec,
+				&base->expired_lists[base->expired_count++]);
 		}
 		/* Is it time to look at the next level? */
 		if (clk & LVL_CLK_MASK)
@@ -1417,7 +1437,6 @@ static int __collect_expired_timers(struct timer_base *base,
 		/* Shift clock for the next level granularity */
 		clk >>= LVL_CLK_SHIFT;
 	}
-	return levels;
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -1616,8 +1635,7 @@ void timer_clear_idle(void)
 	base->is_idle = false;
 }
 
-static int collect_expired_timers(struct timer_base *base,
-				  struct hlist_head *heads)
+static void collect_expired_timers(struct timer_base *base)
 {
 	/*
 	 * NOHZ optimization. After a long idle sleep we need to forward the
@@ -1634,20 +1652,49 @@ static int collect_expired_timers(struct timer_base *base,
 		if (time_after(next, jiffies)) {
 			/* The call site will increment clock! */
 			base->clk = jiffies - 1;
-			return 0;
+			return;
 		}
 		base->clk = next;
 	}
-	return __collect_expired_timers(base, heads);
+	__collect_expired_timers(base);
 }
 #else
-static inline int collect_expired_timers(struct timer_base *base,
-					 struct hlist_head *heads)
+static inline void collect_expired_timers(struct timer_base *base)
 {
-	return __collect_expired_timers(base, heads);
+	__collect_expired_timers(base);
 }
 #endif
 
+static int find_expired_timers(struct timer_base *base)
+{
+	const unsigned long int end_clk = jiffies;
+
+	while (!base->expired_count && time_after_eq(end_clk, base->clk)) {
+		collect_expired_timers(base);
+		base->clk++;
+	}
+
+	return base->expired_count;
+}
+
+/* Called from CPU tick routine to quickly collect expired timers */
+static int tick_find_expired(struct timer_base *base)
+{
+	int count;
+
+	raw_spin_lock(&base->lock);
+
+	if (unlikely(time_after(jiffies, base->clk + HZ))) {
+		/* defer to ktimersoftd; don't spend too long in irq context */
+		count = -1;
+	} else
+		count = find_expired_timers(base);
+
+	raw_spin_unlock(&base->lock);
+
+	return count;
+}
+
 /*
  * Called from the timer interrupt handler to charge one tick to the current
  * process. user_tick is 1 if the tick is user time, 0 for system.
@@ -1674,12 +1721,6 @@ void update_process_times(int user_tick)
  */
 static inline void __run_timers(struct timer_base *base)
 {
-	struct hlist_head heads[LVL_DEPTH];
-	int levels;
-
-	if (!time_after_eq(jiffies, base->clk))
-		return;
-
 	raw_spin_lock_irq(&base->lock);
 
 	/*
@@ -1698,14 +1739,9 @@ static inline void __run_timers(struct timer_base *base)
 	 */
 	base->must_forward_clk = false;
 
-	while (time_after_eq(jiffies, base->clk)) {
-
-		levels = collect_expired_timers(base, heads);
-		base->clk++;
+	while (find_expired_timers(base))
+		expire_timers(base);
 
-		while (levels--)
-			expire_timers(base, heads + levels);
-	}
 	raw_spin_unlock_irq(&base->lock);
 	wakeup_timer_waiters(base);
 }
@@ -1731,12 +1767,12 @@ void run_local_timers(void)
 
 	hrtimer_run_queues();
 	/* Raise the softirq only if required. */
-	if (time_before(jiffies, base->clk)) {
+	if (time_before(jiffies, base->clk) || !tick_find_expired(base)) {
 		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
 			return;
 		/* CPU is awake, so check the deferrable base. */
 		base++;
-		if (time_before(jiffies, base->clk))
+		if (time_before(jiffies, base->clk) || !tick_find_expired(base))
 			return;
 	}
 	raise_softirq(TIMER_SOFTIRQ);
@@ -1927,6 +1963,7 @@ int timers_dead_cpu(unsigned int cpu)
 	forward_timer_base(new_base);
 
 	BUG_ON(old_base->running_timer);
+	BUG_ON(old_base->expired_count);
 
 	for (i = 0; i < WHEEL_SIZE; i++)
 		migrate_timer_list(new_base, old_base->vectors + i);
@@ -1953,6 +1990,7 @@ static void __init init_timer_cpu(int cpu)
 #ifdef CONFIG_PREEMPT_RT_FULL
 		init_swait_queue_head(&base->wait_for_running_timer);
 #endif
+		base->expired_count = 0;
 	}
 }
 
--
2.7.4