git.dujemihanovic.xyz Git - linux.git/commitdiff
block: fix that blk_time_get_ns() doesn't update time after schedule
author Yu Kuai <yukuai3@huawei.com>
Thu, 11 Apr 2024 03:23:48 +0000 (11:23 +0800)
committer Jens Axboe <axboe@kernel.dk>
Fri, 12 Apr 2024 14:31:54 +0000 (08:31 -0600)
While monitoring the throttle time of IO from iocost, it's found that
such time is always zero after the io_schedule() from ioc_rqos_throttle,
for example, with the following debug patch:

+       printk("%s-%d: %s enter %llu\n", current->comm, current->pid, __func__, blk_time_get_ns());
        while (true) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (wait.committed)
                        break;
                io_schedule();
        }
+       printk("%s-%d: %s exit  %llu\n", current->comm, current->pid, __func__, blk_time_get_ns());

It can be observed that blk_time_get_ns() always returns the same time:

[ 1068.096579] fio-1268: ioc_rqos_throttle enter 1067901962288
[ 1068.272587] fio-1268: ioc_rqos_throttle exit  1067901962288
[ 1068.274389] fio-1268: ioc_rqos_throttle enter 1067901962288
[ 1068.472690] fio-1268: ioc_rqos_throttle exit  1067901962288
[ 1068.474485] fio-1268: ioc_rqos_throttle enter 1067901962288
[ 1068.672656] fio-1268: ioc_rqos_throttle exit  1067901962288
[ 1068.674451] fio-1268: ioc_rqos_throttle enter 1067901962288
[ 1068.872655] fio-1268: ioc_rqos_throttle exit  1067901962288

And I think the root cause is that 'PF_BLOCK_TS' is always cleared
by blk_flush_plug() before schedule(), hence blk_plug_invalidate_ts()
will never be called:

blk_time_get_ns
 plug->cur_ktime = ktime_get_ns();
 current->flags |= PF_BLOCK_TS;

io_schedule:
 io_schedule_prepare
  blk_flush_plug
   __blk_flush_plug
    /* the flag is cleared, while time is not */
    current->flags &= ~PF_BLOCK_TS;
 schedule
 sched_update_worker
  /* the flag is not set, hence plug->cur_ktime is not cleared */
  if (tsk->flags & PF_BLOCK_TS)
   blk_plug_invalidate_ts()

blk_time_get_ns
 /* got the time stashed before schedule */
 return plug->cur_ktime;

Fix the problem by clearing cached time in __blk_flush_plug().

Fixes: 06b23f92af87 ("block: update cached timestamp post schedule/preemption")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20240411032349.3051233-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-core.c

index 3a6f5603fb44b6c8b524ee2da68f033a0b894835..b795ac177281ad7adec63528d53def2fff1139a5 100644 (file)
@@ -1197,6 +1197,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
        if (unlikely(!rq_list_empty(plug->cached_rq)))
                blk_mq_free_plug_rqs(plug);
 
+       plug->cur_ktime = 0;
        current->flags &= ~PF_BLOCK_TS;
 }