linux/net/sched
Eric Dumazet 8fb6503592 net/sched: accept TCA_STAB only for root qdisc
[ Upstream commit 3cb7cf1540 ]

Most qdiscs maintain their backlog using qdisc_pkt_len(skb)
on the assumption it is invariant between the enqueue()
and dequeue() handlers.

Unfortunately syzbot can crash a host rather easily using
a TBF + SFQ combination, with an STAB on SFQ [1]

We can't support TCA_STAB on arbitrary level, this would
require to maintain per-qdisc storage.

[1]
[   88.796496] BUG: kernel NULL pointer dereference, address: 0000000000000000
[   88.798611] #PF: supervisor read access in kernel mode
[   88.799014] #PF: error_code(0x0000) - not-present page
[   88.799506] PGD 0 P4D 0
[   88.799829] Oops: Oops: 0000 [#1] SMP NOPTI
[   88.800569] CPU: 14 UID: 0 PID: 2053 Comm: b371744477 Not tainted 6.12.0-rc1-virtme #1117
[   88.801107] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   88.801779] RIP: 0010:sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq
[ 88.802544] Code: 0f b7 50 12 48 8d 04 d5 00 00 00 00 48 89 d6 48 29 d0 48 8b 91 c0 01 00 00 48 c1 e0 03 48 01 c2 66 83 7a 1a 00 7e c0 48 8b 3a <4c> 8b 07 4c 89 02 49 89 50 08 48 c7 47 08 00 00 00 00 48 c7 07 00
All code
========
   0:	0f b7 50 12          	movzwl 0x12(%rax),%edx
   4:	48 8d 04 d5 00 00 00 	lea    0x0(,%rdx,8),%rax
   b:	00
   c:	48 89 d6             	mov    %rdx,%rsi
   f:	48 29 d0             	sub    %rdx,%rax
  12:	48 8b 91 c0 01 00 00 	mov    0x1c0(%rcx),%rdx
  19:	48 c1 e0 03          	shl    $0x3,%rax
  1d:	48 01 c2             	add    %rax,%rdx
  20:	66 83 7a 1a 00       	cmpw   $0x0,0x1a(%rdx)
  25:	7e c0                	jle    0xffffffffffffffe7
  27:	48 8b 3a             	mov    (%rdx),%rdi
  2a:*	4c 8b 07             	mov    (%rdi),%r8		<-- trapping instruction
  2d:	4c 89 02             	mov    %r8,(%rdx)
  30:	49 89 50 08          	mov    %rdx,0x8(%r8)
  34:	48 c7 47 08 00 00 00 	movq   $0x0,0x8(%rdi)
  3b:	00
  3c:	48                   	rex.W
  3d:	c7                   	.byte 0xc7
  3e:	07                   	(bad)
	...

Code starting with the faulting instruction
===========================================
   0:	4c 8b 07             	mov    (%rdi),%r8
   3:	4c 89 02             	mov    %r8,(%rdx)
   6:	49 89 50 08          	mov    %rdx,0x8(%r8)
   a:	48 c7 47 08 00 00 00 	movq   $0x0,0x8(%rdi)
  11:	00
  12:	48                   	rex.W
  13:	c7                   	.byte 0xc7
  14:	07                   	(bad)
	...
[   88.803721] RSP: 0018:ffff9a1f892b7d58 EFLAGS: 00000206
[   88.804032] RAX: 0000000000000000 RBX: ffff9a1f8420c800 RCX: ffff9a1f8420c800
[   88.804560] RDX: ffff9a1f81bc1440 RSI: 0000000000000000 RDI: 0000000000000000
[   88.805056] RBP: ffffffffc04bb0e0 R08: 0000000000000001 R09: 00000000ff7f9a1f
[   88.805473] R10: 000000000001001b R11: 0000000000009a1f R12: 0000000000000140
[   88.806194] R13: 0000000000000001 R14: ffff9a1f886df400 R15: ffff9a1f886df4ac
[   88.806734] FS:  00007f445601a740(0000) GS:ffff9a2e7fd80000(0000) knlGS:0000000000000000
[   88.807225] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   88.807672] CR2: 0000000000000000 CR3: 000000050cc46000 CR4: 00000000000006f0
[   88.808165] Call Trace:
[   88.808459]  <TASK>
[   88.808710] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434)
[   88.809261] ? page_fault_oops (arch/x86/mm/fault.c:715)
[   88.809561] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:87 ./arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539)
[   88.809806] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623)
[   88.810074] ? sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq
[   88.810411] sfq_reset (net/sched/sch_sfq.c:525) sch_sfq
[   88.810671] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036)
[   88.810950] tbf_reset (./include/linux/timekeeping.h:169 net/sched/sch_tbf.c:334) sch_tbf
[   88.811208] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036)
[   88.811484] netif_set_real_num_tx_queues (./include/linux/spinlock.h:396 ./include/net/sch_generic.h:768 net/core/dev.c:2958)
[   88.811870] __tun_detach (drivers/net/tun.c:590 drivers/net/tun.c:673)
[   88.812271] tun_chr_close (drivers/net/tun.c:702 drivers/net/tun.c:3517)
[   88.812505] __fput (fs/file_table.c:432 (discriminator 1))
[   88.812735] task_work_run (kernel/task_work.c:230)
[   88.813016] do_exit (kernel/exit.c:940)
[   88.813372] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:58 (discriminator 4))
[   88.813639] ? handle_mm_fault (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/memcontrol.h:1022 ./include/linux/memcontrol.h:1045 ./include/linux/memcontrol.h:1052 mm/memory.c:5928 mm/memory.c:6088)
[   88.813867] do_group_exit (kernel/exit.c:1070)
[   88.814138] __x64_sys_exit_group (kernel/exit.c:1099)
[   88.814490] x64_sys_call (??:?)
[   88.814791] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1))
[   88.815012] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
[   88.815495] RIP: 0033:0x7f44560f1975

Fixes: 175f9c1bba ("net_sched: Add size table for qdiscs")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Link: https://patch.msgid.link/20241007184130.3960565-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
2024-10-17 15:11:56 +02:00
..
act_api.c net/sched: act_api: fix possible infinite loop in tcf_idr_check_alloc() 2024-07-05 09:14:30 +02:00
act_bpf.c flow_offload: fill flags to action structure 2023-02-22 12:57:10 +01:00
act_connmark.c flow_offload: fill flags to action structure 2023-02-22 12:57:10 +01:00
act_csum.c net_sched: refactor TC action init API 2021-08-02 10:24:38 +01:00
act_ct.c sched: act_ct: take care of padding in struct zones_ht_key 2024-08-19 05:45:31 +02:00
act_ctinfo.c net/sched: act_ctinfo: use percpu stats 2023-02-22 12:57:10 +01:00
act_gact.c net_sched: refactor TC action init API 2021-08-02 10:24:38 +01:00
act_gate.c flow_offload: fill flags to action structure 2023-02-22 12:57:10 +01:00
act_ife.c flow_offload: fill flags to action structure 2023-02-22 12:57:10 +01:00
act_ipt.c net/sched: act_ipt: add sanity checks on table name and hook locations 2023-07-23 13:47:28 +02:00
act_meta_mark.c
act_meta_skbprio.c
act_meta_skbtcindex.c
act_mirred.c net/sched: act_mirred: Add carrier check 2023-05-17 11:50:17 +02:00
act_mpls.c net/sched: act_mpls: fix action bind logic 2023-03-11 13:57:30 +01:00
act_nat.c flow_offload: fill flags to action structure 2023-02-22 12:57:10 +01:00
act_pedit.c net/sched: act_pedit: Add size check for TCA_PEDIT_PARMS_EX 2023-07-23 13:47:29 +02:00
act_police.c net: sched: act_police: fix sparse errors in tcf_police_dump() 2023-06-14 11:13:03 +02:00
act_sample.c net/sched: act_sample: fix action bind logic 2023-03-11 13:57:30 +01:00
act_simple.c flow_offload: fill flags to action structure 2023-02-22 12:57:10 +01:00
act_skbedit.c flow_offload: fill flags to action structure 2023-02-22 12:57:10 +01:00
act_skbmod.c net/sched: act_skbmod: prevent kernel-infoleak 2024-04-10 16:19:38 +02:00
act_tunnel_key.c net_sched: refactor TC action init API 2021-08-02 10:24:38 +01:00
act_vlan.c net_sched: refactor TC action init API 2021-08-02 10:24:38 +01:00
cls_api.c net/sched: cls_api: Fix lockup on flushing explicitly created chain 2023-06-21 15:59:18 +02:00
cls_basic.c net_sched: refactor TC action init API 2021-08-02 10:24:38 +01:00
cls_bpf.c net: sched: cls_bpf: Undo tcf_bind_filter in case of an error 2023-07-27 08:47:00 +02:00
cls_cgroup.c net_sched: refactor TC action init API 2021-08-02 10:24:38 +01:00
cls_flow.c net_sched: refactor TC action init API 2021-08-02 10:24:38 +01:00
cls_flower.c net/sched: flower: Ensure both minimum and maximum ports are specified 2023-07-23 13:47:45 +02:00
cls_fw.c net/sched: cls_fw: No longer copy tcf_result on update to avoid use-after-free 2023-08-11 15:13:52 +02:00
cls_matchall.c net_sched: refactor TC action init API 2021-08-02 10:24:38 +01:00
cls_route.c net/sched: cls_route: No longer copy tcf_result on update to avoid use-after-free 2023-08-11 15:13:52 +02:00
cls_u32.c net: sched: cls_u32: Fix allocation size in u32_init() 2023-11-08 17:26:45 +01:00
em_canid.c
em_cmp.c
em_ipset.c
em_ipt.c
em_meta.c
em_nbyte.c
em_text.c net: sched: em_text: fix possible memory leak in em_text_destroy() 2024-01-15 18:51:12 +01:00
em_u32.c
ematch.c net_sched: reject TCF_EM_SIMPLE case for complex ematch module 2022-12-31 13:14:39 +01:00
Kconfig net/sched: Retire dsmark qdisc 2024-03-01 13:21:42 +01:00
Makefile net/sched: Retire dsmark qdisc 2024-03-01 13:21:42 +01:00
sch_api.c net/sched: accept TCA_STAB only for root qdisc 2024-10-17 15:11:56 +02:00
sch_blackhole.c
sch_cake.c sched: sch_cake: fix bulk flow accounting logic for host fairness 2024-09-12 11:07:44 +02:00
sch_cbs.c
sch_choke.c net: sched: delete duplicate cleanup of backlog and qlen 2022-10-29 10:12:57 +02:00
sch_codel.c
sch_drr.c net: sched: delete duplicate cleanup of backlog and qlen 2022-10-29 10:12:57 +02:00
sch_etf.c net: sched: delete duplicate cleanup of backlog and qlen 2022-10-29 10:12:57 +02:00
sch_ets.c net: sched: delete duplicate cleanup of backlog and qlen 2022-10-29 10:12:57 +02:00
sch_fifo.c net_sched: fix NULL deref in fifo_set_limit() 2021-10-01 14:59:10 -07:00
sch_fq_codel.c net: sched: delete duplicate cleanup of backlog and qlen 2022-10-29 10:12:57 +02:00
sch_fq_pie.c net/sched: fq_pie: avoid stalls in fq_pie_timer() 2023-09-19 12:22:58 +02:00
sch_fq.c net/sched: sch_fq: fix integer overflow of "credit" 2023-05-11 23:00:31 +09:00
sch_frag.c net/sched: Extend qdisc control block with tc control block 2022-01-05 12:42:33 +01:00
sch_generic.c net/sched: fix netdevice reference leaks in attach_default_qdiscs() 2022-09-08 12:28:02 +02:00
sch_gred.c net: sched: Fix spelling mistakes 2021-05-31 22:44:56 -07:00
sch_hfsc.c net/sched: sch_hfsc: upgrade 'rt' to 'sc' when it becomes a inner curve 2023-10-25 11:58:57 +02:00
sch_hhf.c
sch_htb.c net: sched: sch: Fix off by one in htb_activate_prios() 2023-02-22 12:57:11 +01:00
sch_ingress.c net/sched: Reserve TC_H_INGRESS (TC_H_CLSACT) for ingress (clsact) Qdiscs 2023-06-09 10:32:17 +02:00
sch_mq.c net: sched: update default qdisc visibility after Tx queue cnt changes 2021-11-18 19:16:10 +01:00
sch_mqprio.c net/sched: mqprio: Add length check for TCA_MQPRIO_{MAX/MIN}_RATE64 2023-08-03 10:22:36 +02:00
sch_multiq.c net: sched: sch_multiq: fix possible OOB write in multiq_tune() 2024-07-05 09:14:06 +02:00
sch_netem.c sch/netem: fix use after free in netem_dequeue 2024-09-12 11:07:42 +02:00
sch_pie.c
sch_plug.c net: sched: sch_qfq: Fix UAF in qfq_dequeue() 2023-09-19 12:22:59 +02:00
sch_prio.c net: sched: delete duplicate cleanup of backlog and qlen 2022-10-29 10:12:57 +02:00
sch_qfq.c net: sched: sch_qfq: Fix UAF in qfq_dequeue() 2023-09-19 12:22:59 +02:00
sch_red.c net: sched: Fix use after free in red_enqueue() 2022-11-10 18:15:28 +01:00
sch_sfb.c net: sched: sfb: fix null pointer access issue when sfb_init() fails 2022-10-29 10:12:57 +02:00
sch_sfq.c net/sched: store the last executed chain also for clsact egress 2021-07-29 22:17:37 +01:00
sch_skbprio.c net: sched: delete duplicate cleanup of backlog and qlen 2022-10-29 10:12:57 +02:00
sch_taprio.c net: sched: consistently use rcu_replace_pointer() in taprio_change() 2024-10-17 15:11:25 +02:00
sch_tbf.c net: sched: delete duplicate cleanup of backlog and qlen 2022-10-29 10:12:57 +02:00
sch_teql.c net: sched: delete duplicate cleanup of backlog and qlen 2022-10-29 10:12:57 +02:00