Diffstat (limited to 'arch/arm/net/bpf_jit_32.c')
-rw-r--r--	arch/arm/net/bpf_jit_32.c	407
1 files changed, 334 insertions, 73 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index cc29869d12a3..1d672457d02f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -2,6 +2,7 @@
/*
* Just-In-Time compiler for eBPF filters on 32bit ARM
*
+ * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
* Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
* Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
*/
@@ -15,6 +16,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
+#include <linux/math64.h>
#include <asm/cacheflush.h>
#include <asm/hwcap.h>
@@ -36,6 +38,10 @@
* +-----+
* |RSVD | JIT scratchpad
* current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
+ * | ... | caller-saved registers
+ * +-----+
+ * | ... | arguments passed on stack
+ * ARM_SP during call => +-----+
* | |
* | ... | Function call stack
* | |
@@ -63,6 +69,12 @@
*
* When popping registers off the stack at the end of a BPF function, we
* reference them via the current ARM_FP register.
+ *
+ * Some eBPF operations are implemented via a call to a helper function.
+ * Such calls are "invisible" in the eBPF code, so it is up to the calling
+ * program to preserve any caller-saved ARM registers during the call. The
+ * JIT emits code to push and pop those registers onto the stack, immediately
+ * above the callee stack frame.
*/
#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \
@@ -70,6 +82,8 @@
#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC)
+#define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3)
+
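
The new CALLER_MASK complements CALLEE_MASK above: helper calls emitted by the JIT clobber the AAPCS caller-saved registers r0-r3, so those are spilled around the call. A small host-side sketch (illustrative, not part of the patch) of how the mask is narrowed when r0/r1 already carry the operands, as emit_udivmod() does further down:

#include <stdio.h>

#define BIT(n)	(1u << (n))
enum { ARM_R0, ARM_R1, ARM_R2, ARM_R3 };	/* illustrative register numbers */
#define CALLER_MASK (BIT(ARM_R0) | BIT(ARM_R1) | BIT(ARM_R2) | BIT(ARM_R3))

int main(void)
{
	unsigned int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);

	/* prints 0xc: only r2 and r3 are pushed/popped around the helper call */
	printf("spill mask: %#x\n", CALLER_MASK & ~exclude_mask);
	return 0;
}
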
enum {
/* Stack layout - these are offsets from (top of stack - 4) */
BPF_R2_HI,
@@ -151,7 +165,7 @@ static const s8 bpf2a32[][2] = {
[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},
/* Read only Frame Pointer to access Stack */
[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},
- /* Temporary Register for internal BPF JIT, can be used
+ /* Temporary Register for BPF JIT, can be used
* for constant blindings and others.
*/
[TMP_REG_1] = {ARM_R7, ARM_R6},
@@ -216,6 +230,44 @@ static u32 jit_mod32(u32 dividend, u32 divisor)
return dividend % divisor;
}
+static s32 jit_sdiv32(s32 dividend, s32 divisor)
+{
+ return dividend / divisor;
+}
+
+static s32 jit_smod32(s32 dividend, s32 divisor)
+{
+ return dividend % divisor;
+}
+
+/* Wrappers for 64-bit div/mod */
+static u64 jit_udiv64(u64 dividend, u64 divisor)
+{
+ return div64_u64(dividend, divisor);
+}
+
+static u64 jit_mod64(u64 dividend, u64 divisor)
+{
+ u64 rem;
+
+ div64_u64_rem(dividend, divisor, &rem);
+ return rem;
+}
+
+static s64 jit_sdiv64(s64 dividend, s64 divisor)
+{
+ return div64_s64(dividend, divisor);
+}
+
+static s64 jit_smod64(s64 dividend, s64 divisor)
+{
+ u64 q;
+
+ q = div64_s64(dividend, divisor);
+
+ return dividend - q * divisor;
+}
+
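
The signed wrappers rely on C's truncate-toward-zero division, so a remainder computed as dividend - q * divisor takes the sign of the dividend. A standalone check (not kernel code, div64_s64() replaced by plain division) of that identity:

#include <assert.h>
#include <stdint.h>

static int64_t smod64(int64_t dividend, int64_t divisor)
{
	int64_t q = dividend / divisor;		/* stand-in for div64_s64() */

	return dividend - q * divisor;
}

int main(void)
{
	assert(smod64(-7, 2) == -1);	/* -7 / 2 == -3, remainder -1 */
	assert(smod64(7, -2) == 1);
	assert(smod64(-7, -2) == -1);
	return 0;
}
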
static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
inst |= (cond << 28);
@@ -321,6 +373,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
#define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
#define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)
+#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off)
+#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off)
+
#define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
#define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
#define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
@@ -462,16 +517,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
return to - from - 2;
}
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
{
+ const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
const s8 *tmp = bpf2a32[TMP_REG_1];
+ u32 dst;
#if __LINUX_ARM_ARCH__ == 7
if (elf_hwcap & HWCAP_IDIVA) {
- if (op == BPF_DIV)
- emit(ARM_UDIV(rd, rm, rn), ctx);
- else {
- emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+ if (op == BPF_DIV) {
+ emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx);
+ } else {
+ emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx);
emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
}
return;
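
On ARMv7 with HWCAP_IDIVA the modulo is derived from the hardware divide: the quotient lands in IP via SDIV/UDIV and the MLS folds it back into the remainder. A host-side sketch (not kernel code) of the value the ARM_MLS(rd, rn, ARM_IP, rm) above computes, shown for the signed case:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	int32_t rm = -7, rn = 2;	/* dividend, divisor */
	int32_t ip = rm / rn;		/* SDIV ip, rm, rn */
	int32_t rd = rm - rn * ip;	/* MLS  rd, rn, ip, rm */

	assert(ip == -3 && rd == -1);
	return 0;
}
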
@@ -495,11 +552,28 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
emit(ARM_MOV_R(ARM_R0, rm), ctx);
}
+ /* Push caller-saved registers on stack */
+ emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);
+
/* Call appropriate function */
- emit_mov_i(ARM_IP, op == BPF_DIV ?
- (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+ if (sign) {
+ if (op == BPF_DIV)
+ dst = (u32)jit_sdiv32;
+ else
+ dst = (u32)jit_smod32;
+ } else {
+ if (op == BPF_DIV)
+ dst = (u32)jit_udiv32;
+ else
+ dst = (u32)jit_mod32;
+ }
+
+ emit_mov_i(ARM_IP, dst, ctx);
emit_blx_r(ARM_IP, ctx);
+ /* Restore caller-saved registers from stack */
+ emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx);
+
/* Save return value */
if (rd != ARM_R0)
emit(ARM_MOV_R(rd, ARM_R0), ctx);
@@ -511,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}
+static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
+ u8 op, u8 sign)
+{
+ u32 dst;
+
+ /* Push caller-saved registers on stack */
+ emit(ARM_PUSH(CALLER_MASK), ctx);
+
+ /*
+ * As we are implementing 64-bit div/mod as function calls, we need to put the dividend in
+ * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack,
+ * we can recover them later after returning from the function call.
+ */
+ if (rm[1] != ARM_R0 || rn[1] != ARM_R2) {
+ /*
+ * Move Rm to {R1, R0} if it is not already there.
+ */
+ if (rm[1] != ARM_R0) {
+ if (rn[1] == ARM_R0)
+ emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+ emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
+ emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
+ if (rn[1] == ARM_R0) {
+ emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+ goto cont;
+ }
+ }
+ /*
+ * Move Rn to {R3, R2} if it is not already there.
+ */
+ if (rn[1] != ARM_R2) {
+ emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
+ emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
+ }
+ }
+
+cont:
+
+ /* Call appropriate function */
+ if (sign) {
+ if (op == BPF_DIV)
+ dst = (u32)jit_sdiv64;
+ else
+ dst = (u32)jit_smod64;
+ } else {
+ if (op == BPF_DIV)
+ dst = (u32)jit_udiv64;
+ else
+ dst = (u32)jit_mod64;
+ }
+
+ emit_mov_i(ARM_IP, dst, ctx);
+ emit_blx_r(ARM_IP, ctx);
+
+ /* Save return value */
+ if (rd[1] != ARM_R0) {
+ emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
+ emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
+ }
+
+ /* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */
+ if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
+ emit(ARM_POP(CALLER_MASK), ctx);
+ } else if (rd[1] != ARM_R0) {
+ emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+ emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+ } else {
+ emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+ emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+ }
+}
+
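
emit_udivmod64() has to marshal the 64-bit operands into the AAPCS argument registers; the only tricky case is when the divisor already lives in r0/r1 and would be clobbered by moving the dividend there, so the inner push/pop parks it and recovers it directly into r2/r3. A host-side simulation (not kernel code) of that overlap case, with the dividend assumed in r4/r5 and the divisor in r0/r1:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t r[6] = { 0x11, 0x22, 0, 0, 0xAA, 0xBB };	/* r0..r5 */
	uint32_t stack[2];

	stack[0] = r[0]; stack[1] = r[1];	/* push {r0, r1} */
	r[1] = r[5]; r[0] = r[4];		/* mov r1, r5 ; mov r0, r4 */
	r[2] = stack[0]; r[3] = stack[1];	/* pop {r2, r3} */

	assert(r[0] == 0xAA && r[1] == 0xBB);	/* dividend lo/hi */
	assert(r[2] == 0x11 && r[3] == 0x22);	/* divisor  lo/hi */
	return 0;
}
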
/* Is the translated BPF register on stack? */
static bool is_stacked(s8 reg)
{
@@ -693,22 +839,6 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
}
}
-/* ALU operation (32 bit)
- * dst = dst (op) src
- */
-static inline void emit_a32_alu_r(const s8 dst, const s8 src,
- struct jit_ctx *ctx, const bool is64,
- const bool hi, const u8 op) {
- const s8 *tmp = bpf2a32[TMP_REG_1];
- s8 rn, rd;
-
- rn = arm_bpf_get_reg32(src, tmp[1], ctx);
- rd = arm_bpf_get_reg32(dst, tmp[0], ctx);
- /* ALU operation */
- emit_alu_r(rd, rn, is64, hi, op, ctx);
- arm_bpf_put_reg32(dst, rd, ctx);
-}
-
/* ALU operation (64 bit) */
static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
const s8 src[], struct jit_ctx *ctx,
@@ -741,12 +871,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
}
/* dst = src (4 bytes)*/
-static inline void emit_a32_mov_r(const s8 dst, const s8 src,
+static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
struct jit_ctx *ctx) {
const s8 *tmp = bpf2a32[TMP_REG_1];
s8 rt;
rt = arm_bpf_get_reg32(src, tmp[0], ctx);
+ if (off && off != 32) {
+ emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
+ emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
+ }
arm_bpf_put_reg32(dst, rt, ctx);
}
@@ -755,15 +889,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
const s8 src[],
struct jit_ctx *ctx) {
if (!is64) {
- emit_a32_mov_r(dst_lo, src_lo, ctx);
+ emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else if (__LINUX_ARM_ARCH__ < 6 &&
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
/* complete 8 byte move */
- emit_a32_mov_r(dst_lo, src_lo, ctx);
- emit_a32_mov_r(dst_hi, src_hi, ctx);
+ emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
+ emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
const u8 *tmp = bpf2a32[TMP_REG_1];
@@ -779,6 +913,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
}
}
+/* dst = (signed)src */
+static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
+ struct jit_ctx *ctx) {
+ const s8 *tmp = bpf2a32[TMP_REG_1];
+ const s8 *rt;
+
+ rt = arm_bpf_get_reg64(dst, tmp, ctx);
+
+ emit_a32_mov_r(dst_lo, src_lo, off, ctx);
+ if (!is64) {
+ if (!ctx->prog->aux->verifier_zext)
+ /* Zero out high 4 bytes */
+ emit_a32_mov_i(dst_hi, 0, ctx);
+ } else {
+ emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
+ }
+}
+
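
For BPF_MOVSX the off field (8, 16 or 32) selects how many low bits of the source are significant; emit_a32_mov_r() realizes the sign extension with an LSL/ASR pair and emit_a32_movsx_r64() then copies the sign into the high word with ASR #31. A minimal host-side sketch (not kernel code) of the 32-bit part:

#include <assert.h>
#include <stdint.h>

/* assumes arithmetic right shift of signed values, as on arm gcc/clang */
static int32_t movsx32(int32_t v, unsigned int off)
{
	v = (int32_t)((uint32_t)v << (32 - off));	/* ARM_LSL_I */
	return v >> (32 - off);				/* ARM_ASR_I */
}

int main(void)
{
	assert(movsx32(0x80, 8) == -128);
	assert(movsx32(0x7f, 8) == 127);
	assert(movsx32(0x8000, 16) == -32768);
	return 0;
}
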
/* Shift operations */
static inline void emit_a32_alu_i(const s8 dst, const u32 val,
struct jit_ctx *ctx, const u8 op) {
@@ -795,6 +947,9 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val,
case BPF_RSH:
emit(ARM_LSR_I(rd, rd, val), ctx);
break;
+ case BPF_ARSH:
+ emit(ARM_ASR_I(rd, rd, val), ctx);
+ break;
case BPF_NEG:
emit(ARM_RSB_I(rd, rd, val), ctx);
break;
@@ -860,8 +1015,8 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[],
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
- _emit(ARM_COND_MI, ARM_B(0), ctx);
- emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
+ _emit(ARM_COND_PL,
+ ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx);
arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
@@ -929,7 +1084,11 @@ static inline void emit_a32_rsh_i64(const s8 dst[],
rd = arm_bpf_get_reg64(dst, tmp, ctx);
/* Do LSR operation */
- if (val < 32) {
+ if (val == 0) {
+ /* An immediate value of 0 encodes a shift amount of 32
+ * for LSR. To shift by 0, don't do anything.
+ */
+ } else if (val < 32) {
emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx);
@@ -955,7 +1114,11 @@ static inline void emit_a32_arsh_i64(const s8 dst[],
rd = arm_bpf_get_reg64(dst, tmp, ctx);
/* Do ARSH operation */
- if (val < 32) {
+ if (val == 0) {
+ /* An immediate value of 0 encodes a shift amount of 32
+ * for ASR. To shift by 0, don't do anything.
+ */
+ } else if (val < 32) {
emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx);
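
The new val == 0 early-outs matter because of how the ARM barrel shifter encodes immediates: for LSR and ASR an immediate field of 0 means a shift of 32, so a literal "shift by 0" cannot be emitted and must simply be skipped. A small sketch (illustrative, not kernel code) of that encoding rule:

#include <assert.h>

/* imm5 field of an LSR/ASR shift: 1..31 encode themselves, 0 encodes 32,
 * and a shift amount of 0 has no encoding at all. */
static int lsr_asr_imm5(unsigned int amount)
{
	assert(amount >= 1 && amount <= 32);
	return amount == 32 ? 0 : (int)amount;
}

int main(void)
{
	assert(lsr_asr_imm5(32) == 0);
	assert(lsr_asr_imm5(5) == 5);
	return 0;
}
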
@@ -992,21 +1155,53 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[],
arm_bpf_put_reg32(dst_hi, rd[0], ctx);
}
+static bool is_ldst_imm(s16 off, const u8 size)
+{
+ s16 off_max = 0;
+
+ switch (size) {
+ case BPF_B:
+ case BPF_W:
+ off_max = 0xfff;
+ break;
+ case BPF_H:
+ off_max = 0xff;
+ break;
+ case BPF_DW:
+ /* Need to make sure off+4 does not overflow. */
+ off_max = 0xfff - 4;
+ break;
+ }
+ return -off_max <= off && off <= off_max;
+}
+
+static bool is_ldst_imm8(s16 off, const u8 size)
+{
+ s16 off_max = 0;
+
+ switch (size) {
+ case BPF_B:
+ off_max = 0xff;
+ break;
+ case BPF_W:
+ off_max = 0xfff;
+ break;
+ case BPF_H:
+ off_max = 0xff;
+ break;
+ }
+ return -off_max <= off && off <= off_max;
+}
+
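
The two range helpers capture the differing immediate widths of the ARM load/store encodings the JIT uses: LDR/STR/LDRB/STRB take a 12-bit immediate, while LDRH and the sign-extending LDRSB/LDRSH only take an 8-bit one, each with a separate add/subtract bit, hence the symmetric negative range. A standalone sketch (not kernel code) of the rule:

#include <assert.h>
#include <stdbool.h>

static bool fits_imm8(int off)  { return -0xff  <= off && off <= 0xff;  }
static bool fits_imm12(int off) { return -0xfff <= off && off <= 0xfff; }

int main(void)
{
	assert(fits_imm12(0x200) && !fits_imm8(0x200));	/* word load ok; LDRSH needs a temp */
	assert(fits_imm8(-200));			/* negative offsets encode too */
	return 0;
}
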
/* *(size *)(dst + off) = src */
static inline void emit_str_r(const s8 dst, const s8 src[],
- s32 off, struct jit_ctx *ctx, const u8 sz){
+ s16 off, struct jit_ctx *ctx, const u8 sz){
const s8 *tmp = bpf2a32[TMP_REG_1];
- s32 off_max;
s8 rd;
rd = arm_bpf_get_reg32(dst, tmp[1], ctx);
- if (sz == BPF_H)
- off_max = 0xff;
- else
- off_max = 0xfff;
-
- if (off < 0 || off > off_max) {
+ if (!is_ldst_imm(off, sz)) {
emit_a32_mov_i(tmp[0], off, ctx);
emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx);
rd = tmp[0];
@@ -1035,18 +1230,12 @@ static inline void emit_str_r(const s8 dst, const s8 src[],
/* dst = *(size*)(src + off) */
static inline void emit_ldx_r(const s8 dst[], const s8 src,
- s32 off, struct jit_ctx *ctx, const u8 sz){
+ s16 off, struct jit_ctx *ctx, const u8 sz){
const s8 *tmp = bpf2a32[TMP_REG_1];
const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
s8 rm = src;
- s32 off_max;
-
- if (sz == BPF_H)
- off_max = 0xff;
- else
- off_max = 0xfff;
- if (off < 0 || off > off_max) {
+ if (!is_ldst_imm(off, sz)) {
emit_a32_mov_i(tmp[0], off, ctx);
emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
rm = tmp[0];
@@ -1083,6 +1272,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
arm_bpf_put_reg64(dst, rd, ctx);
}
+/* dst = *(signed size*)(src + off) */
+static inline void emit_ldsx_r(const s8 dst[], const s8 src,
+ s16 off, struct jit_ctx *ctx, const u8 sz){
+ const s8 *tmp = bpf2a32[TMP_REG_1];
+ const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
+ s8 rm = src;
+ int add_off;
+
+ if (!is_ldst_imm8(off, sz)) {
+ /*
+ * offset does not fit in the load/store immediate,
+ * construct an ADD instruction to apply the offset.
+ */
+ add_off = imm8m(off);
+ if (add_off > 0) {
+ emit(ARM_ADD_I(tmp[0], src, add_off), ctx);
+ rm = tmp[0];
+ } else {
+ emit_a32_mov_i(tmp[0], off, ctx);
+ emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+ rm = tmp[0];
+ }
+ off = 0;
+ }
+
+ switch (sz) {
+ case BPF_B:
+ /* Load a Byte with sign extension*/
+ emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
+ break;
+ case BPF_H:
+ /* Load a HalfWord with sign extension*/
+ emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
+ break;
+ case BPF_W:
+ /* Load a Word*/
+ emit(ARM_LDR_I(rd[1], rm, off), ctx);
+ break;
+ }
+ /* Carry the sign extension to upper 32 bits */
+ emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
+ arm_bpf_put_reg64(dst, rd, ctx);
+}
+
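
A host-side sketch (not kernel code) of what the emitted LDRSB/LDRSH followed by ASR #31 produce for a BPF_MEMSX byte load: the low word is the sign-extended value and the high word replicates its sign bit.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	int8_t mem = -5;
	int32_t lo = mem;			/* LDRSB: sign-extend to 32 bits */
	int32_t hi = lo >> 31;			/* ASR #31: copy the sign bit */
	uint64_t words = ((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo;

	assert((int64_t)words == -5);
	return 0;
}
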
/* Arithmetic Operation */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
const u8 rn, struct jit_ctx *ctx, u8 op,
@@ -1161,7 +1394,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
/* tmp2[0] = array, tmp2[1] = index */
- /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ /*
+ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
* goto out;
* tail_call_cnt++;
*/
@@ -1170,7 +1404,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
tc = arm_bpf_get_reg64(tcc, tmp, ctx);
emit(ARM_CMP_I(tc[0], hi), ctx);
_emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx);
- _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
+ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx);
emit(ARM_ADC_I(tc[0], tc[0], 0), ctx);
arm_bpf_put_reg64(tcc, tmp, ctx);
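
The switch from the HI to the CS condition makes the guard reject the call already when the count equals the limit, i.e. tail_call_cnt >= MAX_TAIL_CALL_CNT rather than >. A host-side sketch (not kernel code) of the two-word comparison the emitted CMP/CMPEQ/BCS sequence performs:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool limit_hit(uint32_t tc_hi, uint32_t tc_lo,
		      uint32_t lim_hi, uint32_t lim_lo)
{
	if (tc_hi != lim_hi)		/* first CMP decides */
		return tc_hi > lim_hi;
	return tc_lo >= lim_lo;		/* conditional CMP, then CS (unsigned >=) */
}

int main(void)
{
	assert(limit_hit(0, 33, 0, 33));	/* count == limit is now rejected */
	assert(!limit_hit(0, 32, 0, 33));
	return 0;
}
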
@@ -1362,7 +1596,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_a32_mov_i(dst_hi, 0, ctx);
break;
}
- emit_a32_mov_r64(is64, dst, src, ctx);
+ if (insn->off)
+ emit_a32_movsx_r64(is64, insn->off, dst, src, ctx);
+ else
+ emit_a32_mov_r64(is64, dst, src, ctx);
break;
case BPF_K:
/* Sign-extend immediate value to destination reg */
@@ -1392,7 +1629,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU | BPF_RSH | BPF_X:
- case BPF_ALU | BPF_ARSH | BPF_K:
case BPF_ALU | BPF_ARSH | BPF_X:
case BPF_ALU64 | BPF_ADD | BPF_K:
case BPF_ALU64 | BPF_ADD | BPF_X:
@@ -1439,7 +1675,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
rt = src_lo;
break;
}
- emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
+ emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(dst_hi, 0, ctx);
@@ -1448,11 +1684,25 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_K:
case BPF_ALU64 | BPF_MOD | BPF_X:
- goto notyet;
- /* dst = dst >> imm */
+ rd = arm_bpf_get_reg64(dst, tmp2, ctx);
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ rs = arm_bpf_get_reg64(src, tmp, ctx);
+ break;
+ case BPF_K:
+ rs = tmp;
+ emit_a32_mov_se_i64(is64, rs, imm, ctx);
+ break;
+ }
+ emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
+ arm_bpf_put_reg64(dst, rd, ctx);
+ break;
/* dst = dst << imm */
- case BPF_ALU | BPF_RSH | BPF_K:
+ /* dst = dst >> imm */
+ /* dst = dst >> imm (signed) */
case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_ARSH | BPF_K:
if (unlikely(imm > 31))
return -EINVAL;
if (imm)
@@ -1521,10 +1771,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
/* dst = htole(dst) */
/* dst = htobe(dst) */
- case BPF_ALU | BPF_END | BPF_FROM_LE:
- case BPF_ALU | BPF_END | BPF_FROM_BE:
+ case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
+ case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */
+ /* dst = bswap(dst) */
+ case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
rd = arm_bpf_get_reg64(dst, tmp, ctx);
- if (BPF_SRC(code) == BPF_FROM_LE)
+ if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64)
goto emit_bswap_uxt;
switch (imm) {
case 16:
@@ -1579,8 +1831,18 @@ exit:
case BPF_LDX | BPF_MEM | BPF_H:
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_MEM | BPF_DW:
+ /* LDSX: dst = *(signed size *)(src + off) */
+ case BPF_LDX | BPF_MEMSX | BPF_B:
+ case BPF_LDX | BPF_MEMSX | BPF_H:
+ case BPF_LDX | BPF_MEMSX | BPF_W:
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
- emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
+ if (BPF_MODE(insn->code) == BPF_MEMSX)
+ emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code));
+ else
+ emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
+ break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
break;
/* ST: *(size *)(dst + off) = imm */
case BPF_ST | BPF_MEM | BPF_W:
@@ -1600,10 +1862,9 @@ exit:
}
emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code));
break;
- /* STX XADD: lock *(u32 *)(dst + off) += src */
- case BPF_STX | BPF_XADD | BPF_W:
- /* STX XADD: lock *(u64 *)(dst + off) += src */
- case BPF_STX | BPF_XADD | BPF_DW:
+ /* Atomic ops */
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
goto notyet;
/* STX: *(size *)(dst + off) = src */
case BPF_STX | BPF_MEM | BPF_W:
@@ -1735,10 +1996,15 @@ go_jmp:
break;
/* JMP OFF */
case BPF_JMP | BPF_JA:
+ case BPF_JMP32 | BPF_JA:
{
- if (off == 0)
+ if (BPF_CLASS(code) == BPF_JMP32 && imm != 0)
+ jmp_offset = bpf2a32_offset(i + imm, i, ctx);
+ else if (BPF_CLASS(code) == BPF_JMP && off != 0)
+ jmp_offset = bpf2a32_offset(i + off, i, ctx);
+ else
break;
- jmp_offset = bpf2a32_offset(i+off, i, ctx);
+
check_imm24(jmp_offset);
emit(ARM_B(jmp_offset), ctx);
break;
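
BPF_JMP32 | BPF_JA is the long-range unconditional jump ("gotol"), which carries its displacement in the 32-bit imm field instead of the 16-bit off field used by the classic BPF_JMP form. A tiny sketch (not kernel code) of how the target instruction index is picked before bpf2a32_offset() translates it:

#include <assert.h>
#include <stdbool.h>

static int ja_target_index(int i, int off, int imm, bool is_jmp32)
{
	/* the displacement is added to the current BPF instruction index i */
	return is_jmp32 ? i + imm : i + off;
}

int main(void)
{
	assert(ja_target_index(10, 0, 100000, true) == 100010);	/* gotol */
	assert(ja_target_index(10, 5, 0, false) == 15);			/* goto  */
	return 0;
}
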
@@ -1822,7 +2088,7 @@ static int build_body(struct jit_ctx *ctx)
if (ctx->target == NULL)
ctx->offsets[i] = ctx->idx;
- /* If unsuccesfull, return with error code */
+ /* If unsuccessful, return with error code */
if (ret)
return ret;
}
@@ -1841,11 +2107,6 @@ static int validate_code(struct jit_ctx *ctx)
return 0;
}
-void bpf_jit_compile(struct bpf_prog *prog)
-{
- /* Nothing to do here. We support Internal BPF. */
-}
-
bool bpf_jit_needs_zext(void)
{
return true;
@@ -1936,7 +2197,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
* for jit, although it can decrease the size of the image.
*
* As each arm instruction is of length 32bit, we are translating
- * number of JITed intructions into the size required to store these
+ * number of JITed instructions into the size required to store these
* JITed code.
*/
image_size = sizeof(u32) * ctx.idx;