/* bpf_jit_comp.c: BPF JIT compiler for PPC64
 *
 * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
 *
 * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>

#include "bpf_jit.h"

#ifndef __BIG_ENDIAN
/* There are endianness assumptions herein. */
#error "Little-endian PPC not supported in BPF compiler"
#endif

int bpf_jit_enable __read_mostly;

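/*
 * Commit the freshly written opcodes to memory before invalidating the
 * instruction cache over the same range, so that no CPU can fetch stale
 * instructions from the new code buffer.
 */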
static inline void bpf_flush_icache(void *start, void *end)
{
	smp_wmb();
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

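/*
 * ctx->seen is filled in by the scouting pass (pass 0): SEEN_DATAREF,
 * SEEN_XREG and SEEN_MEM record which features the filter uses, and the
 * low 16 bits track the individual M[] slots touched, so the prologue
 * and epilogue only save/restore what is actually needed.
 */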
static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
				   struct codegen_context *ctx)
{
	int i;
	const struct sock_filter *filter = fp->insns;

	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
		/* Make stackframe */
		if (ctx->seen & SEEN_DATAREF) {
			/* If we call any helpers (for loads), save LR */
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_STD(0, 1, 16);

			/* Back up non-volatile regs. */
			PPC_STD(r_D, 1, -(8*(32-r_D)));
			PPC_STD(r_HL, 1, -(8*(32-r_HL)));
		}
		if (ctx->seen & SEEN_MEM) {
			/*
			 * Conditionally save regs r15-r31 as some will be used
			 * for M[] data.
			 */
			for (i = r_M; i < (r_M+16); i++) {
				if (ctx->seen & (1 << (i-r_M)))
					PPC_STD(i, 1, -(8*(32-i)));
			}
		}
		EMIT(PPC_INST_STDU | __PPC_RS(R1) | __PPC_RA(R1) |
		     (-BPF_PPC_STACKFRAME & 0xfffc));
	}

	if (ctx->seen & SEEN_DATAREF) {
		/*
		 * If this filter needs to access skb data,
		 * prepare r_D and r_HL:
		 *  r_HL = skb->len - skb->data_len
		 *  r_D  = skb->data
		 */
		PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
							 data_len));
		PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
		PPC_SUB(r_HL, r_HL, r_scratch1);
		PPC_LD_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
	}

	if (ctx->seen & SEEN_XREG) {
		/*
		 * TODO: Could also detect whether first instr. sets X and
		 * avoid this (as below, with A).
		 */
		PPC_LI(r_X, 0);
	}

	switch (filter[0].code) {
	case BPF_S_RET_K:
	case BPF_S_LD_W_LEN:
	case BPF_S_ANC_PROTOCOL:
	case BPF_S_ANC_IFINDEX:
	case BPF_S_ANC_MARK:
	case BPF_S_ANC_RXHASH:
	case BPF_S_ANC_VLAN_TAG:
	case BPF_S_ANC_VLAN_TAG_PRESENT:
	case BPF_S_ANC_CPU:
	case BPF_S_ANC_QUEUE:
	case BPF_S_LD_W_ABS:
	case BPF_S_LD_H_ABS:
	case BPF_S_LD_B_ABS:
		/* first instruction sets A register (or is RET 'constant') */
		break;
	default:
		/* make sure we don't leak kernel information to user */
		PPC_LI(r_A, 0);
	}
}

static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
		if (ctx->seen & SEEN_DATAREF) {
			PPC_LD(0, 1, 16);
			PPC_MTLR(0);
			PPC_LD(r_D, 1, -(8*(32-r_D)));
			PPC_LD(r_HL, 1, -(8*(32-r_HL)));
		}
		if (ctx->seen & SEEN_MEM) {
			/* Restore any saved non-vol registers */
			for (i = r_M; i < (r_M+16); i++) {
				if (ctx->seen & (1 << (i-r_M)))
					PPC_LD(i, 1, -(8*(32-i)));
			}
		}
	}
	/* The RETs have left a return value in R3. */

	PPC_BLR();
}

#define CHOOSE_LOAD_FUNC(K, func) \
	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
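/*
 * A compile-time negative K may still be a valid ancillary or
 * link-layer offset: if it falls at or above SKF_LL_OFF the dedicated
 * *_negative_offset helper is used, anything lower falls back to the
 * generic helper, and a known non-negative K goes straight to the
 * *_positive_offset fast path.
 */
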
/* Assemble the body code between the prologue & epilogue. */
static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
			      struct codegen_context *ctx,
			      unsigned int *addrs)
{
	const struct sock_filter *filter = fp->insns;
	int flen = fp->len;
	u8 *func;
	unsigned int true_cond;
	int i;

	/* Start of epilogue code */
	unsigned int exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		unsigned int K = filter[i].k;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		switch (filter[i].code) {

			/*** ALU ops ***/
		case BPF_S_ALU_ADD_X: /* A += X; */
			ctx->seen |= SEEN_XREG;
			PPC_ADD(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_ADD_K: /* A += K; */
			if (!K)
				break;
			PPC_ADDI(r_A, r_A, IMM_L(K));
			if (K >= 32768)
				PPC_ADDIS(r_A, r_A, IMM_HA(K));
			break;
		case BPF_S_ALU_SUB_X: /* A -= X; */
			ctx->seen |= SEEN_XREG;
			PPC_SUB(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_SUB_K: /* A -= K */
			if (!K)
				break;
			PPC_ADDI(r_A, r_A, IMM_L(-K));
			if (K >= 32768)
				PPC_ADDIS(r_A, r_A, IMM_HA(-K));
			break;
		case BPF_S_ALU_MUL_X: /* A *= X; */
			ctx->seen |= SEEN_XREG;
			PPC_MUL(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_MUL_K: /* A *= K */
			if (K < 32768)
				PPC_MULI(r_A, r_A, K);
			else {
				PPC_LI32(r_scratch1, K);
				PPC_MUL(r_A, r_A, r_scratch1);
			}
			break;
		case BPF_S_ALU_DIV_X: /* A /= X; */
			ctx->seen |= SEEN_XREG;
			PPC_CMPWI(r_X, 0);
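			/*
			 * Division by zero must return 0.  Reuse an
			 * existing "ret 0" exit point when one has been
			 * recorded (pc_ret0); otherwise emit an inline
			 * li/jmp pair and hop over it with a short
			 * branch (the +12 target is three instructions
			 * past the bcc, i.e. just after the PPC_JMP).
			 */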
			if (ctx->pc_ret0 != -1) {
				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
			} else {
				/*
				 * Exit, returning 0; first pass hits here
				 * (longer worst-case code size).
				 */
				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
				PPC_LI(r_ret, 0);
				PPC_JMP(exit_addr);
			}
			PPC_DIVWU(r_A, r_A, r_X);
			break;
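		/*
		 * For constant divides, sk_chk_filter() has already
		 * replaced K with reciprocal_value(K), so a
		 * multiply-high yields the quotient:
		 * A = (u32)(((u64)A * K') >> 32).
		 */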
		case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
			PPC_LI32(r_scratch1, K);
			/* Top 32 bits of 64bit result -> A */
			PPC_MULHWU(r_A, r_A, r_scratch1);
			break;
		case BPF_S_ALU_AND_X:
			ctx->seen |= SEEN_XREG;
			PPC_AND(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_AND_K:
			if (!IMM_H(K))
				PPC_ANDI(r_A, r_A, K);
			else {
				PPC_LI32(r_scratch1, K);
				PPC_AND(r_A, r_A, r_scratch1);
			}
			break;
		case BPF_S_ALU_OR_X:
			ctx->seen |= SEEN_XREG;
			PPC_OR(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_OR_K:
			if (IMM_L(K))
				PPC_ORI(r_A, r_A, IMM_L(K));
			if (K >= 65536)
				PPC_ORIS(r_A, r_A, IMM_H(K));
			break;
		case BPF_S_ANC_ALU_XOR_X:
		case BPF_S_ALU_XOR_X: /* A ^= X */
			ctx->seen |= SEEN_XREG;
			PPC_XOR(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_XOR_K: /* A ^= K */
			if (IMM_L(K))
				PPC_XORI(r_A, r_A, IMM_L(K));
			if (K >= 65536)
				PPC_XORIS(r_A, r_A, IMM_H(K));
			break;
		case BPF_S_ALU_LSH_X: /* A <<= X; */
			ctx->seen |= SEEN_XREG;
			PPC_SLW(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_LSH_K:
			if (K == 0)
				break;
			else
				PPC_SLWI(r_A, r_A, K);
			break;
		case BPF_S_ALU_RSH_X: /* A >>= X; */
			ctx->seen |= SEEN_XREG;
			PPC_SRW(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_RSH_K: /* A >>= K; */
			if (K == 0)
				break;
			else
				PPC_SRWI(r_A, r_A, K);
			break;
		case BPF_S_ALU_NEG:
			PPC_NEG(r_A, r_A);
			break;
		case BPF_S_RET_K:
			PPC_LI32(r_ret, K);
			if (!K) {
				if (ctx->pc_ret0 == -1)
					ctx->pc_ret0 = i;
			}
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue if we've stuff to clean up.  Otherwise,
			 * if there's nothing to tidy, just return.  If we /are/
			 * the last instruction, we're about to fall through to
			 * the epilogue to return.
			 */
			if (i != flen - 1) {
				/*
				 * Note: 'seen' is properly valid only on pass
				 * #2.  Both parts of this conditional are the
				 * same instruction size though, meaning the
				 * first pass will still correctly determine the
				 * code size/addresses.
				 */
				if (ctx->seen)
					PPC_JMP(exit_addr);
				else
					PPC_BLR();
			}
			break;
		case BPF_S_RET_A:
			PPC_MR(r_ret, r_A);
			if (i != flen - 1) {
				if (ctx->seen)
					PPC_JMP(exit_addr);
				else
					PPC_BLR();
			}
			break;
		case BPF_S_MISC_TAX: /* X = A */
			PPC_MR(r_X, r_A);
			break;
		case BPF_S_MISC_TXA: /* A = X */
			ctx->seen |= SEEN_XREG;
			PPC_MR(r_A, r_X);
			break;

			/*** Constant loads/M[] access ***/
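		/*
		 * BPF's M[] scratch space is register-resident here:
		 * mem[K] maps straight to GPR r_M + K, so these "memory"
		 * ops compile to plain register moves, with the used
		 * slots saved/restored by the prologue and epilogue.
		 */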
		case BPF_S_LD_IMM: /* A = K */
			PPC_LI32(r_A, K);
			break;
		case BPF_S_LDX_IMM: /* X = K */
			PPC_LI32(r_X, K);
			break;
		case BPF_S_LD_MEM: /* A = mem[K] */
			PPC_MR(r_A, r_M + (K & 0xf));
			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_S_LDX_MEM: /* X = mem[K] */
			PPC_MR(r_X, r_M + (K & 0xf));
			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_S_ST: /* mem[K] = A */
			PPC_MR(r_M + (K & 0xf), r_A);
			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_S_STX: /* mem[K] = X */
			PPC_MR(r_M + (K & 0xf), r_X);
			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_S_LD_W_LEN: /* A = skb->len; */
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
			break;
		case BPF_S_LDX_W_LEN: /* X = skb->len; */
			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
			break;

			/*** Ancillary info loads ***/

		/* None of the BPF_S_ANC* codes appear to be passed by
		 * sk_chk_filter().  The interpreter and the x86 BPF
		 * compiler implement them so we do too -- they may be
		 * planted in future.
		 */
		case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  protocol) != 2);
			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  protocol));
			/* ntohs is a NOP with BE loads. */
			break;
		case BPF_S_ANC_IFINDEX:
			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
								dev));
			PPC_CMPDI(r_scratch1, 0);
			if (ctx->pc_ret0 != -1) {
				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
			} else {
				/* Exit, returning 0; first pass hits here. */
				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
				PPC_LI(r_ret, 0);
				PPC_JMP(exit_addr);
			}
			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
						  ifindex) != 4);
			PPC_LWZ_OFFS(r_A, r_scratch1,
				     offsetof(struct net_device, ifindex));
			break;
		case BPF_S_ANC_MARK:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  mark));
			break;
		case BPF_S_ANC_RXHASH:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  rxhash));
			break;
		case BPF_S_ANC_VLAN_TAG:
		case BPF_S_ANC_VLAN_TAG_PRESENT:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  vlan_tci));
			if (filter[i].code == BPF_S_ANC_VLAN_TAG)
				PPC_ANDI(r_A, r_A, VLAN_VID_MASK);
			else
				PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT);
			break;
		case BPF_S_ANC_QUEUE:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  queue_mapping) != 2);
			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  queue_mapping));
			break;
		case BPF_S_ANC_CPU:
#ifdef CONFIG_SMP
			/*
			 * PACA ptr is r13:
			 * raw_smp_processor_id() = local_paca->paca_index
			 */
			BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct,
						  paca_index) != 2);
			PPC_LHZ_OFFS(r_A, 13,
				     offsetof(struct paca_struct, paca_index));
#else
			PPC_LI(r_A, 0);
#endif
			break;

			/*** Absolute loads from packet header/data ***/
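		/*
		 * Helper-call convention: the offset to load from goes
		 * in r_addr (r_skb, r_D and r_HL are already live), the
		 * helper leaves the loaded value in r_A, and on failure
		 * it sets the 'lt' bit in cr0 with an appropriate
		 * filter return value already in r3.
		 */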
		case BPF_S_LD_W_ABS:
			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
			goto common_load;
		case BPF_S_LD_H_ABS:
			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
			goto common_load;
		case BPF_S_LD_B_ABS:
			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
		common_load:
			/* Load from [K]. */
			ctx->seen |= SEEN_DATAREF;
			PPC_LI64(r_scratch1, func);
			PPC_MTLR(r_scratch1);
			PPC_LI32(r_addr, K);
			PPC_BLRL();
			/*
			 * Helper returns 'lt' condition on error, and an
			 * appropriate return value in r3
			 */
			PPC_BCC(COND_LT, exit_addr);
			break;

			/*** Indirect loads from packet header/data ***/
		case BPF_S_LD_W_IND:
			func = sk_load_word;
			goto common_load_ind;
		case BPF_S_LD_H_IND:
			func = sk_load_half;
			goto common_load_ind;
		case BPF_S_LD_B_IND:
			func = sk_load_byte;
		common_load_ind:
			/*
			 * Load from [X + K].  Negative offsets are tested for
			 * in the helper functions.
			 */
			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
			PPC_LI64(r_scratch1, func);
			PPC_MTLR(r_scratch1);
			PPC_ADDI(r_addr, r_X, IMM_L(K));
			if (K >= 32768)
				PPC_ADDIS(r_addr, r_addr, IMM_HA(K));
			PPC_BLRL();
			/* If error, cr0.LT set */
			PPC_BCC(COND_LT, exit_addr);
			break;

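		/*
		 * LDX_B_MSH computes the IP header length:
		 * X = (pkt[K] & 0xf) << 2.  The masking and shifting is
		 * done inside the sk_load_byte_msh helper, which is why
		 * this case can share common_load above.
		 */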
		case BPF_S_LDX_B_MSH:
			func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
			goto common_load;
			break;

			/*** Jump and branches ***/
		case BPF_S_JMP_JA:
			if (K != 0)
				PPC_JMP(addrs[i + 1 + K]);
			break;

		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_S_JMP_JSET_K:
		case BPF_S_JMP_JSET_X:
			true_cond = COND_NE;
			/* Fall through */
		cond_branch:
			/* same targets, can avoid doing the test :) */
			if (filter[i].jt == filter[i].jf) {
				if (filter[i].jt > 0)
					PPC_JMP(addrs[i + 1 + filter[i].jt]);
				break;
			}

			switch (filter[i].code) {
			case BPF_S_JMP_JGT_X:
			case BPF_S_JMP_JGE_X:
			case BPF_S_JMP_JEQ_X:
				ctx->seen |= SEEN_XREG;
				PPC_CMPLW(r_A, r_X);
				break;
			case BPF_S_JMP_JSET_X:
				ctx->seen |= SEEN_XREG;
				PPC_AND_DOT(r_scratch1, r_A, r_X);
				break;
			case BPF_S_JMP_JEQ_K:
			case BPF_S_JMP_JGT_K:
			case BPF_S_JMP_JGE_K:
				if (K < 32768)
					PPC_CMPLWI(r_A, K);
				else {
					PPC_LI32(r_scratch1, K);
					PPC_CMPLW(r_A, r_scratch1);
				}
				break;
			case BPF_S_JMP_JSET_K:
				if (K < 32768)
					/* PPC_ANDI is /only/ dot-form */
					PPC_ANDI(r_scratch1, r_A, K);
				else {
					PPC_LI32(r_scratch1, K);
					PPC_AND_DOT(r_scratch1, r_A,
						    r_scratch1);
				}
				break;
			}
			/* Sometimes branches are constructed "backward", with
			 * the false path being the branch and true path being
			 * a fallthrough to the next instruction.
			 */
			if (filter[i].jt == 0)
				/* Swap the sense of the branch */
				PPC_BCC(true_cond ^ COND_CMP_TRUE,
					addrs[i + 1 + filter[i].jf]);
			else {
				PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
				if (filter[i].jf != 0)
					PPC_JMP(addrs[i + 1 + filter[i].jf]);
			}
			break;
		default:
			/* The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			if (printk_ratelimit())
				pr_err("BPF filter opcode %04x (@%d) unsupported\n",
				       filter[i].code, i);
			return -ENOTSUPP;
		}

	}
	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}

void bpf_jit_compile(struct sk_filter *fp)
{
	unsigned int proglen;
	unsigned int alloclen;
	u32 *image = NULL;
	u32 *code_base;
	unsigned int *addrs;
	struct codegen_context cgctx;
	int pass;
	int flen = fp->len;

	if (!bpf_jit_enable)
		return;

	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL)
		return;

	/*
	 * There are multiple assembly passes as the generated code will change
	 * size as it settles down, figuring out the max branch offsets/exit
	 * paths required.
	 *
	 * The range of standard conditional branches is +/- 32Kbytes.  Since
	 * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
	 * finish with 8 bytes/instruction.  Not feasible, so long jumps are
	 * used, distinct from short branches.
	 *
	 * Current:
	 *
	 * For now, both branch types assemble to 2 words (short branches padded
	 * with a NOP); this is less efficient, but assembly will always complete
	 * after exactly 3 passes:
	 *
	 * First pass: No code buffer; Program is "faux-generated" -- no code
	 * emitted but maximum size of output determined (and addrs[] filled
	 * in).  Also, we note whether we use M[], whether we use skb data, etc.
	 * All generation choices assumed to be 'worst-case', e.g. branches all
	 * far (2 instructions), return path code reduction not available, etc.
	 *
	 * Second pass: Code buffer allocated with size determined previously.
	 * Prologue generated to support features we have seen used.  Exit paths
	 * determined and addrs[] is filled in again, as code may be slightly
	 * smaller as a result.
	 *
	 * Third pass: Code generated 'for real', and branch destinations
	 * determined from now-accurate addrs[] map.
	 *
	 * Ideal:
	 *
	 * If we optimise this, near branches will be shorter.  On the
	 * first assembly pass, we should err on the side of caution and
	 * generate the biggest code.  On subsequent passes, branches will be
	 * generated short or long and code size will reduce.  With smaller
	 * code, more branches may fall into the short category, and code will
	 * reduce more.
	 *
	 * Finally, if we see one pass generate code the same size as the
	 * previous pass we have converged and should now generate code for
	 * real.  Allocating at the end will also save the memory that would
	 * otherwise be wasted by the (small) current code shrinkage.
	 * Preferably, we should do a small number of passes (e.g. 5) and if we
	 * haven't converged by then, get impatient and force code to generate
	 * as-is, even if the odd branch would be left long.  The chances of a
	 * long jump are tiny with all but the most enormous of BPF filter
	 * inputs, so we should usually converge on the third pass.
	 */
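	/*
	 * Both two-word forms come from PPC_BCC() in bpf_jit.h: a target
	 * within conditional-branch range assembles as 'bcc target; nop',
	 * anything further as an inverted short bcc hopping over an
	 * unconditional 'b target' -- equal size either way, which is
	 * what keeps the pass count fixed at three.
	 */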

	cgctx.idx = 0;
	cgctx.seen = 0;
	cgctx.pc_ret0 = -1;
	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
		/* We hit something illegal or unsupported. */
		goto out;

	/*
	 * Pretend to build prologue, given the features we've seen.  This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(fp, 0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;
	image = module_alloc(max_t(unsigned int, alloclen,
				   sizeof(struct work_struct)));
	if (!image)
		goto out;
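
	/*
	 * The allocation is padded to at least sizeof(struct
	 * work_struct) because bpf_jit_free() reuses the image as a
	 * work_struct when deferring module_free() to process context.
	 */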
	code_base = image + (FUNCTION_DESCR_SIZE/4);

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(fp, code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

	if (bpf_jit_enable > 1)
		/* Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);
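
	/*
	 * The ppc64 ABI calls functions through a descriptor of
	 * {entry address, TOC pointer} rather than a raw code address,
	 * so the first FUNCTION_DESCR_SIZE bytes of the image are
	 * fabricated below to let fp->bpf_func be called like any
	 * ordinary C function.
	 */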
	if (image) {
		bpf_flush_icache(code_base, code_base + (proglen/4));
		/* Function descriptor nastiness: Address + TOC */
		((u64 *)image)[0] = (u64)code_base;
		((u64 *)image)[1] = local_paca->kernel_toc;
		fp->bpf_func = (void *)image;
	}
out:
	kfree(addrs);
	return;
}

static void jit_free_defer(struct work_struct *arg)
{
	module_free(NULL, arg);
}

/* Run from softirq; we must use a work_struct to call
 * module_free() from process context.
 */
void bpf_jit_free(struct sk_filter *fp)
{
	if (fp->bpf_func != sk_run_filter) {
		struct work_struct *work = (struct work_struct *)fp->bpf_func;

		INIT_WORK(work, jit_free_defer);
		schedule_work(work);
	}
}