1  // SPDX-License-Identifier: GPL-2.0
2  #include <linux/moduleloader.h>
3  #include <linux/workqueue.h>
4  #include <linux/netdevice.h>
5  #include <linux/filter.h>
6  #include <linux/bpf.h>
7  #include <linux/cache.h>
8  #include <linux/if_vlan.h>
9  
10  #include <asm/cacheflush.h>
11  #include <asm/ptrace.h>
12  
13  #include "bpf_jit_64.h"
14  
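/* These helpers test whether a value fits in a signed N-bit immediate
 * field by relying on unsigned wrap-around: for simm13, value + 0x1000
 * stays below 0x2000 only when value lies in [-4096, 4095].  For
 * example, (unsigned int)-1 + 0x1000 wraps to 0xfff and passes, while
 * 4096 yields exactly 0x2000 and is rejected.
 */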
15  static inline bool is_simm13(unsigned int value)
16  {
17  	return value + 0x1000 < 0x2000;
18  }
19  
20  static inline bool is_simm10(unsigned int value)
21  {
22  	return value + 0x200 < 0x400;
23  }
24  
25  static inline bool is_simm5(unsigned int value)
26  {
27  	return value + 0x10 < 0x20;
28  }
29  
30  static inline bool is_sethi(unsigned int value)
31  {
32  	return (value & ~0x3fffff) == 0;
33  }
34  
35  static void bpf_flush_icache(void *start_, void *end_)
36  {
37  	/* Cheetah's I-cache is fully coherent.  */
38  	if (tlb_type == spitfire) {
39  		unsigned long start = (unsigned long) start_;
40  		unsigned long end = (unsigned long) end_;
41  
42  		start &= ~7UL;
43  		end = (end + 7UL) & ~7UL;
44  		while (start < end) {
45  			flushi(start);
46  			start += 32;
47  		}
48  	}
49  }
50  
51  #define S13(X)		((X) & 0x1fff)
52  #define S5(X)		((X) & 0x1f)
53  #define IMMED		0x00002000
54  #define RD(X)		((X) << 25)
55  #define RS1(X)		((X) << 14)
56  #define RS2(X)		((X))
57  #define OP(X)		((X) << 30)
58  #define OP2(X)		((X) << 22)
59  #define OP3(X)		((X) << 19)
60  #define COND(X)		(((X) & 0xf) << 25)
61  #define CBCOND(X)	(((X) & 0x1f) << 25)
62  #define F1(X)		OP(X)
63  #define F2(X, Y)	(OP(X) | OP2(Y))
64  #define F3(X, Y)	(OP(X) | OP3(Y))
65  #define ASI(X)		(((X) & 0xff) << 5)
66  
67  #define CONDN		COND(0x0)
68  #define CONDE		COND(0x1)
69  #define CONDLE		COND(0x2)
70  #define CONDL		COND(0x3)
71  #define CONDLEU		COND(0x4)
72  #define CONDCS		COND(0x5)
73  #define CONDNEG		COND(0x6)
74  #define CONDVC		COND(0x7)
75  #define CONDA		COND(0x8)
76  #define CONDNE		COND(0x9)
77  #define CONDG		COND(0xa)
78  #define CONDGE		COND(0xb)
79  #define CONDGU		COND(0xc)
80  #define CONDCC		COND(0xd)
81  #define CONDPOS		COND(0xe)
82  #define CONDVS		COND(0xf)
83  
84  #define CONDGEU		CONDCC
85  #define CONDLU		CONDCS
86  
87  #define WDISP22(X)	(((X) >> 2) & 0x3fffff)
88  #define WDISP19(X)	(((X) >> 2) & 0x7ffff)
89  
90  /* The 10-bit branch displacement for CBCOND is split into two fields */
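/* The low eight bits of the word displacement land in instruction bits
 * [12:5] and the top two bits in [20:19].  For example, a branch two
 * instructions ahead (byte offset 8, word displacement 2) encodes as
 * 2 << 5 with the upper field left clear.
 */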
91  static u32 WDISP10(u32 off)
92  {
93  	u32 ret = ((off >> 2) & 0xff) << 5;
94  
95  	ret |= ((off >> (2 + 8)) & 0x03) << 19;
96  
97  	return ret;
98  }
99  
100  #define CBCONDE		CBCOND(0x09)
101  #define CBCONDLE	CBCOND(0x0a)
102  #define CBCONDL		CBCOND(0x0b)
103  #define CBCONDLEU	CBCOND(0x0c)
104  #define CBCONDCS	CBCOND(0x0d)
105  #define CBCONDN		CBCOND(0x0e)
106  #define CBCONDVS	CBCOND(0x0f)
107  #define CBCONDNE	CBCOND(0x19)
108  #define CBCONDG		CBCOND(0x1a)
109  #define CBCONDGE	CBCOND(0x1b)
110  #define CBCONDGU	CBCOND(0x1c)
111  #define CBCONDCC	CBCOND(0x1d)
112  #define CBCONDPOS	CBCOND(0x1e)
113  #define CBCONDVC	CBCOND(0x1f)
114  
115  #define CBCONDGEU	CBCONDCC
116  #define CBCONDLU	CBCONDCS
117  
118  #define ANNUL		(1 << 29)
119  #define XCC		(1 << 21)
120  
121  #define BRANCH		(F2(0, 1) | XCC)
122  #define CBCOND_OP	(F2(0, 3) | XCC)
123  
124  #define BA		(BRANCH | CONDA)
125  #define BG		(BRANCH | CONDG)
126  #define BL		(BRANCH | CONDL)
127  #define BLE		(BRANCH | CONDLE)
128  #define BGU		(BRANCH | CONDGU)
129  #define BLEU		(BRANCH | CONDLEU)
130  #define BGE		(BRANCH | CONDGE)
131  #define BGEU		(BRANCH | CONDGEU)
132  #define BLU		(BRANCH | CONDLU)
133  #define BE		(BRANCH | CONDE)
134  #define BNE		(BRANCH | CONDNE)
135  
136  #define SETHI(K, REG)	\
137  	(F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
138  #define OR_LO(K, REG)	\
139  	(F3(2, 0x02) | IMMED | RS1(REG) | ((K) & 0x3ff) | RD(REG))
140  
141  #define ADD		F3(2, 0x00)
142  #define AND		F3(2, 0x01)
143  #define ANDCC		F3(2, 0x11)
144  #define OR		F3(2, 0x02)
145  #define XOR		F3(2, 0x03)
146  #define SUB		F3(2, 0x04)
147  #define SUBCC		F3(2, 0x14)
148  #define MUL		F3(2, 0x0a)
149  #define MULX		F3(2, 0x09)
150  #define UDIVX		F3(2, 0x0d)
151  #define DIV		F3(2, 0x0e)
152  #define SLL		F3(2, 0x25)
153  #define SLLX		(F3(2, 0x25)|(1<<12))
154  #define SRA		F3(2, 0x27)
155  #define SRAX		(F3(2, 0x27)|(1<<12))
156  #define SRL		F3(2, 0x26)
157  #define SRLX		(F3(2, 0x26)|(1<<12))
158  #define JMPL		F3(2, 0x38)
159  #define SAVE		F3(2, 0x3c)
160  #define RESTORE		F3(2, 0x3d)
161  #define CALL		F1(1)
162  #define BR		F2(0, 0x01)
163  #define RD_Y		F3(2, 0x28)
164  #define WR_Y		F3(2, 0x30)
165  
166  #define LD32		F3(3, 0x00)
167  #define LD8		F3(3, 0x01)
168  #define LD16		F3(3, 0x02)
169  #define LD64		F3(3, 0x0b)
170  #define LD64A		F3(3, 0x1b)
171  #define ST8		F3(3, 0x05)
172  #define ST16		F3(3, 0x06)
173  #define ST32		F3(3, 0x04)
174  #define ST64		F3(3, 0x0e)
175  
176  #define CAS		F3(3, 0x3c)
177  #define CASX		F3(3, 0x3e)
178  
179  #define LDPTR		LD64
180  #define BASE_STACKFRAME	176
181  
182  #define LD32I		(LD32 | IMMED)
183  #define LD8I		(LD8 | IMMED)
184  #define LD16I		(LD16 | IMMED)
185  #define LD64I		(LD64 | IMMED)
186  #define LDPTRI		(LDPTR | IMMED)
187  #define ST32I		(ST32 | IMMED)
188  
189  struct jit_ctx {
190  	struct bpf_prog		*prog;
191  	unsigned int		*offset;
192  	int			idx;
193  	int			epilogue_offset;
194  	bool 			tmp_1_used;
195  	bool 			tmp_2_used;
196  	bool 			tmp_3_used;
197  	bool			saw_frame_pointer;
198  	bool			saw_call;
199  	bool			saw_tail_call;
200  	u32			*image;
201  };
202  
203  #define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
204  #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
205  #define TMP_REG_3	(MAX_BPF_JIT_REG + 2)
206  
207  /* Map BPF registers to SPARC registers */
208  static const int bpf2sparc[] = {
209  	/* return value from in-kernel function, and exit value from eBPF */
210  	[BPF_REG_0] = O5,
211  
212  	/* arguments from eBPF program to in-kernel function */
213  	[BPF_REG_1] = O0,
214  	[BPF_REG_2] = O1,
215  	[BPF_REG_3] = O2,
216  	[BPF_REG_4] = O3,
217  	[BPF_REG_5] = O4,
218  
219  	/* callee saved registers that in-kernel function will preserve */
220  	[BPF_REG_6] = L0,
221  	[BPF_REG_7] = L1,
222  	[BPF_REG_8] = L2,
223  	[BPF_REG_9] = L3,
224  
225  	/* read-only frame pointer to access stack */
226  	[BPF_REG_FP] = L6,
227  
228  	[BPF_REG_AX] = G7,
229  
230  	/* temporary register for BPF JIT */
231  	[TMP_REG_1] = G1,
232  	[TMP_REG_2] = G2,
233  	[TMP_REG_3] = G3,
234  };
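/* With this mapping the five eBPF argument registers already live in
 * %o0-%o4, so a BPF helper call needs no argument shuffling; only the
 * return value has to be copied from %o0 into %o5 (BPF_REG_0) after
 * the call.  The prologue moves the incoming %i0-%i4 into %o0-%o4 so
 * that BPF_REG_1-BPF_REG_5 are live on entry as well.
 */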
235  
236  static void emit(const u32 insn, struct jit_ctx *ctx)
237  {
238  	if (ctx->image != NULL)
239  		ctx->image[ctx->idx] = insn;
240  
241  	ctx->idx++;
242  }
243  
244  static void emit_call(u32 *func, struct jit_ctx *ctx)
245  {
246  	if (ctx->image != NULL) {
247  		void *here = &ctx->image[ctx->idx];
248  		unsigned int off;
249  
250  		off = (void *)func - here;
251  		ctx->image[ctx->idx] = CALL | ((off >> 2) & 0x3fffffff);
252  	}
253  	ctx->idx++;
254  }
255  
256  static void emit_nop(struct jit_ctx *ctx)
257  {
258  	emit(SETHI(0, G0), ctx);
259  }
260  
261  static void emit_reg_move(u32 from, u32 to, struct jit_ctx *ctx)
262  {
263  	emit(OR | RS1(G0) | RS2(from) | RD(to), ctx);
264  }
265  
266  /* Emit 32-bit constant, zero extended. */
267  static void emit_set_const(s32 K, u32 reg, struct jit_ctx *ctx)
268  {
269  	emit(SETHI(K, reg), ctx);
270  	emit(OR_LO(K, reg), ctx);
271  }
272  
273  /* Emit 32-bit constant, sign extended. */
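/* For negative K the sequence loads the complemented upper bits with
 * sethi and then XORs in a negative simm13 carrying K's low ten bits:
 * the XOR flips every bit above bit 9 back to its true value and,
 * because the immediate is sign extended to 64 bits, also fills bits
 * 63:32, giving a properly sign-extended result.
 */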
274  static void emit_set_const_sext(s32 K, u32 reg, struct jit_ctx *ctx)
275  {
276  	if (K >= 0) {
277  		emit(SETHI(K, reg), ctx);
278  		emit(OR_LO(K, reg), ctx);
279  	} else {
280  		u32 hbits = ~(u32) K;
281  		u32 lbits = -0x400 | (u32) K;
282  
283  		emit(SETHI(hbits, reg), ctx);
284  		emit(XOR | IMMED | RS1(reg) | S13(lbits) | RD(reg), ctx);
285  	}
286  }
287  
288  static void emit_alu(u32 opcode, u32 src, u32 dst, struct jit_ctx *ctx)
289  {
290  	emit(opcode | RS1(dst) | RS2(src) | RD(dst), ctx);
291  }
292  
293  static void emit_alu3(u32 opcode, u32 a, u32 b, u32 c, struct jit_ctx *ctx)
294  {
295  	emit(opcode | RS1(a) | RS2(b) | RD(c), ctx);
296  }
297  
298  static void emit_alu_K(unsigned int opcode, unsigned int dst, unsigned int imm,
299  		       struct jit_ctx *ctx)
300  {
301  	bool small_immed = is_simm13(imm);
302  	unsigned int insn = opcode;
303  
304  	insn |= RS1(dst) | RD(dst);
305  	if (small_immed) {
306  		emit(insn | IMMED | S13(imm), ctx);
307  	} else {
308  		unsigned int tmp = bpf2sparc[TMP_REG_1];
309  
310  		ctx->tmp_1_used = true;
311  
312  		emit_set_const_sext(imm, tmp, ctx);
313  		emit(insn | RS2(tmp), ctx);
314  	}
315  }
316  
317  static void emit_alu3_K(unsigned int opcode, unsigned int src, unsigned int imm,
318  			unsigned int dst, struct jit_ctx *ctx)
319  {
320  	bool small_immed = is_simm13(imm);
321  	unsigned int insn = opcode;
322  
323  	insn |= RS1(src) | RD(dst);
324  	if (small_immed) {
325  		emit(insn | IMMED | S13(imm), ctx);
326  	} else {
327  		unsigned int tmp = bpf2sparc[TMP_REG_1];
328  
329  		ctx->tmp_1_used = true;
330  
331  		emit_set_const_sext(imm, tmp, ctx);
332  		emit(insn | RS2(tmp), ctx);
333  	}
334  }
335  
336  static void emit_loadimm32(s32 K, unsigned int dest, struct jit_ctx *ctx)
337  {
338  	if (K >= 0 && is_simm13(K)) {
339  		/* or %g0, K, DEST */
340  		emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
341  	} else {
342  		emit_set_const(K, dest, ctx);
343  	}
344  }
345  
346  static void emit_loadimm(s32 K, unsigned int dest, struct jit_ctx *ctx)
347  {
348  	if (is_simm13(K)) {
349  		/* or %g0, K, DEST */
350  		emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
351  	} else {
352  		emit_set_const(K, dest, ctx);
353  	}
354  }
355  
356  static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
357  {
358  	if (is_simm13(K)) {
359  		/* or %g0, K, DEST */
360  		emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
361  	} else {
362  		emit_set_const_sext(K, dest, ctx);
363  	}
364  }
365  
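/* Scan a 64-bit constant, passed as its high and low 32-bit halves,
 * and report the positions of its lowest and highest set bits plus
 * whether every bit in between is also set.  These three facts drive
 * the choice of load sequence in emit_loadimm64() below.
 */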
366  static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
367  				   int *hbsp, int *lbsp, int *abbasp)
368  {
369  	int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
370  	int i;
371  
372  	lowest_bit_set = highest_bit_set = -1;
373  	i = 0;
374  	do {
375  		if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
376  			lowest_bit_set = i;
377  		if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
378  			highest_bit_set = (64 - i - 1);
379  	}  while (++i < 32 && (highest_bit_set == -1 ||
380  			       lowest_bit_set == -1));
381  	if (i == 32) {
382  		i = 0;
383  		do {
384  			if (lowest_bit_set == -1 && ((high_bits >> i) & 1))
385  				lowest_bit_set = i + 32;
386  			if (highest_bit_set == -1 &&
387  			    ((low_bits >> (32 - i - 1)) & 1))
388  				highest_bit_set = 32 - i - 1;
389  		} while (++i < 32 && (highest_bit_set == -1 ||
390  				      lowest_bit_set == -1));
391  	}
392  
393  	all_bits_between_are_set = 1;
394  	for (i = lowest_bit_set; i <= highest_bit_set; i++) {
395  		if (i < 32) {
396  			if ((low_bits & (1 << i)) != 0)
397  				continue;
398  		} else {
399  			if ((high_bits & (1 << (i - 32))) != 0)
400  				continue;
401  		}
402  		all_bits_between_are_set = 0;
403  		break;
404  	}
405  	*hbsp = highest_bit_set;
406  	*lbsp = lowest_bit_set;
407  	*abbasp = all_bits_between_are_set;
408  }
409  
410  static unsigned long create_simple_focus_bits(unsigned long high_bits,
411  					      unsigned long low_bits,
412  					      int lowest_bit_set, int shift)
413  {
414  	long hi, lo;
415  
416  	if (lowest_bit_set < 32) {
417  		lo = (low_bits >> lowest_bit_set) << shift;
418  		hi = ((high_bits << (32 - lowest_bit_set)) << shift);
419  	} else {
420  		lo = 0;
421  		hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
422  	}
423  	return hi | lo;
424  }
425  
426  static bool const64_is_2insns(unsigned long high_bits,
427  			      unsigned long low_bits)
428  {
429  	int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
430  
431  	if (high_bits == 0 || high_bits == 0xffffffff)
432  		return true;
433  
434  	analyze_64bit_constant(high_bits, low_bits,
435  			       &highest_bit_set, &lowest_bit_set,
436  			       &all_bits_between_are_set);
437  
438  	if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
439  	    all_bits_between_are_set != 0)
440  		return true;
441  
442  	if (highest_bit_set - lowest_bit_set < 21)
443  		return true;
444  
445  	return false;
446  }
447  
448  static void sparc_emit_set_const64_quick2(unsigned long high_bits,
449  					  unsigned long low_imm,
450  					  unsigned int dest,
451  					  int shift_count, struct jit_ctx *ctx)
452  {
453  	emit_loadimm32(high_bits, dest, ctx);
454  
455  	/* Now shift it up into place.  */
456  	emit_alu_K(SLLX, dest, shift_count, ctx);
457  
458  	/* If there is a low immediate part piece, finish up by
459  	 * putting that in as well.
460  	 */
461  	if (low_imm != 0)
462  		emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
463  }
464  
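/* Materialize an arbitrary 64-bit constant using the shortest sequence
 * we can find: a plain 32-bit load when the upper half is all zeros or
 * all ones, a mov/sethi plus one shift when the set bits form a single
 * narrow cluster, several three-instruction forms (including loading
 * the complement and XOR-ing the trailing bits back in), and finally
 * the full decomposition of both halves, which costs up to six
 * instructions.
 */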
465  static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
466  {
467  	int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
468  	unsigned int tmp = bpf2sparc[TMP_REG_1];
469  	u32 low_bits = (K & 0xffffffff);
470  	u32 high_bits = (K >> 32);
471  
472  	/* These two tests also take care of all of the one
473  	 * instruction cases.
474  	 */
475  	if (high_bits == 0xffffffff && (low_bits & 0x80000000))
476  		return emit_loadimm_sext(K, dest, ctx);
477  	if (high_bits == 0x00000000)
478  		return emit_loadimm32(K, dest, ctx);
479  
480  	analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
481  			       &lowest_bit_set, &all_bits_between_are_set);
482  
483  	/* 1) mov	-1, %reg
484  	 *    sllx	%reg, shift, %reg
485  	 * 2) mov	-1, %reg
486  	 *    srlx	%reg, shift, %reg
487  	 * 3) mov	some_small_const, %reg
488  	 *    sllx	%reg, shift, %reg
489  	 */
490  	if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
491  	     all_bits_between_are_set != 0) ||
492  	    ((highest_bit_set - lowest_bit_set) < 12)) {
493  		int shift = lowest_bit_set;
494  		long the_const = -1;
495  
496  		if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
497  		    all_bits_between_are_set == 0) {
498  			the_const =
499  				create_simple_focus_bits(high_bits, low_bits,
500  							 lowest_bit_set, 0);
501  		} else if (lowest_bit_set == 0)
502  			shift = -(63 - highest_bit_set);
503  
504  		emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
505  		if (shift > 0)
506  			emit_alu_K(SLLX, dest, shift, ctx);
507  		else if (shift < 0)
508  			emit_alu_K(SRLX, dest, -shift, ctx);
509  
510  		return;
511  	}
512  
513  	/* Now a range of 22 or less bits set somewhere.
514  	 * 1) sethi	%hi(focus_bits), %reg
515  	 *    sllx	%reg, shift, %reg
516  	 * 2) sethi	%hi(focus_bits), %reg
517  	 *    srlx	%reg, shift, %reg
518  	 */
519  	if ((highest_bit_set - lowest_bit_set) < 21) {
520  		unsigned long focus_bits =
521  			create_simple_focus_bits(high_bits, low_bits,
522  						 lowest_bit_set, 10);
523  
524  		emit(SETHI(focus_bits, dest), ctx);
525  
526  		/* If lowest_bit_set == 10 then a sethi alone could
527  		 * have done it.
528  		 */
529  		if (lowest_bit_set < 10)
530  			emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
531  		else if (lowest_bit_set > 10)
532  			emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
533  		return;
534  	}
535  
536  	/* Ok, now 3 instruction sequences.  */
537  	if (low_bits == 0) {
538  		emit_loadimm32(high_bits, dest, ctx);
539  		emit_alu_K(SLLX, dest, 32, ctx);
540  		return;
541  	}
542  
543  	/* We may be able to do something quick
544  	 * when the constant is negated, so try that.
545  	 */
546  	if (const64_is_2insns((~high_bits) & 0xffffffff,
547  			      (~low_bits) & 0xfffffc00)) {
548  		/* NOTE: The trailing bits get XOR'd so we need the
549  		 * non-negated bits, not the negated ones.
550  		 */
551  		unsigned long trailing_bits = low_bits & 0x3ff;
552  
553  		if ((((~high_bits) & 0xffffffff) == 0 &&
554  		     ((~low_bits) & 0x80000000) == 0) ||
555  		    (((~high_bits) & 0xffffffff) == 0xffffffff &&
556  		     ((~low_bits) & 0x80000000) != 0)) {
557  			unsigned long fast_int = (~low_bits & 0xffffffff);
558  
559  			if ((is_sethi(fast_int) &&
560  			     (~high_bits & 0xffffffff) == 0)) {
561  				emit(SETHI(fast_int, dest), ctx);
562  			} else if (is_simm13(fast_int)) {
563  				emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
564  			} else {
565  				emit_loadimm64(fast_int, dest, ctx);
566  			}
567  		} else {
568  			u64 n = ((~low_bits) & 0xfffffc00) |
569  				(((unsigned long)((~high_bits) & 0xffffffff))<<32);
570  			emit_loadimm64(n, dest, ctx);
571  		}
572  
573  		low_bits = -0x400 | trailing_bits;
574  
575  		emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
576  		return;
577  	}
578  
579  	/* 1) sethi	%hi(xxx), %reg
580  	 *    or	%reg, %lo(xxx), %reg
581  	 *    sllx	%reg, yyy, %reg
582  	 */
583  	if ((highest_bit_set - lowest_bit_set) < 32) {
584  		unsigned long focus_bits =
585  			create_simple_focus_bits(high_bits, low_bits,
586  						 lowest_bit_set, 0);
587  
588  		/* So what we know is that the set bits straddle the
589  		 * middle of the 64-bit word.
590  		 */
591  		sparc_emit_set_const64_quick2(focus_bits, 0, dest,
592  					      lowest_bit_set, ctx);
593  		return;
594  	}
595  
596  	/* 1) sethi	%hi(high_bits), %reg
597  	 *    or	%reg, %lo(high_bits), %reg
598  	 *    sllx	%reg, 32, %reg
599  	 *    or	%reg, low_bits, %reg
600  	 */
601  	if (is_simm13(low_bits) && ((int)low_bits > 0)) {
602  		sparc_emit_set_const64_quick2(high_bits, low_bits,
603  					      dest, 32, ctx);
604  		return;
605  	}
606  
607  	/* Oh well, we tried... Do a full 64-bit decomposition.  */
608  	ctx->tmp_1_used = true;
609  
610  	emit_loadimm32(high_bits, tmp, ctx);
611  	emit_loadimm32(low_bits, dest, ctx);
612  	emit_alu_K(SLLX, tmp, 32, ctx);
613  	emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
614  }
615  
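/* Branch templates that carry the XCC bit are the v9 BPcc form with a
 * 19-bit displacement; the others use the 22-bit Bicc field.  Either
 * way the displacement is counted in instruction words, hence the
 * shifts by two on the byte offsets.
 */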
616  static void emit_branch(unsigned int br_opc, unsigned int from_idx, unsigned int to_idx,
617  			struct jit_ctx *ctx)
618  {
619  	unsigned int off = to_idx - from_idx;
620  
621  	if (br_opc & XCC)
622  		emit(br_opc | WDISP19(off << 2), ctx);
623  	else
624  		emit(br_opc | WDISP22(off << 2), ctx);
625  }
626  
627  static void emit_cbcond(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
628  			const u8 dst, const u8 src, struct jit_ctx *ctx)
629  {
630  	unsigned int off = to_idx - from_idx;
631  
632  	emit(cb_opc | WDISP10(off << 2) | RS1(dst) | RS2(src), ctx);
633  }
634  
635  static void emit_cbcondi(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
636  			 const u8 dst, s32 imm, struct jit_ctx *ctx)
637  {
638  	unsigned int off = to_idx - from_idx;
639  
640  	emit(cb_opc | IMMED | WDISP10(off << 2) | RS1(dst) | S5(imm), ctx);
641  }
642  
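/* 32-bit udiv takes the upper half of its 64-bit dividend from the Y
 * register, so the 32-bit BPF_DIV/BPF_MOD cases below clear Y first
 * with emit_write_y(G0, ...); 64-bit divides use udivx and need no Y
 * setup.
 */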
643  #define emit_read_y(REG, CTX)	emit(RD_Y | RD(REG), CTX)
644  #define emit_write_y(REG, CTX)	emit(WR_Y | IMMED | RS1(REG) | S13(0), CTX)
645  
646  #define emit_cmp(R1, R2, CTX)				\
647  	emit(SUBCC | RS1(R1) | RS2(R2) | RD(G0), CTX)
648  
649  #define emit_cmpi(R1, IMM, CTX)				\
650  	emit(SUBCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)
651  
652  #define emit_btst(R1, R2, CTX)				\
653  	emit(ANDCC | RS1(R1) | RS2(R2) | RD(G0), CTX)
654  
655  #define emit_btsti(R1, IMM, CTX)			\
656  	emit(ANDCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)
657  
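/* Prefer the compare-and-branch (cbcond) instruction when the CPU
 * advertises it and both the branch displacement and any immediate are
 * small enough: it folds the comparison and the branch into a single
 * instruction with no delay slot.  Otherwise fall back to a cmp/btst
 * followed by a conditional branch and a nop.
 */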
658  static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
659  				   const s32 imm, bool is_imm, int branch_dst,
660  				   struct jit_ctx *ctx)
661  {
662  	bool use_cbcond = (sparc64_elf_hwcap & AV_SPARC_CBCOND) != 0;
663  	const u8 tmp = bpf2sparc[TMP_REG_1];
664  
665  	branch_dst = ctx->offset[branch_dst];
666  
667  	if (!is_simm10(branch_dst - ctx->idx) ||
668  	    BPF_OP(code) == BPF_JSET)
669  		use_cbcond = false;
670  
671  	if (is_imm) {
672  		bool fits = true;
673  
674  		if (use_cbcond) {
675  			if (!is_simm5(imm))
676  				fits = false;
677  		} else if (!is_simm13(imm)) {
678  			fits = false;
679  		}
680  		if (!fits) {
681  			ctx->tmp_1_used = true;
682  			emit_loadimm_sext(imm, tmp, ctx);
683  			src = tmp;
684  			is_imm = false;
685  		}
686  	}
687  
688  	if (!use_cbcond) {
689  		u32 br_opcode;
690  
691  		if (BPF_OP(code) == BPF_JSET) {
692  			if (is_imm)
693  				emit_btsti(dst, imm, ctx);
694  			else
695  				emit_btst(dst, src, ctx);
696  		} else {
697  			if (is_imm)
698  				emit_cmpi(dst, imm, ctx);
699  			else
700  				emit_cmp(dst, src, ctx);
701  		}
702  		switch (BPF_OP(code)) {
703  		case BPF_JEQ:
704  			br_opcode = BE;
705  			break;
706  		case BPF_JGT:
707  			br_opcode = BGU;
708  			break;
709  		case BPF_JLT:
710  			br_opcode = BLU;
711  			break;
712  		case BPF_JGE:
713  			br_opcode = BGEU;
714  			break;
715  		case BPF_JLE:
716  			br_opcode = BLEU;
717  			break;
718  		case BPF_JSET:
719  		case BPF_JNE:
720  			br_opcode = BNE;
721  			break;
722  		case BPF_JSGT:
723  			br_opcode = BG;
724  			break;
725  		case BPF_JSLT:
726  			br_opcode = BL;
727  			break;
728  		case BPF_JSGE:
729  			br_opcode = BGE;
730  			break;
731  		case BPF_JSLE:
732  			br_opcode = BLE;
733  			break;
734  		default:
735  			/* Make sure we don't leak kernel information to the
736  			 * user.
737  			 */
738  			return -EFAULT;
739  		}
740  		emit_branch(br_opcode, ctx->idx, branch_dst, ctx);
741  		emit_nop(ctx);
742  	} else {
743  		u32 cbcond_opcode;
744  
745  		switch (BPF_OP(code)) {
746  		case BPF_JEQ:
747  			cbcond_opcode = CBCONDE;
748  			break;
749  		case BPF_JGT:
750  			cbcond_opcode = CBCONDGU;
751  			break;
752  		case BPF_JLT:
753  			cbcond_opcode = CBCONDLU;
754  			break;
755  		case BPF_JGE:
756  			cbcond_opcode = CBCONDGEU;
757  			break;
758  		case BPF_JLE:
759  			cbcond_opcode = CBCONDLEU;
760  			break;
761  		case BPF_JNE:
762  			cbcond_opcode = CBCONDNE;
763  			break;
764  		case BPF_JSGT:
765  			cbcond_opcode = CBCONDG;
766  			break;
767  		case BPF_JSLT:
768  			cbcond_opcode = CBCONDL;
769  			break;
770  		case BPF_JSGE:
771  			cbcond_opcode = CBCONDGE;
772  			break;
773  		case BPF_JSLE:
774  			cbcond_opcode = CBCONDLE;
775  			break;
776  		default:
777  			/* Make sure we don't leak kernel information to the
778  			 * user.
779  			 */
780  			return -EFAULT;
781  		}
782  		cbcond_opcode |= CBCOND_OP;
783  		if (is_imm)
784  			emit_cbcondi(cbcond_opcode, ctx->idx, branch_dst,
785  				     dst, imm, ctx);
786  		else
787  			emit_cbcond(cbcond_opcode, ctx->idx, branch_dst,
788  				    dst, src, ctx);
789  	}
790  	return 0;
791  }
792  
793  /* Just skip the save instruction and the ctx register move.  */
794  #define BPF_TAILCALL_PROLOGUE_SKIP	32
795  #define BPF_TAILCALL_CNT_SP_OFF		(STACK_BIAS + 128)
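/* BPF_TAILCALL_PROLOGUE_SKIP relies on the prologue below having a
 * fixed size: nop placeholders are emitted when no tail call or frame
 * pointer is used, so every program's prologue is the same eight
 * instructions and a tail call can jump straight past it.
 */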
796  
797  static void build_prologue(struct jit_ctx *ctx)
798  {
799  	s32 stack_needed = BASE_STACKFRAME;
800  
801  	if (ctx->saw_frame_pointer || ctx->saw_tail_call) {
802  		struct bpf_prog *prog = ctx->prog;
803  		u32 stack_depth;
804  
805  		stack_depth = prog->aux->stack_depth;
806  		stack_needed += round_up(stack_depth, 16);
807  	}
808  
809  	if (ctx->saw_tail_call)
810  		stack_needed += 8;
811  
812  	/* save %sp, -stack_needed, %sp */
813  	emit(SAVE | IMMED | RS1(SP) | S13(-stack_needed) | RD(SP), ctx);
814  
815  	/* tail_call_cnt = 0 */
816  	if (ctx->saw_tail_call) {
817  		u32 off = BPF_TAILCALL_CNT_SP_OFF;
818  
819  		emit(ST32 | IMMED | RS1(SP) | S13(off) | RD(G0), ctx);
820  	} else {
821  		emit_nop(ctx);
822  	}
823  	if (ctx->saw_frame_pointer) {
824  		const u8 vfp = bpf2sparc[BPF_REG_FP];
825  
826  		emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx);
827  	} else {
828  		emit_nop(ctx);
829  	}
830  
831  	emit_reg_move(I0, O0, ctx);
832  	emit_reg_move(I1, O1, ctx);
833  	emit_reg_move(I2, O2, ctx);
834  	emit_reg_move(I3, O3, ctx);
835  	emit_reg_move(I4, O4, ctx);
836  	/* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
837  }
838  
839  static void build_epilogue(struct jit_ctx *ctx)
840  {
841  	ctx->epilogue_offset = ctx->idx;
842  
843  	/* ret (jmpl %i7 + 8, %g0) */
844  	emit(JMPL | IMMED | RS1(I7) | S13(8) | RD(G0), ctx);
845  
846  	/* restore %o5, %g0, %o0 */
847  	emit(RESTORE | RS1(bpf2sparc[BPF_REG_0]) | RS2(G0) | RD(O0), ctx);
848  }
849  
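/* Out-of-line tail call sequence.  The OFFSET1/2/3 instruction counts
 * below say how far each guard branch must jump to skip the remainder
 * of the sequence, so they have to be kept in sync with the number of
 * instructions emitted after the corresponding branch.
 */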
850  static void emit_tail_call(struct jit_ctx *ctx)
851  {
852  	const u8 bpf_array = bpf2sparc[BPF_REG_2];
853  	const u8 bpf_index = bpf2sparc[BPF_REG_3];
854  	const u8 tmp = bpf2sparc[TMP_REG_1];
855  	u32 off;
856  
857  	ctx->saw_tail_call = true;
858  
859  	off = offsetof(struct bpf_array, map.max_entries);
860  	emit(LD32 | IMMED | RS1(bpf_array) | S13(off) | RD(tmp), ctx);
861  	emit_cmp(bpf_index, tmp, ctx);
862  #define OFFSET1 17
863  	emit_branch(BGEU, ctx->idx, ctx->idx + OFFSET1, ctx);
864  	emit_nop(ctx);
865  
866  	off = BPF_TAILCALL_CNT_SP_OFF;
867  	emit(LD32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx);
868  	emit_cmpi(tmp, MAX_TAIL_CALL_CNT, ctx);
869  #define OFFSET2 13
870  	emit_branch(BGEU, ctx->idx, ctx->idx + OFFSET2, ctx);
871  	emit_nop(ctx);
872  
873  	emit_alu_K(ADD, tmp, 1, ctx);
874  	off = BPF_TAILCALL_CNT_SP_OFF;
875  	emit(ST32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx);
876  
877  	emit_alu3_K(SLL, bpf_index, 3, tmp, ctx);
878  	emit_alu(ADD, bpf_array, tmp, ctx);
879  	off = offsetof(struct bpf_array, ptrs);
880  	emit(LD64 | IMMED | RS1(tmp) | S13(off) | RD(tmp), ctx);
881  
882  	emit_cmpi(tmp, 0, ctx);
883  #define OFFSET3 5
884  	emit_branch(BE, ctx->idx, ctx->idx + OFFSET3, ctx);
885  	emit_nop(ctx);
886  
887  	off = offsetof(struct bpf_prog, bpf_func);
888  	emit(LD64 | IMMED | RS1(tmp) | S13(off) | RD(tmp), ctx);
889  
890  	off = BPF_TAILCALL_PROLOGUE_SKIP;
891  	emit(JMPL | IMMED | RS1(tmp) | S13(off) | RD(G0), ctx);
892  	emit_nop(ctx);
893  }
894  
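/* Emit code for a single eBPF instruction.  Returns 1 when the
 * following instruction was consumed as well (the second half of a
 * 64-bit immediate load, or an explicit zero-extension that the
 * emitted code already performs), 0 on success and a negative errno
 * on failure.
 */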
895  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
896  {
897  	const u8 code = insn->code;
898  	const u8 dst = bpf2sparc[insn->dst_reg];
899  	const u8 src = bpf2sparc[insn->src_reg];
900  	const int i = insn - ctx->prog->insnsi;
901  	const s16 off = insn->off;
902  	const s32 imm = insn->imm;
903  
904  	if (insn->src_reg == BPF_REG_FP)
905  		ctx->saw_frame_pointer = true;
906  
907  	switch (code) {
908  	/* dst = src */
909  	case BPF_ALU | BPF_MOV | BPF_X:
910  		emit_alu3_K(SRL, src, 0, dst, ctx);
911  		if (insn_is_zext(&insn[1]))
912  			return 1;
913  		break;
914  	case BPF_ALU64 | BPF_MOV | BPF_X:
915  		emit_reg_move(src, dst, ctx);
916  		break;
917  	/* dst = dst OP src */
918  	case BPF_ALU | BPF_ADD | BPF_X:
919  	case BPF_ALU64 | BPF_ADD | BPF_X:
920  		emit_alu(ADD, src, dst, ctx);
921  		goto do_alu32_trunc;
922  	case BPF_ALU | BPF_SUB | BPF_X:
923  	case BPF_ALU64 | BPF_SUB | BPF_X:
924  		emit_alu(SUB, src, dst, ctx);
925  		goto do_alu32_trunc;
926  	case BPF_ALU | BPF_AND | BPF_X:
927  	case BPF_ALU64 | BPF_AND | BPF_X:
928  		emit_alu(AND, src, dst, ctx);
929  		goto do_alu32_trunc;
930  	case BPF_ALU | BPF_OR | BPF_X:
931  	case BPF_ALU64 | BPF_OR | BPF_X:
932  		emit_alu(OR, src, dst, ctx);
933  		goto do_alu32_trunc;
934  	case BPF_ALU | BPF_XOR | BPF_X:
935  	case BPF_ALU64 | BPF_XOR | BPF_X:
936  		emit_alu(XOR, src, dst, ctx);
937  		goto do_alu32_trunc;
938  	case BPF_ALU | BPF_MUL | BPF_X:
939  		emit_alu(MUL, src, dst, ctx);
940  		goto do_alu32_trunc;
941  	case BPF_ALU64 | BPF_MUL | BPF_X:
942  		emit_alu(MULX, src, dst, ctx);
943  		break;
944  	case BPF_ALU | BPF_DIV | BPF_X:
945  		emit_write_y(G0, ctx);
946  		emit_alu(DIV, src, dst, ctx);
947  		if (insn_is_zext(&insn[1]))
948  			return 1;
949  		break;
950  	case BPF_ALU64 | BPF_DIV | BPF_X:
951  		emit_alu(UDIVX, src, dst, ctx);
952  		break;
953  	case BPF_ALU | BPF_MOD | BPF_X: {
954  		const u8 tmp = bpf2sparc[TMP_REG_1];
955  
956  		ctx->tmp_1_used = true;
957  
958  		emit_write_y(G0, ctx);
959  		emit_alu3(DIV, dst, src, tmp, ctx);
960  		emit_alu3(MULX, tmp, src, tmp, ctx);
961  		emit_alu3(SUB, dst, tmp, dst, ctx);
962  		goto do_alu32_trunc;
963  	}
964  	case BPF_ALU64 | BPF_MOD | BPF_X: {
965  		const u8 tmp = bpf2sparc[TMP_REG_1];
966  
967  		ctx->tmp_1_used = true;
968  
969  		emit_alu3(UDIVX, dst, src, tmp, ctx);
970  		emit_alu3(MULX, tmp, src, tmp, ctx);
971  		emit_alu3(SUB, dst, tmp, dst, ctx);
972  		break;
973  	}
974  	case BPF_ALU | BPF_LSH | BPF_X:
975  		emit_alu(SLL, src, dst, ctx);
976  		goto do_alu32_trunc;
977  	case BPF_ALU64 | BPF_LSH | BPF_X:
978  		emit_alu(SLLX, src, dst, ctx);
979  		break;
980  	case BPF_ALU | BPF_RSH | BPF_X:
981  		emit_alu(SRL, src, dst, ctx);
982  		if (insn_is_zext(&insn[1]))
983  			return 1;
984  		break;
985  	case BPF_ALU64 | BPF_RSH | BPF_X:
986  		emit_alu(SRLX, src, dst, ctx);
987  		break;
988  	case BPF_ALU | BPF_ARSH | BPF_X:
989  		emit_alu(SRA, src, dst, ctx);
990  		goto do_alu32_trunc;
991  	case BPF_ALU64 | BPF_ARSH | BPF_X:
992  		emit_alu(SRAX, src, dst, ctx);
993  		break;
994  
995  	/* dst = -dst */
996  	case BPF_ALU | BPF_NEG:
997  	case BPF_ALU64 | BPF_NEG:
998  		emit(SUB | RS1(0) | RS2(dst) | RD(dst), ctx);
999  		goto do_alu32_trunc;
1000  
1001  	case BPF_ALU | BPF_END | BPF_FROM_BE:
1002  		switch (imm) {
1003  		case 16:
1004  			emit_alu_K(SLL, dst, 16, ctx);
1005  			emit_alu_K(SRL, dst, 16, ctx);
1006  			if (insn_is_zext(&insn[1]))
1007  				return 1;
1008  			break;
1009  		case 32:
1010  			if (!ctx->prog->aux->verifier_zext)
1011  				emit_alu_K(SRL, dst, 0, ctx);
1012  			break;
1013  		case 64:
1014  			/* nop */
1015  			break;
1016  
1017  		}
1018  		break;
1019  
1020  	/* dst = BSWAP##imm(dst) */
1021  	case BPF_ALU | BPF_END | BPF_FROM_LE: {
1022  		const u8 tmp = bpf2sparc[TMP_REG_1];
1023  		const u8 tmp2 = bpf2sparc[TMP_REG_2];
1024  
1025  		ctx->tmp_1_used = true;
1026  		switch (imm) {
1027  		case 16:
1028  			emit_alu3_K(AND, dst, 0xff, tmp, ctx);
1029  			emit_alu3_K(SRL, dst, 8, dst, ctx);
1030  			emit_alu3_K(AND, dst, 0xff, dst, ctx);
1031  			emit_alu3_K(SLL, tmp, 8, tmp, ctx);
1032  			emit_alu(OR, tmp, dst, ctx);
1033  			if (insn_is_zext(&insn[1]))
1034  				return 1;
1035  			break;
1036  
1037  		case 32:
1038  			ctx->tmp_2_used = true;
1039  			emit_alu3_K(SRL, dst, 24, tmp, ctx);	/* tmp  = dst >> 24 */
1040  			emit_alu3_K(SRL, dst, 16, tmp2, ctx);	/* tmp2 = dst >> 16 */
1041  			emit_alu3_K(AND, tmp2, 0xff, tmp2, ctx);/* tmp2 = tmp2 & 0xff */
1042  			emit_alu3_K(SLL, tmp2, 8, tmp2, ctx);	/* tmp2 = tmp2 << 8 */
1043  			emit_alu(OR, tmp2, tmp, ctx);		/* tmp  = tmp | tmp2 */
1044  			emit_alu3_K(SRL, dst, 8, tmp2, ctx);	/* tmp2 = dst >> 8 */
1045  			emit_alu3_K(AND, tmp2, 0xff, tmp2, ctx);/* tmp2 = tmp2 & 0xff */
1046  			emit_alu3_K(SLL, tmp2, 16, tmp2, ctx);	/* tmp2 = tmp2 << 16 */
1047  			emit_alu(OR, tmp2, tmp, ctx);		/* tmp  = tmp | tmp2 */
1048  			emit_alu3_K(AND, dst, 0xff, dst, ctx);	/* dst	= dst & 0xff */
1049  			emit_alu3_K(SLL, dst, 24, dst, ctx);	/* dst  = dst << 24 */
1050  			emit_alu(OR, tmp, dst, ctx);		/* dst  = dst | tmp */
1051  			if (insn_is_zext(&insn[1]))
1052  				return 1;
1053  			break;
1054  
1055  		case 64:
1056  			emit_alu3_K(ADD, SP, STACK_BIAS + 128, tmp, ctx);
1057  			emit(ST64 | RS1(tmp) | RS2(G0) | RD(dst), ctx);
1058  			emit(LD64A | ASI(ASI_PL) | RS1(tmp) | RS2(G0) | RD(dst), ctx);
1059  			break;
1060  		}
1061  		break;
1062  	}
1063  	/* dst = imm */
1064  	case BPF_ALU | BPF_MOV | BPF_K:
1065  		emit_loadimm32(imm, dst, ctx);
1066  		if (insn_is_zext(&insn[1]))
1067  			return 1;
1068  		break;
1069  	case BPF_ALU64 | BPF_MOV | BPF_K:
1070  		emit_loadimm_sext(imm, dst, ctx);
1071  		break;
1072  	/* dst = dst OP imm */
1073  	case BPF_ALU | BPF_ADD | BPF_K:
1074  	case BPF_ALU64 | BPF_ADD | BPF_K:
1075  		emit_alu_K(ADD, dst, imm, ctx);
1076  		goto do_alu32_trunc;
1077  	case BPF_ALU | BPF_SUB | BPF_K:
1078  	case BPF_ALU64 | BPF_SUB | BPF_K:
1079  		emit_alu_K(SUB, dst, imm, ctx);
1080  		goto do_alu32_trunc;
1081  	case BPF_ALU | BPF_AND | BPF_K:
1082  	case BPF_ALU64 | BPF_AND | BPF_K:
1083  		emit_alu_K(AND, dst, imm, ctx);
1084  		goto do_alu32_trunc;
1085  	case BPF_ALU | BPF_OR | BPF_K:
1086  	case BPF_ALU64 | BPF_OR | BPF_K:
1087  		emit_alu_K(OR, dst, imm, ctx);
1088  		goto do_alu32_trunc;
1089  	case BPF_ALU | BPF_XOR | BPF_K:
1090  	case BPF_ALU64 | BPF_XOR | BPF_K:
1091  		emit_alu_K(XOR, dst, imm, ctx);
1092  		goto do_alu32_trunc;
1093  	case BPF_ALU | BPF_MUL | BPF_K:
1094  		emit_alu_K(MUL, dst, imm, ctx);
1095  		goto do_alu32_trunc;
1096  	case BPF_ALU64 | BPF_MUL | BPF_K:
1097  		emit_alu_K(MULX, dst, imm, ctx);
1098  		break;
1099  	case BPF_ALU | BPF_DIV | BPF_K:
1100  		if (imm == 0)
1101  			return -EINVAL;
1102  
1103  		emit_write_y(G0, ctx);
1104  		emit_alu_K(DIV, dst, imm, ctx);
1105  		goto do_alu32_trunc;
1106  	case BPF_ALU64 | BPF_DIV | BPF_K:
1107  		if (imm == 0)
1108  			return -EINVAL;
1109  
1110  		emit_alu_K(UDIVX, dst, imm, ctx);
1111  		break;
1112  	case BPF_ALU64 | BPF_MOD | BPF_K:
1113  	case BPF_ALU | BPF_MOD | BPF_K: {
1114  		const u8 tmp = bpf2sparc[TMP_REG_2];
1115  		unsigned int div;
1116  
1117  		if (imm == 0)
1118  			return -EINVAL;
1119  
1120  		div = (BPF_CLASS(code) == BPF_ALU64) ? UDIVX : DIV;
1121  
1122  		ctx->tmp_2_used = true;
1123  
1124  		if (BPF_CLASS(code) != BPF_ALU64)
1125  			emit_write_y(G0, ctx);
1126  		if (is_simm13(imm)) {
1127  			emit(div | IMMED | RS1(dst) | S13(imm) | RD(tmp), ctx);
1128  			emit(MULX | IMMED | RS1(tmp) | S13(imm) | RD(tmp), ctx);
1129  			emit(SUB | RS1(dst) | RS2(tmp) | RD(dst), ctx);
1130  		} else {
1131  			const u8 tmp1 = bpf2sparc[TMP_REG_1];
1132  
1133  			ctx->tmp_1_used = true;
1134  
1135  			emit_set_const_sext(imm, tmp1, ctx);
1136  			emit(div | RS1(dst) | RS2(tmp1) | RD(tmp), ctx);
1137  			emit(MULX | RS1(tmp) | RS2(tmp1) | RD(tmp), ctx);
1138  			emit(SUB | RS1(dst) | RS2(tmp) | RD(dst), ctx);
1139  		}
1140  		goto do_alu32_trunc;
1141  	}
1142  	case BPF_ALU | BPF_LSH | BPF_K:
1143  		emit_alu_K(SLL, dst, imm, ctx);
1144  		goto do_alu32_trunc;
1145  	case BPF_ALU64 | BPF_LSH | BPF_K:
1146  		emit_alu_K(SLLX, dst, imm, ctx);
1147  		break;
1148  	case BPF_ALU | BPF_RSH | BPF_K:
1149  		emit_alu_K(SRL, dst, imm, ctx);
1150  		if (insn_is_zext(&insn[1]))
1151  			return 1;
1152  		break;
1153  	case BPF_ALU64 | BPF_RSH | BPF_K:
1154  		emit_alu_K(SRLX, dst, imm, ctx);
1155  		break;
1156  	case BPF_ALU | BPF_ARSH | BPF_K:
1157  		emit_alu_K(SRA, dst, imm, ctx);
1158  		goto do_alu32_trunc;
1159  	case BPF_ALU64 | BPF_ARSH | BPF_K:
1160  		emit_alu_K(SRAX, dst, imm, ctx);
1161  		break;
1162  
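	/* Shared tail for the 32-bit ALU cases: unless the verifier
	 * already guarantees zero extension, clear the upper 32 bits
	 * with "srl dst, 0, dst" so the 64-bit register matches eBPF's
	 * 32-bit wrap-around semantics.
	 */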
1163  	do_alu32_trunc:
1164  		if (BPF_CLASS(code) == BPF_ALU &&
1165  		    !ctx->prog->aux->verifier_zext)
1166  			emit_alu_K(SRL, dst, 0, ctx);
1167  		break;
1168  
1169  	/* JUMP off */
1170  	case BPF_JMP | BPF_JA:
1171  		emit_branch(BA, ctx->idx, ctx->offset[i + off], ctx);
1172  		emit_nop(ctx);
1173  		break;
1174  	/* IF (dst COND src) JUMP off */
1175  	case BPF_JMP | BPF_JEQ | BPF_X:
1176  	case BPF_JMP | BPF_JGT | BPF_X:
1177  	case BPF_JMP | BPF_JLT | BPF_X:
1178  	case BPF_JMP | BPF_JGE | BPF_X:
1179  	case BPF_JMP | BPF_JLE | BPF_X:
1180  	case BPF_JMP | BPF_JNE | BPF_X:
1181  	case BPF_JMP | BPF_JSGT | BPF_X:
1182  	case BPF_JMP | BPF_JSLT | BPF_X:
1183  	case BPF_JMP | BPF_JSGE | BPF_X:
1184  	case BPF_JMP | BPF_JSLE | BPF_X:
1185  	case BPF_JMP | BPF_JSET | BPF_X: {
1186  		int err;
1187  
1188  		err = emit_compare_and_branch(code, dst, src, 0, false, i + off, ctx);
1189  		if (err)
1190  			return err;
1191  		break;
1192  	}
1193  	/* IF (dst COND imm) JUMP off */
1194  	case BPF_JMP | BPF_JEQ | BPF_K:
1195  	case BPF_JMP | BPF_JGT | BPF_K:
1196  	case BPF_JMP | BPF_JLT | BPF_K:
1197  	case BPF_JMP | BPF_JGE | BPF_K:
1198  	case BPF_JMP | BPF_JLE | BPF_K:
1199  	case BPF_JMP | BPF_JNE | BPF_K:
1200  	case BPF_JMP | BPF_JSGT | BPF_K:
1201  	case BPF_JMP | BPF_JSLT | BPF_K:
1202  	case BPF_JMP | BPF_JSGE | BPF_K:
1203  	case BPF_JMP | BPF_JSLE | BPF_K:
1204  	case BPF_JMP | BPF_JSET | BPF_K: {
1205  		int err;
1206  
1207  		err = emit_compare_and_branch(code, dst, 0, imm, true, i + off, ctx);
1208  		if (err)
1209  			return err;
1210  		break;
1211  	}
1212  
1213  	/* function call */
1214  	case BPF_JMP | BPF_CALL:
1215  	{
1216  		u8 *func = ((u8 *)__bpf_call_base) + imm;
1217  
1218  		ctx->saw_call = true;
1219  
1220  		emit_call((u32 *)func, ctx);
1221  		emit_nop(ctx);
1222  
1223  		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
1224  		break;
1225  	}
1226  
1227  	/* tail call */
1228  	case BPF_JMP | BPF_TAIL_CALL:
1229  		emit_tail_call(ctx);
1230  		break;
1231  
1232  	/* function return */
1233  	case BPF_JMP | BPF_EXIT:
1234  		/* Optimization: when the last instruction is EXIT,
1235  		   simply fall through to the epilogue. */
1236  		if (i == ctx->prog->len - 1)
1237  			break;
1238  		emit_branch(BA, ctx->idx, ctx->epilogue_offset, ctx);
1239  		emit_nop(ctx);
1240  		break;
1241  
1242  	/* dst = imm64 */
1243  	case BPF_LD | BPF_IMM | BPF_DW:
1244  	{
1245  		const struct bpf_insn insn1 = insn[1];
1246  		u64 imm64;
1247  
1248  		imm64 = (u64)insn1.imm << 32 | (u32)imm;
1249  		emit_loadimm64(imm64, dst, ctx);
1250  
1251  		return 1;
1252  	}
1253  
1254  	/* LDX: dst = *(size *)(src + off) */
1255  	case BPF_LDX | BPF_MEM | BPF_W:
1256  	case BPF_LDX | BPF_MEM | BPF_H:
1257  	case BPF_LDX | BPF_MEM | BPF_B:
1258  	case BPF_LDX | BPF_MEM | BPF_DW: {
1259  		const u8 tmp = bpf2sparc[TMP_REG_1];
1260  		u32 opcode = 0, rs2;
1261  
1262  		ctx->tmp_1_used = true;
1263  		switch (BPF_SIZE(code)) {
1264  		case BPF_W:
1265  			opcode = LD32;
1266  			break;
1267  		case BPF_H:
1268  			opcode = LD16;
1269  			break;
1270  		case BPF_B:
1271  			opcode = LD8;
1272  			break;
1273  		case BPF_DW:
1274  			opcode = LD64;
1275  			break;
1276  		}
1277  
1278  		if (is_simm13(off)) {
1279  			opcode |= IMMED;
1280  			rs2 = S13(off);
1281  		} else {
1282  			emit_loadimm(off, tmp, ctx);
1283  			rs2 = RS2(tmp);
1284  		}
1285  		emit(opcode | RS1(src) | rs2 | RD(dst), ctx);
1286  		if (opcode != LD64 && insn_is_zext(&insn[1]))
1287  			return 1;
1288  		break;
1289  	}
1290  	/* speculation barrier */
1291  	case BPF_ST | BPF_NOSPEC:
1292  		break;
1293  	/* ST: *(size *)(dst + off) = imm */
1294  	case BPF_ST | BPF_MEM | BPF_W:
1295  	case BPF_ST | BPF_MEM | BPF_H:
1296  	case BPF_ST | BPF_MEM | BPF_B:
1297  	case BPF_ST | BPF_MEM | BPF_DW: {
1298  		const u8 tmp = bpf2sparc[TMP_REG_1];
1299  		const u8 tmp2 = bpf2sparc[TMP_REG_2];
1300  		u32 opcode = 0, rs2;
1301  
1302  		if (insn->dst_reg == BPF_REG_FP)
1303  			ctx->saw_frame_pointer = true;
1304  
1305  		ctx->tmp_2_used = true;
1306  		emit_loadimm(imm, tmp2, ctx);
1307  
1308  		switch (BPF_SIZE(code)) {
1309  		case BPF_W:
1310  			opcode = ST32;
1311  			break;
1312  		case BPF_H:
1313  			opcode = ST16;
1314  			break;
1315  		case BPF_B:
1316  			opcode = ST8;
1317  			break;
1318  		case BPF_DW:
1319  			opcode = ST64;
1320  			break;
1321  		}
1322  
1323  		if (is_simm13(off)) {
1324  			opcode |= IMMED;
1325  			rs2 = S13(off);
1326  		} else {
1327  			ctx->tmp_1_used = true;
1328  			emit_loadimm(off, tmp, ctx);
1329  			rs2 = RS2(tmp);
1330  		}
1331  		emit(opcode | RS1(dst) | rs2 | RD(tmp2), ctx);
1332  		break;
1333  	}
1334  
1335  	/* STX: *(size *)(dst + off) = src */
1336  	case BPF_STX | BPF_MEM | BPF_W:
1337  	case BPF_STX | BPF_MEM | BPF_H:
1338  	case BPF_STX | BPF_MEM | BPF_B:
1339  	case BPF_STX | BPF_MEM | BPF_DW: {
1340  		const u8 tmp = bpf2sparc[TMP_REG_1];
1341  		u32 opcode = 0, rs2;
1342  
1343  		if (insn->dst_reg == BPF_REG_FP)
1344  			ctx->saw_frame_pointer = true;
1345  
1346  		switch (BPF_SIZE(code)) {
1347  		case BPF_W:
1348  			opcode = ST32;
1349  			break;
1350  		case BPF_H:
1351  			opcode = ST16;
1352  			break;
1353  		case BPF_B:
1354  			opcode = ST8;
1355  			break;
1356  		case BPF_DW:
1357  			opcode = ST64;
1358  			break;
1359  		}
1360  		if (is_simm13(off)) {
1361  			opcode |= IMMED;
1362  			rs2 = S13(off);
1363  		} else {
1364  			ctx->tmp_1_used = true;
1365  			emit_loadimm(off, tmp, ctx);
1366  			rs2 = RS2(tmp);
1367  		}
1368  		emit(opcode | RS1(dst) | rs2 | RD(src), ctx);
1369  		break;
1370  	}
1371  
1372  	case BPF_STX | BPF_ATOMIC | BPF_W: {
1373  		const u8 tmp = bpf2sparc[TMP_REG_1];
1374  		const u8 tmp2 = bpf2sparc[TMP_REG_2];
1375  		const u8 tmp3 = bpf2sparc[TMP_REG_3];
1376  
1377  		if (insn->imm != BPF_ADD) {
1378  			pr_err_once("unknown atomic op %02x\n", insn->imm);
1379  			return -EINVAL;
1380  		}
1381  
1382  		/* lock *(u32 *)(dst + off) += src */
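		/* There is no atomic add instruction, so load the old
		 * value, add src and try to cas the sum back in; cas
		 * leaves the prior memory contents in tmp3, and the
		 * branch retries from the load if another CPU changed
		 * the word in the meantime.
		 */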
1383  
1384  		if (insn->dst_reg == BPF_REG_FP)
1385  			ctx->saw_frame_pointer = true;
1386  
1387  		ctx->tmp_1_used = true;
1388  		ctx->tmp_2_used = true;
1389  		ctx->tmp_3_used = true;
1390  		emit_loadimm(off, tmp, ctx);
1391  		emit_alu3(ADD, dst, tmp, tmp, ctx);
1392  
1393  		emit(LD32 | RS1(tmp) | RS2(G0) | RD(tmp2), ctx);
1394  		emit_alu3(ADD, tmp2, src, tmp3, ctx);
1395  		emit(CAS | ASI(ASI_P) | RS1(tmp) | RS2(tmp2) | RD(tmp3), ctx);
1396  		emit_cmp(tmp2, tmp3, ctx);
1397  		emit_branch(BNE, 4, 0, ctx);
1398  		emit_nop(ctx);
1399  		break;
1400  	}
1401  	/* STX XADD: lock *(u64 *)(dst + off) += src */
1402  	case BPF_STX | BPF_ATOMIC | BPF_DW: {
1403  		const u8 tmp = bpf2sparc[TMP_REG_1];
1404  		const u8 tmp2 = bpf2sparc[TMP_REG_2];
1405  		const u8 tmp3 = bpf2sparc[TMP_REG_3];
1406  
1407  		if (insn->imm != BPF_ADD) {
1408  			pr_err_once("unknown atomic op %02x\n", insn->imm);
1409  			return -EINVAL;
1410  		}
1411  
1412  		if (insn->dst_reg == BPF_REG_FP)
1413  			ctx->saw_frame_pointer = true;
1414  
1415  		ctx->tmp_1_used = true;
1416  		ctx->tmp_2_used = true;
1417  		ctx->tmp_3_used = true;
1418  		emit_loadimm(off, tmp, ctx);
1419  		emit_alu3(ADD, dst, tmp, tmp, ctx);
1420  
1421  		emit(LD64 | RS1(tmp) | RS2(G0) | RD(tmp2), ctx);
1422  		emit_alu3(ADD, tmp2, src, tmp3, ctx);
1423  		emit(CASX | ASI(ASI_P) | RS1(tmp) | RS2(tmp2) | RD(tmp3), ctx);
1424  		emit_cmp(tmp2, tmp3, ctx);
1425  		emit_branch(BNE, 4, 0, ctx);
1426  		emit_nop(ctx);
1427  		break;
1428  	}
1429  
1430  	default:
1431  		pr_err_once("unknown opcode %02x\n", code);
1432  		return -EINVAL;
1433  	}
1434  
1435  	return 0;
1436  }
1437  
1438  static int build_body(struct jit_ctx *ctx)
1439  {
1440  	const struct bpf_prog *prog = ctx->prog;
1441  	int i;
1442  
1443  	for (i = 0; i < prog->len; i++) {
1444  		const struct bpf_insn *insn = &prog->insnsi[i];
1445  		int ret;
1446  
1447  		ret = build_insn(insn, ctx);
1448  
1449  		if (ret > 0) {
1450  			i++;
1451  			ctx->offset[i] = ctx->idx;
1452  			continue;
1453  		}
1454  		ctx->offset[i] = ctx->idx;
1455  		if (ret)
1456  			return ret;
1457  	}
1458  	return 0;
1459  }
1460  
1461  static void jit_fill_hole(void *area, unsigned int size)
1462  {
1463  	u32 *ptr;
1464  	/* We are guaranteed to have aligned memory. */
1465  	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
1466  		*ptr++ = 0x91d02005; /* ta 5 */
1467  }
1468  
1469  bool bpf_jit_needs_zext(void)
1470  {
1471  	return true;
1472  }
1473  
1474  struct sparc64_jit_data {
1475  	struct bpf_binary_header *header;
1476  	u8 *image;
1477  	struct jit_ctx ctx;
1478  };
1479  
1480  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1481  {
1482  	struct bpf_prog *tmp, *orig_prog = prog;
1483  	struct sparc64_jit_data *jit_data;
1484  	struct bpf_binary_header *header;
1485  	u32 prev_image_size, image_size;
1486  	bool tmp_blinded = false;
1487  	bool extra_pass = false;
1488  	struct jit_ctx ctx;
1489  	u8 *image_ptr;
1490  	int pass, i;
1491  
1492  	if (!prog->jit_requested)
1493  		return orig_prog;
1494  
1495  	tmp = bpf_jit_blind_constants(prog);
1496  	/* If blinding was requested and we failed during blinding,
1497  	 * we must fall back to the interpreter.
1498  	 */
1499  	if (IS_ERR(tmp))
1500  		return orig_prog;
1501  	if (tmp != prog) {
1502  		tmp_blinded = true;
1503  		prog = tmp;
1504  	}
1505  
1506  	jit_data = prog->aux->jit_data;
1507  	if (!jit_data) {
1508  		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1509  		if (!jit_data) {
1510  			prog = orig_prog;
1511  			goto out;
1512  		}
1513  		prog->aux->jit_data = jit_data;
1514  	}
1515  	if (jit_data->ctx.offset) {
1516  		ctx = jit_data->ctx;
1517  		image_ptr = jit_data->image;
1518  		header = jit_data->header;
1519  		extra_pass = true;
1520  		image_size = sizeof(u32) * ctx.idx;
1521  		prev_image_size = image_size;
1522  		pass = 1;
1523  		goto skip_init_ctx;
1524  	}
1525  
1526  	memset(&ctx, 0, sizeof(ctx));
1527  	ctx.prog = prog;
1528  
1529  	ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL);
1530  	if (ctx.offset == NULL) {
1531  		prog = orig_prog;
1532  		goto out_off;
1533  	}
1534  
1535  	/* Longest sequence emitted is for bswap32, 12 instructions.  Pre-cook
1536  	 * the offset array so that we converge faster.
1537  	 */
1538  	for (i = 0; i < prog->len; i++)
1539  		ctx.offset[i] = i * (12 * 4);
1540  
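	/* Branch offsets depend on instruction placement and vice versa,
	 * so keep re-JITing until two consecutive passes produce the same
	 * image size; the pass limit is just a safety net.
	 */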
1541  	prev_image_size = ~0U;
1542  	for (pass = 1; pass < 40; pass++) {
1543  		ctx.idx = 0;
1544  
1545  		build_prologue(&ctx);
1546  		if (build_body(&ctx)) {
1547  			prog = orig_prog;
1548  			goto out_off;
1549  		}
1550  		build_epilogue(&ctx);
1551  
1552  		if (bpf_jit_enable > 1)
1553  			pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass,
1554  				ctx.idx * 4,
1555  				ctx.tmp_1_used ? '1' : ' ',
1556  				ctx.tmp_2_used ? '2' : ' ',
1557  				ctx.tmp_3_used ? '3' : ' ',
1558  				ctx.saw_frame_pointer ? 'F' : ' ',
1559  				ctx.saw_call ? 'C' : ' ',
1560  				ctx.saw_tail_call ? 'T' : ' ');
1561  
1562  		if (ctx.idx * 4 == prev_image_size)
1563  			break;
1564  		prev_image_size = ctx.idx * 4;
1565  		cond_resched();
1566  	}
1567  
1568  	/* Now we know the actual image size. */
1569  	image_size = sizeof(u32) * ctx.idx;
1570  	header = bpf_jit_binary_alloc(image_size, &image_ptr,
1571  				      sizeof(u32), jit_fill_hole);
1572  	if (header == NULL) {
1573  		prog = orig_prog;
1574  		goto out_off;
1575  	}
1576  
1577  	ctx.image = (u32 *)image_ptr;
1578  skip_init_ctx:
1579  	ctx.idx = 0;
1580  
1581  	build_prologue(&ctx);
1582  
1583  	if (build_body(&ctx)) {
1584  		bpf_jit_binary_free(header);
1585  		prog = orig_prog;
1586  		goto out_off;
1587  	}
1588  
1589  	build_epilogue(&ctx);
1590  
1591  	if (ctx.idx * 4 != prev_image_size) {
1592  		pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n",
1593  		       prev_image_size, ctx.idx * 4);
1594  		bpf_jit_binary_free(header);
1595  		prog = orig_prog;
1596  		goto out_off;
1597  	}
1598  
1599  	if (bpf_jit_enable > 1)
1600  		bpf_jit_dump(prog->len, image_size, pass, ctx.image);
1601  
1602  	bpf_flush_icache(header, (u8 *)header + header->size);
1603  
1604  	if (!prog->is_func || extra_pass) {
1605  		if (bpf_jit_binary_lock_ro(header)) {
1606  			bpf_jit_binary_free(header);
1607  			prog = orig_prog;
1608  			goto out_off;
1609  		}
1610  	} else {
1611  		jit_data->ctx = ctx;
1612  		jit_data->image = image_ptr;
1613  		jit_data->header = header;
1614  	}
1615  
1616  	prog->bpf_func = (void *)ctx.image;
1617  	prog->jited = 1;
1618  	prog->jited_len = image_size;
1619  
1620  	if (!prog->is_func || extra_pass) {
1621  		bpf_prog_fill_jited_linfo(prog, ctx.offset);
1622  out_off:
1623  		kfree(ctx.offset);
1624  		kfree(jit_data);
1625  		prog->aux->jit_data = NULL;
1626  	}
1627  out:
1628  	if (tmp_blinded)
1629  		bpf_jit_prog_release_other(prog, prog == orig_prog ?
1630  					   tmp : orig_prog);
1631  	return prog;
1632  }
1633