// SPDX-License-Identifier: GPL-2.0
/*
 * guest access functions
 *
 * Copyright IBM Corp. 2014
 */

#include <linux/vmalloc.h>
#include <linux/mm_types.h>
#include <linux/err.h>
#include <linux/pgtable.h>
#include <linux/bitfield.h>
#include <asm/access-regs.h>
#include <asm/fault.h>
#include <asm/gmap.h>
#include <asm/dat-bits.h>
#include "kvm-s390.h"
#include "gaccess.h"

/*
 * vaddress union in order to easily decode a virtual address into its
 * region first index, region second index etc. parts.
 */
union vaddress {
	unsigned long addr;
	struct {
		unsigned long rfx : 11;
		unsigned long rsx : 11;
		unsigned long rtx : 11;
		unsigned long sx  : 11;
		unsigned long px  : 8;
		unsigned long bx  : 12;
	};
	struct {
		unsigned long rfx01 : 2;
		unsigned long	    : 9;
		unsigned long rsx01 : 2;
		unsigned long	    : 9;
		unsigned long rtx01 : 2;
		unsigned long	    : 9;
		unsigned long sx01  : 2;
		unsigned long	    : 29;
	};
};
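
/*
 * For example, gva = 0x0000000080001000 decodes to rfx = 0, rsx = 0,
 * rtx = 1, sx = 0, px = 1, bx = 0. The rfx01/rsx01/rtx01/sx01 views
 * expose the top two bits of each index for comparison against the
 * two-bit table-length (tl) fields of an ASCE or table entry.
 */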

/*
 * raddress union which will contain the result (real or absolute address)
 * after a page table walk. The rfaa, sfaa and pfra members allow the
 * frame address of a region, segment or page table entry to be assigned
 * directly.
 */
union raddress {
	unsigned long addr;
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long pfra : 52; /* Page-Frame Real Address */
};
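
/*
 * Because s390 allocates bit-fields from the most significant bit, the
 * frame-address members above alias the high bits of addr. Assigning one
 * of them (e.g. raddr.sfaa = ste.fc1.sfaa) therefore replaces only the
 * frame bits and keeps the byte-index bits of the original address.
 */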

union alet {
	u32 val;
	struct {
		u32 reserved : 7;
		u32 p        : 1;
		u32 alesn    : 8;
		u32 alen     : 16;
	};
};

union ald {
	u32 val;
	struct {
		u32     : 1;
		u32 alo : 24;
		u32 all : 7;
	};
};

struct ale {
	unsigned long i      : 1; /* ALEN-Invalid Bit */
	unsigned long        : 5;
	unsigned long fo     : 1; /* Fetch-Only Bit */
	unsigned long p      : 1; /* Private Bit */
	unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
	unsigned long        : 32;
	unsigned long        : 1;
	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
	unsigned long        : 6;
	unsigned long astesn : 32; /* ASTE Sequence Number */
};

struct aste {
	unsigned long i      : 1; /* ASX-Invalid Bit */
	unsigned long ato    : 29; /* Authority-Table Origin */
	unsigned long        : 1;
	unsigned long b      : 1; /* Base-Space Bit */
	unsigned long ax     : 16; /* Authorization Index */
	unsigned long atl    : 12; /* Authority-Table Length */
	unsigned long        : 2;
	unsigned long ca     : 1; /* Controlled-ASN Bit */
	unsigned long ra     : 1; /* Reusable-ASN Bit */
	unsigned long asce   : 64; /* Address-Space-Control Element */
	unsigned long ald    : 32;
	unsigned long astesn : 32;
	/* ... more fields follow */
};

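/*
 * The IPTE lock serializes guest DAT-table invalidations against the
 * table walks done by gaccess. If the SIE IPTE interpretation facility
 * is available (sclp.has_siif), the lock lives in the system control
 * area and is shared with the hardware: k marks the lock held, kh counts
 * the VCPUs holding it, and a set kg (guest holder) makes us wait.
 * Without siif, a kvm-wide count under ipte_mutex is used instead.
 */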
int ipte_lock_held(struct kvm *kvm)
{
	if (sclp.has_siif) {
		int rc;

		read_lock(&kvm->arch.sca_lock);
		rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
		read_unlock(&kvm->arch.sca_lock);
		return rc;
	}
	return kvm->arch.ipte_lock_count != 0;
}

static void ipte_lock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count++;
	if (kvm->arch.ipte_lock_count > 1)
		goto out;
retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		if (old.k) {
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_unlock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count--;
	if (kvm->arch.ipte_lock_count)
		goto out;
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		new = old;
		new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
	wake_up(&kvm->arch.ipte_wq);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_lock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		if (old.kg) {
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
		new.kh++;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
}

static void ipte_unlock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		new = old;
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
	if (!new.kh)
		wake_up(&kvm->arch.ipte_wq);
}

void ipte_lock(struct kvm *kvm)
{
	if (sclp.has_siif)
		ipte_lock_siif(kvm);
	else
		ipte_lock_simple(kvm);
}

void ipte_unlock(struct kvm *kvm)
{
	if (sclp.has_siif)
		ipte_unlock_siif(kvm);
	else
		ipte_unlock_simple(kvm);
}

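/*
 * Access-register translation: resolve the ALET in access register @ar
 * into an ASCE. The walk goes ALET -> access-list designation (taken
 * from CR2 or CR5) -> access-list entry -> ASN-second-table entry, with
 * the architected validity and authorization checks along the way.
 * Returns 0 with *asce set, a positive PGM_* code, or a negative error
 * from reading guest real memory.
 */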
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
			  enum gacc_mode mode)
{
	union alet alet;
	struct ale ale;
	struct aste aste;
	unsigned long ald_addr, authority_table_addr;
	union ald ald;
	int eax, rc;
	u8 authority_table;

	if (ar >= NUM_ACRS)
		return -EINVAL;

	if (vcpu->arch.acrs_loaded)
		save_access_regs(vcpu->run->s.regs.acrs);
	alet.val = vcpu->run->s.regs.acrs[ar];

	if (ar == 0 || alet.val == 0) {
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	} else if (alet.val == 1) {
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	}

	if (alet.reserved)
		return PGM_ALET_SPECIFICATION;

	if (alet.p)
		ald_addr = vcpu->arch.sie_block->gcr[5];
	else
		ald_addr = vcpu->arch.sie_block->gcr[2];
	ald_addr &= 0x7fffffc0;

	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
	if (rc)
		return rc;

	if (alet.alen / 8 > ald.all)
		return PGM_ALEN_TRANSLATION;

	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
		return PGM_ADDRESSING;

	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
			     sizeof(struct ale));
	if (rc)
		return rc;

	if (ale.i == 1)
		return PGM_ALEN_TRANSLATION;
	if (ale.alesn != alet.alesn)
		return PGM_ALE_SEQUENCE;

	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
	if (rc)
		return rc;

	if (aste.i)
		return PGM_ASTE_VALIDITY;
	if (aste.astesn != ale.astesn)
		return PGM_ASTE_SEQUENCE;

	if (ale.p == 1) {
		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
		if (ale.aleax != eax) {
			if (eax / 16 > aste.atl)
				return PGM_EXTENDED_AUTHORITY;

			authority_table_addr = aste.ato * 4 + eax / 4;

			rc = read_guest_real(vcpu, authority_table_addr,
					     &authority_table,
					     sizeof(u8));
			if (rc)
				return rc;

			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
				return PGM_EXTENDED_AUTHORITY;
		}
	}

	if (ale.fo == 1 && mode == GACC_STORE)
		return PGM_PROTECTION;

	asce->val = aste.asce;
	return 0;
}

enum prot_type {
	PROT_TYPE_LA   = 0,
	PROT_TYPE_KEYC = 1,
	PROT_TYPE_ALC  = 2,
	PROT_TYPE_DAT  = 3,
	PROT_TYPE_IEP  = 4,
	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
	PROT_NONE,
};

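/*
 * Build the program interruption state for a failed guest access: fill in
 * pgm->code and the translation-exception identification (TEID) bits the
 * architecture defines for the given protection type, so that a later
 * kvm_s390_inject_prog_vcpu() delivers a correct exception to the guest.
 */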
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
			    enum gacc_mode mode, enum prot_type prot, bool terminate)
{
	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
	union teid *teid;

	memset(pgm, 0, sizeof(*pgm));
	pgm->code = code;
	teid = (union teid *)&pgm->trans_exc_code;

	switch (code) {
	case PGM_PROTECTION:
		switch (prot) {
		case PROT_NONE:
			/* We should never get here, acts like termination */
			WARN_ON_ONCE(1);
			break;
		case PROT_TYPE_IEP:
			teid->b61 = 1;
			fallthrough;
		case PROT_TYPE_LA:
			teid->b56 = 1;
			break;
		case PROT_TYPE_KEYC:
			teid->b60 = 1;
			break;
		case PROT_TYPE_ALC:
			teid->b60 = 1;
			fallthrough;
		case PROT_TYPE_DAT:
			teid->b61 = 1;
			break;
		}
		if (terminate) {
			teid->b56 = 0;
			teid->b60 = 0;
			teid->b61 = 0;
		}
		fallthrough;
	case PGM_ASCE_TYPE:
	case PGM_PAGE_TRANSLATION:
	case PGM_REGION_FIRST_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
	case PGM_SEGMENT_TRANSLATION:
		/*
		 * op_access_id only applies to MOVE_PAGE -> set bit 61
		 * exc_access_id has to be set to 0 for some instructions. Both
		 * cases have to be handled by the caller.
		 */
		teid->addr = gva >> PAGE_SHIFT;
		teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
		teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
		fallthrough;
	case PGM_ALEN_TRANSLATION:
	case PGM_ALE_SEQUENCE:
	case PGM_ASTE_VALIDITY:
	case PGM_ASTE_SEQUENCE:
	case PGM_EXTENDED_AUTHORITY:
		/*
		 * We can always store exc_access_id, as it is
		 * undefined for non-ar cases. It is undefined for
		 * most DAT protection exceptions.
		 */
		pgm->exc_access_id = ar;
		break;
	}
	return code;
}

static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
		     enum gacc_mode mode, enum prot_type prot)
{
	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
}

static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
			 unsigned long ga, u8 ar, enum gacc_mode mode)
{
	int rc;
	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);

	if (!psw.dat) {
		asce->val = 0;
		asce->r = 1;
		return 0;
	}

	if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
		psw.as = PSW_BITS_AS_PRIMARY;

	switch (psw.as) {
	case PSW_BITS_AS_PRIMARY:
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	case PSW_BITS_AS_SECONDARY:
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	case PSW_BITS_AS_HOME:
		asce->val = vcpu->arch.sie_block->gcr[13];
		return 0;
	case PSW_BITS_AS_ACCREG:
		rc = ar_translation(vcpu, asce, ar, mode);
		if (rc > 0)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
		return rc;
	}
	return 0;
}

static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
{
	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
}

/**
 * guest_translate - translate a guest virtual into a guest absolute address
 * @vcpu: virtual cpu
 * @gva: guest virtual address
 * @gpa: points to where guest physical (absolute) address should be stored
 * @asce: effective asce
 * @mode: indicates the access mode to be used
 * @prot: returns the type for protection exceptions
 *
 * Translate a guest virtual address into a guest absolute address by means
 * of dynamic address translation as specified by the architecture.
 * If the resulting absolute address is not available in the configuration
 * an addressing exception is indicated and @gpa will not be changed.
 *
 * Returns: - zero on success; @gpa contains the resulting absolute address
 *	    - a negative value if guest access failed due to e.g. broken
 *	      guest mapping
 *	    - a positive value if an access exception happened. In this case
 *	      the returned value is the program interruption code as defined
 *	      by the architecture
 */
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
				     unsigned long *gpa, const union asce asce,
				     enum gacc_mode mode, enum prot_type *prot)
{
	union vaddress vaddr = {.addr = gva};
	union raddress raddr = {.addr = gva};
	union page_table_entry pte;
	int dat_protection = 0;
	int iep_protection = 0;
	union ctlreg0 ctlreg0;
	unsigned long ptr;
	int edat1, edat2, iep;

	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
	if (asce.r)
		goto real_address;
	ptr = asce.rsto * PAGE_SIZE;
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl)
			return PGM_REGION_FIRST_TRANS;
		ptr += vaddr.rfx * 8;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		ptr += vaddr.rsx * 8;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		ptr += vaddr.rtx * 8;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		ptr += vaddr.sx * 8;
		break;
	}
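	/*
	 * Walk the table hierarchy top-down: each case loads one table
	 * entry, validates it, accumulates the protection bit and derives
	 * the origin of the next-lower table. The fallthroughs let the
	 * walk start at the level selected by asce.dt above.
	 */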
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:	{
		union region1_table_entry rfte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rfte.val))
			return -EFAULT;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (edat1)
			dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rste.val))
			return -EFAULT;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (edat1)
			dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rtte.val))
			return -EFAULT;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && edat2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && edat2) {
			dat_protection |= rtte.fc1.p;
			iep_protection = rtte.fc1.iep;
			raddr.rfaa = rtte.fc1.rfaa;
			goto absolute_address;
		}
		if (vaddr.sx01 < rtte.fc0.tf)
			return PGM_SEGMENT_TRANSLATION;
		if (vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (edat1)
			dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &ste.val))
			return -EFAULT;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		if (ste.fc && edat1) {
			dat_protection |= ste.fc1.p;
			iep_protection = ste.fc1.iep;
			raddr.sfaa = ste.fc1.sfaa;
			goto absolute_address;
		}
		dat_protection |= ste.fc0.p;
		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
	}
	}
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
		return PGM_ADDRESSING;
	if (deref_table(vcpu->kvm, ptr, &pte.val))
		return -EFAULT;
	if (pte.i)
		return PGM_PAGE_TRANSLATION;
	if (pte.z)
		return PGM_TRANSLATION_SPEC;
	dat_protection |= pte.p;
	iep_protection = pte.iep;
	raddr.pfra = pte.pfra;
real_address:
	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
	if (mode == GACC_STORE && dat_protection) {
		*prot = PROT_TYPE_DAT;
		return PGM_PROTECTION;
	}
	if (mode == GACC_IFETCH && iep_protection && iep) {
		*prot = PROT_TYPE_IEP;
		return PGM_PROTECTION;
	}
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
		return PGM_ADDRESSING;
	*gpa = raddr.addr;
	return 0;
}

static inline int is_low_address(unsigned long ga)
{
	/*
	 * Check for address ranges 0..511 and 4096..4607: the mask test
	 * works because only the byte-index bits 0x1ff and bit 0x1000 may
	 * be set for addresses in those two ranges.
	 */
	return (ga & ~0x11fful) == 0;
}

static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
					  const union asce asce)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
	psw_t *psw = &vcpu->arch.sie_block->gpsw;

	if (!ctlreg0.lap)
		return 0;
	if (psw_bits(*psw).dat && asce.p)
		return 0;
	return 1;
}

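/*
 * Check storage-key-controlled protection for the page at @gpa against
 * @access_key: key 0 or a matching access-control field always allows the
 * access; fetches are also allowed if the page's fetch-protection bit is
 * not set.
 */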
static int vm_check_access_key(struct kvm *kvm, u8 access_key,
			       enum gacc_mode mode, gpa_t gpa)
{
	u8 storage_key, access_control;
	bool fetch_protected;
	unsigned long hva;
	int r;

	if (access_key == 0)
		return 0;

	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;

	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	if (access_control == access_key)
		return 0;
	fetch_protected = storage_key & _PAGE_FP_BIT;
	if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
		return 0;
	return PGM_PROTECTION;
}

static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
					   union asce asce)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long override;

	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* check if fetch protection override enabled */
		override = vcpu->arch.sie_block->gcr[0];
		override &= CR0_FETCH_PROTECTION_OVERRIDE;
		/* not applicable if subject to DAT && private space */
		override = override && !(psw_bits(*psw).dat && asce.p);
		return override;
	}
	return false;
}

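/* Fetch protection override covers only the first 2K of the address space. */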
static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
{
	return ga < 2048 && ga + len <= 2048;
}

static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
{
	/* check if storage protection override enabled */
	return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
}

static bool storage_prot_override_applies(u8 access_control)
{
	/* matches special storage protection override key (9) -> allow */
	return access_control == PAGE_SPO_ACC;
}

static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
				 enum gacc_mode mode, union asce asce, gpa_t gpa,
				 unsigned long ga, unsigned int len)
{
	u8 storage_key, access_control;
	unsigned long hva;
	int r;

	/* access key 0 matches any storage key -> allow */
	if (access_key == 0)
		return 0;
	/*
	 * caller needs to ensure that gfn is accessible, so we can
	 * assume that this cannot fail
	 */
	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	/* access key matches storage key -> allow */
	if (access_control == access_key)
		return 0;
	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* it is a fetch and fetch protection is off -> allow */
		if (!(storage_key & _PAGE_FP_BIT))
			return 0;
		if (fetch_prot_override_applicable(vcpu, mode, asce) &&
		    fetch_prot_override_applies(ga, len))
			return 0;
	}
	if (storage_prot_override_applicable(vcpu) &&
	    storage_prot_override_applies(access_control))
		return 0;
	return PGM_PROTECTION;
}

/**
 * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
 * covering a logical range
 * @vcpu: virtual cpu
 * @ga: guest address, start of range
 * @ar: access register
 * @gpas: output argument, may be NULL
 * @len: length of range in bytes
 * @asce: address-space-control element to use for translation
 * @mode: access mode
 * @access_key: access key to match the range's storage keys against
 *
 * Translate a logical range to a series of guest absolute addresses,
 * such that the concatenation of page fragments starting at each gpa make up
 * the whole range.
 * The translation is performed as if done by the cpu for the given @asce, @ar,
 * @mode and state of the @vcpu.
 * If the translation causes an exception, its program interruption code is
 * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
 * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
 * a correct exception into the guest.
 * The resulting gpas are stored into @gpas, unless it is NULL.
 *
 * Note: All fragments except the first one start at the beginning of a page.
 *	 When deriving the boundaries of a fragment from a gpa, all but the last
 *	 fragment end at the end of the page.
 *
 * Return:
 * * 0		- success
 * * <0		- translation could not be performed, for example if guest
 *		  memory could not be accessed
 * * >0		- an access exception occurred. In this case the returned value
 *		  is the program interruption code and the contents of pgm may
 *		  be used to inject an exception into the guest.
 */
static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			       unsigned long *gpas, unsigned long len,
			       const union asce asce, enum gacc_mode mode,
			       u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned int offset = offset_in_page(ga);
	unsigned int fragment_len;
	int lap_enabled, rc = 0;
	enum prot_type prot;
	unsigned long gpa;

	lap_enabled = low_address_protection_enabled(vcpu, asce);
	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		ga = kvm_s390_logical_to_effective(vcpu, ga);
		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
					 PROT_TYPE_LA);
		if (psw_bits(*psw).dat) {
			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
			if (rc < 0)
				return rc;
		} else {
			gpa = kvm_s390_real_to_abs(vcpu, ga);
			if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
				rc = PGM_ADDRESSING;
				prot = PROT_NONE;
			}
		}
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, prot);
		rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
					   fragment_len);
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
		if (gpas)
			*gpas++ = gpa;
		offset = 0;
		ga += fragment_len;
		len -= fragment_len;
	}
	return 0;
}

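/*
 * Read from or write to a single guest-absolute page, without any key
 * checking; @gpa and @len must not cross a page boundary.
 */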
static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
			     void *data, unsigned int len)
{
	const unsigned int offset = offset_in_page(gpa);
	const gfn_t gfn = gpa_to_gfn(gpa);
	int rc;

	if (!gfn_to_memslot(kvm, gfn))
		return PGM_ADDRESSING;
	if (mode == GACC_STORE)
		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
	else
		rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
	return rc;
}

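/*
 * Like access_guest_page(), but perform the user-space copy with the
 * given access key, so storage-key protection is checked by the hardware
 * during the copy itself.
 */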
static int
access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
			   void *data, unsigned int len, u8 access_key)
{
	struct kvm_memory_slot *slot;
	bool writable;
	gfn_t gfn;
	hva_t hva;
	int rc;

	gfn = gpa >> PAGE_SHIFT;
	slot = gfn_to_memslot(kvm, gfn);
	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);

	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;
	/*
	 * Check if it's a read-only memslot, even though that cannot occur
	 * since those are unsupported.
	 * Don't try to actually handle that case.
	 */
	if (!writable && mode == GACC_STORE)
		return -EOPNOTSUPP;
	hva += offset_in_page(gpa);
	if (mode == GACC_STORE)
		rc = copy_to_user_key((void __user *)hva, data, len, access_key);
	else
		rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
	if (rc)
		return PGM_PROTECTION;
	if (mode == GACC_STORE)
		mark_page_dirty_in_slot(kvm, slot, gfn);
	return 0;
}

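/*
 * Access a range of guest absolute memory with key checking, split into
 * page fragments because the per-page helper must not cross a page
 * boundary.
 */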
int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
			      unsigned long len, enum gacc_mode mode, u8 access_key)
{
	int offset = offset_in_page(gpa);
	int fragment_len;
	int rc;

	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
		if (rc)
			return rc;
		offset = 0;
		len -= fragment_len;
		data += fragment_len;
		gpa += fragment_len;
	}
	return 0;
}

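/*
 * Access a range of guest logical memory: translate the whole range to
 * absolute addresses up front via guest_range_to_gpas(), then copy
 * fragment by fragment, applying the fetch and storage protection
 * overrides where they apply.
 */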
int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			  void *data, unsigned long len, enum gacc_mode mode,
			  u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long nr_pages, idx;
	unsigned long gpa_array[2];
	unsigned int fragment_len;
	unsigned long *gpas;
	enum prot_type prot;
	int need_ipte_lock;
	union asce asce;
	bool try_storage_prot_override;
	bool try_fetch_prot_override;
	int rc;

	if (!len)
		return 0;
	ga = kvm_s390_logical_to_effective(vcpu, ga);
	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
	if (rc)
		return rc;
	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
	gpas = gpa_array;
	if (nr_pages > ARRAY_SIZE(gpa_array))
		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
	if (!gpas)
		return -ENOMEM;
	try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
	try_storage_prot_override = storage_prot_override_applicable(vcpu);
	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
	if (need_ipte_lock)
		ipte_lock(vcpu->kvm);
	/*
	 * Since we do the access further down ultimately via a move instruction
	 * that does key checking and returns an error in case of a protection
	 * violation, we don't need to do the check during address translation.
	 * Skip it by passing access key 0, which matches any storage key,
	 * obviating the need for any further checks. As a result the check is
	 * handled entirely in hardware on access, we only need to take care to
	 * forego key protection checking if fetch protection override applies or
	 * retry with the special key 9 in case of storage protection override.
	 */
	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
	if (rc)
		goto out_unlock;
	for (idx = 0; idx < nr_pages; idx++) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
		if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
			rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
					       data, fragment_len);
		} else {
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, access_key);
		}
		if (rc == PGM_PROTECTION && try_storage_prot_override)
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, PAGE_SPO_ACC);
		if (rc)
			break;
		len -= fragment_len;
		data += fragment_len;
		ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
	}
	if (rc > 0) {
		bool terminate = (mode == GACC_STORE) && (idx > 0);

		if (rc == PGM_PROTECTION)
			prot = PROT_TYPE_KEYC;
		else
			prot = PROT_NONE;
		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
	}
out_unlock:
	if (need_ipte_lock)
		ipte_unlock(vcpu->kvm);
	if (nr_pages > ARRAY_SIZE(gpa_array))
		vfree(gpas);
	return rc;
}

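/*
 * Access a range of guest real memory: DAT is not applied, only the
 * real-to-absolute prefix conversion, per page fragment. On an access
 * exception only pgm.code is set; no further exception data is built up.
 */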
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
		      void *data, unsigned long len, enum gacc_mode mode)
{
	unsigned int fragment_len;
	unsigned long gpa;
	int rc = 0;

	while (len && !rc) {
		gpa = kvm_s390_real_to_abs(vcpu, gra);
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
		rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
		len -= fragment_len;
		gra += fragment_len;
		data += fragment_len;
	}
	if (rc > 0)
		vcpu->arch.pgm.code = rc;
	return rc;
}

/**
 * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
 * @kvm: Virtual machine instance.
 * @gpa: Absolute guest address of the location to be changed.
 * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
 *       length that is not a power of two will result in failure.
 * @old_addr: Pointer to old value. If the location at @gpa contains this value,
 *            the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
 *            *@old_addr contains the value at @gpa before the attempt to
 *            exchange the value.
 * @new: The value to place at @gpa.
 * @access_key: The access key to use for the guest access.
 * @success: output value indicating if an exchange occurred.
 *
 * Atomically exchange the value at @gpa by @new, if it contains *@old_addr.
 * Honors storage keys.
 *
 * Return: * 0: successful exchange
 *         * >0: a program interruption code indicating the reason cmpxchg could
 *               not be attempted
 *         * -EINVAL: address misaligned or len not power of two
 *         * -EAGAIN: transient failure (len 1 or 2)
 *         * -EOPNOTSUPP: read-only memslot (should never occur)
 */
int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
			       __uint128_t *old_addr, __uint128_t new,
			       u8 access_key, bool *success)
{
	gfn_t gfn = gpa_to_gfn(gpa);
	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
	bool writable;
	hva_t hva;
	int ret;

	if (!IS_ALIGNED(gpa, len))
		return -EINVAL;

	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;
	/*
	 * Check if it's a read-only memslot, even though that cannot occur
	 * since those are unsupported.
	 * Don't try to actually handle that case.
	 */
	if (!writable)
		return -EOPNOTSUPP;

	hva += offset_in_page(gpa);
	/*
	 * The cmpxchg_user_key macro depends on the type of "old", so we need
	 * a case for each valid length and get some code duplication as long
	 * as we don't introduce a new macro.
	 */
	switch (len) {
	case 1: {
		u8 old;

		ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 2: {
		u16 old;

		ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 4: {
		u32 old;

		ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 8: {
		u64 old;

		ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 16: {
		__uint128_t old;

		ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	default:
		return -EINVAL;
	}
	if (*success)
		mark_page_dirty_in_slot(kvm, slot, gfn);
	/*
	 * Assume that the fault is caused by protection, either key protection
	 * or user page write protection.
	 */
	if (ret == -EFAULT)
		ret = PGM_PROTECTION;
	return ret;
}

/**
 * guest_translate_address_with_key - translate guest logical into guest absolute address
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @gpa: Guest physical address
 * @mode: Translation access mode
 * @access_key: access key to match the storage key with
 *
 * Parameter semantics are the same as the ones from guest_translate.
 * The memory contents at the guest address are not changed.
 *
 * Note: The IPTE lock is not taken during this function, so the caller
 * has to take care of this.
 */
int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
				     unsigned long *gpa, enum gacc_mode mode,
				     u8 access_key)
{
	union asce asce;
	int rc;

	gva = kvm_s390_logical_to_effective(vcpu, gva);
	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
				   access_key);
}

/**
 * check_gva_range - test a range of guest virtual addresses for accessibility
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @length: Length of test range
 * @mode: Translation access mode
 * @access_key: access key to match the storage keys with
 */
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
		    unsigned long length, enum gacc_mode mode, u8 access_key)
{
	union asce asce;
	int rc = 0;

	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	ipte_lock(vcpu->kvm);
	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
				 access_key);
	ipte_unlock(vcpu->kvm);

	return rc;
}

/**
 * check_gpa_range - test a range of guest physical addresses for accessibility
 * @kvm: virtual machine instance
 * @gpa: guest physical address
 * @length: length of test range
 * @mode: access mode to test, relevant for storage keys
 * @access_key: access key to match the storage keys with
 */
int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
		    enum gacc_mode mode, u8 access_key)
{
	unsigned int fragment_len;
	int rc = 0;

	while (length && !rc) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
		rc = vm_check_access_key(kvm, access_key, mode, gpa);
		length -= fragment_len;
		gpa += fragment_len;
	}
	return rc;
}

/**
 * kvm_s390_check_low_addr_prot_real - check for low-address protection
 * @vcpu: virtual cpu
 * @gra: Guest real address
 *
 * Checks whether an address is subject to low-address protection and sets
 * up vcpu->arch.pgm accordingly if necessary.
 *
 * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
 */
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};

	if (!ctlreg0.lap || !is_low_address(gra))
		return 0;
	return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
}

/**
 * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: pointer to the beginning of the page table for the given address if
 *	 successful (return value 0), or to the first invalid DAT entry in
 *	 case of exceptions (return value > 0)
 * @dat_protection: referenced memory is write protected
 * @fake: pgt references contiguous guest memory block, not a pgtable
 */
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
				  unsigned long *pgt, int *dat_protection,
				  int *fake)
{
	struct kvm *kvm;
	struct gmap *parent;
	union asce asce;
	union vaddress vaddr;
	unsigned long ptr;
	int rc;

	*fake = 0;
	*dat_protection = 0;
	kvm = sg->private;
	parent = sg->parent;
	vaddr.addr = saddr;
	asce.val = sg->orig_asce;
	ptr = asce.rsto * PAGE_SIZE;
	if (asce.r) {
		*fake = 1;
		ptr = 0;
		asce.dt = ASCE_TYPE_REGION1;
	}
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl && !*fake)
			return PGM_REGION_FIRST_TRANS;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		break;
	}

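	/*
	 * Walk the guest tables top-down as in guest_translate(), but at
	 * each level also create the corresponding shadow table via
	 * gmap_shadow_*(). For a fake mapping (real-space ASCE or a
	 * large-page backed level) the entries are synthesized from the
	 * address instead of being read from the parent gmap.
	 */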
	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (*fake) {
			ptr += vaddr.rfx * _REGION1_SIZE;
			rfte.val = ptr;
			goto shadow_r2t;
		}
		*pgt = ptr + vaddr.rfx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
		if (rc)
			return rc;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE;
shadow_r2t:
		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r1_entry++;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (*fake) {
			ptr += vaddr.rsx * _REGION2_SIZE;
			rste.val = ptr;
			goto shadow_r3t;
		}
		*pgt = ptr + vaddr.rsx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
		if (rc)
			return rc;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE;
shadow_r3t:
		rste.p |= *dat_protection;
		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r2_entry++;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (*fake) {
			ptr += vaddr.rtx * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		*pgt = ptr + vaddr.rtx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
		if (rc)
			return rc;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && sg->edat_level >= 2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && sg->edat_level >= 2) {
			*dat_protection |= rtte.fc0.p;
			*fake = 1;
			ptr = rtte.fc1.rfaa * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (sg->edat_level >= 1)
			*dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE;
shadow_sgt:
		rtte.fc0.p |= *dat_protection;
		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r3_entry++;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (*fake) {
			ptr += vaddr.sx * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		*pgt = ptr + vaddr.sx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
		if (rc)
			return rc;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		*dat_protection |= ste.fc0.p;
		if (ste.fc && sg->edat_level >= 1) {
			*fake = 1;
			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		ptr = ste.fc0.pto * (PAGE_SIZE / 2);
shadow_pgt:
		ste.fc0.p |= *dat_protection;
		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_sg_entry++;
	}
	}
	/* Return the parent address of the page table */
	*pgt = ptr;
	return 0;
}

/**
 * kvm_s390_shadow_fault - handle fault on a shadow page table
 * @vcpu: virtual cpu
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @datptr: will contain the address of the faulting DAT table entry, or of
 *	    the valid leaf, plus some flags
 *
 * Returns: - 0 if the shadow fault was successfully resolved
 *	    - > 0 (pgm exception code) on exceptions while faulting
 *	    - -EAGAIN if the caller can retry immediately
 *	    - -EFAULT when accessing invalid guest addresses
 *	    - -ENOMEM if out of memory
 */
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
			  unsigned long saddr, unsigned long *datptr)
{
	union vaddress vaddr;
	union page_table_entry pte;
	unsigned long pgt = 0;
	int dat_protection, fake;
	int rc;

	mmap_read_lock(sg->mm);
	/*
	 * We don't want any guest-2 tables to change - so the parent
	 * tables/pointers we read stay valid - unshadowing is however
	 * always possible - only guest_table_lock protects us.
	 */
	ipte_lock(vcpu->kvm);

	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
	if (rc)
		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
					    &fake);

	vaddr.addr = saddr;
	if (fake) {
		pte.val = pgt + vaddr.px * PAGE_SIZE;
		goto shadow_page;
	}

	switch (rc) {
	case PGM_SEGMENT_TRANSLATION:
	case PGM_REGION_THIRD_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_FIRST_TRANS:
		pgt |= PEI_NOT_PTE;
		break;
	case 0:
		pgt += vaddr.px * 8;
		rc = gmap_read_table(sg->parent, pgt, &pte.val);
	}
	if (datptr)
		*datptr = pgt | dat_protection * PEI_DAT_PROT;
	if (!rc && pte.i)
		rc = PGM_PAGE_TRANSLATION;
	if (!rc && pte.z)
		rc = PGM_TRANSLATION_SPEC;
shadow_page:
	pte.p |= dat_protection;
	if (!rc)
		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
	vcpu->kvm->stat.gmap_shadow_pg_entry++;
	ipte_unlock(vcpu->kvm);
	mmap_read_unlock(sg->mm);
	return rc;
}