1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * channel program interfaces
4  *
5  * Copyright IBM Corp. 2017
6  *
7  * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
8  *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
9  */
10 
11 #include <linux/ratelimit.h>
12 #include <linux/mm.h>
13 #include <linux/slab.h>
14 #include <linux/highmem.h>
15 #include <linux/iommu.h>
16 #include <linux/vfio.h>
17 #include <asm/idals.h>
18 
19 #include "vfio_ccw_cp.h"
20 #include "vfio_ccw_private.h"
21 
/* Bookkeeping for a set of guest pages that are (to be) pinned. */
struct page_array {
	/* Array of the guest addresses (IOVAs) that need to be pinned. */
	dma_addr_t		*pa_iova;
	/* Array that receives the pinned pages. */
	struct page		**pa_page;
	/* Number of pages pinned from @pa_iova. */
	int			pa_nr;
};
30 
/* One segment of a channel program, linked on the program's ccwchain_list. */
struct ccwchain {
	/* Link in the channel program's list of chains. */
	struct list_head	next;
	/* Host copy of the CCWs that make up this chain. */
	struct ccw1		*ch_ccw;
	/* Guest physical address of the current chain. */
	u64			ch_iova;
	/* Count of the valid ccws in chain. */
	int			ch_len;
	/* Pinned pages for the original data; indexed like @ch_ccw. */
	struct page_array	*ch_pa;
};
41 
42 /*
43  * page_array_alloc() - alloc memory for page array
44  * @pa: page_array on which to perform the operation
45  * @len: number of pages that should be pinned from @iova
46  *
47  * Attempt to allocate memory for page array.
48  *
49  * Usage of page_array:
50  * We expect (pa_nr == 0) and (pa_iova == NULL), any field in
51  * this structure will be filled in by this function.
52  *
53  * Returns:
54  *         0 if page array is allocated
55  *   -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL
56  *   -ENOMEM if alloc failed
57  */
page_array_alloc(struct page_array * pa,unsigned int len)58 static int page_array_alloc(struct page_array *pa, unsigned int len)
59 {
60 	if (pa->pa_nr || pa->pa_iova)
61 		return -EINVAL;
62 
63 	if (len == 0)
64 		return -EINVAL;
65 
66 	pa->pa_nr = len;
67 
68 	pa->pa_iova = kcalloc(len, sizeof(*pa->pa_iova), GFP_KERNEL);
69 	if (!pa->pa_iova)
70 		return -ENOMEM;
71 
72 	pa->pa_page = kcalloc(len, sizeof(*pa->pa_page), GFP_KERNEL);
73 	if (!pa->pa_page) {
74 		kfree(pa->pa_iova);
75 		return -ENOMEM;
76 	}
77 
78 	return 0;
79 }
80 
81 /*
82  * page_array_unpin() - Unpin user pages in memory
83  * @pa: page_array on which to perform the operation
84  * @vdev: the vfio device to perform the operation
85  * @pa_nr: number of user pages to unpin
86  * @unaligned: were pages unaligned on the pin request
87  *
88  * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0,
89  * otherwise only clear pa->pa_nr
90  */
page_array_unpin(struct page_array * pa,struct vfio_device * vdev,int pa_nr,bool unaligned)91 static void page_array_unpin(struct page_array *pa,
92 			     struct vfio_device *vdev, int pa_nr, bool unaligned)
93 {
94 	int unpinned = 0, npage = 1;
95 
96 	while (unpinned < pa_nr) {
97 		dma_addr_t *first = &pa->pa_iova[unpinned];
98 		dma_addr_t *last = &first[npage];
99 
100 		if (unpinned + npage < pa_nr &&
101 		    *first + npage * PAGE_SIZE == *last &&
102 		    !unaligned) {
103 			npage++;
104 			continue;
105 		}
106 
107 		vfio_unpin_pages(vdev, *first, npage);
108 		unpinned += npage;
109 		npage = 1;
110 	}
111 
112 	pa->pa_nr = 0;
113 }
114 
115 /*
116  * page_array_pin() - Pin user pages in memory
117  * @pa: page_array on which to perform the operation
118  * @vdev: the vfio device to perform pin operations
119  * @unaligned: are pages aligned to 4K boundary?
120  *
121  * Returns number of pages pinned upon success.
122  * If the pin request partially succeeds, or fails completely,
123  * all pages are left unpinned and a negative error value is returned.
124  *
125  * Requests to pin "aligned" pages can be coalesced into a single
126  * vfio_pin_pages request for the sake of efficiency, based on the
127  * expectation of 4K page requests. Unaligned requests are probably
128  * dealing with 2K "pages", and cannot be coalesced without
129  * reworking this logic to incorporate that math.
130  */
page_array_pin(struct page_array * pa,struct vfio_device * vdev,bool unaligned)131 static int page_array_pin(struct page_array *pa, struct vfio_device *vdev, bool unaligned)
132 {
133 	int pinned = 0, npage = 1;
134 	int ret = 0;
135 
136 	while (pinned < pa->pa_nr) {
137 		dma_addr_t *first = &pa->pa_iova[pinned];
138 		dma_addr_t *last = &first[npage];
139 
140 		if (pinned + npage < pa->pa_nr &&
141 		    *first + npage * PAGE_SIZE == *last &&
142 		    !unaligned) {
143 			npage++;
144 			continue;
145 		}
146 
147 		ret = vfio_pin_pages(vdev, *first, npage,
148 				     IOMMU_READ | IOMMU_WRITE,
149 				     &pa->pa_page[pinned]);
150 		if (ret < 0) {
151 			goto err_out;
152 		} else if (ret > 0 && ret != npage) {
153 			pinned += ret;
154 			ret = -EINVAL;
155 			goto err_out;
156 		}
157 		pinned += npage;
158 		npage = 1;
159 	}
160 
161 	return ret;
162 
163 err_out:
164 	page_array_unpin(pa, vdev, pinned, unaligned);
165 	return ret;
166 }
167 
168 /* Unpin the pages before releasing the memory. */
page_array_unpin_free(struct page_array * pa,struct vfio_device * vdev,bool unaligned)169 static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev, bool unaligned)
170 {
171 	page_array_unpin(pa, vdev, pa->pa_nr, unaligned);
172 	kfree(pa->pa_page);
173 	kfree(pa->pa_iova);
174 }
175 
page_array_iova_pinned(struct page_array * pa,u64 iova,u64 length)176 static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length)
177 {
178 	u64 iova_pfn_start = iova >> PAGE_SHIFT;
179 	u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT;
180 	u64 pfn;
181 	int i;
182 
183 	for (i = 0; i < pa->pa_nr; i++) {
184 		pfn = pa->pa_iova[i] >> PAGE_SHIFT;
185 		if (pfn >= iova_pfn_start && pfn <= iova_pfn_end)
186 			return true;
187 	}
188 
189 	return false;
190 }
191 /* Create the list of IDAL words for a page_array. */
page_array_idal_create_words(struct page_array * pa,dma64_t * idaws)192 static inline void page_array_idal_create_words(struct page_array *pa,
193 						dma64_t *idaws)
194 {
195 	int i;
196 
197 	/*
198 	 * Idal words (execept the first one) rely on the memory being 4k
199 	 * aligned. If a user virtual address is 4K aligned, then it's
200 	 * corresponding kernel physical address will also be 4K aligned. Thus
201 	 * there will be no problem here to simply use the phys to create an
202 	 * idaw.
203 	 */
204 
205 	for (i = 0; i < pa->pa_nr; i++) {
206 		idaws[i] = virt_to_dma64(page_to_virt(pa->pa_page[i]));
207 
208 		/* Incorporate any offset from each starting address */
209 		idaws[i] = dma64_add(idaws[i], pa->pa_iova[i] & ~PAGE_MASK);
210 	}
211 }
212 
convert_ccw0_to_ccw1(struct ccw1 * source,unsigned long len)213 static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
214 {
215 	struct ccw0 ccw0;
216 	struct ccw1 *pccw1 = source;
217 	int i;
218 
219 	for (i = 0; i < len; i++) {
220 		ccw0 = *(struct ccw0 *)pccw1;
221 		if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
222 			pccw1->cmd_code = CCW_CMD_TIC;
223 			pccw1->flags = 0;
224 			pccw1->count = 0;
225 		} else {
226 			pccw1->cmd_code = ccw0.cmd_code;
227 			pccw1->flags = ccw0.flags;
228 			pccw1->count = ccw0.count;
229 		}
230 		pccw1->cda = u32_to_dma32(ccw0.cda);
231 		pccw1++;
232 	}
233 }
234 
/*
 * True when the IDAWs of this channel program cover 2K each: either the
 * ORB does not ask for 64-bit (Format-2) IDAWs, or it asks for the 2K
 * variant of them.
 */
#define idal_is_2k(_cp) (!(_cp)->orb.cmd.c64 || (_cp)->orb.cmd.i2k)

/*
 * Helpers to operate ccwchain.
 */
/* Command-code classification; only the masked low-order bits are tested. */
#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)

/* NOOP and TIC are matched on the full command code. */
#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)

#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)

/* Flag tests; these evaluate to the masked flag bits, not to 0/1. */
#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)

/* Non-zero if either command chaining or data chaining is requested. */
#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
252 
253 /*
254  * ccw_does_data_transfer()
255  *
256  * Determine whether a CCW will move any data, such that the guest pages
257  * would need to be pinned before performing the I/O.
258  *
259  * Returns 1 if yes, 0 if no.
260  */
ccw_does_data_transfer(struct ccw1 * ccw)261 static inline int ccw_does_data_transfer(struct ccw1 *ccw)
262 {
263 	/* If the count field is zero, then no data will be transferred */
264 	if (ccw->count == 0)
265 		return 0;
266 
267 	/* If the command is a NOP, then no data will be transferred */
268 	if (ccw_is_noop(ccw))
269 		return 0;
270 
271 	/* If the skip flag is off, then data will be transferred */
272 	if (!ccw_is_skip(ccw))
273 		return 1;
274 
275 	/*
276 	 * If the skip flag is on, it is only meaningful if the command
277 	 * code is a read, read backward, sense, or sense ID.  In those
278 	 * cases, no data will be transferred.
279 	 */
280 	if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
281 		return 0;
282 
283 	if (ccw_is_sense(ccw))
284 		return 0;
285 
286 	/* The skip flag is on, but it is ignored for this command code. */
287 	return 1;
288 }
289 
290 /*
291  * is_cpa_within_range()
292  *
293  * @cpa: channel program address being questioned
294  * @head: address of the beginning of a CCW chain
295  * @len: number of CCWs within the chain
296  *
297  * Determine whether the address of a CCW (whether a new chain,
298  * or the target of a TIC) falls within a range (including the end points).
299  *
300  * Returns 1 if yes, 0 if no.
301  */
is_cpa_within_range(dma32_t cpa,u32 head,int len)302 static inline int is_cpa_within_range(dma32_t cpa, u32 head, int len)
303 {
304 	u32 tail = head + (len - 1) * sizeof(struct ccw1);
305 	u32 gcpa = dma32_to_u32(cpa);
306 
307 	return head <= gcpa && gcpa <= tail;
308 }
309 
/*
 * Returns 1 if @ccw is a TIC whose target lies within the @len CCWs
 * starting at @head, 0 otherwise (including when @ccw is not a TIC).
 */
static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len)
{
	return ccw_is_tic(ccw) && is_cpa_within_range(ccw->cda, head, len);
}
317 
ccwchain_alloc(struct channel_program * cp,int len)318 static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
319 {
320 	struct ccwchain *chain;
321 
322 	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
323 	if (!chain)
324 		return NULL;
325 
326 	chain->ch_ccw = kcalloc(len, sizeof(*chain->ch_ccw), GFP_DMA | GFP_KERNEL);
327 	if (!chain->ch_ccw)
328 		goto out_err;
329 
330 	chain->ch_pa = kcalloc(len, sizeof(*chain->ch_pa), GFP_KERNEL);
331 	if (!chain->ch_pa)
332 		goto out_err;
333 
334 	list_add_tail(&chain->next, &cp->ccwchain_list);
335 
336 	return chain;
337 
338 out_err:
339 	kfree(chain->ch_ccw);
340 	kfree(chain);
341 	return NULL;
342 }
343 
ccwchain_free(struct ccwchain * chain)344 static void ccwchain_free(struct ccwchain *chain)
345 {
346 	list_del(&chain->next);
347 	kfree(chain->ch_pa);
348 	kfree(chain->ch_ccw);
349 	kfree(chain);
350 }
351 
352 /* Free resource for a ccw that allocated memory for its cda. */
ccwchain_cda_free(struct ccwchain * chain,int idx)353 static void ccwchain_cda_free(struct ccwchain *chain, int idx)
354 {
355 	struct ccw1 *ccw = &chain->ch_ccw[idx];
356 
357 	if (ccw_is_tic(ccw))
358 		return;
359 
360 	kfree(dma32_to_virt(ccw->cda));
361 }
362 
363 /**
364  * ccwchain_calc_length - calculate the length of the ccw chain.
365  * @iova: guest physical address of the target ccw chain
366  * @cp: channel_program on which to perform the operation
367  *
368  * This is the chain length not considering any TICs.
369  * You need to do a new round for each TIC target.
370  *
371  * The program is also validated for absence of not yet supported
372  * indirect data addressing scenarios.
373  *
374  * Returns: the length of the ccw chain or -errno.
375  */
ccwchain_calc_length(u64 iova,struct channel_program * cp)376 static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
377 {
378 	struct ccw1 *ccw = cp->guest_cp;
379 	int cnt = 0;
380 
381 	do {
382 		cnt++;
383 
384 		/*
385 		 * We want to keep counting if the current CCW has the
386 		 * command-chaining flag enabled, or if it is a TIC CCW
387 		 * that loops back into the current chain.  The latter
388 		 * is used for device orientation, where the CCW PRIOR to
389 		 * the TIC can either jump to the TIC or a CCW immediately
390 		 * after the TIC, depending on the results of its operation.
391 		 */
392 		if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt))
393 			break;
394 
395 		ccw++;
396 	} while (cnt < CCWCHAIN_LEN_MAX + 1);
397 
398 	if (cnt == CCWCHAIN_LEN_MAX + 1)
399 		cnt = -EINVAL;
400 
401 	return cnt;
402 }
403 
tic_target_chain_exists(struct ccw1 * tic,struct channel_program * cp)404 static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
405 {
406 	struct ccwchain *chain;
407 	u32 ccw_head;
408 
409 	list_for_each_entry(chain, &cp->ccwchain_list, next) {
410 		ccw_head = chain->ch_iova;
411 		if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len))
412 			return 1;
413 	}
414 
415 	return 0;
416 }
417 
418 static int ccwchain_loop_tic(struct ccwchain *chain,
419 			     struct channel_program *cp);
420 
ccwchain_handle_ccw(dma32_t cda,struct channel_program * cp)421 static int ccwchain_handle_ccw(dma32_t cda, struct channel_program *cp)
422 {
423 	struct vfio_device *vdev =
424 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
425 	struct ccwchain *chain;
426 	int len, ret;
427 	u32 gcda;
428 
429 	gcda = dma32_to_u32(cda);
430 	/* Copy 2K (the most we support today) of possible CCWs */
431 	ret = vfio_dma_rw(vdev, gcda, cp->guest_cp, CCWCHAIN_LEN_MAX * sizeof(struct ccw1), false);
432 	if (ret)
433 		return ret;
434 
435 	/* Convert any Format-0 CCWs to Format-1 */
436 	if (!cp->orb.cmd.fmt)
437 		convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
438 
439 	/* Count the CCWs in the current chain */
440 	len = ccwchain_calc_length(gcda, cp);
441 	if (len < 0)
442 		return len;
443 
444 	/* Need alloc a new chain for this one. */
445 	chain = ccwchain_alloc(cp, len);
446 	if (!chain)
447 		return -ENOMEM;
448 
449 	chain->ch_len = len;
450 	chain->ch_iova = gcda;
451 
452 	/* Copy the actual CCWs into the new chain */
453 	memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
454 
455 	/* Loop for tics on this new chain. */
456 	ret = ccwchain_loop_tic(chain, cp);
457 
458 	if (ret)
459 		ccwchain_free(chain);
460 
461 	return ret;
462 }
463 
464 /* Loop for TICs. */
ccwchain_loop_tic(struct ccwchain * chain,struct channel_program * cp)465 static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
466 {
467 	struct ccw1 *tic;
468 	int i, ret;
469 
470 	for (i = 0; i < chain->ch_len; i++) {
471 		tic = &chain->ch_ccw[i];
472 
473 		if (!ccw_is_tic(tic))
474 			continue;
475 
476 		/* May transfer to an existing chain. */
477 		if (tic_target_chain_exists(tic, cp))
478 			continue;
479 
480 		/* Build a ccwchain for the next segment */
481 		ret = ccwchain_handle_ccw(tic->cda, cp);
482 		if (ret)
483 			return ret;
484 	}
485 
486 	return 0;
487 }
488 
ccwchain_fetch_tic(struct ccw1 * ccw,struct channel_program * cp)489 static int ccwchain_fetch_tic(struct ccw1 *ccw,
490 			      struct channel_program *cp)
491 {
492 	struct ccwchain *iter;
493 	u32 offset, ccw_head;
494 
495 	list_for_each_entry(iter, &cp->ccwchain_list, next) {
496 		ccw_head = iter->ch_iova;
497 		if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
498 			/* Calculate offset of TIC target */
499 			offset = dma32_to_u32(ccw->cda) - ccw_head;
500 			ccw->cda = virt_to_dma32((void *)iter->ch_ccw + offset);
501 			return 0;
502 		}
503 	}
504 
505 	return -EFAULT;
506 }
507 
get_guest_idal(struct ccw1 * ccw,struct channel_program * cp,int idaw_nr)508 static dma64_t *get_guest_idal(struct ccw1 *ccw, struct channel_program *cp, int idaw_nr)
509 {
510 	struct vfio_device *vdev =
511 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
512 	dma64_t *idaws;
513 	dma32_t *idaws_f1;
514 	int idal_len = idaw_nr * sizeof(*idaws);
515 	int idaw_size = idal_is_2k(cp) ? PAGE_SIZE / 2 : PAGE_SIZE;
516 	int idaw_mask = ~(idaw_size - 1);
517 	int i, ret;
518 
519 	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
520 	if (!idaws)
521 		return ERR_PTR(-ENOMEM);
522 
523 	if (ccw_is_idal(ccw)) {
524 		/* Copy IDAL from guest */
525 		ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), idaws, idal_len, false);
526 		if (ret) {
527 			kfree(idaws);
528 			return ERR_PTR(ret);
529 		}
530 	} else {
531 		/* Fabricate an IDAL based off CCW data address */
532 		if (cp->orb.cmd.c64) {
533 			idaws[0] = u64_to_dma64(dma32_to_u32(ccw->cda));
534 			for (i = 1; i < idaw_nr; i++) {
535 				idaws[i] = dma64_add(idaws[i - 1], idaw_size);
536 				idaws[i] = dma64_and(idaws[i], idaw_mask);
537 			}
538 		} else {
539 			idaws_f1 = (dma32_t *)idaws;
540 			idaws_f1[0] = ccw->cda;
541 			for (i = 1; i < idaw_nr; i++) {
542 				idaws_f1[i] = dma32_add(idaws_f1[i - 1], idaw_size);
543 				idaws_f1[i] = dma32_and(idaws_f1[i], idaw_mask);
544 			}
545 		}
546 	}
547 
548 	return idaws;
549 }
550 
551 /*
552  * ccw_count_idaws() - Calculate the number of IDAWs needed to transfer
553  * a specified amount of data
554  *
555  * @ccw: The Channel Command Word being translated
556  * @cp: Channel Program being processed
557  *
558  * The ORB is examined, since it specifies what IDAWs could actually be
559  * used by any CCW in the channel program, regardless of whether or not
560  * the CCW actually does. An ORB that does not specify Format-2-IDAW
561  * Control could still contain a CCW with an IDAL, which would be
562  * Format-1 and thus only move 2K with each IDAW. Thus all CCWs within
563  * the channel program must follow the same size requirements.
564  */
ccw_count_idaws(struct ccw1 * ccw,struct channel_program * cp)565 static int ccw_count_idaws(struct ccw1 *ccw,
566 			   struct channel_program *cp)
567 {
568 	struct vfio_device *vdev =
569 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
570 	u64 iova;
571 	int size = cp->orb.cmd.c64 ? sizeof(u64) : sizeof(u32);
572 	int ret;
573 	int bytes = 1;
574 
575 	if (ccw->count)
576 		bytes = ccw->count;
577 
578 	if (ccw_is_idal(ccw)) {
579 		/* Read first IDAW to check its starting address. */
580 		/* All subsequent IDAWs will be 2K- or 4K-aligned. */
581 		ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), &iova, size, false);
582 		if (ret)
583 			return ret;
584 
585 		/*
586 		 * Format-1 IDAWs only occupy the first 32 bits,
587 		 * and bit 0 is always off.
588 		 */
589 		if (!cp->orb.cmd.c64)
590 			iova = iova >> 32;
591 	} else {
592 		iova = dma32_to_u32(ccw->cda);
593 	}
594 
595 	/* Format-1 IDAWs operate on 2K each */
596 	if (!cp->orb.cmd.c64)
597 		return idal_2k_nr_words((void *)iova, bytes);
598 
599 	/* Using the 2K variant of Format-2 IDAWs? */
600 	if (cp->orb.cmd.i2k)
601 		return idal_2k_nr_words((void *)iova, bytes);
602 
603 	/* The 'usual' case is 4K Format-2 IDAWs */
604 	return idal_nr_words((void *)iova, bytes);
605 }
606 
ccwchain_fetch_ccw(struct ccw1 * ccw,struct page_array * pa,struct channel_program * cp)607 static int ccwchain_fetch_ccw(struct ccw1 *ccw,
608 			      struct page_array *pa,
609 			      struct channel_program *cp)
610 {
611 	struct vfio_device *vdev =
612 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
613 	dma64_t *idaws;
614 	dma32_t *idaws_f1;
615 	int ret;
616 	int idaw_nr;
617 	int i;
618 
619 	/* Calculate size of IDAL */
620 	idaw_nr = ccw_count_idaws(ccw, cp);
621 	if (idaw_nr < 0)
622 		return idaw_nr;
623 
624 	/* Allocate an IDAL from host storage */
625 	idaws = get_guest_idal(ccw, cp, idaw_nr);
626 	if (IS_ERR(idaws)) {
627 		ret = PTR_ERR(idaws);
628 		goto out_init;
629 	}
630 
631 	/*
632 	 * Allocate an array of pages to pin/translate.
633 	 * The number of pages is actually the count of the idaws
634 	 * required for the data transfer, since we only only support
635 	 * 4K IDAWs today.
636 	 */
637 	ret = page_array_alloc(pa, idaw_nr);
638 	if (ret < 0)
639 		goto out_free_idaws;
640 
641 	/*
642 	 * Copy guest IDAWs into page_array, in case the memory they
643 	 * occupy is not contiguous.
644 	 */
645 	idaws_f1 = (dma32_t *)idaws;
646 	for (i = 0; i < idaw_nr; i++) {
647 		if (cp->orb.cmd.c64)
648 			pa->pa_iova[i] = dma64_to_u64(idaws[i]);
649 		else
650 			pa->pa_iova[i] = dma32_to_u32(idaws_f1[i]);
651 	}
652 
653 	if (ccw_does_data_transfer(ccw)) {
654 		ret = page_array_pin(pa, vdev, idal_is_2k(cp));
655 		if (ret < 0)
656 			goto out_unpin;
657 	} else {
658 		pa->pa_nr = 0;
659 	}
660 
661 	ccw->cda = virt_to_dma32(idaws);
662 	ccw->flags |= CCW_FLAG_IDA;
663 
664 	/* Populate the IDAL with pinned/translated addresses from page */
665 	page_array_idal_create_words(pa, idaws);
666 
667 	return 0;
668 
669 out_unpin:
670 	page_array_unpin_free(pa, vdev, idal_is_2k(cp));
671 out_free_idaws:
672 	kfree(idaws);
673 out_init:
674 	ccw->cda = 0;
675 	return ret;
676 }
677 
/*
 * Fetch one ccw.
 * To reduce memory copy, we'll pin the cda page in memory,
 * and to get rid of the cda 2G limitation of ccw1, we'll translate
 * direct ccws to idal ccws.
 *
 * TICs are handled by ccwchain_fetch_tic(); every other CCW goes through
 * ccwchain_fetch_ccw().
 */
static int ccwchain_fetch_one(struct ccw1 *ccw,
			      struct page_array *pa,
			      struct channel_program *cp)

{
	return ccw_is_tic(ccw) ? ccwchain_fetch_tic(ccw, cp)
			       : ccwchain_fetch_ccw(ccw, pa, cp);
}
694 
695 /**
696  * cp_init() - allocate ccwchains for a channel program.
697  * @cp: channel_program on which to perform the operation
698  * @orb: control block for the channel program from the guest
699  *
700  * This creates one or more ccwchain(s), and copies the raw data of
701  * the target channel program from @orb->cmd.iova to the new ccwchain(s).
702  *
703  * Limitations:
704  * 1. Supports idal(c64) ccw chaining.
705  * 2. Supports 4k idaw.
706  *
707  * Returns:
708  *   %0 on success and a negative error value on failure.
709  */
cp_init(struct channel_program * cp,union orb * orb)710 int cp_init(struct channel_program *cp, union orb *orb)
711 {
712 	struct vfio_device *vdev =
713 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
714 	/* custom ratelimit used to avoid flood during guest IPL */
715 	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
716 	int ret;
717 
718 	/* this is an error in the caller */
719 	if (cp->initialized)
720 		return -EBUSY;
721 
722 	/*
723 	 * We only support prefetching the channel program. We assume all channel
724 	 * programs executed by supported guests likewise support prefetching.
725 	 * Executing a channel program that does not specify prefetching will
726 	 * typically not cause an error, but a warning is issued to help identify
727 	 * the problem if something does break.
728 	 */
729 	if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
730 		dev_warn(
731 			vdev->dev,
732 			"Prefetching channel program even though prefetch not specified in ORB");
733 
734 	INIT_LIST_HEAD(&cp->ccwchain_list);
735 	memcpy(&cp->orb, orb, sizeof(*orb));
736 
737 	/* Build a ccwchain for the first CCW segment */
738 	ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
739 
740 	if (!ret)
741 		cp->initialized = true;
742 
743 	return ret;
744 }
745 
746 
747 /**
748  * cp_free() - free resources for channel program.
749  * @cp: channel_program on which to perform the operation
750  *
751  * This unpins the memory pages and frees the memory space occupied by
752  * @cp, which must have been returned by a previous call to cp_init().
753  * Otherwise, undefined behavior occurs.
754  */
cp_free(struct channel_program * cp)755 void cp_free(struct channel_program *cp)
756 {
757 	struct vfio_device *vdev =
758 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
759 	struct ccwchain *chain, *temp;
760 	int i;
761 
762 	if (!cp->initialized)
763 		return;
764 
765 	cp->initialized = false;
766 	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
767 		for (i = 0; i < chain->ch_len; i++) {
768 			page_array_unpin_free(&chain->ch_pa[i], vdev, idal_is_2k(cp));
769 			ccwchain_cda_free(chain, i);
770 		}
771 		ccwchain_free(chain);
772 	}
773 }
774 
775 /**
776  * cp_prefetch() - translate a guest physical address channel program to
777  *                 a real-device runnable channel program.
778  * @cp: channel_program on which to perform the operation
779  *
780  * This function translates the guest-physical-address channel program
781  * and stores the result to ccwchain list. @cp must have been
782  * initialized by a previous call with cp_init(). Otherwise, undefined
783  * behavior occurs.
784  * For each chain composing the channel program:
785  * - On entry ch_len holds the count of CCWs to be translated.
786  * - On exit ch_len is adjusted to the count of successfully translated CCWs.
787  * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
788  *
789  * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
790  * as helpers to do ccw chain translation inside the kernel. Basically
791  * they accept a channel program issued by a virtual machine, and
792  * translate the channel program to a real-device runnable channel
793  * program.
794  *
795  * These APIs will copy the ccws into kernel-space buffers, and update
796  * the guest physical addresses with their corresponding host physical
797  * addresses.  Then channel I/O device drivers could issue the
798  * translated channel program to real devices to perform an I/O
799  * operation.
800  *
801  * These interfaces are designed to support translation only for
802  * channel programs, which are generated and formatted by a
803  * guest. Thus this will make it possible for things like VFIO to
804  * leverage the interfaces to passthrough a channel I/O mediated
805  * device in QEMU.
806  *
807  * We support direct ccw chaining by translating them to idal ccws.
808  *
809  * Returns:
810  *   %0 on success and a negative error value on failure.
811  */
cp_prefetch(struct channel_program * cp)812 int cp_prefetch(struct channel_program *cp)
813 {
814 	struct ccwchain *chain;
815 	struct ccw1 *ccw;
816 	struct page_array *pa;
817 	int len, idx, ret;
818 
819 	/* this is an error in the caller */
820 	if (!cp->initialized)
821 		return -EINVAL;
822 
823 	list_for_each_entry(chain, &cp->ccwchain_list, next) {
824 		len = chain->ch_len;
825 		for (idx = 0; idx < len; idx++) {
826 			ccw = &chain->ch_ccw[idx];
827 			pa = &chain->ch_pa[idx];
828 
829 			ret = ccwchain_fetch_one(ccw, pa, cp);
830 			if (ret)
831 				goto out_err;
832 		}
833 	}
834 
835 	return 0;
836 out_err:
837 	/* Only cleanup the chain elements that were actually translated. */
838 	chain->ch_len = idx;
839 	list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
840 		chain->ch_len = 0;
841 	}
842 	return ret;
843 }
844 
845 /**
846  * cp_get_orb() - get the orb of the channel program
847  * @cp: channel_program on which to perform the operation
848  * @sch: subchannel the operation will be performed against
849  *
850  * This function returns the address of the updated orb of the channel
851  * program. Channel I/O device drivers could use this orb to issue a
852  * ssch.
853  */
cp_get_orb(struct channel_program * cp,struct subchannel * sch)854 union orb *cp_get_orb(struct channel_program *cp, struct subchannel *sch)
855 {
856 	union orb *orb;
857 	struct ccwchain *chain;
858 	struct ccw1 *cpa;
859 
860 	/* this is an error in the caller */
861 	if (!cp->initialized)
862 		return NULL;
863 
864 	orb = &cp->orb;
865 
866 	orb->cmd.intparm = (u32)virt_to_phys(sch);
867 	orb->cmd.fmt = 1;
868 
869 	/*
870 	 * Everything built by vfio-ccw is a Format-2 IDAL.
871 	 * If the input was a Format-1 IDAL, indicate that
872 	 * 2K Format-2 IDAWs were created here.
873 	 */
874 	if (!orb->cmd.c64)
875 		orb->cmd.i2k = 1;
876 	orb->cmd.c64 = 1;
877 
878 	if (orb->cmd.lpm == 0)
879 		orb->cmd.lpm = sch->lpm;
880 
881 	chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
882 	cpa = chain->ch_ccw;
883 	orb->cmd.cpa = virt_to_dma32(cpa);
884 
885 	return orb;
886 }
887 
888 /**
889  * cp_update_scsw() - update scsw for a channel program.
890  * @cp: channel_program on which to perform the operation
891  * @scsw: I/O results of the channel program and also the target to be
892  *        updated
893  *
894  * @scsw contains the I/O results of the channel program that pointed
895  * to by @cp. However what @scsw->cpa stores is a host physical
896  * address, which is meaningless for the guest, which is waiting for
897  * the I/O results.
898  *
899  * This function updates @scsw->cpa to its coressponding guest physical
900  * address.
901  */
cp_update_scsw(struct channel_program * cp,union scsw * scsw)902 void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
903 {
904 	struct ccwchain *chain;
905 	dma32_t cpa = scsw->cmd.cpa;
906 	u32 ccw_head;
907 
908 	if (!cp->initialized)
909 		return;
910 
911 	/*
912 	 * LATER:
913 	 * For now, only update the cmd.cpa part. We may need to deal with
914 	 * other portions of the schib as well, even if we don't return them
915 	 * in the ioctl directly. Path status changes etc.
916 	 */
917 	list_for_each_entry(chain, &cp->ccwchain_list, next) {
918 		ccw_head = dma32_to_u32(virt_to_dma32(chain->ch_ccw));
919 		/*
920 		 * On successful execution, cpa points just beyond the end
921 		 * of the chain.
922 		 */
923 		if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
924 			/*
925 			 * (cpa - ccw_head) is the offset value of the host
926 			 * physical ccw to its chain head.
927 			 * Adding this value to the guest physical ccw chain
928 			 * head gets us the guest cpa:
929 			 * cpa = chain->ch_iova + (cpa - ccw_head)
930 			 */
931 			cpa = dma32_add(cpa, chain->ch_iova - ccw_head);
932 			break;
933 		}
934 	}
935 
936 	scsw->cmd.cpa = cpa;
937 }
938 
939 /**
940  * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
941  * @cp: channel_program on which to perform the operation
942  * @iova: the iova to check
943  * @length: the length to check from @iova
944  *
945  * If the @iova is currently pinned for the ccw chain, return true;
946  * else return false.
947  */
cp_iova_pinned(struct channel_program * cp,u64 iova,u64 length)948 bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length)
949 {
950 	struct ccwchain *chain;
951 	int i;
952 
953 	if (!cp->initialized)
954 		return false;
955 
956 	list_for_each_entry(chain, &cp->ccwchain_list, next) {
957 		for (i = 0; i < chain->ch_len; i++)
958 			if (page_array_iova_pinned(&chain->ch_pa[i], iova, length))
959 				return true;
960 	}
961 
962 	return false;
963 }
964