// SPDX-License-Identifier: GPL-2.0+
/*
 * TCE helpers for IODA PCI/PCIe on PowerNV platforms
 *
 * Copyright 2018 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/iommu.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include "pci.h"

pnv_ioda_parse_tce_sizes(struct pnv_phb * phb)20  unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
21  {
22  	struct pci_controller *hose = phb->hose;
23  	struct device_node *dn = hose->dn;
24  	unsigned long mask = 0;
25  	int i, rc, count;
26  	u32 val;
27  
28  	count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
29  	if (count <= 0) {
30  		mask = SZ_4K | SZ_64K;
31  		/* Add 16M for POWER8 by default */
32  		if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
33  				!cpu_has_feature(CPU_FTR_ARCH_300))
34  			mask |= SZ_16M | SZ_256M;
35  		return mask;
36  	}
37  
38  	for (i = 0; i < count; i++) {
39  		rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
40  						i, &val);
41  		if (rc == 0)
42  			mask |= 1ULL << val;
43  	}
44  
45  	return mask;
46  }
47  
/*
 * Fill in the generic iommu_table fields for a PowerNV TCE table.
 *
 * @tce_mem:    virtual address of the (top level of the) TCE table
 * @tce_size:   table size in bytes (8 bytes per TCE entry)
 * @dma_offset: bus address where the DMA window starts
 * @page_shift: log2 of the IOMMU page size
 */
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
		void *tce_mem, u64 tce_size,
		u64 dma_offset, unsigned int page_shift)
{
	tbl->it_base = (unsigned long)tce_mem;
	tbl->it_page_shift = page_shift;
	tbl->it_offset = dma_offset >> page_shift;
	tbl->it_size = tce_size >> 3;	/* entry count: 8 bytes per TCE */
	tbl->it_blocksize = 16;
	tbl->it_index = 0;
	tbl->it_busno = 0;
	tbl->it_type = TCE_PCI;
}
61  
pnv_alloc_tce_level(int nid,unsigned int shift)62  static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
63  {
64  	struct page *tce_mem = NULL;
65  	__be64 *addr;
66  
67  	tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
68  			shift - PAGE_SHIFT);
69  	if (!tce_mem) {
70  		pr_err("Failed to allocate a TCE memory, level shift=%d\n",
71  				shift);
72  		return NULL;
73  	}
74  	addr = page_address(tce_mem);
75  	memset(addr, 0, 1UL << shift);
76  
77  	return addr;
78  }
79  
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
		unsigned long size, unsigned int levels);

/*
 * Walk the (possibly multi-level) TCE table and return a pointer to
 * the entry for @idx, or NULL when a level is missing and @alloc is
 * false, or when allocating a missing level failed.
 *
 * @user selects the userspace view (it_userspace) instead of the
 * hardware table; @idx must already be adjusted by it_offset.
 */
static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
{
	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
	int  level = tbl->it_indirect_levels;
	const long shift = ilog2(tbl->it_level_size);
	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);

	while (level) {
		/* Index of the descriptor for @idx within this level */
		int n = (idx & mask) >> (level * shift);
		unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));

		if (!tce) {
			__be64 *tmp2;

			if (!alloc)
				return NULL;

			/*
			 * Allocate the missing next level: it_level_size
			 * entries of 8 bytes, hence log2(entries) + 3.
			 */
			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
					ilog2(tbl->it_level_size) + 3);
			if (!tmp2)
				return NULL;

			tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
			/*
			 * Publish the new level; cmpxchg detects a
			 * concurrent walker having installed one first.
			 */
			oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
					cpu_to_be64(tce)));
			if (oldtce) {
				/*
				 * Lost the race: free ours, descend into the
				 * winner's level.
				 *
				 * NOTE(review): the "size" argument here is
				 * the level *shift* (log2 entries + 3), but
				 * do_free_pages() treats size as an entry
				 * count — looks inconsistent, confirm
				 * against upstream before relying on it.
				 */
				pnv_pci_ioda2_table_do_free_pages(tmp2,
					ilog2(tbl->it_level_size) + 3, 1);
				tce = oldtce;
			}
		}

		/* Strip permission bits to recover the child's address */
		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
		idx &= ~mask;
		mask >>= shift;
		--level;
	}

	return tmp + idx;
}
123  
pnv_tce_build(struct iommu_table * tbl,long index,long npages,unsigned long uaddr,enum dma_data_direction direction,unsigned long attrs)124  int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
125  		unsigned long uaddr, enum dma_data_direction direction,
126  		unsigned long attrs)
127  {
128  	u64 proto_tce = iommu_direction_to_tce_perm(direction);
129  	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
130  	long i;
131  
132  	if (proto_tce & TCE_PCI_WRITE)
133  		proto_tce |= TCE_PCI_READ;
134  
135  	for (i = 0; i < npages; i++) {
136  		unsigned long newtce = proto_tce |
137  			((rpn + i) << tbl->it_page_shift);
138  		unsigned long idx = index - tbl->it_offset + i;
139  
140  		*(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
141  	}
142  
143  	return 0;
144  }
145  
#ifdef CONFIG_IOMMU_API
/*
 * Atomically exchange the TCE at @index with one built from @*hpa and
 * @*direction, returning the previous host physical address and
 * direction through the same pointers (VFIO/KVM helper).
 *
 * Returns 0 on success, -ENOMEM if a missing table level could not be
 * allocated. Clearing (DMA_NONE) an entry whose level was never
 * allocated is a no-op: *hpa is set to 0 and 0 is returned.
 */
int pnv_tce_xchg(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
	unsigned long newtce = *hpa | proto_tce, oldtce;
	unsigned long idx = index - tbl->it_offset;
	__be64 *ptce = NULL;

	/* Caller must pass an HPA aligned to the IOMMU page size */
	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));

	if (*direction == DMA_NONE) {
		/* Clearing: look up without allocating missing levels */
		ptce = pnv_tce(tbl, false, idx, false);
		if (!ptce) {
			*hpa = 0;
			return 0;
		}
	}

	if (!ptce) {
		/* Mapping: allocate intermediate levels on demand */
		ptce = pnv_tce(tbl, false, idx, true);
		if (!ptce)
			return -ENOMEM;
	}

	/* Hardware requires the read bit whenever write is granted */
	if (newtce & TCE_PCI_WRITE)
		newtce |= TCE_PCI_READ;

	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	*direction = iommu_tce_direction(oldtce);

	return 0;
}
180  
/*
 * Return a pointer to the userspace-view entry for @index, allocating
 * missing levels when @alloc is true. Returns NULL (with a one-shot
 * warning) if the table has no userspace view, or NULL if the
 * lookup/allocation in pnv_tce() failed.
 */
__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
{
	if (WARN_ON_ONCE(!tbl->it_userspace))
		return NULL;

	return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
}
#endif

/*
 * Clear @npages TCE entries starting at @index. Ranges whose backing
 * bottom-level table was never allocated are skipped in one step.
 */
void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
{
	long i;

	for (i = 0; i < npages; i++) {
		unsigned long idx = index - tbl->it_offset + i;
		__be64 *ptce = pnv_tce(tbl, false, idx,	false);

		if (ptce)
			*ptce = cpu_to_be64(0);
		else
			/*
			 * Skip the rest of the level: set the low bits so
			 * that after i++ we land on the first index of the
			 * next bottom-level block.
			 */
			i |= tbl->it_level_size - 1;
	}
}
205  
pnv_tce_get(struct iommu_table * tbl,long index)206  unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
207  {
208  	__be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
209  
210  	if (!ptce)
211  		return 0;
212  
213  	return be64_to_cpu(*ptce);
214  }
215  
/*
 * Recursively free a TCE table level and everything below it.
 *
 * @addr may still carry the TCE_PCI_READ/WRITE permission bits from
 * its parent descriptor, so they are masked off first. @size is the
 * number of 8-byte entries per level; when @levels is non-zero each
 * populated entry (identified by its permission bits) points at a
 * child level which is freed before this one.
 */
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
		unsigned long size, unsigned int levels)
{
	const unsigned long addr_ul = (unsigned long) addr &
			~(TCE_PCI_READ | TCE_PCI_WRITE);

	if (levels) {
		long i;
		u64 *tmp = (u64 *) addr_ul;

		for (i = 0; i < size; ++i) {
			unsigned long hpa = be64_to_cpu(tmp[i]);

			/* Descriptors with no permission bits are empty */
			if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
				continue;

			pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
					levels - 1);
		}
	}

	/* Level memory was allocated as 1 << (log2(size) + 3) bytes */
	free_pages(addr_ul, get_order(size << 3));
}
239  
pnv_pci_ioda2_table_free_pages(struct iommu_table * tbl)240  void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
241  {
242  	const unsigned long size = tbl->it_indirect_levels ?
243  			tbl->it_level_size : tbl->it_size;
244  
245  	if (!tbl->it_size)
246  		return;
247  
248  	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
249  			tbl->it_indirect_levels);
250  	if (tbl->it_userspace) {
251  		pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
252  				tbl->it_indirect_levels);
253  	}
254  }
255  
pnv_pci_ioda2_table_do_alloc_pages(int nid,unsigned int shift,unsigned int levels,unsigned long limit,unsigned long * current_offset,unsigned long * total_allocated)256  static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
257  		unsigned int levels, unsigned long limit,
258  		unsigned long *current_offset, unsigned long *total_allocated)
259  {
260  	__be64 *addr, *tmp;
261  	unsigned long allocated = 1UL << shift;
262  	unsigned int entries = 1UL << (shift - 3);
263  	long i;
264  
265  	addr = pnv_alloc_tce_level(nid, shift);
266  	*total_allocated += allocated;
267  
268  	--levels;
269  	if (!levels) {
270  		*current_offset += allocated;
271  		return addr;
272  	}
273  
274  	for (i = 0; i < entries; ++i) {
275  		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
276  				levels, limit, current_offset, total_allocated);
277  		if (!tmp)
278  			break;
279  
280  		addr[i] = cpu_to_be64(__pa(tmp) |
281  				TCE_PCI_READ | TCE_PCI_WRITE);
282  
283  		if (*current_offset >= limit)
284  			break;
285  	}
286  
287  	return addr;
288  }
289  
/*
 * Allocate a TCE table covering a DMA window of @window_size bytes of
 * @page_shift-sized IOMMU pages, organised in @levels levels, and
 * initialise @tbl to describe it. Only one level is allocated eagerly
 * here (do_alloc_pages is called with levels = 1); deeper levels are
 * created on demand by pnv_tce(). When @alloc_userspace_copy, a
 * parallel table tracking userspace addresses is allocated the same
 * way and must end up the same size.
 *
 * Returns 0 on success, -EINVAL for unsupported geometry, -ENOMEM on
 * (possibly partial) allocation failure, with everything freed.
 */
long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
		__u32 page_shift, __u64 window_size, __u32 levels,
		bool alloc_userspace_copy, struct iommu_table *tbl)
{
	void *addr, *uas = NULL;
	unsigned long offset = 0, level_shift, total_allocated = 0;
	unsigned long total_allocated_uas = 0;
	const unsigned int window_shift = ilog2(window_size);
	unsigned int entries_shift = window_shift - page_shift;
	unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
			PAGE_SHIFT);
	const unsigned long tce_table_size = 1UL << table_shift;

	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
		return -EINVAL;

	if (!is_power_of_2(window_size))
		return -EINVAL;

	/* Adjust direct table size from window_size and levels */
	entries_shift = (entries_shift + levels - 1) / levels;
	level_shift = entries_shift + 3;
	level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);

	/* NOTE(review): presumably the hardware's addressable-bits limit
	 * on the table geometry — confirm against the IODA2 spec */
	if ((level_shift - 3) * levels + page_shift >= 55)
		return -EINVAL;

	/* Allocate TCE table */
	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
			1, tce_table_size, &offset, &total_allocated);

	/* addr==NULL means that the first level allocation failed */
	if (!addr)
		return -ENOMEM;

	/*
	 * First level was allocated but some lower level failed as
	 * we did not allocate as much as we wanted,
	 * release partially allocated table.
	 */
	if (levels == 1 && offset < tce_table_size)
		goto free_tces_exit;

	/* Allocate userspace view of the TCE table */
	if (alloc_userspace_copy) {
		offset = 0;
		uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
				1, tce_table_size, &offset,
				&total_allocated_uas);
		if (!uas)
			goto free_tces_exit;
		/* The userspace view must mirror the real table exactly */
		if (levels == 1 && (offset < tce_table_size ||
				total_allocated_uas != total_allocated))
			goto free_uas_exit;
	}

	/* Setup linux iommu table */
	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
			page_shift);
	tbl->it_level_size = 1ULL << (level_shift - 3);
	tbl->it_indirect_levels = levels - 1;
	tbl->it_userspace = uas;
	tbl->it_nid = nid;

	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
			window_size, tce_table_size, bus_offset, tbl->it_base,
			tbl->it_userspace, 1, levels);

	return 0;

free_uas_exit:
	/* Falls through: free the userspace view, then the real table */
	pnv_pci_ioda2_table_do_free_pages(uas,
			1ULL << (level_shift - 3), levels - 1);
free_tces_exit:
	pnv_pci_ioda2_table_do_free_pages(addr,
			1ULL << (level_shift - 3), levels - 1);

	return -ENOMEM;
}
369  
pnv_pci_unlink_table_and_group(struct iommu_table * tbl,struct iommu_table_group * table_group)370  void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
371  		struct iommu_table_group *table_group)
372  {
373  	long i;
374  	bool found;
375  	struct iommu_table_group_link *tgl;
376  
377  	if (!tbl || !table_group)
378  		return;
379  
380  	/* Remove link to a group from table's list of attached groups */
381  	found = false;
382  
383  	rcu_read_lock();
384  	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
385  		if (tgl->table_group == table_group) {
386  			list_del_rcu(&tgl->next);
387  			kfree_rcu(tgl, rcu);
388  			found = true;
389  			break;
390  		}
391  	}
392  	rcu_read_unlock();
393  
394  	if (WARN_ON(!found))
395  		return;
396  
397  	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
398  	found = false;
399  	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
400  		if (table_group->tables[i] == tbl) {
401  			iommu_tce_table_put(tbl);
402  			table_group->tables[i] = NULL;
403  			found = true;
404  			break;
405  		}
406  	}
407  	WARN_ON(!found);
408  }
409  
pnv_pci_link_table_and_group(int node,int num,struct iommu_table * tbl,struct iommu_table_group * table_group)410  long pnv_pci_link_table_and_group(int node, int num,
411  		struct iommu_table *tbl,
412  		struct iommu_table_group *table_group)
413  {
414  	struct iommu_table_group_link *tgl = NULL;
415  
416  	if (WARN_ON(!tbl || !table_group))
417  		return -EINVAL;
418  
419  	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
420  			node);
421  	if (!tgl)
422  		return -ENOMEM;
423  
424  	tgl->table_group = table_group;
425  	list_add_rcu(&tgl->next, &tbl->it_group_list);
426  
427  	table_group->tables[num] = iommu_tce_table_get(tbl);
428  
429  	return 0;
430  }
431