1  // SPDX-License-Identifier: GPL-2.0-only
2  /* Copyright(c) 2023 Intel Corporation */
3  
4  #include <linux/dev_printk.h>
5  #include <linux/dma-mapping.h>
6  #include <linux/export.h>
7  #include <linux/kernel.h>
8  #include <linux/kstrtox.h>
9  #include <linux/overflow.h>
10  #include <linux/string.h>
11  #include <linux/slab.h>
12  #include <linux/types.h>
13  #include <asm/errno.h>
14  #include "adf_accel_devices.h"
15  #include "adf_admin.h"
16  #include "adf_cfg.h"
17  #include "adf_cfg_strings.h"
18  #include "adf_clock.h"
19  #include "adf_common_drv.h"
20  #include "adf_heartbeat.h"
21  #include "adf_transport_internal.h"
22  #include "icp_qat_fw_init_admin.h"
23  
/*
 * 32-bit pattern written over the heartbeat stats memory by
 * adf_heartbeat_check_ctrs() and searched for by validate_hb_ctrs_cnt().
 */
#define ADF_HB_EMPTY_SIG 0xA5A5A5A5
25  
adf_hb_check_polling_freq(struct adf_accel_dev * accel_dev)26  static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev)
27  {
28  	u64 curr_time = adf_clock_get_current_time();
29  	u64 polling_time = curr_time - accel_dev->heartbeat->last_hb_check_time;
30  
31  	if (polling_time < accel_dev->heartbeat->hb_timer) {
32  		dev_warn(&GET_DEV(accel_dev),
33  			 "HB polling too frequent. Configured HB timer %d ms\n",
34  			 accel_dev->heartbeat->hb_timer);
35  		return -EINVAL;
36  	}
37  
38  	accel_dev->heartbeat->last_hb_check_time = curr_time;
39  	return 0;
40  }
41  
42  /**
43   * validate_hb_ctrs_cnt() - checks if the number of heartbeat counters should
44   * be updated by one to support the currently loaded firmware.
45   * @accel_dev: Pointer to acceleration device.
46   *
47   * Return:
48   * * true - hb_ctrs must increased by ADF_NUM_PKE_STRAND
49   * * false - no changes needed
50   */
validate_hb_ctrs_cnt(struct adf_accel_dev * accel_dev)51  static bool validate_hb_ctrs_cnt(struct adf_accel_dev *accel_dev)
52  {
53  	const size_t hb_ctrs = accel_dev->hw_device->num_hb_ctrs;
54  	const size_t max_aes = accel_dev->hw_device->num_engines;
55  	const size_t hb_struct_size = sizeof(struct hb_cnt_pair);
56  	const size_t exp_diff_size = array3_size(ADF_NUM_PKE_STRAND, max_aes,
57  						 hb_struct_size);
58  	const size_t dev_ctrs = size_mul(max_aes, hb_ctrs);
59  	const size_t stats_size = size_mul(dev_ctrs, hb_struct_size);
60  	const u32 exp_diff_cnt = exp_diff_size / sizeof(u32);
61  	const u32 stats_el_cnt = stats_size / sizeof(u32);
62  	struct hb_cnt_pair *hb_stats = accel_dev->heartbeat->dma.virt_addr;
63  	const u32 *mem_to_chk = (u32 *)(hb_stats + dev_ctrs);
64  	u32 el_diff_cnt = 0;
65  	int i;
66  
67  	/* count how many bytes are different from pattern */
68  	for (i = 0; i < stats_el_cnt; i++) {
69  		if (mem_to_chk[i] == ADF_HB_EMPTY_SIG)
70  			break;
71  
72  		el_diff_cnt++;
73  	}
74  
75  	return el_diff_cnt && el_diff_cnt == exp_diff_cnt;
76  }
77  
adf_heartbeat_check_ctrs(struct adf_accel_dev * accel_dev)78  void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev)
79  {
80  	struct hb_cnt_pair *hb_stats = accel_dev->heartbeat->dma.virt_addr;
81  	const size_t hb_ctrs = accel_dev->hw_device->num_hb_ctrs;
82  	const size_t max_aes = accel_dev->hw_device->num_engines;
83  	const size_t dev_ctrs = size_mul(max_aes, hb_ctrs);
84  	const size_t stats_size = size_mul(dev_ctrs, sizeof(struct hb_cnt_pair));
85  	const size_t mem_items_to_fill = size_mul(stats_size, 2) / sizeof(u32);
86  
87  	/* fill hb stats memory with pattern */
88  	memset32((uint32_t *)hb_stats, ADF_HB_EMPTY_SIG, mem_items_to_fill);
89  	accel_dev->heartbeat->ctrs_cnt_checked = false;
90  }
91  EXPORT_SYMBOL_GPL(adf_heartbeat_check_ctrs);
92  
get_timer_ticks(struct adf_accel_dev * accel_dev,unsigned int * value)93  static int get_timer_ticks(struct adf_accel_dev *accel_dev, unsigned int *value)
94  {
95  	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { };
96  	u32 timer_ms = ADF_CFG_HB_TIMER_DEFAULT_MS;
97  	int cfg_read_status;
98  	u32 ticks;
99  	int ret;
100  
101  	cfg_read_status = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
102  						  ADF_HEARTBEAT_TIMER, timer_str);
103  	if (cfg_read_status == 0) {
104  		if (kstrtouint(timer_str, 10, &timer_ms))
105  			dev_dbg(&GET_DEV(accel_dev),
106  				"kstrtouint failed to parse the %s, param value",
107  				ADF_HEARTBEAT_TIMER);
108  	}
109  
110  	if (timer_ms < ADF_CFG_HB_TIMER_MIN_MS) {
111  		dev_err(&GET_DEV(accel_dev), "Timer cannot be less than %u\n",
112  			ADF_CFG_HB_TIMER_MIN_MS);
113  		return -EINVAL;
114  	}
115  
116  	/*
117  	 * On 4xxx devices adf_timer is responsible for HB updates and
118  	 * its period is fixed to 200ms
119  	 */
120  	if (accel_dev->timer)
121  		timer_ms = ADF_CFG_HB_TIMER_MIN_MS;
122  
123  	ret = adf_heartbeat_ms_to_ticks(accel_dev, timer_ms, &ticks);
124  	if (ret)
125  		return ret;
126  
127  	adf_heartbeat_save_cfg_param(accel_dev, timer_ms);
128  
129  	accel_dev->heartbeat->hb_timer = timer_ms;
130  	*value = ticks;
131  
132  	return 0;
133  }
134  
/*
 * Check the heartbeat counters of every thread of a single AE.
 *
 * @curr:    current counter pairs of the AE, one per thread
 * @prev:    counter pairs of the AE saved by the previous check
 * @count:   per-thread count of consecutive failed checks, updated in place
 * @hb_ctrs: number of counter pairs (threads) to inspect
 *
 * A thread fails a check when its response counter has not moved since the
 * previous check while either it is the admin thread or it has requests
 * that were not answered (request counter differs from response counter).
 * Any other outcome resets the thread's failure count.
 *
 * Returns -EIO as soon as a thread reaches ADF_CFG_HB_COUNT_THRESHOLD
 * consecutive failures (the remaining threads are not inspected),
 * 0 otherwise.
 */
static int check_ae(struct hb_cnt_pair *curr, struct hb_cnt_pair *prev,
		    u16 *count, const size_t hb_ctrs)
{
	size_t idx;

	for (idx = 0; idx < hb_ctrs; idx++) {
		const u16 requested = curr[idx].req_heartbeat_cnt;
		const u16 answered = curr[idx].resp_heartbeat_cnt;
		const u16 answered_before = prev[idx].resp_heartbeat_cnt;
		const bool must_progress = idx == ADF_AE_ADMIN_THREAD ||
					   requested != answered;

		if (!must_progress || answered != answered_before) {
			count[idx] = 0;
			continue;
		}

		count[idx]++;
		if (count[idx] >= ADF_CFG_HB_COUNT_THRESHOLD)
			return -EIO;
	}

	return 0;
}
158  
adf_hb_get_status(struct adf_accel_dev * accel_dev)159  static int adf_hb_get_status(struct adf_accel_dev *accel_dev)
160  {
161  	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
162  	struct hb_cnt_pair *live_stats, *last_stats, *curr_stats;
163  	const size_t hb_ctrs = hw_device->num_hb_ctrs;
164  	const unsigned long ae_mask = hw_device->ae_mask;
165  	const size_t max_aes = hw_device->num_engines;
166  	const size_t dev_ctrs = size_mul(max_aes, hb_ctrs);
167  	const size_t stats_size = size_mul(dev_ctrs, sizeof(*curr_stats));
168  	struct hb_cnt_pair *ae_curr_p, *ae_prev_p;
169  	u16 *count_fails, *ae_count_p;
170  	size_t ae_offset;
171  	size_t ae = 0;
172  	int ret = 0;
173  
174  	if (!accel_dev->heartbeat->ctrs_cnt_checked) {
175  		if (validate_hb_ctrs_cnt(accel_dev))
176  			hw_device->num_hb_ctrs += ADF_NUM_PKE_STRAND;
177  
178  		accel_dev->heartbeat->ctrs_cnt_checked = true;
179  	}
180  
181  	live_stats = accel_dev->heartbeat->dma.virt_addr;
182  	last_stats = live_stats + dev_ctrs;
183  	count_fails = (u16 *)(last_stats + dev_ctrs);
184  
185  	curr_stats = kmemdup(live_stats, stats_size, GFP_KERNEL);
186  	if (!curr_stats)
187  		return -ENOMEM;
188  
189  	/* loop through active AEs */
190  	for_each_set_bit(ae, &ae_mask, max_aes) {
191  		ae_offset = size_mul(ae, hb_ctrs);
192  		ae_curr_p = curr_stats + ae_offset;
193  		ae_prev_p = last_stats + ae_offset;
194  		ae_count_p = count_fails + ae_offset;
195  
196  		ret = check_ae(ae_curr_p, ae_prev_p, ae_count_p, hb_ctrs);
197  		if (ret)
198  			break;
199  	}
200  
201  	/* Copy current stats for the next iteration */
202  	memcpy(last_stats, curr_stats, stats_size);
203  	kfree(curr_stats);
204  
205  	return ret;
206  }
207  
adf_heartbeat_reset(struct adf_accel_dev * accel_dev)208  static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev)
209  {
210  	u64 curr_time = adf_clock_get_current_time();
211  	u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time;
212  
213  	if (time_since_reset < ADF_CFG_HB_RESET_MS)
214  		return;
215  
216  	accel_dev->heartbeat->last_hb_reset_time = curr_time;
217  	if (adf_notify_fatal_error(accel_dev))
218  		dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n");
219  }
220  
adf_heartbeat_status(struct adf_accel_dev * accel_dev,enum adf_device_heartbeat_status * hb_status)221  void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
222  			  enum adf_device_heartbeat_status *hb_status)
223  {
224  	struct adf_heartbeat *hb;
225  
226  	if (!adf_dev_started(accel_dev) ||
227  	    test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) {
228  		*hb_status = HB_DEV_UNRESPONSIVE;
229  		return;
230  	}
231  
232  	if (adf_hb_check_polling_freq(accel_dev) == -EINVAL) {
233  		*hb_status = HB_DEV_UNSUPPORTED;
234  		return;
235  	}
236  
237  	hb = accel_dev->heartbeat;
238  	hb->hb_sent_counter++;
239  
240  	if (adf_hb_get_status(accel_dev)) {
241  		dev_err(&GET_DEV(accel_dev),
242  			"Heartbeat ERROR: QAT is not responding.\n");
243  		*hb_status = HB_DEV_UNRESPONSIVE;
244  		hb->hb_failed_counter++;
245  		adf_heartbeat_reset(accel_dev);
246  		return;
247  	}
248  
249  	*hb_status = HB_DEV_ALIVE;
250  }
251  
/*
 * Convert a period in milliseconds to heartbeat clock ticks.
 *
 * @accel_dev: Pointer to acceleration device.
 * @time_ms:   Period in milliseconds.
 * @value:     Output, number of ticks; written only on success.
 *
 * The clock rate is obtained from the device's get_hb_clock() callback and
 * divided by MSEC_PER_SEC (integer division) before being multiplied by
 * @time_ms.
 *
 * Returns 0 on success, -EINVAL when the device provides no get_hb_clock()
 * callback, -EOVERFLOW when the tick count does not fit in 32 bits.
 */
int adf_heartbeat_ms_to_ticks(struct adf_accel_dev *accel_dev, unsigned int time_ms,
			      u32 *value)
{
	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
	u32 clk_per_msec;
	u32 clk_per_sec;
	u32 ticks;

	/* HB clock may be different than AE clock */
	if (!hw_data->get_hb_clock)
		return -EINVAL;

	clk_per_sec = hw_data->get_hb_clock(hw_data);
	clk_per_msec = clk_per_sec / MSEC_PER_SEC;

	/* reject periods whose tick count would silently wrap around */
	if (check_mul_overflow(time_ms, clk_per_msec, &ticks))
		return -EOVERFLOW;

	*value = ticks;

	return 0;
}
267  
adf_heartbeat_save_cfg_param(struct adf_accel_dev * accel_dev,unsigned int timer_ms)268  int adf_heartbeat_save_cfg_param(struct adf_accel_dev *accel_dev,
269  				 unsigned int timer_ms)
270  {
271  	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
272  
273  	snprintf(timer_str, sizeof(timer_str), "%u", timer_ms);
274  	return adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC,
275  					  ADF_HEARTBEAT_TIMER, timer_str,
276  					  ADF_STR);
277  }
278  EXPORT_SYMBOL_GPL(adf_heartbeat_save_cfg_param);
279  
adf_heartbeat_init(struct adf_accel_dev * accel_dev)280  int adf_heartbeat_init(struct adf_accel_dev *accel_dev)
281  {
282  	struct adf_heartbeat *hb;
283  
284  	hb = kzalloc(sizeof(*hb), GFP_KERNEL);
285  	if (!hb)
286  		goto err_ret;
287  
288  	hb->dma.virt_addr = dma_alloc_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
289  					       &hb->dma.phy_addr, GFP_KERNEL);
290  	if (!hb->dma.virt_addr)
291  		goto err_free;
292  
293  	/*
294  	 * Default set this flag as true to avoid unnecessary checks,
295  	 * it will be reset on platforms that need such a check
296  	 */
297  	hb->ctrs_cnt_checked = true;
298  	accel_dev->heartbeat = hb;
299  
300  	return 0;
301  
302  err_free:
303  	kfree(hb);
304  err_ret:
305  	return -ENOMEM;
306  }
307  
adf_heartbeat_start(struct adf_accel_dev * accel_dev)308  int adf_heartbeat_start(struct adf_accel_dev *accel_dev)
309  {
310  	unsigned int timer_ticks;
311  	int ret;
312  
313  	if (!accel_dev->heartbeat) {
314  		dev_warn(&GET_DEV(accel_dev), "Heartbeat instance not found!");
315  		return -EFAULT;
316  	}
317  
318  	if (accel_dev->hw_device->check_hb_ctrs)
319  		accel_dev->hw_device->check_hb_ctrs(accel_dev);
320  
321  	ret = get_timer_ticks(accel_dev, &timer_ticks);
322  	if (ret)
323  		return ret;
324  
325  	ret = adf_send_admin_hb_timer(accel_dev, timer_ticks);
326  	if (ret)
327  		dev_warn(&GET_DEV(accel_dev), "Heartbeat not supported!");
328  
329  	return ret;
330  }
331  
adf_heartbeat_shutdown(struct adf_accel_dev * accel_dev)332  void adf_heartbeat_shutdown(struct adf_accel_dev *accel_dev)
333  {
334  	struct adf_heartbeat *hb = accel_dev->heartbeat;
335  
336  	if (!hb)
337  		return;
338  
339  	if (hb->dma.virt_addr)
340  		dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
341  				  hb->dma.virt_addr, hb->dma.phy_addr);
342  
343  	kfree(hb);
344  	accel_dev->heartbeat = NULL;
345  }
346