1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2023 Intel Corporation */
3
4 #include <linux/dev_printk.h>
5 #include <linux/dma-mapping.h>
6 #include <linux/export.h>
7 #include <linux/kernel.h>
8 #include <linux/kstrtox.h>
9 #include <linux/overflow.h>
10 #include <linux/string.h>
11 #include <linux/slab.h>
12 #include <linux/types.h>
13 #include <asm/errno.h>
14 #include "adf_accel_devices.h"
15 #include "adf_admin.h"
16 #include "adf_cfg.h"
17 #include "adf_cfg_strings.h"
18 #include "adf_clock.h"
19 #include "adf_common_drv.h"
20 #include "adf_heartbeat.h"
21 #include "adf_transport_internal.h"
22 #include "icp_qat_fw_init_admin.h"
23
/*
 * 32-bit pattern that adf_heartbeat_check_ctrs() writes over the heartbeat
 * stats memory; validate_hb_ctrs_cnt() later counts the leading words that
 * no longer hold it.
 */
#define ADF_HB_EMPTY_SIG 0xA5A5A5A5
25
adf_hb_check_polling_freq(struct adf_accel_dev * accel_dev)26 static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev)
27 {
28 u64 curr_time = adf_clock_get_current_time();
29 u64 polling_time = curr_time - accel_dev->heartbeat->last_hb_check_time;
30
31 if (polling_time < accel_dev->heartbeat->hb_timer) {
32 dev_warn(&GET_DEV(accel_dev),
33 "HB polling too frequent. Configured HB timer %d ms\n",
34 accel_dev->heartbeat->hb_timer);
35 return -EINVAL;
36 }
37
38 accel_dev->heartbeat->last_hb_check_time = curr_time;
39 return 0;
40 }
41
42 /**
43 * validate_hb_ctrs_cnt() - checks if the number of heartbeat counters should
44 * be updated by one to support the currently loaded firmware.
45 * @accel_dev: Pointer to acceleration device.
46 *
47 * Return:
48 * * true - hb_ctrs must increased by ADF_NUM_PKE_STRAND
49 * * false - no changes needed
50 */
validate_hb_ctrs_cnt(struct adf_accel_dev * accel_dev)51 static bool validate_hb_ctrs_cnt(struct adf_accel_dev *accel_dev)
52 {
53 const size_t hb_ctrs = accel_dev->hw_device->num_hb_ctrs;
54 const size_t max_aes = accel_dev->hw_device->num_engines;
55 const size_t hb_struct_size = sizeof(struct hb_cnt_pair);
56 const size_t exp_diff_size = array3_size(ADF_NUM_PKE_STRAND, max_aes,
57 hb_struct_size);
58 const size_t dev_ctrs = size_mul(max_aes, hb_ctrs);
59 const size_t stats_size = size_mul(dev_ctrs, hb_struct_size);
60 const u32 exp_diff_cnt = exp_diff_size / sizeof(u32);
61 const u32 stats_el_cnt = stats_size / sizeof(u32);
62 struct hb_cnt_pair *hb_stats = accel_dev->heartbeat->dma.virt_addr;
63 const u32 *mem_to_chk = (u32 *)(hb_stats + dev_ctrs);
64 u32 el_diff_cnt = 0;
65 int i;
66
67 /* count how many bytes are different from pattern */
68 for (i = 0; i < stats_el_cnt; i++) {
69 if (mem_to_chk[i] == ADF_HB_EMPTY_SIG)
70 break;
71
72 el_diff_cnt++;
73 }
74
75 return el_diff_cnt && el_diff_cnt == exp_diff_cnt;
76 }
77
adf_heartbeat_check_ctrs(struct adf_accel_dev * accel_dev)78 void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev)
79 {
80 struct hb_cnt_pair *hb_stats = accel_dev->heartbeat->dma.virt_addr;
81 const size_t hb_ctrs = accel_dev->hw_device->num_hb_ctrs;
82 const size_t max_aes = accel_dev->hw_device->num_engines;
83 const size_t dev_ctrs = size_mul(max_aes, hb_ctrs);
84 const size_t stats_size = size_mul(dev_ctrs, sizeof(struct hb_cnt_pair));
85 const size_t mem_items_to_fill = size_mul(stats_size, 2) / sizeof(u32);
86
87 /* fill hb stats memory with pattern */
88 memset32((uint32_t *)hb_stats, ADF_HB_EMPTY_SIG, mem_items_to_fill);
89 accel_dev->heartbeat->ctrs_cnt_checked = false;
90 }
91 EXPORT_SYMBOL_GPL(adf_heartbeat_check_ctrs);
92
get_timer_ticks(struct adf_accel_dev * accel_dev,unsigned int * value)93 static int get_timer_ticks(struct adf_accel_dev *accel_dev, unsigned int *value)
94 {
95 char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { };
96 u32 timer_ms = ADF_CFG_HB_TIMER_DEFAULT_MS;
97 int cfg_read_status;
98 u32 ticks;
99 int ret;
100
101 cfg_read_status = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
102 ADF_HEARTBEAT_TIMER, timer_str);
103 if (cfg_read_status == 0) {
104 if (kstrtouint(timer_str, 10, &timer_ms))
105 dev_dbg(&GET_DEV(accel_dev),
106 "kstrtouint failed to parse the %s, param value",
107 ADF_HEARTBEAT_TIMER);
108 }
109
110 if (timer_ms < ADF_CFG_HB_TIMER_MIN_MS) {
111 dev_err(&GET_DEV(accel_dev), "Timer cannot be less than %u\n",
112 ADF_CFG_HB_TIMER_MIN_MS);
113 return -EINVAL;
114 }
115
116 /*
117 * On 4xxx devices adf_timer is responsible for HB updates and
118 * its period is fixed to 200ms
119 */
120 if (accel_dev->timer)
121 timer_ms = ADF_CFG_HB_TIMER_MIN_MS;
122
123 ret = adf_heartbeat_ms_to_ticks(accel_dev, timer_ms, &ticks);
124 if (ret)
125 return ret;
126
127 adf_heartbeat_save_cfg_param(accel_dev, timer_ms);
128
129 accel_dev->heartbeat->hb_timer = timer_ms;
130 *value = ticks;
131
132 return 0;
133 }
134
/*
 * Examine the heartbeat counters of a single AE.
 *
 * @curr: current counter pairs of the AE, one per counter
 * @prev: counter pairs of the AE saved by the previous check
 * @count: per-counter number of consecutive checks without progress
 * @hb_ctrs: number of counters to examine
 *
 * A counter is considered stuck when its response count did not change since
 * the previous check while either requests and responses differ or the
 * counter belongs to ADF_AE_ADMIN_THREAD. Any other state resets the
 * matching entry in @count.
 *
 * Return: 0 if no counter reached ADF_CFG_HB_COUNT_THRESHOLD stuck checks,
 * -EIO as soon as one does (remaining counters are then not examined).
 */
static int check_ae(struct hb_cnt_pair *curr, struct hb_cnt_pair *prev,
		    u16 *count, const size_t hb_ctrs)
{
	size_t idx;

	for (idx = 0; idx < hb_ctrs; idx++) {
		const u16 sent = curr[idx].req_heartbeat_cnt;
		const u16 answered = curr[idx].resp_heartbeat_cnt;
		const u16 answered_before = prev[idx].resp_heartbeat_cnt;
		const bool pending = idx == ADF_AE_ADMIN_THREAD ||
				     sent != answered;

		if (!pending || answered != answered_before) {
			count[idx] = 0;
			continue;
		}

		count[idx]++;
		if (count[idx] >= ADF_CFG_HB_COUNT_THRESHOLD)
			return -EIO;
	}

	return 0;
}
158
adf_hb_get_status(struct adf_accel_dev * accel_dev)159 static int adf_hb_get_status(struct adf_accel_dev *accel_dev)
160 {
161 struct adf_hw_device_data *hw_device = accel_dev->hw_device;
162 struct hb_cnt_pair *live_stats, *last_stats, *curr_stats;
163 const size_t hb_ctrs = hw_device->num_hb_ctrs;
164 const unsigned long ae_mask = hw_device->ae_mask;
165 const size_t max_aes = hw_device->num_engines;
166 const size_t dev_ctrs = size_mul(max_aes, hb_ctrs);
167 const size_t stats_size = size_mul(dev_ctrs, sizeof(*curr_stats));
168 struct hb_cnt_pair *ae_curr_p, *ae_prev_p;
169 u16 *count_fails, *ae_count_p;
170 size_t ae_offset;
171 size_t ae = 0;
172 int ret = 0;
173
174 if (!accel_dev->heartbeat->ctrs_cnt_checked) {
175 if (validate_hb_ctrs_cnt(accel_dev))
176 hw_device->num_hb_ctrs += ADF_NUM_PKE_STRAND;
177
178 accel_dev->heartbeat->ctrs_cnt_checked = true;
179 }
180
181 live_stats = accel_dev->heartbeat->dma.virt_addr;
182 last_stats = live_stats + dev_ctrs;
183 count_fails = (u16 *)(last_stats + dev_ctrs);
184
185 curr_stats = kmemdup(live_stats, stats_size, GFP_KERNEL);
186 if (!curr_stats)
187 return -ENOMEM;
188
189 /* loop through active AEs */
190 for_each_set_bit(ae, &ae_mask, max_aes) {
191 ae_offset = size_mul(ae, hb_ctrs);
192 ae_curr_p = curr_stats + ae_offset;
193 ae_prev_p = last_stats + ae_offset;
194 ae_count_p = count_fails + ae_offset;
195
196 ret = check_ae(ae_curr_p, ae_prev_p, ae_count_p, hb_ctrs);
197 if (ret)
198 break;
199 }
200
201 /* Copy current stats for the next iteration */
202 memcpy(last_stats, curr_stats, stats_size);
203 kfree(curr_stats);
204
205 return ret;
206 }
207
adf_heartbeat_reset(struct adf_accel_dev * accel_dev)208 static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev)
209 {
210 u64 curr_time = adf_clock_get_current_time();
211 u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time;
212
213 if (time_since_reset < ADF_CFG_HB_RESET_MS)
214 return;
215
216 accel_dev->heartbeat->last_hb_reset_time = curr_time;
217 if (adf_notify_fatal_error(accel_dev))
218 dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n");
219 }
220
adf_heartbeat_status(struct adf_accel_dev * accel_dev,enum adf_device_heartbeat_status * hb_status)221 void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
222 enum adf_device_heartbeat_status *hb_status)
223 {
224 struct adf_heartbeat *hb;
225
226 if (!adf_dev_started(accel_dev) ||
227 test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) {
228 *hb_status = HB_DEV_UNRESPONSIVE;
229 return;
230 }
231
232 if (adf_hb_check_polling_freq(accel_dev) == -EINVAL) {
233 *hb_status = HB_DEV_UNSUPPORTED;
234 return;
235 }
236
237 hb = accel_dev->heartbeat;
238 hb->hb_sent_counter++;
239
240 if (adf_hb_get_status(accel_dev)) {
241 dev_err(&GET_DEV(accel_dev),
242 "Heartbeat ERROR: QAT is not responding.\n");
243 *hb_status = HB_DEV_UNRESPONSIVE;
244 hb->hb_failed_counter++;
245 adf_heartbeat_reset(accel_dev);
246 return;
247 }
248
249 *hb_status = HB_DEV_ALIVE;
250 }
251
/*
 * Convert a time in milliseconds to heartbeat clock ticks.
 *
 * @accel_dev: Pointer to acceleration device.
 * @time_ms: Time to convert, in milliseconds.
 * @value: Output, number of ticks. Written on success only.
 *
 * The clock rate reported by the get_hb_clock() hook is divided by
 * MSEC_PER_SEC first, so any remainder of that division is dropped.
 *
 * Return: 0 on success, -EINVAL when the device provides no get_hb_clock()
 * hook, -EOVERFLOW when the number of ticks does not fit in a u32.
 */
int adf_heartbeat_ms_to_ticks(struct adf_accel_dev *accel_dev, unsigned int time_ms,
			      u32 *value)
{
	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
	u32 clk_per_sec;
	u32 clk_per_ms;
	u32 ticks;

	/* HB clock may be different than AE clock */
	if (!hw_data->get_hb_clock)
		return -EINVAL;

	clk_per_sec = hw_data->get_hb_clock(hw_data);
	clk_per_ms = clk_per_sec / MSEC_PER_SEC;

	/* Reject periods whose tick count would be silently truncated */
	if (check_mul_overflow(time_ms, clk_per_ms, &ticks))
		return -EOVERFLOW;

	*value = ticks;

	return 0;
}
267
adf_heartbeat_save_cfg_param(struct adf_accel_dev * accel_dev,unsigned int timer_ms)268 int adf_heartbeat_save_cfg_param(struct adf_accel_dev *accel_dev,
269 unsigned int timer_ms)
270 {
271 char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
272
273 snprintf(timer_str, sizeof(timer_str), "%u", timer_ms);
274 return adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC,
275 ADF_HEARTBEAT_TIMER, timer_str,
276 ADF_STR);
277 }
278 EXPORT_SYMBOL_GPL(adf_heartbeat_save_cfg_param);
279
adf_heartbeat_init(struct adf_accel_dev * accel_dev)280 int adf_heartbeat_init(struct adf_accel_dev *accel_dev)
281 {
282 struct adf_heartbeat *hb;
283
284 hb = kzalloc(sizeof(*hb), GFP_KERNEL);
285 if (!hb)
286 goto err_ret;
287
288 hb->dma.virt_addr = dma_alloc_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
289 &hb->dma.phy_addr, GFP_KERNEL);
290 if (!hb->dma.virt_addr)
291 goto err_free;
292
293 /*
294 * Default set this flag as true to avoid unnecessary checks,
295 * it will be reset on platforms that need such a check
296 */
297 hb->ctrs_cnt_checked = true;
298 accel_dev->heartbeat = hb;
299
300 return 0;
301
302 err_free:
303 kfree(hb);
304 err_ret:
305 return -ENOMEM;
306 }
307
adf_heartbeat_start(struct adf_accel_dev * accel_dev)308 int adf_heartbeat_start(struct adf_accel_dev *accel_dev)
309 {
310 unsigned int timer_ticks;
311 int ret;
312
313 if (!accel_dev->heartbeat) {
314 dev_warn(&GET_DEV(accel_dev), "Heartbeat instance not found!");
315 return -EFAULT;
316 }
317
318 if (accel_dev->hw_device->check_hb_ctrs)
319 accel_dev->hw_device->check_hb_ctrs(accel_dev);
320
321 ret = get_timer_ticks(accel_dev, &timer_ticks);
322 if (ret)
323 return ret;
324
325 ret = adf_send_admin_hb_timer(accel_dev, timer_ticks);
326 if (ret)
327 dev_warn(&GET_DEV(accel_dev), "Heartbeat not supported!");
328
329 return ret;
330 }
331
adf_heartbeat_shutdown(struct adf_accel_dev * accel_dev)332 void adf_heartbeat_shutdown(struct adf_accel_dev *accel_dev)
333 {
334 struct adf_heartbeat *hb = accel_dev->heartbeat;
335
336 if (!hb)
337 return;
338
339 if (hb->dma.virt_addr)
340 dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
341 hb->dma.virt_addr, hb->dma.phy_addr);
342
343 kfree(hb);
344 accel_dev->heartbeat = NULL;
345 }
346