1 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2 /*
3 * Wave5 series multi-standard codec IP - decoder interface
4 *
5 * Copyright (C) 2021-2023 CHIPS&MEDIA INC
6 */
7
8 #include "wave5-helper.h"
9
10 #define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
11 #define VPU_DEC_DRV_NAME "wave5-dec"
12
13 #define DEFAULT_SRC_SIZE(width, height) ({ \
14 (width) * (height) / 8 * 3; \
15 })
16
17 static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
18 [VPU_FMT_TYPE_CODEC] = {
19 {
20 .v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
21 .max_width = 8192,
22 .min_width = 8,
23 .max_height = 4320,
24 .min_height = 8,
25 },
26 {
27 .v4l2_pix_fmt = V4L2_PIX_FMT_H264,
28 .max_width = 8192,
29 .min_width = 32,
30 .max_height = 4320,
31 .min_height = 32,
32 },
33 },
34 [VPU_FMT_TYPE_RAW] = {
35 {
36 .v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
37 .max_width = 8192,
38 .min_width = 8,
39 .max_height = 4320,
40 .min_height = 8,
41 },
42 {
43 .v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
44 .max_width = 8192,
45 .min_width = 8,
46 .max_height = 4320,
47 .min_height = 8,
48 },
49 {
50 .v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
51 .max_width = 8192,
52 .min_width = 8,
53 .max_height = 4320,
54 .min_height = 8,
55 },
56 {
57 .v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
58 .max_width = 8192,
59 .min_width = 8,
60 .max_height = 4320,
61 .min_height = 8,
62 },
63 {
64 .v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
65 .max_width = 8192,
66 .min_width = 8,
67 .max_height = 4320,
68 .min_height = 8,
69 },
70 {
71 .v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
72 .max_width = 8192,
73 .min_width = 8,
74 .max_height = 4320,
75 .min_height = 8,
76 },
77 {
78 .v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
79 .max_width = 8192,
80 .min_width = 8,
81 .max_height = 4320,
82 .min_height = 8,
83 },
84 {
85 .v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
86 .max_width = 8192,
87 .min_width = 8,
88 .max_height = 4320,
89 .min_height = 8,
90 },
91 {
92 .v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
93 .max_width = 8192,
94 .min_width = 8,
95 .max_height = 4320,
96 .min_height = 8,
97 },
98 {
99 .v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
100 .max_width = 8192,
101 .min_width = 8,
102 .max_height = 4320,
103 .min_height = 8,
104 },
105 {
106 .v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
107 .max_width = 8192,
108 .min_width = 8,
109 .max_height = 4320,
110 .min_height = 8,
111 },
112 {
113 .v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
114 .max_width = 8192,
115 .min_width = 8,
116 .max_height = 4320,
117 .min_height = 8,
118 },
119 }
120 };
121
122 /*
123 * Make sure that the state switch is allowed and add logging for debugging
124 * purposes
125 */
126 static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
127 {
128 switch (state) {
129 case VPU_INST_STATE_NONE:
130 break;
131 case VPU_INST_STATE_OPEN:
132 if (inst->state != VPU_INST_STATE_NONE)
133 goto invalid_state_switch;
134 goto valid_state_switch;
135 case VPU_INST_STATE_INIT_SEQ:
136 if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
137 goto invalid_state_switch;
138 goto valid_state_switch;
139 case VPU_INST_STATE_PIC_RUN:
140 if (inst->state != VPU_INST_STATE_INIT_SEQ)
141 goto invalid_state_switch;
142 goto valid_state_switch;
143 case VPU_INST_STATE_STOP:
144 goto valid_state_switch;
145 }
146 invalid_state_switch:
147 WARN(1, "Invalid state switch from %s to %s.\n",
148 state_to_str(inst->state), state_to_str(state));
149 return -EINVAL;
150 valid_state_switch:
151 dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
152 state_to_str(inst->state), state_to_str(state));
153 inst->state = state;
154 return 0;
155 }
156
157 static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
158 {
159 int ret;
160
161 ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
162 if (ret) {
163 /*
164 * To set the EOS flag, a command is sent to the firmware.
165 * That command may never return (timeout) or may report an error.
166 */
167 dev_err(inst->dev->dev,
168 "Setting EOS for the bitstream, fail: %d\n", ret);
169 return ret;
170 }
171 return 0;
172 }
173
174 static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
175 {
176 struct vpu_src_buffer *vpu_buf;
177
178 if (!m2m_ctx->last_src_buf)
179 return false;
180
181 vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
182 return vpu_buf->consumed;
183 }
184
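/*
 * Return source buffers to user space once the firmware read pointer has
 * moved past them. The consumed byte count wraps around the bitstream ring
 * buffer; bytes that do not yet cover a whole source buffer are carried over
 * in remaining_consumed_bytes for the next call.
 */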
185 static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
186 {
187 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
188 struct v4l2_m2m_buffer *buf, *n;
189 size_t consumed_bytes = 0;
190
191 if (rd_ptr >= inst->last_rd_ptr) {
192 consumed_bytes = rd_ptr - inst->last_rd_ptr;
193 } else {
194 size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
195 size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;
196
197 consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
198 }
199
200 inst->last_rd_ptr = rd_ptr;
201 consumed_bytes += inst->remaining_consumed_bytes;
202
203 dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream was consumed", __func__,
204 consumed_bytes);
205
206 v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
207 struct vb2_v4l2_buffer *src_buf = &buf->vb;
208 size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
209
210 if (src_size > consumed_bytes)
211 break;
212
213 dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
214 __func__, src_buf->vb2_buf.index);
215 src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
216 inst->timestamp = src_buf->vb2_buf.timestamp;
217 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
218 consumed_bytes -= src_size;
219
220 /* Handle the case where the last bitstream buffer has been picked */
221 if (src_buf == m2m_ctx->last_src_buf) {
222 int ret;
223
224 m2m_ctx->last_src_buf = NULL;
225 ret = wave5_vpu_dec_set_eos_on_firmware(inst);
226 if (ret)
227 dev_warn(inst->dev->dev,
228 "Setting EOS for the bitstream, fail: %d\n", ret);
229 break;
230 }
231 }
232
233 inst->remaining_consumed_bytes = consumed_bytes;
234 }
235
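/*
 * Derive the aligned width/height and the per-plane bytesperline/sizeimage
 * for the given raw pixel format. Unknown (e.g. compressed) formats only get
 * a conservative default buffer size.
 */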
236 static void wave5_update_pix_fmt(struct v4l2_pix_format_mplane *pix_mp, unsigned int width,
237 unsigned int height)
238 {
239 switch (pix_mp->pixelformat) {
240 case V4L2_PIX_FMT_YUV420:
241 case V4L2_PIX_FMT_NV12:
242 case V4L2_PIX_FMT_NV21:
243 pix_mp->width = round_up(width, 32);
244 pix_mp->height = round_up(height, 16);
245 pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
246 pix_mp->plane_fmt[0].sizeimage = width * height * 3 / 2;
247 break;
248 case V4L2_PIX_FMT_YUV422P:
249 case V4L2_PIX_FMT_NV16:
250 case V4L2_PIX_FMT_NV61:
251 pix_mp->width = round_up(width, 32);
252 pix_mp->height = round_up(height, 16);
253 pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
254 pix_mp->plane_fmt[0].sizeimage = width * height * 2;
255 break;
256 case V4L2_PIX_FMT_YUV420M:
257 pix_mp->width = round_up(width, 32);
258 pix_mp->height = round_up(height, 16);
259 pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
260 pix_mp->plane_fmt[0].sizeimage = width * height;
261 pix_mp->plane_fmt[1].bytesperline = round_up(width, 32) / 2;
262 pix_mp->plane_fmt[1].sizeimage = width * height / 4;
263 pix_mp->plane_fmt[2].bytesperline = round_up(width, 32) / 2;
264 pix_mp->plane_fmt[2].sizeimage = width * height / 4;
265 break;
266 case V4L2_PIX_FMT_NV12M:
267 case V4L2_PIX_FMT_NV21M:
268 pix_mp->width = round_up(width, 32);
269 pix_mp->height = round_up(height, 16);
270 pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
271 pix_mp->plane_fmt[0].sizeimage = width * height;
272 pix_mp->plane_fmt[1].bytesperline = round_up(width, 32);
273 pix_mp->plane_fmt[1].sizeimage = width * height / 2;
274 break;
275 case V4L2_PIX_FMT_YUV422M:
276 pix_mp->width = round_up(width, 32);
277 pix_mp->height = round_up(height, 16);
278 pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
279 pix_mp->plane_fmt[0].sizeimage = width * height;
280 pix_mp->plane_fmt[1].bytesperline = round_up(width, 32) / 2;
281 pix_mp->plane_fmt[1].sizeimage = width * height / 2;
282 pix_mp->plane_fmt[2].bytesperline = round_up(width, 32) / 2;
283 pix_mp->plane_fmt[2].sizeimage = width * height / 2;
284 break;
285 case V4L2_PIX_FMT_NV16M:
286 case V4L2_PIX_FMT_NV61M:
287 pix_mp->width = round_up(width, 32);
288 pix_mp->height = round_up(height, 16);
289 pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
290 pix_mp->plane_fmt[0].sizeimage = width * height;
291 pix_mp->plane_fmt[1].bytesperline = round_up(width, 32);
292 pix_mp->plane_fmt[1].sizeimage = width * height;
293 break;
294 default:
295 pix_mp->width = width;
296 pix_mp->height = height;
297 pix_mp->plane_fmt[0].bytesperline = 0;
298 pix_mp->plane_fmt[0].sizeimage = max(DEFAULT_SRC_SIZE(width, height),
299 pix_mp->plane_fmt[0].sizeimage);
300 break;
301 }
302 }
303
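/*
 * Kick off decoding of one frame. On failure, return the pending source
 * buffer with an error, switch to the STOP state and finish the current
 * m2m job.
 */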
304 static int start_decode(struct vpu_instance *inst, u32 *fail_res)
305 {
306 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
307 int ret = 0;
308
309 ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
310 if (ret) {
311 struct vb2_v4l2_buffer *src_buf;
312
313 src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
314 if (src_buf)
315 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
316 switch_state(inst, VPU_INST_STATE_STOP);
317
318 dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
319 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
320 }
321
322 return ret;
323 }
324
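/*
 * Complete the drain sequence by returning an empty capture buffer flagged as
 * last. If no capture buffer is available, arm the m2m framework so that the
 * next queued capture buffer is returned as the last one.
 */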
325 static void flag_last_buffer_done(struct vpu_instance *inst)
326 {
327 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
328 struct vb2_v4l2_buffer *vb;
329 int i;
330
331 lockdep_assert_held(&inst->state_spinlock);
332
333 vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
334 if (!vb) {
335 m2m_ctx->is_draining = true;
336 m2m_ctx->next_buf_last = true;
337 return;
338 }
339
340 for (i = 0; i < vb->vb2_buf.num_planes; i++)
341 vb2_set_plane_payload(&vb->vb2_buf, i, 0);
342 vb->field = V4L2_FIELD_NONE;
343
344 v4l2_m2m_last_buffer_done(m2m_ctx, vb);
345 }
346
347 static void send_eos_event(struct vpu_instance *inst)
348 {
349 static const struct v4l2_event vpu_event_eos = {
350 .type = V4L2_EVENT_EOS
351 };
352
353 lockdep_assert_held(&inst->state_spinlock);
354
355 v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
356 inst->eos = false;
357 }
358
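/*
 * Propagate new sequence information (resolution, crop rectangle and minimum
 * frame buffer count) to the capture side and notify user space with a
 * V4L2_EVENT_SOURCE_CHANGE event.
 */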
359 static int handle_dynamic_resolution_change(struct vpu_instance *inst)
360 {
361 struct v4l2_fh *fh = &inst->v4l2_fh;
362 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
363
364 static const struct v4l2_event vpu_event_src_ch = {
365 .type = V4L2_EVENT_SOURCE_CHANGE,
366 .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
367 };
368 struct dec_info *p_dec_info = &inst->codec_info->dec_info;
369 struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info;
370
371 lockdep_assert_held(&inst->state_spinlock);
372
373 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr);
374
375 dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n",
376 __func__, initial_info->pic_width, initial_info->pic_height,
377 initial_info->profile, initial_info->min_frame_buffer_count);
378
379 inst->needs_reallocation = true;
380 inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1;
381 if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) {
382 struct v4l2_ctrl *ctrl;
383
384 ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl,
385 V4L2_CID_MIN_BUFFERS_FOR_CAPTURE);
386 if (ctrl)
387 v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count);
388 }
389
390 if (p_dec_info->initial_info_obtained) {
391 inst->conf_win.left = initial_info->pic_crop_rect.left;
392 inst->conf_win.top = initial_info->pic_crop_rect.top;
393 inst->conf_win.width = initial_info->pic_width -
394 initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right;
395 inst->conf_win.height = initial_info->pic_height -
396 initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom;
397
398 wave5_update_pix_fmt(&inst->src_fmt, initial_info->pic_width,
399 initial_info->pic_height);
400 wave5_update_pix_fmt(&inst->dst_fmt, initial_info->pic_width,
401 initial_info->pic_height);
402 }
403
404 v4l2_event_queue_fh(fh, &vpu_event_src_ch);
405
406 return 0;
407 }
408
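/*
 * Interrupt follow-up: fetch the output information from the firmware, return
 * the consumed source buffers, mark the display buffer as done and handle end
 * of sequence or dynamic resolution changes. The m2m job is only finished
 * when the firmware reports no further queued results.
 */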
409 static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
410 {
411 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
412 struct dec_output_info dec_info;
413 int ret;
414 struct vb2_v4l2_buffer *dec_buf = NULL;
415 struct vb2_v4l2_buffer *disp_buf = NULL;
416 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
417 struct queue_status_info q_status;
418
419 dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__);
420
421 ret = wave5_vpu_dec_get_output_info(inst, &dec_info);
422 if (ret) {
423 dev_warn(inst->dev->dev, "%s: could not get output info.", __func__);
424 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
425 return;
426 }
427
428 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr,
429 &dec_info.wr_ptr);
430 wave5_handle_src_buffer(inst, dec_info.rd_ptr);
431
432 dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__,
433 dec_info.index_frame_decoded, dec_info.index_frame_display);
434
435 if (!vb2_is_streaming(dst_vq)) {
436 dev_dbg(inst->dev->dev, "%s: capture is not streaming.", __func__);
437 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
438 return;
439 }
440
441 /* Remove decoded buffer from the ready queue now that it has been
442 * decoded.
443 */
444 if (dec_info.index_frame_decoded >= 0) {
445 struct vb2_buffer *vb = vb2_get_buffer(dst_vq,
446 dec_info.index_frame_decoded);
447 if (vb) {
448 dec_buf = to_vb2_v4l2_buffer(vb);
449 dec_buf->vb2_buf.timestamp = inst->timestamp;
450 } else {
451 dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i",
452 __func__, dec_info.index_frame_decoded);
453 }
454 }
455
456 if (dec_info.index_frame_display >= 0) {
457 disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display);
458 if (!disp_buf)
459 dev_warn(inst->dev->dev, "%s: invalid display frame index %i",
460 __func__, dec_info.index_frame_display);
461 }
462
463 /* If there is anything to display, do that now */
464 if (disp_buf) {
465 struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf);
466
467 if (inst->dst_fmt.num_planes == 1) {
468 vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
469 inst->dst_fmt.plane_fmt[0].sizeimage);
470 } else if (inst->dst_fmt.num_planes == 2) {
471 vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
472 inst->dst_fmt.plane_fmt[0].sizeimage);
473 vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
474 inst->dst_fmt.plane_fmt[1].sizeimage);
475 } else if (inst->dst_fmt.num_planes == 3) {
476 vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
477 inst->dst_fmt.plane_fmt[0].sizeimage);
478 vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
479 inst->dst_fmt.plane_fmt[1].sizeimage);
480 vb2_set_plane_payload(&disp_buf->vb2_buf, 2,
481 inst->dst_fmt.plane_fmt[2].sizeimage);
482 }
483
484 /* TODO implement interlace support */
485 disp_buf->field = V4L2_FIELD_NONE;
486 dst_vpu_buf->display = true;
487 v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE);
488
489 dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n",
490 __func__, dec_info.frame_cycle,
491 vb2_get_plane_payload(&disp_buf->vb2_buf, 0));
492 }
493
494 if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END ||
495 dec_info.sequence_changed)) {
496 unsigned long flags;
497
498 spin_lock_irqsave(&inst->state_spinlock, flags);
499 if (!v4l2_m2m_has_stopped(m2m_ctx)) {
500 switch_state(inst, VPU_INST_STATE_STOP);
501
502 if (dec_info.sequence_changed)
503 handle_dynamic_resolution_change(inst);
504 else
505 send_eos_event(inst);
506
507 flag_last_buffer_done(inst);
508 }
509 spin_unlock_irqrestore(&inst->state_spinlock, flags);
510 }
511
512 /*
513 * During a resolution change and while draining, the firmware may flush
514 * the reorder queue regardless of having a matching decoding operation
515 * pending. Only terminate the job if there are no more IRQs coming.
516 */
517 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
518 if (q_status.report_queue_count == 0 &&
519 (q_status.instance_queue_count == 0 || dec_info.sequence_changed)) {
520 dev_dbg(inst->dev->dev, "%s: finishing job.\n", __func__);
521 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
522 }
523 }
524
525 static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap)
526 {
527 strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver));
528 strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card));
529
530 return 0;
531 }
532
533 static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize)
534 {
535 const struct vpu_format *vpu_fmt;
536
537 if (fsize->index)
538 return -EINVAL;
539
540 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
541 if (!vpu_fmt) {
542 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]);
543 if (!vpu_fmt)
544 return -EINVAL;
545 }
546
547 fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
548 fsize->stepwise.min_width = vpu_fmt->min_width;
549 fsize->stepwise.max_width = vpu_fmt->max_width;
550 fsize->stepwise.step_width = 1;
551 fsize->stepwise.min_height = vpu_fmt->min_height;
552 fsize->stepwise.max_height = vpu_fmt->max_height;
553 fsize->stepwise.step_height = 1;
554
555 return 0;
556 }
557
558 static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f)
559 {
560 const struct vpu_format *vpu_fmt;
561
562 vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]);
563 if (!vpu_fmt)
564 return -EINVAL;
565
566 f->pixelformat = vpu_fmt->v4l2_pix_fmt;
567 f->flags = 0;
568
569 return 0;
570 }
571
572 static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
573 {
574 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
575 struct dec_info *p_dec_info = &inst->codec_info->dec_info;
576 const struct vpu_format *vpu_fmt;
577 int width, height;
578
579 dev_dbg(inst->dev->dev,
580 "%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n",
581 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
582 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
583
584 vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
585 if (!vpu_fmt) {
586 width = inst->dst_fmt.width;
587 height = inst->dst_fmt.height;
588 f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
589 f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
590 } else {
591 const struct v4l2_format_info *info = v4l2_format_info(vpu_fmt->v4l2_pix_fmt);
592
593 width = clamp(f->fmt.pix_mp.width, vpu_fmt->min_width, vpu_fmt->max_width);
594 height = clamp(f->fmt.pix_mp.height, vpu_fmt->min_height, vpu_fmt->max_height);
595 f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
596 f->fmt.pix_mp.num_planes = info->mem_planes;
597 }
598
599 if (p_dec_info->initial_info_obtained) {
600 width = inst->dst_fmt.width;
601 height = inst->dst_fmt.height;
602 }
603
604 wave5_update_pix_fmt(&f->fmt.pix_mp, width, height);
605 f->fmt.pix_mp.flags = 0;
606 f->fmt.pix_mp.field = V4L2_FIELD_NONE;
607 f->fmt.pix_mp.colorspace = inst->colorspace;
608 f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
609 f->fmt.pix_mp.quantization = inst->quantization;
610 f->fmt.pix_mp.xfer_func = inst->xfer_func;
611
612 return 0;
613 }
614
615 static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
616 {
617 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
618 int i, ret;
619
620 dev_dbg(inst->dev->dev,
621 "%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
622 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
623 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
624
625 ret = wave5_vpu_dec_try_fmt_cap(file, fh, f);
626 if (ret)
627 return ret;
628
629 inst->dst_fmt.width = f->fmt.pix_mp.width;
630 inst->dst_fmt.height = f->fmt.pix_mp.height;
631 inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
632 inst->dst_fmt.field = f->fmt.pix_mp.field;
633 inst->dst_fmt.flags = f->fmt.pix_mp.flags;
634 inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes;
635 for (i = 0; i < inst->dst_fmt.num_planes; i++) {
636 inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
637 inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
638 }
639
640 if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 ||
641 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) {
642 inst->cbcr_interleave = true;
643 inst->nv21 = false;
644 inst->output_format = FORMAT_420;
645 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 ||
646 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) {
647 inst->cbcr_interleave = true;
648 inst->nv21 = true;
649 inst->output_format = FORMAT_420;
650 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 ||
651 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) {
652 inst->cbcr_interleave = true;
653 inst->nv21 = false;
654 inst->output_format = FORMAT_422;
655 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 ||
656 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) {
657 inst->cbcr_interleave = true;
658 inst->nv21 = true;
659 inst->output_format = FORMAT_422;
660 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P ||
661 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) {
662 inst->cbcr_interleave = false;
663 inst->nv21 = false;
664 inst->output_format = FORMAT_422;
665 } else {
666 inst->cbcr_interleave = false;
667 inst->nv21 = false;
668 inst->output_format = FORMAT_420;
669 }
670
671 return 0;
672 }
673
674 static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
675 {
676 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
677 int i;
678
679 f->fmt.pix_mp.width = inst->dst_fmt.width;
680 f->fmt.pix_mp.height = inst->dst_fmt.height;
681 f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
682 f->fmt.pix_mp.field = inst->dst_fmt.field;
683 f->fmt.pix_mp.flags = inst->dst_fmt.flags;
684 f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
685 for (i = 0; i < f->fmt.pix_mp.num_planes; i++) {
686 f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline;
687 f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage;
688 }
689
690 f->fmt.pix_mp.colorspace = inst->colorspace;
691 f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
692 f->fmt.pix_mp.quantization = inst->quantization;
693 f->fmt.pix_mp.xfer_func = inst->xfer_func;
694
695 return 0;
696 }
697
698 static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f)
699 {
700 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
701 const struct vpu_format *vpu_fmt;
702
703 dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index);
704
705 vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
706 if (!vpu_fmt)
707 return -EINVAL;
708
709 f->pixelformat = vpu_fmt->v4l2_pix_fmt;
710 f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED;
711
712 return 0;
713 }
714
715 static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
716 {
717 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
718 const struct vpu_format *vpu_fmt;
719
720 dev_dbg(inst->dev->dev,
721 "%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
722 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
723 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
724
725 vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
726 if (!vpu_fmt) {
727 f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
728 f->fmt.pix_mp.num_planes = inst->src_fmt.num_planes;
729 wave5_update_pix_fmt(&f->fmt.pix_mp, inst->src_fmt.width, inst->src_fmt.height);
730 } else {
731 int width = clamp(f->fmt.pix_mp.width, vpu_fmt->min_width, vpu_fmt->max_width);
732 int height = clamp(f->fmt.pix_mp.height, vpu_fmt->min_height, vpu_fmt->max_height);
733
734 f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
735 f->fmt.pix_mp.num_planes = 1;
736 wave5_update_pix_fmt(&f->fmt.pix_mp, width, height);
737 }
738
739 f->fmt.pix_mp.flags = 0;
740 f->fmt.pix_mp.field = V4L2_FIELD_NONE;
741
742 return 0;
743 }
744
745 static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
746 {
747 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
748 int i, ret;
749
750 dev_dbg(inst->dev->dev,
751 "%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
752 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
753 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);
754
755 ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
756 if (ret)
757 return ret;
758
759 inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
760 if (inst->std == STD_UNKNOWN) {
761 dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
762 (char *)&f->fmt.pix_mp.pixelformat);
763 return -EINVAL;
764 }
765
766 inst->src_fmt.width = f->fmt.pix_mp.width;
767 inst->src_fmt.height = f->fmt.pix_mp.height;
768 inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
769 inst->src_fmt.field = f->fmt.pix_mp.field;
770 inst->src_fmt.flags = f->fmt.pix_mp.flags;
771 inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
772 for (i = 0; i < inst->src_fmt.num_planes; i++) {
773 inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
774 inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
775 }
776
777 inst->colorspace = f->fmt.pix_mp.colorspace;
778 inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
779 inst->quantization = f->fmt.pix_mp.quantization;
780 inst->xfer_func = f->fmt.pix_mp.xfer_func;
781
782 wave5_update_pix_fmt(&inst->dst_fmt, f->fmt.pix_mp.width, f->fmt.pix_mp.height);
783
784 return 0;
785 }
786
787 static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
788 {
789 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
790
791 dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);
792
793 if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
794 return -EINVAL;
795 switch (s->target) {
796 case V4L2_SEL_TGT_COMPOSE_BOUNDS:
797 case V4L2_SEL_TGT_COMPOSE_PADDED:
798 s->r.left = 0;
799 s->r.top = 0;
800 s->r.width = inst->dst_fmt.width;
801 s->r.height = inst->dst_fmt.height;
802 break;
803 case V4L2_SEL_TGT_COMPOSE:
804 case V4L2_SEL_TGT_COMPOSE_DEFAULT:
805 s->r.left = 0;
806 s->r.top = 0;
807 if (inst->state > VPU_INST_STATE_OPEN) {
808 s->r = inst->conf_win;
809 } else {
810 s->r.width = inst->src_fmt.width;
811 s->r.height = inst->src_fmt.height;
812 }
813 break;
814 default:
815 return -EINVAL;
816 }
817
818 return 0;
819 }
820
821 static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
822 {
823 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
824
825 if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
826 return -EINVAL;
827
828 if (s->target != V4L2_SEL_TGT_COMPOSE)
829 return -EINVAL;
830
831 dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
832 s->r.width, s->r.height);
833
834 s->r.left = 0;
835 s->r.top = 0;
836 s->r.width = inst->dst_fmt.width;
837 s->r.height = inst->dst_fmt.height;
838
839 return 0;
840 }
841
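/*
 * Handle V4L2_DEC_CMD_STOP: tell the firmware that no more bitstream data
 * will follow and start draining. When nothing was ever queued, signal EOS
 * and return an empty last capture buffer right away.
 */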
842 static int wave5_vpu_dec_stop(struct vpu_instance *inst)
843 {
844 int ret = 0;
845 unsigned long flags;
846 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
847
848 spin_lock_irqsave(&inst->state_spinlock, flags);
849
850 if (m2m_ctx->is_draining) {
851 ret = -EBUSY;
852 goto unlock_and_return;
853 }
854
855 if (inst->state != VPU_INST_STATE_NONE) {
856 /*
857 * Temporarily release the state_spinlock so that subsequent
858 * calls do not block on a mutex while inside this spinlock.
859 */
860 spin_unlock_irqrestore(&inst->state_spinlock, flags);
861 ret = wave5_vpu_dec_set_eos_on_firmware(inst);
862 if (ret)
863 return ret;
864
865 spin_lock_irqsave(&inst->state_spinlock, flags);
866 /*
867 * TODO eliminate this check by using a separate check for
868 * draining triggered by a resolution change.
869 */
870 if (m2m_ctx->is_draining) {
871 ret = -EBUSY;
872 goto unlock_and_return;
873 }
874 }
875
876 /*
877 * Used to remember the EOS state after the streamoff/on transition on
878 * the capture queue.
879 */
880 inst->eos = true;
881
882 if (m2m_ctx->has_stopped)
883 goto unlock_and_return;
884
885 m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
886 m2m_ctx->is_draining = true;
887
888 /*
889 * Defer to device_run in case the buffer is not yet in the ring
890 * buffer. In the other case, we have to send the EOS signal to the
891 * firmware so that any pending PIC_RUN ends without a new
892 * bitstream buffer.
893 */
894 if (m2m_ctx->last_src_buf)
895 goto unlock_and_return;
896
897 if (inst->state == VPU_INST_STATE_NONE) {
898 send_eos_event(inst);
899 flag_last_buffer_done(inst);
900 }
901
902 unlock_and_return:
903 spin_unlock_irqrestore(&inst->state_spinlock, flags);
904 return ret;
905 }
906
907 static int wave5_vpu_dec_start(struct vpu_instance *inst)
908 {
909 int ret = 0;
910 unsigned long flags;
911 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
912 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
913
914 spin_lock_irqsave(&inst->state_spinlock, flags);
915
916 if (m2m_ctx->is_draining) {
917 ret = -EBUSY;
918 goto unlock_and_return;
919 }
920
921 if (m2m_ctx->has_stopped)
922 m2m_ctx->has_stopped = false;
923
924 vb2_clear_last_buffer_dequeued(dst_vq);
925 inst->eos = false;
926
927 unlock_and_return:
928 spin_unlock_irqrestore(&inst->state_spinlock, flags);
929 return ret;
930 }
931
932 static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
933 {
934 struct vpu_instance *inst = wave5_to_vpu_inst(fh);
935 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
936 int ret;
937
938 dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);
939
940 ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
941 if (ret)
942 return ret;
943
944 switch (dc->cmd) {
945 case V4L2_DEC_CMD_STOP:
946 ret = wave5_vpu_dec_stop(inst);
947 /* Just in case we don't have anything to decode anymore */
948 v4l2_m2m_try_schedule(m2m_ctx);
949 break;
950 case V4L2_DEC_CMD_START:
951 ret = wave5_vpu_dec_start(inst);
952 break;
953 default:
954 ret = -EINVAL;
955 }
956
957 return ret;
958 }
959
960 static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
961 .vidioc_querycap = wave5_vpu_dec_querycap,
962 .vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,
963
964 .vidioc_enum_fmt_vid_cap = wave5_vpu_dec_enum_fmt_cap,
965 .vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
966 .vidioc_g_fmt_vid_cap_mplane = wave5_vpu_dec_g_fmt_cap,
967 .vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,
968
969 .vidioc_enum_fmt_vid_out = wave5_vpu_dec_enum_fmt_out,
970 .vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
971 .vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
972 .vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,
973
974 .vidioc_g_selection = wave5_vpu_dec_g_selection,
975 .vidioc_s_selection = wave5_vpu_dec_s_selection,
976
977 .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
978 /*
979 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
980 * there is no immediate use-case for supporting CREATE_BUFS on
981 * just the OUTPUT queue, disable CREATE_BUFS altogether.
982 */
983 .vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
984 .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
985 .vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
986 .vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
987 .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
988 .vidioc_streamon = v4l2_m2m_ioctl_streamon,
989 .vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
990
991 .vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
992 .vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,
993
994 .vidioc_subscribe_event = wave5_vpu_subscribe_event,
995 .vidioc_unsubscribe_event = v4l2_event_unsubscribe,
996 };
997
998 static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
999 unsigned int *num_planes, unsigned int sizes[],
1000 struct device *alloc_devs[])
1001 {
1002 struct vpu_instance *inst = vb2_get_drv_priv(q);
1003 struct v4l2_pix_format_mplane inst_format =
1004 (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
1005
1006 dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
1007 *num_buffers, *num_planes, q->type);
1008
1009 *num_planes = inst_format.num_planes;
1010
1011 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
1012 sizes[0] = inst_format.plane_fmt[0].sizeimage;
1013 dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
1014 } else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1015 if (*num_buffers < inst->fbc_buf_count)
1016 *num_buffers = inst->fbc_buf_count;
1017
1018 if (*num_planes == 1) {
1019 if (inst->output_format == FORMAT_422)
1020 sizes[0] = inst_format.width * inst_format.height * 2;
1021 else
1022 sizes[0] = inst_format.width * inst_format.height * 3 / 2;
1023 dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
1024 } else if (*num_planes == 2) {
1025 sizes[0] = inst_format.width * inst_format.height;
1026 if (inst->output_format == FORMAT_422)
1027 sizes[1] = inst_format.width * inst_format.height;
1028 else
1029 sizes[1] = inst_format.width * inst_format.height / 2;
1030 dev_dbg(inst->dev->dev, "%s: size[0]: %u | size[1]: %u\n",
1031 __func__, sizes[0], sizes[1]);
1032 } else if (*num_planes == 3) {
1033 sizes[0] = inst_format.width * inst_format.height;
1034 if (inst->output_format == FORMAT_422) {
1035 sizes[1] = inst_format.width * inst_format.height / 2;
1036 sizes[2] = inst_format.width * inst_format.height / 2;
1037 } else {
1038 sizes[1] = inst_format.width * inst_format.height / 4;
1039 sizes[2] = inst_format.width * inst_format.height / 4;
1040 }
1041 dev_dbg(inst->dev->dev, "%s: size[0]: %u | size[1]: %u | size[2]: %u\n",
1042 __func__, sizes[0], sizes[1], sizes[2]);
1043 }
1044 }
1045
1046 return 0;
1047 }
1048
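/*
 * Allocate the compressed (FBC) reference frame buffers and register them,
 * together with the queued capture buffers used as linear output frames, with
 * the firmware. Display flags are then set up so that only buffers queued by
 * user space are available for output.
 */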
1049 static int wave5_prepare_fb(struct vpu_instance *inst)
1050 {
1051 int linear_num;
1052 int non_linear_num;
1053 int fb_stride = 0, fb_height = 0;
1054 int luma_size, chroma_size;
1055 int ret, i;
1056 struct v4l2_m2m_buffer *buf, *n;
1057 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1058 u32 bitdepth = inst->codec_info->dec_info.initial_info.luma_bitdepth;
1059
1060 switch (bitdepth) {
1061 case 8:
1062 break;
1063 case 10:
1064 if (inst->std == W_HEVC_DEC &&
1065 inst->dev->attr.support_hevc10bit_dec)
1066 break;
1067
1068 fallthrough;
1069 default:
1070 dev_err(inst->dev->dev, "no support for %d bit depth\n", bitdepth);
1071
1072 return -EINVAL;
1073 }
1074
1075 linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
1076 non_linear_num = inst->fbc_buf_count;
1077
1078 for (i = 0; i < non_linear_num; i++) {
1079 struct frame_buffer *frame = &inst->frame_buf[i];
1080 struct vpu_buf *vframe = &inst->frame_vbuf[i];
1081
1082 fb_stride = ALIGN(inst->dst_fmt.width * bitdepth / 8, 32);
1083 fb_height = ALIGN(inst->dst_fmt.height, 32);
1084 luma_size = fb_stride * fb_height;
1085
1086 chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;
1087
1088 if (vframe->size == (luma_size + chroma_size))
1089 continue;
1090
1091 if (vframe->size)
1092 wave5_vpu_dec_reset_framebuffer(inst, i);
1093
1094 vframe->size = luma_size + chroma_size;
1095 ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe);
1096 if (ret) {
1097 dev_dbg(inst->dev->dev,
1098 "%s: Allocating FBC buf of size %zu, fail: %d\n",
1099 __func__, vframe->size, ret);
1100 return ret;
1101 }
1102
1103 frame->buf_y = vframe->daddr;
1104 frame->buf_cb = vframe->daddr + luma_size;
1105 frame->buf_cr = (dma_addr_t)-1;
1106 frame->size = vframe->size;
1107 frame->width = inst->src_fmt.width;
1108 frame->stride = fb_stride;
1109 frame->map_type = COMPRESSED_FRAME_MAP;
1110 frame->update_fb_info = true;
1111 }
1112 /* In case the count has reduced, clean up leftover framebuffer memory */
1113 for (i = non_linear_num; i < MAX_REG_FRAME; i++) {
1114 ret = wave5_vpu_dec_reset_framebuffer(inst, i);
1115 if (ret)
1116 break;
1117 }
1118
1119 for (i = 0; i < linear_num; i++) {
1120 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1121 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1122 struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i);
1123 struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i];
1124 dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0;
1125 u32 buf_size = 0;
1126 u32 fb_stride = inst->dst_fmt.width;
1127 u32 luma_size = fb_stride * inst->dst_fmt.height;
1128 u32 chroma_size;
1129
1130 if (inst->output_format == FORMAT_422)
1131 chroma_size = fb_stride * inst->dst_fmt.height / 2;
1132 else
1133 chroma_size = fb_stride * inst->dst_fmt.height / 4;
1134
1135 if (inst->dst_fmt.num_planes == 1) {
1136 buf_size = vb2_plane_size(vb, 0);
1137 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1138 buf_addr_cb = buf_addr_y + luma_size;
1139 buf_addr_cr = buf_addr_cb + chroma_size;
1140 } else if (inst->dst_fmt.num_planes == 2) {
1141 buf_size = vb2_plane_size(vb, 0) +
1142 vb2_plane_size(vb, 1);
1143 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1144 buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1145 buf_addr_cr = buf_addr_cb + chroma_size;
1146 } else if (inst->dst_fmt.num_planes == 3) {
1147 buf_size = vb2_plane_size(vb, 0) +
1148 vb2_plane_size(vb, 1) +
1149 vb2_plane_size(vb, 2);
1150 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1151 buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1152 buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2);
1153 }
1154
1155 frame->buf_y = buf_addr_y;
1156 frame->buf_cb = buf_addr_cb;
1157 frame->buf_cr = buf_addr_cr;
1158 frame->size = buf_size;
1159 frame->width = inst->src_fmt.width;
1160 frame->stride = fb_stride;
1161 frame->map_type = LINEAR_FRAME_MAP;
1162 frame->update_fb_info = true;
1163 }
1164
1165 ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num,
1166 fb_stride, inst->dst_fmt.height);
1167 if (ret) {
1168 dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d",
1169 __func__, ret);
1170 return ret;
1171 }
1172
1173 /*
1174 * Mark all frame buffers as out of display, to avoid using them before
1175 * the application has queued them.
1176 */
1177 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1178 ret = wave5_vpu_dec_set_disp_flag(inst, i);
1179 if (ret) {
1180 dev_dbg(inst->dev->dev,
1181 "%s: Setting display flag of buf index: %u, fail: %d\n",
1182 __func__, i, ret);
1183 }
1184 }
1185
1186 v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) {
1187 struct vb2_v4l2_buffer *vbuf = &buf->vb;
1188
1189 ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index);
1190 if (ret)
1191 dev_dbg(inst->dev->dev,
1192 "%s: Clearing display flag of buf index: %u, fail: %d\n",
1193 __func__, vbuf->vb2_buf.index, ret);
1194 }
1195
1196 return 0;
1197 }
1198
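/*
 * Copy one bitstream chunk into the ring buffer, splitting the write in two
 * when it wraps around the end of the buffer.
 */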
1199 static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
1200 struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
1201 {
1202 size_t size;
1203 size_t offset = wr_ptr - ring_buffer->daddr;
1204 int ret;
1205
1206 if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) {
1207 size = ring_buffer->daddr + ring_buffer->size - wr_ptr;
1208 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size);
1209 if (ret < 0)
1210 return ret;
1211
1212 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size,
1213 buffer_size - size);
1214 if (ret < 0)
1215 return ret;
1216 } else {
1217 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
1218 buffer_size);
1219 if (ret < 0)
1220 return ret;
1221 }
1222
1223 return 0;
1224 }
1225
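/*
 * Copy all queued, not yet consumed source buffers into the bitstream ring
 * buffer and tell the firmware about the newly available data. Stop early
 * when the ring buffer runs out of space or the last buffer of a drain
 * sequence has been written.
 */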
1226 static int fill_ringbuffer(struct vpu_instance *inst)
1227 {
1228 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1229 struct v4l2_m2m_buffer *buf, *n;
1230 int ret;
1231
1232 if (m2m_ctx->last_src_buf) {
1233 struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
1234
1235 if (vpu_buf->consumed) {
1236 dev_dbg(inst->dev->dev, "last src buffer already written\n");
1237 return 0;
1238 }
1239 }
1240
1241 v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
1242 struct vb2_v4l2_buffer *vbuf = &buf->vb;
1243 struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1244 struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1245 size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
1246 void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
1247 dma_addr_t rd_ptr = 0;
1248 dma_addr_t wr_ptr = 0;
1249 size_t remain_size = 0;
1250
1251 if (vpu_buf->consumed) {
1252 dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
1253 vbuf->vb2_buf.index);
1254 continue;
1255 }
1256
1257 if (!src_buf) {
1258 dev_dbg(inst->dev->dev,
1259 "%s: Acquiring kernel pointer to src buf (%u), fail\n",
1260 __func__, vbuf->vb2_buf.index);
1261 break;
1262 }
1263
1264 ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
1265 if (ret) {
1266 /* Unable to acquire the mutex */
1267 dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
1268 ret);
1269 return ret;
1270 }
1271
1272 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);
1273
1274 if (remain_size < src_size) {
1275 dev_dbg(inst->dev->dev,
1276 "%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
1277 __func__, remain_size, src_size, vbuf->vb2_buf.index);
1278 break;
1279 }
1280
1281 ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
1282 if (ret) {
1283 dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
1284 vbuf->vb2_buf.index, ret);
1285 return ret;
1286 }
1287
1288 ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
1289 if (ret) {
1290 dev_dbg(inst->dev->dev,
1291 "update_bitstream_buffer fail: %d for src buf (%u)\n",
1292 ret, vbuf->vb2_buf.index);
1293 break;
1294 }
1295
1296 vpu_buf->consumed = true;
1297
1298 /* Don't write buffers past the last one while draining. */
1299 if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
1300 dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
1301 break;
1302 }
1303 }
1304
1305 return 0;
1306 }
1307
1308 static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
1309 {
1310 struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1311 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1312 struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1313 struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1314
1315 vpu_buf->consumed = false;
1316 vbuf->sequence = inst->queued_src_buf_num++;
1317
1318 v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1319 }
1320
1321 static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
1322 {
1323 struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1324 struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1325 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1326
1327 vbuf->sequence = inst->queued_dst_buf_num++;
1328
1329 if (inst->state == VPU_INST_STATE_PIC_RUN) {
1330 struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
1331 int ret;
1332
1333 /*
1334 * The buffer is already registered, just clear the display flag
1335 * to let the firmware know it can be used.
1336 */
1337 vpu_buf->display = false;
1338 ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
1339 if (ret) {
1340 dev_dbg(inst->dev->dev,
1341 "%s: Clearing the display flag of buffer index: %u, fail: %d\n",
1342 __func__, vb->index, ret);
1343 }
1344 }
1345
1346 if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
1347 unsigned int i;
1348
1349 for (i = 0; i < vb->num_planes; i++)
1350 vb2_set_plane_payload(vb, i, 0);
1351
1352 vbuf->field = V4L2_FIELD_NONE;
1353
1354 send_eos_event(inst);
1355 v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
1356 } else {
1357 v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1358 }
1359 }
1360
1361 static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
1362 {
1363 struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1364 struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1365
1366 dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
1367 __func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
1368 vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));
1369
1370 if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1371 wave5_vpu_dec_buf_queue_src(vb);
1372 else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
1373 wave5_vpu_dec_buf_queue_dst(vb);
1374 }
1375
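/* Size the bitstream ring buffer as four times the (1024-aligned) OUTPUT sizeimage. */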
1376 static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
1377 {
1378 int ret;
1379 struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1380
1381 ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
1382 ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
1383 if (ret) {
1384 dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
1385 __func__, ring_buffer->size, ret);
1386 return ret;
1387 }
1388
1389 inst->last_rd_ptr = ring_buffer->daddr;
1390
1391 return 0;
1392 }
1393
1394 static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count)
1395 {
1396 struct vpu_instance *inst = vb2_get_drv_priv(q);
1397 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1398 int ret = 0;
1399
1400 dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1401
1402 v4l2_m2m_update_start_streaming_state(m2m_ctx, q);
1403
1404 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) {
1405 struct dec_open_param open_param;
1406
1407 memset(&open_param, 0, sizeof(struct dec_open_param));
1408
1409 ret = wave5_vpu_dec_allocate_ring_buffer(inst);
1410 if (ret)
1411 goto return_buffers;
1412
1413 open_param.bitstream_buffer = inst->bitstream_vbuf.daddr;
1414 open_param.bitstream_buffer_size = inst->bitstream_vbuf.size;
1415
1416 ret = wave5_vpu_dec_open(inst, &open_param);
1417 if (ret) {
1418 dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n",
1419 __func__, ret);
1420 goto free_bitstream_vbuf;
1421 }
1422
1423 ret = switch_state(inst, VPU_INST_STATE_OPEN);
1424 if (ret)
1425 goto free_bitstream_vbuf;
1426 } else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1427 if (inst->state == VPU_INST_STATE_STOP)
1428 ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1429 if (ret)
1430 goto return_buffers;
1431 }
1432
1433 return ret;
1434
1435 free_bitstream_vbuf:
1436 wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf);
1437 return_buffers:
1438 wave5_return_bufs(q, VB2_BUF_STATE_QUEUED);
1439 return ret;
1440 }
1441
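/*
 * Streamoff on the OUTPUT queue: return all pending source buffers, flush the
 * firmware instance and reset the ring buffer read/write pointers. Any
 * draining operation is cancelled.
 */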
1442 static int streamoff_output(struct vb2_queue *q)
1443 {
1444 struct vpu_instance *inst = vb2_get_drv_priv(q);
1445 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1446 struct vb2_v4l2_buffer *buf;
1447 int ret;
1448 dma_addr_t new_rd_ptr;
1449
1450 while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
1451 dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
1452 __func__, buf->vb2_buf.type, buf->vb2_buf.index);
1453 v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1454 }
1455
1456 ret = wave5_vpu_flush_instance(inst);
1457 if (ret)
1458 return ret;
1459
1460 /* Reset the ring buffer information */
1461 new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst);
1462 inst->last_rd_ptr = new_rd_ptr;
1463 inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
1464 inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;
1465
1466 if (v4l2_m2m_has_stopped(m2m_ctx))
1467 send_eos_event(inst);
1468
1469 /* streamoff on output cancels any draining operation */
1470 inst->eos = false;
1471
1472 return 0;
1473 }
1474
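/*
 * Streamoff on the CAPTURE queue: hand all frame buffers back to the firmware
 * by setting their display flags and return the queued capture buffers with
 * an error state.
 */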
1475 static int streamoff_capture(struct vb2_queue *q)
1476 {
1477 struct vpu_instance *inst = vb2_get_drv_priv(q);
1478 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1479 struct vb2_v4l2_buffer *buf;
1480 unsigned int i;
1481 int ret = 0;
1482
1483 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1484 ret = wave5_vpu_dec_set_disp_flag(inst, i);
1485 if (ret)
1486 dev_dbg(inst->dev->dev,
1487 "%s: Setting display flag of buf index: %u, fail: %d\n",
1488 __func__, i, ret);
1489 }
1490
1491 while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) {
1492 u32 plane;
1493
1494 dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n",
1495 __func__, buf->vb2_buf.type, buf->vb2_buf.index);
1496
1497 for (plane = 0; plane < inst->dst_fmt.num_planes; plane++)
1498 vb2_set_plane_payload(&buf->vb2_buf, plane, 0);
1499
1500 v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1501 }
1502
1503 if (inst->needs_reallocation) {
1504 wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL);
1505 inst->needs_reallocation = false;
1506 }
1507
1508 if (v4l2_m2m_has_stopped(m2m_ctx)) {
1509 ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1510 if (ret)
1511 return ret;
1512 }
1513
1514 return 0;
1515 }
1516
1517 static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
1518 {
1519 struct vpu_instance *inst = vb2_get_drv_priv(q);
1520 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1521 bool check_cmd = TRUE;
1522
1523 dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1524
1525 while (check_cmd) {
1526 struct queue_status_info q_status;
1527 struct dec_output_info dec_output_info;
1528
1529 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1530
1531 if (q_status.report_queue_count == 0)
1532 break;
1533
1534 if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1535 break;
1536
1537 if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
1538 dev_dbg(inst->dev->dev, "Getting decoding results from fw, fail\n");
1539 }
1540
1541 v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);
1542
1543 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1544 streamoff_output(q);
1545 else
1546 streamoff_capture(q);
1547 }
1548
1549 static const struct vb2_ops wave5_vpu_dec_vb2_ops = {
1550 .queue_setup = wave5_vpu_dec_queue_setup,
1551 .wait_prepare = vb2_ops_wait_prepare,
1552 .wait_finish = vb2_ops_wait_finish,
1553 .buf_queue = wave5_vpu_dec_buf_queue,
1554 .start_streaming = wave5_vpu_dec_start_streaming,
1555 .stop_streaming = wave5_vpu_dec_stop_streaming,
1556 };
1557
1558 static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt,
1559 struct v4l2_pix_format_mplane *dst_fmt)
1560 {
1561 unsigned int dst_pix_fmt = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt;
1562 const struct v4l2_format_info *dst_fmt_info = v4l2_format_info(dst_pix_fmt);
1563
1564 src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt;
1565 src_fmt->field = V4L2_FIELD_NONE;
1566 src_fmt->flags = 0;
1567 src_fmt->num_planes = 1;
1568 wave5_update_pix_fmt(src_fmt, 720, 480);
1569
1570 dst_fmt->pixelformat = dst_pix_fmt;
1571 dst_fmt->field = V4L2_FIELD_NONE;
1572 dst_fmt->flags = 0;
1573 dst_fmt->num_planes = dst_fmt_info->mem_planes;
1574 wave5_update_pix_fmt(dst_fmt, 736, 480);
1575 }
1576
1577 static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
1578 {
1579 return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops);
1580 }
1581
1582 static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = {
1583 .finish_process = wave5_vpu_dec_finish_decode,
1584 };
1585
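/*
 * Ask the firmware to parse the sequence header and wait for the result. On
 * success, the new sequence information is propagated through
 * handle_dynamic_resolution_change().
 */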
1586 static int initialize_sequence(struct vpu_instance *inst)
1587 {
1588 struct dec_initial_info initial_info;
1589 int ret = 0;
1590
1591 memset(&initial_info, 0, sizeof(struct dec_initial_info));
1592
1593 ret = wave5_vpu_dec_issue_seq_init(inst);
1594 if (ret) {
1595 dev_dbg(inst->dev->dev, "%s: wave5_vpu_dec_issue_seq_init, fail: %d\n",
1596 __func__, ret);
1597 return ret;
1598 }
1599
1600 if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1601 dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__);
1602
1603 ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info);
1604 if (ret) {
1605 dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n",
1606 __func__, ret, initial_info.seq_init_err_reason);
1607 wave5_handle_src_buffer(inst, initial_info.rd_ptr);
1608 return ret;
1609 }
1610
1611 handle_dynamic_resolution_change(inst);
1612
1613 return 0;
1614 }
1615
1616 static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
1617 {
1618 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1619
1620 lockdep_assert_held(&inst->state_spinlock);
1621 return m2m_ctx->is_draining || inst->eos;
1622 }
1623
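/*
 * m2m device_run callback. Feed the bitstream ring buffer and, depending on
 * the instance state, either parse the sequence header, prepare the frame
 * buffers or start decoding a picture. The job is kept active while the
 * firmware is expected to raise an interrupt.
 */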
1624 static void wave5_vpu_dec_device_run(void *priv)
1625 {
1626 struct vpu_instance *inst = priv;
1627 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1628 struct queue_status_info q_status;
1629 u32 fail_res = 0;
1630 int ret = 0;
1631
1632 dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
1633
1634 ret = fill_ringbuffer(inst);
1635 if (ret) {
1636 dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
1637 goto finish_job_and_return;
1638 }
1639
1640 switch (inst->state) {
1641 case VPU_INST_STATE_OPEN:
1642 ret = initialize_sequence(inst);
1643 if (ret) {
1644 unsigned long flags;
1645
1646 spin_lock_irqsave(&inst->state_spinlock, flags);
1647 if (wave5_is_draining_or_eos(inst) &&
1648 wave5_last_src_buffer_consumed(m2m_ctx)) {
1649 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1650
1651 switch_state(inst, VPU_INST_STATE_STOP);
1652
1653 if (vb2_is_streaming(dst_vq))
1654 send_eos_event(inst);
1655 else
1656 handle_dynamic_resolution_change(inst);
1657
1658 flag_last_buffer_done(inst);
1659 }
1660 spin_unlock_irqrestore(&inst->state_spinlock, flags);
1661 } else {
1662 switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1663 }
1664
1665 break;
1666
1667 case VPU_INST_STATE_INIT_SEQ:
1668 /*
1669 * Do this early: preparing the fb can trigger an IRQ before
1670 * we have had a chance to switch, which would lead to an invalid
1671 * state change.
1672 */
1673 switch_state(inst, VPU_INST_STATE_PIC_RUN);
1674
1675 /*
1676 * During DRC, the picture decoding remains pending, so just leave the job
1677 * active until this decode operation completes.
1678 */
1679 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1680
1681 /*
1682 * The sequence must be analyzed first to calculate the proper
1683 * size of the auxiliary buffers.
1684 */
1685 ret = wave5_prepare_fb(inst);
1686 if (ret) {
1687 dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
1688 switch_state(inst, VPU_INST_STATE_STOP);
1689 break;
1690 }
1691
1692 if (q_status.instance_queue_count) {
1693 dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
1694 return;
1695 }
1696
1697 fallthrough;
1698 case VPU_INST_STATE_PIC_RUN:
1699 ret = start_decode(inst, &fail_res);
1700 if (ret) {
1701 dev_err(inst->dev->dev,
1702 "Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
1703 m2m_ctx, ret, fail_res);
1704 break;
1705 }
1706 /* Return so that we leave this job active */
1707 dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
1708 return;
1709 default:
1710 WARN(1, "Execution of a job in state %s illegal.\n", state_to_str(inst->state));
1711 break;
1712 }
1713
1714 finish_job_and_return:
1715 dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
1716 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
1717 }
1718
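/*
 * m2m job_abort callback: move the instance to the STOP state and tell the
 * firmware to treat the current bitstream position as end-of-stream.
 */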
static void wave5_vpu_dec_job_abort(void *priv)
{
	struct vpu_instance *inst = priv;
	int ret;

	ret = switch_state(inst, VPU_INST_STATE_STOP);
	if (ret)
		return;

	ret = wave5_vpu_dec_set_eos_on_firmware(inst);
	if (ret)
		dev_warn(inst->dev->dev,
			 "Setting EOS for the bitstream, fail: %d\n", ret);
}

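/*
 * m2m job_ready callback: check, per instance state, whether the next job
 * may be scheduled, i.e. whether bitstream data, capture buffers and a
 * streaming CAPTURE queue are available where the current state requires
 * them.
 */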
static int wave5_vpu_dec_job_ready(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	switch (inst->state) {
	case VPU_INST_STATE_NONE:
		dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
		break;
	case VPU_INST_STATE_OPEN:
		if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
			ret = 1;
			break;
		}

		dev_dbg(inst->dev->dev,
			"Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
		break;
	case VPU_INST_STATE_INIT_SEQ:
	case VPU_INST_STATE_PIC_RUN:
		if (!m2m_ctx->cap_q_ctx.q.streaming) {
			dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
			break;
		} else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
			dev_dbg(inst->dev->dev,
				"No capture buffer ready to decode!\n");
			break;
		} else if (!wave5_is_draining_or_eos(inst) &&
			   !v4l2_m2m_num_src_bufs_ready(m2m_ctx)) {
			dev_dbg(inst->dev->dev,
				"No bitstream data to decode!\n");
			break;
		}
		ret = 1;
		break;
	case VPU_INST_STATE_STOP:
		dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
		break;
	}

	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}

static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
	.device_run = wave5_vpu_dec_device_run,
	.job_abort = wave5_vpu_dec_job_abort,
	.job_ready = wave5_vpu_dec_job_ready,
};

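/*
 * open() handler for the decoder video node: allocate and initialize a new
 * decoder instance (v4l2_fh, m2m context, controls, default formats) and
 * add it to the device's instance list.
 */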
static int wave5_vpu_open_dec(struct file *filp)
{
	struct video_device *vdev = video_devdata(filp);
	struct vpu_device *dev = video_drvdata(filp);
	struct vpu_instance *inst = NULL;
	struct v4l2_m2m_ctx *m2m_ctx;
	int ret = 0;

	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	inst->dev = dev;
	inst->type = VPU_INST_TYPE_DEC;
	inst->ops = &wave5_vpu_dec_inst_ops;

	spin_lock_init(&inst->state_spinlock);

	inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
	if (!inst->codec_info) {
		kfree(inst);
		return -ENOMEM;
	}

	v4l2_fh_init(&inst->v4l2_fh, vdev);
	filp->private_data = &inst->v4l2_fh;
	v4l2_fh_add(&inst->v4l2_fh);

	INIT_LIST_HEAD(&inst->list);

	inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev;
	inst->v4l2_fh.m2m_ctx =
		v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init);
	if (IS_ERR(inst->v4l2_fh.m2m_ctx)) {
		ret = PTR_ERR(inst->v4l2_fh.m2m_ctx);
		goto cleanup_inst;
	}
	m2m_ctx = inst->v4l2_fh.m2m_ctx;

	v4l2_m2m_set_src_buffered(m2m_ctx, true);
	v4l2_m2m_set_dst_buffered(m2m_ctx, true);
	/*
	 * We use the M2M job queue to ensure synchronization of steps where
	 * needed, as IOCTLs can occur at anytime and we need to run commands on
	 * the firmware in a specified order.
	 * In order to initialize the sequence on the firmware within an M2M
	 * job, the M2M framework needs to be able to queue jobs before
	 * the CAPTURE queue has been started, because we need the results of the
	 * initialization to properly prepare the CAPTURE queue with the correct
	 * amount of buffers.
	 * By setting ignore_cap_streaming to true the m2m framework will call
	 * job_ready as soon as the OUTPUT queue is streaming, instead of
	 * waiting until both the CAPTURE and OUTPUT queues are streaming.
	 */
	m2m_ctx->ignore_cap_streaming = true;

	v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10);
	v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL,
			  V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1);

	if (inst->v4l2_ctrl_hdl.error) {
		ret = -ENODEV;
		goto cleanup_inst;
	}

	inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl;
	v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl);

	wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt);
	inst->colorspace = V4L2_COLORSPACE_REC709;
	inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
	inst->quantization = V4L2_QUANTIZATION_DEFAULT;
	inst->xfer_func = V4L2_XFER_FUNC_DEFAULT;

	init_completion(&inst->irq_done);

	inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL);
	if (inst->id < 0) {
		dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id);
		ret = inst->id;
		goto cleanup_inst;
	}

	/*
	 * For Wave515 SRAM memory was already allocated
	 * at wave5_vpu_dec_register_device()
	 */
	if (inst->dev->product_code != WAVE515_CODE)
		wave5_vdi_allocate_sram(inst->dev);

	ret = mutex_lock_interruptible(&dev->dev_lock);
	if (ret)
		goto cleanup_inst;

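	/*
	 * Without a usable interrupt line the device is polled: start the
	 * polling hrtimer when the first instance is opened.
	 */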
	if (dev->irq < 0 && !hrtimer_active(&dev->hrtimer) && list_empty(&dev->instances))
		hrtimer_start(&dev->hrtimer, ns_to_ktime(dev->vpu_poll_interval * NSEC_PER_MSEC),
			      HRTIMER_MODE_REL_PINNED);

	list_add_tail(&inst->list, &dev->instances);

	mutex_unlock(&dev->dev_lock);

	return 0;

cleanup_inst:
	wave5_cleanup_instance(inst);
	return ret;
}

static int wave5_vpu_dec_release(struct file *filp)
{
	return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder");
}

static const struct v4l2_file_operations wave5_vpu_dec_fops = {
	.owner = THIS_MODULE,
	.open = wave5_vpu_open_dec,
	.release = wave5_vpu_dec_release,
	.unlocked_ioctl = video_ioctl2,
	.poll = v4l2_m2m_fop_poll,
	.mmap = v4l2_m2m_fop_mmap,
};

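/*
 * Register the decoder: set up the V4L2 mem2mem device and the video node
 * that userspace opens to decode bitstreams.
 */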
int wave5_vpu_dec_register_device(struct vpu_device *dev)
{
	struct video_device *vdev_dec;
	int ret;

	/*
	 * Secondary AXI setup for Wave515 is done by INIT_VPU command,
	 * i.e. wave5_vpu_init(), that's why we allocate SRAM memory early.
	 */
	if (dev->product_code == WAVE515_CODE)
		wave5_vdi_allocate_sram(dev);

	vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL);
	if (!vdev_dec)
		return -ENOMEM;

	dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops);
	if (IS_ERR(dev->v4l2_m2m_dec_dev)) {
		ret = PTR_ERR(dev->v4l2_m2m_dec_dev);
		dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret);
		return -EINVAL;
	}

	dev->video_dev_dec = vdev_dec;

	strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name));
	vdev_dec->fops = &wave5_vpu_dec_fops;
	vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops;
	vdev_dec->release = video_device_release_empty;
	vdev_dec->v4l2_dev = &dev->v4l2_dev;
	vdev_dec->vfl_dir = VFL_DIR_M2M;
	vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
	vdev_dec->lock = &dev->dev_lock;

	ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1);
	if (ret)
		return ret;

	video_set_drvdata(vdev_dec, dev);

	return 0;
}

void wave5_vpu_dec_unregister_device(struct vpu_device *dev)
{
	/*
	 * This is the freeing counterpart of the Wave515 SRAM allocation
	 * done in wave5_vpu_dec_register_device().
	 */
	if (dev->product_code == WAVE515_CODE)
		wave5_vdi_free_sram(dev);

	video_unregister_device(dev->video_dev_dec);
	if (dev->v4l2_m2m_dec_dev)
		v4l2_m2m_release(dev->v4l2_m2m_dec_dev);
}
