Lines Matching +full:1 +full:- +full:8

1 // SPDX-License-Identifier: LGPL-2.1+
6 * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
8 * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
15 #include "codec-fwht.h"
21 * be guaranteed that the magic 8 byte sequence (see below) can
28 #define IBLOCK 1
34 1, 8,
57 s16 block[8 * 8]; in rlc()
67 for (y = 0; y < 8; y++) { in rlc()
68 for (x = 0; x < 8; x++) { in rlc()
69 *wp = in[x + y * 8]; in rlc()
75 for (i = 63; i >= 0 && !block[zigzag[i]]; i--) in rlc()
81 to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0); in rlc()
93 cnt--; in rlc()
111 * This function will worst-case increase rlc_in by 65*2 bytes:
112 * one s16 value for the header and 8 * 8 coefficients of type s16.
121 s16 block[8 * 8 + 16]; in derlc()
130 * Now de-compress, it expands one byte to up to 15 bytes in derlc()
134 * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to in derlc()
137 while (dec_count < 8 * 8) { in derlc()
150 for (i = 0; i < 64 - dec_count; i++) in derlc()
158 dec_count += length + 1; in derlc()
165 int y = pos / 8; in derlc()
166 int x = pos % 8; in derlc()
168 dwht_out[x + y * 8] = *wp++; in derlc()
182 2, 2, 3, 6, 6, 6, 6, 8,
201 for (j = 0; j < 8; j++) { in quantize_intra()
202 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) { in quantize_intra()
204 if (*coeff >= -qp && *coeff <= qp) in quantize_intra()
217 for (j = 0; j < 8; j++) in dequantize_intra()
218 for (i = 0; i < 8; i++, quant++, coeff++) in dequantize_intra()
227 for (j = 0; j < 8; j++) { in quantize_inter()
228 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) { in quantize_inter()
230 if (*coeff >= -qp && *coeff <= qp) in quantize_inter()
243 for (j = 0; j < 8; j++) in dequantize_inter()
244 for (i = 0; i < 8; i++, quant++, coeff++) in dequantize_inter()
252 /* we'll need more than 8 bits for the transformed coefficients */ in fwht()
253 s32 workspace1[8], workspace2[8]; in fwht()
259 /* stage 1 */ in fwht()
260 for (i = 0; i < 8; i++, tmp += stride, out += 8) { in fwht()
262 case 1: in fwht()
263 workspace1[0] = tmp[0] + tmp[1] - add; in fwht()
264 workspace1[1] = tmp[0] - tmp[1]; in fwht()
266 workspace1[2] = tmp[2] + tmp[3] - add; in fwht()
267 workspace1[3] = tmp[2] - tmp[3]; in fwht()
269 workspace1[4] = tmp[4] + tmp[5] - add; in fwht()
270 workspace1[5] = tmp[4] - tmp[5]; in fwht()
272 workspace1[6] = tmp[6] + tmp[7] - add; in fwht()
273 workspace1[7] = tmp[6] - tmp[7]; in fwht()
276 workspace1[0] = tmp[0] + tmp[2] - add; in fwht()
277 workspace1[1] = tmp[0] - tmp[2]; in fwht()
279 workspace1[2] = tmp[4] + tmp[6] - add; in fwht()
280 workspace1[3] = tmp[4] - tmp[6]; in fwht()
282 workspace1[4] = tmp[8] + tmp[10] - add; in fwht()
283 workspace1[5] = tmp[8] - tmp[10]; in fwht()
285 workspace1[6] = tmp[12] + tmp[14] - add; in fwht()
286 workspace1[7] = tmp[12] - tmp[14]; in fwht()
289 workspace1[0] = tmp[0] + tmp[3] - add; in fwht()
290 workspace1[1] = tmp[0] - tmp[3]; in fwht()
292 workspace1[2] = tmp[6] + tmp[9] - add; in fwht()
293 workspace1[3] = tmp[6] - tmp[9]; in fwht()
295 workspace1[4] = tmp[12] + tmp[15] - add; in fwht()
296 workspace1[5] = tmp[12] - tmp[15]; in fwht()
298 workspace1[6] = tmp[18] + tmp[21] - add; in fwht()
299 workspace1[7] = tmp[18] - tmp[21]; in fwht()
302 workspace1[0] = tmp[0] + tmp[4] - add; in fwht()
303 workspace1[1] = tmp[0] - tmp[4]; in fwht()
305 workspace1[2] = tmp[8] + tmp[12] - add; in fwht()
306 workspace1[3] = tmp[8] - tmp[12]; in fwht()
308 workspace1[4] = tmp[16] + tmp[20] - add; in fwht()
309 workspace1[5] = tmp[16] - tmp[20]; in fwht()
311 workspace1[6] = tmp[24] + tmp[28] - add; in fwht()
312 workspace1[7] = tmp[24] - tmp[28]; in fwht()
318 workspace2[1] = workspace1[0] - workspace1[2]; in fwht()
319 workspace2[2] = workspace1[1] - workspace1[3]; in fwht()
320 workspace2[3] = workspace1[1] + workspace1[3]; in fwht()
323 workspace2[5] = workspace1[4] - workspace1[6]; in fwht()
324 workspace2[6] = workspace1[5] - workspace1[7]; in fwht()
329 out[1] = workspace2[0] - workspace2[4]; in fwht()
330 out[2] = workspace2[1] - workspace2[5]; in fwht()
331 out[3] = workspace2[1] + workspace2[5]; in fwht()
333 out[5] = workspace2[2] - workspace2[6]; in fwht()
334 out[6] = workspace2[3] - workspace2[7]; in fwht()
340 for (i = 0; i < 8; i++, out++) { in fwht()
341 /* stage 1 */ in fwht()
342 workspace1[0] = out[0] + out[1 * 8]; in fwht()
343 workspace1[1] = out[0] - out[1 * 8]; in fwht()
345 workspace1[2] = out[2 * 8] + out[3 * 8]; in fwht()
346 workspace1[3] = out[2 * 8] - out[3 * 8]; in fwht()
348 workspace1[4] = out[4 * 8] + out[5 * 8]; in fwht()
349 workspace1[5] = out[4 * 8] - out[5 * 8]; in fwht()
351 workspace1[6] = out[6 * 8] + out[7 * 8]; in fwht()
352 workspace1[7] = out[6 * 8] - out[7 * 8]; in fwht()
356 workspace2[1] = workspace1[0] - workspace1[2]; in fwht()
357 workspace2[2] = workspace1[1] - workspace1[3]; in fwht()
358 workspace2[3] = workspace1[1] + workspace1[3]; in fwht()
361 workspace2[5] = workspace1[4] - workspace1[6]; in fwht()
362 workspace2[6] = workspace1[5] - workspace1[7]; in fwht()
365 out[0 * 8] = workspace2[0] + workspace2[4]; in fwht()
366 out[1 * 8] = workspace2[0] - workspace2[4]; in fwht()
367 out[2 * 8] = workspace2[1] - workspace2[5]; in fwht()
368 out[3 * 8] = workspace2[1] + workspace2[5]; in fwht()
369 out[4 * 8] = workspace2[2] + workspace2[6]; in fwht()
370 out[5 * 8] = workspace2[2] - workspace2[6]; in fwht()
371 out[6 * 8] = workspace2[3] - workspace2[7]; in fwht()
372 out[7 * 8] = workspace2[3] + workspace2[7]; in fwht()
377 * Not the nicest way of doing it, but P-blocks get twice the range of
378 * that of the I-blocks. Therefore we need a type bigger than 8 bits.
385 /* we'll need more than 8 bits for the transformed coefficients */ in fwht16()
386 s32 workspace1[8], workspace2[8]; in fwht16()
391 for (i = 0; i < 8; i++, tmp += stride, out += 8) { in fwht16()
392 /* stage 1 */ in fwht16()
393 workspace1[0] = tmp[0] + tmp[1]; in fwht16()
394 workspace1[1] = tmp[0] - tmp[1]; in fwht16()
397 workspace1[3] = tmp[2] - tmp[3]; in fwht16()
400 workspace1[5] = tmp[4] - tmp[5]; in fwht16()
403 workspace1[7] = tmp[6] - tmp[7]; in fwht16()
407 workspace2[1] = workspace1[0] - workspace1[2]; in fwht16()
408 workspace2[2] = workspace1[1] - workspace1[3]; in fwht16()
409 workspace2[3] = workspace1[1] + workspace1[3]; in fwht16()
412 workspace2[5] = workspace1[4] - workspace1[6]; in fwht16()
413 workspace2[6] = workspace1[5] - workspace1[7]; in fwht16()
418 out[1] = workspace2[0] - workspace2[4]; in fwht16()
419 out[2] = workspace2[1] - workspace2[5]; in fwht16()
420 out[3] = workspace2[1] + workspace2[5]; in fwht16()
422 out[5] = workspace2[2] - workspace2[6]; in fwht16()
423 out[6] = workspace2[3] - workspace2[7]; in fwht16()
429 for (i = 0; i < 8; i++, out++) { in fwht16()
430 /* stage 1 */ in fwht16()
431 workspace1[0] = out[0] + out[1*8]; in fwht16()
432 workspace1[1] = out[0] - out[1*8]; in fwht16()
434 workspace1[2] = out[2*8] + out[3*8]; in fwht16()
435 workspace1[3] = out[2*8] - out[3*8]; in fwht16()
437 workspace1[4] = out[4*8] + out[5*8]; in fwht16()
438 workspace1[5] = out[4*8] - out[5*8]; in fwht16()
440 workspace1[6] = out[6*8] + out[7*8]; in fwht16()
441 workspace1[7] = out[6*8] - out[7*8]; in fwht16()
445 workspace2[1] = workspace1[0] - workspace1[2]; in fwht16()
446 workspace2[2] = workspace1[1] - workspace1[3]; in fwht16()
447 workspace2[3] = workspace1[1] + workspace1[3]; in fwht16()
450 workspace2[5] = workspace1[4] - workspace1[6]; in fwht16()
451 workspace2[6] = workspace1[5] - workspace1[7]; in fwht16()
455 out[0*8] = workspace2[0] + workspace2[4]; in fwht16()
456 out[1*8] = workspace2[0] - workspace2[4]; in fwht16()
457 out[2*8] = workspace2[1] - workspace2[5]; in fwht16()
458 out[3*8] = workspace2[1] + workspace2[5]; in fwht16()
459 out[4*8] = workspace2[2] + workspace2[6]; in fwht16()
460 out[5*8] = workspace2[2] - workspace2[6]; in fwht16()
461 out[6*8] = workspace2[3] - workspace2[7]; in fwht16()
462 out[7*8] = workspace2[3] + workspace2[7]; in fwht16()
470 * we'll need more than 8 bits for the transformed coefficients in ifwht()
473 int workspace1[8], workspace2[8]; in ifwht()
474 int inter = intra ? 0 : 1; in ifwht()
479 for (i = 0; i < 8; i++, tmp += 8, out += 8) { in ifwht()
480 /* stage 1 */ in ifwht()
481 workspace1[0] = tmp[0] + tmp[1]; in ifwht()
482 workspace1[1] = tmp[0] - tmp[1]; in ifwht()
485 workspace1[3] = tmp[2] - tmp[3]; in ifwht()
488 workspace1[5] = tmp[4] - tmp[5]; in ifwht()
491 workspace1[7] = tmp[6] - tmp[7]; in ifwht()
495 workspace2[1] = workspace1[0] - workspace1[2]; in ifwht()
496 workspace2[2] = workspace1[1] - workspace1[3]; in ifwht()
497 workspace2[3] = workspace1[1] + workspace1[3]; in ifwht()
500 workspace2[5] = workspace1[4] - workspace1[6]; in ifwht()
501 workspace2[6] = workspace1[5] - workspace1[7]; in ifwht()
506 out[1] = workspace2[0] - workspace2[4]; in ifwht()
507 out[2] = workspace2[1] - workspace2[5]; in ifwht()
508 out[3] = workspace2[1] + workspace2[5]; in ifwht()
510 out[5] = workspace2[2] - workspace2[6]; in ifwht()
511 out[6] = workspace2[3] - workspace2[7]; in ifwht()
517 for (i = 0; i < 8; i++, out++) { in ifwht()
518 /* stage 1 */ in ifwht()
519 workspace1[0] = out[0] + out[1 * 8]; in ifwht()
520 workspace1[1] = out[0] - out[1 * 8]; in ifwht()
522 workspace1[2] = out[2 * 8] + out[3 * 8]; in ifwht()
523 workspace1[3] = out[2 * 8] - out[3 * 8]; in ifwht()
525 workspace1[4] = out[4 * 8] + out[5 * 8]; in ifwht()
526 workspace1[5] = out[4 * 8] - out[5 * 8]; in ifwht()
528 workspace1[6] = out[6 * 8] + out[7 * 8]; in ifwht()
529 workspace1[7] = out[6 * 8] - out[7 * 8]; in ifwht()
533 workspace2[1] = workspace1[0] - workspace1[2]; in ifwht()
534 workspace2[2] = workspace1[1] - workspace1[3]; in ifwht()
535 workspace2[3] = workspace1[1] + workspace1[3]; in ifwht()
538 workspace2[5] = workspace1[4] - workspace1[6]; in ifwht()
539 workspace2[6] = workspace1[5] - workspace1[7]; in ifwht()
546 out[0 * 8] = workspace2[0] + workspace2[4]; in ifwht()
547 out[1 * 8] = workspace2[0] - workspace2[4]; in ifwht()
548 out[2 * 8] = workspace2[1] - workspace2[5]; in ifwht()
549 out[3 * 8] = workspace2[1] + workspace2[5]; in ifwht()
550 out[4 * 8] = workspace2[2] + workspace2[6]; in ifwht()
551 out[5 * 8] = workspace2[2] - workspace2[6]; in ifwht()
552 out[6 * 8] = workspace2[3] - workspace2[7]; in ifwht()
553 out[7 * 8] = workspace2[3] + workspace2[7]; in ifwht()
555 for (d = 0; d < 8; d++) in ifwht()
556 out[8 * d] >>= 6; in ifwht()
560 out[0 * 8] = workspace2[0] + workspace2[4]; in ifwht()
561 out[1 * 8] = workspace2[0] - workspace2[4]; in ifwht()
562 out[2 * 8] = workspace2[1] - workspace2[5]; in ifwht()
563 out[3 * 8] = workspace2[1] + workspace2[5]; in ifwht()
564 out[4 * 8] = workspace2[2] + workspace2[6]; in ifwht()
565 out[5 * 8] = workspace2[2] - workspace2[6]; in ifwht()
566 out[6 * 8] = workspace2[3] - workspace2[7]; in ifwht()
567 out[7 * 8] = workspace2[3] + workspace2[7]; in ifwht()
569 for (d = 0; d < 8; d++) { in ifwht()
570 out[8 * d] >>= 6; in ifwht()
571 out[8 * d] += 128; in ifwht()
582 for (i = 0; i < 8; i++) { in fill_encoder_block()
583 for (j = 0; j < 8; j++, input += input_step) in fill_encoder_block()
585 input += stride - 8 * input_step; in fill_encoder_block()
596 for (i = 0; i < 8 * 8; i++, tmp++) in var_intra()
600 for (i = 0; i < 8 * 8; i++, tmp++) in var_intra()
601 ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean); in var_intra()
610 for (i = 0; i < 8 * 8; i++, old++, new++) in var_inter()
611 ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new); in var_inter()
627 fill_encoder_block(reference, old, 8, 1); in decide_blocktype()
630 for (k = 0; k < 8; k++) { in decide_blocktype()
631 for (l = 0; l < 8; l++) { in decide_blocktype()
632 *deltablock = *work - *reference; in decide_blocktype()
638 deltablock -= 64; in decide_blocktype()
648 for (i = 0; i < 8; i++) { in fill_decoder_block()
649 for (j = 0; j < 8; j++, input++, dst += dst_step) { in fill_decoder_block()
657 dst += stride - (8 * dst_step); in fill_decoder_block()
666 for (k = 0; k < 8; k++) { in add_deltas()
667 for (l = 0; l < 8; l++) { in add_deltas()
680 ref += stride - (8 * ref_step); in add_deltas()
697 width = round_up(width, 8); in encode_plane()
698 height = round_up(height, 8); in encode_plane()
700 for (j = 0; j < height / 8; j++) { in encode_plane()
701 input = input_start + j * 8 * stride; in encode_plane()
702 for (i = 0; i < width / 8; i++) { in encode_plane()
711 fwht(input, cf->coeffs, stride, input_step, 1); in encode_plane()
712 quantize_intra(cf->coeffs, cf->de_coeffs, in encode_plane()
713 cf->i_frame_qp); in encode_plane()
717 fwht16(deltablock, cf->coeffs, 8, 0); in encode_plane()
718 quantize_inter(cf->coeffs, cf->de_coeffs, in encode_plane()
719 cf->p_frame_qp); in encode_plane()
722 ifwht(cf->de_coeffs, cf->de_fwht, blocktype); in encode_plane()
725 add_deltas(cf->de_fwht, refp, 8, 1); in encode_plane()
726 fill_decoder_block(refp, cf->de_fwht, 8, 1); in encode_plane()
729 input += 8 * input_step; in encode_plane()
730 refp += 8 * 8; in encode_plane()
732 size = rlc(cf->coeffs, *rlco, blocktype); in encode_plane()
734 !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) { in encode_plane()
735 __be16 *last_rlco = *rlco - size; in encode_plane()
785 __be16 *rlco = cf->rlc_data; in fwht_encode_frame()
789 rlco_max = rlco + size / 2 - 256; in fwht_encode_frame()
790 encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf, in fwht_encode_frame()
792 frm->luma_alpha_step, is_intra, next_is_intra); in fwht_encode_frame()
797 if (frm->components_num >= 3) { in fwht_encode_frame()
798 u32 chroma_h = height / frm->height_div; in fwht_encode_frame()
799 u32 chroma_w = width / frm->width_div; in fwht_encode_frame()
802 rlco_max = rlco + chroma_size / 2 - 256; in fwht_encode_frame()
803 encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, in fwht_encode_frame()
805 chroma_stride, frm->chroma_step, in fwht_encode_frame()
810 rlco_max = rlco + chroma_size / 2 - 256; in fwht_encode_frame()
811 encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, in fwht_encode_frame()
813 chroma_stride, frm->chroma_step, in fwht_encode_frame()
820 if (frm->components_num == 4) { in fwht_encode_frame()
821 rlco_max = rlco + size / 2 - 256; in fwht_encode_frame()
822 encoding |= encode_plane(frm->alpha, ref_frm->alpha, &rlco, in fwht_encode_frame()
824 stride, frm->luma_alpha_step, in fwht_encode_frame()
831 cf->size = (rlco - cf->rlc_data) * sizeof(*rlco); in fwht_encode_frame()
842 s16 copy[8 * 8]; in decode_plane()
847 width = round_up(width, 8); in decode_plane()
848 height = round_up(height, 8); in decode_plane()
853 if (end_of_rlco_buf + 1 < *rlco + width * height / 2) in decode_plane()
865 * by 65 * 2 bytes worst-case. in decode_plane()
869 for (j = 0; j < height / 8; j++) { in decode_plane()
870 for (i = 0; i < width / 8; i++) { in decode_plane()
871 const u8 *refp = ref + j * 8 * ref_stride + in decode_plane()
872 i * 8 * ref_step; in decode_plane()
873 u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step; in decode_plane()
876 memcpy(cf->de_fwht, copy, sizeof(copy)); in decode_plane()
878 add_deltas(cf->de_fwht, refp, in decode_plane()
880 fill_decoder_block(dstp, cf->de_fwht, in decode_plane()
882 copies--; in decode_plane()
886 stat = derlc(rlco, cf->coeffs, end_of_rlco_buf); in decode_plane()
890 dequantize_inter(cf->coeffs); in decode_plane()
892 dequantize_intra(cf->coeffs); in decode_plane()
894 ifwht(cf->coeffs, cf->de_fwht, in decode_plane()
895 ((stat & PFRAME_BIT) && !is_intra) ? 0 : 1); in decode_plane()
897 copies = (stat & DUPS_MASK) >> 1; in decode_plane()
899 memcpy(copy, cf->de_fwht, sizeof(copy)); in decode_plane()
901 add_deltas(cf->de_fwht, refp, in decode_plane()
903 fill_decoder_block(dstp, cf->de_fwht, dst_stride, in decode_plane()
917 const __be16 *rlco = cf->rlc_data; in fwht_decode_frame()
918 const __be16 *end_of_rlco_buf = cf->rlc_data + in fwht_decode_frame()
919 (cf->size / sizeof(*rlco)) - 1; in fwht_decode_frame()
921 if (!decode_plane(cf, &rlco, height, width, ref->luma, ref_stride, in fwht_decode_frame()
922 ref->luma_alpha_step, dst->luma, dst_stride, in fwht_decode_frame()
923 dst->luma_alpha_step, in fwht_decode_frame()
937 if (!decode_plane(cf, &rlco, h, w, ref->cb, ref_chroma_stride, in fwht_decode_frame()
938 ref->chroma_step, dst->cb, dst_chroma_stride, in fwht_decode_frame()
939 dst->chroma_step, in fwht_decode_frame()
943 if (!decode_plane(cf, &rlco, h, w, ref->cr, ref_chroma_stride, in fwht_decode_frame()
944 ref->chroma_step, dst->cr, dst_chroma_stride, in fwht_decode_frame()
945 dst->chroma_step, in fwht_decode_frame()
952 if (!decode_plane(cf, &rlco, height, width, ref->alpha, ref_stride, in fwht_decode_frame()
953 ref->luma_alpha_step, dst->alpha, dst_stride, in fwht_decode_frame()
954 dst->luma_alpha_step, in fwht_decode_frame()