Lines Matching +full:2 +full:- +full:8

1 // SPDX-License-Identifier: LGPL-2.1+
6 * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
8 * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
15 #include "codec-fwht.h"
21 * be guaranteed that the magic 8 byte sequence (see below) can
34 1, 8,
35 2, 9, 16,
57 s16 block[8 * 8]; in rlc()
67 for (y = 0; y < 8; y++) { in rlc()
68 for (x = 0; x < 8; x++) { in rlc()
69 *wp = in[x + y * 8]; in rlc()
75 for (i = 63; i >= 0 && !block[zigzag[i]]; i--) in rlc()
81 to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0); in rlc()
93 cnt--; in rlc()
111 * This function will worst-case increase rlc_in by 65*2 bytes:
112 * one s16 value for the header and 8 * 8 coefficients of type s16.
121 s16 block[8 * 8 + 16]; in derlc()
130 * Now de-compress, it expands one byte to up to 15 bytes in derlc()
134 * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to in derlc()
137 while (dec_count < 8 * 8) { in derlc()
150 for (i = 0; i < 64 - dec_count; i++) in derlc()
165 int y = pos / 8; in derlc()
166 int x = pos % 8; in derlc()
168 dwht_out[x + y * 8] = *wp++; in derlc()
175 2, 2, 2, 2, 2, 2, 2, 2,
176 2, 2, 2, 2, 2, 2, 2, 2,
177 2, 2, 2, 2, 2, 2, 2, 3,
178 2, 2, 2, 2, 2, 2, 3, 6,
179 2, 2, 2, 2, 2, 3, 6, 6,
180 2, 2, 2, 2, 3, 6, 6, 6,
181 2, 2, 2, 3, 6, 6, 6, 6,
182 2, 2, 3, 6, 6, 6, 6, 8,
201 for (j = 0; j < 8; j++) { in quantize_intra()
202 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) { in quantize_intra()
204 if (*coeff >= -qp && *coeff <= qp) in quantize_intra()
217 for (j = 0; j < 8; j++) in dequantize_intra()
218 for (i = 0; i < 8; i++, quant++, coeff++) in dequantize_intra()
227 for (j = 0; j < 8; j++) { in quantize_inter()
228 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) { in quantize_inter()
230 if (*coeff >= -qp && *coeff <= qp) in quantize_inter()
243 for (j = 0; j < 8; j++) in dequantize_inter()
244 for (i = 0; i < 8; i++, quant++, coeff++) in dequantize_inter()
252 /* we'll need more than 8 bits for the transformed coefficients */ in fwht()
253 s32 workspace1[8], workspace2[8]; in fwht()
260 for (i = 0; i < 8; i++, tmp += stride, out += 8) { in fwht()
263 workspace1[0] = tmp[0] + tmp[1] - add; in fwht()
264 workspace1[1] = tmp[0] - tmp[1]; in fwht()
266 workspace1[2] = tmp[2] + tmp[3] - add; in fwht()
267 workspace1[3] = tmp[2] - tmp[3]; in fwht()
269 workspace1[4] = tmp[4] + tmp[5] - add; in fwht()
270 workspace1[5] = tmp[4] - tmp[5]; in fwht()
272 workspace1[6] = tmp[6] + tmp[7] - add; in fwht()
273 workspace1[7] = tmp[6] - tmp[7]; in fwht()
275 case 2: in fwht()
276 workspace1[0] = tmp[0] + tmp[2] - add; in fwht()
277 workspace1[1] = tmp[0] - tmp[2]; in fwht()
279 workspace1[2] = tmp[4] + tmp[6] - add; in fwht()
280 workspace1[3] = tmp[4] - tmp[6]; in fwht()
282 workspace1[4] = tmp[8] + tmp[10] - add; in fwht()
283 workspace1[5] = tmp[8] - tmp[10]; in fwht()
285 workspace1[6] = tmp[12] + tmp[14] - add; in fwht()
286 workspace1[7] = tmp[12] - tmp[14]; in fwht()
289 workspace1[0] = tmp[0] + tmp[3] - add; in fwht()
290 workspace1[1] = tmp[0] - tmp[3]; in fwht()
292 workspace1[2] = tmp[6] + tmp[9] - add; in fwht()
293 workspace1[3] = tmp[6] - tmp[9]; in fwht()
295 workspace1[4] = tmp[12] + tmp[15] - add; in fwht()
296 workspace1[5] = tmp[12] - tmp[15]; in fwht()
298 workspace1[6] = tmp[18] + tmp[21] - add; in fwht()
299 workspace1[7] = tmp[18] - tmp[21]; in fwht()
302 workspace1[0] = tmp[0] + tmp[4] - add; in fwht()
303 workspace1[1] = tmp[0] - tmp[4]; in fwht()
305 workspace1[2] = tmp[8] + tmp[12] - add; in fwht()
306 workspace1[3] = tmp[8] - tmp[12]; in fwht()
308 workspace1[4] = tmp[16] + tmp[20] - add; in fwht()
309 workspace1[5] = tmp[16] - tmp[20]; in fwht()
311 workspace1[6] = tmp[24] + tmp[28] - add; in fwht()
312 workspace1[7] = tmp[24] - tmp[28]; in fwht()
316 /* stage 2 */ in fwht()
317 workspace2[0] = workspace1[0] + workspace1[2]; in fwht()
318 workspace2[1] = workspace1[0] - workspace1[2]; in fwht()
319 workspace2[2] = workspace1[1] - workspace1[3]; in fwht()
323 workspace2[5] = workspace1[4] - workspace1[6]; in fwht()
324 workspace2[6] = workspace1[5] - workspace1[7]; in fwht()
329 out[1] = workspace2[0] - workspace2[4]; in fwht()
330 out[2] = workspace2[1] - workspace2[5]; in fwht()
332 out[4] = workspace2[2] + workspace2[6]; in fwht()
333 out[5] = workspace2[2] - workspace2[6]; in fwht()
334 out[6] = workspace2[3] - workspace2[7]; in fwht()
340 for (i = 0; i < 8; i++, out++) { in fwht()
342 workspace1[0] = out[0] + out[1 * 8]; in fwht()
343 workspace1[1] = out[0] - out[1 * 8]; in fwht()
345 workspace1[2] = out[2 * 8] + out[3 * 8]; in fwht()
346 workspace1[3] = out[2 * 8] - out[3 * 8]; in fwht()
348 workspace1[4] = out[4 * 8] + out[5 * 8]; in fwht()
349 workspace1[5] = out[4 * 8] - out[5 * 8]; in fwht()
351 workspace1[6] = out[6 * 8] + out[7 * 8]; in fwht()
352 workspace1[7] = out[6 * 8] - out[7 * 8]; in fwht()
354 /* stage 2 */ in fwht()
355 workspace2[0] = workspace1[0] + workspace1[2]; in fwht()
356 workspace2[1] = workspace1[0] - workspace1[2]; in fwht()
357 workspace2[2] = workspace1[1] - workspace1[3]; in fwht()
361 workspace2[5] = workspace1[4] - workspace1[6]; in fwht()
362 workspace2[6] = workspace1[5] - workspace1[7]; in fwht()
365 out[0 * 8] = workspace2[0] + workspace2[4]; in fwht()
366 out[1 * 8] = workspace2[0] - workspace2[4]; in fwht()
367 out[2 * 8] = workspace2[1] - workspace2[5]; in fwht()
368 out[3 * 8] = workspace2[1] + workspace2[5]; in fwht()
369 out[4 * 8] = workspace2[2] + workspace2[6]; in fwht()
370 out[5 * 8] = workspace2[2] - workspace2[6]; in fwht()
371 out[6 * 8] = workspace2[3] - workspace2[7]; in fwht()
372 out[7 * 8] = workspace2[3] + workspace2[7]; in fwht()
377 * Not the nicest way of doing it, but P-blocks get twice the range of
378 * that of the I-blocks. Therefore we need a type bigger than 8 bits.
385 /* we'll need more than 8 bits for the transformed coefficients */ in fwht16()
386 s32 workspace1[8], workspace2[8]; in fwht16()
391 for (i = 0; i < 8; i++, tmp += stride, out += 8) { in fwht16()
394 workspace1[1] = tmp[0] - tmp[1]; in fwht16()
396 workspace1[2] = tmp[2] + tmp[3]; in fwht16()
397 workspace1[3] = tmp[2] - tmp[3]; in fwht16()
400 workspace1[5] = tmp[4] - tmp[5]; in fwht16()
403 workspace1[7] = tmp[6] - tmp[7]; in fwht16()
405 /* stage 2 */ in fwht16()
406 workspace2[0] = workspace1[0] + workspace1[2]; in fwht16()
407 workspace2[1] = workspace1[0] - workspace1[2]; in fwht16()
408 workspace2[2] = workspace1[1] - workspace1[3]; in fwht16()
412 workspace2[5] = workspace1[4] - workspace1[6]; in fwht16()
413 workspace2[6] = workspace1[5] - workspace1[7]; in fwht16()
418 out[1] = workspace2[0] - workspace2[4]; in fwht16()
419 out[2] = workspace2[1] - workspace2[5]; in fwht16()
421 out[4] = workspace2[2] + workspace2[6]; in fwht16()
422 out[5] = workspace2[2] - workspace2[6]; in fwht16()
423 out[6] = workspace2[3] - workspace2[7]; in fwht16()
429 for (i = 0; i < 8; i++, out++) { in fwht16()
431 workspace1[0] = out[0] + out[1*8]; in fwht16()
432 workspace1[1] = out[0] - out[1*8]; in fwht16()
434 workspace1[2] = out[2*8] + out[3*8]; in fwht16()
435 workspace1[3] = out[2*8] - out[3*8]; in fwht16()
437 workspace1[4] = out[4*8] + out[5*8]; in fwht16()
438 workspace1[5] = out[4*8] - out[5*8]; in fwht16()
440 workspace1[6] = out[6*8] + out[7*8]; in fwht16()
441 workspace1[7] = out[6*8] - out[7*8]; in fwht16()
443 /* stage 2 */ in fwht16()
444 workspace2[0] = workspace1[0] + workspace1[2]; in fwht16()
445 workspace2[1] = workspace1[0] - workspace1[2]; in fwht16()
446 workspace2[2] = workspace1[1] - workspace1[3]; in fwht16()
450 workspace2[5] = workspace1[4] - workspace1[6]; in fwht16()
451 workspace2[6] = workspace1[5] - workspace1[7]; in fwht16()
455 out[0*8] = workspace2[0] + workspace2[4]; in fwht16()
456 out[1*8] = workspace2[0] - workspace2[4]; in fwht16()
457 out[2*8] = workspace2[1] - workspace2[5]; in fwht16()
458 out[3*8] = workspace2[1] + workspace2[5]; in fwht16()
459 out[4*8] = workspace2[2] + workspace2[6]; in fwht16()
460 out[5*8] = workspace2[2] - workspace2[6]; in fwht16()
461 out[6*8] = workspace2[3] - workspace2[7]; in fwht16()
462 out[7*8] = workspace2[3] + workspace2[7]; in fwht16()
470 * we'll need more than 8 bits for the transformed coefficients in ifwht()
473 int workspace1[8], workspace2[8]; in ifwht()
479 for (i = 0; i < 8; i++, tmp += 8, out += 8) { in ifwht()
482 workspace1[1] = tmp[0] - tmp[1]; in ifwht()
484 workspace1[2] = tmp[2] + tmp[3]; in ifwht()
485 workspace1[3] = tmp[2] - tmp[3]; in ifwht()
488 workspace1[5] = tmp[4] - tmp[5]; in ifwht()
491 workspace1[7] = tmp[6] - tmp[7]; in ifwht()
493 /* stage 2 */ in ifwht()
494 workspace2[0] = workspace1[0] + workspace1[2]; in ifwht()
495 workspace2[1] = workspace1[0] - workspace1[2]; in ifwht()
496 workspace2[2] = workspace1[1] - workspace1[3]; in ifwht()
500 workspace2[5] = workspace1[4] - workspace1[6]; in ifwht()
501 workspace2[6] = workspace1[5] - workspace1[7]; in ifwht()
506 out[1] = workspace2[0] - workspace2[4]; in ifwht()
507 out[2] = workspace2[1] - workspace2[5]; in ifwht()
509 out[4] = workspace2[2] + workspace2[6]; in ifwht()
510 out[5] = workspace2[2] - workspace2[6]; in ifwht()
511 out[6] = workspace2[3] - workspace2[7]; in ifwht()
517 for (i = 0; i < 8; i++, out++) { in ifwht()
519 workspace1[0] = out[0] + out[1 * 8]; in ifwht()
520 workspace1[1] = out[0] - out[1 * 8]; in ifwht()
522 workspace1[2] = out[2 * 8] + out[3 * 8]; in ifwht()
523 workspace1[3] = out[2 * 8] - out[3 * 8]; in ifwht()
525 workspace1[4] = out[4 * 8] + out[5 * 8]; in ifwht()
526 workspace1[5] = out[4 * 8] - out[5 * 8]; in ifwht()
528 workspace1[6] = out[6 * 8] + out[7 * 8]; in ifwht()
529 workspace1[7] = out[6 * 8] - out[7 * 8]; in ifwht()
531 /* stage 2 */ in ifwht()
532 workspace2[0] = workspace1[0] + workspace1[2]; in ifwht()
533 workspace2[1] = workspace1[0] - workspace1[2]; in ifwht()
534 workspace2[2] = workspace1[1] - workspace1[3]; in ifwht()
538 workspace2[5] = workspace1[4] - workspace1[6]; in ifwht()
539 workspace2[6] = workspace1[5] - workspace1[7]; in ifwht()
546 out[0 * 8] = workspace2[0] + workspace2[4]; in ifwht()
547 out[1 * 8] = workspace2[0] - workspace2[4]; in ifwht()
548 out[2 * 8] = workspace2[1] - workspace2[5]; in ifwht()
549 out[3 * 8] = workspace2[1] + workspace2[5]; in ifwht()
550 out[4 * 8] = workspace2[2] + workspace2[6]; in ifwht()
551 out[5 * 8] = workspace2[2] - workspace2[6]; in ifwht()
552 out[6 * 8] = workspace2[3] - workspace2[7]; in ifwht()
553 out[7 * 8] = workspace2[3] + workspace2[7]; in ifwht()
555 for (d = 0; d < 8; d++) in ifwht()
556 out[8 * d] >>= 6; in ifwht()
560 out[0 * 8] = workspace2[0] + workspace2[4]; in ifwht()
561 out[1 * 8] = workspace2[0] - workspace2[4]; in ifwht()
562 out[2 * 8] = workspace2[1] - workspace2[5]; in ifwht()
563 out[3 * 8] = workspace2[1] + workspace2[5]; in ifwht()
564 out[4 * 8] = workspace2[2] + workspace2[6]; in ifwht()
565 out[5 * 8] = workspace2[2] - workspace2[6]; in ifwht()
566 out[6 * 8] = workspace2[3] - workspace2[7]; in ifwht()
567 out[7 * 8] = workspace2[3] + workspace2[7]; in ifwht()
569 for (d = 0; d < 8; d++) { in ifwht()
570 out[8 * d] >>= 6; in ifwht()
571 out[8 * d] += 128; in ifwht()
582 for (i = 0; i < 8; i++) { in fill_encoder_block()
583 for (j = 0; j < 8; j++, input += input_step) in fill_encoder_block()
585 input += stride - 8 * input_step; in fill_encoder_block()
596 for (i = 0; i < 8 * 8; i++, tmp++) in var_intra()
600 for (i = 0; i < 8 * 8; i++, tmp++) in var_intra()
601 ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean); in var_intra()
610 for (i = 0; i < 8 * 8; i++, old++, new++) in var_inter()
611 ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new); in var_inter()
627 fill_encoder_block(reference, old, 8, 1); in decide_blocktype()
630 for (k = 0; k < 8; k++) { in decide_blocktype()
631 for (l = 0; l < 8; l++) { in decide_blocktype()
632 *deltablock = *work - *reference; in decide_blocktype()
638 deltablock -= 64; in decide_blocktype()
648 for (i = 0; i < 8; i++) { in fill_decoder_block()
649 for (j = 0; j < 8; j++, input++, dst += dst_step) { in fill_decoder_block()
657 dst += stride - (8 * dst_step); in fill_decoder_block()
666 for (k = 0; k < 8; k++) { in add_deltas()
667 for (l = 0; l < 8; l++) { in add_deltas()
680 ref += stride - (8 * ref_step); in add_deltas()
697 width = round_up(width, 8); in encode_plane()
698 height = round_up(height, 8); in encode_plane()
700 for (j = 0; j < height / 8; j++) { in encode_plane()
701 input = input_start + j * 8 * stride; in encode_plane()
702 for (i = 0; i < width / 8; i++) { in encode_plane()
711 fwht(input, cf->coeffs, stride, input_step, 1); in encode_plane()
712 quantize_intra(cf->coeffs, cf->de_coeffs, in encode_plane()
713 cf->i_frame_qp); in encode_plane()
717 fwht16(deltablock, cf->coeffs, 8, 0); in encode_plane()
718 quantize_inter(cf->coeffs, cf->de_coeffs, in encode_plane()
719 cf->p_frame_qp); in encode_plane()
722 ifwht(cf->de_coeffs, cf->de_fwht, blocktype); in encode_plane()
725 add_deltas(cf->de_fwht, refp, 8, 1); in encode_plane()
726 fill_decoder_block(refp, cf->de_fwht, 8, 1); in encode_plane()
729 input += 8 * input_step; in encode_plane()
730 refp += 8 * 8; in encode_plane()
732 size = rlc(cf->coeffs, *rlco, blocktype); in encode_plane()
734 !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) { in encode_plane()
735 __be16 *last_rlco = *rlco - size; in encode_plane()
740 *last_rlco = htons(hdr + 2); in encode_plane()
785 __be16 *rlco = cf->rlc_data; in fwht_encode_frame()
789 rlco_max = rlco + size / 2 - 256; in fwht_encode_frame()
790 encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf, in fwht_encode_frame()
792 frm->luma_alpha_step, is_intra, next_is_intra); in fwht_encode_frame()
797 if (frm->components_num >= 3) { in fwht_encode_frame()
798 u32 chroma_h = height / frm->height_div; in fwht_encode_frame()
799 u32 chroma_w = width / frm->width_div; in fwht_encode_frame()
802 rlco_max = rlco + chroma_size / 2 - 256; in fwht_encode_frame()
803 encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, in fwht_encode_frame()
805 chroma_stride, frm->chroma_step, in fwht_encode_frame()
810 rlco_max = rlco + chroma_size / 2 - 256; in fwht_encode_frame()
811 encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, in fwht_encode_frame()
813 chroma_stride, frm->chroma_step, in fwht_encode_frame()
820 if (frm->components_num == 4) { in fwht_encode_frame()
821 rlco_max = rlco + size / 2 - 256; in fwht_encode_frame()
822 encoding |= encode_plane(frm->alpha, ref_frm->alpha, &rlco, in fwht_encode_frame()
824 stride, frm->luma_alpha_step, in fwht_encode_frame()
831 cf->size = (rlco - cf->rlc_data) * sizeof(*rlco); in fwht_encode_frame()
842 s16 copy[8 * 8]; in decode_plane()
847 width = round_up(width, 8); in decode_plane()
848 height = round_up(height, 8); in decode_plane()
853 if (end_of_rlco_buf + 1 < *rlco + width * height / 2) in decode_plane()
858 *rlco += width / 2; in decode_plane()
865 * by 65 * 2 bytes worst-case. in decode_plane()
869 for (j = 0; j < height / 8; j++) { in decode_plane()
870 for (i = 0; i < width / 8; i++) { in decode_plane()
871 const u8 *refp = ref + j * 8 * ref_stride + in decode_plane()
872 i * 8 * ref_step; in decode_plane()
873 u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step; in decode_plane()
876 memcpy(cf->de_fwht, copy, sizeof(copy)); in decode_plane()
878 add_deltas(cf->de_fwht, refp, in decode_plane()
880 fill_decoder_block(dstp, cf->de_fwht, in decode_plane()
882 copies--; in decode_plane()
886 stat = derlc(rlco, cf->coeffs, end_of_rlco_buf); in decode_plane()
890 dequantize_inter(cf->coeffs); in decode_plane()
892 dequantize_intra(cf->coeffs); in decode_plane()
894 ifwht(cf->coeffs, cf->de_fwht, in decode_plane()
899 memcpy(copy, cf->de_fwht, sizeof(copy)); in decode_plane()
901 add_deltas(cf->de_fwht, refp, in decode_plane()
903 fill_decoder_block(dstp, cf->de_fwht, dst_stride, in decode_plane()
917 const __be16 *rlco = cf->rlc_data; in fwht_decode_frame()
918 const __be16 *end_of_rlco_buf = cf->rlc_data + in fwht_decode_frame()
919 (cf->size / sizeof(*rlco)) - 1; in fwht_decode_frame()
921 if (!decode_plane(cf, &rlco, height, width, ref->luma, ref_stride, in fwht_decode_frame()
922 ref->luma_alpha_step, dst->luma, dst_stride, in fwht_decode_frame()
923 dst->luma_alpha_step, in fwht_decode_frame()
933 h /= 2; in fwht_decode_frame()
935 w /= 2; in fwht_decode_frame()
937 if (!decode_plane(cf, &rlco, h, w, ref->cb, ref_chroma_stride, in fwht_decode_frame()
938 ref->chroma_step, dst->cb, dst_chroma_stride, in fwht_decode_frame()
939 dst->chroma_step, in fwht_decode_frame()
943 if (!decode_plane(cf, &rlco, h, w, ref->cr, ref_chroma_stride, in fwht_decode_frame()
944 ref->chroma_step, dst->cr, dst_chroma_stride, in fwht_decode_frame()
945 dst->chroma_step, in fwht_decode_frame()
952 if (!decode_plane(cf, &rlco, height, width, ref->alpha, ref_stride, in fwht_decode_frame()
953 ref->luma_alpha_step, dst->alpha, dst_stride, in fwht_decode_frame()
954 dst->luma_alpha_step, in fwht_decode_frame()