Lines Matching +full:1 +full:v0
53 #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX …
56 #define S_COHERENCE glc:1
57 #define V_COHERENCE slc:1 glc:1
77 var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
92 var SQ_WAVE_STATUS_WAVE64_SIZE = 1
467 SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status
491 …s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx b…
510 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause
533 // Save v0 by itself since it requires only two SGPRs.
538 global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] V_COHERENCE
539 v_mov_b32 v0, 0x0
556 v_writelane_b32 v0, ttmp4, 0x4
557 v_writelane_b32 v0, ttmp5, 0x5
558 v_writelane_b32 v0, ttmp6, 0x6
559 v_writelane_b32 v0, ttmp7, 0x7
560 v_writelane_b32 v0, ttmp8, 0x8
561 v_writelane_b32 v0, ttmp9, 0x9
562 v_writelane_b32 v0, ttmp10, 0xA
563 v_writelane_b32 v0, ttmp11, 0xB
564 v_writelane_b32 v0, ttmp13, 0xD
565 v_writelane_b32 v0, exec_lo, 0xE
566 v_writelane_b32 v0, exec_hi, 0xF
570 global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 V_COHERENCE
571 v_readlane_b32 ttmp14, v0, 0xE
572 v_readlane_b32 ttmp15, v0, 0xF
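A rough C model of the lane-packing idiom above (the names ttmp[], exec_lo/exec_hi and save_area are illustrative stand-ins, not from the handler): each scalar is written into one lane of v0 with v_writelane_b32, a single global_store_dword_addtid at inst_offset:0x40 then writes every lane in one vector store, and exec is pulled back out afterwards with v_readlane_b32 into ttmp14/ttmp15.

    #include <stdint.h>
    #include <string.h>

    /* Illustrative stand-ins for the registers touched above (not from the handler). */
    static uint32_t ttmp[16];                /* ttmp4..ttmp13 used at indices 4..13 */
    static uint32_t exec_lo, exec_hi;

    static void save_ttmps(uint32_t *save_area /* ttmp save base + 0x40 in the real code */)
    {
        uint32_t v0_lanes[16] = {0};

        for (uint32_t i = 4; i <= 11; i++)
            v0_lanes[i] = ttmp[i];           /* v_writelane_b32: ttmp4..ttmp11 -> lanes 0x4..0xB */
        v0_lanes[0xD] = ttmp[13];            /* ttmp13  -> lane 0xD */
        v0_lanes[0xE] = exec_lo;             /* exec_lo -> lane 0xE */
        v0_lanes[0xF] = exec_hi;             /* exec_hi -> lane 0xF */

        memcpy(save_area, v0_lanes, sizeof(v0_lanes));   /* one vector store writes all lanes */

        ttmp[14] = v0_lanes[0xE];            /* v_readlane_b32: recover exec after the store */
        ttmp[15] = v0_lanes[0xF];
    }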
603 s_and_b32 m0, m0, 1
604 s_cmp_eq_u32 m0, 1
620 write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
627 buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
629 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128
630 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*2
631 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*3
643 write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
650 buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
652 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256
653 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*2
654 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*3
668 v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
720 s_get_barrier_state s_save_tmp, -1
729 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
747 …// backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change th…
757 s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
772 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
780 s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
784 s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
793 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
795 // restore s_save_buf_rsrc0,1
805 s_and_b32 m0, m0, 1
806 s_cmp_eq_u32 m0, 1
838 //compute each lane's LDS byte address (lane_id * 4, i.e. 0..63*4) into v0
839 v_mbcnt_lo_u32_b32 v0, -1, 0
840 v_mbcnt_hi_u32_b32 v0, -1, v0
841 v_mul_u32_u24 v0, 4, v0
844 s_and_b32 m0, m0, 1
845 s_cmp_eq_u32 m0, 1
855 ds_read_b32 v1, v0
858 write_vgprs_to_mem_with_sqc_w32(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
861 v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
862 s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
875 ds_read_b32 v1, v0
877 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
881 v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
882 s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
893 ds_read_b32 v1, v0
896 write_vgprs_to_mem_with_sqc_w64(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
899 v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
900 s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
913 ds_read_b32 v1, v0
915 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
919 v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
920 s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
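The v_mbcnt pair at 839-841 gives every lane its index and scales it by 4, so v0 starts out as lane_id * 4; each LDS save loop above then advances v0 and m0 by one wave row per pass, 128 bytes for wave32 (32 lanes x 4 bytes) and 256 bytes for wave64. A rough C sketch of the wave32 addressing, with hypothetical names:

    #include <stdint.h>

    #define WAVE32_LANES 32

    /* Hypothetical model of the wave32 LDS save loop: every pass copies one
     * 128-byte row (32 lanes x 4 bytes), each lane handling the dword at
     * lane_id * 4 within the row, until m0 reaches the LDS allocation size. */
    static void save_lds_w32(const uint32_t *lds, uint32_t *dst, uint32_t lds_bytes)
    {
        for (uint32_t m0 = 0; m0 < lds_bytes; m0 += WAVE32_LANES * 4) {
            for (uint32_t lane = 0; lane < WAVE32_LANES; lane++) {
                uint32_t addr = m0 + lane * 4;     /* v0: starts at lane_id*4, +=128 per pass */
                dst[addr / 4] = lds[addr / 4];     /* ds_read_b32 then buffer/SQC store       */
            }
        }
    }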
929 s_and_b32 m0, m0, 1
930 s_cmp_eq_u32 m0, 1
940 s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
941 …s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (n…
944 s_and_b32 m0, m0, 1
945 s_cmp_eq_u32 m0, 1
962 v_movrels_b32 v0, v0 //v0 = v[0+m0]
963 v_movrels_b32 v1, v1 //v1 = v[1+m0]
967 write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
977 v_movrels_b32 v0, v0 //v0 = v[0+m0]
978 v_movrels_b32 v1, v1 //v1 = v[1+m0]
982 buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
983 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128
984 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*2
985 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*3
989 s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
1007 v_movrels_b32 v0, v0 //v0 = v[0+m0]
1008 v_movrels_b32 v1, v1 //v1 = v[1+m0]
1012 write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
1022 v_movrels_b32 v0, v0 //v0 = v[0+m0]
1023 v_movrels_b32 v1, v1 //v1 = v[1+m0]
1027 buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
1028 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256
1029 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*2
1030 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*3
1034 s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
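Both VGPR save loops follow the same pattern: v0..v3 were stored up front, then each pass stages v[m0..m0+3] into v0..v3 with v_movrels_b32, stores four rows, and advances m0 and the buffer offset by four VGPRs. A sketch under the assumption that a VGPR is modeled as one dword per lane (hypothetical names):

    #include <stdint.h>
    #include <string.h>

    #define LANES 64   /* wave64 loop; the wave32 loop is identical with 32 lanes */

    /* Sketch of the main VGPR save loop: v0..v3 are already saved, so each pass
     * stages v[m0..m0+3] (v_movrels_b32), stores four rows, and advances m0 and
     * the memory offset by four VGPRs. */
    static void save_vgprs(uint32_t vgpr[][LANES], uint32_t num_vgprs,
                           uint8_t *buf, uint32_t mem_offset)
    {
        for (uint32_t m0 = 4; m0 < num_vgprs; m0 += 4) {
            for (uint32_t i = 0; i < 4; i++)               /* offsets 0,256,256*2,256*3 */
                memcpy(buf + mem_offset + i * LANES * 4, vgpr[m0 + i], LANES * 4);
            mem_offset += 4 * LANES * 4;                   /* next group of four VGPRs  */
        }
    }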
1054 v_movrels_b32 v0, v0
1056 write_vgprs_to_mem_with_sqc_w64(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)
1058 s_add_u32 m0, m0, 1
1066 v_movrels_b32 v0, v0 //v0 = v[0+m0]
1067 buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
1068 s_add_u32 m0, m0, 1 //next vgpr index
1070 s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
1099 s_and_b32 m0, m0, 1
1100 s_cmp_eq_u32 m0, 1
1124 s_and_b32 m0, m0, 1
1125 s_cmp_eq_u32 m0, 1
1131 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
1133 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
1135 ds_store_addtid_b32 v0
1139 s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
1145 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
1147 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
1149 ds_store_addtid_b32 v0
1153 s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
1162 s_and_b32 m0, m0, 1
1163 s_cmp_eq_u32 m0, 1
1171 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
1172 …s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 …
1175 s_and_b32 m0, m0, 1
1176 s_cmp_eq_u32 m0, 1
1182 …s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be th…
1189 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
1190 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128
1191 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*2
1192 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*3
1194 v_movreld_b32 v0, v0 //v[0+m0] = v0
1200 s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
1201 s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //loop while more VGPRs remain (v0..v3 are restored last)
1203 /* VGPR restore on v0 */
1204 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE
1205 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128
1206 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*2
1207 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*3
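The restore side runs in the opposite order: the base offset is kept aside, v4 and up are reloaded first using v0..v3 as staging registers for the burst loads plus v_movreld_b32, and only then are v0..v3 themselves reloaded from the saved offset, since they are still needed as scratch while the loop runs. A C sketch of that ordering for the wave32 path (the wave64 path from 1216 onward works the same way with 256-byte rows); names are illustrative:

    #include <stdint.h>
    #include <string.h>

    #define LANES 32   /* wave32 path; the wave64 path uses 256-byte rows */

    /* Sketch of the restore ordering: keep the base offset, reload v4..N first
     * through the v0..v3 staging registers (burst loads + v_movreld_b32), then
     * reload v0..v3 last from the saved base, since they are scratch until then. */
    static void restore_vgprs(uint32_t vgpr[][LANES], uint32_t num_vgprs,
                              const uint8_t *buf, uint32_t mem_offset)
    {
        uint32_t base = mem_offset;                        /* s_restore_mem_offset_save  */

        mem_offset += 4 * LANES * 4;                       /* skip past the v0..v3 image */
        for (uint32_t m0 = 4; m0 < num_vgprs; m0 += 4) {
            for (uint32_t i = 0; i < 4; i++)               /* offsets 0,128,128*2,128*3  */
                memcpy(vgpr[m0 + i], buf + mem_offset + i * LANES * 4, LANES * 4);
            mem_offset += 4 * LANES * 4;
        }

        for (uint32_t i = 0; i < 4; i++)                   /* finally v0..v3 from base   */
            memcpy(vgpr[i], buf + base + i * LANES * 4, LANES * 4);
    }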
1216 …s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be th…
1223 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
1224 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256
1225 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*2
1226 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*3
1228 v_movreld_b32 v0, v0 //v[0+m0] = v0
1234 s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
1235 s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //loop while more VGPRs remain (v0..v3 are restored last)
1249 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
1251 v_movreld_b32 v0, v0 //v[0+m0] = v0
1252 s_add_u32 m0, m0, 1 //next vgpr index
1254 s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
1255 s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //loop while more shared VGPRs remain to restore
1257 s_mov_b32 exec_hi, 0xFFFFFFFF //restore exec_hi to the full mask before restoring v0
1259 /* VGPR restore on v0 */
1261 buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE
1262 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256
1263 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*2
1264 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*3
1317 s_cmp_eq_u32 m0, 0 //scc = (m0 == 0) ? 1 : 0
1320 // s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception.
1392 s_barrier_signal -1
1393 s_add_i32 s_restore_tmp, s_restore_tmp, -1
1444 // Otherwise retain PRIV=1 for subsequent context save requests.
1497 s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
1513 s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
1539 // If TRAPSTS.XNACK_ERROR=1 then TCP stores will fail.
1552 s_add_u32 s4, s4, 1
1555 s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1
1577 s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
1580 …s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32…
1583 …s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64…
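The two shifts above fold the whole size computation into one operation each: the +2 accounts for the granularity of 4 VGPRs, and the +7 or +8 for 32 or 64 lanes times 4 bytes per dword, so wave32 multiplies by 512 and wave64 by 1024. A worked version of the arithmetic, assuming vgpr_size is the raw field read from GPR_ALLOC:

    #include <stdint.h>

    /* Worked form of the shifts above; vgpr_size_field is assumed to be the raw
     * GPR_ALLOC.VGPR_SIZE value (granularity of 4 VGPRs, biased by 1). */
    static uint32_t vgpr_save_bytes(uint32_t vgpr_size_field, int wave64)
    {
        uint32_t num_vgprs = (vgpr_size_field + 1) << 2;   /* (vgpr_size + 1) * 4          */
        uint32_t lanes     = wave64 ? 64 : 32;             /* selects shift (2+8) vs (2+7) */

        return num_vgprs * lanes * 4;                      /* one dword per lane per VGPR  */
    }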