mirror of
https://github.com/bvanroll/odiseectf.git
synced 2025-08-29 20:02:43 +00:00
2659 lines
59 KiB
Plaintext
//
|
||
// Generated by NVIDIA NVVM Compiler
|
||
//
|
||
// Compiler Build ID: UNKNOWN
|
||
// Unknown Toolkit Version
|
||
// Based on LLVM 3.4svn
|
||
//
|
||
|
||
.version 6.3
|
||
.target sm_52, texmode_independent
|
||
.address_size 64
|
||
|
||
// .globl amp
|
||
|
||
.entry amp(
|
||
.param .u64 .ptr .global .align 4 amp_param_0,
|
||
.param .u64 .ptr .global .align 4 amp_param_1,
|
||
.param .u64 .ptr .const .align 4 amp_param_2,
|
||
.param .u64 .ptr .global .align 4 amp_param_3,
|
||
.param .u64 .ptr .global .align 4 amp_param_4,
|
||
.param .u32 amp_param_5,
|
||
.param .u64 amp_param_6
|
||
)
|
||
{
|
||
.local .align 4 .b8 __local_depot0[260];
|
||
.reg .b64 %SP;
|
||
.reg .b64 %SPL;
|
||
.reg .pred %p<268>;
|
||
.reg .b16 %rs<189>;
|
||
.reg .b32 %r<936>;
|
||
.reg .b64 %rd<398>;
|
||
|
||
|
||
mov.u64 %SPL, __local_depot0;
|
||
cvta.local.u64 %SP, %SPL;
|
||
ld.param.u64 %rd122, [amp_param_1];
|
||
ld.param.u64 %rd123, [amp_param_2];
|
||
ld.param.u64 %rd124, [amp_param_6];
|
||
add.u64 %rd125, %SP, 0;
|
||
add.u64 %rd396, %SPL, 0;
|
||
mov.u32 %r291, %ctaid.x;
|
||
mov.u32 %r292, %ntid.x;
|
||
mov.b32 %r293, %envreg3;
|
||
mad.lo.s32 %r1, %r291, %r292, %r293;
|
||
mov.u32 %r2, %tid.x;
|
||
add.s32 %r3, %r1, %r2;
|
||
cvt.s64.s32 %rd126, %r3;
|
||
setp.ge.u64 %p1, %rd126, %rd124;
|
||
@%p1 bra BB0_371;
|
||
|
||
ld.const.u32 %r8, [%rd123];
|
||
setp.ne.s32 %p2, %r8, 58;
|
||
@%p2 bra BB0_3;
|
||
|
||
ld.const.u32 %r294, [%rd123+4];
|
||
setp.eq.s32 %p3, %r294, 0;
|
||
@%p3 bra BB0_371;
|
||
|
||
BB0_3:
|
||
mul.wide.s32 %rd127, %r3, 260;
|
||
add.s64 %rd351, %rd122, %rd127;
|
||
mov.u32 %r819, 0;
|
||
mov.u64 %rd352, %rd396;
|
||
|
||
BB0_4:
|
||
ld.global.u32 %r297, [%rd351];
|
||
st.local.u32 [%rd352], %r297;
|
||
add.s64 %rd352, %rd352, 4;
|
||
add.s64 %rd351, %rd351, 4;
|
||
add.s32 %r819, %r819, 1;
|
||
setp.lt.u32 %p4, %r819, 65;
|
||
@%p4 bra BB0_4;
|
||
|
||
add.s64 %rd129, %rd122, %rd127;
|
||
ld.global.u32 %r933, [%rd129+256];
|
||
setp.eq.s32 %p5, %r8, 0;
|
||
@%p5 bra BB0_369;
|
||
|
||
cvta.to.local.u64 %rd9, %rd125;
|
||
mov.u32 %r821, 0;
|
||
bra.uni BB0_7;
|
||
|
||
BB0_276:
|
||
add.s32 %r887, %r171, 1;
|
||
cvt.u64.u32 %rd226, %r171;
|
||
add.s64 %rd227, %rd9, %rd226;
|
||
ld.local.u8 %rs109, [%rd227+1];
|
||
st.local.u8 [%rd227], %rs109;
|
||
|
||
BB0_277:
|
||
add.s32 %r171, %r887, 1;
|
||
cvt.s64.s32 %rd228, %r887;
|
||
add.s64 %rd229, %rd9, %rd228;
|
||
ld.local.u8 %rs110, [%rd229+1];
|
||
st.local.u8 [%rd229], %rs110;
|
||
|
||
BB0_278:
|
||
add.s32 %r890, %r171, 1;
|
||
cvt.s64.s32 %rd230, %r171;
|
||
add.s64 %rd231, %rd9, %rd230;
|
||
ld.local.u8 %rs111, [%rd231+1];
|
||
st.local.u8 [%rd231], %rs111;
|
||
|
||
BB0_279:
|
||
setp.lt.u32 %p193, %r173, 4;
|
||
@%p193 bra BB0_282;
|
||
|
||
cvt.s64.s32 %rd232, %r890;
|
||
add.s64 %rd370, %rd9, %rd232;
|
||
|
||
BB0_281:
|
||
ld.local.u8 %rs112, [%rd370+1];
|
||
ld.local.u8 %rs113, [%rd370+2];
|
||
ld.local.u8 %rs114, [%rd370+3];
|
||
ld.local.u8 %rs115, [%rd370+4];
|
||
st.local.u8 [%rd370], %rs112;
|
||
st.local.u8 [%rd370+1], %rs113;
|
||
st.local.u8 [%rd370+2], %rs114;
|
||
add.s64 %rd64, %rd370, 4;
|
||
st.local.u8 [%rd370+3], %rs115;
|
||
add.s32 %r890, %r890, 4;
|
||
setp.lt.s32 %p194, %r890, %r933;
|
||
mov.u64 %rd370, %rd64;
|
||
@%p194 bra BB0_281;
|
||
|
||
BB0_282:
|
||
mov.u16 %rs116, 0;
|
||
st.local.u8 [%rd11+-1], %rs116;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_7:
|
||
mov.u32 %r10, %r933;
|
||
shr.u32 %r11, %r8, 8;
|
||
cvt.u16.u32 %rs1, %r11;
|
||
shr.u32 %r12, %r8, 16;
|
||
cvt.u16.u32 %rs2, %r12;
|
||
bfe.u32 %r13, %r8, 8, 8;
|
||
cvt.u64.u32 %rd131, %r13;
|
||
add.s64 %rd10, %rd9, %rd131;
|
||
cvt.s64.s32 %rd132, %r10;
|
||
add.s64 %rd11, %rd9, %rd132;
|
||
add.s32 %r300, %r10, -1;
|
||
shl.b32 %r301, %r300, 1;
|
||
cvt.s64.s32 %rd133, %r301;
|
||
add.s64 %rd12, %rd9, %rd133;
|
||
and.b32 %r299, %r8, 255;
|
||
setp.gt.s32 %p6, %r299, 93;
|
||
@%p6 bra BB0_62;
|
||
|
||
setp.gt.s32 %p34, %r299, 68;
|
||
@%p34 bra BB0_35;
|
||
|
||
setp.gt.s32 %p48, %r299, 44;
|
||
@%p48 bra BB0_22;
|
||
|
||
setp.gt.s32 %p55, %r299, 41;
|
||
@%p55 bra BB0_16;
|
||
|
||
setp.eq.s32 %p59, %r299, 36;
|
||
@%p59 bra BB0_325;
|
||
bra.uni BB0_12;
|
||
|
||
BB0_325:
|
||
setp.gt.s32 %p221, %r10, 254;
|
||
@%p221 bra BB0_367;
|
||
|
||
add.s32 %r933, %r10, 1;
|
||
st.local.u8 [%rd11], %rs1;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_62:
|
||
setp.gt.s32 %p7, %r299, 112;
|
||
@%p7 bra BB0_92;
|
||
|
||
setp.gt.s32 %p21, %r299, 104;
|
||
@%p21 bra BB0_80;
|
||
|
||
setp.gt.s32 %p28, %r299, 99;
|
||
@%p28 bra BB0_70;
|
||
|
||
setp.eq.s32 %p32, %r299, 94;
|
||
@%p32 bra BB0_311;
|
||
bra.uni BB0_66;
|
||
|
||
BB0_311:
|
||
setp.gt.s32 %p214, %r10, 254;
|
||
@%p214 bra BB0_367;
|
||
|
||
add.s32 %r910, %r10, -1;
|
||
setp.lt.s32 %p215, %r910, 0;
|
||
@%p215 bra BB0_324;
|
||
|
||
and.b32 %r212, %r10, 3;
|
||
setp.eq.s32 %p216, %r212, 0;
|
||
@%p216 bra BB0_314;
|
||
bra.uni BB0_315;
|
||
|
||
BB0_314:
|
||
mov.u32 %r909, %r910;
|
||
mov.u32 %r910, %r10;
|
||
bra.uni BB0_322;
|
||
|
||
BB0_35:
|
||
setp.gt.s32 %p35, %r299, 83;
|
||
@%p35 bra BB0_46;
|
||
|
||
setp.gt.s32 %p42, %r299, 75;
|
||
@%p42 bra BB0_41;
|
||
|
||
setp.eq.s32 %p46, %r299, 69;
|
||
@%p46 bra BB0_128;
|
||
bra.uni BB0_38;
|
||
|
||
BB0_128:
|
||
add.s32 %r302, %r10, 4;
|
||
setp.gt.s32 %p61, %r302, 255;
|
||
@%p61 bra BB0_367;
|
||
|
||
mov.u32 %r823, 0;
|
||
setp.lt.s32 %p62, %r10, 1;
|
||
mov.u64 %rd353, %rd396;
|
||
@%p62 bra BB0_131;
|
||
|
||
BB0_130:
|
||
ld.local.u32 %r304, [%rd353];
|
||
and.b32 %r305, %r304, 1077952576;
|
||
shr.u32 %r306, %r305, 1;
|
||
and.b32 %r307, %r304, -2139062144;
|
||
shr.u32 %r308, %r307, 2;
|
||
not.b32 %r309, %r308;
|
||
and.b32 %r310, %r306, %r309;
|
||
and.b32 %r311, %r304, 522133279;
|
||
add.s32 %r312, %r311, 522133279;
|
||
mov.u32 %r313, -84215046;
|
||
sub.s32 %r314, %r313, %r311;
|
||
and.b32 %r315, %r310, %r314;
|
||
and.b32 %r316, %r315, %r312;
|
||
or.b32 %r317, %r316, %r304;
|
||
st.local.u32 [%rd353], %r317;
|
||
add.s64 %rd353, %rd353, 4;
|
||
add.s32 %r823, %r823, 4;
|
||
setp.lt.s32 %p63, %r823, %r10;
|
||
@%p63 bra BB0_130;
|
||
|
||
BB0_131:
|
||
mov.u32 %r825, 0;
|
||
ld.local.u32 %r319, [%rd396];
|
||
and.b32 %r320, %r319, 64;
|
||
shr.u32 %r321, %r320, 1;
|
||
and.b32 %r322, %r319, 128;
|
||
shr.u32 %r323, %r322, 2;
|
||
not.b32 %r324, %r323;
|
||
and.b32 %r325, %r321, %r324;
|
||
and.b32 %r326, %r319, 522133279;
|
||
add.s32 %r327, %r326, 31;
|
||
mov.u32 %r328, -84215046;
|
||
sub.s32 %r329, %r328, %r326;
|
||
and.b32 %r330, %r325, %r329;
|
||
and.b32 %r331, %r330, %r327;
|
||
not.b32 %r332, %r331;
|
||
or.b32 %r333, %r332, -33;
|
||
and.b32 %r824, %r333, %r319;
|
||
st.local.u32 [%rd396], %r824;
|
||
mov.u64 %rd354, %rd396;
|
||
@%p62 bra BB0_367;
|
||
|
||
BB0_132:
|
||
cvt.u16.u32 %rs21, %r824;
|
||
and.b16 %rs22, %rs21, 255;
|
||
setp.eq.s16 %p65, %rs22, 32;
|
||
selp.b32 %r334, 65280, 0, %p65;
|
||
and.b32 %r335, %r824, 65280;
|
||
setp.eq.s32 %p66, %r335, 8192;
|
||
or.b32 %r336, %r334, 16711680;
|
||
selp.b32 %r337, %r336, %r334, %p66;
|
||
and.b32 %r338, %r824, 16711680;
|
||
setp.eq.s32 %p67, %r338, 2097152;
|
||
or.b32 %r339, %r337, -16777216;
|
||
selp.b32 %r340, %r339, %r337, %p67;
|
||
and.b32 %r341, %r824, -16777216;
|
||
setp.eq.s32 %p68, %r341, 536870912;
|
||
selp.b32 %r342, 32, 0, %p68;
|
||
and.b32 %r343, %r824, 1077952512;
|
||
shr.u32 %r344, %r343, 1;
|
||
and.b32 %r345, %r824, -2139062144;
|
||
shr.u32 %r346, %r345, 2;
|
||
not.b32 %r347, %r346;
|
||
and.b32 %r348, %r344, %r347;
|
||
and.b32 %r349, %r824, 522133279;
|
||
add.s32 %r350, %r349, 522133279;
|
||
sub.s32 %r352, %r328, %r349;
|
||
and.b32 %r353, %r348, %r352;
|
||
and.b32 %r354, %r353, %r350;
|
||
and.b32 %r355, %r354, %r340;
|
||
not.b32 %r356, %r355;
|
||
and.b32 %r357, %r824, %r356;
|
||
st.local.u32 [%rd354], %r357;
|
||
add.s64 %rd16, %rd354, 4;
|
||
ld.local.u32 %r358, [%rd354+4];
|
||
and.b32 %r359, %r358, 64;
|
||
shr.u32 %r360, %r359, 1;
|
||
shr.u32 %r361, %r358, 2;
|
||
not.b32 %r362, %r361;
|
||
and.b32 %r363, %r358, 522133279;
|
||
add.s32 %r364, %r363, 31;
|
||
sub.s32 %r365, %r328, %r363;
|
||
and.b32 %r366, %r342, %r362;
|
||
and.b32 %r367, %r366, %r360;
|
||
and.b32 %r368, %r367, %r365;
|
||
and.b32 %r369, %r368, %r364;
|
||
not.b32 %r370, %r369;
|
||
and.b32 %r824, %r358, %r370;
|
||
st.local.u32 [%rd354+4], %r824;
|
||
add.s32 %r825, %r825, 4;
|
||
setp.lt.s32 %p69, %r825, %r10;
|
||
mov.u64 %rd354, %rd16;
|
||
@%p69 bra BB0_132;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_92:
|
||
setp.gt.s32 %p8, %r299, 119;
|
||
@%p8 bra BB0_111;
|
||
|
||
setp.gt.s32 %p15, %r299, 114;
|
||
@%p15 bra BB0_106;
|
||
|
||
setp.eq.s32 %p19, %r299, 113;
|
||
@%p19 bra BB0_160;
|
||
bra.uni BB0_95;
|
||
|
||
BB0_160:
|
||
add.s32 %r933, %r10, %r10;
|
||
setp.gt.s32 %p109, %r933, 255;
|
||
@%p109 bra BB0_367;
|
||
|
||
add.s32 %r842, %r10, -1;
|
||
setp.lt.s32 %p110, %r842, 0;
|
||
@%p110 bra BB0_368;
|
||
|
||
and.b32 %r59, %r10, 3;
|
||
setp.eq.s32 %p111, %r59, 0;
|
||
@%p111 bra BB0_168;
|
||
|
||
setp.eq.s32 %p112, %r59, 1;
|
||
add.s32 %r841, %r10, -1;
|
||
@%p112 bra BB0_167;
|
||
|
||
setp.eq.s32 %p113, %r59, 2;
|
||
add.s32 %r840, %r10, -1;
|
||
@%p113 bra BB0_166;
|
||
|
||
ld.local.u8 %rs55, [%rd11+-1];
|
||
st.local.u8 [%rd12], %rs55;
|
||
ld.local.u8 %rs56, [%rd11+-1];
|
||
st.local.u8 [%rd12+1], %rs56;
|
||
add.s32 %r840, %r10, -2;
|
||
|
||
BB0_166:
|
||
cvt.s64.s32 %rd158, %r840;
|
||
add.s64 %rd159, %rd9, %rd158;
|
||
ld.local.u8 %rs57, [%rd159];
|
||
shl.b32 %r456, %r840, 1;
|
||
cvt.s64.s32 %rd160, %r456;
|
||
add.s64 %rd161, %rd9, %rd160;
|
||
st.local.u8 [%rd161], %rs57;
|
||
ld.local.u8 %rs58, [%rd159];
|
||
st.local.u8 [%rd161+1], %rs58;
|
||
add.s32 %r841, %r840, -1;
|
||
|
||
BB0_167:
|
||
cvt.s64.s32 %rd162, %r841;
|
||
add.s64 %rd163, %rd9, %rd162;
|
||
ld.local.u8 %rs59, [%rd163];
|
||
shl.b32 %r457, %r841, 1;
|
||
cvt.s64.s32 %rd164, %r457;
|
||
add.s64 %rd165, %rd9, %rd164;
|
||
st.local.u8 [%rd165], %rs59;
|
||
ld.local.u8 %rs60, [%rd163];
|
||
st.local.u8 [%rd165+1], %rs60;
|
||
add.s32 %r842, %r841, -1;
|
||
|
||
BB0_168:
|
||
setp.lt.u32 %p114, %r10, 4;
|
||
@%p114 bra BB0_368;
|
||
|
||
shl.b32 %r843, %r842, 1;
|
||
|
||
BB0_170:
|
||
cvt.s64.s32 %rd166, %r842;
|
||
add.s64 %rd167, %rd9, %rd166;
|
||
ld.local.u8 %rs61, [%rd167];
|
||
cvt.s64.s32 %rd168, %r843;
|
||
add.s64 %rd169, %rd9, %rd168;
|
||
st.local.u8 [%rd169], %rs61;
|
||
ld.local.u8 %rs62, [%rd167];
|
||
st.local.u8 [%rd169+1], %rs62;
|
||
ld.local.u8 %rs63, [%rd167+-1];
|
||
st.local.u8 [%rd169+-2], %rs63;
|
||
ld.local.u8 %rs64, [%rd167+-1];
|
||
st.local.u8 [%rd169+-1], %rs64;
|
||
ld.local.u8 %rs65, [%rd167+-2];
|
||
st.local.u8 [%rd169+-4], %rs65;
|
||
ld.local.u8 %rs66, [%rd167+-2];
|
||
st.local.u8 [%rd169+-3], %rs66;
|
||
ld.local.u8 %rs67, [%rd167+-3];
|
||
st.local.u8 [%rd169+-6], %rs67;
|
||
ld.local.u8 %rs68, [%rd167+-3];
|
||
st.local.u8 [%rd169+-5], %rs68;
|
||
add.s32 %r843, %r843, -8;
|
||
add.s32 %r842, %r842, -4;
|
||
setp.gt.s32 %p115, %r842, -1;
|
||
@%p115 bra BB0_170;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_22:
|
||
setp.gt.s32 %p49, %r299, 63;
|
||
@%p49 bra BB0_27;
|
||
|
||
setp.eq.s32 %p53, %r299, 45;
|
||
@%p53 bra BB0_152;
|
||
bra.uni BB0_24;
|
||
|
||
BB0_152:
|
||
and.b32 %r449, %r11, 255;
|
||
setp.ge.s32 %p100, %r449, %r10;
|
||
@%p100 bra BB0_367;
|
||
|
||
ld.local.u8 %rs40, [%rd10];
|
||
add.s16 %rs41, %rs40, -1;
|
||
st.local.u8 [%rd10], %rs41;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_80:
|
||
setp.gt.s32 %p22, %r299, 107;
|
||
@%p22 bra BB0_85;
|
||
|
||
setp.eq.s32 %p26, %r299, 105;
|
||
@%p26 bra BB0_251;
|
||
bra.uni BB0_82;
|
||
|
||
BB0_251:
|
||
bfe.u32 %r808, %r8, 8, 8;
|
||
add.s32 %r933, %r10, 1;
|
||
setp.ge.s32 %p167, %r808, %r933;
|
||
setp.gt.s32 %p168, %r10, 254;
|
||
or.pred %p169, %p167, %p168;
|
||
@%p169 bra BB0_367;
|
||
|
||
bfe.u32 %r809, %r8, 8, 8;
|
||
add.s32 %r476, %r10, -1;
|
||
add.s32 %r148, %r809, -1;
|
||
setp.le.s32 %p170, %r476, %r148;
|
||
@%p170 bra BB0_254;
|
||
|
||
BB0_253:
|
||
cvt.s64.s32 %rd214, %r10;
|
||
add.s64 %rd215, %rd9, %rd214;
|
||
ld.local.u8 %rs102, [%rd215+-1];
|
||
st.local.u8 [%rd215], %rs102;
|
||
add.s32 %r150, %r10, -1;
|
||
add.s32 %r477, %r10, -2;
|
||
setp.gt.s32 %p171, %r477, %r148;
|
||
mov.u32 %r10, %r150;
|
||
@%p171 bra BB0_253;
|
||
|
||
BB0_254:
|
||
st.local.u8 [%rd10], %rs2;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_46:
|
||
setp.gt.s32 %p36, %r299, 89;
|
||
@%p36 bra BB0_56;
|
||
|
||
setp.eq.s32 %p40, %r299, 84;
|
||
@%p40 bra BB0_358;
|
||
bra.uni BB0_48;
|
||
|
||
BB0_358:
|
||
and.b32 %r677, %r11, 255;
|
||
setp.ge.s32 %p255, %r677, %r10;
|
||
@%p255 bra BB0_367;
|
||
|
||
bfe.u32 %r678, %r8, 10, 6;
|
||
shl.b32 %r679, %r11, 3;
|
||
and.b32 %r680, %r679, 24;
|
||
mov.u32 %r681, 32;
|
||
shl.b32 %r682, %r681, %r680;
|
||
mul.wide.u32 %rd342, %r678, 4;
|
||
add.s64 %rd343, %rd396, %rd342;
|
||
ld.local.u32 %r683, [%rd343];
|
||
and.b32 %r684, %r683, 1077952576;
|
||
shr.u32 %r685, %r684, 1;
|
||
shr.u32 %r686, %r683, 2;
|
||
not.b32 %r687, %r686;
|
||
and.b32 %r688, %r683, 522133279;
|
||
add.s32 %r689, %r688, 522133279;
|
||
mov.u32 %r690, -84215046;
|
||
sub.s32 %r691, %r690, %r688;
|
||
and.b32 %r692, %r682, %r687;
|
||
and.b32 %r693, %r692, %r685;
|
||
and.b32 %r694, %r693, %r691;
|
||
and.b32 %r695, %r694, %r689;
|
||
xor.b32 %r696, %r695, %r683;
|
||
st.local.u32 [%rd343], %r696;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_111:
|
||
setp.gt.s32 %p9, %r299, 121;
|
||
@%p9 bra BB0_120;
|
||
|
||
setp.eq.s32 %p13, %r299, 120;
|
||
@%p13 bra BB0_263;
|
||
bra.uni BB0_113;
|
||
|
||
BB0_263:
|
||
and.b32 %r158, %r11, 255;
|
||
setp.ge.s32 %p178, %r158, %r10;
|
||
@%p178 bra BB0_367;
|
||
|
||
shr.u32 %r806, %r8, 16;
|
||
and.b32 %r933, %r806, 255;
|
||
add.s32 %r485, %r158, %r933;
|
||
setp.gt.s32 %p179, %r485, %r10;
|
||
@%p179 bra BB0_367;
|
||
|
||
setp.eq.s32 %p180, %r933, 0;
|
||
@%p180 bra BB0_268;
|
||
|
||
bfe.u32 %r883, %r8, 8, 8;
|
||
mov.u32 %r884, 0;
|
||
mov.u64 %rd367, %rd9;
|
||
|
||
BB0_267:
|
||
cvt.s64.s32 %rd220, %r883;
|
||
add.s64 %rd221, %rd9, %rd220;
|
||
ld.local.u8 %rs105, [%rd221];
|
||
st.local.u8 [%rd367], %rs105;
|
||
add.s64 %rd367, %rd367, 1;
|
||
add.s32 %r883, %r883, 1;
|
||
add.s32 %r884, %r884, 1;
|
||
setp.lt.s32 %p181, %r884, %r933;
|
||
@%p181 bra BB0_267;
|
||
|
||
BB0_268:
|
||
setp.ge.s32 %p182, %r933, %r10;
|
||
@%p182 bra BB0_368;
|
||
|
||
shr.u32 %r807, %r8, 16;
|
||
cvt.u64.u32 %rd222, %r807;
|
||
and.b64 %rd223, %rd222, 255;
|
||
add.s64 %rd368, %rd9, %rd223;
|
||
mov.u32 %r885, %r933;
|
||
|
||
BB0_270:
|
||
mov.u16 %rs106, 0;
|
||
st.local.u8 [%rd368], %rs106;
|
||
add.s64 %rd368, %rd368, 1;
|
||
add.s32 %r885, %r885, 1;
|
||
setp.lt.s32 %p183, %r885, %r10;
|
||
@%p183 bra BB0_270;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_16:
|
||
setp.eq.s32 %p56, %r299, 42;
|
||
@%p56 bra BB0_158;
|
||
|
||
setp.eq.s32 %p57, %r299, 43;
|
||
@%p57 bra BB0_154;
|
||
bra.uni BB0_18;
|
||
|
||
BB0_154:
|
||
and.b32 %r450, %r11, 255;
|
||
setp.ge.s32 %p101, %r450, %r10;
|
||
@%p101 bra BB0_367;
|
||
|
||
ld.local.u8 %rs42, [%rd10];
|
||
add.s16 %rs43, %rs42, 1;
|
||
st.local.u8 [%rd10], %rs43;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_70:
|
||
setp.eq.s32 %p29, %r299, 100;
|
||
@%p29 bra BB0_349;
|
||
|
||
setp.eq.s32 %p30, %r299, 101;
|
||
@%p30 bra BB0_133;
|
||
bra.uni BB0_72;
|
||
|
||
BB0_133:
|
||
add.s32 %r371, %r10, 4;
|
||
setp.gt.s32 %p70, %r371, 255;
|
||
@%p70 bra BB0_367;
|
||
|
||
mov.u32 %r826, 0;
|
||
setp.lt.s32 %p71, %r10, 1;
|
||
mov.u64 %rd355, %rd396;
|
||
@%p71 bra BB0_136;
|
||
|
||
BB0_135:
|
||
ld.local.u32 %r373, [%rd355];
|
||
and.b32 %r374, %r373, 1077952576;
|
||
shr.u32 %r375, %r374, 1;
|
||
and.b32 %r376, %r373, -2139062144;
|
||
shr.u32 %r377, %r376, 2;
|
||
not.b32 %r378, %r377;
|
||
and.b32 %r379, %r375, %r378;
|
||
and.b32 %r380, %r373, 522133279;
|
||
add.s32 %r381, %r380, 522133279;
|
||
mov.u32 %r382, -84215046;
|
||
sub.s32 %r383, %r382, %r380;
|
||
and.b32 %r384, %r379, %r383;
|
||
and.b32 %r385, %r384, %r381;
|
||
or.b32 %r386, %r385, %r373;
|
||
st.local.u32 [%rd355], %r386;
|
||
add.s64 %rd355, %rd355, 4;
|
||
add.s32 %r826, %r826, 4;
|
||
setp.lt.s32 %p72, %r826, %r10;
|
||
@%p72 bra BB0_135;
|
||
|
||
BB0_136:
|
||
ld.local.u32 %r387, [%rd396];
|
||
and.b32 %r388, %r387, 64;
|
||
shr.u32 %r389, %r388, 1;
|
||
and.b32 %r390, %r387, 128;
|
||
shr.u32 %r391, %r390, 2;
|
||
not.b32 %r392, %r391;
|
||
and.b32 %r393, %r389, %r392;
|
||
and.b32 %r394, %r387, 522133279;
|
||
add.s32 %r395, %r394, 31;
|
||
mov.u32 %r396, -84215046;
|
||
sub.s32 %r397, %r396, %r394;
|
||
and.b32 %r398, %r393, %r397;
|
||
and.b32 %r399, %r398, %r395;
|
||
not.b32 %r400, %r399;
|
||
or.b32 %r401, %r400, -33;
|
||
and.b32 %r827, %r401, %r387;
|
||
st.local.u32 [%rd396], %r827;
|
||
@%p71 bra BB0_367;
|
||
|
||
and.b32 %r24, %r11, 255;
|
||
mov.u32 %r828, 0;
|
||
mov.u64 %rd356, %rd396;
|
||
|
||
BB0_138:
|
||
cvt.u16.u32 %rs23, %r827;
|
||
and.b16 %rs24, %rs23, 255;
|
||
and.b16 %rs25, %rs1, 255;
|
||
setp.eq.s16 %p74, %rs24, %rs25;
|
||
selp.b32 %r403, 65280, 0, %p74;
|
||
shr.u16 %rs26, %rs23, 8;
|
||
setp.eq.s16 %p75, %rs26, %rs25;
|
||
or.b32 %r404, %r403, 16711680;
|
||
selp.b32 %r405, %r404, %r403, %p75;
|
||
shr.u32 %r406, %r827, 16;
|
||
cvt.u16.u32 %rs27, %r406;
|
||
and.b16 %rs28, %rs27, 255;
|
||
setp.eq.s16 %p76, %rs28, %rs25;
|
||
or.b32 %r407, %r405, -16777216;
|
||
selp.b32 %r408, %r407, %r405, %p76;
|
||
shr.u32 %r409, %r827, 24;
|
||
setp.eq.s32 %p77, %r409, %r24;
|
||
selp.b32 %r410, 32, 0, %p77;
|
||
and.b32 %r411, %r827, 1077952512;
|
||
shr.u32 %r412, %r411, 1;
|
||
and.b32 %r413, %r827, -2139062144;
|
||
shr.u32 %r414, %r413, 2;
|
||
not.b32 %r415, %r414;
|
||
and.b32 %r416, %r412, %r415;
|
||
and.b32 %r417, %r827, 522133279;
|
||
add.s32 %r418, %r417, 522133279;
|
||
sub.s32 %r420, %r396, %r417;
|
||
and.b32 %r421, %r416, %r420;
|
||
and.b32 %r422, %r421, %r418;
|
||
and.b32 %r423, %r422, %r408;
|
||
not.b32 %r424, %r423;
|
||
and.b32 %r425, %r827, %r424;
|
||
st.local.u32 [%rd356], %r425;
|
||
add.s64 %rd20, %rd356, 4;
|
||
ld.local.u32 %r426, [%rd356+4];
|
||
and.b32 %r427, %r426, 64;
|
||
shr.u32 %r428, %r427, 1;
|
||
shr.u32 %r429, %r426, 2;
|
||
not.b32 %r430, %r429;
|
||
and.b32 %r431, %r426, 522133279;
|
||
add.s32 %r432, %r431, 31;
|
||
sub.s32 %r433, %r396, %r431;
|
||
and.b32 %r434, %r410, %r430;
|
||
and.b32 %r435, %r434, %r428;
|
||
and.b32 %r436, %r435, %r433;
|
||
and.b32 %r437, %r436, %r432;
|
||
not.b32 %r438, %r437;
|
||
and.b32 %r827, %r426, %r438;
|
||
st.local.u32 [%rd356+4], %r827;
|
||
add.s32 %r828, %r828, 4;
|
||
setp.lt.s32 %p78, %r828, %r10;
|
||
mov.u64 %rd356, %rd20;
|
||
@%p78 bra BB0_138;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_41:
|
||
setp.eq.s32 %p43, %r299, 76;
|
||
@%p43 bra BB0_156;
|
||
|
||
setp.eq.s32 %p44, %r299, 79;
|
||
@%p44 bra BB0_255;
|
||
bra.uni BB0_43;
|
||
|
||
BB0_255:
|
||
and.b32 %r881, %r11, 255;
|
||
setp.ge.s32 %p172, %r881, %r10;
|
||
@%p172 bra BB0_367;
|
||
|
||
shr.u32 %r810, %r8, 16;
|
||
and.b32 %r152, %r810, 255;
|
||
add.s32 %r478, %r881, %r152;
|
||
setp.gt.s32 %p173, %r478, %r10;
|
||
@%p173 bra BB0_367;
|
||
|
||
sub.s32 %r933, %r10, %r152;
|
||
setp.ge.s32 %p174, %r881, %r933;
|
||
@%p174 bra BB0_260;
|
||
|
||
bfe.u32 %r479, %r8, 16, 8;
|
||
bfe.u32 %r481, %r8, 8, 8;
|
||
add.s32 %r482, %r479, %r481;
|
||
cvt.u64.u32 %rd216, %r482;
|
||
add.s64 %rd365, %rd9, %rd216;
|
||
cvt.u64.u32 %rd217, %r11;
|
||
and.b64 %rd218, %rd217, 255;
|
||
add.s64 %rd364, %rd9, %rd218;
|
||
|
||
BB0_259:
|
||
ld.local.u8 %rs103, [%rd365];
|
||
st.local.u8 [%rd364], %rs103;
|
||
add.s64 %rd365, %rd365, 1;
|
||
add.s64 %rd364, %rd364, 1;
|
||
add.s32 %r881, %r881, 1;
|
||
setp.lt.s32 %p175, %r881, %r933;
|
||
@%p175 bra BB0_259;
|
||
|
||
BB0_260:
|
||
setp.eq.s32 %p176, %r152, 0;
|
||
@%p176 bra BB0_368;
|
||
|
||
bfe.u32 %r483, %r8, 16, 8;
|
||
sub.s32 %r484, %r10, %r483;
|
||
cvt.s64.s32 %rd219, %r484;
|
||
add.s64 %rd366, %rd9, %rd219;
|
||
mov.u32 %r882, %r933;
|
||
|
||
BB0_262:
|
||
mov.u16 %rs104, 0;
|
||
st.local.u8 [%rd366], %rs104;
|
||
add.s64 %rd366, %rd366, 1;
|
||
add.s32 %r882, %r882, 1;
|
||
setp.lt.s32 %p177, %r882, %r10;
|
||
@%p177 bra BB0_262;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_106:
|
||
setp.eq.s32 %p16, %r299, 115;
|
||
@%p16 bra BB0_226;
|
||
|
||
setp.eq.s32 %p17, %r299, 116;
|
||
@%p17 bra BB0_360;
|
||
bra.uni BB0_108;
|
||
|
||
BB0_360:
|
||
mov.u32 %r928, 0;
|
||
setp.lt.s32 %p256, %r10, 1;
|
||
mov.u64 %rd391, %rd396;
|
||
@%p256 bra BB0_367;
|
||
|
||
BB0_361:
|
||
ld.local.u32 %r698, [%rd391];
|
||
and.b32 %r699, %r698, 1077952576;
|
||
shr.u32 %r700, %r699, 1;
|
||
and.b32 %r701, %r698, -2139062144;
|
||
shr.u32 %r702, %r701, 2;
|
||
not.b32 %r703, %r702;
|
||
and.b32 %r704, %r700, %r703;
|
||
and.b32 %r705, %r698, 522133279;
|
||
add.s32 %r706, %r705, 522133279;
|
||
mov.u32 %r707, -84215046;
|
||
sub.s32 %r708, %r707, %r705;
|
||
and.b32 %r709, %r704, %r708;
|
||
and.b32 %r710, %r709, %r706;
|
||
xor.b32 %r711, %r710, %r698;
|
||
st.local.u32 [%rd391], %r711;
|
||
add.s64 %rd391, %rd391, 4;
|
||
add.s32 %r928, %r928, 4;
|
||
setp.lt.s32 %p257, %r928, %r10;
|
||
@%p257 bra BB0_361;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_27:
|
||
setp.eq.s32 %p50, %r299, 64;
|
||
@%p50 bra BB0_194;
|
||
|
||
setp.eq.s32 %p51, %r299, 67;
|
||
@%p51 bra BB0_362;
|
||
bra.uni BB0_29;
|
||
|
||
BB0_362:
|
||
mov.u32 %r929, 0;
|
||
setp.lt.s32 %p258, %r10, 1;
|
||
mov.u64 %rd392, %rd396;
|
||
@%p258 bra BB0_364;
|
||
|
||
BB0_363:
|
||
ld.local.u32 %r713, [%rd392];
|
||
and.b32 %r714, %r713, 1077952576;
|
||
shr.u32 %r715, %r714, 1;
|
||
and.b32 %r716, %r713, -2139062144;
|
||
shr.u32 %r717, %r716, 2;
|
||
not.b32 %r718, %r717;
|
||
and.b32 %r719, %r715, %r718;
|
||
and.b32 %r720, %r713, 522133279;
|
||
add.s32 %r721, %r720, 522133279;
|
||
mov.u32 %r722, -84215046;
|
||
sub.s32 %r723, %r722, %r720;
|
||
and.b32 %r724, %r719, %r723;
|
||
and.b32 %r725, %r724, %r721;
|
||
not.b32 %r726, %r725;
|
||
and.b32 %r727, %r713, %r726;
|
||
st.local.u32 [%rd392], %r727;
|
||
add.s64 %rd392, %rd392, 4;
|
||
add.s32 %r929, %r929, 4;
|
||
setp.lt.s32 %p259, %r929, %r10;
|
||
@%p259 bra BB0_363;
|
||
|
||
BB0_364:
|
||
ld.local.u32 %r728, [%rd396];
|
||
and.b32 %r729, %r728, 64;
|
||
shr.u32 %r730, %r729, 1;
|
||
shr.u32 %r731, %r728, 2;
|
||
and.b32 %r732, %r728, 522133279;
|
||
add.s32 %r733, %r732, 31;
|
||
mov.u32 %r734, -84215046;
|
||
sub.s32 %r735, %r734, %r732;
|
||
not.b32 %r736, %r731;
|
||
and.b32 %r737, %r736, %r730;
|
||
and.b32 %r738, %r737, %r735;
|
||
and.b32 %r739, %r738, %r733;
|
||
or.b32 %r740, %r739, %r728;
|
||
st.local.u32 [%rd396], %r740;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_85:
|
||
setp.eq.s32 %p23, %r299, 108;
|
||
@%p23 bra BB0_365;
|
||
|
||
setp.eq.s32 %p24, %r299, 111;
|
||
@%p24 bra BB0_249;
|
||
bra.uni BB0_87;
|
||
|
||
BB0_249:
|
||
and.b32 %r475, %r11, 255;
|
||
setp.ge.s32 %p166, %r475, %r10;
|
||
@%p166 bra BB0_367;
|
||
|
||
st.local.u8 [%rd10], %rs2;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_56:
|
||
setp.eq.s32 %p37, %r299, 90;
|
||
@%p37 bra BB0_171;
|
||
|
||
setp.eq.s32 %p38, %r299, 91;
|
||
@%p38 bra BB0_283;
|
||
bra.uni BB0_58;
|
||
|
||
BB0_283:
|
||
setp.lt.s32 %p195, %r10, 1;
|
||
@%p195 bra BB0_367;
|
||
|
||
add.s32 %r933, %r10, -1;
|
||
setp.lt.s32 %p196, %r933, 1;
|
||
@%p196 bra BB0_294;
|
||
|
||
and.b32 %r184, %r933, 3;
|
||
setp.eq.s32 %p197, %r184, 0;
|
||
mov.u32 %r894, 0;
|
||
@%p197 bra BB0_291;
|
||
|
||
setp.eq.s32 %p198, %r184, 1;
|
||
mov.u32 %r892, 0;
|
||
@%p198 bra BB0_290;
|
||
|
||
setp.eq.s32 %p199, %r184, 2;
|
||
mov.u32 %r891, 0;
|
||
@%p199 bra BB0_289;
|
||
|
||
ld.local.u8 %rs117, [%rd9+1];
|
||
st.local.u8 [%rd9], %rs117;
|
||
mov.u32 %r891, 1;
|
||
|
||
BB0_289:
|
||
add.s32 %r892, %r891, 1;
|
||
cvt.u64.u32 %rd233, %r892;
|
||
add.s64 %rd234, %rd9, %rd233;
|
||
ld.local.u8 %rs118, [%rd234];
|
||
cvt.u64.u32 %rd235, %r891;
|
||
add.s64 %rd236, %rd9, %rd235;
|
||
st.local.u8 [%rd236], %rs118;
|
||
|
||
BB0_290:
|
||
add.s32 %r894, %r892, 1;
|
||
cvt.s64.s32 %rd237, %r892;
|
||
add.s64 %rd238, %rd9, %rd237;
|
||
ld.local.u8 %rs119, [%rd238+1];
|
||
st.local.u8 [%rd238], %rs119;
|
||
|
||
BB0_291:
|
||
setp.lt.u32 %p200, %r933, 4;
|
||
@%p200 bra BB0_294;
|
||
|
||
cvt.s64.s32 %rd239, %r894;
|
||
add.s64 %rd371, %rd9, %rd239;
|
||
|
||
BB0_293:
|
||
ld.local.u8 %rs120, [%rd371+1];
|
||
ld.local.u8 %rs121, [%rd371+2];
|
||
ld.local.u8 %rs122, [%rd371+3];
|
||
ld.local.u8 %rs123, [%rd371+4];
|
||
st.local.u8 [%rd371], %rs120;
|
||
st.local.u8 [%rd371+1], %rs121;
|
||
st.local.u8 [%rd371+2], %rs122;
|
||
add.s64 %rd67, %rd371, 4;
|
||
st.local.u8 [%rd371+3], %rs123;
|
||
add.s32 %r894, %r894, 4;
|
||
setp.lt.s32 %p201, %r894, %r933;
|
||
mov.u64 %rd371, %rd67;
|
||
@%p201 bra BB0_293;
|
||
|
||
BB0_294:
|
||
mov.u16 %rs124, 0;
|
||
st.local.u8 [%rd11+-1], %rs124;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_120:
|
||
setp.eq.s32 %p10, %r299, 122;
|
||
@%p10 bra BB0_177;
|
||
|
||
setp.eq.s32 %p11, %r299, 123;
|
||
@%p11 bra BB0_302;
|
||
bra.uni BB0_122;
|
||
|
||
BB0_302:
|
||
setp.lt.s32 %p208, %r300, 1;
|
||
@%p208 bra BB0_367;
|
||
|
||
and.b32 %r202, %r300, 3;
|
||
setp.eq.s32 %p209, %r202, 0;
|
||
mov.u32 %r902, %r300;
|
||
@%p209 bra BB0_309;
|
||
|
||
setp.eq.s32 %p210, %r202, 1;
|
||
mov.u32 %r900, %r300;
|
||
@%p210 bra BB0_308;
|
||
|
||
setp.eq.s32 %p211, %r202, 2;
|
||
mov.u32 %r899, %r300;
|
||
@%p211 bra BB0_307;
|
||
|
||
ld.local.u8 %rs130, [%rd9];
|
||
ld.local.u8 %rs131, [%rd11+-1];
|
||
st.local.u8 [%rd9], %rs131;
|
||
st.local.u8 [%rd11+-1], %rs130;
|
||
add.s32 %r899, %r10, -2;
|
||
|
||
BB0_307:
|
||
ld.local.u8 %rs132, [%rd9];
|
||
cvt.s64.s32 %rd245, %r899;
|
||
add.s64 %rd246, %rd9, %rd245;
|
||
ld.local.u8 %rs133, [%rd246];
|
||
st.local.u8 [%rd9], %rs133;
|
||
st.local.u8 [%rd246], %rs132;
|
||
add.s32 %r900, %r899, -1;
|
||
|
||
BB0_308:
|
||
ld.local.u8 %rs134, [%rd9];
|
||
cvt.s64.s32 %rd247, %r900;
|
||
add.s64 %rd248, %rd9, %rd247;
|
||
ld.local.u8 %rs135, [%rd248];
|
||
st.local.u8 [%rd9], %rs135;
|
||
st.local.u8 [%rd248], %rs134;
|
||
add.s32 %r902, %r900, -1;
|
||
|
||
BB0_309:
|
||
setp.lt.u32 %p212, %r300, 4;
|
||
@%p212 bra BB0_367;
|
||
|
||
BB0_310:
|
||
ld.local.u8 %rs136, [%rd9];
|
||
cvt.s64.s32 %rd249, %r902;
|
||
add.s64 %rd250, %rd9, %rd249;
|
||
ld.local.u8 %rs137, [%rd250];
|
||
st.local.u8 [%rd9], %rs137;
|
||
st.local.u8 [%rd250], %rs136;
|
||
ld.local.u8 %rs138, [%rd9];
|
||
ld.local.u8 %rs139, [%rd250+-1];
|
||
st.local.u8 [%rd9], %rs139;
|
||
st.local.u8 [%rd250+-1], %rs138;
|
||
ld.local.u8 %rs140, [%rd9];
|
||
ld.local.u8 %rs141, [%rd250+-2];
|
||
st.local.u8 [%rd9], %rs141;
|
||
st.local.u8 [%rd250+-2], %rs140;
|
||
ld.local.u8 %rs142, [%rd9];
|
||
ld.local.u8 %rs143, [%rd250+-3];
|
||
st.local.u8 [%rd9], %rs143;
|
||
st.local.u8 [%rd250+-3], %rs142;
|
||
add.s32 %r902, %r902, -4;
|
||
setp.gt.s32 %p213, %r902, 0;
|
||
@%p213 bra BB0_310;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_12:
|
||
setp.eq.s32 %p60, %r299, 39;
|
||
@%p60 bra BB0_13;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_13:
|
||
and.b32 %r933, %r11, 255;
|
||
setp.ge.s32 %p164, %r933, %r10;
|
||
@%p164 bra BB0_367;
|
||
|
||
cvt.u64.u32 %rd212, %r11;
|
||
and.b64 %rd213, %rd212, 255;
|
||
add.s64 %rd363, %rd9, %rd213;
|
||
mov.u32 %r879, %r933;
|
||
|
||
BB0_15:
|
||
mov.u16 %rs101, 0;
|
||
st.local.u8 [%rd363], %rs101;
|
||
add.s64 %rd363, %rd363, 1;
|
||
add.s32 %r879, %r879, 1;
|
||
setp.lt.s32 %p165, %r879, %r10;
|
||
@%p165 bra BB0_15;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_66:
|
||
setp.eq.s32 %p33, %r299, 99;
|
||
@%p33 bra BB0_67;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_67:
|
||
mov.u32 %r931, 0;
|
||
setp.lt.s32 %p262, %r10, 1;
|
||
mov.u64 %rd394, %rd396;
|
||
@%p262 bra BB0_69;
|
||
|
||
BB0_68:
|
||
ld.local.u32 %r758, [%rd394];
|
||
and.b32 %r759, %r758, 1077952576;
|
||
shr.u32 %r760, %r759, 1;
|
||
and.b32 %r761, %r758, -2139062144;
|
||
shr.u32 %r762, %r761, 2;
|
||
not.b32 %r763, %r762;
|
||
and.b32 %r764, %r760, %r763;
|
||
and.b32 %r765, %r758, 522133279;
|
||
add.s32 %r766, %r765, 522133279;
|
||
mov.u32 %r767, -84215046;
|
||
sub.s32 %r768, %r767, %r765;
|
||
and.b32 %r769, %r764, %r768;
|
||
and.b32 %r770, %r769, %r766;
|
||
or.b32 %r771, %r770, %r758;
|
||
st.local.u32 [%rd394], %r771;
|
||
add.s64 %rd394, %rd394, 4;
|
||
add.s32 %r931, %r931, 4;
|
||
setp.lt.s32 %p263, %r931, %r10;
|
||
@%p263 bra BB0_68;
|
||
|
||
BB0_69:
|
||
ld.local.u32 %r772, [%rd396];
|
||
and.b32 %r773, %r772, 64;
|
||
shr.u32 %r774, %r773, 1;
|
||
and.b32 %r775, %r772, 128;
|
||
shr.u32 %r776, %r775, 2;
|
||
not.b32 %r777, %r776;
|
||
and.b32 %r778, %r774, %r777;
|
||
and.b32 %r779, %r772, 522133279;
|
||
add.s32 %r780, %r779, 31;
|
||
mov.u32 %r781, -84215046;
|
||
sub.s32 %r782, %r781, %r779;
|
||
and.b32 %r783, %r778, %r782;
|
||
and.b32 %r784, %r783, %r780;
|
||
not.b32 %r785, %r784;
|
||
or.b32 %r786, %r785, -33;
|
||
and.b32 %r787, %r786, %r772;
|
||
st.local.u32 [%rd396], %r787;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_38:
|
||
setp.eq.s32 %p47, %r299, 75;
|
||
@%p47 bra BB0_39;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_39:
|
||
setp.lt.s32 %p107, %r10, 2;
|
||
@%p107 bra BB0_367;
|
||
|
||
ld.local.u8 %rs49, [%rd11+-2];
|
||
ld.local.u8 %rs50, [%rd11+-1];
|
||
st.local.u8 [%rd11+-2], %rs50;
|
||
st.local.u8 [%rd11+-1], %rs49;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_95:
|
||
setp.eq.s32 %p20, %r299, 114;
|
||
@%p20 bra BB0_96;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_96:
|
||
shr.u32 %r667, %r10, 31;
|
||
add.s32 %r668, %r10, %r667;
|
||
shr.s32 %r264, %r668, 1;
|
||
setp.lt.s32 %p249, %r10, 2;
|
||
@%p249 bra BB0_367;
|
||
|
||
add.s32 %r265, %r10, -1;
|
||
mov.u32 %r670, 1;
|
||
max.s32 %r266, %r264, %r670;
|
||
and.b32 %r267, %r266, 3;
|
||
setp.eq.s32 %p250, %r267, 0;
|
||
mov.u32 %r926, 0;
|
||
@%p250 bra BB0_103;
|
||
|
||
setp.eq.s32 %p251, %r267, 1;
|
||
mov.u32 %r925, 0;
|
||
@%p251 bra BB0_102;
|
||
|
||
setp.eq.s32 %p252, %r267, 2;
|
||
mov.u32 %r924, 0;
|
||
@%p252 bra BB0_101;
|
||
|
||
ld.local.u8 %rs172, [%rd9];
|
||
ld.local.u8 %rs173, [%rd11+-1];
|
||
st.local.u8 [%rd9], %rs173;
|
||
st.local.u8 [%rd11+-1], %rs172;
|
||
mov.u32 %r924, %r670;
|
||
|
||
BB0_101:
|
||
cvt.u64.u32 %rd332, %r924;
|
||
add.s64 %rd333, %rd9, %rd332;
|
||
ld.local.u8 %rs174, [%rd333];
|
||
sub.s32 %r674, %r265, %r924;
|
||
cvt.s64.s32 %rd334, %r674;
|
||
add.s64 %rd335, %rd9, %rd334;
|
||
ld.local.u8 %rs175, [%rd335];
|
||
st.local.u8 [%rd333], %rs175;
|
||
st.local.u8 [%rd335], %rs174;
|
||
add.s32 %r925, %r924, 1;
|
||
|
||
BB0_102:
|
||
cvt.s64.s32 %rd336, %r925;
|
||
add.s64 %rd337, %rd9, %rd336;
|
||
ld.local.u8 %rs176, [%rd337];
|
||
sub.s32 %r675, %r265, %r925;
|
||
cvt.s64.s32 %rd338, %r675;
|
||
add.s64 %rd339, %rd9, %rd338;
|
||
ld.local.u8 %rs177, [%rd339];
|
||
st.local.u8 [%rd337], %rs177;
|
||
st.local.u8 [%rd339], %rs176;
|
||
add.s32 %r926, %r925, 1;
|
||
|
||
BB0_103:
|
||
setp.lt.u32 %p253, %r266, 4;
|
||
@%p253 bra BB0_367;
|
||
|
||
cvt.s64.s32 %rd340, %r926;
|
||
add.s64 %rd390, %rd9, %rd340;
|
||
sub.s32 %r676, %r10, %r926;
|
||
cvt.s64.s32 %rd341, %r676;
|
||
add.s64 %rd389, %rd9, %rd341;
|
||
|
||
BB0_105:
|
||
ld.local.u8 %rs178, [%rd390];
|
||
ld.local.u8 %rs179, [%rd389+-1];
|
||
st.local.u8 [%rd390], %rs179;
|
||
st.local.u8 [%rd389+-1], %rs178;
|
||
ld.local.u8 %rs180, [%rd390+1];
|
||
ld.local.u8 %rs181, [%rd389+-2];
|
||
st.local.u8 [%rd390+1], %rs181;
|
||
st.local.u8 [%rd389+-2], %rs180;
|
||
ld.local.u8 %rs182, [%rd390+2];
|
||
ld.local.u8 %rs183, [%rd389+-3];
|
||
st.local.u8 [%rd390+2], %rs183;
|
||
st.local.u8 [%rd389+-3], %rs182;
|
||
ld.local.u8 %rs184, [%rd390+3];
|
||
add.s64 %rd105, %rd389, -4;
|
||
ld.local.u8 %rs185, [%rd389+-4];
|
||
st.local.u8 [%rd390+3], %rs185;
|
||
st.local.u8 [%rd389+-4], %rs184;
|
||
add.s64 %rd390, %rd390, 4;
|
||
add.s32 %r926, %r926, 4;
|
||
setp.lt.s32 %p254, %r926, %r264;
|
||
mov.u64 %rd389, %rd105;
|
||
@%p254 bra BB0_105;
|
||
bra.uni BB0_367;
|
||
|
||
// Dispatch leaf for the value 46 in %r299 (46 is ASCII '.'); any other value
// leaves through BB0_367 without touching the buffer.
// NOTE(review): %r299, %r10, %r11 and %rd10 are set before this chunk --
// presumably opcode, current length, parameter word and &buf[%r11 & 255];
// confirm against the dispatch code.
BB0_24:
|
||
setp.eq.s32 %p54, %r299, 46;
|
||
@%p54 bra BB0_25;
|
||
bra.uni BB0_367;
|
||
|
||
// Handler: overwrite the byte at [%rd10] with the byte that follows it.
BB0_25:
|
||
and.b32 %r447, %r11, 255;  // low byte of %r11
|
||
add.s32 %r448, %r447, 1;  // index of the following byte
|
||
setp.ge.s32 %p99, %r448, %r10;
|
||
@%p99 bra BB0_367;  // nothing to do when index + 1 >= %r10
|
||
|
||
ld.local.u8 %rs39, [%rd10+1];
|
||
st.local.u8 [%rd10], %rs39;
|
||
bra.uni BB0_367;
|
||
|
||
// Dispatch leaf for the value 107 in %r299 (107 is ASCII 'k'); any other value
// leaves through BB0_367.
// NOTE(review): %r299 and %r10 are set before this chunk -- presumably the
// opcode and the current length; confirm.
BB0_82:
|
||
setp.eq.s32 %p27, %r299, 107;
|
||
@%p27 bra BB0_83;
|
||
bra.uni BB0_367;
|
||
|
||
// Handler: exchange the first two bytes of the local depot.
BB0_83:
|
||
setp.lt.s32 %p108, %r10, 2;
|
||
@%p108 bra BB0_367;  // needs at least two bytes
|
||
|
||
add.u64 %rd157, %SPL, 0;  // address of byte 0 of __local_depot0
|
||
ld.local.v2.u8 {%rs51, %rs52}, [%rd157];  // load bytes 0 and 1 together
|
||
st.local.v2.u8 [%rd157], {%rs52, %rs51};  // store them back in swapped order
|
||
bra.uni BB0_367;
|
||
|
||
BB0_48:
|
||
setp.eq.s32 %p41, %r299, 89;
|
||
@%p41 bra BB0_49;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_49:
|
||
and.b32 %r29, %r11, 255;
|
||
setp.le.s32 %p79, %r10, %r29;
|
||
@%p79 bra BB0_367;
|
||
|
||
add.s32 %r933, %r29, %r10;
|
||
setp.gt.s32 %p80, %r933, 255;
|
||
@%p80 bra BB0_367;
|
||
|
||
setp.eq.s32 %p81, %r29, 0;
|
||
@%p81 bra BB0_368;
|
||
|
||
cvt.s64.s32 %rd350, %r10;
|
||
bfe.u32 %r440, %r8, 8, 8;
|
||
sub.s32 %r441, %r10, %r440;
|
||
cvt.s64.s32 %rd134, %r441;
|
||
add.s64 %rd358, %rd9, %rd134;
|
||
add.s64 %rd357, %rd9, %rd350;
|
||
mov.u32 %r830, 0;
|
||
|
||
BB0_53:
|
||
setp.gt.s32 %p82, %r10, 254;
|
||
@%p82 bra BB0_55;
|
||
|
||
ld.local.u8 %rs29, [%rd358];
|
||
st.local.u8 [%rd357], %rs29;
|
||
|
||
BB0_55:
|
||
add.s32 %r830, %r830, 1;
|
||
add.s64 %rd358, %rd358, 1;
|
||
add.s64 %rd357, %rd357, 1;
|
||
add.s32 %r10, %r10, 1;
|
||
setp.lt.s32 %p83, %r830, %r29;
|
||
@%p83 bra BB0_53;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_113:
|
||
setp.eq.s32 %p14, %r299, 121;
|
||
@%p14 bra BB0_114;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_114:
|
||
and.b32 %r35, %r11, 255;
|
||
setp.ge.s32 %p84, %r35, %r10;
|
||
@%p84 bra BB0_367;
|
||
|
||
add.s32 %r933, %r35, %r10;
|
||
setp.gt.s32 %p85, %r933, 255;
|
||
@%p85 bra BB0_367;
|
||
|
||
setp.eq.s32 %p86, %r35, 0;
|
||
@%p86 bra BB0_368;
|
||
|
||
mov.u32 %r831, 0;
|
||
|
||
BB0_118:
|
||
cvt.u64.u32 %rd27, %r831;
|
||
add.s32 %r39, %r831, %r10;
|
||
add.s32 %r443, %r39, 1;
|
||
and.b32 %r40, %r831, 255;
|
||
setp.ge.s32 %p87, %r40, %r443;
|
||
setp.gt.s32 %p88, %r39, 254;
|
||
or.pred %p89, %p87, %p88;
|
||
@%p89 bra BB0_151;
|
||
|
||
shl.b32 %r444, %r831, 1;
|
||
cvt.s64.s32 %rd136, %r444;
|
||
add.s64 %rd137, %rd9, %rd136;
|
||
ld.local.u8 %rs3, [%rd137];
|
||
add.s32 %r839, %r39, -1;
|
||
add.s32 %r42, %r40, -1;
|
||
setp.le.s32 %p90, %r839, %r42;
|
||
@%p90 bra BB0_150;
|
||
|
||
and.b32 %r812, %r10, 3;
|
||
setp.eq.s32 %p91, %r812, 0;
|
||
@%p91 bra BB0_140;
|
||
bra.uni BB0_141;
|
||
|
||
BB0_140:
|
||
mov.u32 %r838, %r839;
|
||
mov.u32 %r839, %r39;
|
||
bra.uni BB0_148;
|
||
|
||
BB0_141:
|
||
and.b32 %r813, %r10, 3;
|
||
setp.eq.s32 %p92, %r813, 1;
|
||
@%p92 bra BB0_142;
|
||
bra.uni BB0_143;
|
||
|
||
BB0_142:
|
||
mov.u32 %r835, %r39;
|
||
bra.uni BB0_147;
|
||
|
||
BB0_143:
|
||
and.b32 %r814, %r10, 3;
|
||
setp.eq.s32 %p93, %r814, 2;
|
||
@%p93 bra BB0_144;
|
||
bra.uni BB0_145;
|
||
|
||
BB0_144:
|
||
mov.u32 %r835, %r839;
|
||
mov.u32 %r839, %r39;
|
||
bra.uni BB0_146;
|
||
|
||
BB0_145:
|
||
cvt.s64.s32 %rd138, %r839;
|
||
add.s64 %rd139, %rd9, %rd138;
|
||
ld.local.u8 %rs30, [%rd139];
|
||
st.local.u8 [%rd139+1], %rs30;
|
||
add.s32 %r835, %r39, -2;
|
||
|
||
BB0_146:
|
||
cvt.s64.s32 %rd140, %r835;
|
||
add.s64 %rd141, %rd9, %rd140;
|
||
ld.local.u8 %rs31, [%rd141];
|
||
cvt.s64.s32 %rd142, %r839;
|
||
add.s64 %rd143, %rd9, %rd142;
|
||
st.local.u8 [%rd143], %rs31;
|
||
add.s32 %r839, %r835, -1;
|
||
|
||
BB0_147:
|
||
cvt.s64.s32 %rd144, %r839;
|
||
add.s64 %rd145, %rd9, %rd144;
|
||
ld.local.u8 %rs32, [%rd145];
|
||
cvt.s64.s32 %rd146, %r835;
|
||
add.s64 %rd147, %rd9, %rd146;
|
||
st.local.u8 [%rd147], %rs32;
|
||
add.s32 %r838, %r839, -1;
|
||
|
||
BB0_148:
|
||
setp.lt.u32 %p94, %r10, 4;
|
||
@%p94 bra BB0_150;
|
||
|
||
BB0_149:
|
||
cvt.s64.s32 %rd148, %r838;
|
||
add.s64 %rd149, %rd9, %rd148;
|
||
ld.local.u8 %rs33, [%rd149];
|
||
cvt.s64.s32 %rd150, %r839;
|
||
add.s64 %rd151, %rd9, %rd150;
|
||
st.local.u8 [%rd151], %rs33;
|
||
ld.local.u8 %rs34, [%rd149+-1];
|
||
ld.local.u8 %rs35, [%rd149+-2];
|
||
ld.local.u8 %rs36, [%rd149+-3];
|
||
st.local.u8 [%rd149], %rs34;
|
||
st.local.u8 [%rd149+-1], %rs35;
|
||
st.local.u8 [%rd149+-2], %rs36;
|
||
add.s32 %r54, %r838, -4;
|
||
setp.gt.s32 %p95, %r54, %r42;
|
||
add.s32 %r839, %r838, -3;
|
||
mov.u32 %r838, %r54;
|
||
@%p95 bra BB0_149;
|
||
|
||
BB0_150:
|
||
and.b64 %rd152, %rd27, 255;
|
||
add.s64 %rd153, %rd9, %rd152;
|
||
st.local.u8 [%rd153], %rs3;
|
||
|
||
BB0_151:
|
||
cvt.u32.u64 %r445, %rd27;
|
||
add.s32 %r831, %r445, 1;
|
||
setp.lt.s32 %p96, %r831, %r35;
|
||
@%p96 bra BB0_118;
|
||
bra.uni BB0_368;
|
||
|
||
// Handler: exchange the byte at [%rd10] with the byte at [%rd9 + idx2], where
// idx2 is bits 16..23 of %r8 (%r8 is the first word read from amp_param_2 in
// the kernel prologue).
// NOTE(review): %r10, %r11, %rd9 and %rd10 are set before this chunk --
// presumably length, parameter word, buffer base and &buf[%r11 & 255]; confirm.
BB0_158:
|
||
shr.u32 %r811, %r8, 16;
|
||
and.b32 %r454, %r11, 255;  // first index: low byte of %r11
|
||
setp.ge.s32 %p104, %r454, %r10;
|
||
and.b32 %r57, %r811, 255;  // second index: (%r8 >> 16) & 255
|
||
setp.ge.s32 %p105, %r57, %r10;
|
||
or.pred %p106, %p104, %p105;
|
||
@%p106 bra BB0_367;  // skip unless both indices are below %r10
|
||
|
||
ld.local.u8 %rs47, [%rd10];
|
||
cvt.u64.u32 %rd154, %r57;
|
||
add.s64 %rd155, %rd9, %rd154;
|
||
ld.local.u8 %rs48, [%rd155];
|
||
st.local.u8 [%rd10], %rs48;
|
||
st.local.u8 [%rd155], %rs47;
|
||
bra.uni BB0_367;
|
||
|
||
// Dispatch leaf for the value 44 in %r299 (44 is ASCII ','); any other value
// leaves through BB0_367.
// NOTE(review): %r299, %r10, %r11, %rs1 and %rd10 are set before this chunk;
// %rs1 looks like the same parameter byte as %r11 & 255, which would make the
// first guard below an "index is zero" check -- confirm.
BB0_18:
|
||
setp.eq.s32 %p58, %r299, 44;
|
||
@%p58 bra BB0_19;
|
||
bra.uni BB0_367;
|
||
|
||
// Handler: overwrite the byte at [%rd10] with the byte that precedes it.
BB0_19:
|
||
and.b16 %rs37, %rs1, 255;
|
||
setp.eq.s16 %p97, %rs37, 0;
|
||
@%p97 bra BB0_367;  // skip when the low byte of %rs1 is zero
|
||
|
||
and.b32 %r446, %r11, 255;
|
||
setp.ge.s32 %p98, %r446, %r10;
|
||
@%p98 bra BB0_367;  // skip when the index is not below %r10
|
||
|
||
ld.local.u8 %rs38, [%rd10+-1];
|
||
st.local.u8 [%rd10], %rs38;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_349:
|
||
shl.b32 %r933, %r10, 1;
|
||
setp.gt.s32 %p243, %r933, 255;
|
||
@%p243 bra BB0_367;
|
||
|
||
add.s32 %r258, %r10, -3;
|
||
mov.u32 %r263, 0;
|
||
setp.lt.s32 %p244, %r258, 1;
|
||
mov.u32 %r922, %r10;
|
||
@%p244 bra BB0_352;
|
||
|
||
BB0_351:
|
||
and.b32 %r595, %r922, 3;
|
||
shl.b32 %r596, %r595, 3;
|
||
shr.s32 %r597, %r263, 31;
|
||
shr.u32 %r598, %r597, 30;
|
||
add.s32 %r599, %r263, %r598;
|
||
shr.s32 %r600, %r599, 2;
|
||
mul.wide.s32 %rd304, %r600, 4;
|
||
add.s64 %rd305, %rd396, %rd304;
|
||
ld.local.u32 %r601, [%rd305+4];
|
||
ld.local.u32 %r602, [%rd305];
|
||
mov.b64 %rd306, {%r602, %r601};
|
||
and.b64 %rd307, %rd306, 4294967295;
|
||
shl.b64 %rd308, %rd307, %r596;
|
||
cvt.u32.u64 %r603, %rd308;
|
||
shr.u64 %rd309, %rd308, 32;
|
||
cvt.u32.u64 %r604, %rd309;
|
||
shr.s32 %r605, %r922, 31;
|
||
shr.u32 %r606, %r605, 30;
|
||
add.s32 %r607, %r922, %r606;
|
||
shr.s32 %r608, %r607, 2;
|
||
mul.wide.s32 %rd310, %r608, 4;
|
||
add.s64 %rd311, %rd396, %rd310;
|
||
ld.local.u32 %r609, [%rd311];
|
||
or.b32 %r610, %r603, %r609;
|
||
ld.local.u32 %r611, [%rd311+4];
|
||
st.local.u32 [%rd311], %r610;
|
||
or.b32 %r612, %r604, %r611;
|
||
st.local.u32 [%rd311+4], %r612;
|
||
add.s32 %r922, %r922, 4;
|
||
add.s32 %r263, %r263, 4;
|
||
setp.lt.s32 %p245, %r263, %r258;
|
||
@%p245 bra BB0_351;
|
||
|
||
BB0_352:
|
||
sub.s32 %r613, %r10, %r263;
|
||
setp.eq.s32 %p246, %r613, 3;
|
||
@%p246 bra BB0_357;
|
||
|
||
setp.eq.s32 %p247, %r613, 2;
|
||
@%p247 bra BB0_356;
|
||
bra.uni BB0_354;
|
||
|
||
BB0_356:
|
||
add.s32 %r629, %r263, %r10;
|
||
shr.s32 %r630, %r629, 31;
|
||
shr.u32 %r631, %r630, 30;
|
||
add.s32 %r632, %r629, %r631;
|
||
shr.s32 %r633, %r632, 2;
|
||
and.b32 %r634, %r629, 3;
|
||
shl.b32 %r635, %r634, 3;
|
||
shr.s32 %r636, %r263, 31;
|
||
shr.u32 %r637, %r636, 30;
|
||
add.s32 %r638, %r263, %r637;
|
||
shr.s32 %r639, %r638, 2;
|
||
mul.wide.s32 %rd316, %r639, 4;
|
||
add.s64 %rd317, %rd396, %rd316;
|
||
ld.local.u32 %r640, [%rd317+4];
|
||
ld.local.u32 %r641, [%rd317];
|
||
mov.b64 %rd318, {%r641, %r640};
|
||
and.b64 %rd319, %rd318, 65535;
|
||
shl.b64 %rd320, %rd319, %r635;
|
||
cvt.u32.u64 %r642, %rd320;
|
||
shr.u64 %rd321, %rd320, 32;
|
||
cvt.u32.u64 %r643, %rd321;
|
||
mul.wide.s32 %rd322, %r633, 4;
|
||
add.s64 %rd323, %rd396, %rd322;
|
||
ld.local.u32 %r644, [%rd323];
|
||
or.b32 %r645, %r642, %r644;
|
||
ld.local.u32 %r646, [%rd323+4];
|
||
st.local.u32 [%rd323], %r645;
|
||
or.b32 %r647, %r643, %r646;
|
||
st.local.u32 [%rd323+4], %r647;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_72:
|
||
setp.eq.s32 %p31, %r299, 102;
|
||
@%p31 bra BB0_73;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_73:
|
||
shl.b32 %r228, %r10, 1;
|
||
setp.gt.s32 %p222, %r228, 255;
|
||
@%p222 bra BB0_367;
|
||
|
||
add.s32 %r229, %r10, -3;
|
||
mov.u32 %r912, 0;
|
||
setp.lt.s32 %p223, %r229, 1;
|
||
mov.u32 %r911, %r10;
|
||
@%p223 bra BB0_76;
|
||
|
||
BB0_75:
|
||
add.s32 %r818, %r10, -3;
|
||
and.b32 %r501, %r911, 3;
|
||
shl.b32 %r502, %r501, 3;
|
||
shr.s32 %r503, %r912, 31;
|
||
shr.u32 %r504, %r503, 30;
|
||
add.s32 %r505, %r912, %r504;
|
||
shr.s32 %r506, %r505, 2;
|
||
mul.wide.s32 %rd263, %r506, 4;
|
||
add.s64 %rd264, %rd396, %rd263;
|
||
ld.local.u32 %r507, [%rd264+4];
|
||
ld.local.u32 %r508, [%rd264];
|
||
mov.b64 %rd265, {%r508, %r507};
|
||
and.b64 %rd266, %rd265, 4294967295;
|
||
shl.b64 %rd267, %rd266, %r502;
|
||
cvt.u32.u64 %r509, %rd267;
|
||
shr.u64 %rd268, %rd267, 32;
|
||
cvt.u32.u64 %r510, %rd268;
|
||
shr.s32 %r511, %r911, 31;
|
||
shr.u32 %r512, %r511, 30;
|
||
add.s32 %r513, %r911, %r512;
|
||
shr.s32 %r514, %r513, 2;
|
||
mul.wide.s32 %rd269, %r514, 4;
|
||
add.s64 %rd270, %rd396, %rd269;
|
||
ld.local.u32 %r515, [%rd270];
|
||
or.b32 %r516, %r509, %r515;
|
||
ld.local.u32 %r517, [%rd270+4];
|
||
st.local.u32 [%rd270], %r516;
|
||
or.b32 %r518, %r510, %r517;
|
||
st.local.u32 [%rd270+4], %r518;
|
||
add.s32 %r911, %r911, 4;
|
||
add.s32 %r912, %r912, 4;
|
||
setp.lt.s32 %p224, %r912, %r818;
|
||
@%p224 bra BB0_75;
|
||
|
||
BB0_76:
|
||
sub.s32 %r519, %r10, %r912;
|
||
setp.eq.s32 %p225, %r519, 3;
|
||
@%p225 bra BB0_328;
|
||
|
||
setp.eq.s32 %p226, %r519, 2;
|
||
@%p226 bra BB0_327;
|
||
bra.uni BB0_78;
|
||
|
||
BB0_327:
|
||
add.s32 %r535, %r912, %r10;
|
||
shr.s32 %r536, %r535, 31;
|
||
shr.u32 %r537, %r536, 30;
|
||
add.s32 %r538, %r535, %r537;
|
||
shr.s32 %r539, %r538, 2;
|
||
and.b32 %r540, %r535, 3;
|
||
shl.b32 %r541, %r540, 3;
|
||
shr.s32 %r542, %r912, 31;
|
||
shr.u32 %r543, %r542, 30;
|
||
add.s32 %r544, %r912, %r543;
|
||
shr.s32 %r545, %r544, 2;
|
||
mul.wide.s32 %rd275, %r545, 4;
|
||
add.s64 %rd276, %rd396, %rd275;
|
||
ld.local.u32 %r546, [%rd276+4];
|
||
ld.local.u32 %r547, [%rd276];
|
||
mov.b64 %rd277, {%r547, %r546};
|
||
and.b64 %rd278, %rd277, 65535;
|
||
shl.b64 %rd279, %rd278, %r541;
|
||
cvt.u32.u64 %r548, %rd279;
|
||
shr.u64 %rd280, %rd279, 32;
|
||
cvt.u32.u64 %r549, %rd280;
|
||
mul.wide.s32 %rd281, %r539, 4;
|
||
add.s64 %rd282, %rd396, %rd281;
|
||
ld.local.u32 %r550, [%rd282];
|
||
or.b32 %r551, %r548, %r550;
|
||
ld.local.u32 %r552, [%rd282+4];
|
||
st.local.u32 [%rd282], %r551;
|
||
or.b32 %r553, %r549, %r552;
|
||
st.local.u32 [%rd282+4], %r553;
|
||
bra.uni BB0_329;
|
||
|
||
// Handler: double the byte at [%rd10] in place (equivalent to a left shift by
// one); only the low 8 bits of the product are written back.
// NOTE(review): %r10, %r11 and %rd10 are set before this chunk -- presumably
// length, parameter word and &buf[%r11 & 255]; confirm.
BB0_156:
|
||
and.b32 %r452, %r11, 255;
|
||
setp.ge.s32 %p103, %r452, %r10;
|
||
@%p103 bra BB0_367;  // skip when the index is not below %r10
|
||
|
||
ld.local.u8 %rs46, [%rd10];
|
||
mul.wide.u16 %r453, %rs46, 2;  // 16 x 16 -> 32-bit product, i.e. value << 1
|
||
st.local.u8 [%rd10], %r453;  // u8 store keeps only the low byte
|
||
bra.uni BB0_367;
|
||
|
||
// Dispatch leaf for the value 82 in %r299 (82 is ASCII 'R'); any other value
// leaves through BB0_367.
// NOTE(review): %r299, %r10, %r11 and %rd10 are set before this chunk --
// presumably opcode, length, parameter word and &buf[%r11 & 255]; confirm.
BB0_43:
|
||
setp.eq.s32 %p45, %r299, 82;
|
||
@%p45 bra BB0_44;
|
||
bra.uni BB0_367;
|
||
|
||
// Handler: logical right shift by one of the byte at [%rd10], in place.
BB0_44:
|
||
and.b32 %r451, %r11, 255;
|
||
setp.ge.s32 %p102, %r451, %r10;
|
||
@%p102 bra BB0_367;  // skip when the index is not below %r10
|
||
|
||
ld.local.u8 %rs44, [%rd10];
|
||
shr.u16 %rs45, %rs44, 1;  // unsigned shift: bit 7 of the result is 0
|
||
st.local.u8 [%rd10], %rs45;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_226:
|
||
setp.lt.s32 %p151, %r10, 1;
|
||
@%p151 bra BB0_367;
|
||
|
||
and.b32 %r136, %r10, 3;
|
||
setp.eq.s32 %p152, %r136, 0;
|
||
mov.u32 %r877, 0;
|
||
@%p152 bra BB0_238;
|
||
|
||
setp.eq.s32 %p153, %r136, 1;
|
||
mov.u32 %r876, 0;
|
||
@%p153 bra BB0_235;
|
||
|
||
setp.eq.s32 %p154, %r136, 2;
|
||
mov.u32 %r875, 0;
|
||
@%p154 bra BB0_232;
|
||
|
||
ld.local.u8 %rs87, [%rd9];
|
||
and.b16 %rs88, %rs1, 255;
|
||
mov.u32 %r875, 1;
|
||
setp.ne.s16 %p155, %rs87, %rs88;
|
||
@%p155 bra BB0_232;
|
||
|
||
st.local.u8 [%rd9], %rs2;
|
||
|
||
BB0_232:
|
||
cvt.u64.u32 %rd209, %r875;
|
||
add.s64 %rd37, %rd9, %rd209;
|
||
ld.local.u8 %rs89, [%rd37];
|
||
and.b16 %rs90, %rs1, 255;
|
||
setp.ne.s16 %p156, %rs89, %rs90;
|
||
@%p156 bra BB0_234;
|
||
|
||
st.local.u8 [%rd37], %rs2;
|
||
|
||
BB0_234:
|
||
add.s32 %r876, %r875, 1;
|
||
|
||
BB0_235:
|
||
cvt.s64.s32 %rd210, %r876;
|
||
add.s64 %rd38, %rd9, %rd210;
|
||
ld.local.u8 %rs91, [%rd38];
|
||
and.b16 %rs92, %rs1, 255;
|
||
setp.ne.s16 %p157, %rs91, %rs92;
|
||
@%p157 bra BB0_237;
|
||
|
||
st.local.u8 [%rd38], %rs2;
|
||
|
||
BB0_237:
|
||
add.s32 %r877, %r876, 1;
|
||
|
||
BB0_238:
|
||
setp.lt.u32 %p158, %r10, 4;
|
||
@%p158 bra BB0_367;
|
||
|
||
cvt.s64.s32 %rd211, %r877;
|
||
add.s64 %rd362, %rd9, %rd211;
|
||
|
||
BB0_240:
|
||
ld.local.u8 %rs93, [%rd362];
|
||
and.b16 %rs94, %rs1, 255;
|
||
setp.ne.s16 %p159, %rs93, %rs94;
|
||
@%p159 bra BB0_242;
|
||
|
||
st.local.u8 [%rd362], %rs2;
|
||
|
||
BB0_242:
|
||
ld.local.u8 %rs95, [%rd362+1];
|
||
setp.ne.s16 %p160, %rs95, %rs94;
|
||
@%p160 bra BB0_244;
|
||
|
||
st.local.u8 [%rd362+1], %rs2;
|
||
|
||
BB0_244:
|
||
ld.local.u8 %rs97, [%rd362+2];
|
||
setp.ne.s16 %p161, %rs97, %rs94;
|
||
@%p161 bra BB0_246;
|
||
|
||
st.local.u8 [%rd362+2], %rs2;
|
||
|
||
BB0_246:
|
||
ld.local.u8 %rs99, [%rd362+3];
|
||
setp.ne.s16 %p162, %rs99, %rs94;
|
||
@%p162 bra BB0_248;
|
||
|
||
st.local.u8 [%rd362+3], %rs2;
|
||
|
||
BB0_248:
|
||
add.s32 %r877, %r877, 4;
|
||
setp.lt.s32 %p163, %r877, %r10;
|
||
add.s64 %rd362, %rd362, 4;
|
||
@%p163 bra BB0_240;
|
||
bra.uni BB0_367;
|
||
|
||
// Dispatch leaf for the value 117 in %r299 (117 is ASCII 'u'); any other value
// leaves through BB0_367.
// NOTE(review): %r299 and %r10 are set before this chunk -- presumably the
// opcode and the current length; confirm.
BB0_108:
|
||
setp.eq.s32 %p18, %r299, 117;
|
||
@%p18 bra BB0_109;
|
||
bra.uni BB0_367;
|
||
|
||
// Handler: clear bit 0x20 in every byte whose value is an ASCII letter
// (0x41-0x5A or 0x61-0x7A), i.e. lower-case letters become upper-case and all
// other bytes are left unchanged. Works on one 32-bit word (4 bytes) per
// iteration, starting at %rd396 (initialised in the prologue to the start of
// the local depot).
BB0_109:
|
||
mov.u32 %r930, 0;  // byte counter, advanced by 4 per word
|
||
setp.lt.s32 %p260, %r10, 1;
|
||
mov.u64 %rd393, %rd396;  // running word pointer
|
||
@%p260 bra BB0_367;  // nothing to do when %r10 < 1
|
||
|
||
// Loop body. Whole words are rewritten, so when %r10 is not a multiple of 4
// the last iteration also processes the bytes up to the next word boundary.
BB0_110:
|
||
ld.local.u32 %r742, [%rd393];
|
||
and.b32 %r743, %r742, 1077952576;  // 0x40404040: bit 6 of each byte
|
||
shr.u32 %r744, %r743, 1;  // move it to the bit-5 position
|
||
and.b32 %r745, %r742, -2139062144;  // 0x80808080: bit 7 of each byte
|
||
shr.u32 %r746, %r745, 2;  // move it to the bit-5 position
|
||
not.b32 %r747, %r746;
|
||
and.b32 %r748, %r744, %r747;  // 0x20 where bit 6 is set and bit 7 is clear
|
||
and.b32 %r749, %r742, 522133279;  // 0x1F1F1F1F: low five bits of each byte
|
||
add.s32 %r750, %r749, 522133279;  // bit 5 set where low5 >= 1 (no inter-byte carry)
|
||
mov.u32 %r751, -84215046;  // 0xFAFAFAFA
|
||
sub.s32 %r752, %r751, %r749;  // bit 5 stays set where low5 <= 26 (no inter-byte borrow)
|
||
and.b32 %r753, %r748, %r752;
|
||
and.b32 %r754, %r753, %r750;  // 0x20 in exactly the letter bytes
|
||
not.b32 %r755, %r754;
|
||
and.b32 %r756, %r742, %r755;  // clear the case bit of those bytes
|
||
st.local.u32 [%rd393], %r756;
|
||
add.s64 %rd393, %rd393, 4;
|
||
add.s32 %r930, %r930, 4;
|
||
setp.lt.s32 %p261, %r930, %r10;
|
||
@%p261 bra BB0_110;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_194:
|
||
mov.u32 %r933, 0;
|
||
setp.lt.s32 %p132, %r10, 1;
|
||
@%p132 bra BB0_216;
|
||
|
||
and.b32 %r100, %r10, 3;
|
||
setp.eq.s32 %p133, %r100, 0;
|
||
mov.u32 %r862, 0;
|
||
mov.u32 %r933, %r862;
|
||
@%p133 bra BB0_205;
|
||
|
||
setp.eq.s32 %p134, %r100, 1;
|
||
mov.u32 %r859, 0;
|
||
mov.u32 %r933, %r859;
|
||
@%p134 bra BB0_202;
|
||
|
||
setp.eq.s32 %p135, %r100, 2;
|
||
mov.u32 %r856, 0;
|
||
mov.u32 %r933, %r856;
|
||
@%p135 bra BB0_199;
|
||
|
||
ld.local.u8 %rs76, [%rd9];
|
||
and.b16 %rs77, %rs1, 255;
|
||
setp.ne.s16 %p136, %rs76, %rs77;
|
||
selp.u32 %r933, 1, 0, %p136;
|
||
mov.u32 %r856, 1;
|
||
|
||
BB0_199:
|
||
cvt.u64.u32 %rd185, %r856;
|
||
add.s64 %rd186, %rd9, %rd185;
|
||
ld.local.u8 %rs6, [%rd186];
|
||
and.b16 %rs78, %rs1, 255;
|
||
setp.eq.s16 %p137, %rs6, %rs78;
|
||
@%p137 bra BB0_201;
|
||
|
||
cvt.u64.u32 %rd187, %r933;
|
||
add.s64 %rd188, %rd9, %rd187;
|
||
st.local.u8 [%rd188], %rs6;
|
||
add.s32 %r933, %r933, 1;
|
||
|
||
BB0_201:
|
||
add.s32 %r859, %r856, 1;
|
||
|
||
BB0_202:
|
||
cvt.s64.s32 %rd189, %r859;
|
||
add.s64 %rd190, %rd9, %rd189;
|
||
ld.local.u8 %rs7, [%rd190];
|
||
and.b16 %rs79, %rs1, 255;
|
||
setp.eq.s16 %p138, %rs7, %rs79;
|
||
@%p138 bra BB0_204;
|
||
|
||
cvt.s64.s32 %rd191, %r933;
|
||
add.s64 %rd192, %rd9, %rd191;
|
||
st.local.u8 [%rd192], %rs7;
|
||
add.s32 %r933, %r933, 1;
|
||
|
||
BB0_204:
|
||
add.s32 %r862, %r859, 1;
|
||
|
||
BB0_205:
|
||
setp.lt.u32 %p139, %r10, 4;
|
||
@%p139 bra BB0_216;
|
||
|
||
cvt.s64.s32 %rd193, %r862;
|
||
add.s64 %rd360, %rd9, %rd193;
|
||
|
||
BB0_207:
|
||
ld.local.u8 %rs8, [%rd360];
|
||
and.b16 %rs80, %rs1, 255;
|
||
setp.eq.s16 %p140, %rs8, %rs80;
|
||
@%p140 bra BB0_209;
|
||
|
||
cvt.s64.s32 %rd194, %r933;
|
||
add.s64 %rd195, %rd9, %rd194;
|
||
st.local.u8 [%rd195], %rs8;
|
||
add.s32 %r933, %r933, 1;
|
||
|
||
BB0_209:
|
||
ld.local.u8 %rs9, [%rd360+1];
|
||
setp.eq.s16 %p141, %rs9, %rs80;
|
||
@%p141 bra BB0_211;
|
||
|
||
cvt.s64.s32 %rd196, %r933;
|
||
add.s64 %rd197, %rd9, %rd196;
|
||
st.local.u8 [%rd197], %rs9;
|
||
add.s32 %r933, %r933, 1;
|
||
|
||
BB0_211:
|
||
ld.local.u8 %rs10, [%rd360+2];
|
||
setp.eq.s16 %p142, %rs10, %rs80;
|
||
@%p142 bra BB0_213;
|
||
|
||
cvt.s64.s32 %rd198, %r933;
|
||
add.s64 %rd199, %rd9, %rd198;
|
||
st.local.u8 [%rd199], %rs10;
|
||
add.s32 %r933, %r933, 1;
|
||
|
||
BB0_213:
|
||
ld.local.u8 %rs11, [%rd360+3];
|
||
setp.eq.s16 %p143, %rs11, %rs80;
|
||
@%p143 bra BB0_215;
|
||
|
||
cvt.s64.s32 %rd200, %r933;
|
||
add.s64 %rd201, %rd9, %rd200;
|
||
st.local.u8 [%rd201], %rs11;
|
||
add.s32 %r933, %r933, 1;
|
||
|
||
BB0_215:
|
||
add.s32 %r862, %r862, 4;
|
||
setp.lt.s32 %p144, %r862, %r10;
|
||
add.s64 %rd360, %rd360, 4;
|
||
@%p144 bra BB0_207;
|
||
|
||
BB0_216:
|
||
setp.le.s32 %p145, %r10, %r933;
|
||
@%p145 bra BB0_368;
|
||
|
||
sub.s32 %r126, %r10, %r933;
|
||
and.b32 %r127, %r126, 3;
|
||
setp.eq.s32 %p146, %r127, 0;
|
||
mov.u32 %r873, %r933;
|
||
@%p146 bra BB0_223;
|
||
|
||
setp.eq.s32 %p147, %r127, 1;
|
||
mov.u32 %r872, %r933;
|
||
@%p147 bra BB0_222;
|
||
|
||
setp.eq.s32 %p148, %r127, 2;
|
||
mov.u32 %r871, %r933;
|
||
@%p148 bra BB0_221;
|
||
|
||
cvt.s64.s32 %rd202, %r933;
|
||
add.s64 %rd203, %rd9, %rd202;
|
||
mov.u16 %rs84, 0;
|
||
st.local.u8 [%rd203], %rs84;
|
||
add.s32 %r871, %r933, 1;
|
||
|
||
BB0_221:
|
||
cvt.s64.s32 %rd204, %r871;
|
||
add.s64 %rd205, %rd9, %rd204;
|
||
mov.u16 %rs85, 0;
|
||
st.local.u8 [%rd205], %rs85;
|
||
add.s32 %r872, %r871, 1;
|
||
|
||
BB0_222:
|
||
cvt.s64.s32 %rd206, %r872;
|
||
add.s64 %rd207, %rd9, %rd206;
|
||
mov.u16 %rs86, 0;
|
||
st.local.u8 [%rd207], %rs86;
|
||
add.s32 %r873, %r872, 1;
|
||
|
||
BB0_223:
|
||
setp.lt.u32 %p149, %r126, 4;
|
||
@%p149 bra BB0_368;
|
||
|
||
cvt.s64.s32 %rd208, %r873;
|
||
add.s64 %rd361, %rd9, %rd208;
|
||
|
||
BB0_225:
|
||
mov.u32 %r468, 0;
|
||
st.local.u8 [%rd361+3], %r468;
|
||
st.local.u8 [%rd361+2], %r468;
|
||
st.local.u8 [%rd361+1], %r468;
|
||
st.local.u8 [%rd361], %r468;
|
||
add.s64 %rd361, %rd361, 4;
|
||
add.s32 %r873, %r873, 4;
|
||
setp.lt.s32 %p150, %r873, %r10;
|
||
@%p150 bra BB0_225;
|
||
bra.uni BB0_368;
|
||
|
||
// Dispatch leaf for the value 68 in %r299 (68 is ASCII 'D'); any other value
// leaves through BB0_367.
// NOTE(review): %r299, %r10, %r11, %rd9 and %rd11 are set before this chunk --
// presumably opcode, length, parameter word, buffer base and &buf[%r10];
// confirm. %r933 looks like the new length consumed at BB0_368 -- confirm.
BB0_29:
|
||
setp.eq.s32 %p52, %r299, 68;
|
||
@%p52 bra BB0_30;
|
||
bra.uni BB0_367;
|
||
|
||
// Handler: remove the byte at index (%r11 & 255) by moving every later byte
// one position towards the front, then zero the byte at [%rd11 - 1].
BB0_30:
|
||
and.b32 %r886, %r11, 255;  // index to delete; reused as the loop counter
|
||
setp.ge.s32 %p184, %r886, %r10;
|
||
@%p184 bra BB0_367;  // index not below %r10: leave buffer unchanged
|
||
|
||
add.s32 %r933, %r10, -1;  // %r10 - 1
|
||
setp.ge.s32 %p185, %r886, %r933;
|
||
@%p185 bra BB0_34;  // index is %r10 - 1: no bytes to shift
|
||
|
||
cvt.u64.u32 %rd224, %r11;
|
||
and.b64 %rd225, %rd224, 255;
|
||
add.s64 %rd369, %rd9, %rd225;  // pointer to the byte being overwritten
|
||
|
||
// Shift loop: buf[i] = buf[i + 1] for i = index .. %r933 - 1.
BB0_33:
|
||
add.s64 %rd61, %rd369, 1;
|
||
ld.local.u8 %rs107, [%rd369+1];
|
||
st.local.u8 [%rd369], %rs107;
|
||
add.s32 %r886, %r886, 1;
|
||
setp.lt.s32 %p186, %r886, %r933;
|
||
mov.u64 %rd369, %rd61;
|
||
@%p186 bra BB0_33;
|
||
|
||
// Zero the byte just before %rd11, then continue at BB0_368 with %r933 set.
BB0_34:
|
||
mov.u16 %rs108, 0;
|
||
st.local.u8 [%rd11+-1], %rs108;
|
||
bra.uni BB0_368;
|
||
|
||
// Handler: set bit 0x20 in every byte whose value is an ASCII letter
// (0x41-0x5A or 0x61-0x7A), i.e. upper-case letters become lower-case and all
// other bytes are left unchanged. Works on one 32-bit word (4 bytes) per
// iteration, starting at %rd396 (initialised in the prologue to the start of
// the local depot).
// NOTE(review): %r10 is set before this chunk -- presumably the current
// length; confirm.
BB0_365:
|
||
mov.u32 %r932, 0;  // byte counter, advanced by 4 per word
|
||
setp.lt.s32 %p264, %r10, 1;
|
||
mov.u64 %rd395, %rd396;  // running word pointer
|
||
@%p264 bra BB0_367;  // nothing to do when %r10 < 1
|
||
|
||
// Loop body. Whole words are rewritten, so when %r10 is not a multiple of 4
// the last iteration also processes the bytes up to the next word boundary.
BB0_366:
|
||
ld.local.u32 %r789, [%rd395];
|
||
and.b32 %r790, %r789, 1077952576;  // 0x40404040: bit 6 of each byte
|
||
shr.u32 %r791, %r790, 1;  // move it to the bit-5 position
|
||
and.b32 %r792, %r789, -2139062144;  // 0x80808080: bit 7 of each byte
|
||
shr.u32 %r793, %r792, 2;  // move it to the bit-5 position
|
||
not.b32 %r794, %r793;
|
||
and.b32 %r795, %r791, %r794;  // 0x20 where bit 6 is set and bit 7 is clear
|
||
and.b32 %r796, %r789, 522133279;  // 0x1F1F1F1F: low five bits of each byte
|
||
add.s32 %r797, %r796, 522133279;  // bit 5 set where low5 >= 1 (no inter-byte carry)
|
||
mov.u32 %r798, -84215046;  // 0xFAFAFAFA
|
||
sub.s32 %r799, %r798, %r796;  // bit 5 stays set where low5 <= 26 (no inter-byte borrow)
|
||
and.b32 %r800, %r795, %r799;
|
||
and.b32 %r801, %r800, %r797;  // 0x20 in exactly the letter bytes
|
||
or.b32 %r802, %r801, %r789;  // set the case bit of those bytes
|
||
st.local.u32 [%rd395], %r802;
|
||
add.s64 %rd395, %rd395, 4;
|
||
add.s32 %r932, %r932, 4;
|
||
setp.lt.s32 %p265, %r932, %r10;
|
||
@%p265 bra BB0_366;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_87:
|
||
setp.eq.s32 %p25, %r299, 112;
|
||
@%p25 bra BB0_88;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_88:
|
||
and.b32 %r246, %r11, 255;
|
||
mad.lo.s32 %r933, %r246, %r10, %r10;
|
||
setp.gt.s32 %p234, %r933, 255;
|
||
@%p234 bra BB0_367;
|
||
|
||
setp.eq.s32 %p235, %r246, 0;
|
||
@%p235 bra BB0_368;
|
||
|
||
and.b32 %r248, %r10, 3;
|
||
mov.u32 %r917, 0;
|
||
mov.u64 %rd387, %rd9;
|
||
|
||
BB0_91:
|
||
setp.lt.s32 %p236, %r10, 1;
|
||
@%p236 bra BB0_348;
|
||
|
||
setp.eq.s32 %p237, %r248, 0;
|
||
mov.u64 %rd303, 0;
|
||
mov.u32 %r921, 0;
|
||
@%p237 bra BB0_340;
|
||
bra.uni BB0_341;
|
||
|
||
BB0_340:
|
||
mov.u64 %rd385, %rd387;
|
||
mov.u64 %rd386, %rd11;
|
||
mov.u64 %rd387, %rd303;
|
||
mov.u64 %rd11, %rd303;
|
||
bra.uni BB0_346;
|
||
|
||
BB0_341:
|
||
setp.eq.s32 %p238, %r248, 1;
|
||
mov.u32 %r919, 0;
|
||
@%p238 bra BB0_345;
|
||
|
||
setp.eq.s32 %p239, %r248, 2;
|
||
mov.u32 %r919, 1;
|
||
@%p239 bra BB0_344;
|
||
|
||
ld.local.u8 %rs165, [%rd387];
|
||
st.local.u8 [%rd11], %rs165;
|
||
add.s64 %rd11, %rd11, 1;
|
||
add.s64 %rd387, %rd387, 1;
|
||
mov.u32 %r919, 2;
|
||
|
||
BB0_344:
|
||
ld.local.u8 %rs166, [%rd387];
|
||
st.local.u8 [%rd11], %rs166;
|
||
add.s64 %rd11, %rd11, 1;
|
||
add.s64 %rd387, %rd387, 1;
|
||
|
||
BB0_345:
|
||
ld.local.u8 %rs167, [%rd387];
|
||
st.local.u8 [%rd11], %rs167;
|
||
add.s64 %rd386, %rd11, 1;
|
||
add.s64 %rd385, %rd387, 1;
|
||
add.s32 %r921, %r919, 1;
|
||
mov.u64 %rd387, %rd385;
|
||
mov.u64 %rd11, %rd386;
|
||
|
||
BB0_346:
|
||
setp.lt.u32 %p240, %r10, 4;
|
||
@%p240 bra BB0_348;
|
||
|
||
BB0_347:
|
||
ld.local.u8 %rs168, [%rd385];
|
||
st.local.u8 [%rd386], %rs168;
|
||
ld.local.u8 %rs169, [%rd385+1];
|
||
st.local.u8 [%rd386+1], %rs169;
|
||
ld.local.u8 %rs170, [%rd385+2];
|
||
st.local.u8 [%rd386+2], %rs170;
|
||
ld.local.u8 %rs171, [%rd385+3];
|
||
st.local.u8 [%rd386+3], %rs171;
|
||
add.s64 %rd386, %rd386, 4;
|
||
add.s64 %rd385, %rd385, 4;
|
||
add.s32 %r921, %r921, 4;
|
||
setp.lt.s32 %p241, %r921, %r10;
|
||
mov.u64 %rd387, %rd385;
|
||
mov.u64 %rd11, %rd386;
|
||
@%p241 bra BB0_347;
|
||
|
||
BB0_348:
|
||
add.s32 %r917, %r917, 1;
|
||
setp.lt.s32 %p242, %r917, %r246;
|
||
@%p242 bra BB0_91;
|
||
bra.uni BB0_368;
|
||
|
||
// Handler: append (%r11 & 255) copies of the byte at [%rd11 - 1] starting at
// [%rd11]. Leaves through BB0_367 when the buffer is empty or the grown size
// would exceed 255, and through BB0_368 otherwise.
// NOTE(review): %r10, %r11 and %rd11 are set before this chunk -- presumably
// length, parameter word and &buf[%r10]; %r933 looks like the new length
// consumed at BB0_368 -- confirm.
BB0_171:
|
||
and.b32 %r74, %r11, 255;  // repeat count
|
||
add.s32 %r933, %r74, %r10;  // %r10 + repeat count
|
||
setp.gt.s32 %p116, %r933, 255;
|
||
setp.eq.s32 %p117, %r10, 0;
|
||
or.pred %p118, %p117, %p116;
|
||
@%p118 bra BB0_367;  // %r10 == 0 or result larger than 255: no change
|
||
|
||
setp.eq.s32 %p119, %r74, 0;
|
||
@%p119 bra BB0_368;  // zero repeats: nothing to append
|
||
|
||
ld.local.u8 %rs4, [%rd11+-1];  // byte to replicate
|
||
mov.u32 %r846, 0;  // copies written so far
|
||
|
||
// Append loop. Note that %r10 and %rd11 themselves are advanced here.
BB0_174:
|
||
setp.gt.s32 %p120, %r10, 254;
|
||
@%p120 bra BB0_176;  // do not store once %r10 exceeds 254
|
||
|
||
st.local.u8 [%rd11], %rs4;
|
||
|
||
BB0_176:
|
||
add.s32 %r846, %r846, 1;
|
||
add.s64 %rd11, %rd11, 1;
|
||
add.s32 %r10, %r10, 1;
|
||
setp.lt.s32 %p121, %r846, %r74;
|
||
@%p121 bra BB0_174;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_58:
|
||
setp.eq.s32 %p39, %r299, 93;
|
||
@%p39 bra BB0_59;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_59:
|
||
setp.eq.s32 %p187, %r10, 0;
|
||
mov.u32 %r933, 0;
|
||
@%p187 bra BB0_368;
|
||
|
||
add.s32 %r490, %r10, 255;
|
||
and.b32 %r171, %r490, 255;
|
||
setp.ge.s32 %p188, %r171, %r10;
|
||
@%p188 bra BB0_367;
|
||
|
||
add.s32 %r933, %r10, -1;
|
||
setp.le.s32 %p189, %r933, %r171;
|
||
@%p189 bra BB0_282;
|
||
|
||
sub.s32 %r173, %r933, %r171;
|
||
and.b32 %r174, %r173, 3;
|
||
setp.eq.s32 %p190, %r174, 0;
|
||
@%p190 bra BB0_272;
|
||
bra.uni BB0_273;
|
||
|
||
BB0_272:
|
||
mov.u32 %r890, %r171;
|
||
bra.uni BB0_279;
|
||
|
||
BB0_177:
|
||
and.b32 %r80, %r11, 255;
|
||
add.s32 %r933, %r80, %r10;
|
||
setp.gt.s32 %p122, %r933, 255;
|
||
@%p122 bra BB0_367;
|
||
|
||
ld.local.u8 %rs5, [%rd9];
|
||
setp.eq.s32 %p123, %r80, 0;
|
||
mov.u32 %r847, 0;
|
||
@%p123 bra BB0_368;
|
||
|
||
BB0_179:
|
||
add.s32 %r83, %r10, %r847;
|
||
setp.gt.s32 %p124, %r83, 254;
|
||
@%p124 bra BB0_193;
|
||
|
||
add.s32 %r855, %r83, -1;
|
||
setp.lt.s32 %p125, %r855, 0;
|
||
@%p125 bra BB0_192;
|
||
|
||
and.b32 %r85, %r83, 3;
|
||
setp.eq.s32 %p126, %r85, 0;
|
||
@%p126 bra BB0_182;
|
||
bra.uni BB0_183;
|
||
|
||
BB0_182:
|
||
mov.u32 %r854, %r855;
|
||
mov.u32 %r855, %r83;
|
||
bra.uni BB0_190;
|
||
|
||
BB0_183:
|
||
setp.eq.s32 %p127, %r85, 1;
|
||
@%p127 bra BB0_184;
|
||
bra.uni BB0_185;
|
||
|
||
BB0_184:
|
||
mov.u32 %r851, %r83;
|
||
bra.uni BB0_189;
|
||
|
||
BB0_185:
|
||
setp.eq.s32 %p128, %r85, 2;
|
||
@%p128 bra BB0_186;
|
||
bra.uni BB0_187;
|
||
|
||
BB0_186:
|
||
mov.u32 %r851, %r855;
|
||
mov.u32 %r855, %r83;
|
||
bra.uni BB0_188;
|
||
|
||
BB0_187:
|
||
cvt.s64.s32 %rd171, %r855;
|
||
add.s64 %rd172, %rd9, %rd171;
|
||
ld.local.u8 %rs69, [%rd172];
|
||
st.local.u8 [%rd172+1], %rs69;
|
||
add.s32 %r851, %r83, -2;
|
||
|
||
BB0_188:
|
||
cvt.s64.s32 %rd173, %r851;
|
||
add.s64 %rd174, %rd9, %rd173;
|
||
ld.local.u8 %rs70, [%rd174];
|
||
cvt.s64.s32 %rd175, %r855;
|
||
add.s64 %rd176, %rd9, %rd175;
|
||
st.local.u8 [%rd176], %rs70;
|
||
add.s32 %r855, %r851, -1;
|
||
|
||
BB0_189:
|
||
cvt.s64.s32 %rd177, %r855;
|
||
add.s64 %rd178, %rd9, %rd177;
|
||
ld.local.u8 %rs71, [%rd178];
|
||
cvt.s64.s32 %rd179, %r851;
|
||
add.s64 %rd180, %rd9, %rd179;
|
||
st.local.u8 [%rd180], %rs71;
|
||
add.s32 %r854, %r855, -1;
|
||
|
||
BB0_190:
|
||
setp.lt.u32 %p129, %r83, 4;
|
||
@%p129 bra BB0_192;
|
||
|
||
BB0_191:
|
||
cvt.s64.s32 %rd181, %r854;
|
||
add.s64 %rd182, %rd9, %rd181;
|
||
ld.local.u8 %rs72, [%rd182];
|
||
cvt.s64.s32 %rd183, %r855;
|
||
add.s64 %rd184, %rd9, %rd183;
|
||
st.local.u8 [%rd184], %rs72;
|
||
ld.local.u8 %rs73, [%rd182+-1];
|
||
ld.local.u8 %rs74, [%rd182+-2];
|
||
ld.local.u8 %rs75, [%rd182+-3];
|
||
st.local.u8 [%rd182], %rs73;
|
||
st.local.u8 [%rd182+-1], %rs74;
|
||
st.local.u8 [%rd182+-2], %rs75;
|
||
add.s32 %r97, %r854, -4;
|
||
setp.gt.s32 %p130, %r97, -1;
|
||
add.s32 %r855, %r854, -3;
|
||
mov.u32 %r854, %r97;
|
||
@%p130 bra BB0_191;
|
||
|
||
BB0_192:
|
||
st.local.u8 [%rd9], %rs5;
|
||
|
||
BB0_193:
|
||
add.s32 %r847, %r847, 1;
|
||
setp.lt.s32 %p131, %r847, %r80;
|
||
@%p131 bra BB0_179;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_122:
|
||
setp.eq.s32 %p12, %r299, 125;
|
||
@%p12 bra BB0_123;
|
||
bra.uni BB0_367;
|
||
|
||
BB0_123:
|
||
setp.lt.s32 %p202, %r300, 1;
|
||
@%p202 bra BB0_367;
|
||
|
||
and.b32 %r193, %r300, 3;
|
||
setp.eq.s32 %p203, %r193, 0;
|
||
mov.u32 %r897, 0;
|
||
@%p203 bra BB0_299;
|
||
|
||
setp.eq.s32 %p204, %r193, 1;
|
||
@%p204 bra BB0_297;
|
||
bra.uni BB0_126;
|
||
|
||
BB0_297:
|
||
ld.local.u8 %rs187, [%rd11+-1];
|
||
mov.u32 %r896, 0;
|
||
bra.uni BB0_298;
|
||
|
||
BB0_315:
|
||
setp.eq.s32 %p217, %r212, 1;
|
||
@%p217 bra BB0_316;
|
||
bra.uni BB0_317;
|
||
|
||
BB0_316:
|
||
mov.u32 %r906, %r10;
|
||
bra.uni BB0_321;
|
||
|
||
BB0_357:
|
||
add.s32 %r648, %r263, %r10;
|
||
shr.s32 %r649, %r648, 31;
|
||
shr.u32 %r650, %r649, 30;
|
||
add.s32 %r651, %r648, %r650;
|
||
shr.s32 %r652, %r651, 2;
|
||
and.b32 %r653, %r648, 3;
|
||
shl.b32 %r654, %r653, 3;
|
||
shr.s32 %r655, %r263, 31;
|
||
shr.u32 %r656, %r655, 30;
|
||
add.s32 %r657, %r263, %r656;
|
||
shr.s32 %r658, %r657, 2;
|
||
mul.wide.s32 %rd324, %r658, 4;
|
||
add.s64 %rd325, %rd396, %rd324;
|
||
ld.local.u32 %r659, [%rd325+4];
|
||
ld.local.u32 %r660, [%rd325];
|
||
mov.b64 %rd326, {%r660, %r659};
|
||
and.b64 %rd327, %rd326, 16777215;
|
||
shl.b64 %rd328, %rd327, %r654;
|
||
cvt.u32.u64 %r661, %rd328;
|
||
shr.u64 %rd329, %rd328, 32;
|
||
cvt.u32.u64 %r662, %rd329;
|
||
mul.wide.s32 %rd330, %r652, 4;
|
||
add.s64 %rd331, %rd396, %rd330;
|
||
ld.local.u32 %r663, [%rd331];
|
||
or.b32 %r664, %r661, %r663;
|
||
ld.local.u32 %r665, [%rd331+4];
|
||
st.local.u32 [%rd331], %r664;
|
||
or.b32 %r666, %r662, %r665;
|
||
st.local.u32 [%rd331+4], %r666;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_354:
|
||
setp.ne.s32 %p248, %r613, 1;
|
||
@%p248 bra BB0_368;
|
||
|
||
add.s32 %r614, %r263, %r10;
|
||
shr.s32 %r615, %r614, 31;
|
||
shr.u32 %r616, %r615, 30;
|
||
add.s32 %r617, %r614, %r616;
|
||
shr.s32 %r618, %r617, 2;
|
||
and.b32 %r619, %r614, 3;
|
||
shl.b32 %r620, %r619, 3;
|
||
shr.s32 %r621, %r263, 31;
|
||
shr.u32 %r622, %r621, 30;
|
||
add.s32 %r623, %r263, %r622;
|
||
shr.s32 %r624, %r623, 2;
|
||
mul.wide.s32 %rd312, %r624, 4;
|
||
add.s64 %rd313, %rd396, %rd312;
|
||
ld.local.u8 %r625, [%rd313];
|
||
shl.b32 %r626, %r625, %r620;
|
||
mul.wide.s32 %rd314, %r618, 4;
|
||
add.s64 %rd315, %rd396, %rd314;
|
||
ld.local.u32 %r627, [%rd315];
|
||
or.b32 %r628, %r626, %r627;
|
||
st.local.u32 [%rd315], %r628;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_317:
|
||
setp.eq.s32 %p218, %r212, 2;
|
||
@%p218 bra BB0_318;
|
||
bra.uni BB0_319;
|
||
|
||
BB0_318:
|
||
mov.u32 %r906, %r910;
|
||
mov.u32 %r910, %r10;
|
||
bra.uni BB0_320;
|
||
|
||
BB0_328:
|
||
add.s32 %r554, %r912, %r10;
|
||
shr.s32 %r555, %r554, 31;
|
||
shr.u32 %r556, %r555, 30;
|
||
add.s32 %r557, %r554, %r556;
|
||
shr.s32 %r558, %r557, 2;
|
||
and.b32 %r559, %r554, 3;
|
||
shl.b32 %r560, %r559, 3;
|
||
shr.s32 %r561, %r912, 31;
|
||
shr.u32 %r562, %r561, 30;
|
||
add.s32 %r563, %r912, %r562;
|
||
shr.s32 %r564, %r563, 2;
|
||
mul.wide.s32 %rd283, %r564, 4;
|
||
add.s64 %rd284, %rd396, %rd283;
|
||
ld.local.u32 %r565, [%rd284+4];
|
||
ld.local.u32 %r566, [%rd284];
|
||
mov.b64 %rd285, {%r566, %r565};
|
||
and.b64 %rd286, %rd285, 16777215;
|
||
shl.b64 %rd287, %rd286, %r560;
|
||
cvt.u32.u64 %r567, %rd287;
|
||
shr.u64 %rd288, %rd287, 32;
|
||
cvt.u32.u64 %r568, %rd288;
|
||
mul.wide.s32 %rd289, %r558, 4;
|
||
add.s64 %rd290, %rd396, %rd289;
|
||
ld.local.u32 %r569, [%rd290];
|
||
or.b32 %r570, %r567, %r569;
|
||
ld.local.u32 %r571, [%rd290+4];
|
||
st.local.u32 [%rd290], %r570;
|
||
or.b32 %r572, %r568, %r571;
|
||
st.local.u32 [%rd290+4], %r572;
|
||
bra.uni BB0_329;
|
||
|
||
BB0_78:
|
||
setp.ne.s32 %p227, %r519, 1;
|
||
@%p227 bra BB0_329;
|
||
|
||
add.s32 %r520, %r912, %r10;
|
||
shr.s32 %r521, %r520, 31;
|
||
shr.u32 %r522, %r521, 30;
|
||
add.s32 %r523, %r520, %r522;
|
||
shr.s32 %r524, %r523, 2;
|
||
and.b32 %r525, %r520, 3;
|
||
shl.b32 %r526, %r525, 3;
|
||
shr.s32 %r527, %r912, 31;
|
||
shr.u32 %r528, %r527, 30;
|
||
add.s32 %r529, %r912, %r528;
|
||
shr.s32 %r530, %r529, 2;
|
||
mul.wide.s32 %rd271, %r530, 4;
|
||
add.s64 %rd272, %rd396, %rd271;
|
||
ld.local.u8 %r531, [%rd272];
|
||
shl.b32 %r532, %r531, %r526;
|
||
mul.wide.s32 %rd273, %r524, 4;
|
||
add.s64 %rd274, %rd396, %rd273;
|
||
ld.local.u32 %r533, [%rd274];
|
||
or.b32 %r534, %r532, %r533;
|
||
st.local.u32 [%rd274], %r534;
|
||
|
||
BB0_329:
|
||
shl.b32 %r933, %r10, 1;
|
||
shr.u32 %r573, %r10, 31;
|
||
add.s32 %r574, %r10, %r573;
|
||
shr.s32 %r235, %r574, 1;
|
||
setp.lt.s32 %p228, %r10, 2;
|
||
@%p228 bra BB0_368;
|
||
|
||
add.s32 %r576, %r10, %r10;
|
||
add.s32 %r236, %r576, -1;
|
||
mov.u32 %r577, 1;
|
||
max.s32 %r237, %r235, %r577;
|
||
and.b32 %r238, %r237, 3;
|
||
setp.eq.s32 %p229, %r238, 0;
|
||
mov.u32 %r916, 0;
|
||
@%p229 bra BB0_336;
|
||
|
||
setp.eq.s32 %p230, %r238, 1;
|
||
mov.u32 %r914, 0;
|
||
@%p230 bra BB0_335;
|
||
|
||
setp.eq.s32 %p231, %r238, 2;
|
||
mov.u32 %r913, 0;
|
||
@%p231 bra BB0_334;
|
||
|
||
ld.local.u8 %rs151, [%rd11];
|
||
ld.local.u8 %rs152, [%rd12+1];
|
||
st.local.u8 [%rd11], %rs152;
|
||
st.local.u8 [%rd12+1], %rs151;
|
||
mov.u32 %r913, %r577;
|
||
|
||
BB0_334:
|
||
add.s32 %r581, %r913, %r10;
|
||
cvt.s64.s32 %rd291, %r581;
|
||
add.s64 %rd292, %rd9, %rd291;
|
||
ld.local.u8 %rs153, [%rd292];
|
||
sub.s32 %r582, %r236, %r913;
|
||
cvt.s64.s32 %rd293, %r582;
|
||
add.s64 %rd294, %rd9, %rd293;
|
||
ld.local.u8 %rs154, [%rd294];
|
||
st.local.u8 [%rd292], %rs154;
|
||
st.local.u8 [%rd294], %rs153;
|
||
add.s32 %r914, %r913, 1;
|
||
|
||
BB0_335:
|
||
add.s32 %r583, %r914, %r10;
|
||
cvt.s64.s32 %rd295, %r583;
|
||
add.s64 %rd296, %rd9, %rd295;
|
||
ld.local.u8 %rs155, [%rd296];
|
||
sub.s32 %r584, %r236, %r914;
|
||
cvt.s64.s32 %rd297, %r584;
|
||
add.s64 %rd298, %rd9, %rd297;
|
||
ld.local.u8 %rs156, [%rd298];
|
||
st.local.u8 [%rd296], %rs156;
|
||
st.local.u8 [%rd298], %rs155;
|
||
add.s32 %r916, %r914, 1;
|
||
|
||
BB0_336:
|
||
shl.b32 %r933, %r10, 1;
|
||
setp.lt.u32 %p232, %r237, 4;
|
||
@%p232 bra BB0_368;
|
||
|
||
shl.b32 %r585, %r10, 1;
|
||
sub.s32 %r586, %r585, %r916;
|
||
cvt.s64.s32 %rd299, %r586;
|
||
add.s64 %rd374, %rd9, %rd299;
|
||
add.s32 %r587, %r916, %r10;
|
||
cvt.s64.s32 %rd300, %r587;
|
||
add.s64 %rd373, %rd9, %rd300;
|
||
|
||
BB0_338:
|
||
shl.b32 %r933, %r10, 1;
|
||
ld.local.u8 %rs157, [%rd373];
|
||
ld.local.u8 %rs158, [%rd374+-1];
|
||
st.local.u8 [%rd373], %rs158;
|
||
st.local.u8 [%rd374+-1], %rs157;
|
||
ld.local.u8 %rs159, [%rd373+1];
|
||
ld.local.u8 %rs160, [%rd374+-2];
|
||
st.local.u8 [%rd373+1], %rs160;
|
||
st.local.u8 [%rd374+-2], %rs159;
|
||
ld.local.u8 %rs161, [%rd373+2];
|
||
ld.local.u8 %rs162, [%rd374+-3];
|
||
st.local.u8 [%rd373+2], %rs162;
|
||
st.local.u8 [%rd374+-3], %rs161;
|
||
ld.local.u8 %rs163, [%rd373+3];
|
||
add.s64 %rd75, %rd374, -4;
|
||
ld.local.u8 %rs164, [%rd374+-4];
|
||
st.local.u8 [%rd373+3], %rs164;
|
||
st.local.u8 [%rd374+-4], %rs163;
|
||
add.s64 %rd373, %rd373, 4;
|
||
add.s32 %r916, %r916, 4;
|
||
setp.lt.s32 %p233, %r916, %r235;
|
||
mov.u64 %rd374, %rd75;
|
||
@%p233 bra BB0_338;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_126:
|
||
setp.eq.s32 %p205, %r193, 2;
|
||
@%p205 bra BB0_295;
|
||
bra.uni BB0_127;
|
||
|
||
BB0_295:
|
||
ld.local.u8 %rs186, [%rd11+-1];
|
||
mov.u32 %r895, 0;
|
||
bra.uni BB0_296;
|
||
|
||
BB0_319:
|
||
ld.local.u8 %rs144, [%rd11+-1];
|
||
st.local.u8 [%rd11], %rs144;
|
||
add.s32 %r906, %r10, -2;
|
||
|
||
BB0_320:
|
||
cvt.s64.s32 %rd251, %r906;
|
||
add.s64 %rd252, %rd9, %rd251;
|
||
ld.local.u8 %rs145, [%rd252];
|
||
cvt.s64.s32 %rd253, %r910;
|
||
add.s64 %rd254, %rd9, %rd253;
|
||
st.local.u8 [%rd254], %rs145;
|
||
add.s32 %r910, %r906, -1;
|
||
|
||
BB0_321:
|
||
cvt.s64.s32 %rd255, %r910;
|
||
add.s64 %rd256, %rd9, %rd255;
|
||
ld.local.u8 %rs146, [%rd256];
|
||
cvt.s64.s32 %rd257, %r906;
|
||
add.s64 %rd258, %rd9, %rd257;
|
||
st.local.u8 [%rd258], %rs146;
|
||
add.s32 %r909, %r910, -1;
|
||
|
||
BB0_322:
|
||
setp.lt.u32 %p219, %r10, 4;
|
||
@%p219 bra BB0_324;
|
||
|
||
BB0_323:
|
||
cvt.s64.s32 %rd259, %r909;
|
||
add.s64 %rd260, %rd9, %rd259;
|
||
ld.local.u8 %rs147, [%rd260];
|
||
cvt.s64.s32 %rd261, %r910;
|
||
add.s64 %rd262, %rd9, %rd261;
|
||
st.local.u8 [%rd262], %rs147;
|
||
ld.local.u8 %rs148, [%rd260+-1];
|
||
ld.local.u8 %rs149, [%rd260+-2];
|
||
ld.local.u8 %rs150, [%rd260+-3];
|
||
st.local.u8 [%rd260], %rs148;
|
||
st.local.u8 [%rd260+-1], %rs149;
|
||
st.local.u8 [%rd260+-2], %rs150;
|
||
add.s32 %r224, %r909, -4;
|
||
setp.gt.s32 %p220, %r224, -1;
|
||
add.s32 %r910, %r909, -3;
|
||
mov.u32 %r909, %r224;
|
||
@%p220 bra BB0_323;
|
||
|
||
BB0_324:
|
||
st.local.u8 [%rd9], %rs1;
|
||
add.s32 %r933, %r10, 1;
|
||
bra.uni BB0_368;
|
||
|
||
BB0_273:
|
||
setp.eq.s32 %p191, %r174, 1;
|
||
@%p191 bra BB0_278;
|
||
|
||
setp.eq.s32 %p192, %r174, 2;
|
||
@%p192 bra BB0_275;
|
||
bra.uni BB0_276;
|
||
|
||
BB0_275:
|
||
mov.u32 %r887, %r171;
|
||
bra.uni BB0_277;
|
||
|
||
BB0_127:
|
||
ld.local.u8 %rs186, [%rd9];
|
||
ld.local.u8 %rs125, [%rd11+-1];
|
||
st.local.u8 [%rd9], %rs125;
|
||
st.local.u8 [%rd11+-1], %rs186;
|
||
mov.u32 %r895, 1;
|
||
|
||
BB0_296:
|
||
cvt.u64.u32 %rd240, %r895;
|
||
add.s64 %rd241, %rd9, %rd240;
|
||
ld.local.u8 %rs187, [%rd241];
|
||
st.local.u8 [%rd241], %rs186;
|
||
st.local.u8 [%rd11+-1], %rs187;
|
||
add.s32 %r896, %r895, 1;
|
||
|
||
BB0_298:
|
||
cvt.s64.s32 %rd242, %r896;
|
||
add.s64 %rd243, %rd9, %rd242;
|
||
ld.local.u8 %rs126, [%rd243];
|
||
st.local.u8 [%rd243], %rs187;
|
||
st.local.u8 [%rd11+-1], %rs126;
|
||
add.s32 %r897, %r896, 1;
|
||
|
||
BB0_299:
|
||
setp.lt.u32 %p206, %r300, 4;
|
||
@%p206 bra BB0_367;
|
||
|
||
ld.local.u8 %rs188, [%rd11+-1];
|
||
cvt.s64.s32 %rd244, %r897;
|
||
add.s64 %rd372, %rd9, %rd244;
|
||
|
||
BB0_301:
|
||
ld.local.u8 %rs127, [%rd372];
|
||
st.local.u8 [%rd372], %rs188;
|
||
st.local.u8 [%rd11+-1], %rs127;
|
||
ld.local.u8 %rs128, [%rd372+1];
|
||
st.local.u8 [%rd372+1], %rs127;
|
||
st.local.u8 [%rd11+-1], %rs128;
|
||
ld.local.u8 %rs129, [%rd372+2];
|
||
st.local.u8 [%rd372+2], %rs128;
|
||
st.local.u8 [%rd11+-1], %rs129;
|
||
ld.local.u8 %rs188, [%rd372+3];
|
||
st.local.u8 [%rd372+3], %rs129;
|
||
st.local.u8 [%rd11+-1], %rs188;
|
||
add.s64 %rd372, %rd372, 4;
|
||
add.s32 %r897, %r897, 4;
|
||
setp.lt.s32 %p207, %r897, %r300;
|
||
@%p207 bra BB0_301;
|
||
|
||
BB0_367:
|
||
mov.u32 %r933, %r10;
|
||
|
||
BB0_368:
|
||
ld.param.u64 %rd347, [amp_param_2];
|
||
add.s32 %r821, %r821, 1;
|
||
mul.wide.u32 %rd344, %r821, 4;
|
||
add.s64 %rd345, %rd347, %rd344;
|
||
ld.const.u32 %r8, [%rd345];
|
||
setp.ne.s32 %p266, %r8, 0;
|
||
@%p266 bra BB0_7;
|
||
|
||
BB0_369:
|
||
ld.param.u64 %rd348, [amp_param_0];
|
||
st.local.u32 [%rd396+256], %r933;
|
||
add.s64 %rd397, %rd348, %rd127;
|
||
mov.u32 %r935, 0;
|
||
|
||
BB0_370:
|
||
ld.local.u32 %r805, [%rd396];
|
||
st.global.u32 [%rd397], %r805;
|
||
add.s64 %rd397, %rd397, 4;
|
||
add.s64 %rd396, %rd396, 4;
|
||
add.s32 %r935, %r935, 1;
|
||
setp.lt.u32 %p267, %r935, 65;
|
||
@%p267 bra BB0_370;
|
||
|
||
BB0_371:
|
||
ret;
|
||
}
|
||
|
||
|
||
|