Files
odiseectf/olivierdeschacht/kernels/amp_a0.e9f8bbfb.kernel
2018-12-16 14:17:14 +01:00

2659 lines
59 KiB
Plaintext
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: UNKNOWN
// Unknown Toolkit Version
// Based on LLVM 3.4svn
//
.version 6.3
.target sm_52, texmode_independent
.address_size 64
// .globl amp
.entry amp(
.param .u64 .ptr .global .align 4 amp_param_0,
.param .u64 .ptr .global .align 4 amp_param_1,
.param .u64 .ptr .const .align 4 amp_param_2,
.param .u64 .ptr .global .align 4 amp_param_3,
.param .u64 .ptr .global .align 4 amp_param_4,
.param .u32 amp_param_5,
.param .u64 amp_param_6
)
{
.local .align 4 .b8 __local_depot0[260];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<268>;
.reg .b16 %rs<189>;
.reg .b32 %r<936>;
.reg .b64 %rd<398>;
mov.u64 %SPL, __local_depot0;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd122, [amp_param_1];
ld.param.u64 %rd123, [amp_param_2];
ld.param.u64 %rd124, [amp_param_6];
add.u64 %rd125, %SP, 0;
add.u64 %rd396, %SPL, 0;
mov.u32 %r291, %ctaid.x;
mov.u32 %r292, %ntid.x;
mov.b32 %r293, %envreg3;
mad.lo.s32 %r1, %r291, %r292, %r293;
mov.u32 %r2, %tid.x;
add.s32 %r3, %r1, %r2;
cvt.s64.s32 %rd126, %r3;
setp.ge.u64 %p1, %rd126, %rd124;
@%p1 bra BB0_371;
ld.const.u32 %r8, [%rd123];
setp.ne.s32 %p2, %r8, 58;
@%p2 bra BB0_3;
ld.const.u32 %r294, [%rd123+4];
setp.eq.s32 %p3, %r294, 0;
@%p3 bra BB0_371;
BB0_3:
mul.wide.s32 %rd127, %r3, 260;
add.s64 %rd351, %rd122, %rd127;
mov.u32 %r819, 0;
mov.u64 %rd352, %rd396;
BB0_4:
ld.global.u32 %r297, [%rd351];
st.local.u32 [%rd352], %r297;
add.s64 %rd352, %rd352, 4;
add.s64 %rd351, %rd351, 4;
add.s32 %r819, %r819, 1;
setp.lt.u32 %p4, %r819, 65;
@%p4 bra BB0_4;
add.s64 %rd129, %rd122, %rd127;
ld.global.u32 %r933, [%rd129+256];
setp.eq.s32 %p5, %r8, 0;
@%p5 bra BB0_369;
cvta.to.local.u64 %rd9, %rd125;
mov.u32 %r821, 0;
bra.uni BB0_7;
BB0_276:
add.s32 %r887, %r171, 1;
cvt.u64.u32 %rd226, %r171;
add.s64 %rd227, %rd9, %rd226;
ld.local.u8 %rs109, [%rd227+1];
st.local.u8 [%rd227], %rs109;
BB0_277:
add.s32 %r171, %r887, 1;
cvt.s64.s32 %rd228, %r887;
add.s64 %rd229, %rd9, %rd228;
ld.local.u8 %rs110, [%rd229+1];
st.local.u8 [%rd229], %rs110;
BB0_278:
add.s32 %r890, %r171, 1;
cvt.s64.s32 %rd230, %r171;
add.s64 %rd231, %rd9, %rd230;
ld.local.u8 %rs111, [%rd231+1];
st.local.u8 [%rd231], %rs111;
BB0_279:
setp.lt.u32 %p193, %r173, 4;
@%p193 bra BB0_282;
cvt.s64.s32 %rd232, %r890;
add.s64 %rd370, %rd9, %rd232;
BB0_281:
ld.local.u8 %rs112, [%rd370+1];
ld.local.u8 %rs113, [%rd370+2];
ld.local.u8 %rs114, [%rd370+3];
ld.local.u8 %rs115, [%rd370+4];
st.local.u8 [%rd370], %rs112;
st.local.u8 [%rd370+1], %rs113;
st.local.u8 [%rd370+2], %rs114;
add.s64 %rd64, %rd370, 4;
st.local.u8 [%rd370+3], %rs115;
add.s32 %r890, %r890, 4;
setp.lt.s32 %p194, %r890, %r933;
mov.u64 %rd370, %rd64;
@%p194 bra BB0_281;
BB0_282:
mov.u16 %rs116, 0;
st.local.u8 [%rd11+-1], %rs116;
bra.uni BB0_368;
BB0_7:
mov.u32 %r10, %r933;
shr.u32 %r11, %r8, 8;
cvt.u16.u32 %rs1, %r11;
shr.u32 %r12, %r8, 16;
cvt.u16.u32 %rs2, %r12;
bfe.u32 %r13, %r8, 8, 8;
cvt.u64.u32 %rd131, %r13;
add.s64 %rd10, %rd9, %rd131;
cvt.s64.s32 %rd132, %r10;
add.s64 %rd11, %rd9, %rd132;
add.s32 %r300, %r10, -1;
shl.b32 %r301, %r300, 1;
cvt.s64.s32 %rd133, %r301;
add.s64 %rd12, %rd9, %rd133;
and.b32 %r299, %r8, 255;
setp.gt.s32 %p6, %r299, 93;
@%p6 bra BB0_62;
setp.gt.s32 %p34, %r299, 68;
@%p34 bra BB0_35;
setp.gt.s32 %p48, %r299, 44;
@%p48 bra BB0_22;
setp.gt.s32 %p55, %r299, 41;
@%p55 bra BB0_16;
setp.eq.s32 %p59, %r299, 36;
@%p59 bra BB0_325;
bra.uni BB0_12;
BB0_325:
setp.gt.s32 %p221, %r10, 254;
@%p221 bra BB0_367;
add.s32 %r933, %r10, 1;
st.local.u8 [%rd11], %rs1;
bra.uni BB0_368;
BB0_62:
setp.gt.s32 %p7, %r299, 112;
@%p7 bra BB0_92;
setp.gt.s32 %p21, %r299, 104;
@%p21 bra BB0_80;
setp.gt.s32 %p28, %r299, 99;
@%p28 bra BB0_70;
setp.eq.s32 %p32, %r299, 94;
@%p32 bra BB0_311;
bra.uni BB0_66;
BB0_311:
setp.gt.s32 %p214, %r10, 254;
@%p214 bra BB0_367;
add.s32 %r910, %r10, -1;
setp.lt.s32 %p215, %r910, 0;
@%p215 bra BB0_324;
and.b32 %r212, %r10, 3;
setp.eq.s32 %p216, %r212, 0;
@%p216 bra BB0_314;
bra.uni BB0_315;
BB0_314:
mov.u32 %r909, %r910;
mov.u32 %r910, %r10;
bra.uni BB0_322;
BB0_35:
setp.gt.s32 %p35, %r299, 83;
@%p35 bra BB0_46;
setp.gt.s32 %p42, %r299, 75;
@%p42 bra BB0_41;
setp.eq.s32 %p46, %r299, 69;
@%p46 bra BB0_128;
bra.uni BB0_38;
BB0_128:
add.s32 %r302, %r10, 4;
setp.gt.s32 %p61, %r302, 255;
@%p61 bra BB0_367;
mov.u32 %r823, 0;
setp.lt.s32 %p62, %r10, 1;
mov.u64 %rd353, %rd396;
@%p62 bra BB0_131;
BB0_130:
ld.local.u32 %r304, [%rd353];
and.b32 %r305, %r304, 1077952576;
shr.u32 %r306, %r305, 1;
and.b32 %r307, %r304, -2139062144;
shr.u32 %r308, %r307, 2;
not.b32 %r309, %r308;
and.b32 %r310, %r306, %r309;
and.b32 %r311, %r304, 522133279;
add.s32 %r312, %r311, 522133279;
mov.u32 %r313, -84215046;
sub.s32 %r314, %r313, %r311;
and.b32 %r315, %r310, %r314;
and.b32 %r316, %r315, %r312;
or.b32 %r317, %r316, %r304;
st.local.u32 [%rd353], %r317;
add.s64 %rd353, %rd353, 4;
add.s32 %r823, %r823, 4;
setp.lt.s32 %p63, %r823, %r10;
@%p63 bra BB0_130;
BB0_131:
mov.u32 %r825, 0;
ld.local.u32 %r319, [%rd396];
and.b32 %r320, %r319, 64;
shr.u32 %r321, %r320, 1;
and.b32 %r322, %r319, 128;
shr.u32 %r323, %r322, 2;
not.b32 %r324, %r323;
and.b32 %r325, %r321, %r324;
and.b32 %r326, %r319, 522133279;
add.s32 %r327, %r326, 31;
mov.u32 %r328, -84215046;
sub.s32 %r329, %r328, %r326;
and.b32 %r330, %r325, %r329;
and.b32 %r331, %r330, %r327;
not.b32 %r332, %r331;
or.b32 %r333, %r332, -33;
and.b32 %r824, %r333, %r319;
st.local.u32 [%rd396], %r824;
mov.u64 %rd354, %rd396;
@%p62 bra BB0_367;
BB0_132:
cvt.u16.u32 %rs21, %r824;
and.b16 %rs22, %rs21, 255;
setp.eq.s16 %p65, %rs22, 32;
selp.b32 %r334, 65280, 0, %p65;
and.b32 %r335, %r824, 65280;
setp.eq.s32 %p66, %r335, 8192;
or.b32 %r336, %r334, 16711680;
selp.b32 %r337, %r336, %r334, %p66;
and.b32 %r338, %r824, 16711680;
setp.eq.s32 %p67, %r338, 2097152;
or.b32 %r339, %r337, -16777216;
selp.b32 %r340, %r339, %r337, %p67;
and.b32 %r341, %r824, -16777216;
setp.eq.s32 %p68, %r341, 536870912;
selp.b32 %r342, 32, 0, %p68;
and.b32 %r343, %r824, 1077952512;
shr.u32 %r344, %r343, 1;
and.b32 %r345, %r824, -2139062144;
shr.u32 %r346, %r345, 2;
not.b32 %r347, %r346;
and.b32 %r348, %r344, %r347;
and.b32 %r349, %r824, 522133279;
add.s32 %r350, %r349, 522133279;
sub.s32 %r352, %r328, %r349;
and.b32 %r353, %r348, %r352;
and.b32 %r354, %r353, %r350;
and.b32 %r355, %r354, %r340;
not.b32 %r356, %r355;
and.b32 %r357, %r824, %r356;
st.local.u32 [%rd354], %r357;
add.s64 %rd16, %rd354, 4;
ld.local.u32 %r358, [%rd354+4];
and.b32 %r359, %r358, 64;
shr.u32 %r360, %r359, 1;
shr.u32 %r361, %r358, 2;
not.b32 %r362, %r361;
and.b32 %r363, %r358, 522133279;
add.s32 %r364, %r363, 31;
sub.s32 %r365, %r328, %r363;
and.b32 %r366, %r342, %r362;
and.b32 %r367, %r366, %r360;
and.b32 %r368, %r367, %r365;
and.b32 %r369, %r368, %r364;
not.b32 %r370, %r369;
and.b32 %r824, %r358, %r370;
st.local.u32 [%rd354+4], %r824;
add.s32 %r825, %r825, 4;
setp.lt.s32 %p69, %r825, %r10;
mov.u64 %rd354, %rd16;
@%p69 bra BB0_132;
bra.uni BB0_367;
BB0_92:
setp.gt.s32 %p8, %r299, 119;
@%p8 bra BB0_111;
setp.gt.s32 %p15, %r299, 114;
@%p15 bra BB0_106;
setp.eq.s32 %p19, %r299, 113;
@%p19 bra BB0_160;
bra.uni BB0_95;
BB0_160:
add.s32 %r933, %r10, %r10;
setp.gt.s32 %p109, %r933, 255;
@%p109 bra BB0_367;
add.s32 %r842, %r10, -1;
setp.lt.s32 %p110, %r842, 0;
@%p110 bra BB0_368;
and.b32 %r59, %r10, 3;
setp.eq.s32 %p111, %r59, 0;
@%p111 bra BB0_168;
setp.eq.s32 %p112, %r59, 1;
add.s32 %r841, %r10, -1;
@%p112 bra BB0_167;
setp.eq.s32 %p113, %r59, 2;
add.s32 %r840, %r10, -1;
@%p113 bra BB0_166;
ld.local.u8 %rs55, [%rd11+-1];
st.local.u8 [%rd12], %rs55;
ld.local.u8 %rs56, [%rd11+-1];
st.local.u8 [%rd12+1], %rs56;
add.s32 %r840, %r10, -2;
BB0_166:
cvt.s64.s32 %rd158, %r840;
add.s64 %rd159, %rd9, %rd158;
ld.local.u8 %rs57, [%rd159];
shl.b32 %r456, %r840, 1;
cvt.s64.s32 %rd160, %r456;
add.s64 %rd161, %rd9, %rd160;
st.local.u8 [%rd161], %rs57;
ld.local.u8 %rs58, [%rd159];
st.local.u8 [%rd161+1], %rs58;
add.s32 %r841, %r840, -1;
BB0_167:
cvt.s64.s32 %rd162, %r841;
add.s64 %rd163, %rd9, %rd162;
ld.local.u8 %rs59, [%rd163];
shl.b32 %r457, %r841, 1;
cvt.s64.s32 %rd164, %r457;
add.s64 %rd165, %rd9, %rd164;
st.local.u8 [%rd165], %rs59;
ld.local.u8 %rs60, [%rd163];
st.local.u8 [%rd165+1], %rs60;
add.s32 %r842, %r841, -1;
BB0_168:
setp.lt.u32 %p114, %r10, 4;
@%p114 bra BB0_368;
shl.b32 %r843, %r842, 1;
BB0_170:
cvt.s64.s32 %rd166, %r842;
add.s64 %rd167, %rd9, %rd166;
ld.local.u8 %rs61, [%rd167];
cvt.s64.s32 %rd168, %r843;
add.s64 %rd169, %rd9, %rd168;
st.local.u8 [%rd169], %rs61;
ld.local.u8 %rs62, [%rd167];
st.local.u8 [%rd169+1], %rs62;
ld.local.u8 %rs63, [%rd167+-1];
st.local.u8 [%rd169+-2], %rs63;
ld.local.u8 %rs64, [%rd167+-1];
st.local.u8 [%rd169+-1], %rs64;
ld.local.u8 %rs65, [%rd167+-2];
st.local.u8 [%rd169+-4], %rs65;
ld.local.u8 %rs66, [%rd167+-2];
st.local.u8 [%rd169+-3], %rs66;
ld.local.u8 %rs67, [%rd167+-3];
st.local.u8 [%rd169+-6], %rs67;
ld.local.u8 %rs68, [%rd167+-3];
st.local.u8 [%rd169+-5], %rs68;
add.s32 %r843, %r843, -8;
add.s32 %r842, %r842, -4;
setp.gt.s32 %p115, %r842, -1;
@%p115 bra BB0_170;
bra.uni BB0_368;
BB0_22:
setp.gt.s32 %p49, %r299, 63;
@%p49 bra BB0_27;
setp.eq.s32 %p53, %r299, 45;
@%p53 bra BB0_152;
bra.uni BB0_24;
BB0_152:
and.b32 %r449, %r11, 255;
setp.ge.s32 %p100, %r449, %r10;
@%p100 bra BB0_367;
ld.local.u8 %rs40, [%rd10];
add.s16 %rs41, %rs40, -1;
st.local.u8 [%rd10], %rs41;
bra.uni BB0_367;
BB0_80:
setp.gt.s32 %p22, %r299, 107;
@%p22 bra BB0_85;
setp.eq.s32 %p26, %r299, 105;
@%p26 bra BB0_251;
bra.uni BB0_82;
BB0_251:
bfe.u32 %r808, %r8, 8, 8;
add.s32 %r933, %r10, 1;
setp.ge.s32 %p167, %r808, %r933;
setp.gt.s32 %p168, %r10, 254;
or.pred %p169, %p167, %p168;
@%p169 bra BB0_367;
bfe.u32 %r809, %r8, 8, 8;
add.s32 %r476, %r10, -1;
add.s32 %r148, %r809, -1;
setp.le.s32 %p170, %r476, %r148;
@%p170 bra BB0_254;
BB0_253:
cvt.s64.s32 %rd214, %r10;
add.s64 %rd215, %rd9, %rd214;
ld.local.u8 %rs102, [%rd215+-1];
st.local.u8 [%rd215], %rs102;
add.s32 %r150, %r10, -1;
add.s32 %r477, %r10, -2;
setp.gt.s32 %p171, %r477, %r148;
mov.u32 %r10, %r150;
@%p171 bra BB0_253;
BB0_254:
st.local.u8 [%rd10], %rs2;
bra.uni BB0_368;
BB0_46:
setp.gt.s32 %p36, %r299, 89;
@%p36 bra BB0_56;
setp.eq.s32 %p40, %r299, 84;
@%p40 bra BB0_358;
bra.uni BB0_48;
BB0_358:
and.b32 %r677, %r11, 255;
setp.ge.s32 %p255, %r677, %r10;
@%p255 bra BB0_367;
bfe.u32 %r678, %r8, 10, 6;
shl.b32 %r679, %r11, 3;
and.b32 %r680, %r679, 24;
mov.u32 %r681, 32;
shl.b32 %r682, %r681, %r680;
mul.wide.u32 %rd342, %r678, 4;
add.s64 %rd343, %rd396, %rd342;
ld.local.u32 %r683, [%rd343];
and.b32 %r684, %r683, 1077952576;
shr.u32 %r685, %r684, 1;
shr.u32 %r686, %r683, 2;
not.b32 %r687, %r686;
and.b32 %r688, %r683, 522133279;
add.s32 %r689, %r688, 522133279;
mov.u32 %r690, -84215046;
sub.s32 %r691, %r690, %r688;
and.b32 %r692, %r682, %r687;
and.b32 %r693, %r692, %r685;
and.b32 %r694, %r693, %r691;
and.b32 %r695, %r694, %r689;
xor.b32 %r696, %r695, %r683;
st.local.u32 [%rd343], %r696;
bra.uni BB0_367;
BB0_111:
setp.gt.s32 %p9, %r299, 121;
@%p9 bra BB0_120;
setp.eq.s32 %p13, %r299, 120;
@%p13 bra BB0_263;
bra.uni BB0_113;
BB0_263:
and.b32 %r158, %r11, 255;
setp.ge.s32 %p178, %r158, %r10;
@%p178 bra BB0_367;
shr.u32 %r806, %r8, 16;
and.b32 %r933, %r806, 255;
add.s32 %r485, %r158, %r933;
setp.gt.s32 %p179, %r485, %r10;
@%p179 bra BB0_367;
setp.eq.s32 %p180, %r933, 0;
@%p180 bra BB0_268;
bfe.u32 %r883, %r8, 8, 8;
mov.u32 %r884, 0;
mov.u64 %rd367, %rd9;
BB0_267:
cvt.s64.s32 %rd220, %r883;
add.s64 %rd221, %rd9, %rd220;
ld.local.u8 %rs105, [%rd221];
st.local.u8 [%rd367], %rs105;
add.s64 %rd367, %rd367, 1;
add.s32 %r883, %r883, 1;
add.s32 %r884, %r884, 1;
setp.lt.s32 %p181, %r884, %r933;
@%p181 bra BB0_267;
BB0_268:
setp.ge.s32 %p182, %r933, %r10;
@%p182 bra BB0_368;
shr.u32 %r807, %r8, 16;
cvt.u64.u32 %rd222, %r807;
and.b64 %rd223, %rd222, 255;
add.s64 %rd368, %rd9, %rd223;
mov.u32 %r885, %r933;
BB0_270:
mov.u16 %rs106, 0;
st.local.u8 [%rd368], %rs106;
add.s64 %rd368, %rd368, 1;
add.s32 %r885, %r885, 1;
setp.lt.s32 %p183, %r885, %r10;
@%p183 bra BB0_270;
bra.uni BB0_368;
BB0_16:
setp.eq.s32 %p56, %r299, 42;
@%p56 bra BB0_158;
setp.eq.s32 %p57, %r299, 43;
@%p57 bra BB0_154;
bra.uni BB0_18;
BB0_154:
and.b32 %r450, %r11, 255;
setp.ge.s32 %p101, %r450, %r10;
@%p101 bra BB0_367;
ld.local.u8 %rs42, [%rd10];
add.s16 %rs43, %rs42, 1;
st.local.u8 [%rd10], %rs43;
bra.uni BB0_367;
BB0_70:
setp.eq.s32 %p29, %r299, 100;
@%p29 bra BB0_349;
setp.eq.s32 %p30, %r299, 101;
@%p30 bra BB0_133;
bra.uni BB0_72;
BB0_133:
add.s32 %r371, %r10, 4;
setp.gt.s32 %p70, %r371, 255;
@%p70 bra BB0_367;
mov.u32 %r826, 0;
setp.lt.s32 %p71, %r10, 1;
mov.u64 %rd355, %rd396;
@%p71 bra BB0_136;
BB0_135:
ld.local.u32 %r373, [%rd355];
and.b32 %r374, %r373, 1077952576;
shr.u32 %r375, %r374, 1;
and.b32 %r376, %r373, -2139062144;
shr.u32 %r377, %r376, 2;
not.b32 %r378, %r377;
and.b32 %r379, %r375, %r378;
and.b32 %r380, %r373, 522133279;
add.s32 %r381, %r380, 522133279;
mov.u32 %r382, -84215046;
sub.s32 %r383, %r382, %r380;
and.b32 %r384, %r379, %r383;
and.b32 %r385, %r384, %r381;
or.b32 %r386, %r385, %r373;
st.local.u32 [%rd355], %r386;
add.s64 %rd355, %rd355, 4;
add.s32 %r826, %r826, 4;
setp.lt.s32 %p72, %r826, %r10;
@%p72 bra BB0_135;
BB0_136:
ld.local.u32 %r387, [%rd396];
and.b32 %r388, %r387, 64;
shr.u32 %r389, %r388, 1;
and.b32 %r390, %r387, 128;
shr.u32 %r391, %r390, 2;
not.b32 %r392, %r391;
and.b32 %r393, %r389, %r392;
and.b32 %r394, %r387, 522133279;
add.s32 %r395, %r394, 31;
mov.u32 %r396, -84215046;
sub.s32 %r397, %r396, %r394;
and.b32 %r398, %r393, %r397;
and.b32 %r399, %r398, %r395;
not.b32 %r400, %r399;
or.b32 %r401, %r400, -33;
and.b32 %r827, %r401, %r387;
st.local.u32 [%rd396], %r827;
@%p71 bra BB0_367;
and.b32 %r24, %r11, 255;
mov.u32 %r828, 0;
mov.u64 %rd356, %rd396;
BB0_138:
cvt.u16.u32 %rs23, %r827;
and.b16 %rs24, %rs23, 255;
and.b16 %rs25, %rs1, 255;
setp.eq.s16 %p74, %rs24, %rs25;
selp.b32 %r403, 65280, 0, %p74;
shr.u16 %rs26, %rs23, 8;
setp.eq.s16 %p75, %rs26, %rs25;
or.b32 %r404, %r403, 16711680;
selp.b32 %r405, %r404, %r403, %p75;
shr.u32 %r406, %r827, 16;
cvt.u16.u32 %rs27, %r406;
and.b16 %rs28, %rs27, 255;
setp.eq.s16 %p76, %rs28, %rs25;
or.b32 %r407, %r405, -16777216;
selp.b32 %r408, %r407, %r405, %p76;
shr.u32 %r409, %r827, 24;
setp.eq.s32 %p77, %r409, %r24;
selp.b32 %r410, 32, 0, %p77;
and.b32 %r411, %r827, 1077952512;
shr.u32 %r412, %r411, 1;
and.b32 %r413, %r827, -2139062144;
shr.u32 %r414, %r413, 2;
not.b32 %r415, %r414;
and.b32 %r416, %r412, %r415;
and.b32 %r417, %r827, 522133279;
add.s32 %r418, %r417, 522133279;
sub.s32 %r420, %r396, %r417;
and.b32 %r421, %r416, %r420;
and.b32 %r422, %r421, %r418;
and.b32 %r423, %r422, %r408;
not.b32 %r424, %r423;
and.b32 %r425, %r827, %r424;
st.local.u32 [%rd356], %r425;
add.s64 %rd20, %rd356, 4;
ld.local.u32 %r426, [%rd356+4];
and.b32 %r427, %r426, 64;
shr.u32 %r428, %r427, 1;
shr.u32 %r429, %r426, 2;
not.b32 %r430, %r429;
and.b32 %r431, %r426, 522133279;
add.s32 %r432, %r431, 31;
sub.s32 %r433, %r396, %r431;
and.b32 %r434, %r410, %r430;
and.b32 %r435, %r434, %r428;
and.b32 %r436, %r435, %r433;
and.b32 %r437, %r436, %r432;
not.b32 %r438, %r437;
and.b32 %r827, %r426, %r438;
st.local.u32 [%rd356+4], %r827;
add.s32 %r828, %r828, 4;
setp.lt.s32 %p78, %r828, %r10;
mov.u64 %rd356, %rd20;
@%p78 bra BB0_138;
bra.uni BB0_367;
BB0_41:
setp.eq.s32 %p43, %r299, 76;
@%p43 bra BB0_156;
setp.eq.s32 %p44, %r299, 79;
@%p44 bra BB0_255;
bra.uni BB0_43;
BB0_255:
and.b32 %r881, %r11, 255;
setp.ge.s32 %p172, %r881, %r10;
@%p172 bra BB0_367;
shr.u32 %r810, %r8, 16;
and.b32 %r152, %r810, 255;
add.s32 %r478, %r881, %r152;
setp.gt.s32 %p173, %r478, %r10;
@%p173 bra BB0_367;
sub.s32 %r933, %r10, %r152;
setp.ge.s32 %p174, %r881, %r933;
@%p174 bra BB0_260;
bfe.u32 %r479, %r8, 16, 8;
bfe.u32 %r481, %r8, 8, 8;
add.s32 %r482, %r479, %r481;
cvt.u64.u32 %rd216, %r482;
add.s64 %rd365, %rd9, %rd216;
cvt.u64.u32 %rd217, %r11;
and.b64 %rd218, %rd217, 255;
add.s64 %rd364, %rd9, %rd218;
BB0_259:
ld.local.u8 %rs103, [%rd365];
st.local.u8 [%rd364], %rs103;
add.s64 %rd365, %rd365, 1;
add.s64 %rd364, %rd364, 1;
add.s32 %r881, %r881, 1;
setp.lt.s32 %p175, %r881, %r933;
@%p175 bra BB0_259;
BB0_260:
setp.eq.s32 %p176, %r152, 0;
@%p176 bra BB0_368;
bfe.u32 %r483, %r8, 16, 8;
sub.s32 %r484, %r10, %r483;
cvt.s64.s32 %rd219, %r484;
add.s64 %rd366, %rd9, %rd219;
mov.u32 %r882, %r933;
BB0_262:
mov.u16 %rs104, 0;
st.local.u8 [%rd366], %rs104;
add.s64 %rd366, %rd366, 1;
add.s32 %r882, %r882, 1;
setp.lt.s32 %p177, %r882, %r10;
@%p177 bra BB0_262;
bra.uni BB0_368;
BB0_106:
setp.eq.s32 %p16, %r299, 115;
@%p16 bra BB0_226;
setp.eq.s32 %p17, %r299, 116;
@%p17 bra BB0_360;
bra.uni BB0_108;
BB0_360:
mov.u32 %r928, 0;
setp.lt.s32 %p256, %r10, 1;
mov.u64 %rd391, %rd396;
@%p256 bra BB0_367;
BB0_361:
ld.local.u32 %r698, [%rd391];
and.b32 %r699, %r698, 1077952576;
shr.u32 %r700, %r699, 1;
and.b32 %r701, %r698, -2139062144;
shr.u32 %r702, %r701, 2;
not.b32 %r703, %r702;
and.b32 %r704, %r700, %r703;
and.b32 %r705, %r698, 522133279;
add.s32 %r706, %r705, 522133279;
mov.u32 %r707, -84215046;
sub.s32 %r708, %r707, %r705;
and.b32 %r709, %r704, %r708;
and.b32 %r710, %r709, %r706;
xor.b32 %r711, %r710, %r698;
st.local.u32 [%rd391], %r711;
add.s64 %rd391, %rd391, 4;
add.s32 %r928, %r928, 4;
setp.lt.s32 %p257, %r928, %r10;
@%p257 bra BB0_361;
bra.uni BB0_367;
BB0_27:
setp.eq.s32 %p50, %r299, 64;
@%p50 bra BB0_194;
setp.eq.s32 %p51, %r299, 67;
@%p51 bra BB0_362;
bra.uni BB0_29;
BB0_362:
mov.u32 %r929, 0;
setp.lt.s32 %p258, %r10, 1;
mov.u64 %rd392, %rd396;
@%p258 bra BB0_364;
BB0_363:
ld.local.u32 %r713, [%rd392];
and.b32 %r714, %r713, 1077952576;
shr.u32 %r715, %r714, 1;
and.b32 %r716, %r713, -2139062144;
shr.u32 %r717, %r716, 2;
not.b32 %r718, %r717;
and.b32 %r719, %r715, %r718;
and.b32 %r720, %r713, 522133279;
add.s32 %r721, %r720, 522133279;
mov.u32 %r722, -84215046;
sub.s32 %r723, %r722, %r720;
and.b32 %r724, %r719, %r723;
and.b32 %r725, %r724, %r721;
not.b32 %r726, %r725;
and.b32 %r727, %r713, %r726;
st.local.u32 [%rd392], %r727;
add.s64 %rd392, %rd392, 4;
add.s32 %r929, %r929, 4;
setp.lt.s32 %p259, %r929, %r10;
@%p259 bra BB0_363;
BB0_364:
ld.local.u32 %r728, [%rd396];
and.b32 %r729, %r728, 64;
shr.u32 %r730, %r729, 1;
shr.u32 %r731, %r728, 2;
and.b32 %r732, %r728, 522133279;
add.s32 %r733, %r732, 31;
mov.u32 %r734, -84215046;
sub.s32 %r735, %r734, %r732;
not.b32 %r736, %r731;
and.b32 %r737, %r736, %r730;
and.b32 %r738, %r737, %r735;
and.b32 %r739, %r738, %r733;
or.b32 %r740, %r739, %r728;
st.local.u32 [%rd396], %r740;
bra.uni BB0_367;
BB0_85:
setp.eq.s32 %p23, %r299, 108;
@%p23 bra BB0_365;
setp.eq.s32 %p24, %r299, 111;
@%p24 bra BB0_249;
bra.uni BB0_87;
BB0_249:
and.b32 %r475, %r11, 255;
setp.ge.s32 %p166, %r475, %r10;
@%p166 bra BB0_367;
st.local.u8 [%rd10], %rs2;
bra.uni BB0_367;
BB0_56:
setp.eq.s32 %p37, %r299, 90;
@%p37 bra BB0_171;
setp.eq.s32 %p38, %r299, 91;
@%p38 bra BB0_283;
bra.uni BB0_58;
BB0_283:
setp.lt.s32 %p195, %r10, 1;
@%p195 bra BB0_367;
add.s32 %r933, %r10, -1;
setp.lt.s32 %p196, %r933, 1;
@%p196 bra BB0_294;
and.b32 %r184, %r933, 3;
setp.eq.s32 %p197, %r184, 0;
mov.u32 %r894, 0;
@%p197 bra BB0_291;
setp.eq.s32 %p198, %r184, 1;
mov.u32 %r892, 0;
@%p198 bra BB0_290;
setp.eq.s32 %p199, %r184, 2;
mov.u32 %r891, 0;
@%p199 bra BB0_289;
ld.local.u8 %rs117, [%rd9+1];
st.local.u8 [%rd9], %rs117;
mov.u32 %r891, 1;
BB0_289:
add.s32 %r892, %r891, 1;
cvt.u64.u32 %rd233, %r892;
add.s64 %rd234, %rd9, %rd233;
ld.local.u8 %rs118, [%rd234];
cvt.u64.u32 %rd235, %r891;
add.s64 %rd236, %rd9, %rd235;
st.local.u8 [%rd236], %rs118;
BB0_290:
add.s32 %r894, %r892, 1;
cvt.s64.s32 %rd237, %r892;
add.s64 %rd238, %rd9, %rd237;
ld.local.u8 %rs119, [%rd238+1];
st.local.u8 [%rd238], %rs119;
BB0_291:
setp.lt.u32 %p200, %r933, 4;
@%p200 bra BB0_294;
cvt.s64.s32 %rd239, %r894;
add.s64 %rd371, %rd9, %rd239;
BB0_293:
ld.local.u8 %rs120, [%rd371+1];
ld.local.u8 %rs121, [%rd371+2];
ld.local.u8 %rs122, [%rd371+3];
ld.local.u8 %rs123, [%rd371+4];
st.local.u8 [%rd371], %rs120;
st.local.u8 [%rd371+1], %rs121;
st.local.u8 [%rd371+2], %rs122;
add.s64 %rd67, %rd371, 4;
st.local.u8 [%rd371+3], %rs123;
add.s32 %r894, %r894, 4;
setp.lt.s32 %p201, %r894, %r933;
mov.u64 %rd371, %rd67;
@%p201 bra BB0_293;
BB0_294:
mov.u16 %rs124, 0;
st.local.u8 [%rd11+-1], %rs124;
bra.uni BB0_368;
BB0_120:
setp.eq.s32 %p10, %r299, 122;
@%p10 bra BB0_177;
setp.eq.s32 %p11, %r299, 123;
@%p11 bra BB0_302;
bra.uni BB0_122;
BB0_302:
setp.lt.s32 %p208, %r300, 1;
@%p208 bra BB0_367;
and.b32 %r202, %r300, 3;
setp.eq.s32 %p209, %r202, 0;
mov.u32 %r902, %r300;
@%p209 bra BB0_309;
setp.eq.s32 %p210, %r202, 1;
mov.u32 %r900, %r300;
@%p210 bra BB0_308;
setp.eq.s32 %p211, %r202, 2;
mov.u32 %r899, %r300;
@%p211 bra BB0_307;
ld.local.u8 %rs130, [%rd9];
ld.local.u8 %rs131, [%rd11+-1];
st.local.u8 [%rd9], %rs131;
st.local.u8 [%rd11+-1], %rs130;
add.s32 %r899, %r10, -2;
BB0_307:
ld.local.u8 %rs132, [%rd9];
cvt.s64.s32 %rd245, %r899;
add.s64 %rd246, %rd9, %rd245;
ld.local.u8 %rs133, [%rd246];
st.local.u8 [%rd9], %rs133;
st.local.u8 [%rd246], %rs132;
add.s32 %r900, %r899, -1;
BB0_308:
ld.local.u8 %rs134, [%rd9];
cvt.s64.s32 %rd247, %r900;
add.s64 %rd248, %rd9, %rd247;
ld.local.u8 %rs135, [%rd248];
st.local.u8 [%rd9], %rs135;
st.local.u8 [%rd248], %rs134;
add.s32 %r902, %r900, -1;
BB0_309:
setp.lt.u32 %p212, %r300, 4;
@%p212 bra BB0_367;
BB0_310:
ld.local.u8 %rs136, [%rd9];
cvt.s64.s32 %rd249, %r902;
add.s64 %rd250, %rd9, %rd249;
ld.local.u8 %rs137, [%rd250];
st.local.u8 [%rd9], %rs137;
st.local.u8 [%rd250], %rs136;
ld.local.u8 %rs138, [%rd9];
ld.local.u8 %rs139, [%rd250+-1];
st.local.u8 [%rd9], %rs139;
st.local.u8 [%rd250+-1], %rs138;
ld.local.u8 %rs140, [%rd9];
ld.local.u8 %rs141, [%rd250+-2];
st.local.u8 [%rd9], %rs141;
st.local.u8 [%rd250+-2], %rs140;
ld.local.u8 %rs142, [%rd9];
ld.local.u8 %rs143, [%rd250+-3];
st.local.u8 [%rd9], %rs143;
st.local.u8 [%rd250+-3], %rs142;
add.s32 %r902, %r902, -4;
setp.gt.s32 %p213, %r902, 0;
@%p213 bra BB0_310;
bra.uni BB0_367;
BB0_12:
setp.eq.s32 %p60, %r299, 39;
@%p60 bra BB0_13;
bra.uni BB0_367;
BB0_13:
and.b32 %r933, %r11, 255;
setp.ge.s32 %p164, %r933, %r10;
@%p164 bra BB0_367;
cvt.u64.u32 %rd212, %r11;
and.b64 %rd213, %rd212, 255;
add.s64 %rd363, %rd9, %rd213;
mov.u32 %r879, %r933;
BB0_15:
mov.u16 %rs101, 0;
st.local.u8 [%rd363], %rs101;
add.s64 %rd363, %rd363, 1;
add.s32 %r879, %r879, 1;
setp.lt.s32 %p165, %r879, %r10;
@%p165 bra BB0_15;
bra.uni BB0_368;
BB0_66:
setp.eq.s32 %p33, %r299, 99;
@%p33 bra BB0_67;
bra.uni BB0_367;
BB0_67:
mov.u32 %r931, 0;
setp.lt.s32 %p262, %r10, 1;
mov.u64 %rd394, %rd396;
@%p262 bra BB0_69;
BB0_68:
ld.local.u32 %r758, [%rd394];
and.b32 %r759, %r758, 1077952576;
shr.u32 %r760, %r759, 1;
and.b32 %r761, %r758, -2139062144;
shr.u32 %r762, %r761, 2;
not.b32 %r763, %r762;
and.b32 %r764, %r760, %r763;
and.b32 %r765, %r758, 522133279;
add.s32 %r766, %r765, 522133279;
mov.u32 %r767, -84215046;
sub.s32 %r768, %r767, %r765;
and.b32 %r769, %r764, %r768;
and.b32 %r770, %r769, %r766;
or.b32 %r771, %r770, %r758;
st.local.u32 [%rd394], %r771;
add.s64 %rd394, %rd394, 4;
add.s32 %r931, %r931, 4;
setp.lt.s32 %p263, %r931, %r10;
@%p263 bra BB0_68;
BB0_69:
ld.local.u32 %r772, [%rd396];
and.b32 %r773, %r772, 64;
shr.u32 %r774, %r773, 1;
and.b32 %r775, %r772, 128;
shr.u32 %r776, %r775, 2;
not.b32 %r777, %r776;
and.b32 %r778, %r774, %r777;
and.b32 %r779, %r772, 522133279;
add.s32 %r780, %r779, 31;
mov.u32 %r781, -84215046;
sub.s32 %r782, %r781, %r779;
and.b32 %r783, %r778, %r782;
and.b32 %r784, %r783, %r780;
not.b32 %r785, %r784;
or.b32 %r786, %r785, -33;
and.b32 %r787, %r786, %r772;
st.local.u32 [%rd396], %r787;
bra.uni BB0_367;
BB0_38:
setp.eq.s32 %p47, %r299, 75;
@%p47 bra BB0_39;
bra.uni BB0_367;
BB0_39:
setp.lt.s32 %p107, %r10, 2;
@%p107 bra BB0_367;
ld.local.u8 %rs49, [%rd11+-2];
ld.local.u8 %rs50, [%rd11+-1];
st.local.u8 [%rd11+-2], %rs50;
st.local.u8 [%rd11+-1], %rs49;
bra.uni BB0_367;
BB0_95:
setp.eq.s32 %p20, %r299, 114;
@%p20 bra BB0_96;
bra.uni BB0_367;
BB0_96:
shr.u32 %r667, %r10, 31;
add.s32 %r668, %r10, %r667;
shr.s32 %r264, %r668, 1;
setp.lt.s32 %p249, %r10, 2;
@%p249 bra BB0_367;
add.s32 %r265, %r10, -1;
mov.u32 %r670, 1;
max.s32 %r266, %r264, %r670;
and.b32 %r267, %r266, 3;
setp.eq.s32 %p250, %r267, 0;
mov.u32 %r926, 0;
@%p250 bra BB0_103;
setp.eq.s32 %p251, %r267, 1;
mov.u32 %r925, 0;
@%p251 bra BB0_102;
setp.eq.s32 %p252, %r267, 2;
mov.u32 %r924, 0;
@%p252 bra BB0_101;
ld.local.u8 %rs172, [%rd9];
ld.local.u8 %rs173, [%rd11+-1];
st.local.u8 [%rd9], %rs173;
st.local.u8 [%rd11+-1], %rs172;
mov.u32 %r924, %r670;
BB0_101:
cvt.u64.u32 %rd332, %r924;
add.s64 %rd333, %rd9, %rd332;
ld.local.u8 %rs174, [%rd333];
sub.s32 %r674, %r265, %r924;
cvt.s64.s32 %rd334, %r674;
add.s64 %rd335, %rd9, %rd334;
ld.local.u8 %rs175, [%rd335];
st.local.u8 [%rd333], %rs175;
st.local.u8 [%rd335], %rs174;
add.s32 %r925, %r924, 1;
BB0_102:
cvt.s64.s32 %rd336, %r925;
add.s64 %rd337, %rd9, %rd336;
ld.local.u8 %rs176, [%rd337];
sub.s32 %r675, %r265, %r925;
cvt.s64.s32 %rd338, %r675;
add.s64 %rd339, %rd9, %rd338;
ld.local.u8 %rs177, [%rd339];
st.local.u8 [%rd337], %rs177;
st.local.u8 [%rd339], %rs176;
add.s32 %r926, %r925, 1;
BB0_103:
setp.lt.u32 %p253, %r266, 4;
@%p253 bra BB0_367;
cvt.s64.s32 %rd340, %r926;
add.s64 %rd390, %rd9, %rd340;
sub.s32 %r676, %r10, %r926;
cvt.s64.s32 %rd341, %r676;
add.s64 %rd389, %rd9, %rd341;
BB0_105:
ld.local.u8 %rs178, [%rd390];
ld.local.u8 %rs179, [%rd389+-1];
st.local.u8 [%rd390], %rs179;
st.local.u8 [%rd389+-1], %rs178;
ld.local.u8 %rs180, [%rd390+1];
ld.local.u8 %rs181, [%rd389+-2];
st.local.u8 [%rd390+1], %rs181;
st.local.u8 [%rd389+-2], %rs180;
ld.local.u8 %rs182, [%rd390+2];
ld.local.u8 %rs183, [%rd389+-3];
st.local.u8 [%rd390+2], %rs183;
st.local.u8 [%rd389+-3], %rs182;
ld.local.u8 %rs184, [%rd390+3];
add.s64 %rd105, %rd389, -4;
ld.local.u8 %rs185, [%rd389+-4];
st.local.u8 [%rd390+3], %rs185;
st.local.u8 [%rd389+-4], %rs184;
add.s64 %rd390, %rd390, 4;
add.s32 %r926, %r926, 4;
setp.lt.s32 %p254, %r926, %r264;
mov.u64 %rd389, %rd105;
@%p254 bra BB0_105;
bra.uni BB0_367;
BB0_24:
setp.eq.s32 %p54, %r299, 46;
@%p54 bra BB0_25;
bra.uni BB0_367;
BB0_25:
and.b32 %r447, %r11, 255;
add.s32 %r448, %r447, 1;
setp.ge.s32 %p99, %r448, %r10;
@%p99 bra BB0_367;
ld.local.u8 %rs39, [%rd10+1];
st.local.u8 [%rd10], %rs39;
bra.uni BB0_367;
BB0_82:
setp.eq.s32 %p27, %r299, 107;
@%p27 bra BB0_83;
bra.uni BB0_367;
BB0_83:
setp.lt.s32 %p108, %r10, 2;
@%p108 bra BB0_367;
add.u64 %rd157, %SPL, 0;
ld.local.v2.u8 {%rs51, %rs52}, [%rd157];
st.local.v2.u8 [%rd157], {%rs52, %rs51};
bra.uni BB0_367;
BB0_48:
setp.eq.s32 %p41, %r299, 89;
@%p41 bra BB0_49;
bra.uni BB0_367;
BB0_49:
and.b32 %r29, %r11, 255;
setp.le.s32 %p79, %r10, %r29;
@%p79 bra BB0_367;
add.s32 %r933, %r29, %r10;
setp.gt.s32 %p80, %r933, 255;
@%p80 bra BB0_367;
setp.eq.s32 %p81, %r29, 0;
@%p81 bra BB0_368;
cvt.s64.s32 %rd350, %r10;
bfe.u32 %r440, %r8, 8, 8;
sub.s32 %r441, %r10, %r440;
cvt.s64.s32 %rd134, %r441;
add.s64 %rd358, %rd9, %rd134;
add.s64 %rd357, %rd9, %rd350;
mov.u32 %r830, 0;
BB0_53:
setp.gt.s32 %p82, %r10, 254;
@%p82 bra BB0_55;
ld.local.u8 %rs29, [%rd358];
st.local.u8 [%rd357], %rs29;
BB0_55:
add.s32 %r830, %r830, 1;
add.s64 %rd358, %rd358, 1;
add.s64 %rd357, %rd357, 1;
add.s32 %r10, %r10, 1;
setp.lt.s32 %p83, %r830, %r29;
@%p83 bra BB0_53;
bra.uni BB0_368;
BB0_113:
setp.eq.s32 %p14, %r299, 121;
@%p14 bra BB0_114;
bra.uni BB0_367;
BB0_114:
and.b32 %r35, %r11, 255;
setp.ge.s32 %p84, %r35, %r10;
@%p84 bra BB0_367;
add.s32 %r933, %r35, %r10;
setp.gt.s32 %p85, %r933, 255;
@%p85 bra BB0_367;
setp.eq.s32 %p86, %r35, 0;
@%p86 bra BB0_368;
mov.u32 %r831, 0;
BB0_118:
cvt.u64.u32 %rd27, %r831;
add.s32 %r39, %r831, %r10;
add.s32 %r443, %r39, 1;
and.b32 %r40, %r831, 255;
setp.ge.s32 %p87, %r40, %r443;
setp.gt.s32 %p88, %r39, 254;
or.pred %p89, %p87, %p88;
@%p89 bra BB0_151;
shl.b32 %r444, %r831, 1;
cvt.s64.s32 %rd136, %r444;
add.s64 %rd137, %rd9, %rd136;
ld.local.u8 %rs3, [%rd137];
add.s32 %r839, %r39, -1;
add.s32 %r42, %r40, -1;
setp.le.s32 %p90, %r839, %r42;
@%p90 bra BB0_150;
and.b32 %r812, %r10, 3;
setp.eq.s32 %p91, %r812, 0;
@%p91 bra BB0_140;
bra.uni BB0_141;
BB0_140:
mov.u32 %r838, %r839;
mov.u32 %r839, %r39;
bra.uni BB0_148;
BB0_141:
and.b32 %r813, %r10, 3;
setp.eq.s32 %p92, %r813, 1;
@%p92 bra BB0_142;
bra.uni BB0_143;
BB0_142:
mov.u32 %r835, %r39;
bra.uni BB0_147;
BB0_143:
and.b32 %r814, %r10, 3;
setp.eq.s32 %p93, %r814, 2;
@%p93 bra BB0_144;
bra.uni BB0_145;
BB0_144:
mov.u32 %r835, %r839;
mov.u32 %r839, %r39;
bra.uni BB0_146;
BB0_145:
cvt.s64.s32 %rd138, %r839;
add.s64 %rd139, %rd9, %rd138;
ld.local.u8 %rs30, [%rd139];
st.local.u8 [%rd139+1], %rs30;
add.s32 %r835, %r39, -2;
BB0_146:
cvt.s64.s32 %rd140, %r835;
add.s64 %rd141, %rd9, %rd140;
ld.local.u8 %rs31, [%rd141];
cvt.s64.s32 %rd142, %r839;
add.s64 %rd143, %rd9, %rd142;
st.local.u8 [%rd143], %rs31;
add.s32 %r839, %r835, -1;
BB0_147:
cvt.s64.s32 %rd144, %r839;
add.s64 %rd145, %rd9, %rd144;
ld.local.u8 %rs32, [%rd145];
cvt.s64.s32 %rd146, %r835;
add.s64 %rd147, %rd9, %rd146;
st.local.u8 [%rd147], %rs32;
add.s32 %r838, %r839, -1;
BB0_148:
setp.lt.u32 %p94, %r10, 4;
@%p94 bra BB0_150;
BB0_149:
cvt.s64.s32 %rd148, %r838;
add.s64 %rd149, %rd9, %rd148;
ld.local.u8 %rs33, [%rd149];
cvt.s64.s32 %rd150, %r839;
add.s64 %rd151, %rd9, %rd150;
st.local.u8 [%rd151], %rs33;
ld.local.u8 %rs34, [%rd149+-1];
ld.local.u8 %rs35, [%rd149+-2];
ld.local.u8 %rs36, [%rd149+-3];
st.local.u8 [%rd149], %rs34;
st.local.u8 [%rd149+-1], %rs35;
st.local.u8 [%rd149+-2], %rs36;
add.s32 %r54, %r838, -4;
setp.gt.s32 %p95, %r54, %r42;
add.s32 %r839, %r838, -3;
mov.u32 %r838, %r54;
@%p95 bra BB0_149;
BB0_150:
and.b64 %rd152, %rd27, 255;
add.s64 %rd153, %rd9, %rd152;
st.local.u8 [%rd153], %rs3;
BB0_151:
cvt.u32.u64 %r445, %rd27;
add.s32 %r831, %r445, 1;
setp.lt.s32 %p96, %r831, %r35;
@%p96 bra BB0_118;
bra.uni BB0_368;
BB0_158:
shr.u32 %r811, %r8, 16;
and.b32 %r454, %r11, 255;
setp.ge.s32 %p104, %r454, %r10;
and.b32 %r57, %r811, 255;
setp.ge.s32 %p105, %r57, %r10;
or.pred %p106, %p104, %p105;
@%p106 bra BB0_367;
ld.local.u8 %rs47, [%rd10];
cvt.u64.u32 %rd154, %r57;
add.s64 %rd155, %rd9, %rd154;
ld.local.u8 %rs48, [%rd155];
st.local.u8 [%rd10], %rs48;
st.local.u8 [%rd155], %rs47;
bra.uni BB0_367;
BB0_18:
setp.eq.s32 %p58, %r299, 44;
@%p58 bra BB0_19;
bra.uni BB0_367;
BB0_19:
and.b16 %rs37, %rs1, 255;
setp.eq.s16 %p97, %rs37, 0;
@%p97 bra BB0_367;
and.b32 %r446, %r11, 255;
setp.ge.s32 %p98, %r446, %r10;
@%p98 bra BB0_367;
ld.local.u8 %rs38, [%rd10+-1];
st.local.u8 [%rd10], %rs38;
bra.uni BB0_367;
// Append a copy of the first %r10 bytes of the local buffer (%rd396) right
// after them, working on 32-bit words.
// %r933 = 2 * %r10; if that exceeds 255 the buffer is left alone (BB0_367).
// The data is OR-ed into the destination words, so the destination bytes are
// presumably zero beforehand - TODO confirm against the code that fills the buffer.
BB0_349:
shl.b32 %r933, %r10, 1;
setp.gt.s32 %p243, %r933, 255;
@%p243 bra BB0_367;
add.s32 %r258, %r10, -3;
mov.u32 %r263, 0;
setp.lt.s32 %p244, %r258, 1;
mov.u32 %r922, %r10;
@%p244 bra BB0_352;
// Main loop: %r263 = source byte offset, %r922 = destination byte offset.
// Each pass takes 4 source bytes (64-bit pair masked to its low 32 bits),
// shifts them left by 8 * (%r922 & 3) bits and ORs the two resulting 32-bit
// halves into destination words %r922/4 and %r922/4 + 1.
// The shr.s32/shr.u32/add/shr.s32 sequences are signed division by 4
// rounding toward zero. Runs while %r263 < %r10 - 3.
BB0_351:
and.b32 %r595, %r922, 3;
shl.b32 %r596, %r595, 3;
shr.s32 %r597, %r263, 31;
shr.u32 %r598, %r597, 30;
add.s32 %r599, %r263, %r598;
shr.s32 %r600, %r599, 2;
mul.wide.s32 %rd304, %r600, 4;
add.s64 %rd305, %rd396, %rd304;
ld.local.u32 %r601, [%rd305+4];
ld.local.u32 %r602, [%rd305];
mov.b64 %rd306, {%r602, %r601};
and.b64 %rd307, %rd306, 4294967295;
shl.b64 %rd308, %rd307, %r596;
cvt.u32.u64 %r603, %rd308;
shr.u64 %rd309, %rd308, 32;
cvt.u32.u64 %r604, %rd309;
shr.s32 %r605, %r922, 31;
shr.u32 %r606, %r605, 30;
add.s32 %r607, %r922, %r606;
shr.s32 %r608, %r607, 2;
mul.wide.s32 %rd310, %r608, 4;
add.s64 %rd311, %rd396, %rd310;
ld.local.u32 %r609, [%rd311];
or.b32 %r610, %r603, %r609;
ld.local.u32 %r611, [%rd311+4];
st.local.u32 [%rd311], %r610;
or.b32 %r612, %r604, %r611;
st.local.u32 [%rd311+4], %r612;
add.s32 %r922, %r922, 4;
add.s32 %r263, %r263, 4;
setp.lt.s32 %p245, %r263, %r258;
@%p245 bra BB0_351;
// Tail dispatch on the number of bytes left (%r10 - %r263):
// 3 -> BB0_357, 2 -> BB0_356, anything else -> BB0_354.
BB0_352:
sub.s32 %r613, %r10, %r263;
setp.eq.s32 %p246, %r613, 3;
@%p246 bra BB0_357;
setp.eq.s32 %p247, %r613, 2;
@%p247 bra BB0_356;
bra.uni BB0_354;
// Two-byte tail: same shift-and-OR as the main loop, but the source is
// masked to 16 bits (65535) and the destination offset is %r263 + %r10.
BB0_356:
add.s32 %r629, %r263, %r10;
shr.s32 %r630, %r629, 31;
shr.u32 %r631, %r630, 30;
add.s32 %r632, %r629, %r631;
shr.s32 %r633, %r632, 2;
and.b32 %r634, %r629, 3;
shl.b32 %r635, %r634, 3;
shr.s32 %r636, %r263, 31;
shr.u32 %r637, %r636, 30;
add.s32 %r638, %r263, %r637;
shr.s32 %r639, %r638, 2;
mul.wide.s32 %rd316, %r639, 4;
add.s64 %rd317, %rd396, %rd316;
ld.local.u32 %r640, [%rd317+4];
ld.local.u32 %r641, [%rd317];
mov.b64 %rd318, {%r641, %r640};
and.b64 %rd319, %rd318, 65535;
shl.b64 %rd320, %rd319, %r635;
cvt.u32.u64 %r642, %rd320;
shr.u64 %rd321, %rd320, 32;
cvt.u32.u64 %r643, %rd321;
mul.wide.s32 %rd322, %r633, 4;
add.s64 %rd323, %rd396, %rd322;
ld.local.u32 %r644, [%rd323];
or.b32 %r645, %r642, %r644;
ld.local.u32 %r646, [%rd323+4];
st.local.u32 [%rd323], %r645;
or.b32 %r647, %r643, %r646;
st.local.u32 [%rd323+4], %r647;
bra.uni BB0_368;
// Dispatch leaf: only %r299 == 102 (ASCII 'f') is handled here; any other
// value goes to BB0_367 without touching the buffer.
BB0_72:
setp.eq.s32 %p31, %r299, 102;
@%p31 bra BB0_73;
bra.uni BB0_367;
// Step 1 of this handler: append a copy of the first %r10 bytes of the local
// buffer (%rd396) right after them, word-wise. All tails continue at
// BB0_329. Nothing is done (BB0_367) when 2 * %r10 > 255.
// Data is OR-ed into the destination, so those bytes are presumably zero
// beforehand - TODO confirm.
BB0_73:
shl.b32 %r228, %r10, 1;
setp.gt.s32 %p222, %r228, 255;
@%p222 bra BB0_367;
add.s32 %r229, %r10, -3;
mov.u32 %r912, 0;
setp.lt.s32 %p223, %r229, 1;
mov.u32 %r911, %r10;
@%p223 bra BB0_76;
// Main loop: %r912 = source byte offset, %r911 = destination byte offset.
// Takes 4 source bytes, shifts them left by 8 * (%r911 & 3) bits and ORs the
// two 32-bit halves into destination words %r911/4 and %r911/4 + 1.
// %r818 recomputes %r10 - 3 (the loop bound) on every pass.
BB0_75:
add.s32 %r818, %r10, -3;
and.b32 %r501, %r911, 3;
shl.b32 %r502, %r501, 3;
shr.s32 %r503, %r912, 31;
shr.u32 %r504, %r503, 30;
add.s32 %r505, %r912, %r504;
shr.s32 %r506, %r505, 2;
mul.wide.s32 %rd263, %r506, 4;
add.s64 %rd264, %rd396, %rd263;
ld.local.u32 %r507, [%rd264+4];
ld.local.u32 %r508, [%rd264];
mov.b64 %rd265, {%r508, %r507};
and.b64 %rd266, %rd265, 4294967295;
shl.b64 %rd267, %rd266, %r502;
cvt.u32.u64 %r509, %rd267;
shr.u64 %rd268, %rd267, 32;
cvt.u32.u64 %r510, %rd268;
shr.s32 %r511, %r911, 31;
shr.u32 %r512, %r511, 30;
add.s32 %r513, %r911, %r512;
shr.s32 %r514, %r513, 2;
mul.wide.s32 %rd269, %r514, 4;
add.s64 %rd270, %rd396, %rd269;
ld.local.u32 %r515, [%rd270];
or.b32 %r516, %r509, %r515;
ld.local.u32 %r517, [%rd270+4];
st.local.u32 [%rd270], %r516;
or.b32 %r518, %r510, %r517;
st.local.u32 [%rd270+4], %r518;
add.s32 %r911, %r911, 4;
add.s32 %r912, %r912, 4;
setp.lt.s32 %p224, %r912, %r818;
@%p224 bra BB0_75;
// Tail dispatch on the number of bytes left (%r10 - %r912):
// 3 -> BB0_328, 2 -> BB0_327, anything else -> BB0_78.
BB0_76:
sub.s32 %r519, %r10, %r912;
setp.eq.s32 %p225, %r519, 3;
@%p225 bra BB0_328;
setp.eq.s32 %p226, %r519, 2;
@%p226 bra BB0_327;
bra.uni BB0_78;
// Two-byte tail: source masked to 16 bits (65535), destination offset
// %r912 + %r10; then on to BB0_329.
BB0_327:
add.s32 %r535, %r912, %r10;
shr.s32 %r536, %r535, 31;
shr.u32 %r537, %r536, 30;
add.s32 %r538, %r535, %r537;
shr.s32 %r539, %r538, 2;
and.b32 %r540, %r535, 3;
shl.b32 %r541, %r540, 3;
shr.s32 %r542, %r912, 31;
shr.u32 %r543, %r542, 30;
add.s32 %r544, %r912, %r543;
shr.s32 %r545, %r544, 2;
mul.wide.s32 %rd275, %r545, 4;
add.s64 %rd276, %rd396, %rd275;
ld.local.u32 %r546, [%rd276+4];
ld.local.u32 %r547, [%rd276];
mov.b64 %rd277, {%r547, %r546};
and.b64 %rd278, %rd277, 65535;
shl.b64 %rd279, %rd278, %r541;
cvt.u32.u64 %r548, %rd279;
shr.u64 %rd280, %rd279, 32;
cvt.u32.u64 %r549, %rd280;
mul.wide.s32 %rd281, %r539, 4;
add.s64 %rd282, %rd396, %rd281;
ld.local.u32 %r550, [%rd282];
or.b32 %r551, %r548, %r550;
ld.local.u32 %r552, [%rd282+4];
st.local.u32 [%rd282], %r551;
or.b32 %r553, %r549, %r552;
st.local.u32 [%rd282+4], %r553;
bra.uni BB0_329;
// Double the byte at [%rd10] (equivalent to a left shift by one bit); only
// the low 8 bits of the product are stored back.
// Skipped (BB0_367) when (%r11 & 255) >= %r10.
BB0_156:
and.b32 %r452, %r11, 255;
setp.ge.s32 %p103, %r452, %r10;
@%p103 bra BB0_367;
ld.local.u8 %rs46, [%rd10];
mul.wide.u16 %r453, %rs46, 2;
st.local.u8 [%rd10], %r453;
bra.uni BB0_367;
// Dispatch leaf: only %r299 == 82 (ASCII 'R') is handled here; any other
// value goes to BB0_367 without touching the buffer.
BB0_43:
setp.eq.s32 %p45, %r299, 82;
@%p45 bra BB0_44;
bra.uni BB0_367;
// Logical right shift by one bit of the byte at [%rd10].
// Skipped (BB0_367) when (%r11 & 255) >= %r10.
BB0_44:
and.b32 %r451, %r11, 255;
setp.ge.s32 %p102, %r451, %r10;
@%p102 bra BB0_367;
ld.local.u8 %rs44, [%rd10];
shr.u16 %rs45, %rs44, 1;
st.local.u8 [%rd10], %rs45;
bra.uni BB0_367;
// Replace every byte in [%rd9, %rd9 + %r10) that equals the low byte of %rs1
// with %rs2. The loop is unrolled by four; the first %r10 & 3 bytes are
// handled by the peeled blocks below. Always leaves through BB0_367.
BB0_226:
setp.lt.s32 %p151, %r10, 1;
@%p151 bra BB0_367;
// Remainder dispatch: 0 -> BB0_238, 1 -> BB0_235, 2 -> BB0_232, 3 -> fall through.
and.b32 %r136, %r10, 3;
setp.eq.s32 %p152, %r136, 0;
mov.u32 %r877, 0;
@%p152 bra BB0_238;
setp.eq.s32 %p153, %r136, 1;
mov.u32 %r876, 0;
@%p153 bra BB0_235;
setp.eq.s32 %p154, %r136, 2;
mov.u32 %r875, 0;
@%p154 bra BB0_232;
// Remainder 3: test byte 0.
ld.local.u8 %rs87, [%rd9];
and.b16 %rs88, %rs1, 255;
mov.u32 %r875, 1;
setp.ne.s16 %p155, %rs87, %rs88;
@%p155 bra BB0_232;
st.local.u8 [%rd9], %rs2;
// Peeled byte at index %r875.
BB0_232:
cvt.u64.u32 %rd209, %r875;
add.s64 %rd37, %rd9, %rd209;
ld.local.u8 %rs89, [%rd37];
and.b16 %rs90, %rs1, 255;
setp.ne.s16 %p156, %rs89, %rs90;
@%p156 bra BB0_234;
st.local.u8 [%rd37], %rs2;
BB0_234:
add.s32 %r876, %r875, 1;
// Peeled byte at index %r876.
BB0_235:
cvt.s64.s32 %rd210, %r876;
add.s64 %rd38, %rd9, %rd210;
ld.local.u8 %rs91, [%rd38];
and.b16 %rs92, %rs1, 255;
setp.ne.s16 %p157, %rs91, %rs92;
@%p157 bra BB0_237;
st.local.u8 [%rd38], %rs2;
BB0_237:
add.s32 %r877, %r876, 1;
// Fewer than four bytes in total: the peeled blocks already covered them.
BB0_238:
setp.lt.u32 %p158, %r10, 4;
@%p158 bra BB0_367;
cvt.s64.s32 %rd211, %r877;
add.s64 %rd362, %rd9, %rd211;
// Unrolled body: four compare-and-replace steps per pass, %r877 is the
// byte index, %rd362 the matching pointer; runs while %r877 < %r10.
BB0_240:
ld.local.u8 %rs93, [%rd362];
and.b16 %rs94, %rs1, 255;
setp.ne.s16 %p159, %rs93, %rs94;
@%p159 bra BB0_242;
st.local.u8 [%rd362], %rs2;
BB0_242:
ld.local.u8 %rs95, [%rd362+1];
setp.ne.s16 %p160, %rs95, %rs94;
@%p160 bra BB0_244;
st.local.u8 [%rd362+1], %rs2;
BB0_244:
ld.local.u8 %rs97, [%rd362+2];
setp.ne.s16 %p161, %rs97, %rs94;
@%p161 bra BB0_246;
st.local.u8 [%rd362+2], %rs2;
BB0_246:
ld.local.u8 %rs99, [%rd362+3];
setp.ne.s16 %p162, %rs99, %rs94;
@%p162 bra BB0_248;
st.local.u8 [%rd362+3], %rs2;
BB0_248:
add.s32 %r877, %r877, 4;
setp.lt.s32 %p163, %r877, %r10;
add.s64 %rd362, %rd362, 4;
@%p163 bra BB0_240;
bra.uni BB0_367;
// Dispatch leaf: only %r299 == 117 (ASCII 'u') is handled here; any other
// value goes to BB0_367 without touching the buffer.
BB0_108:
setp.eq.s32 %p18, %r299, 117;
@%p18 bra BB0_109;
bra.uni BB0_367;
// Walk the local buffer (%rd396) one 32-bit word at a time, %r930 counting
// bytes in steps of 4 while %r930 < %r10. Nothing to do when %r10 < 1.
BB0_109:
mov.u32 %r930, 0;
setp.lt.s32 %p260, %r10, 1;
mov.u64 %rd393, %rd396;
@%p260 bra BB0_367;
// Per word, build a mask with bit 0x20 set in each byte lane where
//   bit 0x40 is set               (1077952576 = 0x40404040, shifted right by 1),
//   bit 0x80 is clear             (-2139062144 = 0x80808080, shifted right by 2, inverted),
//   the low five bits are >= 1    (low5 + 0x1F has bit 0x20; 522133279 = 0x1F1F1F1F),
//   the low five bits are <= 26   (0xFA - low5 has bit 0x20; -84215046 = 0xFAFAFAFA),
// i.e. bytes 0x41..0x5A and 0x61..0x7A, then clear those mask bits in the word.
BB0_110:
ld.local.u32 %r742, [%rd393];
and.b32 %r743, %r742, 1077952576;
shr.u32 %r744, %r743, 1;
and.b32 %r745, %r742, -2139062144;
shr.u32 %r746, %r745, 2;
not.b32 %r747, %r746;
and.b32 %r748, %r744, %r747;
and.b32 %r749, %r742, 522133279;
add.s32 %r750, %r749, 522133279;
mov.u32 %r751, -84215046;
sub.s32 %r752, %r751, %r749;
and.b32 %r753, %r748, %r752;
and.b32 %r754, %r753, %r750;
not.b32 %r755, %r754;
and.b32 %r756, %r742, %r755;
st.local.u32 [%rd393], %r756;
add.s64 %rd393, %rd393, 4;
add.s32 %r930, %r930, 4;
setp.lt.s32 %p261, %r930, %r10;
@%p261 bra BB0_110;
bra.uni BB0_367;
// Remove every byte equal to the low byte of %rs1 from [%rd9, %rd9 + %r10),
// compacting the survivors toward the front, then zero the freed tail.
// %r933 is the write index during compaction and is the value carried to
// BB0_368 afterwards. The read loop is unrolled by four with the first
// %r10 & 3 bytes peeled.
BB0_194:
mov.u32 %r933, 0;
setp.lt.s32 %p132, %r10, 1;
@%p132 bra BB0_216;
// Remainder dispatch: 0 -> BB0_205, 1 -> BB0_202, 2 -> BB0_199, 3 -> fall through.
and.b32 %r100, %r10, 3;
setp.eq.s32 %p133, %r100, 0;
mov.u32 %r862, 0;
mov.u32 %r933, %r862;
@%p133 bra BB0_205;
setp.eq.s32 %p134, %r100, 1;
mov.u32 %r859, 0;
mov.u32 %r933, %r859;
@%p134 bra BB0_202;
setp.eq.s32 %p135, %r100, 2;
mov.u32 %r856, 0;
mov.u32 %r933, %r856;
@%p135 bra BB0_199;
// Remainder 3, byte 0: it would be written onto itself, so only the write
// index is set (1 if the byte is kept, 0 if it matches).
ld.local.u8 %rs76, [%rd9];
and.b16 %rs77, %rs1, 255;
setp.ne.s16 %p136, %rs76, %rs77;
selp.u32 %r933, 1, 0, %p136;
mov.u32 %r856, 1;
// Peeled byte at read index %r856.
BB0_199:
cvt.u64.u32 %rd185, %r856;
add.s64 %rd186, %rd9, %rd185;
ld.local.u8 %rs6, [%rd186];
and.b16 %rs78, %rs1, 255;
setp.eq.s16 %p137, %rs6, %rs78;
@%p137 bra BB0_201;
cvt.u64.u32 %rd187, %r933;
add.s64 %rd188, %rd9, %rd187;
st.local.u8 [%rd188], %rs6;
add.s32 %r933, %r933, 1;
BB0_201:
add.s32 %r859, %r856, 1;
// Peeled byte at read index %r859.
BB0_202:
cvt.s64.s32 %rd189, %r859;
add.s64 %rd190, %rd9, %rd189;
ld.local.u8 %rs7, [%rd190];
and.b16 %rs79, %rs1, 255;
setp.eq.s16 %p138, %rs7, %rs79;
@%p138 bra BB0_204;
cvt.s64.s32 %rd191, %r933;
add.s64 %rd192, %rd9, %rd191;
st.local.u8 [%rd192], %rs7;
add.s32 %r933, %r933, 1;
BB0_204:
add.s32 %r862, %r859, 1;
BB0_205:
setp.lt.u32 %p139, %r10, 4;
@%p139 bra BB0_216;
cvt.s64.s32 %rd193, %r862;
add.s64 %rd360, %rd9, %rd193;
// Unrolled compaction: %rd360 / %r862 read position, %r933 write position.
// A byte that differs from the match value is stored at [%rd9 + %r933].
BB0_207:
ld.local.u8 %rs8, [%rd360];
and.b16 %rs80, %rs1, 255;
setp.eq.s16 %p140, %rs8, %rs80;
@%p140 bra BB0_209;
cvt.s64.s32 %rd194, %r933;
add.s64 %rd195, %rd9, %rd194;
st.local.u8 [%rd195], %rs8;
add.s32 %r933, %r933, 1;
BB0_209:
ld.local.u8 %rs9, [%rd360+1];
setp.eq.s16 %p141, %rs9, %rs80;
@%p141 bra BB0_211;
cvt.s64.s32 %rd196, %r933;
add.s64 %rd197, %rd9, %rd196;
st.local.u8 [%rd197], %rs9;
add.s32 %r933, %r933, 1;
BB0_211:
ld.local.u8 %rs10, [%rd360+2];
setp.eq.s16 %p142, %rs10, %rs80;
@%p142 bra BB0_213;
cvt.s64.s32 %rd198, %r933;
add.s64 %rd199, %rd9, %rd198;
st.local.u8 [%rd199], %rs10;
add.s32 %r933, %r933, 1;
BB0_213:
ld.local.u8 %rs11, [%rd360+3];
setp.eq.s16 %p143, %rs11, %rs80;
@%p143 bra BB0_215;
cvt.s64.s32 %rd200, %r933;
add.s64 %rd201, %rd9, %rd200;
st.local.u8 [%rd201], %rs11;
add.s32 %r933, %r933, 1;
BB0_215:
add.s32 %r862, %r862, 4;
setp.lt.s32 %p144, %r862, %r10;
add.s64 %rd360, %rd360, 4;
@%p144 bra BB0_207;
// Zero the bytes from index %r933 up to %r10 - 1 (nothing to clear when
// %r10 <= %r933). %r126 is the count; its low two bits select how many
// single stores are peeled before the unrolled loop.
BB0_216:
setp.le.s32 %p145, %r10, %r933;
@%p145 bra BB0_368;
sub.s32 %r126, %r10, %r933;
and.b32 %r127, %r126, 3;
setp.eq.s32 %p146, %r127, 0;
mov.u32 %r873, %r933;
@%p146 bra BB0_223;
setp.eq.s32 %p147, %r127, 1;
mov.u32 %r872, %r933;
@%p147 bra BB0_222;
setp.eq.s32 %p148, %r127, 2;
mov.u32 %r871, %r933;
@%p148 bra BB0_221;
cvt.s64.s32 %rd202, %r933;
add.s64 %rd203, %rd9, %rd202;
mov.u16 %rs84, 0;
st.local.u8 [%rd203], %rs84;
add.s32 %r871, %r933, 1;
BB0_221:
cvt.s64.s32 %rd204, %r871;
add.s64 %rd205, %rd9, %rd204;
mov.u16 %rs85, 0;
st.local.u8 [%rd205], %rs85;
add.s32 %r872, %r871, 1;
BB0_222:
cvt.s64.s32 %rd206, %r872;
add.s64 %rd207, %rd9, %rd206;
mov.u16 %rs86, 0;
st.local.u8 [%rd207], %rs86;
add.s32 %r873, %r872, 1;
BB0_223:
setp.lt.u32 %p149, %r126, 4;
@%p149 bra BB0_368;
cvt.s64.s32 %rd208, %r873;
add.s64 %rd361, %rd9, %rd208;
// Four zero stores per pass while %r873 < %r10.
BB0_225:
mov.u32 %r468, 0;
st.local.u8 [%rd361+3], %r468;
st.local.u8 [%rd361+2], %r468;
st.local.u8 [%rd361+1], %r468;
st.local.u8 [%rd361], %r468;
add.s64 %rd361, %rd361, 4;
add.s32 %r873, %r873, 4;
setp.lt.s32 %p150, %r873, %r10;
@%p150 bra BB0_225;
bra.uni BB0_368;
// Dispatch leaf: only %r299 == 68 (ASCII 'D') is handled here; any other
// value goes to BB0_367 without touching the buffer.
BB0_29:
setp.eq.s32 %p52, %r299, 68;
@%p52 bra BB0_30;
bra.uni BB0_367;
// Delete the byte at index (%r11 & 255): every later byte moves down by one
// and %r933 becomes %r10 - 1. Skipped (BB0_367) when the index is >= %r10.
BB0_30:
and.b32 %r886, %r11, 255;
setp.ge.s32 %p184, %r886, %r10;
@%p184 bra BB0_367;
add.s32 %r933, %r10, -1;
setp.ge.s32 %p185, %r886, %r933;
@%p185 bra BB0_34;
cvt.u64.u32 %rd224, %r11;
and.b64 %rd225, %rd224, 255;
add.s64 %rd369, %rd9, %rd225;
// Copy [p + 1] to [p] while the running index %r886 is below %r10 - 1.
BB0_33:
add.s64 %rd61, %rd369, 1;
ld.local.u8 %rs107, [%rd369+1];
st.local.u8 [%rd369], %rs107;
add.s32 %r886, %r886, 1;
setp.lt.s32 %p186, %r886, %r933;
mov.u64 %rd369, %rd61;
@%p186 bra BB0_33;
// Clear the byte at [%rd11 - 1].
// NOTE(review): %rd11 is set outside this chunk; presumably %rd9 + %r10, which
// would make this the vacated last slot - confirm.
BB0_34:
mov.u16 %rs108, 0;
st.local.u8 [%rd11+-1], %rs108;
bra.uni BB0_368;
// Walk the local buffer (%rd396) one 32-bit word at a time, %r932 counting
// bytes in steps of 4 while %r932 < %r10. Nothing to do when %r10 < 1.
BB0_365:
mov.u32 %r932, 0;
setp.lt.s32 %p264, %r10, 1;
mov.u64 %rd395, %rd396;
@%p264 bra BB0_367;
// Per word, build a mask with bit 0x20 set in each byte lane where
//   bit 0x40 is set               (1077952576 = 0x40404040, shifted right by 1),
//   bit 0x80 is clear             (-2139062144 = 0x80808080, shifted right by 2, inverted),
//   the low five bits are >= 1    (low5 + 0x1F has bit 0x20; 522133279 = 0x1F1F1F1F),
//   the low five bits are <= 26   (0xFA - low5 has bit 0x20; -84215046 = 0xFAFAFAFA),
// i.e. bytes 0x41..0x5A and 0x61..0x7A, then OR the mask into the word
// (sets bit 0x20 in those lanes).
BB0_366:
ld.local.u32 %r789, [%rd395];
and.b32 %r790, %r789, 1077952576;
shr.u32 %r791, %r790, 1;
and.b32 %r792, %r789, -2139062144;
shr.u32 %r793, %r792, 2;
not.b32 %r794, %r793;
and.b32 %r795, %r791, %r794;
and.b32 %r796, %r789, 522133279;
add.s32 %r797, %r796, 522133279;
mov.u32 %r798, -84215046;
sub.s32 %r799, %r798, %r796;
and.b32 %r800, %r795, %r799;
and.b32 %r801, %r800, %r797;
or.b32 %r802, %r801, %r789;
st.local.u32 [%rd395], %r802;
add.s64 %rd395, %rd395, 4;
add.s32 %r932, %r932, 4;
setp.lt.s32 %p265, %r932, %r10;
@%p265 bra BB0_366;
bra.uni BB0_367;
// Dispatch leaf: only %r299 == 112 (ASCII 'p') is handled here; any other
// value goes to BB0_367 without touching the buffer.
BB0_87:
setp.eq.s32 %p25, %r299, 112;
@%p25 bra BB0_88;
bra.uni BB0_367;
// Append N = (%r11 & 255) further copies of the %r10 buffer bytes.
// %r933 = N * %r10 + %r10; if that exceeds 255 nothing is done (BB0_367).
// With N == 0 the code goes straight to BB0_368.
// %rd387 is the read pointer (starts at %rd9), %rd11 the write pointer.
// NOTE(review): %rd11 is set outside this chunk; presumably %rd9 + %r10 - confirm.
BB0_88:
and.b32 %r246, %r11, 255;
mad.lo.s32 %r933, %r246, %r10, %r10;
setp.gt.s32 %p234, %r933, 255;
@%p234 bra BB0_367;
setp.eq.s32 %p235, %r246, 0;
@%p235 bra BB0_368;
and.b32 %r248, %r10, 3;
mov.u32 %r917, 0;
mov.u64 %rd387, %rd9;
// Outer loop, one pass per copy (%r917 counts up to N). Each pass copies
// %r10 bytes; both pointers keep advancing from pass to pass.
BB0_91:
setp.lt.s32 %p236, %r10, 1;
@%p236 bra BB0_348;
setp.eq.s32 %p237, %r248, 0;
mov.u64 %rd303, 0;
mov.u32 %r921, 0;
@%p237 bra BB0_340;
bra.uni BB0_341;
// %r10 is a multiple of 4: hand the pointers to the unrolled loop registers.
// %rd387 / %rd11 are zeroed here and rewritten at the end of every BB0_347 pass.
BB0_340:
mov.u64 %rd385, %rd387;
mov.u64 %rd386, %rd11;
mov.u64 %rd387, %rd303;
mov.u64 %rd11, %rd303;
bra.uni BB0_346;
// Peel %r10 & 3 single-byte copies (3 -> fall through, 2 -> BB0_344, 1 -> BB0_345).
BB0_341:
setp.eq.s32 %p238, %r248, 1;
mov.u32 %r919, 0;
@%p238 bra BB0_345;
setp.eq.s32 %p239, %r248, 2;
mov.u32 %r919, 1;
@%p239 bra BB0_344;
ld.local.u8 %rs165, [%rd387];
st.local.u8 [%rd11], %rs165;
add.s64 %rd11, %rd11, 1;
add.s64 %rd387, %rd387, 1;
mov.u32 %r919, 2;
BB0_344:
ld.local.u8 %rs166, [%rd387];
st.local.u8 [%rd11], %rs166;
add.s64 %rd11, %rd11, 1;
add.s64 %rd387, %rd387, 1;
BB0_345:
ld.local.u8 %rs167, [%rd387];
st.local.u8 [%rd11], %rs167;
add.s64 %rd386, %rd11, 1;
add.s64 %rd385, %rd387, 1;
add.s32 %r921, %r919, 1;
mov.u64 %rd387, %rd385;
mov.u64 %rd11, %rd386;
BB0_346:
setp.lt.u32 %p240, %r10, 4;
@%p240 bra BB0_348;
// Unrolled copy, four bytes per pass, while %r921 < %r10.
BB0_347:
ld.local.u8 %rs168, [%rd385];
st.local.u8 [%rd386], %rs168;
ld.local.u8 %rs169, [%rd385+1];
st.local.u8 [%rd386+1], %rs169;
ld.local.u8 %rs170, [%rd385+2];
st.local.u8 [%rd386+2], %rs170;
ld.local.u8 %rs171, [%rd385+3];
st.local.u8 [%rd386+3], %rs171;
add.s64 %rd386, %rd386, 4;
add.s64 %rd385, %rd385, 4;
add.s32 %r921, %r921, 4;
setp.lt.s32 %p241, %r921, %r10;
mov.u64 %rd387, %rd385;
mov.u64 %rd11, %rd386;
@%p241 bra BB0_347;
BB0_348:
add.s32 %r917, %r917, 1;
setp.lt.s32 %p242, %r917, %r246;
@%p242 bra BB0_91;
bra.uni BB0_368;
// Append the byte found at [%rd11 - 1] N = (%r11 & 255) times.
// %r933 = N + %r10. Nothing is done (BB0_367) when that exceeds 255 or when
// %r10 == 0; with N == 0 the code goes straight to BB0_368.
// NOTE(review): %rd11 is set outside this chunk; presumably it points one past
// the last data byte, so [%rd11 - 1] would be the last byte - confirm.
BB0_171:
and.b32 %r74, %r11, 255;
add.s32 %r933, %r74, %r10;
setp.gt.s32 %p116, %r933, 255;
setp.eq.s32 %p117, %r10, 0;
or.pred %p118, %p117, %p116;
@%p118 bra BB0_367;
setp.eq.s32 %p119, %r74, 0;
@%p119 bra BB0_368;
ld.local.u8 %rs4, [%rd11+-1];
mov.u32 %r846, 0;
// One pass per appended byte; the store is skipped once %r10 > 254.
// %r10 and %rd11 are both advanced in place by this loop.
BB0_174:
setp.gt.s32 %p120, %r10, 254;
@%p120 bra BB0_176;
st.local.u8 [%rd11], %rs4;
BB0_176:
add.s32 %r846, %r846, 1;
add.s64 %rd11, %rd11, 1;
add.s32 %r10, %r10, 1;
setp.lt.s32 %p121, %r846, %r74;
@%p121 bra BB0_174;
bra.uni BB0_368;
// Dispatch leaf: only %r299 == 93 (ASCII ']') is handled here; any other
// value goes to BB0_367 without touching the buffer.
BB0_58:
setp.eq.s32 %p39, %r299, 93;
@%p39 bra BB0_59;
bra.uni BB0_367;
// Entry of the handler. With %r10 == 0 it leaves through BB0_368 with
// %r933 = 0. Otherwise %r171 = (%r10 + 255) & 255 is used as an index:
// if %r171 >= %r10 nothing is done (BB0_367); else %r933 = %r10 - 1.
// When %r933 <= %r171 control goes to BB0_282 (outside this chunk);
// otherwise the low two bits of %r933 - %r171 select the entry into an
// unrolled loop (BB0_272 / BB0_273).
BB0_59:
setp.eq.s32 %p187, %r10, 0;
mov.u32 %r933, 0;
@%p187 bra BB0_368;
add.s32 %r490, %r10, 255;
and.b32 %r171, %r490, 255;
setp.ge.s32 %p188, %r171, %r10;
@%p188 bra BB0_367;
add.s32 %r933, %r10, -1;
setp.le.s32 %p189, %r933, %r171;
@%p189 bra BB0_282;
sub.s32 %r173, %r933, %r171;
and.b32 %r174, %r173, 3;
setp.eq.s32 %p190, %r174, 0;
@%p190 bra BB0_272;
bra.uni BB0_273;
BB0_272:
mov.u32 %r890, %r171;
bra.uni BB0_279;
// Prepend the byte at [%rd9] N = (%r11 & 255) times.
// %r933 = N + %r10; nothing is done (BB0_367) when that exceeds 255, and with
// N == 0 the code goes straight to BB0_368.
// %rs5 holds the byte at [%rd9] as read before any shifting.
BB0_177:
and.b32 %r80, %r11, 255;
add.s32 %r933, %r80, %r10;
setp.gt.s32 %p122, %r933, 255;
@%p122 bra BB0_367;
ld.local.u8 %rs5, [%rd9];
setp.eq.s32 %p123, %r80, 0;
mov.u32 %r847, 0;
@%p123 bra BB0_368;
// Outer loop, one pass per prepended byte. %r83 = %r10 + pass number is the
// number of bytes to move in this pass; the pass is skipped entirely when
// %r83 > 254. Bytes [0, %r83) are moved up by one position, walking from the
// highest index down, then %rs5 is stored at [%rd9] (BB0_192).
BB0_179:
add.s32 %r83, %r10, %r847;
setp.gt.s32 %p124, %r83, 254;
@%p124 bra BB0_193;
add.s32 %r855, %r83, -1;
setp.lt.s32 %p125, %r855, 0;
@%p125 bra BB0_192;
// Remainder dispatch on %r83 & 3 for the four-way unrolled move loop.
and.b32 %r85, %r83, 3;
setp.eq.s32 %p126, %r85, 0;
@%p126 bra BB0_182;
bra.uni BB0_183;
BB0_182:
mov.u32 %r854, %r855;
mov.u32 %r855, %r83;
bra.uni BB0_190;
BB0_183:
setp.eq.s32 %p127, %r85, 1;
@%p127 bra BB0_184;
bra.uni BB0_185;
BB0_184:
mov.u32 %r851, %r83;
bra.uni BB0_189;
BB0_185:
setp.eq.s32 %p128, %r85, 2;
@%p128 bra BB0_186;
bra.uni BB0_187;
BB0_186:
mov.u32 %r851, %r855;
mov.u32 %r855, %r83;
bra.uni BB0_188;
// Remainder 3: move byte %r83 - 1 to index %r83.
BB0_187:
cvt.s64.s32 %rd171, %r855;
add.s64 %rd172, %rd9, %rd171;
ld.local.u8 %rs69, [%rd172];
st.local.u8 [%rd172+1], %rs69;
add.s32 %r851, %r83, -2;
// Peeled move: byte at index %r851 goes to index %r855.
BB0_188:
cvt.s64.s32 %rd173, %r851;
add.s64 %rd174, %rd9, %rd173;
ld.local.u8 %rs70, [%rd174];
cvt.s64.s32 %rd175, %r855;
add.s64 %rd176, %rd9, %rd175;
st.local.u8 [%rd176], %rs70;
add.s32 %r855, %r851, -1;
// Peeled move: byte at index %r855 goes to index %r851.
BB0_189:
cvt.s64.s32 %rd177, %r855;
add.s64 %rd178, %rd9, %rd177;
ld.local.u8 %rs71, [%rd178];
cvt.s64.s32 %rd179, %r851;
add.s64 %rd180, %rd9, %rd179;
st.local.u8 [%rd180], %rs71;
add.s32 %r854, %r855, -1;
BB0_190:
setp.lt.u32 %p129, %r83, 4;
@%p129 bra BB0_192;
// Unrolled move: %r854 is the source index, %r855 the destination index
// (source + 1). Four bytes move up by one per pass; the pass repeats while
// %r854 - 4 is still >= 0.
BB0_191:
cvt.s64.s32 %rd181, %r854;
add.s64 %rd182, %rd9, %rd181;
ld.local.u8 %rs72, [%rd182];
cvt.s64.s32 %rd183, %r855;
add.s64 %rd184, %rd9, %rd183;
st.local.u8 [%rd184], %rs72;
ld.local.u8 %rs73, [%rd182+-1];
ld.local.u8 %rs74, [%rd182+-2];
ld.local.u8 %rs75, [%rd182+-3];
st.local.u8 [%rd182], %rs73;
st.local.u8 [%rd182+-1], %rs74;
st.local.u8 [%rd182+-2], %rs75;
add.s32 %r97, %r854, -4;
setp.gt.s32 %p130, %r97, -1;
add.s32 %r855, %r854, -3;
mov.u32 %r854, %r97;
@%p130 bra BB0_191;
BB0_192:
st.local.u8 [%rd9], %rs5;
BB0_193:
add.s32 %r847, %r847, 1;
setp.lt.s32 %p131, %r847, %r80;
@%p131 bra BB0_179;
bra.uni BB0_368;
// Dispatch leaf: only %r299 == 125 (ASCII '}') is handled here; any other
// value goes to BB0_367 without touching the buffer.
BB0_122:
setp.eq.s32 %p12, %r299, 125;
@%p12 bra BB0_123;
bra.uni BB0_367;
// Entry of the handler: nothing to do when %r300 < 1. Otherwise the low two
// bits of %r300 select the entry into a four-way unrolled loop:
// 0 -> BB0_299, 1 -> BB0_297, anything else -> BB0_126.
BB0_123:
setp.lt.s32 %p202, %r300, 1;
@%p202 bra BB0_367;
and.b32 %r193, %r300, 3;
setp.eq.s32 %p203, %r193, 0;
mov.u32 %r897, 0;
@%p203 bra BB0_299;
setp.eq.s32 %p204, %r193, 1;
@%p204 bra BB0_297;
bra.uni BB0_126;
// Remainder 1: preload the byte at [%rd11 - 1] and start at index 0.
BB0_297:
ld.local.u8 %rs187, [%rd11+-1];
mov.u32 %r896, 0;
bra.uni BB0_298;
// Remainder dispatch for an unrolled loop: %r212 == 1 enters at BB0_321 with
// %r906 = %r10; any other value is examined further at BB0_317.
BB0_315:
setp.eq.s32 %p217, %r212, 1;
@%p217 bra BB0_316;
bra.uni BB0_317;
BB0_316:
mov.u32 %r906, %r10;
bra.uni BB0_321;
// Three-byte tail of the word-wise append-copy started at BB0_349.
// Source: word %r263/4 of the local buffer (%rd396), masked to 24 bits
// (16777215 = 0xFFFFFF). Destination byte offset: %r263 + %r10.
// The value is shifted left by 8 * (offset & 3) bits and its two 32-bit
// halves are OR-ed into destination words offset/4 and offset/4 + 1.
BB0_357:
add.s32 %r648, %r263, %r10;
shr.s32 %r649, %r648, 31;
shr.u32 %r650, %r649, 30;
add.s32 %r651, %r648, %r650;
shr.s32 %r652, %r651, 2;
and.b32 %r653, %r648, 3;
shl.b32 %r654, %r653, 3;
shr.s32 %r655, %r263, 31;
shr.u32 %r656, %r655, 30;
add.s32 %r657, %r263, %r656;
shr.s32 %r658, %r657, 2;
mul.wide.s32 %rd324, %r658, 4;
add.s64 %rd325, %rd396, %rd324;
ld.local.u32 %r659, [%rd325+4];
ld.local.u32 %r660, [%rd325];
mov.b64 %rd326, {%r660, %r659};
and.b64 %rd327, %rd326, 16777215;
shl.b64 %rd328, %rd327, %r654;
cvt.u32.u64 %r661, %rd328;
shr.u64 %rd329, %rd328, 32;
cvt.u32.u64 %r662, %rd329;
mul.wide.s32 %rd330, %r652, 4;
add.s64 %rd331, %rd396, %rd330;
ld.local.u32 %r663, [%rd331];
or.b32 %r664, %r661, %r663;
ld.local.u32 %r665, [%rd331+4];
st.local.u32 [%rd331], %r664;
or.b32 %r666, %r662, %r665;
st.local.u32 [%rd331+4], %r666;
bra.uni BB0_368;
// One-byte tail of the word-wise append-copy started at BB0_349.
// Taken only when exactly one byte is left (%r613 == 1); any other value
// goes straight to BB0_368.
// The byte at the base of source word %r263/4 is shifted left by
// 8 * ((%r263 + %r10) & 3) bits and OR-ed into destination word (%r263 + %r10)/4.
BB0_354:
setp.ne.s32 %p248, %r613, 1;
@%p248 bra BB0_368;
add.s32 %r614, %r263, %r10;
shr.s32 %r615, %r614, 31;
shr.u32 %r616, %r615, 30;
add.s32 %r617, %r614, %r616;
shr.s32 %r618, %r617, 2;
and.b32 %r619, %r614, 3;
shl.b32 %r620, %r619, 3;
shr.s32 %r621, %r263, 31;
shr.u32 %r622, %r621, 30;
add.s32 %r623, %r263, %r622;
shr.s32 %r624, %r623, 2;
mul.wide.s32 %rd312, %r624, 4;
add.s64 %rd313, %rd396, %rd312;
ld.local.u8 %r625, [%rd313];
shl.b32 %r626, %r625, %r620;
mul.wide.s32 %rd314, %r618, 4;
add.s64 %rd315, %rd396, %rd314;
ld.local.u32 %r627, [%rd315];
or.b32 %r628, %r626, %r627;
st.local.u32 [%rd315], %r628;
bra.uni BB0_368;
// Remainder dispatch continued: %r212 == 2 enters at BB0_320 with
// %r906 = old %r910 and %r910 = %r10; any other value enters at BB0_319.
BB0_317:
setp.eq.s32 %p218, %r212, 2;
@%p218 bra BB0_318;
bra.uni BB0_319;
BB0_318:
mov.u32 %r906, %r910;
mov.u32 %r910, %r10;
bra.uni BB0_320;
// Three-byte tail of the word-wise append-copy started at BB0_73.
// Source: word %r912/4 of the local buffer (%rd396), masked to 24 bits
// (16777215 = 0xFFFFFF). Destination byte offset: %r912 + %r10.
// The value is shifted left by 8 * (offset & 3) bits and its two 32-bit
// halves are OR-ed into destination words offset/4 and offset/4 + 1.
// Continues at BB0_329.
BB0_328:
add.s32 %r554, %r912, %r10;
shr.s32 %r555, %r554, 31;
shr.u32 %r556, %r555, 30;
add.s32 %r557, %r554, %r556;
shr.s32 %r558, %r557, 2;
and.b32 %r559, %r554, 3;
shl.b32 %r560, %r559, 3;
shr.s32 %r561, %r912, 31;
shr.u32 %r562, %r561, 30;
add.s32 %r563, %r912, %r562;
shr.s32 %r564, %r563, 2;
mul.wide.s32 %rd283, %r564, 4;
add.s64 %rd284, %rd396, %rd283;
ld.local.u32 %r565, [%rd284+4];
ld.local.u32 %r566, [%rd284];
mov.b64 %rd285, {%r566, %r565};
and.b64 %rd286, %rd285, 16777215;
shl.b64 %rd287, %rd286, %r560;
cvt.u32.u64 %r567, %rd287;
shr.u64 %rd288, %rd287, 32;
cvt.u32.u64 %r568, %rd288;
mul.wide.s32 %rd289, %r558, 4;
add.s64 %rd290, %rd396, %rd289;
ld.local.u32 %r569, [%rd290];
or.b32 %r570, %r567, %r569;
ld.local.u32 %r571, [%rd290+4];
st.local.u32 [%rd290], %r570;
or.b32 %r572, %r568, %r571;
st.local.u32 [%rd290+4], %r572;
bra.uni BB0_329;
// One-byte tail of the word-wise append-copy started at BB0_73.
// Taken only when exactly one byte is left (%r519 == 1); any other value
// jumps to BB0_329. The byte at the base of source word %r912/4 is shifted
// left by 8 * ((%r912 + %r10) & 3) bits and OR-ed into destination word
// (%r912 + %r10)/4. Execution then falls through to the label that follows.
BB0_78:
setp.ne.s32 %p227, %r519, 1;
@%p227 bra BB0_329;
add.s32 %r520, %r912, %r10;
shr.s32 %r521, %r520, 31;
shr.u32 %r522, %r521, 30;
add.s32 %r523, %r520, %r522;
shr.s32 %r524, %r523, 2;
and.b32 %r525, %r520, 3;
shl.b32 %r526, %r525, 3;
shr.s32 %r527, %r912, 31;
shr.u32 %r528, %r527, 30;
add.s32 %r529, %r912, %r528;
shr.s32 %r530, %r529, 2;
mul.wide.s32 %rd271, %r530, 4;
add.s64 %rd272, %rd396, %rd271;
ld.local.u8 %r531, [%rd272];
shl.b32 %r532, %r531, %r526;
mul.wide.s32 %rd273, %r524, 4;
add.s64 %rd274, %rd396, %rd273;
ld.local.u32 %r533, [%rd274];
or.b32 %r534, %r532, %r533;
st.local.u32 [%rd274], %r534;
// Reverse the byte range [%r10, 2 * %r10) of the buffer at %rd9 in place by
// swapping index %r10 + i with index 2 * %r10 - 1 - i for i = 0 .. %r10/2 - 1.
// %r933 = 2 * %r10 is the value carried to BB0_368.
// Nothing is swapped when %r10 < 2.
BB0_329:
shl.b32 %r933, %r10, 1;
shr.u32 %r573, %r10, 31;
add.s32 %r574, %r10, %r573;
shr.s32 %r235, %r574, 1;
setp.lt.s32 %p228, %r10, 2;
@%p228 bra BB0_368;
// %r235 = %r10 / 2 (signed, toward zero) is the swap count, %r236 = 2 * %r10 - 1
// the highest index. The low two bits of max(%r235, 1) select how many swaps
// are peeled before the four-way unrolled loop.
add.s32 %r576, %r10, %r10;
add.s32 %r236, %r576, -1;
mov.u32 %r577, 1;
max.s32 %r237, %r235, %r577;
and.b32 %r238, %r237, 3;
setp.eq.s32 %p229, %r238, 0;
mov.u32 %r916, 0;
@%p229 bra BB0_336;
setp.eq.s32 %p230, %r238, 1;
mov.u32 %r914, 0;
@%p230 bra BB0_335;
setp.eq.s32 %p231, %r238, 2;
mov.u32 %r913, 0;
@%p231 bra BB0_334;
// Peeled swap for i = 0, using pointers computed outside this chunk.
// NOTE(review): presumably %rd11 = %rd9 + %r10 and %rd12 + 1 = %rd9 + 2 * %r10 - 1 - confirm.
ld.local.u8 %rs151, [%rd11];
ld.local.u8 %rs152, [%rd12+1];
st.local.u8 [%rd11], %rs152;
st.local.u8 [%rd12+1], %rs151;
mov.u32 %r913, %r577;
// Peeled swap for i = %r913.
BB0_334:
add.s32 %r581, %r913, %r10;
cvt.s64.s32 %rd291, %r581;
add.s64 %rd292, %rd9, %rd291;
ld.local.u8 %rs153, [%rd292];
sub.s32 %r582, %r236, %r913;
cvt.s64.s32 %rd293, %r582;
add.s64 %rd294, %rd9, %rd293;
ld.local.u8 %rs154, [%rd294];
st.local.u8 [%rd292], %rs154;
st.local.u8 [%rd294], %rs153;
add.s32 %r914, %r913, 1;
// Peeled swap for i = %r914.
BB0_335:
add.s32 %r583, %r914, %r10;
cvt.s64.s32 %rd295, %r583;
add.s64 %rd296, %rd9, %rd295;
ld.local.u8 %rs155, [%rd296];
sub.s32 %r584, %r236, %r914;
cvt.s64.s32 %rd297, %r584;
add.s64 %rd298, %rd9, %rd297;
ld.local.u8 %rs156, [%rd298];
st.local.u8 [%rd296], %rs156;
st.local.u8 [%rd298], %rs155;
add.s32 %r916, %r914, 1;
BB0_336:
shl.b32 %r933, %r10, 1;
setp.lt.u32 %p232, %r237, 4;
@%p232 bra BB0_368;
// %rd373 walks up from index %r10 + %r916, %rd374 walks down from index
// 2 * %r10 - %r916 (bytes are addressed at negative offsets from it).
shl.b32 %r585, %r10, 1;
sub.s32 %r586, %r585, %r916;
cvt.s64.s32 %rd299, %r586;
add.s64 %rd374, %rd9, %rd299;
add.s32 %r587, %r916, %r10;
cvt.s64.s32 %rd300, %r587;
add.s64 %rd373, %rd9, %rd300;
// Four swaps per pass while %r916 < %r235.
BB0_338:
shl.b32 %r933, %r10, 1;
ld.local.u8 %rs157, [%rd373];
ld.local.u8 %rs158, [%rd374+-1];
st.local.u8 [%rd373], %rs158;
st.local.u8 [%rd374+-1], %rs157;
ld.local.u8 %rs159, [%rd373+1];
ld.local.u8 %rs160, [%rd374+-2];
st.local.u8 [%rd373+1], %rs160;
st.local.u8 [%rd374+-2], %rs159;
ld.local.u8 %rs161, [%rd373+2];
ld.local.u8 %rs162, [%rd374+-3];
st.local.u8 [%rd373+2], %rs162;
st.local.u8 [%rd374+-3], %rs161;
ld.local.u8 %rs163, [%rd373+3];
add.s64 %rd75, %rd374, -4;
ld.local.u8 %rs164, [%rd374+-4];
st.local.u8 [%rd373+3], %rs164;
st.local.u8 [%rd374+-4], %rs163;
add.s64 %rd373, %rd373, 4;
add.s32 %r916, %r916, 4;
setp.lt.s32 %p233, %r916, %r235;
mov.u64 %rd374, %rd75;
@%p233 bra BB0_338;
bra.uni BB0_368;
// Remainder dispatch continued: %r193 == 2 preloads the byte at [%rd11 - 1]
// into %rs186 and enters at BB0_296 with index 0; any other value enters at
// BB0_127.
BB0_126:
setp.eq.s32 %p205, %r193, 2;
@%p205 bra BB0_295;
bra.uni BB0_127;
BB0_295:
ld.local.u8 %rs186, [%rd11+-1];
mov.u32 %r895, 0;
bra.uni BB0_296;
// Move the buffer bytes at %rd9 up by one position, walking from the highest
// index down, then store %rs1 at index 0 and set %r933 = %r10 + 1.
// The labels below are the peeled entries and body of a four-way unrolled loop.
// NOTE(review): %rd11 is set outside this chunk; presumably %rd9 + %r10 - confirm.
BB0_319:
ld.local.u8 %rs144, [%rd11+-1];
st.local.u8 [%rd11], %rs144;
add.s32 %r906, %r10, -2;
// Peeled move: byte at index %r906 goes to index %r910.
BB0_320:
cvt.s64.s32 %rd251, %r906;
add.s64 %rd252, %rd9, %rd251;
ld.local.u8 %rs145, [%rd252];
cvt.s64.s32 %rd253, %r910;
add.s64 %rd254, %rd9, %rd253;
st.local.u8 [%rd254], %rs145;
add.s32 %r910, %r906, -1;
// Peeled move: byte at index %r910 goes to index %r906.
BB0_321:
cvt.s64.s32 %rd255, %r910;
add.s64 %rd256, %rd9, %rd255;
ld.local.u8 %rs146, [%rd256];
cvt.s64.s32 %rd257, %r906;
add.s64 %rd258, %rd9, %rd257;
st.local.u8 [%rd258], %rs146;
add.s32 %r909, %r910, -1;
BB0_322:
setp.lt.u32 %p219, %r10, 4;
@%p219 bra BB0_324;
// Unrolled move: %r909 is the source index, %r910 the destination index.
// Four bytes move up by one per pass; repeats while %r909 - 4 is still >= 0.
BB0_323:
cvt.s64.s32 %rd259, %r909;
add.s64 %rd260, %rd9, %rd259;
ld.local.u8 %rs147, [%rd260];
cvt.s64.s32 %rd261, %r910;
add.s64 %rd262, %rd9, %rd261;
st.local.u8 [%rd262], %rs147;
ld.local.u8 %rs148, [%rd260+-1];
ld.local.u8 %rs149, [%rd260+-2];
ld.local.u8 %rs150, [%rd260+-3];
st.local.u8 [%rd260], %rs148;
st.local.u8 [%rd260+-1], %rs149;
st.local.u8 [%rd260+-2], %rs150;
add.s32 %r224, %r909, -4;
setp.gt.s32 %p220, %r224, -1;
add.s32 %r910, %r909, -3;
mov.u32 %r909, %r224;
@%p220 bra BB0_323;
// Store %rs1 into the freed first slot; one more byte is now in use.
BB0_324:
st.local.u8 [%rd9], %rs1;
add.s32 %r933, %r10, 1;
bra.uni BB0_368;
// Remainder dispatch for an unrolled loop keyed on %r174:
// 1 -> BB0_278, 2 -> BB0_277 with %r887 = %r171, anything else -> BB0_276.
BB0_273:
setp.eq.s32 %p191, %r174, 1;
@%p191 bra BB0_278;
setp.eq.s32 %p192, %r174, 2;
@%p192 bra BB0_275;
bra.uni BB0_276;
BB0_275:
mov.u32 %r887, %r171;
bra.uni BB0_277;
// Body of a byte-rotation loop over indices 0 .. %r300 - 1 of the buffer at
// %rd9. Each step reads the byte at the current index, overwrites it with the
// byte carried from the previous step, and parks the byte just read in the
// slot at [%rd11 - 1]; the carry for index 0 is the byte found in that slot.
// NOTE(review): if [%rd11 - 1] is the last data byte and %r300 is the data
// length, the net effect is a rotate by one toward higher indices - confirm.
// BB0_127 / BB0_296 / BB0_298 are the peeled steps (3, 2, 1 left over);
// BB0_301 is the four-way unrolled body. Execution falls through to the
// label that follows when the loop ends.
BB0_127:
ld.local.u8 %rs186, [%rd9];
ld.local.u8 %rs125, [%rd11+-1];
st.local.u8 [%rd9], %rs125;
st.local.u8 [%rd11+-1], %rs186;
mov.u32 %r895, 1;
BB0_296:
cvt.u64.u32 %rd240, %r895;
add.s64 %rd241, %rd9, %rd240;
ld.local.u8 %rs187, [%rd241];
st.local.u8 [%rd241], %rs186;
st.local.u8 [%rd11+-1], %rs187;
add.s32 %r896, %r895, 1;
BB0_298:
cvt.s64.s32 %rd242, %r896;
add.s64 %rd243, %rd9, %rd242;
ld.local.u8 %rs126, [%rd243];
st.local.u8 [%rd243], %rs187;
st.local.u8 [%rd11+-1], %rs126;
add.s32 %r897, %r896, 1;
// Fewer than four steps in total: the peeled steps already did everything.
BB0_299:
setp.lt.u32 %p206, %r300, 4;
@%p206 bra BB0_367;
ld.local.u8 %rs188, [%rd11+-1];
cvt.s64.s32 %rd244, %r897;
add.s64 %rd372, %rd9, %rd244;
// Four steps per pass while %r897 < %r300; %rs188 is the carried byte.
BB0_301:
ld.local.u8 %rs127, [%rd372];
st.local.u8 [%rd372], %rs188;
st.local.u8 [%rd11+-1], %rs127;
ld.local.u8 %rs128, [%rd372+1];
st.local.u8 [%rd372+1], %rs127;
st.local.u8 [%rd11+-1], %rs128;
ld.local.u8 %rs129, [%rd372+2];
st.local.u8 [%rd372+2], %rs128;
st.local.u8 [%rd11+-1], %rs129;
ld.local.u8 %rs188, [%rd372+3];
st.local.u8 [%rd372+3], %rs129;
st.local.u8 [%rd11+-1], %rs188;
add.s64 %rd372, %rd372, 4;
add.s32 %r897, %r897, 4;
setp.lt.s32 %p207, %r897, %r300;
@%p207 bra BB0_301;
// Common exit for handlers that leave the length unchanged: %r933 = %r10.
BB0_367:
mov.u32 %r933, %r10;
// Common exit for all handlers: advance the word index %r821, fetch the next
// 32-bit word from the constant-space array passed as amp_param_2 into %r8,
// and go back to BB0_7 while that word is non-zero.
BB0_368:
ld.param.u64 %rd347, [amp_param_2];
add.s32 %r821, %r821, 1;
mul.wide.u32 %rd344, %r821, 4;
add.s64 %rd345, %rd347, %rd344;
ld.const.u32 %r8, [%rd345];
setp.ne.s32 %p266, %r8, 0;
@%p266 bra BB0_7;
// Write-back: store %r933 at byte offset 256 of the local buffer (%rd396),
// then copy the buffer to global memory at amp_param_0 + %rd127
// (%rd127 is the thread index times 260, computed at kernel entry).
BB0_369:
ld.param.u64 %rd348, [amp_param_0];
st.local.u32 [%rd396+256], %r933;
add.s64 %rd397, %rd348, %rd127;
mov.u32 %r935, 0;
// Copy 65 32-bit words (260 bytes), one word per pass.
BB0_370:
ld.local.u32 %r805, [%rd396];
st.global.u32 [%rd397], %r805;
add.s64 %rd397, %rd397, 4;
add.s64 %rd396, %rd396, 4;
add.s32 %r935, %r935, 1;
setp.lt.u32 %p267, %r935, 65;
@%p267 bra BB0_370;
// Kernel exit; also the target of the early-out branches at kernel entry.
BB0_371:
ret;
}