// // Generated by NVIDIA NVVM Compiler // // Compiler Build ID: UNKNOWN // Unknown Toolkit Version // Based on LLVM 3.4svn // .version 6.3 .target sm_52, texmode_independent .address_size 64 // .globl amp .entry amp( .param .u64 .ptr .global .align 4 amp_param_0, .param .u64 .ptr .global .align 4 amp_param_1, .param .u64 .ptr .const .align 4 amp_param_2, .param .u64 .ptr .global .align 4 amp_param_3, .param .u64 .ptr .global .align 4 amp_param_4, .param .u32 amp_param_5, .param .u64 amp_param_6 ) { .local .align 4 .b8 __local_depot0[260]; .reg .b64 %SP; .reg .b64 %SPL; .reg .pred %p<268>; .reg .b16 %rs<189>; .reg .b32 %r<936>; .reg .b64 %rd<398>; mov.u64 %SPL, __local_depot0; cvta.local.u64 %SP, %SPL; ld.param.u64 %rd122, [amp_param_1]; ld.param.u64 %rd123, [amp_param_2]; ld.param.u64 %rd124, [amp_param_6]; add.u64 %rd125, %SP, 0; add.u64 %rd396, %SPL, 0; mov.u32 %r291, %ctaid.x; mov.u32 %r292, %ntid.x; mov.b32 %r293, %envreg3; mad.lo.s32 %r1, %r291, %r292, %r293; mov.u32 %r2, %tid.x; add.s32 %r3, %r1, %r2; cvt.s64.s32 %rd126, %r3; setp.ge.u64 %p1, %rd126, %rd124; @%p1 bra BB0_371; ld.const.u32 %r8, [%rd123]; setp.ne.s32 %p2, %r8, 58; @%p2 bra BB0_3; ld.const.u32 %r294, [%rd123+4]; setp.eq.s32 %p3, %r294, 0; @%p3 bra BB0_371; BB0_3: mul.wide.s32 %rd127, %r3, 260; add.s64 %rd351, %rd122, %rd127; mov.u32 %r819, 0; mov.u64 %rd352, %rd396; BB0_4: ld.global.u32 %r297, [%rd351]; st.local.u32 [%rd352], %r297; add.s64 %rd352, %rd352, 4; add.s64 %rd351, %rd351, 4; add.s32 %r819, %r819, 1; setp.lt.u32 %p4, %r819, 65; @%p4 bra BB0_4; add.s64 %rd129, %rd122, %rd127; ld.global.u32 %r933, [%rd129+256]; setp.eq.s32 %p5, %r8, 0; @%p5 bra BB0_369; cvta.to.local.u64 %rd9, %rd125; mov.u32 %r821, 0; bra.uni BB0_7; BB0_276: add.s32 %r887, %r171, 1; cvt.u64.u32 %rd226, %r171; add.s64 %rd227, %rd9, %rd226; ld.local.u8 %rs109, [%rd227+1]; st.local.u8 [%rd227], %rs109; BB0_277: add.s32 %r171, %r887, 1; cvt.s64.s32 %rd228, %r887; add.s64 %rd229, %rd9, %rd228; ld.local.u8 %rs110, [%rd229+1]; st.local.u8 [%rd229], %rs110; BB0_278: add.s32 %r890, %r171, 1; cvt.s64.s32 %rd230, %r171; add.s64 %rd231, %rd9, %rd230; ld.local.u8 %rs111, [%rd231+1]; st.local.u8 [%rd231], %rs111; BB0_279: setp.lt.u32 %p193, %r173, 4; @%p193 bra BB0_282; cvt.s64.s32 %rd232, %r890; add.s64 %rd370, %rd9, %rd232; BB0_281: ld.local.u8 %rs112, [%rd370+1]; ld.local.u8 %rs113, [%rd370+2]; ld.local.u8 %rs114, [%rd370+3]; ld.local.u8 %rs115, [%rd370+4]; st.local.u8 [%rd370], %rs112; st.local.u8 [%rd370+1], %rs113; st.local.u8 [%rd370+2], %rs114; add.s64 %rd64, %rd370, 4; st.local.u8 [%rd370+3], %rs115; add.s32 %r890, %r890, 4; setp.lt.s32 %p194, %r890, %r933; mov.u64 %rd370, %rd64; @%p194 bra BB0_281; BB0_282: mov.u16 %rs116, 0; st.local.u8 [%rd11+-1], %rs116; bra.uni BB0_368; BB0_7: mov.u32 %r10, %r933; shr.u32 %r11, %r8, 8; cvt.u16.u32 %rs1, %r11; shr.u32 %r12, %r8, 16; cvt.u16.u32 %rs2, %r12; bfe.u32 %r13, %r8, 8, 8; cvt.u64.u32 %rd131, %r13; add.s64 %rd10, %rd9, %rd131; cvt.s64.s32 %rd132, %r10; add.s64 %rd11, %rd9, %rd132; add.s32 %r300, %r10, -1; shl.b32 %r301, %r300, 1; cvt.s64.s32 %rd133, %r301; add.s64 %rd12, %rd9, %rd133; and.b32 %r299, %r8, 255; setp.gt.s32 %p6, %r299, 93; @%p6 bra BB0_62; setp.gt.s32 %p34, %r299, 68; @%p34 bra BB0_35; setp.gt.s32 %p48, %r299, 44; @%p48 bra BB0_22; setp.gt.s32 %p55, %r299, 41; @%p55 bra BB0_16; setp.eq.s32 %p59, %r299, 36; @%p59 bra BB0_325; bra.uni BB0_12; BB0_325: setp.gt.s32 %p221, %r10, 254; @%p221 bra BB0_367; add.s32 %r933, %r10, 1; st.local.u8 [%rd11], %rs1; bra.uni BB0_368; BB0_62: setp.gt.s32 %p7, %r299, 112; @%p7 bra BB0_92; setp.gt.s32 %p21, %r299, 104; @%p21 bra BB0_80; setp.gt.s32 %p28, %r299, 99; @%p28 bra BB0_70; setp.eq.s32 %p32, %r299, 94; @%p32 bra BB0_311; bra.uni BB0_66; BB0_311: setp.gt.s32 %p214, %r10, 254; @%p214 bra BB0_367; add.s32 %r910, %r10, -1; setp.lt.s32 %p215, %r910, 0; @%p215 bra BB0_324; and.b32 %r212, %r10, 3; setp.eq.s32 %p216, %r212, 0; @%p216 bra BB0_314; bra.uni BB0_315; BB0_314: mov.u32 %r909, %r910; mov.u32 %r910, %r10; bra.uni BB0_322; BB0_35: setp.gt.s32 %p35, %r299, 83; @%p35 bra BB0_46; setp.gt.s32 %p42, %r299, 75; @%p42 bra BB0_41; setp.eq.s32 %p46, %r299, 69; @%p46 bra BB0_128; bra.uni BB0_38; BB0_128: add.s32 %r302, %r10, 4; setp.gt.s32 %p61, %r302, 255; @%p61 bra BB0_367; mov.u32 %r823, 0; setp.lt.s32 %p62, %r10, 1; mov.u64 %rd353, %rd396; @%p62 bra BB0_131; BB0_130: ld.local.u32 %r304, [%rd353]; and.b32 %r305, %r304, 1077952576; shr.u32 %r306, %r305, 1; and.b32 %r307, %r304, -2139062144; shr.u32 %r308, %r307, 2; not.b32 %r309, %r308; and.b32 %r310, %r306, %r309; and.b32 %r311, %r304, 522133279; add.s32 %r312, %r311, 522133279; mov.u32 %r313, -84215046; sub.s32 %r314, %r313, %r311; and.b32 %r315, %r310, %r314; and.b32 %r316, %r315, %r312; or.b32 %r317, %r316, %r304; st.local.u32 [%rd353], %r317; add.s64 %rd353, %rd353, 4; add.s32 %r823, %r823, 4; setp.lt.s32 %p63, %r823, %r10; @%p63 bra BB0_130; BB0_131: mov.u32 %r825, 0; ld.local.u32 %r319, [%rd396]; and.b32 %r320, %r319, 64; shr.u32 %r321, %r320, 1; and.b32 %r322, %r319, 128; shr.u32 %r323, %r322, 2; not.b32 %r324, %r323; and.b32 %r325, %r321, %r324; and.b32 %r326, %r319, 522133279; add.s32 %r327, %r326, 31; mov.u32 %r328, -84215046; sub.s32 %r329, %r328, %r326; and.b32 %r330, %r325, %r329; and.b32 %r331, %r330, %r327; not.b32 %r332, %r331; or.b32 %r333, %r332, -33; and.b32 %r824, %r333, %r319; st.local.u32 [%rd396], %r824; mov.u64 %rd354, %rd396; @%p62 bra BB0_367; BB0_132: cvt.u16.u32 %rs21, %r824; and.b16 %rs22, %rs21, 255; setp.eq.s16 %p65, %rs22, 32; selp.b32 %r334, 65280, 0, %p65; and.b32 %r335, %r824, 65280; setp.eq.s32 %p66, %r335, 8192; or.b32 %r336, %r334, 16711680; selp.b32 %r337, %r336, %r334, %p66; and.b32 %r338, %r824, 16711680; setp.eq.s32 %p67, %r338, 2097152; or.b32 %r339, %r337, -16777216; selp.b32 %r340, %r339, %r337, %p67; and.b32 %r341, %r824, -16777216; setp.eq.s32 %p68, %r341, 536870912; selp.b32 %r342, 32, 0, %p68; and.b32 %r343, %r824, 1077952512; shr.u32 %r344, %r343, 1; and.b32 %r345, %r824, -2139062144; shr.u32 %r346, %r345, 2; not.b32 %r347, %r346; and.b32 %r348, %r344, %r347; and.b32 %r349, %r824, 522133279; add.s32 %r350, %r349, 522133279; sub.s32 %r352, %r328, %r349; and.b32 %r353, %r348, %r352; and.b32 %r354, %r353, %r350; and.b32 %r355, %r354, %r340; not.b32 %r356, %r355; and.b32 %r357, %r824, %r356; st.local.u32 [%rd354], %r357; add.s64 %rd16, %rd354, 4; ld.local.u32 %r358, [%rd354+4]; and.b32 %r359, %r358, 64; shr.u32 %r360, %r359, 1; shr.u32 %r361, %r358, 2; not.b32 %r362, %r361; and.b32 %r363, %r358, 522133279; add.s32 %r364, %r363, 31; sub.s32 %r365, %r328, %r363; and.b32 %r366, %r342, %r362; and.b32 %r367, %r366, %r360; and.b32 %r368, %r367, %r365; and.b32 %r369, %r368, %r364; not.b32 %r370, %r369; and.b32 %r824, %r358, %r370; st.local.u32 [%rd354+4], %r824; add.s32 %r825, %r825, 4; setp.lt.s32 %p69, %r825, %r10; mov.u64 %rd354, %rd16; @%p69 bra BB0_132; bra.uni BB0_367; BB0_92: setp.gt.s32 %p8, %r299, 119; @%p8 bra BB0_111; setp.gt.s32 %p15, %r299, 114; @%p15 bra BB0_106; setp.eq.s32 %p19, %r299, 113; @%p19 bra BB0_160; bra.uni BB0_95; BB0_160: add.s32 %r933, %r10, %r10; setp.gt.s32 %p109, %r933, 255; @%p109 bra BB0_367; add.s32 %r842, %r10, -1; setp.lt.s32 %p110, %r842, 0; @%p110 bra BB0_368; and.b32 %r59, %r10, 3; setp.eq.s32 %p111, %r59, 0; @%p111 bra BB0_168; setp.eq.s32 %p112, %r59, 1; add.s32 %r841, %r10, -1; @%p112 bra BB0_167; setp.eq.s32 %p113, %r59, 2; add.s32 %r840, %r10, -1; @%p113 bra BB0_166; ld.local.u8 %rs55, [%rd11+-1]; st.local.u8 [%rd12], %rs55; ld.local.u8 %rs56, [%rd11+-1]; st.local.u8 [%rd12+1], %rs56; add.s32 %r840, %r10, -2; BB0_166: cvt.s64.s32 %rd158, %r840; add.s64 %rd159, %rd9, %rd158; ld.local.u8 %rs57, [%rd159]; shl.b32 %r456, %r840, 1; cvt.s64.s32 %rd160, %r456; add.s64 %rd161, %rd9, %rd160; st.local.u8 [%rd161], %rs57; ld.local.u8 %rs58, [%rd159]; st.local.u8 [%rd161+1], %rs58; add.s32 %r841, %r840, -1; BB0_167: cvt.s64.s32 %rd162, %r841; add.s64 %rd163, %rd9, %rd162; ld.local.u8 %rs59, [%rd163]; shl.b32 %r457, %r841, 1; cvt.s64.s32 %rd164, %r457; add.s64 %rd165, %rd9, %rd164; st.local.u8 [%rd165], %rs59; ld.local.u8 %rs60, [%rd163]; st.local.u8 [%rd165+1], %rs60; add.s32 %r842, %r841, -1; BB0_168: setp.lt.u32 %p114, %r10, 4; @%p114 bra BB0_368; shl.b32 %r843, %r842, 1; BB0_170: cvt.s64.s32 %rd166, %r842; add.s64 %rd167, %rd9, %rd166; ld.local.u8 %rs61, [%rd167]; cvt.s64.s32 %rd168, %r843; add.s64 %rd169, %rd9, %rd168; st.local.u8 [%rd169], %rs61; ld.local.u8 %rs62, [%rd167]; st.local.u8 [%rd169+1], %rs62; ld.local.u8 %rs63, [%rd167+-1]; st.local.u8 [%rd169+-2], %rs63; ld.local.u8 %rs64, [%rd167+-1]; st.local.u8 [%rd169+-1], %rs64; ld.local.u8 %rs65, [%rd167+-2]; st.local.u8 [%rd169+-4], %rs65; ld.local.u8 %rs66, [%rd167+-2]; st.local.u8 [%rd169+-3], %rs66; ld.local.u8 %rs67, [%rd167+-3]; st.local.u8 [%rd169+-6], %rs67; ld.local.u8 %rs68, [%rd167+-3]; st.local.u8 [%rd169+-5], %rs68; add.s32 %r843, %r843, -8; add.s32 %r842, %r842, -4; setp.gt.s32 %p115, %r842, -1; @%p115 bra BB0_170; bra.uni BB0_368; BB0_22: setp.gt.s32 %p49, %r299, 63; @%p49 bra BB0_27; setp.eq.s32 %p53, %r299, 45; @%p53 bra BB0_152; bra.uni BB0_24; BB0_152: and.b32 %r449, %r11, 255; setp.ge.s32 %p100, %r449, %r10; @%p100 bra BB0_367; ld.local.u8 %rs40, [%rd10]; add.s16 %rs41, %rs40, -1; st.local.u8 [%rd10], %rs41; bra.uni BB0_367; BB0_80: setp.gt.s32 %p22, %r299, 107; @%p22 bra BB0_85; setp.eq.s32 %p26, %r299, 105; @%p26 bra BB0_251; bra.uni BB0_82; BB0_251: bfe.u32 %r808, %r8, 8, 8; add.s32 %r933, %r10, 1; setp.ge.s32 %p167, %r808, %r933; setp.gt.s32 %p168, %r10, 254; or.pred %p169, %p167, %p168; @%p169 bra BB0_367; bfe.u32 %r809, %r8, 8, 8; add.s32 %r476, %r10, -1; add.s32 %r148, %r809, -1; setp.le.s32 %p170, %r476, %r148; @%p170 bra BB0_254; BB0_253: cvt.s64.s32 %rd214, %r10; add.s64 %rd215, %rd9, %rd214; ld.local.u8 %rs102, [%rd215+-1]; st.local.u8 [%rd215], %rs102; add.s32 %r150, %r10, -1; add.s32 %r477, %r10, -2; setp.gt.s32 %p171, %r477, %r148; mov.u32 %r10, %r150; @%p171 bra BB0_253; BB0_254: st.local.u8 [%rd10], %rs2; bra.uni BB0_368; BB0_46: setp.gt.s32 %p36, %r299, 89; @%p36 bra BB0_56; setp.eq.s32 %p40, %r299, 84; @%p40 bra BB0_358; bra.uni BB0_48; BB0_358: and.b32 %r677, %r11, 255; setp.ge.s32 %p255, %r677, %r10; @%p255 bra BB0_367; bfe.u32 %r678, %r8, 10, 6; shl.b32 %r679, %r11, 3; and.b32 %r680, %r679, 24; mov.u32 %r681, 32; shl.b32 %r682, %r681, %r680; mul.wide.u32 %rd342, %r678, 4; add.s64 %rd343, %rd396, %rd342; ld.local.u32 %r683, [%rd343]; and.b32 %r684, %r683, 1077952576; shr.u32 %r685, %r684, 1; shr.u32 %r686, %r683, 2; not.b32 %r687, %r686; and.b32 %r688, %r683, 522133279; add.s32 %r689, %r688, 522133279; mov.u32 %r690, -84215046; sub.s32 %r691, %r690, %r688; and.b32 %r692, %r682, %r687; and.b32 %r693, %r692, %r685; and.b32 %r694, %r693, %r691; and.b32 %r695, %r694, %r689; xor.b32 %r696, %r695, %r683; st.local.u32 [%rd343], %r696; bra.uni BB0_367; BB0_111: setp.gt.s32 %p9, %r299, 121; @%p9 bra BB0_120; setp.eq.s32 %p13, %r299, 120; @%p13 bra BB0_263; bra.uni BB0_113; BB0_263: and.b32 %r158, %r11, 255; setp.ge.s32 %p178, %r158, %r10; @%p178 bra BB0_367; shr.u32 %r806, %r8, 16; and.b32 %r933, %r806, 255; add.s32 %r485, %r158, %r933; setp.gt.s32 %p179, %r485, %r10; @%p179 bra BB0_367; setp.eq.s32 %p180, %r933, 0; @%p180 bra BB0_268; bfe.u32 %r883, %r8, 8, 8; mov.u32 %r884, 0; mov.u64 %rd367, %rd9; BB0_267: cvt.s64.s32 %rd220, %r883; add.s64 %rd221, %rd9, %rd220; ld.local.u8 %rs105, [%rd221]; st.local.u8 [%rd367], %rs105; add.s64 %rd367, %rd367, 1; add.s32 %r883, %r883, 1; add.s32 %r884, %r884, 1; setp.lt.s32 %p181, %r884, %r933; @%p181 bra BB0_267; BB0_268: setp.ge.s32 %p182, %r933, %r10; @%p182 bra BB0_368; shr.u32 %r807, %r8, 16; cvt.u64.u32 %rd222, %r807; and.b64 %rd223, %rd222, 255; add.s64 %rd368, %rd9, %rd223; mov.u32 %r885, %r933; BB0_270: mov.u16 %rs106, 0; st.local.u8 [%rd368], %rs106; add.s64 %rd368, %rd368, 1; add.s32 %r885, %r885, 1; setp.lt.s32 %p183, %r885, %r10; @%p183 bra BB0_270; bra.uni BB0_368; BB0_16: setp.eq.s32 %p56, %r299, 42; @%p56 bra BB0_158; setp.eq.s32 %p57, %r299, 43; @%p57 bra BB0_154; bra.uni BB0_18; BB0_154: and.b32 %r450, %r11, 255; setp.ge.s32 %p101, %r450, %r10; @%p101 bra BB0_367; ld.local.u8 %rs42, [%rd10]; add.s16 %rs43, %rs42, 1; st.local.u8 [%rd10], %rs43; bra.uni BB0_367; BB0_70: setp.eq.s32 %p29, %r299, 100; @%p29 bra BB0_349; setp.eq.s32 %p30, %r299, 101; @%p30 bra BB0_133; bra.uni BB0_72; BB0_133: add.s32 %r371, %r10, 4; setp.gt.s32 %p70, %r371, 255; @%p70 bra BB0_367; mov.u32 %r826, 0; setp.lt.s32 %p71, %r10, 1; mov.u64 %rd355, %rd396; @%p71 bra BB0_136; BB0_135: ld.local.u32 %r373, [%rd355]; and.b32 %r374, %r373, 1077952576; shr.u32 %r375, %r374, 1; and.b32 %r376, %r373, -2139062144; shr.u32 %r377, %r376, 2; not.b32 %r378, %r377; and.b32 %r379, %r375, %r378; and.b32 %r380, %r373, 522133279; add.s32 %r381, %r380, 522133279; mov.u32 %r382, -84215046; sub.s32 %r383, %r382, %r380; and.b32 %r384, %r379, %r383; and.b32 %r385, %r384, %r381; or.b32 %r386, %r385, %r373; st.local.u32 [%rd355], %r386; add.s64 %rd355, %rd355, 4; add.s32 %r826, %r826, 4; setp.lt.s32 %p72, %r826, %r10; @%p72 bra BB0_135; BB0_136: ld.local.u32 %r387, [%rd396]; and.b32 %r388, %r387, 64; shr.u32 %r389, %r388, 1; and.b32 %r390, %r387, 128; shr.u32 %r391, %r390, 2; not.b32 %r392, %r391; and.b32 %r393, %r389, %r392; and.b32 %r394, %r387, 522133279; add.s32 %r395, %r394, 31; mov.u32 %r396, -84215046; sub.s32 %r397, %r396, %r394; and.b32 %r398, %r393, %r397; and.b32 %r399, %r398, %r395; not.b32 %r400, %r399; or.b32 %r401, %r400, -33; and.b32 %r827, %r401, %r387; st.local.u32 [%rd396], %r827; @%p71 bra BB0_367; and.b32 %r24, %r11, 255; mov.u32 %r828, 0; mov.u64 %rd356, %rd396; BB0_138: cvt.u16.u32 %rs23, %r827; and.b16 %rs24, %rs23, 255; and.b16 %rs25, %rs1, 255; setp.eq.s16 %p74, %rs24, %rs25; selp.b32 %r403, 65280, 0, %p74; shr.u16 %rs26, %rs23, 8; setp.eq.s16 %p75, %rs26, %rs25; or.b32 %r404, %r403, 16711680; selp.b32 %r405, %r404, %r403, %p75; shr.u32 %r406, %r827, 16; cvt.u16.u32 %rs27, %r406; and.b16 %rs28, %rs27, 255; setp.eq.s16 %p76, %rs28, %rs25; or.b32 %r407, %r405, -16777216; selp.b32 %r408, %r407, %r405, %p76; shr.u32 %r409, %r827, 24; setp.eq.s32 %p77, %r409, %r24; selp.b32 %r410, 32, 0, %p77; and.b32 %r411, %r827, 1077952512; shr.u32 %r412, %r411, 1; and.b32 %r413, %r827, -2139062144; shr.u32 %r414, %r413, 2; not.b32 %r415, %r414; and.b32 %r416, %r412, %r415; and.b32 %r417, %r827, 522133279; add.s32 %r418, %r417, 522133279; sub.s32 %r420, %r396, %r417; and.b32 %r421, %r416, %r420; and.b32 %r422, %r421, %r418; and.b32 %r423, %r422, %r408; not.b32 %r424, %r423; and.b32 %r425, %r827, %r424; st.local.u32 [%rd356], %r425; add.s64 %rd20, %rd356, 4; ld.local.u32 %r426, [%rd356+4]; and.b32 %r427, %r426, 64; shr.u32 %r428, %r427, 1; shr.u32 %r429, %r426, 2; not.b32 %r430, %r429; and.b32 %r431, %r426, 522133279; add.s32 %r432, %r431, 31; sub.s32 %r433, %r396, %r431; and.b32 %r434, %r410, %r430; and.b32 %r435, %r434, %r428; and.b32 %r436, %r435, %r433; and.b32 %r437, %r436, %r432; not.b32 %r438, %r437; and.b32 %r827, %r426, %r438; st.local.u32 [%rd356+4], %r827; add.s32 %r828, %r828, 4; setp.lt.s32 %p78, %r828, %r10; mov.u64 %rd356, %rd20; @%p78 bra BB0_138; bra.uni BB0_367; BB0_41: setp.eq.s32 %p43, %r299, 76; @%p43 bra BB0_156; setp.eq.s32 %p44, %r299, 79; @%p44 bra BB0_255; bra.uni BB0_43; BB0_255: and.b32 %r881, %r11, 255; setp.ge.s32 %p172, %r881, %r10; @%p172 bra BB0_367; shr.u32 %r810, %r8, 16; and.b32 %r152, %r810, 255; add.s32 %r478, %r881, %r152; setp.gt.s32 %p173, %r478, %r10; @%p173 bra BB0_367; sub.s32 %r933, %r10, %r152; setp.ge.s32 %p174, %r881, %r933; @%p174 bra BB0_260; bfe.u32 %r479, %r8, 16, 8; bfe.u32 %r481, %r8, 8, 8; add.s32 %r482, %r479, %r481; cvt.u64.u32 %rd216, %r482; add.s64 %rd365, %rd9, %rd216; cvt.u64.u32 %rd217, %r11; and.b64 %rd218, %rd217, 255; add.s64 %rd364, %rd9, %rd218; BB0_259: ld.local.u8 %rs103, [%rd365]; st.local.u8 [%rd364], %rs103; add.s64 %rd365, %rd365, 1; add.s64 %rd364, %rd364, 1; add.s32 %r881, %r881, 1; setp.lt.s32 %p175, %r881, %r933; @%p175 bra BB0_259; BB0_260: setp.eq.s32 %p176, %r152, 0; @%p176 bra BB0_368; bfe.u32 %r483, %r8, 16, 8; sub.s32 %r484, %r10, %r483; cvt.s64.s32 %rd219, %r484; add.s64 %rd366, %rd9, %rd219; mov.u32 %r882, %r933; BB0_262: mov.u16 %rs104, 0; st.local.u8 [%rd366], %rs104; add.s64 %rd366, %rd366, 1; add.s32 %r882, %r882, 1; setp.lt.s32 %p177, %r882, %r10; @%p177 bra BB0_262; bra.uni BB0_368; BB0_106: setp.eq.s32 %p16, %r299, 115; @%p16 bra BB0_226; setp.eq.s32 %p17, %r299, 116; @%p17 bra BB0_360; bra.uni BB0_108; BB0_360: mov.u32 %r928, 0; setp.lt.s32 %p256, %r10, 1; mov.u64 %rd391, %rd396; @%p256 bra BB0_367; BB0_361: ld.local.u32 %r698, [%rd391]; and.b32 %r699, %r698, 1077952576; shr.u32 %r700, %r699, 1; and.b32 %r701, %r698, -2139062144; shr.u32 %r702, %r701, 2; not.b32 %r703, %r702; and.b32 %r704, %r700, %r703; and.b32 %r705, %r698, 522133279; add.s32 %r706, %r705, 522133279; mov.u32 %r707, -84215046; sub.s32 %r708, %r707, %r705; and.b32 %r709, %r704, %r708; and.b32 %r710, %r709, %r706; xor.b32 %r711, %r710, %r698; st.local.u32 [%rd391], %r711; add.s64 %rd391, %rd391, 4; add.s32 %r928, %r928, 4; setp.lt.s32 %p257, %r928, %r10; @%p257 bra BB0_361; bra.uni BB0_367; BB0_27: setp.eq.s32 %p50, %r299, 64; @%p50 bra BB0_194; setp.eq.s32 %p51, %r299, 67; @%p51 bra BB0_362; bra.uni BB0_29; BB0_362: mov.u32 %r929, 0; setp.lt.s32 %p258, %r10, 1; mov.u64 %rd392, %rd396; @%p258 bra BB0_364; BB0_363: ld.local.u32 %r713, [%rd392]; and.b32 %r714, %r713, 1077952576; shr.u32 %r715, %r714, 1; and.b32 %r716, %r713, -2139062144; shr.u32 %r717, %r716, 2; not.b32 %r718, %r717; and.b32 %r719, %r715, %r718; and.b32 %r720, %r713, 522133279; add.s32 %r721, %r720, 522133279; mov.u32 %r722, -84215046; sub.s32 %r723, %r722, %r720; and.b32 %r724, %r719, %r723; and.b32 %r725, %r724, %r721; not.b32 %r726, %r725; and.b32 %r727, %r713, %r726; st.local.u32 [%rd392], %r727; add.s64 %rd392, %rd392, 4; add.s32 %r929, %r929, 4; setp.lt.s32 %p259, %r929, %r10; @%p259 bra BB0_363; BB0_364: ld.local.u32 %r728, [%rd396]; and.b32 %r729, %r728, 64; shr.u32 %r730, %r729, 1; shr.u32 %r731, %r728, 2; and.b32 %r732, %r728, 522133279; add.s32 %r733, %r732, 31; mov.u32 %r734, -84215046; sub.s32 %r735, %r734, %r732; not.b32 %r736, %r731; and.b32 %r737, %r736, %r730; and.b32 %r738, %r737, %r735; and.b32 %r739, %r738, %r733; or.b32 %r740, %r739, %r728; st.local.u32 [%rd396], %r740; bra.uni BB0_367; BB0_85: setp.eq.s32 %p23, %r299, 108; @%p23 bra BB0_365; setp.eq.s32 %p24, %r299, 111; @%p24 bra BB0_249; bra.uni BB0_87; BB0_249: and.b32 %r475, %r11, 255; setp.ge.s32 %p166, %r475, %r10; @%p166 bra BB0_367; st.local.u8 [%rd10], %rs2; bra.uni BB0_367; BB0_56: setp.eq.s32 %p37, %r299, 90; @%p37 bra BB0_171; setp.eq.s32 %p38, %r299, 91; @%p38 bra BB0_283; bra.uni BB0_58; BB0_283: setp.lt.s32 %p195, %r10, 1; @%p195 bra BB0_367; add.s32 %r933, %r10, -1; setp.lt.s32 %p196, %r933, 1; @%p196 bra BB0_294; and.b32 %r184, %r933, 3; setp.eq.s32 %p197, %r184, 0; mov.u32 %r894, 0; @%p197 bra BB0_291; setp.eq.s32 %p198, %r184, 1; mov.u32 %r892, 0; @%p198 bra BB0_290; setp.eq.s32 %p199, %r184, 2; mov.u32 %r891, 0; @%p199 bra BB0_289; ld.local.u8 %rs117, [%rd9+1]; st.local.u8 [%rd9], %rs117; mov.u32 %r891, 1; BB0_289: add.s32 %r892, %r891, 1; cvt.u64.u32 %rd233, %r892; add.s64 %rd234, %rd9, %rd233; ld.local.u8 %rs118, [%rd234]; cvt.u64.u32 %rd235, %r891; add.s64 %rd236, %rd9, %rd235; st.local.u8 [%rd236], %rs118; BB0_290: add.s32 %r894, %r892, 1; cvt.s64.s32 %rd237, %r892; add.s64 %rd238, %rd9, %rd237; ld.local.u8 %rs119, [%rd238+1]; st.local.u8 [%rd238], %rs119; BB0_291: setp.lt.u32 %p200, %r933, 4; @%p200 bra BB0_294; cvt.s64.s32 %rd239, %r894; add.s64 %rd371, %rd9, %rd239; BB0_293: ld.local.u8 %rs120, [%rd371+1]; ld.local.u8 %rs121, [%rd371+2]; ld.local.u8 %rs122, [%rd371+3]; ld.local.u8 %rs123, [%rd371+4]; st.local.u8 [%rd371], %rs120; st.local.u8 [%rd371+1], %rs121; st.local.u8 [%rd371+2], %rs122; add.s64 %rd67, %rd371, 4; st.local.u8 [%rd371+3], %rs123; add.s32 %r894, %r894, 4; setp.lt.s32 %p201, %r894, %r933; mov.u64 %rd371, %rd67; @%p201 bra BB0_293; BB0_294: mov.u16 %rs124, 0; st.local.u8 [%rd11+-1], %rs124; bra.uni BB0_368; BB0_120: setp.eq.s32 %p10, %r299, 122; @%p10 bra BB0_177; setp.eq.s32 %p11, %r299, 123; @%p11 bra BB0_302; bra.uni BB0_122; BB0_302: setp.lt.s32 %p208, %r300, 1; @%p208 bra BB0_367; and.b32 %r202, %r300, 3; setp.eq.s32 %p209, %r202, 0; mov.u32 %r902, %r300; @%p209 bra BB0_309; setp.eq.s32 %p210, %r202, 1; mov.u32 %r900, %r300; @%p210 bra BB0_308; setp.eq.s32 %p211, %r202, 2; mov.u32 %r899, %r300; @%p211 bra BB0_307; ld.local.u8 %rs130, [%rd9]; ld.local.u8 %rs131, [%rd11+-1]; st.local.u8 [%rd9], %rs131; st.local.u8 [%rd11+-1], %rs130; add.s32 %r899, %r10, -2; BB0_307: ld.local.u8 %rs132, [%rd9]; cvt.s64.s32 %rd245, %r899; add.s64 %rd246, %rd9, %rd245; ld.local.u8 %rs133, [%rd246]; st.local.u8 [%rd9], %rs133; st.local.u8 [%rd246], %rs132; add.s32 %r900, %r899, -1; BB0_308: ld.local.u8 %rs134, [%rd9]; cvt.s64.s32 %rd247, %r900; add.s64 %rd248, %rd9, %rd247; ld.local.u8 %rs135, [%rd248]; st.local.u8 [%rd9], %rs135; st.local.u8 [%rd248], %rs134; add.s32 %r902, %r900, -1; BB0_309: setp.lt.u32 %p212, %r300, 4; @%p212 bra BB0_367; BB0_310: ld.local.u8 %rs136, [%rd9]; cvt.s64.s32 %rd249, %r902; add.s64 %rd250, %rd9, %rd249; ld.local.u8 %rs137, [%rd250]; st.local.u8 [%rd9], %rs137; st.local.u8 [%rd250], %rs136; ld.local.u8 %rs138, [%rd9]; ld.local.u8 %rs139, [%rd250+-1]; st.local.u8 [%rd9], %rs139; st.local.u8 [%rd250+-1], %rs138; ld.local.u8 %rs140, [%rd9]; ld.local.u8 %rs141, [%rd250+-2]; st.local.u8 [%rd9], %rs141; st.local.u8 [%rd250+-2], %rs140; ld.local.u8 %rs142, [%rd9]; ld.local.u8 %rs143, [%rd250+-3]; st.local.u8 [%rd9], %rs143; st.local.u8 [%rd250+-3], %rs142; add.s32 %r902, %r902, -4; setp.gt.s32 %p213, %r902, 0; @%p213 bra BB0_310; bra.uni BB0_367; BB0_12: setp.eq.s32 %p60, %r299, 39; @%p60 bra BB0_13; bra.uni BB0_367; BB0_13: and.b32 %r933, %r11, 255; setp.ge.s32 %p164, %r933, %r10; @%p164 bra BB0_367; cvt.u64.u32 %rd212, %r11; and.b64 %rd213, %rd212, 255; add.s64 %rd363, %rd9, %rd213; mov.u32 %r879, %r933; BB0_15: mov.u16 %rs101, 0; st.local.u8 [%rd363], %rs101; add.s64 %rd363, %rd363, 1; add.s32 %r879, %r879, 1; setp.lt.s32 %p165, %r879, %r10; @%p165 bra BB0_15; bra.uni BB0_368; BB0_66: setp.eq.s32 %p33, %r299, 99; @%p33 bra BB0_67; bra.uni BB0_367; BB0_67: mov.u32 %r931, 0; setp.lt.s32 %p262, %r10, 1; mov.u64 %rd394, %rd396; @%p262 bra BB0_69; BB0_68: ld.local.u32 %r758, [%rd394]; and.b32 %r759, %r758, 1077952576; shr.u32 %r760, %r759, 1; and.b32 %r761, %r758, -2139062144; shr.u32 %r762, %r761, 2; not.b32 %r763, %r762; and.b32 %r764, %r760, %r763; and.b32 %r765, %r758, 522133279; add.s32 %r766, %r765, 522133279; mov.u32 %r767, -84215046; sub.s32 %r768, %r767, %r765; and.b32 %r769, %r764, %r768; and.b32 %r770, %r769, %r766; or.b32 %r771, %r770, %r758; st.local.u32 [%rd394], %r771; add.s64 %rd394, %rd394, 4; add.s32 %r931, %r931, 4; setp.lt.s32 %p263, %r931, %r10; @%p263 bra BB0_68; BB0_69: ld.local.u32 %r772, [%rd396]; and.b32 %r773, %r772, 64; shr.u32 %r774, %r773, 1; and.b32 %r775, %r772, 128; shr.u32 %r776, %r775, 2; not.b32 %r777, %r776; and.b32 %r778, %r774, %r777; and.b32 %r779, %r772, 522133279; add.s32 %r780, %r779, 31; mov.u32 %r781, -84215046; sub.s32 %r782, %r781, %r779; and.b32 %r783, %r778, %r782; and.b32 %r784, %r783, %r780; not.b32 %r785, %r784; or.b32 %r786, %r785, -33; and.b32 %r787, %r786, %r772; st.local.u32 [%rd396], %r787; bra.uni BB0_367; BB0_38: setp.eq.s32 %p47, %r299, 75; @%p47 bra BB0_39; bra.uni BB0_367; BB0_39: setp.lt.s32 %p107, %r10, 2; @%p107 bra BB0_367; ld.local.u8 %rs49, [%rd11+-2]; ld.local.u8 %rs50, [%rd11+-1]; st.local.u8 [%rd11+-2], %rs50; st.local.u8 [%rd11+-1], %rs49; bra.uni BB0_367; BB0_95: setp.eq.s32 %p20, %r299, 114; @%p20 bra BB0_96; bra.uni BB0_367; BB0_96: shr.u32 %r667, %r10, 31; add.s32 %r668, %r10, %r667; shr.s32 %r264, %r668, 1; setp.lt.s32 %p249, %r10, 2; @%p249 bra BB0_367; add.s32 %r265, %r10, -1; mov.u32 %r670, 1; max.s32 %r266, %r264, %r670; and.b32 %r267, %r266, 3; setp.eq.s32 %p250, %r267, 0; mov.u32 %r926, 0; @%p250 bra BB0_103; setp.eq.s32 %p251, %r267, 1; mov.u32 %r925, 0; @%p251 bra BB0_102; setp.eq.s32 %p252, %r267, 2; mov.u32 %r924, 0; @%p252 bra BB0_101; ld.local.u8 %rs172, [%rd9]; ld.local.u8 %rs173, [%rd11+-1]; st.local.u8 [%rd9], %rs173; st.local.u8 [%rd11+-1], %rs172; mov.u32 %r924, %r670; BB0_101: cvt.u64.u32 %rd332, %r924; add.s64 %rd333, %rd9, %rd332; ld.local.u8 %rs174, [%rd333]; sub.s32 %r674, %r265, %r924; cvt.s64.s32 %rd334, %r674; add.s64 %rd335, %rd9, %rd334; ld.local.u8 %rs175, [%rd335]; st.local.u8 [%rd333], %rs175; st.local.u8 [%rd335], %rs174; add.s32 %r925, %r924, 1; BB0_102: cvt.s64.s32 %rd336, %r925; add.s64 %rd337, %rd9, %rd336; ld.local.u8 %rs176, [%rd337]; sub.s32 %r675, %r265, %r925; cvt.s64.s32 %rd338, %r675; add.s64 %rd339, %rd9, %rd338; ld.local.u8 %rs177, [%rd339]; st.local.u8 [%rd337], %rs177; st.local.u8 [%rd339], %rs176; add.s32 %r926, %r925, 1; BB0_103: setp.lt.u32 %p253, %r266, 4; @%p253 bra BB0_367; cvt.s64.s32 %rd340, %r926; add.s64 %rd390, %rd9, %rd340; sub.s32 %r676, %r10, %r926; cvt.s64.s32 %rd341, %r676; add.s64 %rd389, %rd9, %rd341; BB0_105: ld.local.u8 %rs178, [%rd390]; ld.local.u8 %rs179, [%rd389+-1]; st.local.u8 [%rd390], %rs179; st.local.u8 [%rd389+-1], %rs178; ld.local.u8 %rs180, [%rd390+1]; ld.local.u8 %rs181, [%rd389+-2]; st.local.u8 [%rd390+1], %rs181; st.local.u8 [%rd389+-2], %rs180; ld.local.u8 %rs182, [%rd390+2]; ld.local.u8 %rs183, [%rd389+-3]; st.local.u8 [%rd390+2], %rs183; st.local.u8 [%rd389+-3], %rs182; ld.local.u8 %rs184, [%rd390+3]; add.s64 %rd105, %rd389, -4; ld.local.u8 %rs185, [%rd389+-4]; st.local.u8 [%rd390+3], %rs185; st.local.u8 [%rd389+-4], %rs184; add.s64 %rd390, %rd390, 4; add.s32 %r926, %r926, 4; setp.lt.s32 %p254, %r926, %r264; mov.u64 %rd389, %rd105; @%p254 bra BB0_105; bra.uni BB0_367; BB0_24: setp.eq.s32 %p54, %r299, 46; @%p54 bra BB0_25; bra.uni BB0_367; BB0_25: and.b32 %r447, %r11, 255; add.s32 %r448, %r447, 1; setp.ge.s32 %p99, %r448, %r10; @%p99 bra BB0_367; ld.local.u8 %rs39, [%rd10+1]; st.local.u8 [%rd10], %rs39; bra.uni BB0_367; BB0_82: setp.eq.s32 %p27, %r299, 107; @%p27 bra BB0_83; bra.uni BB0_367; BB0_83: setp.lt.s32 %p108, %r10, 2; @%p108 bra BB0_367; add.u64 %rd157, %SPL, 0; ld.local.v2.u8 {%rs51, %rs52}, [%rd157]; st.local.v2.u8 [%rd157], {%rs52, %rs51}; bra.uni BB0_367; BB0_48: setp.eq.s32 %p41, %r299, 89; @%p41 bra BB0_49; bra.uni BB0_367; BB0_49: and.b32 %r29, %r11, 255; setp.le.s32 %p79, %r10, %r29; @%p79 bra BB0_367; add.s32 %r933, %r29, %r10; setp.gt.s32 %p80, %r933, 255; @%p80 bra BB0_367; setp.eq.s32 %p81, %r29, 0; @%p81 bra BB0_368; cvt.s64.s32 %rd350, %r10; bfe.u32 %r440, %r8, 8, 8; sub.s32 %r441, %r10, %r440; cvt.s64.s32 %rd134, %r441; add.s64 %rd358, %rd9, %rd134; add.s64 %rd357, %rd9, %rd350; mov.u32 %r830, 0; BB0_53: setp.gt.s32 %p82, %r10, 254; @%p82 bra BB0_55; ld.local.u8 %rs29, [%rd358]; st.local.u8 [%rd357], %rs29; BB0_55: add.s32 %r830, %r830, 1; add.s64 %rd358, %rd358, 1; add.s64 %rd357, %rd357, 1; add.s32 %r10, %r10, 1; setp.lt.s32 %p83, %r830, %r29; @%p83 bra BB0_53; bra.uni BB0_368; BB0_113: setp.eq.s32 %p14, %r299, 121; @%p14 bra BB0_114; bra.uni BB0_367; BB0_114: and.b32 %r35, %r11, 255; setp.ge.s32 %p84, %r35, %r10; @%p84 bra BB0_367; add.s32 %r933, %r35, %r10; setp.gt.s32 %p85, %r933, 255; @%p85 bra BB0_367; setp.eq.s32 %p86, %r35, 0; @%p86 bra BB0_368; mov.u32 %r831, 0; BB0_118: cvt.u64.u32 %rd27, %r831; add.s32 %r39, %r831, %r10; add.s32 %r443, %r39, 1; and.b32 %r40, %r831, 255; setp.ge.s32 %p87, %r40, %r443; setp.gt.s32 %p88, %r39, 254; or.pred %p89, %p87, %p88; @%p89 bra BB0_151; shl.b32 %r444, %r831, 1; cvt.s64.s32 %rd136, %r444; add.s64 %rd137, %rd9, %rd136; ld.local.u8 %rs3, [%rd137]; add.s32 %r839, %r39, -1; add.s32 %r42, %r40, -1; setp.le.s32 %p90, %r839, %r42; @%p90 bra BB0_150; and.b32 %r812, %r10, 3; setp.eq.s32 %p91, %r812, 0; @%p91 bra BB0_140; bra.uni BB0_141; BB0_140: mov.u32 %r838, %r839; mov.u32 %r839, %r39; bra.uni BB0_148; BB0_141: and.b32 %r813, %r10, 3; setp.eq.s32 %p92, %r813, 1; @%p92 bra BB0_142; bra.uni BB0_143; BB0_142: mov.u32 %r835, %r39; bra.uni BB0_147; BB0_143: and.b32 %r814, %r10, 3; setp.eq.s32 %p93, %r814, 2; @%p93 bra BB0_144; bra.uni BB0_145; BB0_144: mov.u32 %r835, %r839; mov.u32 %r839, %r39; bra.uni BB0_146; BB0_145: cvt.s64.s32 %rd138, %r839; add.s64 %rd139, %rd9, %rd138; ld.local.u8 %rs30, [%rd139]; st.local.u8 [%rd139+1], %rs30; add.s32 %r835, %r39, -2; BB0_146: cvt.s64.s32 %rd140, %r835; add.s64 %rd141, %rd9, %rd140; ld.local.u8 %rs31, [%rd141]; cvt.s64.s32 %rd142, %r839; add.s64 %rd143, %rd9, %rd142; st.local.u8 [%rd143], %rs31; add.s32 %r839, %r835, -1; BB0_147: cvt.s64.s32 %rd144, %r839; add.s64 %rd145, %rd9, %rd144; ld.local.u8 %rs32, [%rd145]; cvt.s64.s32 %rd146, %r835; add.s64 %rd147, %rd9, %rd146; st.local.u8 [%rd147], %rs32; add.s32 %r838, %r839, -1; BB0_148: setp.lt.u32 %p94, %r10, 4; @%p94 bra BB0_150; BB0_149: cvt.s64.s32 %rd148, %r838; add.s64 %rd149, %rd9, %rd148; ld.local.u8 %rs33, [%rd149]; cvt.s64.s32 %rd150, %r839; add.s64 %rd151, %rd9, %rd150; st.local.u8 [%rd151], %rs33; ld.local.u8 %rs34, [%rd149+-1]; ld.local.u8 %rs35, [%rd149+-2]; ld.local.u8 %rs36, [%rd149+-3]; st.local.u8 [%rd149], %rs34; st.local.u8 [%rd149+-1], %rs35; st.local.u8 [%rd149+-2], %rs36; add.s32 %r54, %r838, -4; setp.gt.s32 %p95, %r54, %r42; add.s32 %r839, %r838, -3; mov.u32 %r838, %r54; @%p95 bra BB0_149; BB0_150: and.b64 %rd152, %rd27, 255; add.s64 %rd153, %rd9, %rd152; st.local.u8 [%rd153], %rs3; BB0_151: cvt.u32.u64 %r445, %rd27; add.s32 %r831, %r445, 1; setp.lt.s32 %p96, %r831, %r35; @%p96 bra BB0_118; bra.uni BB0_368; BB0_158: shr.u32 %r811, %r8, 16; and.b32 %r454, %r11, 255; setp.ge.s32 %p104, %r454, %r10; and.b32 %r57, %r811, 255; setp.ge.s32 %p105, %r57, %r10; or.pred %p106, %p104, %p105; @%p106 bra BB0_367; ld.local.u8 %rs47, [%rd10]; cvt.u64.u32 %rd154, %r57; add.s64 %rd155, %rd9, %rd154; ld.local.u8 %rs48, [%rd155]; st.local.u8 [%rd10], %rs48; st.local.u8 [%rd155], %rs47; bra.uni BB0_367; BB0_18: setp.eq.s32 %p58, %r299, 44; @%p58 bra BB0_19; bra.uni BB0_367; BB0_19: and.b16 %rs37, %rs1, 255; setp.eq.s16 %p97, %rs37, 0; @%p97 bra BB0_367; and.b32 %r446, %r11, 255; setp.ge.s32 %p98, %r446, %r10; @%p98 bra BB0_367; ld.local.u8 %rs38, [%rd10+-1]; st.local.u8 [%rd10], %rs38; bra.uni BB0_367; BB0_349: shl.b32 %r933, %r10, 1; setp.gt.s32 %p243, %r933, 255; @%p243 bra BB0_367; add.s32 %r258, %r10, -3; mov.u32 %r263, 0; setp.lt.s32 %p244, %r258, 1; mov.u32 %r922, %r10; @%p244 bra BB0_352; BB0_351: and.b32 %r595, %r922, 3; shl.b32 %r596, %r595, 3; shr.s32 %r597, %r263, 31; shr.u32 %r598, %r597, 30; add.s32 %r599, %r263, %r598; shr.s32 %r600, %r599, 2; mul.wide.s32 %rd304, %r600, 4; add.s64 %rd305, %rd396, %rd304; ld.local.u32 %r601, [%rd305+4]; ld.local.u32 %r602, [%rd305]; mov.b64 %rd306, {%r602, %r601}; and.b64 %rd307, %rd306, 4294967295; shl.b64 %rd308, %rd307, %r596; cvt.u32.u64 %r603, %rd308; shr.u64 %rd309, %rd308, 32; cvt.u32.u64 %r604, %rd309; shr.s32 %r605, %r922, 31; shr.u32 %r606, %r605, 30; add.s32 %r607, %r922, %r606; shr.s32 %r608, %r607, 2; mul.wide.s32 %rd310, %r608, 4; add.s64 %rd311, %rd396, %rd310; ld.local.u32 %r609, [%rd311]; or.b32 %r610, %r603, %r609; ld.local.u32 %r611, [%rd311+4]; st.local.u32 [%rd311], %r610; or.b32 %r612, %r604, %r611; st.local.u32 [%rd311+4], %r612; add.s32 %r922, %r922, 4; add.s32 %r263, %r263, 4; setp.lt.s32 %p245, %r263, %r258; @%p245 bra BB0_351; BB0_352: sub.s32 %r613, %r10, %r263; setp.eq.s32 %p246, %r613, 3; @%p246 bra BB0_357; setp.eq.s32 %p247, %r613, 2; @%p247 bra BB0_356; bra.uni BB0_354; BB0_356: add.s32 %r629, %r263, %r10; shr.s32 %r630, %r629, 31; shr.u32 %r631, %r630, 30; add.s32 %r632, %r629, %r631; shr.s32 %r633, %r632, 2; and.b32 %r634, %r629, 3; shl.b32 %r635, %r634, 3; shr.s32 %r636, %r263, 31; shr.u32 %r637, %r636, 30; add.s32 %r638, %r263, %r637; shr.s32 %r639, %r638, 2; mul.wide.s32 %rd316, %r639, 4; add.s64 %rd317, %rd396, %rd316; ld.local.u32 %r640, [%rd317+4]; ld.local.u32 %r641, [%rd317]; mov.b64 %rd318, {%r641, %r640}; and.b64 %rd319, %rd318, 65535; shl.b64 %rd320, %rd319, %r635; cvt.u32.u64 %r642, %rd320; shr.u64 %rd321, %rd320, 32; cvt.u32.u64 %r643, %rd321; mul.wide.s32 %rd322, %r633, 4; add.s64 %rd323, %rd396, %rd322; ld.local.u32 %r644, [%rd323]; or.b32 %r645, %r642, %r644; ld.local.u32 %r646, [%rd323+4]; st.local.u32 [%rd323], %r645; or.b32 %r647, %r643, %r646; st.local.u32 [%rd323+4], %r647; bra.uni BB0_368; BB0_72: setp.eq.s32 %p31, %r299, 102; @%p31 bra BB0_73; bra.uni BB0_367; BB0_73: shl.b32 %r228, %r10, 1; setp.gt.s32 %p222, %r228, 255; @%p222 bra BB0_367; add.s32 %r229, %r10, -3; mov.u32 %r912, 0; setp.lt.s32 %p223, %r229, 1; mov.u32 %r911, %r10; @%p223 bra BB0_76; BB0_75: add.s32 %r818, %r10, -3; and.b32 %r501, %r911, 3; shl.b32 %r502, %r501, 3; shr.s32 %r503, %r912, 31; shr.u32 %r504, %r503, 30; add.s32 %r505, %r912, %r504; shr.s32 %r506, %r505, 2; mul.wide.s32 %rd263, %r506, 4; add.s64 %rd264, %rd396, %rd263; ld.local.u32 %r507, [%rd264+4]; ld.local.u32 %r508, [%rd264]; mov.b64 %rd265, {%r508, %r507}; and.b64 %rd266, %rd265, 4294967295; shl.b64 %rd267, %rd266, %r502; cvt.u32.u64 %r509, %rd267; shr.u64 %rd268, %rd267, 32; cvt.u32.u64 %r510, %rd268; shr.s32 %r511, %r911, 31; shr.u32 %r512, %r511, 30; add.s32 %r513, %r911, %r512; shr.s32 %r514, %r513, 2; mul.wide.s32 %rd269, %r514, 4; add.s64 %rd270, %rd396, %rd269; ld.local.u32 %r515, [%rd270]; or.b32 %r516, %r509, %r515; ld.local.u32 %r517, [%rd270+4]; st.local.u32 [%rd270], %r516; or.b32 %r518, %r510, %r517; st.local.u32 [%rd270+4], %r518; add.s32 %r911, %r911, 4; add.s32 %r912, %r912, 4; setp.lt.s32 %p224, %r912, %r818; @%p224 bra BB0_75; BB0_76: sub.s32 %r519, %r10, %r912; setp.eq.s32 %p225, %r519, 3; @%p225 bra BB0_328; setp.eq.s32 %p226, %r519, 2; @%p226 bra BB0_327; bra.uni BB0_78; BB0_327: add.s32 %r535, %r912, %r10; shr.s32 %r536, %r535, 31; shr.u32 %r537, %r536, 30; add.s32 %r538, %r535, %r537; shr.s32 %r539, %r538, 2; and.b32 %r540, %r535, 3; shl.b32 %r541, %r540, 3; shr.s32 %r542, %r912, 31; shr.u32 %r543, %r542, 30; add.s32 %r544, %r912, %r543; shr.s32 %r545, %r544, 2; mul.wide.s32 %rd275, %r545, 4; add.s64 %rd276, %rd396, %rd275; ld.local.u32 %r546, [%rd276+4]; ld.local.u32 %r547, [%rd276]; mov.b64 %rd277, {%r547, %r546}; and.b64 %rd278, %rd277, 65535; shl.b64 %rd279, %rd278, %r541; cvt.u32.u64 %r548, %rd279; shr.u64 %rd280, %rd279, 32; cvt.u32.u64 %r549, %rd280; mul.wide.s32 %rd281, %r539, 4; add.s64 %rd282, %rd396, %rd281; ld.local.u32 %r550, [%rd282]; or.b32 %r551, %r548, %r550; ld.local.u32 %r552, [%rd282+4]; st.local.u32 [%rd282], %r551; or.b32 %r553, %r549, %r552; st.local.u32 [%rd282+4], %r553; bra.uni BB0_329; BB0_156: and.b32 %r452, %r11, 255; setp.ge.s32 %p103, %r452, %r10; @%p103 bra BB0_367; ld.local.u8 %rs46, [%rd10]; mul.wide.u16 %r453, %rs46, 2; st.local.u8 [%rd10], %r453; bra.uni BB0_367; BB0_43: setp.eq.s32 %p45, %r299, 82; @%p45 bra BB0_44; bra.uni BB0_367; BB0_44: and.b32 %r451, %r11, 255; setp.ge.s32 %p102, %r451, %r10; @%p102 bra BB0_367; ld.local.u8 %rs44, [%rd10]; shr.u16 %rs45, %rs44, 1; st.local.u8 [%rd10], %rs45; bra.uni BB0_367; BB0_226: setp.lt.s32 %p151, %r10, 1; @%p151 bra BB0_367; and.b32 %r136, %r10, 3; setp.eq.s32 %p152, %r136, 0; mov.u32 %r877, 0; @%p152 bra BB0_238; setp.eq.s32 %p153, %r136, 1; mov.u32 %r876, 0; @%p153 bra BB0_235; setp.eq.s32 %p154, %r136, 2; mov.u32 %r875, 0; @%p154 bra BB0_232; ld.local.u8 %rs87, [%rd9]; and.b16 %rs88, %rs1, 255; mov.u32 %r875, 1; setp.ne.s16 %p155, %rs87, %rs88; @%p155 bra BB0_232; st.local.u8 [%rd9], %rs2; BB0_232: cvt.u64.u32 %rd209, %r875; add.s64 %rd37, %rd9, %rd209; ld.local.u8 %rs89, [%rd37]; and.b16 %rs90, %rs1, 255; setp.ne.s16 %p156, %rs89, %rs90; @%p156 bra BB0_234; st.local.u8 [%rd37], %rs2; BB0_234: add.s32 %r876, %r875, 1; BB0_235: cvt.s64.s32 %rd210, %r876; add.s64 %rd38, %rd9, %rd210; ld.local.u8 %rs91, [%rd38]; and.b16 %rs92, %rs1, 255; setp.ne.s16 %p157, %rs91, %rs92; @%p157 bra BB0_237; st.local.u8 [%rd38], %rs2; BB0_237: add.s32 %r877, %r876, 1; BB0_238: setp.lt.u32 %p158, %r10, 4; @%p158 bra BB0_367; cvt.s64.s32 %rd211, %r877; add.s64 %rd362, %rd9, %rd211; BB0_240: ld.local.u8 %rs93, [%rd362]; and.b16 %rs94, %rs1, 255; setp.ne.s16 %p159, %rs93, %rs94; @%p159 bra BB0_242; st.local.u8 [%rd362], %rs2; BB0_242: ld.local.u8 %rs95, [%rd362+1]; setp.ne.s16 %p160, %rs95, %rs94; @%p160 bra BB0_244; st.local.u8 [%rd362+1], %rs2; BB0_244: ld.local.u8 %rs97, [%rd362+2]; setp.ne.s16 %p161, %rs97, %rs94; @%p161 bra BB0_246; st.local.u8 [%rd362+2], %rs2; BB0_246: ld.local.u8 %rs99, [%rd362+3]; setp.ne.s16 %p162, %rs99, %rs94; @%p162 bra BB0_248; st.local.u8 [%rd362+3], %rs2; BB0_248: add.s32 %r877, %r877, 4; setp.lt.s32 %p163, %r877, %r10; add.s64 %rd362, %rd362, 4; @%p163 bra BB0_240; bra.uni BB0_367; BB0_108: setp.eq.s32 %p18, %r299, 117; @%p18 bra BB0_109; bra.uni BB0_367; BB0_109: mov.u32 %r930, 0; setp.lt.s32 %p260, %r10, 1; mov.u64 %rd393, %rd396; @%p260 bra BB0_367; BB0_110: ld.local.u32 %r742, [%rd393]; and.b32 %r743, %r742, 1077952576; shr.u32 %r744, %r743, 1; and.b32 %r745, %r742, -2139062144; shr.u32 %r746, %r745, 2; not.b32 %r747, %r746; and.b32 %r748, %r744, %r747; and.b32 %r749, %r742, 522133279; add.s32 %r750, %r749, 522133279; mov.u32 %r751, -84215046; sub.s32 %r752, %r751, %r749; and.b32 %r753, %r748, %r752; and.b32 %r754, %r753, %r750; not.b32 %r755, %r754; and.b32 %r756, %r742, %r755; st.local.u32 [%rd393], %r756; add.s64 %rd393, %rd393, 4; add.s32 %r930, %r930, 4; setp.lt.s32 %p261, %r930, %r10; @%p261 bra BB0_110; bra.uni BB0_367; BB0_194: mov.u32 %r933, 0; setp.lt.s32 %p132, %r10, 1; @%p132 bra BB0_216; and.b32 %r100, %r10, 3; setp.eq.s32 %p133, %r100, 0; mov.u32 %r862, 0; mov.u32 %r933, %r862; @%p133 bra BB0_205; setp.eq.s32 %p134, %r100, 1; mov.u32 %r859, 0; mov.u32 %r933, %r859; @%p134 bra BB0_202; setp.eq.s32 %p135, %r100, 2; mov.u32 %r856, 0; mov.u32 %r933, %r856; @%p135 bra BB0_199; ld.local.u8 %rs76, [%rd9]; and.b16 %rs77, %rs1, 255; setp.ne.s16 %p136, %rs76, %rs77; selp.u32 %r933, 1, 0, %p136; mov.u32 %r856, 1; BB0_199: cvt.u64.u32 %rd185, %r856; add.s64 %rd186, %rd9, %rd185; ld.local.u8 %rs6, [%rd186]; and.b16 %rs78, %rs1, 255; setp.eq.s16 %p137, %rs6, %rs78; @%p137 bra BB0_201; cvt.u64.u32 %rd187, %r933; add.s64 %rd188, %rd9, %rd187; st.local.u8 [%rd188], %rs6; add.s32 %r933, %r933, 1; BB0_201: add.s32 %r859, %r856, 1; BB0_202: cvt.s64.s32 %rd189, %r859; add.s64 %rd190, %rd9, %rd189; ld.local.u8 %rs7, [%rd190]; and.b16 %rs79, %rs1, 255; setp.eq.s16 %p138, %rs7, %rs79; @%p138 bra BB0_204; cvt.s64.s32 %rd191, %r933; add.s64 %rd192, %rd9, %rd191; st.local.u8 [%rd192], %rs7; add.s32 %r933, %r933, 1; BB0_204: add.s32 %r862, %r859, 1; BB0_205: setp.lt.u32 %p139, %r10, 4; @%p139 bra BB0_216; cvt.s64.s32 %rd193, %r862; add.s64 %rd360, %rd9, %rd193; BB0_207: ld.local.u8 %rs8, [%rd360]; and.b16 %rs80, %rs1, 255; setp.eq.s16 %p140, %rs8, %rs80; @%p140 bra BB0_209; cvt.s64.s32 %rd194, %r933; add.s64 %rd195, %rd9, %rd194; st.local.u8 [%rd195], %rs8; add.s32 %r933, %r933, 1; BB0_209: ld.local.u8 %rs9, [%rd360+1]; setp.eq.s16 %p141, %rs9, %rs80; @%p141 bra BB0_211; cvt.s64.s32 %rd196, %r933; add.s64 %rd197, %rd9, %rd196; st.local.u8 [%rd197], %rs9; add.s32 %r933, %r933, 1; BB0_211: ld.local.u8 %rs10, [%rd360+2]; setp.eq.s16 %p142, %rs10, %rs80; @%p142 bra BB0_213; cvt.s64.s32 %rd198, %r933; add.s64 %rd199, %rd9, %rd198; st.local.u8 [%rd199], %rs10; add.s32 %r933, %r933, 1; BB0_213: ld.local.u8 %rs11, [%rd360+3]; setp.eq.s16 %p143, %rs11, %rs80; @%p143 bra BB0_215; cvt.s64.s32 %rd200, %r933; add.s64 %rd201, %rd9, %rd200; st.local.u8 [%rd201], %rs11; add.s32 %r933, %r933, 1; BB0_215: add.s32 %r862, %r862, 4; setp.lt.s32 %p144, %r862, %r10; add.s64 %rd360, %rd360, 4; @%p144 bra BB0_207; BB0_216: setp.le.s32 %p145, %r10, %r933; @%p145 bra BB0_368; sub.s32 %r126, %r10, %r933; and.b32 %r127, %r126, 3; setp.eq.s32 %p146, %r127, 0; mov.u32 %r873, %r933; @%p146 bra BB0_223; setp.eq.s32 %p147, %r127, 1; mov.u32 %r872, %r933; @%p147 bra BB0_222; setp.eq.s32 %p148, %r127, 2; mov.u32 %r871, %r933; @%p148 bra BB0_221; cvt.s64.s32 %rd202, %r933; add.s64 %rd203, %rd9, %rd202; mov.u16 %rs84, 0; st.local.u8 [%rd203], %rs84; add.s32 %r871, %r933, 1; BB0_221: cvt.s64.s32 %rd204, %r871; add.s64 %rd205, %rd9, %rd204; mov.u16 %rs85, 0; st.local.u8 [%rd205], %rs85; add.s32 %r872, %r871, 1; BB0_222: cvt.s64.s32 %rd206, %r872; add.s64 %rd207, %rd9, %rd206; mov.u16 %rs86, 0; st.local.u8 [%rd207], %rs86; add.s32 %r873, %r872, 1; BB0_223: setp.lt.u32 %p149, %r126, 4; @%p149 bra BB0_368; cvt.s64.s32 %rd208, %r873; add.s64 %rd361, %rd9, %rd208; BB0_225: mov.u32 %r468, 0; st.local.u8 [%rd361+3], %r468; st.local.u8 [%rd361+2], %r468; st.local.u8 [%rd361+1], %r468; st.local.u8 [%rd361], %r468; add.s64 %rd361, %rd361, 4; add.s32 %r873, %r873, 4; setp.lt.s32 %p150, %r873, %r10; @%p150 bra BB0_225; bra.uni BB0_368; BB0_29: setp.eq.s32 %p52, %r299, 68; @%p52 bra BB0_30; bra.uni BB0_367; BB0_30: and.b32 %r886, %r11, 255; setp.ge.s32 %p184, %r886, %r10; @%p184 bra BB0_367; add.s32 %r933, %r10, -1; setp.ge.s32 %p185, %r886, %r933; @%p185 bra BB0_34; cvt.u64.u32 %rd224, %r11; and.b64 %rd225, %rd224, 255; add.s64 %rd369, %rd9, %rd225; BB0_33: add.s64 %rd61, %rd369, 1; ld.local.u8 %rs107, [%rd369+1]; st.local.u8 [%rd369], %rs107; add.s32 %r886, %r886, 1; setp.lt.s32 %p186, %r886, %r933; mov.u64 %rd369, %rd61; @%p186 bra BB0_33; BB0_34: mov.u16 %rs108, 0; st.local.u8 [%rd11+-1], %rs108; bra.uni BB0_368; BB0_365: mov.u32 %r932, 0; setp.lt.s32 %p264, %r10, 1; mov.u64 %rd395, %rd396; @%p264 bra BB0_367; BB0_366: ld.local.u32 %r789, [%rd395]; and.b32 %r790, %r789, 1077952576; shr.u32 %r791, %r790, 1; and.b32 %r792, %r789, -2139062144; shr.u32 %r793, %r792, 2; not.b32 %r794, %r793; and.b32 %r795, %r791, %r794; and.b32 %r796, %r789, 522133279; add.s32 %r797, %r796, 522133279; mov.u32 %r798, -84215046; sub.s32 %r799, %r798, %r796; and.b32 %r800, %r795, %r799; and.b32 %r801, %r800, %r797; or.b32 %r802, %r801, %r789; st.local.u32 [%rd395], %r802; add.s64 %rd395, %rd395, 4; add.s32 %r932, %r932, 4; setp.lt.s32 %p265, %r932, %r10; @%p265 bra BB0_366; bra.uni BB0_367; BB0_87: setp.eq.s32 %p25, %r299, 112; @%p25 bra BB0_88; bra.uni BB0_367; BB0_88: and.b32 %r246, %r11, 255; mad.lo.s32 %r933, %r246, %r10, %r10; setp.gt.s32 %p234, %r933, 255; @%p234 bra BB0_367; setp.eq.s32 %p235, %r246, 0; @%p235 bra BB0_368; and.b32 %r248, %r10, 3; mov.u32 %r917, 0; mov.u64 %rd387, %rd9; BB0_91: setp.lt.s32 %p236, %r10, 1; @%p236 bra BB0_348; setp.eq.s32 %p237, %r248, 0; mov.u64 %rd303, 0; mov.u32 %r921, 0; @%p237 bra BB0_340; bra.uni BB0_341; BB0_340: mov.u64 %rd385, %rd387; mov.u64 %rd386, %rd11; mov.u64 %rd387, %rd303; mov.u64 %rd11, %rd303; bra.uni BB0_346; BB0_341: setp.eq.s32 %p238, %r248, 1; mov.u32 %r919, 0; @%p238 bra BB0_345; setp.eq.s32 %p239, %r248, 2; mov.u32 %r919, 1; @%p239 bra BB0_344; ld.local.u8 %rs165, [%rd387]; st.local.u8 [%rd11], %rs165; add.s64 %rd11, %rd11, 1; add.s64 %rd387, %rd387, 1; mov.u32 %r919, 2; BB0_344: ld.local.u8 %rs166, [%rd387]; st.local.u8 [%rd11], %rs166; add.s64 %rd11, %rd11, 1; add.s64 %rd387, %rd387, 1; BB0_345: ld.local.u8 %rs167, [%rd387]; st.local.u8 [%rd11], %rs167; add.s64 %rd386, %rd11, 1; add.s64 %rd385, %rd387, 1; add.s32 %r921, %r919, 1; mov.u64 %rd387, %rd385; mov.u64 %rd11, %rd386; BB0_346: setp.lt.u32 %p240, %r10, 4; @%p240 bra BB0_348; BB0_347: ld.local.u8 %rs168, [%rd385]; st.local.u8 [%rd386], %rs168; ld.local.u8 %rs169, [%rd385+1]; st.local.u8 [%rd386+1], %rs169; ld.local.u8 %rs170, [%rd385+2]; st.local.u8 [%rd386+2], %rs170; ld.local.u8 %rs171, [%rd385+3]; st.local.u8 [%rd386+3], %rs171; add.s64 %rd386, %rd386, 4; add.s64 %rd385, %rd385, 4; add.s32 %r921, %r921, 4; setp.lt.s32 %p241, %r921, %r10; mov.u64 %rd387, %rd385; mov.u64 %rd11, %rd386; @%p241 bra BB0_347; BB0_348: add.s32 %r917, %r917, 1; setp.lt.s32 %p242, %r917, %r246; @%p242 bra BB0_91; bra.uni BB0_368; BB0_171: and.b32 %r74, %r11, 255; add.s32 %r933, %r74, %r10; setp.gt.s32 %p116, %r933, 255; setp.eq.s32 %p117, %r10, 0; or.pred %p118, %p117, %p116; @%p118 bra BB0_367; setp.eq.s32 %p119, %r74, 0; @%p119 bra BB0_368; ld.local.u8 %rs4, [%rd11+-1]; mov.u32 %r846, 0; BB0_174: setp.gt.s32 %p120, %r10, 254; @%p120 bra BB0_176; st.local.u8 [%rd11], %rs4; BB0_176: add.s32 %r846, %r846, 1; add.s64 %rd11, %rd11, 1; add.s32 %r10, %r10, 1; setp.lt.s32 %p121, %r846, %r74; @%p121 bra BB0_174; bra.uni BB0_368; BB0_58: setp.eq.s32 %p39, %r299, 93; @%p39 bra BB0_59; bra.uni BB0_367; BB0_59: setp.eq.s32 %p187, %r10, 0; mov.u32 %r933, 0; @%p187 bra BB0_368; add.s32 %r490, %r10, 255; and.b32 %r171, %r490, 255; setp.ge.s32 %p188, %r171, %r10; @%p188 bra BB0_367; add.s32 %r933, %r10, -1; setp.le.s32 %p189, %r933, %r171; @%p189 bra BB0_282; sub.s32 %r173, %r933, %r171; and.b32 %r174, %r173, 3; setp.eq.s32 %p190, %r174, 0; @%p190 bra BB0_272; bra.uni BB0_273; BB0_272: mov.u32 %r890, %r171; bra.uni BB0_279; BB0_177: and.b32 %r80, %r11, 255; add.s32 %r933, %r80, %r10; setp.gt.s32 %p122, %r933, 255; @%p122 bra BB0_367; ld.local.u8 %rs5, [%rd9]; setp.eq.s32 %p123, %r80, 0; mov.u32 %r847, 0; @%p123 bra BB0_368; BB0_179: add.s32 %r83, %r10, %r847; setp.gt.s32 %p124, %r83, 254; @%p124 bra BB0_193; add.s32 %r855, %r83, -1; setp.lt.s32 %p125, %r855, 0; @%p125 bra BB0_192; and.b32 %r85, %r83, 3; setp.eq.s32 %p126, %r85, 0; @%p126 bra BB0_182; bra.uni BB0_183; BB0_182: mov.u32 %r854, %r855; mov.u32 %r855, %r83; bra.uni BB0_190; BB0_183: setp.eq.s32 %p127, %r85, 1; @%p127 bra BB0_184; bra.uni BB0_185; BB0_184: mov.u32 %r851, %r83; bra.uni BB0_189; BB0_185: setp.eq.s32 %p128, %r85, 2; @%p128 bra BB0_186; bra.uni BB0_187; BB0_186: mov.u32 %r851, %r855; mov.u32 %r855, %r83; bra.uni BB0_188; BB0_187: cvt.s64.s32 %rd171, %r855; add.s64 %rd172, %rd9, %rd171; ld.local.u8 %rs69, [%rd172]; st.local.u8 [%rd172+1], %rs69; add.s32 %r851, %r83, -2; BB0_188: cvt.s64.s32 %rd173, %r851; add.s64 %rd174, %rd9, %rd173; ld.local.u8 %rs70, [%rd174]; cvt.s64.s32 %rd175, %r855; add.s64 %rd176, %rd9, %rd175; st.local.u8 [%rd176], %rs70; add.s32 %r855, %r851, -1; BB0_189: cvt.s64.s32 %rd177, %r855; add.s64 %rd178, %rd9, %rd177; ld.local.u8 %rs71, [%rd178]; cvt.s64.s32 %rd179, %r851; add.s64 %rd180, %rd9, %rd179; st.local.u8 [%rd180], %rs71; add.s32 %r854, %r855, -1; BB0_190: setp.lt.u32 %p129, %r83, 4; @%p129 bra BB0_192; BB0_191: cvt.s64.s32 %rd181, %r854; add.s64 %rd182, %rd9, %rd181; ld.local.u8 %rs72, [%rd182]; cvt.s64.s32 %rd183, %r855; add.s64 %rd184, %rd9, %rd183; st.local.u8 [%rd184], %rs72; ld.local.u8 %rs73, [%rd182+-1]; ld.local.u8 %rs74, [%rd182+-2]; ld.local.u8 %rs75, [%rd182+-3]; st.local.u8 [%rd182], %rs73; st.local.u8 [%rd182+-1], %rs74; st.local.u8 [%rd182+-2], %rs75; add.s32 %r97, %r854, -4; setp.gt.s32 %p130, %r97, -1; add.s32 %r855, %r854, -3; mov.u32 %r854, %r97; @%p130 bra BB0_191; BB0_192: st.local.u8 [%rd9], %rs5; BB0_193: add.s32 %r847, %r847, 1; setp.lt.s32 %p131, %r847, %r80; @%p131 bra BB0_179; bra.uni BB0_368; BB0_122: setp.eq.s32 %p12, %r299, 125; @%p12 bra BB0_123; bra.uni BB0_367; BB0_123: setp.lt.s32 %p202, %r300, 1; @%p202 bra BB0_367; and.b32 %r193, %r300, 3; setp.eq.s32 %p203, %r193, 0; mov.u32 %r897, 0; @%p203 bra BB0_299; setp.eq.s32 %p204, %r193, 1; @%p204 bra BB0_297; bra.uni BB0_126; BB0_297: ld.local.u8 %rs187, [%rd11+-1]; mov.u32 %r896, 0; bra.uni BB0_298; BB0_315: setp.eq.s32 %p217, %r212, 1; @%p217 bra BB0_316; bra.uni BB0_317; BB0_316: mov.u32 %r906, %r10; bra.uni BB0_321; BB0_357: add.s32 %r648, %r263, %r10; shr.s32 %r649, %r648, 31; shr.u32 %r650, %r649, 30; add.s32 %r651, %r648, %r650; shr.s32 %r652, %r651, 2; and.b32 %r653, %r648, 3; shl.b32 %r654, %r653, 3; shr.s32 %r655, %r263, 31; shr.u32 %r656, %r655, 30; add.s32 %r657, %r263, %r656; shr.s32 %r658, %r657, 2; mul.wide.s32 %rd324, %r658, 4; add.s64 %rd325, %rd396, %rd324; ld.local.u32 %r659, [%rd325+4]; ld.local.u32 %r660, [%rd325]; mov.b64 %rd326, {%r660, %r659}; and.b64 %rd327, %rd326, 16777215; shl.b64 %rd328, %rd327, %r654; cvt.u32.u64 %r661, %rd328; shr.u64 %rd329, %rd328, 32; cvt.u32.u64 %r662, %rd329; mul.wide.s32 %rd330, %r652, 4; add.s64 %rd331, %rd396, %rd330; ld.local.u32 %r663, [%rd331]; or.b32 %r664, %r661, %r663; ld.local.u32 %r665, [%rd331+4]; st.local.u32 [%rd331], %r664; or.b32 %r666, %r662, %r665; st.local.u32 [%rd331+4], %r666; bra.uni BB0_368; BB0_354: setp.ne.s32 %p248, %r613, 1; @%p248 bra BB0_368; add.s32 %r614, %r263, %r10; shr.s32 %r615, %r614, 31; shr.u32 %r616, %r615, 30; add.s32 %r617, %r614, %r616; shr.s32 %r618, %r617, 2; and.b32 %r619, %r614, 3; shl.b32 %r620, %r619, 3; shr.s32 %r621, %r263, 31; shr.u32 %r622, %r621, 30; add.s32 %r623, %r263, %r622; shr.s32 %r624, %r623, 2; mul.wide.s32 %rd312, %r624, 4; add.s64 %rd313, %rd396, %rd312; ld.local.u8 %r625, [%rd313]; shl.b32 %r626, %r625, %r620; mul.wide.s32 %rd314, %r618, 4; add.s64 %rd315, %rd396, %rd314; ld.local.u32 %r627, [%rd315]; or.b32 %r628, %r626, %r627; st.local.u32 [%rd315], %r628; bra.uni BB0_368; BB0_317: setp.eq.s32 %p218, %r212, 2; @%p218 bra BB0_318; bra.uni BB0_319; BB0_318: mov.u32 %r906, %r910; mov.u32 %r910, %r10; bra.uni BB0_320; BB0_328: add.s32 %r554, %r912, %r10; shr.s32 %r555, %r554, 31; shr.u32 %r556, %r555, 30; add.s32 %r557, %r554, %r556; shr.s32 %r558, %r557, 2; and.b32 %r559, %r554, 3; shl.b32 %r560, %r559, 3; shr.s32 %r561, %r912, 31; shr.u32 %r562, %r561, 30; add.s32 %r563, %r912, %r562; shr.s32 %r564, %r563, 2; mul.wide.s32 %rd283, %r564, 4; add.s64 %rd284, %rd396, %rd283; ld.local.u32 %r565, [%rd284+4]; ld.local.u32 %r566, [%rd284]; mov.b64 %rd285, {%r566, %r565}; and.b64 %rd286, %rd285, 16777215; shl.b64 %rd287, %rd286, %r560; cvt.u32.u64 %r567, %rd287; shr.u64 %rd288, %rd287, 32; cvt.u32.u64 %r568, %rd288; mul.wide.s32 %rd289, %r558, 4; add.s64 %rd290, %rd396, %rd289; ld.local.u32 %r569, [%rd290]; or.b32 %r570, %r567, %r569; ld.local.u32 %r571, [%rd290+4]; st.local.u32 [%rd290], %r570; or.b32 %r572, %r568, %r571; st.local.u32 [%rd290+4], %r572; bra.uni BB0_329; BB0_78: setp.ne.s32 %p227, %r519, 1; @%p227 bra BB0_329; add.s32 %r520, %r912, %r10; shr.s32 %r521, %r520, 31; shr.u32 %r522, %r521, 30; add.s32 %r523, %r520, %r522; shr.s32 %r524, %r523, 2; and.b32 %r525, %r520, 3; shl.b32 %r526, %r525, 3; shr.s32 %r527, %r912, 31; shr.u32 %r528, %r527, 30; add.s32 %r529, %r912, %r528; shr.s32 %r530, %r529, 2; mul.wide.s32 %rd271, %r530, 4; add.s64 %rd272, %rd396, %rd271; ld.local.u8 %r531, [%rd272]; shl.b32 %r532, %r531, %r526; mul.wide.s32 %rd273, %r524, 4; add.s64 %rd274, %rd396, %rd273; ld.local.u32 %r533, [%rd274]; or.b32 %r534, %r532, %r533; st.local.u32 [%rd274], %r534; BB0_329: shl.b32 %r933, %r10, 1; shr.u32 %r573, %r10, 31; add.s32 %r574, %r10, %r573; shr.s32 %r235, %r574, 1; setp.lt.s32 %p228, %r10, 2; @%p228 bra BB0_368; add.s32 %r576, %r10, %r10; add.s32 %r236, %r576, -1; mov.u32 %r577, 1; max.s32 %r237, %r235, %r577; and.b32 %r238, %r237, 3; setp.eq.s32 %p229, %r238, 0; mov.u32 %r916, 0; @%p229 bra BB0_336; setp.eq.s32 %p230, %r238, 1; mov.u32 %r914, 0; @%p230 bra BB0_335; setp.eq.s32 %p231, %r238, 2; mov.u32 %r913, 0; @%p231 bra BB0_334; ld.local.u8 %rs151, [%rd11]; ld.local.u8 %rs152, [%rd12+1]; st.local.u8 [%rd11], %rs152; st.local.u8 [%rd12+1], %rs151; mov.u32 %r913, %r577; BB0_334: add.s32 %r581, %r913, %r10; cvt.s64.s32 %rd291, %r581; add.s64 %rd292, %rd9, %rd291; ld.local.u8 %rs153, [%rd292]; sub.s32 %r582, %r236, %r913; cvt.s64.s32 %rd293, %r582; add.s64 %rd294, %rd9, %rd293; ld.local.u8 %rs154, [%rd294]; st.local.u8 [%rd292], %rs154; st.local.u8 [%rd294], %rs153; add.s32 %r914, %r913, 1; BB0_335: add.s32 %r583, %r914, %r10; cvt.s64.s32 %rd295, %r583; add.s64 %rd296, %rd9, %rd295; ld.local.u8 %rs155, [%rd296]; sub.s32 %r584, %r236, %r914; cvt.s64.s32 %rd297, %r584; add.s64 %rd298, %rd9, %rd297; ld.local.u8 %rs156, [%rd298]; st.local.u8 [%rd296], %rs156; st.local.u8 [%rd298], %rs155; add.s32 %r916, %r914, 1; BB0_336: shl.b32 %r933, %r10, 1; setp.lt.u32 %p232, %r237, 4; @%p232 bra BB0_368; shl.b32 %r585, %r10, 1; sub.s32 %r586, %r585, %r916; cvt.s64.s32 %rd299, %r586; add.s64 %rd374, %rd9, %rd299; add.s32 %r587, %r916, %r10; cvt.s64.s32 %rd300, %r587; add.s64 %rd373, %rd9, %rd300; BB0_338: shl.b32 %r933, %r10, 1; ld.local.u8 %rs157, [%rd373]; ld.local.u8 %rs158, [%rd374+-1]; st.local.u8 [%rd373], %rs158; st.local.u8 [%rd374+-1], %rs157; ld.local.u8 %rs159, [%rd373+1]; ld.local.u8 %rs160, [%rd374+-2]; st.local.u8 [%rd373+1], %rs160; st.local.u8 [%rd374+-2], %rs159; ld.local.u8 %rs161, [%rd373+2]; ld.local.u8 %rs162, [%rd374+-3]; st.local.u8 [%rd373+2], %rs162; st.local.u8 [%rd374+-3], %rs161; ld.local.u8 %rs163, [%rd373+3]; add.s64 %rd75, %rd374, -4; ld.local.u8 %rs164, [%rd374+-4]; st.local.u8 [%rd373+3], %rs164; st.local.u8 [%rd374+-4], %rs163; add.s64 %rd373, %rd373, 4; add.s32 %r916, %r916, 4; setp.lt.s32 %p233, %r916, %r235; mov.u64 %rd374, %rd75; @%p233 bra BB0_338; bra.uni BB0_368; BB0_126: setp.eq.s32 %p205, %r193, 2; @%p205 bra BB0_295; bra.uni BB0_127; BB0_295: ld.local.u8 %rs186, [%rd11+-1]; mov.u32 %r895, 0; bra.uni BB0_296; BB0_319: ld.local.u8 %rs144, [%rd11+-1]; st.local.u8 [%rd11], %rs144; add.s32 %r906, %r10, -2; BB0_320: cvt.s64.s32 %rd251, %r906; add.s64 %rd252, %rd9, %rd251; ld.local.u8 %rs145, [%rd252]; cvt.s64.s32 %rd253, %r910; add.s64 %rd254, %rd9, %rd253; st.local.u8 [%rd254], %rs145; add.s32 %r910, %r906, -1; BB0_321: cvt.s64.s32 %rd255, %r910; add.s64 %rd256, %rd9, %rd255; ld.local.u8 %rs146, [%rd256]; cvt.s64.s32 %rd257, %r906; add.s64 %rd258, %rd9, %rd257; st.local.u8 [%rd258], %rs146; add.s32 %r909, %r910, -1; BB0_322: setp.lt.u32 %p219, %r10, 4; @%p219 bra BB0_324; BB0_323: cvt.s64.s32 %rd259, %r909; add.s64 %rd260, %rd9, %rd259; ld.local.u8 %rs147, [%rd260]; cvt.s64.s32 %rd261, %r910; add.s64 %rd262, %rd9, %rd261; st.local.u8 [%rd262], %rs147; ld.local.u8 %rs148, [%rd260+-1]; ld.local.u8 %rs149, [%rd260+-2]; ld.local.u8 %rs150, [%rd260+-3]; st.local.u8 [%rd260], %rs148; st.local.u8 [%rd260+-1], %rs149; st.local.u8 [%rd260+-2], %rs150; add.s32 %r224, %r909, -4; setp.gt.s32 %p220, %r224, -1; add.s32 %r910, %r909, -3; mov.u32 %r909, %r224; @%p220 bra BB0_323; BB0_324: st.local.u8 [%rd9], %rs1; add.s32 %r933, %r10, 1; bra.uni BB0_368; BB0_273: setp.eq.s32 %p191, %r174, 1; @%p191 bra BB0_278; setp.eq.s32 %p192, %r174, 2; @%p192 bra BB0_275; bra.uni BB0_276; BB0_275: mov.u32 %r887, %r171; bra.uni BB0_277; BB0_127: ld.local.u8 %rs186, [%rd9]; ld.local.u8 %rs125, [%rd11+-1]; st.local.u8 [%rd9], %rs125; st.local.u8 [%rd11+-1], %rs186; mov.u32 %r895, 1; BB0_296: cvt.u64.u32 %rd240, %r895; add.s64 %rd241, %rd9, %rd240; ld.local.u8 %rs187, [%rd241]; st.local.u8 [%rd241], %rs186; st.local.u8 [%rd11+-1], %rs187; add.s32 %r896, %r895, 1; BB0_298: cvt.s64.s32 %rd242, %r896; add.s64 %rd243, %rd9, %rd242; ld.local.u8 %rs126, [%rd243]; st.local.u8 [%rd243], %rs187; st.local.u8 [%rd11+-1], %rs126; add.s32 %r897, %r896, 1; BB0_299: setp.lt.u32 %p206, %r300, 4; @%p206 bra BB0_367; ld.local.u8 %rs188, [%rd11+-1]; cvt.s64.s32 %rd244, %r897; add.s64 %rd372, %rd9, %rd244; BB0_301: ld.local.u8 %rs127, [%rd372]; st.local.u8 [%rd372], %rs188; st.local.u8 [%rd11+-1], %rs127; ld.local.u8 %rs128, [%rd372+1]; st.local.u8 [%rd372+1], %rs127; st.local.u8 [%rd11+-1], %rs128; ld.local.u8 %rs129, [%rd372+2]; st.local.u8 [%rd372+2], %rs128; st.local.u8 [%rd11+-1], %rs129; ld.local.u8 %rs188, [%rd372+3]; st.local.u8 [%rd372+3], %rs129; st.local.u8 [%rd11+-1], %rs188; add.s64 %rd372, %rd372, 4; add.s32 %r897, %r897, 4; setp.lt.s32 %p207, %r897, %r300; @%p207 bra BB0_301; BB0_367: mov.u32 %r933, %r10; BB0_368: ld.param.u64 %rd347, [amp_param_2]; add.s32 %r821, %r821, 1; mul.wide.u32 %rd344, %r821, 4; add.s64 %rd345, %rd347, %rd344; ld.const.u32 %r8, [%rd345]; setp.ne.s32 %p266, %r8, 0; @%p266 bra BB0_7; BB0_369: ld.param.u64 %rd348, [amp_param_0]; st.local.u32 [%rd396+256], %r933; add.s64 %rd397, %rd348, %rd127; mov.u32 %r935, 0; BB0_370: ld.local.u32 %r805, [%rd396]; st.global.u32 [%rd397], %r805; add.s64 %rd397, %rd397, 4; add.s64 %rd396, %rd396, 4; add.s32 %r935, %r935, 1; setp.lt.u32 %p267, %r935, 65; @%p267 bra BB0_370; BB0_371: ret; }