Files
odiseectf/yt/kernels/m03200-pure.9cb33c53.kernel
2018-12-16 14:17:14 +01:00

19219 lines
582 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: UNKNOWN
// Unknown Toolkit Version
// Based on LLVM 3.4svn
//
.version 6.3
.target sm_52, texmode_independent
.address_size 64
// .globl gpu_decompress
.const .align 8 .b8 c_sbox0[1024] = {166, 11, 49, 209, 172, 181, 223, 152, 219, 114, 253, 47, 183, 223, 26, 208, 237, 175, 225, 184, 150, 126, 38, 106, 69, 144, 124, 186, 153, 127, 44, 241, 71, 153, 161, 36, 247, 108, 145, 179, 226, 242, 1, 8, 22, 252, 142, 133, 216, 32, 105, 99, 105, 78, 87, 113, 163, 254, 88, 164, 126, 61, 147, 244, 143, 116, 149, 13, 88, 182, 142, 114, 88, 205, 139, 113, 238, 74, 21, 130, 29, 164, 84, 123, 181, 89, 90, 194, 57, 213, 48, 156, 19, 96, 242, 42, 35, 176, 209, 197, 240, 133, 96, 40, 24, 121, 65, 202, 239, 56, 219, 184, 176, 220, 121, 142, 14, 24, 58, 96, 139, 14, 158, 108, 62, 138, 30, 176, 193, 119, 21, 215, 39, 75, 49, 189, 218, 47, 175, 120, 96, 92, 96, 85, 243, 37, 85, 230, 148, 171, 85, 170, 98, 152, 72, 87, 64, 20, 232, 99, 106, 57, 202, 85, 182, 16, 171, 42, 52, 92, 204, 180, 206, 232, 65, 17, 175, 134, 84, 161, 147, 233, 114, 124, 17, 20, 238, 179, 42, 188, 111, 99, 93, 197, 169, 43, 246, 49, 24, 116, 22, 62, 92, 206, 30, 147, 135, 155, 51, 186, 214, 175, 92, 207, 36, 108, 129, 83, 50, 122, 119, 134, 149, 40, 152, 72, 143, 59, 175, 185, 75, 107, 27, 232, 191, 196, 147, 33, 40, 102, 204, 9, 216, 97, 145, 169, 33, 251, 96, 172, 124, 72, 50, 128, 236, 93, 93, 93, 132, 239, 177, 117, 133, 233, 2, 35, 38, 220, 136, 27, 101, 235, 129, 62, 137, 35, 197, 172, 150, 211, 243, 111, 109, 15, 57, 66, 244, 131, 130, 68, 11, 46, 4, 32, 132, 164, 74, 240, 200, 105, 94, 155, 31, 158, 66, 104, 198, 33, 154, 108, 233, 246, 97, 156, 12, 103, 240, 136, 211, 171, 210, 160, 81, 106, 104, 47, 84, 216, 40, 167, 15, 150, 163, 51, 81, 171, 108, 11, 239, 110, 228, 59, 122, 19, 80, 240, 59, 186, 152, 42, 251, 126, 29, 101, 241, 161, 118, 1, 175, 57, 62, 89, 202, 102, 136, 14, 67, 130, 25, 134, 238, 140, 180, 159, 111, 69, 195, 165, 132, 125, 190, 94, 139, 59, 216, 117, 111, 224, 115, 32, 193, 133, 159, 68, 26, 64, 166, 106, 193, 86, 98, 170, 211, 78, 6, 119, 63, 54, 114, 223, 254, 27, 61, 2, 155, 66, 36, 215, 208, 55, 72, 18, 10, 208, 211, 234, 15, 219, 
155, 192, 241, 73, 201, 114, 83, 7, 123, 27, 153, 128, 216, 121, 212, 37, 247, 222, 232, 246, 26, 80, 254, 227, 59, 76, 121, 182, 189, 224, 108, 151, 186, 6, 192, 4, 182, 79, 169, 193, 196, 96, 159, 64, 194, 158, 92, 94, 99, 36, 106, 25, 175, 111, 251, 104, 181, 83, 108, 62, 235, 178, 57, 19, 111, 236, 82, 59, 31, 81, 252, 109, 44, 149, 48, 155, 68, 69, 129, 204, 9, 189, 94, 175, 4, 208, 227, 190, 253, 74, 51, 222, 7, 40, 15, 102, 179, 75, 46, 25, 87, 168, 203, 192, 15, 116, 200, 69, 57, 95, 11, 210, 219, 251, 211, 185, 189, 192, 121, 85, 10, 50, 96, 26, 198, 0, 161, 214, 121, 114, 44, 64, 254, 37, 159, 103, 204, 163, 31, 251, 248, 233, 165, 142, 248, 34, 50, 219, 223, 22, 117, 60, 21, 107, 97, 253, 200, 30, 80, 47, 171, 82, 5, 173, 250, 181, 61, 50, 96, 135, 35, 253, 72, 123, 49, 83, 130, 223, 0, 62, 187, 87, 92, 158, 160, 140, 111, 202, 46, 86, 135, 26, 219, 105, 23, 223, 246, 168, 66, 213, 195, 255, 126, 40, 198, 50, 103, 172, 115, 85, 79, 140, 176, 39, 91, 105, 200, 88, 202, 187, 93, 163, 255, 225, 160, 17, 240, 184, 152, 61, 250, 16, 184, 131, 33, 253, 108, 181, 252, 74, 91, 211, 209, 45, 121, 228, 83, 154, 101, 69, 248, 182, 188, 73, 142, 210, 144, 151, 251, 75, 218, 242, 221, 225, 51, 126, 203, 164, 65, 19, 251, 98, 232, 198, 228, 206, 218, 202, 32, 239, 1, 76, 119, 54, 254, 158, 126, 208, 180, 31, 241, 43, 77, 218, 219, 149, 152, 145, 144, 174, 113, 142, 173, 234, 160, 213, 147, 107, 208, 209, 142, 208, 224, 37, 199, 175, 47, 91, 60, 142, 183, 148, 117, 142, 251, 226, 246, 143, 100, 43, 18, 242, 18, 184, 136, 136, 28, 240, 13, 144, 160, 94, 173, 79, 28, 195, 143, 104, 145, 241, 207, 209, 173, 193, 168, 179, 24, 34, 47, 47, 119, 23, 14, 190, 254, 45, 117, 234, 161, 31, 2, 139, 15, 204, 160, 229, 232, 116, 111, 181, 214, 243, 172, 24, 153, 226, 137, 206, 224, 79, 168, 180, 183, 224, 19, 253, 129, 59, 196, 124, 217, 168, 173, 210, 102, 162, 95, 22, 5, 119, 149, 128, 20, 115, 204, 147, 119, 20, 26, 33, 101, 32, 173, 230, 134, 250, 181, 119, 245, 66, 84, 199, 
207, 53, 157, 251, 12, 175, 205, 235, 160, 137, 62, 123, 211, 27, 65, 214, 73, 126, 30, 174, 45, 14, 37, 0, 94, 179, 113, 32, 187, 0, 104, 34, 175, 224, 184, 87, 155, 54, 100, 36, 30, 185, 9, 240, 29, 145, 99, 85, 170, 166, 223, 89, 137, 67, 193, 120, 127, 83, 90, 217, 162, 91, 125, 32, 197, 185, 229, 2, 118, 3, 38, 131, 169, 207, 149, 98, 104, 25, 200, 17, 65, 74, 115, 78, 202, 45, 71, 179, 74, 169, 20, 123, 82, 0, 81, 27, 21, 41, 83, 154, 63, 87, 15, 214, 228, 198, 155, 188, 118, 164, 96, 43, 0, 116, 230, 129, 181, 111, 186, 8, 31, 233, 27, 87, 107, 236, 150, 242, 21, 217, 13, 42, 33, 101, 99, 182, 182, 249, 185, 231, 46, 5, 52, 255, 100, 86, 133, 197, 93, 45, 176, 83, 161, 143, 159, 169, 153, 71, 186, 8, 106, 7, 133, 110};
.const .align 8 .b8 c_sbox1[1024] = {233, 112, 122, 75, 68, 41, 179, 181, 46, 9, 117, 219, 35, 38, 25, 196, 176, 166, 110, 173, 125, 223, 167, 73, 184, 96, 238, 156, 102, 178, 237, 143, 113, 140, 170, 236, 255, 23, 154, 105, 108, 82, 100, 86, 225, 158, 177, 194, 165, 2, 54, 25, 41, 76, 9, 117, 64, 19, 89, 160, 62, 58, 24, 228, 154, 152, 84, 63, 101, 157, 66, 91, 214, 228, 143, 107, 214, 63, 247, 153, 7, 156, 210, 161, 245, 48, 232, 239, 230, 56, 45, 77, 193, 93, 37, 240, 134, 32, 221, 76, 38, 235, 112, 132, 198, 233, 130, 99, 94, 204, 30, 2, 63, 107, 104, 9, 201, 239, 186, 62, 20, 24, 151, 60, 161, 112, 106, 107, 132, 53, 127, 104, 134, 226, 160, 82, 5, 83, 156, 183, 55, 7, 80, 170, 28, 132, 7, 62, 92, 174, 222, 127, 236, 68, 125, 142, 184, 242, 22, 87, 55, 218, 58, 176, 13, 12, 80, 240, 4, 31, 28, 240, 255, 179, 0, 2, 26, 245, 12, 174, 178, 116, 181, 60, 88, 122, 131, 37, 189, 33, 9, 220, 249, 19, 145, 209, 246, 47, 169, 124, 115, 71, 50, 148, 1, 71, 245, 34, 129, 229, 229, 58, 220, 218, 194, 55, 52, 118, 181, 200, 167, 221, 243, 154, 70, 97, 68, 169, 14, 3, 208, 15, 62, 199, 200, 236, 65, 30, 117, 164, 153, 205, 56, 226, 47, 14, 234, 59, 161, 187, 128, 50, 49, 179, 62, 24, 56, 139, 84, 78, 8, 185, 109, 79, 3, 13, 66, 111, 191, 4, 10, 246, 144, 18, 184, 44, 121, 124, 151, 36, 114, 176, 121, 86, 175, 137, 175, 188, 31, 119, 154, 222, 16, 8, 147, 217, 18, 174, 139, 179, 46, 63, 207, 220, 31, 114, 18, 85, 36, 113, 107, 46, 230, 221, 26, 80, 135, 205, 132, 159, 24, 71, 88, 122, 23, 218, 8, 116, 188, 154, 159, 188, 140, 125, 75, 233, 58, 236, 122, 236, 250, 29, 133, 219, 102, 67, 9, 99, 210, 195, 100, 196, 71, 24, 28, 239, 8, 217, 21, 50, 55, 59, 67, 221, 22, 186, 194, 36, 67, 77, 161, 18, 81, 196, 101, 42, 2, 0, 148, 80, 221, 228, 58, 19, 158, 248, 223, 113, 85, 78, 49, 16, 214, 119, 172, 129, 155, 25, 17, 95, 241, 86, 53, 4, 107, 199, 163, 215, 59, 24, 17, 60, 9, 165, 36, 89, 237, 230, 143, 242, 250, 251, 241, 151, 44, 191, 186, 158, 110, 60, 21, 30, 112, 69, 227, 
134, 177, 111, 233, 234, 10, 94, 14, 134, 179, 42, 62, 90, 28, 231, 31, 119, 250, 6, 61, 78, 185, 220, 101, 41, 15, 29, 231, 153, 214, 137, 62, 128, 37, 200, 102, 82, 120, 201, 76, 46, 106, 179, 16, 156, 186, 14, 21, 198, 120, 234, 226, 148, 83, 60, 252, 165, 244, 45, 10, 30, 167, 78, 247, 242, 61, 43, 29, 54, 15, 38, 57, 25, 96, 121, 194, 25, 8, 167, 35, 82, 182, 18, 19, 247, 110, 254, 173, 235, 102, 31, 195, 234, 149, 69, 188, 227, 131, 200, 123, 166, 209, 55, 127, 177, 40, 255, 140, 1, 239, 221, 50, 195, 165, 90, 108, 190, 133, 33, 88, 101, 2, 152, 171, 104, 15, 165, 206, 238, 59, 149, 47, 219, 173, 125, 239, 42, 132, 47, 110, 91, 40, 182, 33, 21, 112, 97, 7, 41, 117, 71, 221, 236, 16, 21, 159, 97, 48, 168, 204, 19, 150, 189, 97, 235, 30, 254, 52, 3, 207, 99, 3, 170, 144, 92, 115, 181, 57, 162, 112, 76, 11, 158, 158, 213, 20, 222, 170, 203, 188, 134, 204, 238, 167, 44, 98, 96, 171, 92, 171, 156, 110, 132, 243, 178, 175, 30, 139, 100, 202, 240, 189, 25, 185, 105, 35, 160, 80, 187, 90, 101, 50, 90, 104, 64, 179, 180, 42, 60, 213, 233, 158, 49, 247, 184, 33, 192, 25, 11, 84, 155, 153, 160, 95, 135, 126, 153, 247, 149, 168, 125, 61, 98, 154, 136, 55, 248, 119, 45, 227, 151, 95, 147, 237, 17, 129, 18, 104, 22, 41, 136, 53, 14, 214, 31, 230, 199, 161, 223, 222, 150, 153, 186, 88, 120, 165, 132, 245, 87, 99, 114, 34, 27, 255, 195, 131, 155, 150, 70, 194, 26, 235, 10, 179, 205, 84, 48, 46, 83, 228, 72, 217, 143, 40, 49, 188, 109, 239, 242, 235, 88, 234, 255, 198, 52, 97, 237, 40, 254, 115, 60, 124, 238, 217, 20, 74, 93, 227, 183, 100, 232, 20, 93, 16, 66, 224, 19, 62, 32, 182, 226, 238, 69, 234, 171, 170, 163, 21, 79, 108, 219, 208, 79, 203, 250, 66, 244, 66, 199, 181, 187, 106, 239, 29, 59, 79, 101, 5, 33, 205, 65, 158, 121, 30, 216, 199, 77, 133, 134, 106, 71, 75, 228, 80, 98, 129, 61, 242, 161, 98, 207, 70, 38, 141, 91, 160, 131, 136, 252, 163, 182, 199, 193, 195, 36, 21, 127, 146, 116, 203, 105, 11, 138, 132, 71, 133, 178, 146, 86, 0, 191, 91, 9, 157, 72, 25, 173, 
116, 177, 98, 20, 0, 14, 130, 35, 42, 141, 66, 88, 234, 245, 85, 12, 62, 244, 173, 29, 97, 112, 63, 35, 146, 240, 114, 51, 65, 126, 147, 141, 241, 236, 95, 214, 219, 59, 34, 108, 89, 55, 222, 124, 96, 116, 238, 203, 167, 242, 133, 64, 110, 50, 119, 206, 132, 128, 7, 166, 158, 80, 248, 25, 85, 216, 239, 232, 53, 151, 217, 97, 170, 167, 105, 169, 194, 6, 12, 197, 252, 171, 4, 90, 220, 202, 11, 128, 46, 122, 68, 158, 132, 52, 69, 195, 5, 103, 213, 253, 201, 158, 30, 14, 211, 219, 115, 219, 205, 136, 85, 16, 121, 218, 95, 103, 64, 67, 103, 227, 101, 52, 196, 197, 216, 56, 62, 113, 158, 248, 40, 61, 32, 255, 109, 241, 231, 33, 62, 21, 74, 61, 176, 143, 43, 159, 227, 230, 247, 173, 131, 219};
.const .align 8 .b8 c_sbox2[1024] = {104, 90, 61, 233, 247, 64, 129, 148, 28, 38, 76, 246, 52, 41, 105, 148, 247, 32, 21, 65, 247, 212, 2, 118, 46, 107, 244, 188, 104, 0, 162, 212, 113, 36, 8, 212, 106, 244, 32, 51, 183, 212, 183, 67, 175, 97, 0, 80, 46, 246, 57, 30, 70, 69, 36, 151, 116, 79, 33, 20, 64, 136, 139, 191, 29, 252, 149, 77, 175, 145, 181, 150, 211, 221, 244, 112, 69, 47, 160, 102, 236, 9, 188, 191, 133, 151, 189, 3, 208, 109, 172, 127, 4, 133, 203, 49, 179, 39, 235, 150, 65, 57, 253, 85, 230, 71, 37, 218, 154, 10, 202, 171, 37, 120, 80, 40, 244, 41, 4, 83, 218, 134, 44, 10, 251, 109, 182, 233, 98, 20, 220, 104, 0, 105, 72, 215, 164, 192, 14, 104, 238, 141, 161, 39, 162, 254, 63, 79, 140, 173, 135, 232, 6, 224, 140, 181, 182, 214, 244, 122, 124, 30, 206, 170, 236, 95, 55, 211, 153, 163, 120, 206, 66, 42, 107, 64, 53, 158, 254, 32, 185, 133, 243, 217, 171, 215, 57, 238, 139, 78, 18, 59, 247, 250, 201, 29, 86, 24, 109, 75, 49, 102, 163, 38, 178, 151, 227, 234, 116, 250, 110, 58, 50, 67, 91, 221, 247, 231, 65, 104, 251, 32, 120, 202, 78, 245, 10, 251, 151, 179, 254, 216, 172, 86, 64, 69, 39, 149, 72, 186, 58, 58, 83, 85, 135, 141, 131, 32, 183, 169, 107, 254, 75, 149, 150, 208, 188, 103, 168, 85, 88, 154, 21, 161, 99, 41, 169, 204, 51, 219, 225, 153, 86, 74, 42, 166, 249, 37, 49, 63, 28, 126, 244, 94, 124, 49, 41, 144, 2, 232, 248, 253, 112, 47, 39, 4, 92, 21, 187, 128, 227, 44, 40, 5, 72, 21, 193, 149, 34, 109, 198, 228, 63, 19, 193, 72, 220, 134, 15, 199, 238, 201, 249, 7, 15, 31, 4, 65, 164, 121, 71, 64, 23, 110, 136, 93, 235, 81, 95, 50, 209, 192, 155, 213, 143, 193, 188, 242, 100, 53, 17, 65, 52, 120, 123, 37, 96, 156, 42, 96, 163, 232, 248, 223, 27, 108, 99, 31, 194, 180, 18, 14, 158, 50, 225, 2, 209, 79, 102, 175, 21, 129, 209, 202, 224, 149, 35, 107, 225, 146, 62, 51, 98, 11, 36, 59, 34, 185, 190, 238, 14, 162, 178, 133, 153, 13, 186, 230, 140, 12, 114, 222, 40, 247, 162, 45, 69, 120, 18, 208, 253, 148, 183, 149, 98, 8, 125, 100, 240, 245, 204, 
231, 111, 163, 73, 84, 250, 72, 125, 135, 39, 253, 157, 195, 30, 141, 62, 243, 65, 99, 71, 10, 116, 255, 46, 153, 171, 110, 111, 58, 55, 253, 248, 244, 96, 220, 18, 168, 248, 221, 235, 161, 76, 225, 27, 153, 13, 107, 110, 219, 16, 85, 123, 198, 55, 44, 103, 109, 59, 212, 101, 39, 4, 232, 208, 220, 199, 13, 41, 241, 163, 255, 0, 204, 146, 15, 57, 181, 11, 237, 15, 105, 251, 159, 123, 102, 156, 125, 219, 206, 11, 207, 145, 160, 163, 94, 21, 217, 136, 47, 19, 187, 36, 173, 91, 81, 191, 121, 148, 123, 235, 214, 59, 118, 179, 46, 57, 55, 121, 89, 17, 204, 151, 226, 38, 128, 45, 49, 46, 244, 167, 173, 66, 104, 59, 43, 106, 198, 204, 76, 117, 18, 28, 241, 46, 120, 55, 66, 18, 106, 231, 81, 146, 183, 230, 187, 161, 6, 80, 99, 251, 75, 24, 16, 107, 26, 250, 237, 202, 17, 216, 189, 37, 61, 201, 195, 225, 226, 89, 22, 66, 68, 134, 19, 18, 10, 110, 236, 12, 217, 42, 234, 171, 213, 78, 103, 175, 100, 95, 168, 134, 218, 136, 233, 191, 190, 254, 195, 228, 100, 87, 128, 188, 157, 134, 192, 247, 240, 248, 123, 120, 96, 77, 96, 3, 96, 70, 131, 253, 209, 176, 31, 56, 246, 4, 174, 69, 119, 204, 252, 54, 215, 51, 107, 66, 131, 113, 171, 30, 240, 135, 65, 128, 176, 95, 94, 0, 60, 190, 87, 160, 119, 36, 174, 232, 189, 153, 66, 70, 85, 97, 46, 88, 191, 143, 244, 88, 78, 162, 253, 221, 242, 56, 239, 116, 244, 194, 189, 137, 135, 195, 249, 102, 83, 116, 142, 179, 200, 85, 242, 117, 180, 185, 217, 252, 70, 97, 38, 235, 122, 132, 223, 29, 139, 121, 14, 106, 132, 226, 149, 95, 145, 142, 89, 110, 70, 112, 87, 180, 32, 145, 85, 213, 140, 76, 222, 2, 201, 225, 172, 11, 185, 208, 5, 130, 187, 72, 98, 168, 17, 158, 169, 116, 117, 182, 25, 127, 183, 9, 220, 169, 224, 161, 9, 45, 102, 51, 70, 50, 196, 2, 31, 90, 232, 140, 190, 240, 9, 37, 160, 153, 74, 16, 254, 110, 29, 29, 61, 185, 26, 223, 164, 165, 11, 15, 242, 134, 161, 105, 241, 104, 40, 131, 218, 183, 220, 254, 6, 57, 87, 155, 206, 226, 161, 82, 127, 205, 79, 1, 94, 17, 80, 250, 131, 6, 167, 196, 181, 2, 160, 39, 208, 230, 13, 39, 140, 248, 
154, 65, 134, 63, 119, 6, 76, 96, 195, 181, 6, 168, 97, 40, 122, 23, 240, 224, 134, 245, 192, 170, 88, 96, 0, 98, 125, 220, 48, 215, 158, 230, 17, 99, 234, 56, 35, 148, 221, 194, 83, 52, 22, 194, 194, 86, 238, 203, 187, 222, 182, 188, 144, 161, 125, 252, 235, 118, 29, 89, 206, 9, 228, 5, 111, 136, 1, 124, 75, 61, 10, 114, 57, 36, 124, 146, 124, 95, 114, 227, 134, 185, 157, 77, 114, 180, 91, 193, 26, 252, 184, 158, 211, 120, 85, 84, 237, 181, 165, 252, 8, 211, 124, 61, 216, 196, 15, 173, 77, 94, 239, 80, 30, 248, 230, 97, 177, 217, 20, 133, 162, 60, 19, 81, 108, 231, 199, 213, 111, 196, 78, 225, 86, 206, 191, 42, 54, 55, 200, 198, 221, 52, 50, 154, 215, 18, 130, 99, 146, 142, 250, 14, 103, 224, 0, 96, 64};
.const .align 8 .b8 c_sbox3[1024] = {55, 206, 57, 58, 207, 245, 250, 211, 55, 119, 194, 171, 27, 45, 197, 90, 158, 103, 176, 92, 66, 55, 163, 79, 64, 39, 130, 211, 190, 155, 188, 153, 157, 142, 17, 213, 21, 115, 15, 191, 126, 28, 45, 214, 123, 196, 0, 199, 107, 27, 140, 183, 69, 144, 161, 33, 190, 177, 110, 178, 180, 110, 54, 106, 47, 171, 72, 87, 121, 110, 148, 188, 210, 118, 163, 198, 200, 194, 73, 101, 238, 248, 15, 83, 125, 222, 141, 70, 29, 10, 115, 213, 198, 77, 208, 76, 219, 187, 57, 41, 80, 70, 186, 169, 232, 38, 149, 172, 4, 227, 94, 190, 240, 213, 250, 161, 154, 81, 45, 106, 226, 140, 239, 99, 34, 238, 134, 154, 184, 194, 137, 192, 246, 46, 36, 67, 170, 3, 30, 165, 164, 208, 242, 156, 186, 97, 192, 131, 77, 106, 233, 155, 80, 21, 229, 143, 214, 91, 100, 186, 249, 162, 38, 40, 225, 58, 58, 167, 134, 149, 169, 75, 233, 98, 85, 239, 211, 239, 47, 199, 218, 247, 82, 247, 105, 111, 4, 63, 89, 10, 250, 119, 21, 169, 228, 128, 1, 134, 176, 135, 173, 230, 9, 155, 147, 229, 62, 59, 90, 253, 144, 233, 151, 215, 52, 158, 217, 183, 240, 44, 81, 139, 43, 2, 58, 172, 213, 150, 125, 166, 125, 1, 214, 62, 207, 209, 40, 45, 125, 124, 207, 37, 159, 31, 155, 184, 242, 173, 114, 180, 214, 90, 76, 245, 136, 90, 113, 172, 41, 224, 230, 165, 25, 224, 253, 172, 176, 71, 155, 250, 147, 237, 141, 196, 211, 232, 204, 87, 59, 40, 41, 102, 213, 248, 40, 46, 19, 121, 145, 1, 95, 120, 85, 96, 117, 237, 68, 14, 150, 247, 140, 94, 211, 227, 212, 109, 5, 21, 186, 109, 244, 136, 37, 97, 161, 3, 189, 240, 100, 5, 21, 158, 235, 195, 162, 87, 144, 60, 236, 26, 39, 151, 42, 7, 58, 169, 155, 109, 63, 27, 245, 33, 99, 30, 251, 102, 156, 245, 25, 243, 220, 38, 40, 217, 51, 117, 245, 253, 85, 177, 130, 52, 86, 3, 187, 60, 186, 138, 17, 119, 81, 40, 248, 217, 10, 194, 103, 81, 204, 171, 95, 146, 173, 204, 81, 23, 232, 77, 142, 220, 48, 56, 98, 88, 157, 55, 145, 249, 32, 147, 194, 144, 122, 234, 206, 123, 62, 251, 100, 206, 33, 81, 50, 190, 79, 119, 126, 227, 182, 168, 70, 61, 41, 195, 105, 83, 222, 
72, 128, 230, 19, 100, 16, 8, 174, 162, 36, 178, 109, 221, 253, 45, 133, 105, 102, 33, 7, 9, 10, 70, 154, 179, 221, 192, 69, 100, 207, 222, 108, 88, 174, 200, 32, 28, 221, 247, 190, 91, 64, 141, 88, 27, 127, 1, 210, 204, 187, 227, 180, 107, 126, 106, 162, 221, 69, 255, 89, 58, 68, 10, 53, 62, 213, 205, 180, 188, 168, 206, 234, 114, 187, 132, 100, 250, 174, 18, 102, 141, 71, 111, 60, 191, 99, 228, 155, 210, 158, 93, 47, 84, 27, 119, 194, 174, 112, 99, 78, 246, 141, 13, 14, 116, 87, 19, 91, 231, 113, 22, 114, 248, 93, 125, 83, 175, 8, 203, 64, 64, 204, 226, 180, 78, 106, 70, 210, 52, 132, 175, 21, 1, 40, 4, 176, 225, 29, 58, 152, 149, 180, 159, 184, 6, 72, 160, 110, 206, 130, 59, 63, 111, 130, 171, 32, 53, 75, 29, 26, 1, 248, 39, 114, 39, 177, 96, 21, 97, 220, 63, 147, 231, 43, 121, 58, 187, 189, 37, 69, 52, 225, 57, 136, 160, 75, 121, 206, 81, 183, 201, 50, 47, 201, 186, 31, 160, 126, 200, 28, 224, 246, 209, 199, 188, 195, 17, 1, 207, 199, 170, 232, 161, 73, 135, 144, 26, 154, 189, 79, 212, 203, 222, 218, 208, 56, 218, 10, 213, 42, 195, 57, 3, 103, 54, 145, 198, 124, 49, 249, 141, 79, 43, 177, 224, 183, 89, 158, 247, 58, 187, 245, 67, 255, 25, 213, 242, 156, 69, 217, 39, 44, 34, 151, 191, 42, 252, 230, 21, 113, 252, 145, 15, 37, 21, 148, 155, 97, 147, 229, 250, 235, 156, 182, 206, 89, 100, 168, 194, 209, 168, 186, 18, 94, 7, 193, 182, 12, 106, 5, 227, 101, 80, 210, 16, 66, 164, 3, 203, 14, 110, 236, 224, 59, 219, 152, 22, 190, 160, 152, 76, 100, 233, 120, 50, 50, 149, 31, 159, 223, 146, 211, 224, 43, 52, 160, 211, 30, 242, 113, 137, 65, 116, 10, 27, 140, 52, 163, 75, 32, 113, 190, 197, 216, 50, 118, 195, 141, 159, 53, 223, 46, 47, 153, 155, 71, 111, 11, 230, 29, 241, 227, 15, 84, 218, 76, 229, 145, 216, 218, 30, 207, 121, 98, 206, 111, 126, 62, 205, 102, 177, 24, 22, 5, 29, 44, 253, 197, 210, 143, 132, 153, 34, 251, 246, 87, 243, 35, 245, 35, 118, 50, 166, 49, 53, 168, 147, 2, 205, 204, 86, 98, 129, 240, 172, 181, 235, 117, 90, 151, 54, 22, 110, 204, 115, 210, 136, 
146, 98, 150, 222, 208, 73, 185, 129, 27, 144, 80, 76, 20, 86, 198, 113, 189, 199, 198, 230, 10, 20, 122, 50, 6, 208, 225, 69, 154, 123, 242, 195, 253, 83, 170, 201, 0, 15, 168, 98, 226, 191, 37, 187, 246, 210, 189, 53, 5, 105, 18, 113, 34, 2, 4, 178, 124, 207, 203, 182, 43, 156, 118, 205, 192, 62, 17, 83, 211, 227, 64, 22, 96, 189, 171, 56, 240, 173, 71, 37, 156, 32, 56, 186, 118, 206, 70, 247, 197, 161, 175, 119, 96, 96, 117, 32, 78, 254, 203, 133, 216, 141, 232, 138, 176, 249, 170, 122, 126, 170, 249, 76, 92, 194, 72, 25, 140, 138, 251, 2, 228, 106, 195, 1, 249, 225, 235, 214, 105, 248, 212, 144, 160, 222, 92, 166, 45, 37, 9, 63, 159, 230, 8, 194, 50, 97, 78, 183, 91, 226, 119, 206, 227, 223, 143, 87, 230, 114, 195, 58};
// m03200_init$S0_all has been demoted
// m03200_init$S1_all has been demoted
// m03200_init$S2_all has been demoted
// m03200_init$S3_all has been demoted
// m03200_init$P: 72 bytes = 18 32-bit words stored little-endian.
// The leading words decode to 0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
// which match the start of the standard Blowfish initial P-array
// (NOTE(review): only the first four words were checked; confirm the rest).
.const .align 4 .b8 m03200_init$P[72] = {136, 106, 63, 36, 211, 8, 163, 133, 46, 138, 25, 19, 68, 115, 112, 3, 34, 56, 9, 164, 208, 49, 159, 41, 152, 250, 46, 8, 137, 108, 78, 236, 230, 33, 40, 69, 119, 19, 208, 56, 207, 102, 84, 190, 108, 12, 233, 52, 183, 41, 172, 192, 221, 80, 124, 201, 181, 213, 132, 63, 23, 9, 71, 181, 217, 213, 22, 146, 27, 251, 121, 137};
// m03200_loop$S0_all has been demoted
// m03200_loop$S1_all has been demoted
// m03200_loop$S2_all has been demoted
// m03200_loop$S3_all has been demoted
// m03200_comp$S0_all has been demoted
// m03200_comp$S1_all has been demoted
// m03200_comp$S2_all has been demoted
// m03200_comp$S3_all has been demoted
// gpu_decompress: one thread expands one packed entry into a fixed-size
// 260-byte output record.
//
// Parameters
//   gpu_decompress_param_0  global pointer to 12-byte index entries. Three u32
//                           fields are read per entry: source word offset (+0),
//                           word count (+4) and a trailer value (+8).
//   gpu_decompress_param_1  global pointer to the packed u32 words, indexed by
//                           the source word offset.
//   gpu_decompress_param_2  global pointer to the output records (260 bytes
//                           each, addressed as gid * 260).
//   gpu_decompress_param_3  number of work items; threads whose id is at or
//                           beyond it return without touching memory.
//
// Output record layout: 64 u32 payload words (zero padded past the copied
// words) followed by the trailer value at byte offset 256.
// NOTE(review): the layout looks like an {offset, count, length} index entry
// feeding a {u32[64], length} record - confirm against the host-side structs.
//
// The word count is clamped to 64 before use: the staging record only has
// room for 64 payload words, so a larger count read from global memory would
// otherwise store past the end of the 260-byte local buffer.
.entry gpu_decompress(
.param .u64 .ptr .global .align 4 gpu_decompress_param_0,
.param .u64 .ptr .global .align 4 gpu_decompress_param_1,
.param .u64 .ptr .global .align 4 gpu_decompress_param_2,
.param .u64 gpu_decompress_param_3
)
{
.local .align 4 .b8 __local_depot0[260];
.reg .b64 %SPL;
.reg .pred %p<6>;
.reg .b32 %r<14>;
.reg .b64 %rd<17>;
mov.u64 %SPL, __local_depot0;
ld.param.u64 %rd1, [gpu_decompress_param_0];
ld.param.u64 %rd2, [gpu_decompress_param_1];
ld.param.u64 %rd3, [gpu_decompress_param_2];
ld.param.u64 %rd4, [gpu_decompress_param_3];
// rd5 = base of the per-thread staging record in local memory
add.u64 %rd5, %SPL, 0;
// r6 = gid = ctaid.x * ntid.x + envreg3 + tid.x
// (envreg3 is presumably the driver-supplied global offset - confirm)
mov.u32 %r1, %ctaid.x;
mov.u32 %r2, %ntid.x;
mov.b32 %r3, %envreg3;
mad.lo.s32 %r4, %r1, %r2, %r3;
mov.u32 %r5, %tid.x;
add.s32 %r6, %r4, %r5;
// the 32-bit id is sign-extended, then compared unsigned against the limit
cvt.s64.s32 %rd6, %r6;
setp.ge.u64 %p1, %rd6, %rd4;
@%p1 bra DECOMP_DONE;
// load the three fields of index entry gid (12-byte stride)
mul.wide.s32 %rd7, %r6, 12;
add.s64 %rd8, %rd1, %rd7;
ld.global.u32 %r7, [%rd8];
ld.global.u32 %r8, [%rd8+4];
ld.global.u32 %r9, [%rd8+8];
// r7 = source word offset, r8 = word count, r9 = trailer value
// never copy more words than the staging record can hold
min.u32 %r8, %r8, 64;
// zero the 64 payload words of the staging record
mov.u32 %r11, 0;
mov.u32 %r10, 0;
mov.u64 %rd9, %rd5;
DECOMP_ZERO:
st.local.u32 [%rd9], %r11;
add.s64 %rd9, %rd9, 4;
add.s32 %r10, %r10, 1;
setp.lt.u32 %p2, %r10, 64;
@%p2 bra DECOMP_ZERO;
// copy r8 words: staging[p] = packed[offset + p]
setp.eq.s32 %p3, %r8, 0;
@%p3 bra DECOMP_TRAILER;
mov.u32 %r10, 0;
DECOMP_COPY:
mul.wide.u32 %rd10, %r7, 4;
add.s64 %rd11, %rd2, %rd10;
ld.global.u32 %r12, [%rd11];
mul.wide.u32 %rd12, %r10, 4;
add.s64 %rd13, %rd5, %rd12;
st.local.u32 [%rd13], %r12;
add.s32 %r7, %r7, 1;
add.s32 %r10, %r10, 1;
setp.lt.u32 %p4, %r10, %r8;
@%p4 bra DECOMP_COPY;
DECOMP_TRAILER:
// trailer value goes right after the 64 payload words
st.local.u32 [%rd5+256], %r9;
// flush all 65 words of the staging record to output record gid
mul.wide.s32 %rd14, %r6, 260;
add.s64 %rd15, %rd3, %rd14;
mov.u64 %rd16, %rd5;
mov.u32 %r10, 0;
DECOMP_FLUSH:
ld.local.u32 %r13, [%rd16];
st.global.u32 [%rd15], %r13;
add.s64 %rd15, %rd15, 4;
add.s64 %rd16, %rd16, 4;
add.s32 %r10, %r10, 1;
setp.lt.u32 %p5, %r10, 65;
@%p5 bra DECOMP_FLUSH;
DECOMP_DONE:
ret;
}
// .globl gpu_memset
// gpu_memset: each in-range thread writes one 16-byte element made of four
// copies of a 32-bit fill value.
//
// Parameters
//   gpu_memset_param_0  global pointer (16-byte aligned) to the buffer; thread
//                       gid writes the element at byte offset gid * 16.
//   gpu_memset_param_1  32-bit value replicated into all four lanes.
//   gpu_memset_param_2  number of elements; threads whose id is at or beyond
//                       it perform no store.
.entry gpu_memset(
.param .u64 .ptr .global .align 16 gpu_memset_param_0,
.param .u32 gpu_memset_param_1,
.param .u64 gpu_memset_param_2
)
{
.reg .pred %in_range;
.reg .b32 %fill, %blk_idx, %blk_dim, %gid_base, %thr_idx, %blk_first, %gid;
.reg .b64 %buf, %limit, %gid64, %byte_off, %dst;
ld.param.u64 %limit, [gpu_memset_param_2];
ld.param.u32 %fill, [gpu_memset_param_1];
ld.param.u64 %buf, [gpu_memset_param_0];
// gid = ctaid.x * ntid.x + envreg3 + tid.x
mov.u32 %blk_idx, %ctaid.x;
mov.u32 %blk_dim, %ntid.x;
mov.b32 %gid_base, %envreg3;
mov.u32 %thr_idx, %tid.x;
mad.lo.s32 %blk_first, %blk_idx, %blk_dim, %gid_base;
add.s32 %gid, %blk_first, %thr_idx;
// sign-extend the 32-bit id, then compare unsigned against the element count
cvt.s64.s32 %gid64, %gid;
setp.lt.u64 %in_range, %gid64, %limit;
// the address is computed unconditionally; only the store is predicated
mul.wide.s32 %byte_off, %gid, 16;
add.s64 %dst, %buf, %byte_off;
@%in_range st.global.v4.u32 [%dst], {%fill, %fill, %fill, %fill};
ret;
}
// .globl gpu_atinit
// gpu_atinit: each in-range thread initialises one 260-byte record in global
// memory.
//
// Parameters
//   gpu_atinit_param_0  global pointer to the records; thread gid writes the
//                       record at byte offset gid * 260.
//   gpu_atinit_param_1  number of records; threads whose id is at or beyond it
//                       return without storing.
//
// Record contents written:
//   word 0      = low  32 bits of the sign-extended id XOR 0x5C5C5C5C
//   word 1      = high 32 bits of the sign-extended id XOR 0x36363636
//   words 2..63 = 0
//   word 64     = 7   (byte offset 256)
.entry gpu_atinit(
.param .u64 .ptr .global .align 4 gpu_atinit_param_0,
.param .u64 gpu_atinit_param_1
)
{
.reg .pred %p_oob, %p_more;
.reg .b32 %blk_idx, %blk_dim, %gid_base, %thr_idx, %blk_first, %gid;
.reg .b32 %gid_lo, %gid_hi, %w0, %w1, %zero, %seven, %i;
.reg .b64 %recs, %limit, %gid64, %hi64, %byte_off, %rec, %cur;
ld.param.u64 %recs, [gpu_atinit_param_0];
ld.param.u64 %limit, [gpu_atinit_param_1];
// gid = ctaid.x * ntid.x + envreg3 + tid.x
mov.b32 %gid_base, %envreg3;
mov.u32 %blk_idx, %ctaid.x;
mov.u32 %blk_dim, %ntid.x;
mad.lo.s32 %blk_first, %blk_idx, %blk_dim, %gid_base;
mov.u32 %thr_idx, %tid.x;
add.s32 %gid, %blk_first, %thr_idx;
// sign-extend the 32-bit id, then compare unsigned against the record count
cvt.s64.s32 %gid64, %gid;
setp.ge.u64 %p_oob, %gid64, %limit;
@%p_oob bra ATINIT_DONE;
// rec = address of record gid
mul.wide.s32 %byte_off, %gid, 260;
add.s64 %rec, %recs, %byte_off;
// words 0 and 1: the two halves of the 64-bit id, each XORed with a constant
cvt.u32.u64 %gid_lo, %gid64;
shr.u64 %hi64, %gid64, 32;
cvt.u32.u64 %gid_hi, %hi64;
xor.b32 %w0, %gid_lo, 0x5C5C5C5C;
xor.b32 %w1, %gid_hi, 0x36363636;
st.global.u32 [%rec], %w0;
st.global.u32 [%rec+4], %w1;
// words 2..63 (byte offsets 8..252) are cleared
mov.u32 %zero, 0;
add.s64 %cur, %rec, 8;
mov.u32 %i, 2;
ATINIT_ZERO:
st.global.u32 [%cur], %zero;
add.s64 %cur, %cur, 4;
add.s32 %i, %i, 1;
setp.lt.u32 %p_more, %i, 64;
@%p_more bra ATINIT_ZERO;
// trailing word at byte offset 256 is set to the constant 7
mov.u32 %seven, 7;
st.global.u32 [%rec+256], %seven;
ATINIT_DONE:
ret;
}
// .globl m03200_init
.entry m03200_init(
.param .u64 .ptr .global .align 4 m03200_init_param_0,
.param .u64 .ptr .global .align 4 m03200_init_param_1,
.param .u64 .ptr .global .align 4 m03200_init_param_2,
.param .u64 .ptr .global .align 4 m03200_init_param_3,
.param .u64 .ptr .global .align 4 m03200_init_param_4,
.param .u64 .ptr .global .align 1 m03200_init_param_5,
.param .u64 .ptr .global .align 4 m03200_init_param_6,
.param .u64 .ptr .global .align 4 m03200_init_param_7,
.param .u64 .ptr .global .align 4 m03200_init_param_8,
.param .u64 .ptr .global .align 4 m03200_init_param_9,
.param .u64 .ptr .global .align 4 m03200_init_param_10,
.param .u64 .ptr .global .align 4 m03200_init_param_11,
.param .u64 .ptr .global .align 4 m03200_init_param_12,
.param .u64 .ptr .global .align 4 m03200_init_param_13,
.param .u64 .ptr .global .align 8 m03200_init_param_14,
.param .u64 .ptr .global .align 4 m03200_init_param_15,
.param .u64 .ptr .global .align 4 m03200_init_param_16,
.param .u64 .ptr .global .align 4 m03200_init_param_17,
.param .u64 .ptr .global .align 1 m03200_init_param_18,
.param .u64 .ptr .global .align 4 m03200_init_param_19,
.param .u64 .ptr .global .align 4 m03200_init_param_20,
.param .u64 .ptr .global .align 4 m03200_init_param_21,
.param .u64 .ptr .global .align 4 m03200_init_param_22,
.param .u64 .ptr .global .align 4 m03200_init_param_23,
.param .u32 m03200_init_param_24,
.param .u32 m03200_init_param_25,
.param .u32 m03200_init_param_26,
.param .u32 m03200_init_param_27,
.param .u32 m03200_init_param_28,
.param .u32 m03200_init_param_29,
.param .u32 m03200_init_param_30,
.param .u32 m03200_init_param_31,
.param .u32 m03200_init_param_32,
.param .u32 m03200_init_param_33,
.param .u64 m03200_init_param_34
)
.reqntid 8, 1, 1
{
.local .align 16 .b8 __local_depot3[240];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<18>;
.reg .b16 %rs<8>;
.reg .b32 %r<3909>;
.reg .b64 %rd<1341>;
// demoted variable
.shared .align 4 .b8 m03200_init$S0_all[8192];
// demoted variable
.shared .align 4 .b8 m03200_init$S1_all[8192];
// demoted variable
.shared .align 4 .b8 m03200_init$S2_all[8192];
// demoted variable
.shared .align 4 .b8 m03200_init$S3_all[8192];
mov.u64 %SPL, __local_depot3;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd48, [m03200_init_param_0];
ld.param.u64 %rd49, [m03200_init_param_4];
ld.param.u64 %rd50, [m03200_init_param_17];
ld.param.u32 %r131, [m03200_init_param_27];
ld.param.u64 %rd51, [m03200_init_param_34];
add.u64 %rd52, %SP, 96;
add.u64 %rd1336, %SPL, 96;
mov.u32 %r132, %ctaid.x;
mov.u32 %r133, %ntid.x;
mov.b32 %r134, %envreg3;
mad.lo.s32 %r135, %r132, %r133, %r134;
mov.u32 %r1, %tid.x;
add.s32 %r2, %r135, %r1;
cvt.s64.s32 %rd53, %r2;
cvt.s64.s32 %rd2, %r1;
setp.ge.u64 %p1, %rd53, %rd51;
@%p1 bra BB3_31;
mul.wide.s32 %rd54, %r2, 260;
add.s64 %rd55, %rd48, %rd54;
ld.global.u32 %r3, [%rd55+256];
mov.u32 %r136, 0;
ld.global.u32 %r137, [%rd55];
ld.global.u32 %r138, [%rd55+4];
ld.global.u32 %r139, [%rd55+8];
ld.global.u32 %r140, [%rd55+12];
add.u64 %rd57, %SPL, 0;
ld.global.u32 %r141, [%rd55+16];
ld.global.u32 %r142, [%rd55+20];
ld.global.u32 %r143, [%rd55+24];
ld.global.u32 %r144, [%rd55+28];
ld.global.u32 %r145, [%rd55+32];
ld.global.u32 %r146, [%rd55+36];
ld.global.u32 %r147, [%rd55+40];
ld.global.u32 %r148, [%rd55+44];
ld.global.u32 %r149, [%rd55+48];
ld.global.u32 %r150, [%rd55+52];
ld.global.u32 %r151, [%rd55+56];
ld.global.u32 %r152, [%rd55+60];
ld.global.u32 %r153, [%rd55+64];
ld.global.u32 %r154, [%rd55+68];
st.local.v2.u32 [%rd57], {%r137, %r138};
st.local.v2.u32 [%rd57+8], {%r139, %r140};
st.local.v2.u32 [%rd57+16], {%r141, %r142};
st.local.v2.u32 [%rd57+24], {%r143, %r144};
st.local.v2.u32 [%rd57+32], {%r145, %r146};
st.local.v2.u32 [%rd57+40], {%r147, %r148};
st.local.v2.u32 [%rd57+48], {%r149, %r150};
st.local.v2.u32 [%rd57+56], {%r151, %r152};
st.local.v2.u32 [%rd57+64], {%r153, %r154};
add.u64 %rd6, %SPL, 168;
add.u64 %rd5, %SPL, 80;
mov.u64 %rd1326, %rd6;
mov.u32 %r3867, %r136;
BB3_2:
st.local.u32 [%rd1326], %r136;
add.s64 %rd1326, %rd1326, 4;
add.s32 %r3867, %r3867, 1;
setp.lt.u32 %p2, %r3867, 18;
@%p2 bra BB3_2;
not.b32 %r6, %r3;
mov.u32 %r3868, 0;
BB3_4:
mov.u32 %r157, 72;
sub.s32 %r158, %r157, %r3868;
min.s32 %r8, %r3, %r158;
setp.lt.s32 %p3, %r8, 1;
@%p3 bra BB3_14;
add.s32 %r160, %r3868, -73;
max.s32 %r9, %r6, %r160;
not.b32 %r161, %r9;
and.b32 %r10, %r161, 3;
setp.eq.s32 %p4, %r10, 0;
mov.u32 %r3872, 0;
@%p4 bra BB3_11;
setp.eq.s32 %p5, %r10, 1;
mov.u32 %r3870, 0;
@%p5 bra BB3_10;
setp.eq.s32 %p6, %r10, 2;
mov.u32 %r3869, 0;
@%p6 bra BB3_9;
ld.local.u8 %rs1, [%rd57];
cvt.s64.s32 %rd61, %r3868;
add.s64 %rd62, %rd6, %rd61;
st.local.u8 [%rd62], %rs1;
mov.u32 %r3869, 1;
BB3_9:
cvt.u64.u32 %rd63, %r3869;
add.s64 %rd64, %rd57, %rd63;
ld.local.u8 %rs2, [%rd64];
add.s32 %r165, %r3869, %r3868;
cvt.s64.s32 %rd65, %r165;
add.s64 %rd66, %rd6, %rd65;
st.local.u8 [%rd66], %rs2;
add.s32 %r3870, %r3869, 1;
BB3_10:
cvt.s64.s32 %rd67, %r3870;
add.s64 %rd68, %rd57, %rd67;
ld.local.u8 %rs3, [%rd68];
add.s32 %r166, %r3870, %r3868;
cvt.s64.s32 %rd69, %r166;
add.s64 %rd70, %rd6, %rd69;
st.local.u8 [%rd70], %rs3;
add.s32 %r3872, %r3870, 1;
BB3_11:
setp.gt.u32 %p7, %r9, -5;
@%p7 bra BB3_14;
add.s32 %r167, %r3872, %r3868;
cvt.s64.s32 %rd71, %r167;
add.s64 %rd1328, %rd6, %rd71;
cvt.s64.s32 %rd72, %r3872;
add.s64 %rd1327, %rd57, %rd72;
BB3_13:
ld.local.u8 %rs4, [%rd1327];
ld.local.u8 %rs5, [%rd1327+1];
ld.local.u8 %rs6, [%rd1327+2];
ld.local.u8 %rs7, [%rd1327+3];
st.local.u8 [%rd1328], %rs4;
st.local.u8 [%rd1328+1], %rs5;
st.local.u8 [%rd1328+2], %rs6;
st.local.u8 [%rd1328+3], %rs7;
add.s64 %rd1328, %rd1328, 4;
add.s64 %rd1327, %rd1327, 4;
add.s32 %r3872, %r3872, 4;
setp.lt.s32 %p8, %r3872, %r8;
@%p8 bra BB3_13;
BB3_14:
add.s32 %r168, %r3868, %r8;
add.s32 %r3868, %r168, 1;
setp.lt.s32 %p9, %r3868, 72;
@%p9 bra BB3_4;
ld.local.u32 %r170, [%rd6];
// inline asm
prmt.b32 %r169, %r170, 0, 0x0123;
// inline asm
st.local.u32 [%rd6], %r169;
ld.local.u32 %r172, [%rd6+4];
// inline asm
prmt.b32 %r171, %r172, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+4], %r171;
ld.local.u32 %r174, [%rd6+8];
// inline asm
prmt.b32 %r173, %r174, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+8], %r173;
ld.local.u32 %r176, [%rd6+12];
// inline asm
prmt.b32 %r175, %r176, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+12], %r175;
ld.local.u32 %r178, [%rd6+16];
// inline asm
prmt.b32 %r177, %r178, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+16], %r177;
ld.local.u32 %r180, [%rd6+20];
// inline asm
prmt.b32 %r179, %r180, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+20], %r179;
ld.local.u32 %r182, [%rd6+24];
// inline asm
prmt.b32 %r181, %r182, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+24], %r181;
ld.local.u32 %r184, [%rd6+28];
// inline asm
prmt.b32 %r183, %r184, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+28], %r183;
ld.local.u32 %r186, [%rd6+32];
// inline asm
prmt.b32 %r185, %r186, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+32], %r185;
ld.local.u32 %r188, [%rd6+36];
// inline asm
prmt.b32 %r187, %r188, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+36], %r187;
ld.local.u32 %r190, [%rd6+40];
// inline asm
prmt.b32 %r189, %r190, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+40], %r189;
ld.local.u32 %r192, [%rd6+44];
// inline asm
prmt.b32 %r191, %r192, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+44], %r191;
ld.local.u32 %r194, [%rd6+48];
// inline asm
prmt.b32 %r193, %r194, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+48], %r193;
ld.local.u32 %r196, [%rd6+52];
// inline asm
prmt.b32 %r195, %r196, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+52], %r195;
ld.local.u32 %r198, [%rd6+56];
// inline asm
prmt.b32 %r197, %r198, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+56], %r197;
ld.local.u32 %r200, [%rd6+60];
// inline asm
prmt.b32 %r199, %r200, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+60], %r199;
ld.local.u32 %r202, [%rd6+64];
// inline asm
prmt.b32 %r201, %r202, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+64], %r201;
ld.local.u32 %r204, [%rd6+68];
// inline asm
prmt.b32 %r203, %r204, 0, 0x0123;
// inline asm
st.local.u32 [%rd6+68], %r203;
mul.wide.s32 %rd74, %r2, 4240;
add.s64 %rd16, %rd49, %rd74;
st.global.u32 [%rd16], %r169;
st.global.u32 [%rd16+4], %r171;
st.global.u32 [%rd16+8], %r173;
st.global.u32 [%rd16+12], %r175;
st.global.u32 [%rd16+16], %r177;
st.global.u32 [%rd16+20], %r179;
st.global.u32 [%rd16+24], %r181;
st.global.u32 [%rd16+28], %r183;
st.global.u32 [%rd16+32], %r185;
st.global.u32 [%rd16+36], %r187;
st.global.u32 [%rd16+40], %r189;
st.global.u32 [%rd16+44], %r191;
st.global.u32 [%rd16+48], %r193;
st.global.u32 [%rd16+52], %r195;
st.global.u32 [%rd16+56], %r197;
st.global.u32 [%rd16+60], %r199;
st.global.u32 [%rd16+64], %r201;
st.global.u32 [%rd16+68], %r203;
mul.wide.u32 %rd75, %r131, 560;
add.s64 %rd76, %rd50, %rd75;
ld.global.u32 %r37, [%rd76];
mov.u32 %r3873, 0;
ld.global.u32 %r38, [%rd76+4];
ld.global.u32 %r39, [%rd76+8];
ld.global.u32 %r40, [%rd76+12];
st.local.v4.u32 [%rd5], {%r37, %r38, %r39, %r40};
cvta.to.local.u64 %rd1330, %rd52;
mov.u64 %rd1329, m03200_init$P;
BB3_16:
ld.const.u32 %r206, [%rd1329];
st.local.u32 [%rd1330], %r206;
add.s64 %rd1330, %rd1330, 4;
add.s64 %rd1329, %rd1329, 4;
add.s32 %r3873, %r3873, 1;
setp.lt.u32 %p10, %r3873, 18;
@%p10 bra BB3_16;
cvt.u32.u64 %r208, %rd2;
mul.wide.s32 %rd1335, %r208, 1024;
mov.u32 %r3874, -256;
mov.u64 %rd1334, c_sbox3;
mov.u64 %rd1333, c_sbox2;
mov.u64 %rd1332, c_sbox1;
mov.u64 %rd1331, c_sbox0;
BB3_18:
ld.const.v2.u32 {%r209, %r210}, [%rd1331];
mov.u64 %rd84, m03200_init$S0_all;
add.s64 %rd85, %rd84, %rd1335;
st.shared.u32 [%rd85], %r209;
st.shared.u32 [%rd85+4], %r210;
ld.const.v2.u32 {%r213, %r214}, [%rd1332];
mov.u64 %rd86, m03200_init$S1_all;
add.s64 %rd87, %rd86, %rd1335;
st.shared.u32 [%rd87], %r213;
st.shared.u32 [%rd87+4], %r214;
ld.const.v2.u32 {%r217, %r218}, [%rd1333];
mov.u64 %rd88, m03200_init$S2_all;
add.s64 %rd89, %rd88, %rd1335;
st.shared.u32 [%rd89], %r217;
st.shared.u32 [%rd89+4], %r218;
ld.const.v2.u32 {%r221, %r222}, [%rd1334];
mov.u64 %rd90, m03200_init$S3_all;
add.s64 %rd91, %rd90, %rd1335;
st.shared.u32 [%rd91], %r221;
st.shared.u32 [%rd91+4], %r222;
ld.const.v2.u32 {%r225, %r226}, [%rd1331+8];
st.shared.u32 [%rd85+8], %r225;
st.shared.u32 [%rd85+12], %r226;
ld.const.v2.u32 {%r229, %r230}, [%rd1332+8];
st.shared.u32 [%rd87+8], %r229;
st.shared.u32 [%rd87+12], %r230;
ld.const.v2.u32 {%r233, %r234}, [%rd1333+8];
st.shared.u32 [%rd89+8], %r233;
st.shared.u32 [%rd89+12], %r234;
ld.const.v2.u32 {%r237, %r238}, [%rd1334+8];
st.shared.u32 [%rd91+8], %r237;
st.shared.u32 [%rd91+12], %r238;
ld.const.v2.u32 {%r241, %r242}, [%rd1331+16];
st.shared.u32 [%rd85+16], %r241;
st.shared.u32 [%rd85+20], %r242;
ld.const.v2.u32 {%r245, %r246}, [%rd1332+16];
st.shared.u32 [%rd87+16], %r245;
st.shared.u32 [%rd87+20], %r246;
ld.const.v2.u32 {%r249, %r250}, [%rd1333+16];
st.shared.u32 [%rd89+16], %r249;
st.shared.u32 [%rd89+20], %r250;
ld.const.v2.u32 {%r253, %r254}, [%rd1334+16];
st.shared.u32 [%rd91+16], %r253;
st.shared.u32 [%rd91+20], %r254;
ld.const.v2.u32 {%r257, %r258}, [%rd1331+24];
st.shared.u32 [%rd85+24], %r257;
st.shared.u32 [%rd85+28], %r258;
ld.const.v2.u32 {%r261, %r262}, [%rd1332+24];
st.shared.u32 [%rd87+24], %r261;
st.shared.u32 [%rd87+28], %r262;
ld.const.v2.u32 {%r265, %r266}, [%rd1333+24];
st.shared.u32 [%rd89+24], %r265;
st.shared.u32 [%rd89+28], %r266;
ld.const.v2.u32 {%r269, %r270}, [%rd1334+24];
st.shared.u32 [%rd91+24], %r269;
st.shared.u32 [%rd91+28], %r270;
add.s64 %rd1335, %rd1335, 32;
add.s64 %rd1334, %rd1334, 32;
add.s64 %rd1333, %rd1333, 32;
add.s64 %rd1332, %rd1332, 32;
add.s64 %rd1331, %rd1331, 32;
add.s32 %r3874, %r3874, 8;
setp.ne.s32 %p11, %r3874, 0;
@%p11 bra BB3_18;
cvta.to.local.u64 %rd93, %rd52;
ld.local.v2.u32 {%r276, %r277}, [%rd93];
mov.u32 %r275, 0;
xor.b32 %r3892, %r276, %r169;
xor.b32 %r3891, %r277, %r171;
st.local.v2.u32 [%rd93], {%r3892, %r3891};
add.s64 %rd33, %rd1336, 8;
ld.local.v2.u32 {%r280, %r281}, [%rd1336+8];
xor.b32 %r3890, %r280, %r173;
xor.b32 %r3889, %r281, %r175;
st.local.v2.u32 [%rd1336+8], {%r3890, %r3889};
ld.local.v2.u32 {%r284, %r285}, [%rd1336+16];
xor.b32 %r3888, %r284, %r177;
xor.b32 %r3887, %r285, %r179;
st.local.v2.u32 [%rd1336+16], {%r3888, %r3887};
ld.local.v2.u32 {%r288, %r289}, [%rd1336+24];
xor.b32 %r3886, %r288, %r181;
xor.b32 %r3885, %r289, %r183;
st.local.v2.u32 [%rd1336+24], {%r3886, %r3885};
ld.local.v2.u32 {%r292, %r293}, [%rd1336+32];
xor.b32 %r3884, %r292, %r185;
xor.b32 %r3883, %r293, %r187;
st.local.v2.u32 [%rd1336+32], {%r3884, %r3883};
ld.local.v2.u32 {%r296, %r297}, [%rd1336+40];
xor.b32 %r3882, %r296, %r189;
xor.b32 %r3881, %r297, %r191;
st.local.v2.u32 [%rd1336+40], {%r3882, %r3881};
ld.local.v2.u32 {%r300, %r301}, [%rd1336+48];
xor.b32 %r3880, %r300, %r193;
xor.b32 %r3879, %r301, %r195;
st.local.v2.u32 [%rd1336+48], {%r3880, %r3879};
ld.local.v2.u32 {%r304, %r305}, [%rd1336+56];
xor.b32 %r3878, %r304, %r197;
xor.b32 %r3877, %r305, %r199;
st.local.v2.u32 [%rd1336+56], {%r3878, %r3877};
ld.local.v2.u32 {%r308, %r309}, [%rd1336+64];
xor.b32 %r3876, %r308, %r201;
xor.b32 %r3875, %r309, %r203;
st.local.v2.u32 [%rd1336+64], {%r3876, %r3875};
mov.u32 %r3893, %r275;
mov.u32 %r3897, %r275;
mov.u32 %r3898, %r275;
BB3_20:
and.b32 %r568, %r3893, 2;
mul.wide.u32 %rd94, %r568, 4;
add.s64 %rd95, %rd5, %rd94;
ld.local.u32 %r569, [%rd95];
xor.b32 %r570, %r569, %r3898;
ld.local.u32 %r571, [%rd95+4];
xor.b32 %r325, %r570, %r3892;
mov.u32 %r554, 24;
mov.u32 %r567, 8;
// inline asm
bfe.u32 %r312, %r325, %r554, %r567;
// inline asm
shl.b64 %rd96, %rd2, 10;
add.s64 %rd98, %rd84, %rd96;
mul.wide.u32 %rd99, %r312, 4;
add.s64 %rd100, %rd98, %rd99;
ld.shared.u32 %r572, [%rd100];
mov.u32 %r558, 16;
// inline asm
bfe.u32 %r316, %r325, %r558, %r567;
// inline asm
add.s64 %rd102, %rd86, %rd96;
mul.wide.u32 %rd103, %r316, 4;
add.s64 %rd104, %rd102, %rd103;
ld.shared.u32 %r573, [%rd104];
add.s32 %r574, %r573, %r572;
// inline asm
bfe.u32 %r320, %r325, %r567, %r567;
// inline asm
add.s64 %rd106, %rd88, %rd96;
mul.wide.u32 %rd107, %r320, 4;
add.s64 %rd108, %rd106, %rd107;
ld.shared.u32 %r575, [%rd108];
xor.b32 %r576, %r575, %r574;
// inline asm
bfe.u32 %r324, %r325, %r275, %r567;
// inline asm
add.s64 %rd110, %rd90, %rd96;
mul.wide.u32 %rd111, %r324, 4;
add.s64 %rd112, %rd110, %rd111;
ld.shared.u32 %r577, [%rd112];
add.s32 %r578, %r577, %r576;
xor.b32 %r579, %r571, %r3897;
xor.b32 %r580, %r579, %r3891;
xor.b32 %r341, %r580, %r578;
// inline asm
bfe.u32 %r328, %r341, %r554, %r567;
// inline asm
mul.wide.u32 %rd113, %r328, 4;
add.s64 %rd114, %rd98, %rd113;
ld.shared.u32 %r581, [%rd114];
// inline asm
bfe.u32 %r332, %r341, %r558, %r567;
// inline asm
mul.wide.u32 %rd115, %r332, 4;
add.s64 %rd116, %rd102, %rd115;
ld.shared.u32 %r582, [%rd116];
add.s32 %r583, %r582, %r581;
// inline asm
bfe.u32 %r336, %r341, %r567, %r567;
// inline asm
mul.wide.u32 %rd117, %r336, 4;
add.s64 %rd118, %rd106, %rd117;
ld.shared.u32 %r584, [%rd118];
xor.b32 %r585, %r584, %r583;
// inline asm
bfe.u32 %r340, %r341, %r275, %r567;
// inline asm
mul.wide.u32 %rd119, %r340, 4;
add.s64 %rd120, %rd110, %rd119;
ld.shared.u32 %r586, [%rd120];
add.s32 %r587, %r586, %r585;
xor.b32 %r588, %r3890, %r325;
xor.b32 %r357, %r588, %r587;
// inline asm
bfe.u32 %r344, %r357, %r554, %r567;
// inline asm
mul.wide.u32 %rd121, %r344, 4;
add.s64 %rd122, %rd98, %rd121;
ld.shared.u32 %r589, [%rd122];
// inline asm
bfe.u32 %r348, %r357, %r558, %r567;
// inline asm
mul.wide.u32 %rd123, %r348, 4;
add.s64 %rd124, %rd102, %rd123;
ld.shared.u32 %r590, [%rd124];
add.s32 %r591, %r590, %r589;
// inline asm
bfe.u32 %r352, %r357, %r567, %r567;
// inline asm
mul.wide.u32 %rd125, %r352, 4;
add.s64 %rd126, %rd106, %rd125;
ld.shared.u32 %r592, [%rd126];
xor.b32 %r593, %r592, %r591;
// inline asm
bfe.u32 %r356, %r357, %r275, %r567;
// inline asm
mul.wide.u32 %rd127, %r356, 4;
add.s64 %rd128, %rd110, %rd127;
ld.shared.u32 %r594, [%rd128];
add.s32 %r595, %r594, %r593;
xor.b32 %r596, %r3889, %r341;
xor.b32 %r373, %r596, %r595;
// inline asm
bfe.u32 %r360, %r373, %r554, %r567;
// inline asm
mul.wide.u32 %rd129, %r360, 4;
add.s64 %rd130, %rd98, %rd129;
ld.shared.u32 %r597, [%rd130];
// inline asm
bfe.u32 %r364, %r373, %r558, %r567;
// inline asm
mul.wide.u32 %rd131, %r364, 4;
add.s64 %rd132, %rd102, %rd131;
ld.shared.u32 %r598, [%rd132];
add.s32 %r599, %r598, %r597;
// inline asm
bfe.u32 %r368, %r373, %r567, %r567;
// inline asm
mul.wide.u32 %rd133, %r368, 4;
add.s64 %rd134, %rd106, %rd133;
ld.shared.u32 %r600, [%rd134];
xor.b32 %r601, %r600, %r599;
// inline asm
bfe.u32 %r372, %r373, %r275, %r567;
// inline asm
mul.wide.u32 %rd135, %r372, 4;
add.s64 %rd136, %rd110, %rd135;
ld.shared.u32 %r602, [%rd136];
add.s32 %r603, %r602, %r601;
xor.b32 %r604, %r3888, %r357;
xor.b32 %r389, %r604, %r603;
// inline asm
bfe.u32 %r376, %r389, %r554, %r567;
// inline asm
mul.wide.u32 %rd137, %r376, 4;
add.s64 %rd138, %rd98, %rd137;
ld.shared.u32 %r605, [%rd138];
// inline asm
bfe.u32 %r380, %r389, %r558, %r567;
// inline asm
mul.wide.u32 %rd139, %r380, 4;
add.s64 %rd140, %rd102, %rd139;
ld.shared.u32 %r606, [%rd140];
add.s32 %r607, %r606, %r605;
// inline asm
bfe.u32 %r384, %r389, %r567, %r567;
// inline asm
mul.wide.u32 %rd141, %r384, 4;
add.s64 %rd142, %rd106, %rd141;
ld.shared.u32 %r608, [%rd142];
xor.b32 %r609, %r608, %r607;
// inline asm
bfe.u32 %r388, %r389, %r275, %r567;
// inline asm
mul.wide.u32 %rd143, %r388, 4;
add.s64 %rd144, %rd110, %rd143;
ld.shared.u32 %r610, [%rd144];
add.s32 %r611, %r610, %r609;
xor.b32 %r612, %r3887, %r373;
xor.b32 %r405, %r612, %r611;
// inline asm
bfe.u32 %r392, %r405, %r554, %r567;
// inline asm
mul.wide.u32 %rd145, %r392, 4;
add.s64 %rd146, %rd98, %rd145;
ld.shared.u32 %r613, [%rd146];
// inline asm
bfe.u32 %r396, %r405, %r558, %r567;
// inline asm
mul.wide.u32 %rd147, %r396, 4;
add.s64 %rd148, %rd102, %rd147;
ld.shared.u32 %r614, [%rd148];
add.s32 %r615, %r614, %r613;
// inline asm
bfe.u32 %r400, %r405, %r567, %r567;
// inline asm
mul.wide.u32 %rd149, %r400, 4;
add.s64 %rd150, %rd106, %rd149;
ld.shared.u32 %r616, [%rd150];
xor.b32 %r617, %r616, %r615;
// inline asm
bfe.u32 %r404, %r405, %r275, %r567;
// inline asm
mul.wide.u32 %rd151, %r404, 4;
add.s64 %rd152, %rd110, %rd151;
ld.shared.u32 %r618, [%rd152];
add.s32 %r619, %r618, %r617;
xor.b32 %r620, %r3886, %r389;
xor.b32 %r421, %r620, %r619;
// inline asm
bfe.u32 %r408, %r421, %r554, %r567;
// inline asm
mul.wide.u32 %rd153, %r408, 4;
add.s64 %rd154, %rd98, %rd153;
ld.shared.u32 %r621, [%rd154];
// inline asm
bfe.u32 %r412, %r421, %r558, %r567;
// inline asm
mul.wide.u32 %rd155, %r412, 4;
add.s64 %rd156, %rd102, %rd155;
ld.shared.u32 %r622, [%rd156];
add.s32 %r623, %r622, %r621;
// inline asm
bfe.u32 %r416, %r421, %r567, %r567;
// inline asm
mul.wide.u32 %rd157, %r416, 4;
add.s64 %rd158, %rd106, %rd157;
ld.shared.u32 %r624, [%rd158];
xor.b32 %r625, %r624, %r623;
// inline asm
bfe.u32 %r420, %r421, %r275, %r567;
// inline asm
mul.wide.u32 %rd159, %r420, 4;
add.s64 %rd160, %rd110, %rd159;
ld.shared.u32 %r626, [%rd160];
add.s32 %r627, %r626, %r625;
xor.b32 %r628, %r3885, %r405;
xor.b32 %r437, %r628, %r627;
// inline asm
bfe.u32 %r424, %r437, %r554, %r567;
// inline asm
mul.wide.u32 %rd161, %r424, 4;
add.s64 %rd162, %rd98, %rd161;
ld.shared.u32 %r629, [%rd162];
// inline asm
bfe.u32 %r428, %r437, %r558, %r567;
// inline asm
mul.wide.u32 %rd163, %r428, 4;
add.s64 %rd164, %rd102, %rd163;
ld.shared.u32 %r630, [%rd164];
add.s32 %r631, %r630, %r629;
// inline asm
bfe.u32 %r432, %r437, %r567, %r567;
// inline asm
mul.wide.u32 %rd165, %r432, 4;
add.s64 %rd166, %rd106, %rd165;
ld.shared.u32 %r632, [%rd166];
xor.b32 %r633, %r632, %r631;
// inline asm
bfe.u32 %r436, %r437, %r275, %r567;
// inline asm
mul.wide.u32 %rd167, %r436, 4;
add.s64 %rd168, %rd110, %rd167;
ld.shared.u32 %r634, [%rd168];
add.s32 %r635, %r634, %r633;
xor.b32 %r636, %r3884, %r421;
xor.b32 %r453, %r636, %r635;
// inline asm
bfe.u32 %r440, %r453, %r554, %r567;
// inline asm
mul.wide.u32 %rd169, %r440, 4;
add.s64 %rd170, %rd98, %rd169;
ld.shared.u32 %r637, [%rd170];
// inline asm
bfe.u32 %r444, %r453, %r558, %r567;
// inline asm
mul.wide.u32 %rd171, %r444, 4;
add.s64 %rd172, %rd102, %rd171;
ld.shared.u32 %r638, [%rd172];
add.s32 %r639, %r638, %r637;
// inline asm
bfe.u32 %r448, %r453, %r567, %r567;
// inline asm
mul.wide.u32 %rd173, %r448, 4;
add.s64 %rd174, %rd106, %rd173;
ld.shared.u32 %r640, [%rd174];
xor.b32 %r641, %r640, %r639;
// inline asm
bfe.u32 %r452, %r453, %r275, %r567;
// inline asm
mul.wide.u32 %rd175, %r452, 4;
add.s64 %rd176, %rd110, %rd175;
ld.shared.u32 %r642, [%rd176];
add.s32 %r643, %r642, %r641;
xor.b32 %r644, %r3883, %r437;
xor.b32 %r469, %r644, %r643;
// inline asm
bfe.u32 %r456, %r469, %r554, %r567;
// inline asm
mul.wide.u32 %rd177, %r456, 4;
add.s64 %rd178, %rd98, %rd177;
ld.shared.u32 %r645, [%rd178];
// inline asm
bfe.u32 %r460, %r469, %r558, %r567;
// inline asm
mul.wide.u32 %rd179, %r460, 4;
add.s64 %rd180, %rd102, %rd179;
ld.shared.u32 %r646, [%rd180];
add.s32 %r647, %r646, %r645;
// inline asm
bfe.u32 %r464, %r469, %r567, %r567;
// inline asm
mul.wide.u32 %rd181, %r464, 4;
add.s64 %rd182, %rd106, %rd181;
ld.shared.u32 %r648, [%rd182];
xor.b32 %r649, %r648, %r647;
// inline asm
bfe.u32 %r468, %r469, %r275, %r567;
// inline asm
mul.wide.u32 %rd183, %r468, 4;
add.s64 %rd184, %rd110, %rd183;
ld.shared.u32 %r650, [%rd184];
add.s32 %r651, %r650, %r649;
xor.b32 %r652, %r3882, %r453;
xor.b32 %r485, %r652, %r651;
// inline asm
bfe.u32 %r472, %r485, %r554, %r567;
// inline asm
mul.wide.u32 %rd185, %r472, 4;
add.s64 %rd186, %rd98, %rd185;
ld.shared.u32 %r653, [%rd186];
// inline asm
bfe.u32 %r476, %r485, %r558, %r567;
// inline asm
mul.wide.u32 %rd187, %r476, 4;
add.s64 %rd188, %rd102, %rd187;
ld.shared.u32 %r654, [%rd188];
add.s32 %r655, %r654, %r653;
// inline asm
bfe.u32 %r480, %r485, %r567, %r567;
// inline asm
mul.wide.u32 %rd189, %r480, 4;
add.s64 %rd190, %rd106, %rd189;
ld.shared.u32 %r656, [%rd190];
xor.b32 %r657, %r656, %r655;
// inline asm
bfe.u32 %r484, %r485, %r275, %r567;
// inline asm
mul.wide.u32 %rd191, %r484, 4;
add.s64 %rd192, %rd110, %rd191;
ld.shared.u32 %r658, [%rd192];
add.s32 %r659, %r658, %r657;
xor.b32 %r660, %r3881, %r469;
xor.b32 %r501, %r660, %r659;
// inline asm
bfe.u32 %r488, %r501, %r554, %r567;
// inline asm
mul.wide.u32 %rd193, %r488, 4;
add.s64 %rd194, %rd98, %rd193;
ld.shared.u32 %r661, [%rd194];
// inline asm
bfe.u32 %r492, %r501, %r558, %r567;
// inline asm
mul.wide.u32 %rd195, %r492, 4;
add.s64 %rd196, %rd102, %rd195;
ld.shared.u32 %r662, [%rd196];
add.s32 %r663, %r662, %r661;
// inline asm
bfe.u32 %r496, %r501, %r567, %r567;
// inline asm
mul.wide.u32 %rd197, %r496, 4;
add.s64 %rd198, %rd106, %rd197;
ld.shared.u32 %r664, [%rd198];
xor.b32 %r665, %r664, %r663;
// inline asm
bfe.u32 %r500, %r501, %r275, %r567;
// inline asm
mul.wide.u32 %rd199, %r500, 4;
add.s64 %rd200, %rd110, %rd199;
ld.shared.u32 %r666, [%rd200];
add.s32 %r667, %r666, %r665;
xor.b32 %r668, %r3880, %r485;
xor.b32 %r517, %r668, %r667;
// inline asm
bfe.u32 %r504, %r517, %r554, %r567;
// inline asm
mul.wide.u32 %rd201, %r504, 4;
add.s64 %rd202, %rd98, %rd201;
ld.shared.u32 %r669, [%rd202];
// inline asm
bfe.u32 %r508, %r517, %r558, %r567;
// inline asm
mul.wide.u32 %rd203, %r508, 4;
add.s64 %rd204, %rd102, %rd203;
ld.shared.u32 %r670, [%rd204];
add.s32 %r671, %r670, %r669;
// inline asm
bfe.u32 %r512, %r517, %r567, %r567;
// inline asm
mul.wide.u32 %rd205, %r512, 4;
add.s64 %rd206, %rd106, %rd205;
ld.shared.u32 %r672, [%rd206];
xor.b32 %r673, %r672, %r671;
// inline asm
bfe.u32 %r516, %r517, %r275, %r567;
// inline asm
mul.wide.u32 %rd207, %r516, 4;
add.s64 %rd208, %rd110, %rd207;
ld.shared.u32 %r674, [%rd208];
add.s32 %r675, %r674, %r673;
xor.b32 %r676, %r3879, %r501;
xor.b32 %r533, %r676, %r675;
// inline asm
bfe.u32 %r520, %r533, %r554, %r567;
// inline asm
mul.wide.u32 %rd209, %r520, 4;
add.s64 %rd210, %rd98, %rd209;
ld.shared.u32 %r677, [%rd210];
// inline asm
bfe.u32 %r524, %r533, %r558, %r567;
// inline asm
mul.wide.u32 %rd211, %r524, 4;
add.s64 %rd212, %rd102, %rd211;
ld.shared.u32 %r678, [%rd212];
add.s32 %r679, %r678, %r677;
// inline asm
bfe.u32 %r528, %r533, %r567, %r567;
// inline asm
mul.wide.u32 %rd213, %r528, 4;
add.s64 %rd214, %rd106, %rd213;
ld.shared.u32 %r680, [%rd214];
xor.b32 %r681, %r680, %r679;
// inline asm
bfe.u32 %r532, %r533, %r275, %r567;
// inline asm
mul.wide.u32 %rd215, %r532, 4;
add.s64 %rd216, %rd110, %rd215;
ld.shared.u32 %r682, [%rd216];
add.s32 %r683, %r682, %r681;
xor.b32 %r684, %r3878, %r517;
xor.b32 %r549, %r684, %r683;
// inline asm
bfe.u32 %r536, %r549, %r554, %r567;
// inline asm
mul.wide.u32 %rd217, %r536, 4;
add.s64 %rd218, %rd98, %rd217;
ld.shared.u32 %r685, [%rd218];
// inline asm
bfe.u32 %r540, %r549, %r558, %r567;
// inline asm
mul.wide.u32 %rd219, %r540, 4;
add.s64 %rd220, %rd102, %rd219;
ld.shared.u32 %r686, [%rd220];
add.s32 %r687, %r686, %r685;
// inline asm
bfe.u32 %r544, %r549, %r567, %r567;
// inline asm
mul.wide.u32 %rd221, %r544, 4;
add.s64 %rd222, %rd106, %rd221;
ld.shared.u32 %r688, [%rd222];
xor.b32 %r689, %r688, %r687;
// inline asm
bfe.u32 %r548, %r549, %r275, %r567;
// inline asm
mul.wide.u32 %rd223, %r548, 4;
add.s64 %rd224, %rd110, %rd223;
ld.shared.u32 %r690, [%rd224];
add.s32 %r691, %r690, %r689;
xor.b32 %r692, %r3877, %r533;
xor.b32 %r565, %r692, %r691;
// inline asm
bfe.u32 %r552, %r565, %r554, %r567;
// inline asm
mul.wide.u32 %rd225, %r552, 4;
add.s64 %rd226, %rd98, %rd225;
ld.shared.u32 %r693, [%rd226];
// inline asm
bfe.u32 %r556, %r565, %r558, %r567;
// inline asm
mul.wide.u32 %rd227, %r556, 4;
add.s64 %rd228, %rd102, %rd227;
ld.shared.u32 %r694, [%rd228];
add.s32 %r695, %r694, %r693;
// inline asm
bfe.u32 %r560, %r565, %r567, %r567;
// inline asm
mul.wide.u32 %rd229, %r560, 4;
add.s64 %rd230, %rd106, %rd229;
ld.shared.u32 %r696, [%rd230];
xor.b32 %r697, %r696, %r695;
// inline asm
bfe.u32 %r564, %r565, %r275, %r567;
// inline asm
mul.wide.u32 %rd231, %r564, 4;
add.s64 %rd232, %rd110, %rd231;
ld.shared.u32 %r698, [%rd232];
add.s32 %r699, %r698, %r697;
xor.b32 %r700, %r3876, %r549;
xor.b32 %r3897, %r700, %r699;
xor.b32 %r3898, %r3875, %r565;
st.local.v2.u32 [%rd1336], {%r3898, %r3897};
ld.local.v2.u32 {%r3892, %r3891}, [%rd93];
ld.local.v2.u32 {%r3890, %r3889}, [%rd33];
ld.local.v2.u32 {%r3888, %r3887}, [%rd33+8];
ld.local.v2.u32 {%r3886, %r3885}, [%rd33+16];
ld.local.v2.u32 {%r3884, %r3883}, [%rd33+24];
ld.local.v2.u32 {%r3882, %r3881}, [%rd33+32];
ld.local.v2.u32 {%r3880, %r3879}, [%rd33+40];
ld.local.v2.u32 {%r3878, %r3877}, [%rd33+48];
ld.local.v2.u32 {%r3876, %r3875}, [%rd33+56];
add.s64 %rd1336, %rd1336, 8;
add.s32 %r3893, %r3893, 2;
setp.lt.u32 %p12, %r3893, 18;
@%p12 bra BB3_20;
mul.wide.s32 %rd235, %r1, 1024;
add.s64 %rd1337, %rd84, %rd235;
mov.u32 %r719, 0;
mov.u32 %r3896, %r719;
BB3_22:
mov.u32 %r3863, 16;
mov.u32 %r3862, 8;
mov.u32 %r3861, 24;
xor.b32 %r1232, %r39, %r3898;
xor.b32 %r733, %r1232, %r3892;
// inline asm
bfe.u32 %r720, %r733, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd240, %r720, 4;
add.s64 %rd241, %rd98, %rd240;
ld.shared.u32 %r1233, [%rd241];
// inline asm
bfe.u32 %r724, %r733, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd244, %r724, 4;
add.s64 %rd245, %rd102, %rd244;
ld.shared.u32 %r1234, [%rd245];
add.s32 %r1235, %r1234, %r1233;
// inline asm
bfe.u32 %r728, %r733, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd248, %r728, 4;
add.s64 %rd249, %rd106, %rd248;
ld.shared.u32 %r1236, [%rd249];
xor.b32 %r1237, %r1236, %r1235;
// inline asm
bfe.u32 %r732, %r733, %r719, %r3862;
// inline asm
mul.wide.u32 %rd252, %r732, 4;
add.s64 %rd253, %rd110, %rd252;
ld.shared.u32 %r1238, [%rd253];
add.s32 %r1239, %r1238, %r1237;
xor.b32 %r1240, %r40, %r3897;
xor.b32 %r1241, %r1240, %r3891;
xor.b32 %r749, %r1241, %r1239;
// inline asm
bfe.u32 %r736, %r749, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd254, %r736, 4;
add.s64 %rd255, %rd98, %rd254;
ld.shared.u32 %r1242, [%rd255];
// inline asm
bfe.u32 %r740, %r749, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd256, %r740, 4;
add.s64 %rd257, %rd102, %rd256;
ld.shared.u32 %r1243, [%rd257];
add.s32 %r1244, %r1243, %r1242;
// inline asm
bfe.u32 %r744, %r749, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd258, %r744, 4;
add.s64 %rd259, %rd106, %rd258;
ld.shared.u32 %r1245, [%rd259];
xor.b32 %r1246, %r1245, %r1244;
// inline asm
bfe.u32 %r748, %r749, %r719, %r3862;
// inline asm
mul.wide.u32 %rd260, %r748, 4;
add.s64 %rd261, %rd110, %rd260;
ld.shared.u32 %r1247, [%rd261];
add.s32 %r1248, %r1247, %r1246;
xor.b32 %r1249, %r3890, %r733;
xor.b32 %r765, %r1249, %r1248;
// inline asm
bfe.u32 %r752, %r765, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd262, %r752, 4;
add.s64 %rd263, %rd98, %rd262;
ld.shared.u32 %r1250, [%rd263];
// inline asm
bfe.u32 %r756, %r765, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd264, %r756, 4;
add.s64 %rd265, %rd102, %rd264;
ld.shared.u32 %r1251, [%rd265];
add.s32 %r1252, %r1251, %r1250;
// inline asm
bfe.u32 %r760, %r765, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd266, %r760, 4;
add.s64 %rd267, %rd106, %rd266;
ld.shared.u32 %r1253, [%rd267];
xor.b32 %r1254, %r1253, %r1252;
// inline asm
bfe.u32 %r764, %r765, %r719, %r3862;
// inline asm
mul.wide.u32 %rd268, %r764, 4;
add.s64 %rd269, %rd110, %rd268;
ld.shared.u32 %r1255, [%rd269];
add.s32 %r1256, %r1255, %r1254;
xor.b32 %r1257, %r3889, %r749;
xor.b32 %r781, %r1257, %r1256;
// inline asm
bfe.u32 %r768, %r781, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd270, %r768, 4;
add.s64 %rd271, %rd98, %rd270;
ld.shared.u32 %r1258, [%rd271];
// inline asm
bfe.u32 %r772, %r781, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd272, %r772, 4;
add.s64 %rd273, %rd102, %rd272;
ld.shared.u32 %r1259, [%rd273];
add.s32 %r1260, %r1259, %r1258;
// inline asm
bfe.u32 %r776, %r781, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd274, %r776, 4;
add.s64 %rd275, %rd106, %rd274;
ld.shared.u32 %r1261, [%rd275];
xor.b32 %r1262, %r1261, %r1260;
// inline asm
bfe.u32 %r780, %r781, %r719, %r3862;
// inline asm
mul.wide.u32 %rd276, %r780, 4;
add.s64 %rd277, %rd110, %rd276;
ld.shared.u32 %r1263, [%rd277];
add.s32 %r1264, %r1263, %r1262;
xor.b32 %r1265, %r3888, %r765;
xor.b32 %r797, %r1265, %r1264;
// inline asm
bfe.u32 %r784, %r797, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd278, %r784, 4;
add.s64 %rd279, %rd98, %rd278;
ld.shared.u32 %r1266, [%rd279];
// inline asm
bfe.u32 %r788, %r797, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd280, %r788, 4;
add.s64 %rd281, %rd102, %rd280;
ld.shared.u32 %r1267, [%rd281];
add.s32 %r1268, %r1267, %r1266;
// inline asm
bfe.u32 %r792, %r797, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd282, %r792, 4;
add.s64 %rd283, %rd106, %rd282;
ld.shared.u32 %r1269, [%rd283];
xor.b32 %r1270, %r1269, %r1268;
// inline asm
bfe.u32 %r796, %r797, %r719, %r3862;
// inline asm
mul.wide.u32 %rd284, %r796, 4;
add.s64 %rd285, %rd110, %rd284;
ld.shared.u32 %r1271, [%rd285];
add.s32 %r1272, %r1271, %r1270;
xor.b32 %r1273, %r3887, %r781;
xor.b32 %r813, %r1273, %r1272;
// inline asm
bfe.u32 %r800, %r813, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd286, %r800, 4;
add.s64 %rd287, %rd98, %rd286;
ld.shared.u32 %r1274, [%rd287];
// inline asm
bfe.u32 %r804, %r813, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd288, %r804, 4;
add.s64 %rd289, %rd102, %rd288;
ld.shared.u32 %r1275, [%rd289];
add.s32 %r1276, %r1275, %r1274;
// inline asm
bfe.u32 %r808, %r813, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd290, %r808, 4;
add.s64 %rd291, %rd106, %rd290;
ld.shared.u32 %r1277, [%rd291];
xor.b32 %r1278, %r1277, %r1276;
// inline asm
bfe.u32 %r812, %r813, %r719, %r3862;
// inline asm
mul.wide.u32 %rd292, %r812, 4;
add.s64 %rd293, %rd110, %rd292;
ld.shared.u32 %r1279, [%rd293];
add.s32 %r1280, %r1279, %r1278;
xor.b32 %r1281, %r3886, %r797;
xor.b32 %r829, %r1281, %r1280;
// inline asm
bfe.u32 %r816, %r829, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd294, %r816, 4;
add.s64 %rd295, %rd98, %rd294;
ld.shared.u32 %r1282, [%rd295];
// inline asm
bfe.u32 %r820, %r829, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd296, %r820, 4;
add.s64 %rd297, %rd102, %rd296;
ld.shared.u32 %r1283, [%rd297];
add.s32 %r1284, %r1283, %r1282;
// inline asm
bfe.u32 %r824, %r829, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd298, %r824, 4;
add.s64 %rd299, %rd106, %rd298;
ld.shared.u32 %r1285, [%rd299];
xor.b32 %r1286, %r1285, %r1284;
// inline asm
bfe.u32 %r828, %r829, %r719, %r3862;
// inline asm
mul.wide.u32 %rd300, %r828, 4;
add.s64 %rd301, %rd110, %rd300;
ld.shared.u32 %r1287, [%rd301];
add.s32 %r1288, %r1287, %r1286;
xor.b32 %r1289, %r3885, %r813;
xor.b32 %r845, %r1289, %r1288;
// inline asm
bfe.u32 %r832, %r845, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd302, %r832, 4;
add.s64 %rd303, %rd98, %rd302;
ld.shared.u32 %r1290, [%rd303];
// inline asm
bfe.u32 %r836, %r845, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd304, %r836, 4;
add.s64 %rd305, %rd102, %rd304;
ld.shared.u32 %r1291, [%rd305];
add.s32 %r1292, %r1291, %r1290;
// inline asm
bfe.u32 %r840, %r845, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd306, %r840, 4;
add.s64 %rd307, %rd106, %rd306;
ld.shared.u32 %r1293, [%rd307];
xor.b32 %r1294, %r1293, %r1292;
// inline asm
bfe.u32 %r844, %r845, %r719, %r3862;
// inline asm
mul.wide.u32 %rd308, %r844, 4;
add.s64 %rd309, %rd110, %rd308;
ld.shared.u32 %r1295, [%rd309];
add.s32 %r1296, %r1295, %r1294;
xor.b32 %r1297, %r3884, %r829;
xor.b32 %r861, %r1297, %r1296;
// inline asm
bfe.u32 %r848, %r861, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd310, %r848, 4;
add.s64 %rd311, %rd98, %rd310;
ld.shared.u32 %r1298, [%rd311];
// inline asm
bfe.u32 %r852, %r861, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd312, %r852, 4;
add.s64 %rd313, %rd102, %rd312;
ld.shared.u32 %r1299, [%rd313];
add.s32 %r1300, %r1299, %r1298;
// inline asm
bfe.u32 %r856, %r861, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd314, %r856, 4;
add.s64 %rd315, %rd106, %rd314;
ld.shared.u32 %r1301, [%rd315];
xor.b32 %r1302, %r1301, %r1300;
// inline asm
bfe.u32 %r860, %r861, %r719, %r3862;
// inline asm
mul.wide.u32 %rd316, %r860, 4;
add.s64 %rd317, %rd110, %rd316;
ld.shared.u32 %r1303, [%rd317];
add.s32 %r1304, %r1303, %r1302;
xor.b32 %r1305, %r3883, %r845;
xor.b32 %r877, %r1305, %r1304;
// inline asm
bfe.u32 %r864, %r877, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd318, %r864, 4;
add.s64 %rd319, %rd98, %rd318;
ld.shared.u32 %r1306, [%rd319];
// inline asm
bfe.u32 %r868, %r877, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd320, %r868, 4;
add.s64 %rd321, %rd102, %rd320;
ld.shared.u32 %r1307, [%rd321];
add.s32 %r1308, %r1307, %r1306;
// inline asm
bfe.u32 %r872, %r877, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd322, %r872, 4;
add.s64 %rd323, %rd106, %rd322;
ld.shared.u32 %r1309, [%rd323];
xor.b32 %r1310, %r1309, %r1308;
// inline asm
bfe.u32 %r876, %r877, %r719, %r3862;
// inline asm
mul.wide.u32 %rd324, %r876, 4;
add.s64 %rd325, %rd110, %rd324;
ld.shared.u32 %r1311, [%rd325];
add.s32 %r1312, %r1311, %r1310;
xor.b32 %r1313, %r3882, %r861;
xor.b32 %r893, %r1313, %r1312;
// inline asm
bfe.u32 %r880, %r893, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd326, %r880, 4;
add.s64 %rd327, %rd98, %rd326;
ld.shared.u32 %r1314, [%rd327];
// inline asm
bfe.u32 %r884, %r893, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd328, %r884, 4;
add.s64 %rd329, %rd102, %rd328;
ld.shared.u32 %r1315, [%rd329];
add.s32 %r1316, %r1315, %r1314;
// inline asm
bfe.u32 %r888, %r893, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd330, %r888, 4;
add.s64 %rd331, %rd106, %rd330;
ld.shared.u32 %r1317, [%rd331];
xor.b32 %r1318, %r1317, %r1316;
// inline asm
bfe.u32 %r892, %r893, %r719, %r3862;
// inline asm
mul.wide.u32 %rd332, %r892, 4;
add.s64 %rd333, %rd110, %rd332;
ld.shared.u32 %r1319, [%rd333];
add.s32 %r1320, %r1319, %r1318;
xor.b32 %r1321, %r3881, %r877;
xor.b32 %r909, %r1321, %r1320;
// inline asm
bfe.u32 %r896, %r909, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd334, %r896, 4;
add.s64 %rd335, %rd98, %rd334;
ld.shared.u32 %r1322, [%rd335];
// inline asm
bfe.u32 %r900, %r909, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd336, %r900, 4;
add.s64 %rd337, %rd102, %rd336;
ld.shared.u32 %r1323, [%rd337];
add.s32 %r1324, %r1323, %r1322;
// inline asm
bfe.u32 %r904, %r909, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd338, %r904, 4;
add.s64 %rd339, %rd106, %rd338;
ld.shared.u32 %r1325, [%rd339];
xor.b32 %r1326, %r1325, %r1324;
// inline asm
bfe.u32 %r908, %r909, %r719, %r3862;
// inline asm
mul.wide.u32 %rd340, %r908, 4;
add.s64 %rd341, %rd110, %rd340;
ld.shared.u32 %r1327, [%rd341];
add.s32 %r1328, %r1327, %r1326;
xor.b32 %r1329, %r3880, %r893;
xor.b32 %r925, %r1329, %r1328;
// inline asm
bfe.u32 %r912, %r925, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd342, %r912, 4;
add.s64 %rd343, %rd98, %rd342;
ld.shared.u32 %r1330, [%rd343];
// inline asm
bfe.u32 %r916, %r925, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd344, %r916, 4;
add.s64 %rd345, %rd102, %rd344;
ld.shared.u32 %r1331, [%rd345];
add.s32 %r1332, %r1331, %r1330;
// inline asm
bfe.u32 %r920, %r925, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd346, %r920, 4;
add.s64 %rd347, %rd106, %rd346;
ld.shared.u32 %r1333, [%rd347];
xor.b32 %r1334, %r1333, %r1332;
// inline asm
bfe.u32 %r924, %r925, %r719, %r3862;
// inline asm
mul.wide.u32 %rd348, %r924, 4;
add.s64 %rd349, %rd110, %rd348;
ld.shared.u32 %r1335, [%rd349];
add.s32 %r1336, %r1335, %r1334;
xor.b32 %r1337, %r3879, %r909;
xor.b32 %r941, %r1337, %r1336;
// inline asm
bfe.u32 %r928, %r941, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd350, %r928, 4;
add.s64 %rd351, %rd98, %rd350;
ld.shared.u32 %r1338, [%rd351];
// inline asm
bfe.u32 %r932, %r941, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd352, %r932, 4;
add.s64 %rd353, %rd102, %rd352;
ld.shared.u32 %r1339, [%rd353];
add.s32 %r1340, %r1339, %r1338;
// inline asm
bfe.u32 %r936, %r941, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd354, %r936, 4;
add.s64 %rd355, %rd106, %rd354;
ld.shared.u32 %r1341, [%rd355];
xor.b32 %r1342, %r1341, %r1340;
// inline asm
bfe.u32 %r940, %r941, %r719, %r3862;
// inline asm
mul.wide.u32 %rd356, %r940, 4;
add.s64 %rd357, %rd110, %rd356;
ld.shared.u32 %r1343, [%rd357];
add.s32 %r1344, %r1343, %r1342;
xor.b32 %r1345, %r3878, %r925;
xor.b32 %r957, %r1345, %r1344;
// inline asm
bfe.u32 %r944, %r957, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd358, %r944, 4;
add.s64 %rd359, %rd98, %rd358;
ld.shared.u32 %r1346, [%rd359];
// inline asm
bfe.u32 %r948, %r957, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd360, %r948, 4;
add.s64 %rd361, %rd102, %rd360;
ld.shared.u32 %r1347, [%rd361];
add.s32 %r1348, %r1347, %r1346;
// inline asm
bfe.u32 %r952, %r957, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd362, %r952, 4;
add.s64 %rd363, %rd106, %rd362;
ld.shared.u32 %r1349, [%rd363];
xor.b32 %r1350, %r1349, %r1348;
// inline asm
bfe.u32 %r956, %r957, %r719, %r3862;
// inline asm
mul.wide.u32 %rd364, %r956, 4;
add.s64 %rd365, %rd110, %rd364;
ld.shared.u32 %r1351, [%rd365];
add.s32 %r1352, %r1351, %r1350;
xor.b32 %r1353, %r3877, %r941;
xor.b32 %r973, %r1353, %r1352;
// inline asm
bfe.u32 %r960, %r973, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd366, %r960, 4;
add.s64 %rd367, %rd98, %rd366;
ld.shared.u32 %r1354, [%rd367];
// inline asm
bfe.u32 %r964, %r973, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd368, %r964, 4;
add.s64 %rd369, %rd102, %rd368;
ld.shared.u32 %r1355, [%rd369];
add.s32 %r1356, %r1355, %r1354;
// inline asm
bfe.u32 %r968, %r973, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd370, %r968, 4;
add.s64 %rd371, %rd106, %rd370;
ld.shared.u32 %r1357, [%rd371];
xor.b32 %r1358, %r1357, %r1356;
// inline asm
bfe.u32 %r972, %r973, %r719, %r3862;
// inline asm
mul.wide.u32 %rd372, %r972, 4;
add.s64 %rd373, %rd110, %rd372;
ld.shared.u32 %r1359, [%rd373];
add.s32 %r1360, %r1359, %r1358;
xor.b32 %r1361, %r3876, %r957;
xor.b32 %r1362, %r1361, %r1360;
xor.b32 %r1363, %r3875, %r973;
st.shared.u32 [%rd1337], %r1363;
st.shared.u32 [%rd1337+4], %r1362;
xor.b32 %r1364, %r1363, %r37;
xor.b32 %r989, %r1364, %r3892;
// inline asm
bfe.u32 %r976, %r989, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd374, %r976, 4;
add.s64 %rd375, %rd98, %rd374;
ld.shared.u32 %r1365, [%rd375];
// inline asm
bfe.u32 %r980, %r989, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd376, %r980, 4;
add.s64 %rd377, %rd102, %rd376;
ld.shared.u32 %r1366, [%rd377];
add.s32 %r1367, %r1366, %r1365;
// inline asm
bfe.u32 %r984, %r989, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd378, %r984, 4;
add.s64 %rd379, %rd106, %rd378;
ld.shared.u32 %r1368, [%rd379];
xor.b32 %r1369, %r1368, %r1367;
// inline asm
bfe.u32 %r988, %r989, %r719, %r3862;
// inline asm
mul.wide.u32 %rd380, %r988, 4;
add.s64 %rd381, %rd110, %rd380;
ld.shared.u32 %r1370, [%rd381];
add.s32 %r1371, %r1370, %r1369;
xor.b32 %r1372, %r38, %r1362;
xor.b32 %r1373, %r1372, %r3891;
xor.b32 %r1005, %r1373, %r1371;
// inline asm
bfe.u32 %r992, %r1005, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd382, %r992, 4;
add.s64 %rd383, %rd98, %rd382;
ld.shared.u32 %r1374, [%rd383];
// inline asm
bfe.u32 %r996, %r1005, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd384, %r996, 4;
add.s64 %rd385, %rd102, %rd384;
ld.shared.u32 %r1375, [%rd385];
add.s32 %r1376, %r1375, %r1374;
// inline asm
bfe.u32 %r1000, %r1005, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd386, %r1000, 4;
add.s64 %rd387, %rd106, %rd386;
ld.shared.u32 %r1377, [%rd387];
xor.b32 %r1378, %r1377, %r1376;
// inline asm
bfe.u32 %r1004, %r1005, %r719, %r3862;
// inline asm
mul.wide.u32 %rd388, %r1004, 4;
add.s64 %rd389, %rd110, %rd388;
ld.shared.u32 %r1379, [%rd389];
add.s32 %r1380, %r1379, %r1378;
xor.b32 %r1381, %r3890, %r989;
xor.b32 %r1021, %r1381, %r1380;
// inline asm
bfe.u32 %r1008, %r1021, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd390, %r1008, 4;
add.s64 %rd391, %rd98, %rd390;
ld.shared.u32 %r1382, [%rd391];
// inline asm
bfe.u32 %r1012, %r1021, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd392, %r1012, 4;
add.s64 %rd393, %rd102, %rd392;
ld.shared.u32 %r1383, [%rd393];
add.s32 %r1384, %r1383, %r1382;
// inline asm
bfe.u32 %r1016, %r1021, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd394, %r1016, 4;
add.s64 %rd395, %rd106, %rd394;
ld.shared.u32 %r1385, [%rd395];
xor.b32 %r1386, %r1385, %r1384;
// inline asm
bfe.u32 %r1020, %r1021, %r719, %r3862;
// inline asm
mul.wide.u32 %rd396, %r1020, 4;
add.s64 %rd397, %rd110, %rd396;
ld.shared.u32 %r1387, [%rd397];
add.s32 %r1388, %r1387, %r1386;
xor.b32 %r1389, %r3889, %r1005;
xor.b32 %r1037, %r1389, %r1388;
// inline asm
bfe.u32 %r1024, %r1037, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd398, %r1024, 4;
add.s64 %rd399, %rd98, %rd398;
ld.shared.u32 %r1390, [%rd399];
// inline asm
bfe.u32 %r1028, %r1037, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd400, %r1028, 4;
add.s64 %rd401, %rd102, %rd400;
ld.shared.u32 %r1391, [%rd401];
add.s32 %r1392, %r1391, %r1390;
// inline asm
bfe.u32 %r1032, %r1037, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd402, %r1032, 4;
add.s64 %rd403, %rd106, %rd402;
ld.shared.u32 %r1393, [%rd403];
xor.b32 %r1394, %r1393, %r1392;
// inline asm
bfe.u32 %r1036, %r1037, %r719, %r3862;
// inline asm
mul.wide.u32 %rd404, %r1036, 4;
add.s64 %rd405, %rd110, %rd404;
ld.shared.u32 %r1395, [%rd405];
add.s32 %r1396, %r1395, %r1394;
xor.b32 %r1397, %r3888, %r1021;
xor.b32 %r1053, %r1397, %r1396;
// inline asm
bfe.u32 %r1040, %r1053, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd406, %r1040, 4;
add.s64 %rd407, %rd98, %rd406;
ld.shared.u32 %r1398, [%rd407];
// inline asm
bfe.u32 %r1044, %r1053, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd408, %r1044, 4;
add.s64 %rd409, %rd102, %rd408;
ld.shared.u32 %r1399, [%rd409];
add.s32 %r1400, %r1399, %r1398;
// inline asm
bfe.u32 %r1048, %r1053, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd410, %r1048, 4;
add.s64 %rd411, %rd106, %rd410;
ld.shared.u32 %r1401, [%rd411];
xor.b32 %r1402, %r1401, %r1400;
// inline asm
bfe.u32 %r1052, %r1053, %r719, %r3862;
// inline asm
mul.wide.u32 %rd412, %r1052, 4;
add.s64 %rd413, %rd110, %rd412;
ld.shared.u32 %r1403, [%rd413];
add.s32 %r1404, %r1403, %r1402;
xor.b32 %r1405, %r3887, %r1037;
xor.b32 %r1069, %r1405, %r1404;
// inline asm
bfe.u32 %r1056, %r1069, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd414, %r1056, 4;
add.s64 %rd415, %rd98, %rd414;
ld.shared.u32 %r1406, [%rd415];
// inline asm
bfe.u32 %r1060, %r1069, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd416, %r1060, 4;
add.s64 %rd417, %rd102, %rd416;
ld.shared.u32 %r1407, [%rd417];
add.s32 %r1408, %r1407, %r1406;
// inline asm
bfe.u32 %r1064, %r1069, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd418, %r1064, 4;
add.s64 %rd419, %rd106, %rd418;
ld.shared.u32 %r1409, [%rd419];
xor.b32 %r1410, %r1409, %r1408;
// inline asm
bfe.u32 %r1068, %r1069, %r719, %r3862;
// inline asm
mul.wide.u32 %rd420, %r1068, 4;
add.s64 %rd421, %rd110, %rd420;
ld.shared.u32 %r1411, [%rd421];
add.s32 %r1412, %r1411, %r1410;
xor.b32 %r1413, %r3886, %r1053;
xor.b32 %r1085, %r1413, %r1412;
// inline asm
bfe.u32 %r1072, %r1085, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd422, %r1072, 4;
add.s64 %rd423, %rd98, %rd422;
ld.shared.u32 %r1414, [%rd423];
// inline asm
bfe.u32 %r1076, %r1085, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd424, %r1076, 4;
add.s64 %rd425, %rd102, %rd424;
ld.shared.u32 %r1415, [%rd425];
add.s32 %r1416, %r1415, %r1414;
// inline asm
bfe.u32 %r1080, %r1085, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd426, %r1080, 4;
add.s64 %rd427, %rd106, %rd426;
ld.shared.u32 %r1417, [%rd427];
xor.b32 %r1418, %r1417, %r1416;
// inline asm
bfe.u32 %r1084, %r1085, %r719, %r3862;
// inline asm
mul.wide.u32 %rd428, %r1084, 4;
add.s64 %rd429, %rd110, %rd428;
ld.shared.u32 %r1419, [%rd429];
add.s32 %r1420, %r1419, %r1418;
xor.b32 %r1421, %r3885, %r1069;
xor.b32 %r1101, %r1421, %r1420;
// inline asm
bfe.u32 %r1088, %r1101, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd430, %r1088, 4;
add.s64 %rd431, %rd98, %rd430;
ld.shared.u32 %r1422, [%rd431];
// inline asm
bfe.u32 %r1092, %r1101, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd432, %r1092, 4;
add.s64 %rd433, %rd102, %rd432;
ld.shared.u32 %r1423, [%rd433];
add.s32 %r1424, %r1423, %r1422;
// inline asm
bfe.u32 %r1096, %r1101, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd434, %r1096, 4;
add.s64 %rd435, %rd106, %rd434;
ld.shared.u32 %r1425, [%rd435];
xor.b32 %r1426, %r1425, %r1424;
// inline asm
bfe.u32 %r1100, %r1101, %r719, %r3862;
// inline asm
mul.wide.u32 %rd436, %r1100, 4;
add.s64 %rd437, %rd110, %rd436;
ld.shared.u32 %r1427, [%rd437];
add.s32 %r1428, %r1427, %r1426;
xor.b32 %r1429, %r3884, %r1085;
xor.b32 %r1117, %r1429, %r1428;
// inline asm
bfe.u32 %r1104, %r1117, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd438, %r1104, 4;
add.s64 %rd439, %rd98, %rd438;
ld.shared.u32 %r1430, [%rd439];
// inline asm
bfe.u32 %r1108, %r1117, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd440, %r1108, 4;
add.s64 %rd441, %rd102, %rd440;
ld.shared.u32 %r1431, [%rd441];
add.s32 %r1432, %r1431, %r1430;
// inline asm
bfe.u32 %r1112, %r1117, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd442, %r1112, 4;
add.s64 %rd443, %rd106, %rd442;
ld.shared.u32 %r1433, [%rd443];
xor.b32 %r1434, %r1433, %r1432;
// inline asm
bfe.u32 %r1116, %r1117, %r719, %r3862;
// inline asm
mul.wide.u32 %rd444, %r1116, 4;
add.s64 %rd445, %rd110, %rd444;
ld.shared.u32 %r1435, [%rd445];
add.s32 %r1436, %r1435, %r1434;
xor.b32 %r1437, %r3883, %r1101;
xor.b32 %r1133, %r1437, %r1436;
// inline asm
bfe.u32 %r1120, %r1133, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd446, %r1120, 4;
add.s64 %rd447, %rd98, %rd446;
ld.shared.u32 %r1438, [%rd447];
// inline asm
bfe.u32 %r1124, %r1133, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd448, %r1124, 4;
add.s64 %rd449, %rd102, %rd448;
ld.shared.u32 %r1439, [%rd449];
add.s32 %r1440, %r1439, %r1438;
// inline asm
bfe.u32 %r1128, %r1133, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd450, %r1128, 4;
add.s64 %rd451, %rd106, %rd450;
ld.shared.u32 %r1441, [%rd451];
xor.b32 %r1442, %r1441, %r1440;
// inline asm
bfe.u32 %r1132, %r1133, %r719, %r3862;
// inline asm
mul.wide.u32 %rd452, %r1132, 4;
add.s64 %rd453, %rd110, %rd452;
ld.shared.u32 %r1443, [%rd453];
add.s32 %r1444, %r1443, %r1442;
xor.b32 %r1445, %r3882, %r1117;
xor.b32 %r1149, %r1445, %r1444;
// inline asm
bfe.u32 %r1136, %r1149, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd454, %r1136, 4;
add.s64 %rd455, %rd98, %rd454;
ld.shared.u32 %r1446, [%rd455];
// inline asm
bfe.u32 %r1140, %r1149, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd456, %r1140, 4;
add.s64 %rd457, %rd102, %rd456;
ld.shared.u32 %r1447, [%rd457];
add.s32 %r1448, %r1447, %r1446;
// inline asm
bfe.u32 %r1144, %r1149, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd458, %r1144, 4;
add.s64 %rd459, %rd106, %rd458;
ld.shared.u32 %r1449, [%rd459];
xor.b32 %r1450, %r1449, %r1448;
// inline asm
bfe.u32 %r1148, %r1149, %r719, %r3862;
// inline asm
mul.wide.u32 %rd460, %r1148, 4;
add.s64 %rd461, %rd110, %rd460;
ld.shared.u32 %r1451, [%rd461];
add.s32 %r1452, %r1451, %r1450;
xor.b32 %r1453, %r3881, %r1133;
xor.b32 %r1165, %r1453, %r1452;
// inline asm
bfe.u32 %r1152, %r1165, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd462, %r1152, 4;
add.s64 %rd463, %rd98, %rd462;
ld.shared.u32 %r1454, [%rd463];
// inline asm
bfe.u32 %r1156, %r1165, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd464, %r1156, 4;
add.s64 %rd465, %rd102, %rd464;
ld.shared.u32 %r1455, [%rd465];
add.s32 %r1456, %r1455, %r1454;
// inline asm
bfe.u32 %r1160, %r1165, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd466, %r1160, 4;
add.s64 %rd467, %rd106, %rd466;
ld.shared.u32 %r1457, [%rd467];
xor.b32 %r1458, %r1457, %r1456;
// inline asm
bfe.u32 %r1164, %r1165, %r719, %r3862;
// inline asm
mul.wide.u32 %rd468, %r1164, 4;
add.s64 %rd469, %rd110, %rd468;
ld.shared.u32 %r1459, [%rd469];
add.s32 %r1460, %r1459, %r1458;
xor.b32 %r1461, %r3880, %r1149;
xor.b32 %r1181, %r1461, %r1460;
// inline asm
bfe.u32 %r1168, %r1181, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd470, %r1168, 4;
add.s64 %rd471, %rd98, %rd470;
ld.shared.u32 %r1462, [%rd471];
// inline asm
bfe.u32 %r1172, %r1181, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd472, %r1172, 4;
add.s64 %rd473, %rd102, %rd472;
ld.shared.u32 %r1463, [%rd473];
add.s32 %r1464, %r1463, %r1462;
// inline asm
bfe.u32 %r1176, %r1181, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd474, %r1176, 4;
add.s64 %rd475, %rd106, %rd474;
ld.shared.u32 %r1465, [%rd475];
xor.b32 %r1466, %r1465, %r1464;
// inline asm
bfe.u32 %r1180, %r1181, %r719, %r3862;
// inline asm
mul.wide.u32 %rd476, %r1180, 4;
add.s64 %rd477, %rd110, %rd476;
ld.shared.u32 %r1467, [%rd477];
add.s32 %r1468, %r1467, %r1466;
xor.b32 %r1469, %r3879, %r1165;
xor.b32 %r1197, %r1469, %r1468;
// inline asm
bfe.u32 %r1184, %r1197, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd478, %r1184, 4;
add.s64 %rd479, %rd98, %rd478;
ld.shared.u32 %r1470, [%rd479];
// inline asm
bfe.u32 %r1188, %r1197, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd480, %r1188, 4;
add.s64 %rd481, %rd102, %rd480;
ld.shared.u32 %r1471, [%rd481];
add.s32 %r1472, %r1471, %r1470;
// inline asm
bfe.u32 %r1192, %r1197, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd482, %r1192, 4;
add.s64 %rd483, %rd106, %rd482;
ld.shared.u32 %r1473, [%rd483];
xor.b32 %r1474, %r1473, %r1472;
// inline asm
bfe.u32 %r1196, %r1197, %r719, %r3862;
// inline asm
mul.wide.u32 %rd484, %r1196, 4;
add.s64 %rd485, %rd110, %rd484;
ld.shared.u32 %r1475, [%rd485];
add.s32 %r1476, %r1475, %r1474;
xor.b32 %r1477, %r3878, %r1181;
xor.b32 %r1213, %r1477, %r1476;
// inline asm
bfe.u32 %r1200, %r1213, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd486, %r1200, 4;
add.s64 %rd487, %rd98, %rd486;
ld.shared.u32 %r1478, [%rd487];
// inline asm
bfe.u32 %r1204, %r1213, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd488, %r1204, 4;
add.s64 %rd489, %rd102, %rd488;
ld.shared.u32 %r1479, [%rd489];
add.s32 %r1480, %r1479, %r1478;
// inline asm
bfe.u32 %r1208, %r1213, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd490, %r1208, 4;
add.s64 %rd491, %rd106, %rd490;
ld.shared.u32 %r1481, [%rd491];
xor.b32 %r1482, %r1481, %r1480;
// inline asm
bfe.u32 %r1212, %r1213, %r719, %r3862;
// inline asm
mul.wide.u32 %rd492, %r1212, 4;
add.s64 %rd493, %rd110, %rd492;
ld.shared.u32 %r1483, [%rd493];
add.s32 %r1484, %r1483, %r1482;
xor.b32 %r1485, %r3877, %r1197;
xor.b32 %r1229, %r1485, %r1484;
// inline asm
bfe.u32 %r1216, %r1229, %r3861, %r3862;
// inline asm
mul.wide.u32 %rd494, %r1216, 4;
add.s64 %rd495, %rd98, %rd494;
ld.shared.u32 %r1486, [%rd495];
// inline asm
bfe.u32 %r1220, %r1229, %r3863, %r3862;
// inline asm
mul.wide.u32 %rd496, %r1220, 4;
add.s64 %rd497, %rd102, %rd496;
ld.shared.u32 %r1487, [%rd497];
add.s32 %r1488, %r1487, %r1486;
// inline asm
bfe.u32 %r1224, %r1229, %r3862, %r3862;
// inline asm
mul.wide.u32 %rd498, %r1224, 4;
add.s64 %rd499, %rd106, %rd498;
ld.shared.u32 %r1489, [%rd499];
xor.b32 %r1490, %r1489, %r1488;
// inline asm
bfe.u32 %r1228, %r1229, %r719, %r3862;
// inline asm
mul.wide.u32 %rd500, %r1228, 4;
add.s64 %rd501, %rd110, %rd500;
ld.shared.u32 %r1491, [%rd501];
add.s32 %r1492, %r1491, %r1490;
// Final combine of this unrolled pass:
//   %r3897 = %r3876 ^ %r1213 ^ %r1492
//   %r3898 = %r3875 ^ %r1229
xor.b32 %r1493, %r3876, %r1213;
xor.b32 %r3897, %r1493, %r1492;
xor.b32 %r3898, %r3875, %r1229;
// Write the pair to shared memory at byte offsets +8 and +12 from %rd1337.
st.shared.u32 [%rd1337+8], %r3898;
st.shared.u32 [%rd1337+12], %r3897;
// Advance the write pointer by 16 bytes and the counter %r3896 by 4.
add.s64 %rd1337, %rd1337, 16;
add.s32 %r3896, %r3896, 4;
// Unsigned compare: branch back to BB3_22 while %r3896 < 256.
setp.lt.u32 %p13, %r3896, 256;
@%p13 bra BB3_22;
// Setup for the following loop:
//   %rd1338 = address of m03200_init$S1_all + tid.x * 1024
// (mul.wide.s32 sign-extends tid.x to 64 bits before scaling by 1024 bytes).
mov.u32 %r3866, %tid.x;
mul.wide.s32 %rd1325, %r3866, 1024;
mov.u64 %rd1320, m03200_init$S1_all;
add.s64 %rd1338, %rd1320, %rd1325;
// %r1494 = 0; it is also passed as the bit-position operand of bfe in BB3_24.
// %r3899 starts at 0 -- presumably the BB3_24 loop counter; its update is not
// visible in this part of the file, confirm at the loop's back-edge.
mov.u32 %r1494, 0;
mov.u32 %r3899, %r1494;
// BB3_24: branch target that opens a sequence of unrolled rounds. Each round
// takes a 32-bit word, extracts its four bytes with bfe, scales every byte by 4
// to index 32-bit entries relative to the base addresses %rd98, %rd102, %rd106
// and %rd110 (all read with ld.shared), and combines the four loaded words as
//   ((t0 + t1) ^ t2) + t3
// NOTE(review): this has the shape of the Blowfish F function -- confirm
// against the kernel's high-level source; the tables are defined outside this view.
BB3_24:
// Operands for bfe: bit positions 16 and 24, and the value 8, which is used both
// as the field width and as the bit position of the second-lowest byte.
mov.u32 %r3854, 16;
mov.u32 %r3853, 8;
mov.u32 %r3852, 24;
// Round input word: %r1508 = %r39 ^ %r3898 ^ %r3892.
xor.b32 %r2007, %r39, %r3898;
xor.b32 %r1508, %r2007, %r3892;
// Byte at bits 24..31 -> word loaded relative to %rd98.
// inline asm
bfe.u32 %r1495, %r1508, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd507, %r1495, 4;
add.s64 %rd508, %rd98, %rd507;
ld.shared.u32 %r2008, [%rd508];
// Byte at bits 16..23 -> word loaded relative to %rd102, added to the first.
// inline asm
bfe.u32 %r1499, %r1508, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd511, %r1499, 4;
add.s64 %rd512, %rd102, %rd511;
ld.shared.u32 %r2009, [%rd512];
add.s32 %r2010, %r2009, %r2008;
// Byte at bits 8..15 -> word loaded relative to %rd106, XORed into the sum.
// inline asm
bfe.u32 %r1503, %r1508, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd515, %r1503, 4;
add.s64 %rd516, %rd106, %rd515;
ld.shared.u32 %r2011, [%rd516];
xor.b32 %r2012, %r2011, %r2010;
// Byte at bits 0..7 (%r1494 is set to 0 before the loop) -> word loaded
// relative to %rd110, added to the running value.
// inline asm
bfe.u32 %r1507, %r1508, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd519, %r1507, 4;
add.s64 %rd520, %rd110, %rd519;
ld.shared.u32 %r2013, [%rd520];
add.s32 %r2014, %r2013, %r2012;
// Next round's input word: %r1524 = %r40 ^ %r3897 ^ %r3891 ^ (combined table value).
xor.b32 %r2015, %r40, %r3897;
xor.b32 %r2016, %r2015, %r3891;
xor.b32 %r1524, %r2016, %r2014;
// inline asm
bfe.u32 %r1511, %r1524, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd521, %r1511, 4;
add.s64 %rd522, %rd98, %rd521;
ld.shared.u32 %r2017, [%rd522];
// inline asm
bfe.u32 %r1515, %r1524, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd523, %r1515, 4;
add.s64 %rd524, %rd102, %rd523;
ld.shared.u32 %r2018, [%rd524];
add.s32 %r2019, %r2018, %r2017;
// inline asm
bfe.u32 %r1519, %r1524, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd525, %r1519, 4;
add.s64 %rd526, %rd106, %rd525;
ld.shared.u32 %r2020, [%rd526];
xor.b32 %r2021, %r2020, %r2019;
// inline asm
bfe.u32 %r1523, %r1524, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd527, %r1523, 4;
add.s64 %rd528, %rd110, %rd527;
ld.shared.u32 %r2022, [%rd528];
add.s32 %r2023, %r2022, %r2021;
xor.b32 %r2024, %r3890, %r1508;
xor.b32 %r1540, %r2024, %r2023;
// inline asm
bfe.u32 %r1527, %r1540, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd529, %r1527, 4;
add.s64 %rd530, %rd98, %rd529;
ld.shared.u32 %r2025, [%rd530];
// inline asm
bfe.u32 %r1531, %r1540, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd531, %r1531, 4;
add.s64 %rd532, %rd102, %rd531;
ld.shared.u32 %r2026, [%rd532];
add.s32 %r2027, %r2026, %r2025;
// inline asm
bfe.u32 %r1535, %r1540, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd533, %r1535, 4;
add.s64 %rd534, %rd106, %rd533;
ld.shared.u32 %r2028, [%rd534];
xor.b32 %r2029, %r2028, %r2027;
// inline asm
bfe.u32 %r1539, %r1540, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd535, %r1539, 4;
add.s64 %rd536, %rd110, %rd535;
ld.shared.u32 %r2030, [%rd536];
add.s32 %r2031, %r2030, %r2029;
xor.b32 %r2032, %r3889, %r1524;
xor.b32 %r1556, %r2032, %r2031;
// inline asm
bfe.u32 %r1543, %r1556, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd537, %r1543, 4;
add.s64 %rd538, %rd98, %rd537;
ld.shared.u32 %r2033, [%rd538];
// inline asm
bfe.u32 %r1547, %r1556, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd539, %r1547, 4;
add.s64 %rd540, %rd102, %rd539;
ld.shared.u32 %r2034, [%rd540];
add.s32 %r2035, %r2034, %r2033;
// inline asm
bfe.u32 %r1551, %r1556, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd541, %r1551, 4;
add.s64 %rd542, %rd106, %rd541;
ld.shared.u32 %r2036, [%rd542];
xor.b32 %r2037, %r2036, %r2035;
// inline asm
bfe.u32 %r1555, %r1556, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd543, %r1555, 4;
add.s64 %rd544, %rd110, %rd543;
ld.shared.u32 %r2038, [%rd544];
add.s32 %r2039, %r2038, %r2037;
xor.b32 %r2040, %r3888, %r1540;
xor.b32 %r1572, %r2040, %r2039;
// inline asm
bfe.u32 %r1559, %r1572, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd545, %r1559, 4;
add.s64 %rd546, %rd98, %rd545;
ld.shared.u32 %r2041, [%rd546];
// inline asm
bfe.u32 %r1563, %r1572, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd547, %r1563, 4;
add.s64 %rd548, %rd102, %rd547;
ld.shared.u32 %r2042, [%rd548];
add.s32 %r2043, %r2042, %r2041;
// inline asm
bfe.u32 %r1567, %r1572, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd549, %r1567, 4;
add.s64 %rd550, %rd106, %rd549;
ld.shared.u32 %r2044, [%rd550];
xor.b32 %r2045, %r2044, %r2043;
// inline asm
bfe.u32 %r1571, %r1572, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd551, %r1571, 4;
add.s64 %rd552, %rd110, %rd551;
ld.shared.u32 %r2046, [%rd552];
add.s32 %r2047, %r2046, %r2045;
xor.b32 %r2048, %r3887, %r1556;
xor.b32 %r1588, %r2048, %r2047;
// inline asm
bfe.u32 %r1575, %r1588, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd553, %r1575, 4;
add.s64 %rd554, %rd98, %rd553;
ld.shared.u32 %r2049, [%rd554];
// inline asm
bfe.u32 %r1579, %r1588, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd555, %r1579, 4;
add.s64 %rd556, %rd102, %rd555;
ld.shared.u32 %r2050, [%rd556];
add.s32 %r2051, %r2050, %r2049;
// inline asm
bfe.u32 %r1583, %r1588, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd557, %r1583, 4;
add.s64 %rd558, %rd106, %rd557;
ld.shared.u32 %r2052, [%rd558];
xor.b32 %r2053, %r2052, %r2051;
// inline asm
bfe.u32 %r1587, %r1588, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd559, %r1587, 4;
add.s64 %rd560, %rd110, %rd559;
ld.shared.u32 %r2054, [%rd560];
add.s32 %r2055, %r2054, %r2053;
xor.b32 %r2056, %r3886, %r1572;
xor.b32 %r1604, %r2056, %r2055;
// inline asm
bfe.u32 %r1591, %r1604, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd561, %r1591, 4;
add.s64 %rd562, %rd98, %rd561;
ld.shared.u32 %r2057, [%rd562];
// inline asm
bfe.u32 %r1595, %r1604, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd563, %r1595, 4;
add.s64 %rd564, %rd102, %rd563;
ld.shared.u32 %r2058, [%rd564];
add.s32 %r2059, %r2058, %r2057;
// inline asm
bfe.u32 %r1599, %r1604, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd565, %r1599, 4;
add.s64 %rd566, %rd106, %rd565;
ld.shared.u32 %r2060, [%rd566];
xor.b32 %r2061, %r2060, %r2059;
// inline asm
bfe.u32 %r1603, %r1604, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd567, %r1603, 4;
add.s64 %rd568, %rd110, %rd567;
ld.shared.u32 %r2062, [%rd568];
add.s32 %r2063, %r2062, %r2061;
xor.b32 %r2064, %r3885, %r1588;
xor.b32 %r1620, %r2064, %r2063;
// inline asm
bfe.u32 %r1607, %r1620, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd569, %r1607, 4;
add.s64 %rd570, %rd98, %rd569;
ld.shared.u32 %r2065, [%rd570];
// inline asm
bfe.u32 %r1611, %r1620, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd571, %r1611, 4;
add.s64 %rd572, %rd102, %rd571;
ld.shared.u32 %r2066, [%rd572];
add.s32 %r2067, %r2066, %r2065;
// inline asm
bfe.u32 %r1615, %r1620, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd573, %r1615, 4;
add.s64 %rd574, %rd106, %rd573;
ld.shared.u32 %r2068, [%rd574];
xor.b32 %r2069, %r2068, %r2067;
// inline asm
bfe.u32 %r1619, %r1620, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd575, %r1619, 4;
add.s64 %rd576, %rd110, %rd575;
ld.shared.u32 %r2070, [%rd576];
add.s32 %r2071, %r2070, %r2069;
xor.b32 %r2072, %r3884, %r1604;
xor.b32 %r1636, %r2072, %r2071;
// inline asm
bfe.u32 %r1623, %r1636, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd577, %r1623, 4;
add.s64 %rd578, %rd98, %rd577;
ld.shared.u32 %r2073, [%rd578];
// inline asm
bfe.u32 %r1627, %r1636, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd579, %r1627, 4;
add.s64 %rd580, %rd102, %rd579;
ld.shared.u32 %r2074, [%rd580];
add.s32 %r2075, %r2074, %r2073;
// inline asm
bfe.u32 %r1631, %r1636, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd581, %r1631, 4;
add.s64 %rd582, %rd106, %rd581;
ld.shared.u32 %r2076, [%rd582];
xor.b32 %r2077, %r2076, %r2075;
// inline asm
bfe.u32 %r1635, %r1636, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd583, %r1635, 4;
add.s64 %rd584, %rd110, %rd583;
ld.shared.u32 %r2078, [%rd584];
add.s32 %r2079, %r2078, %r2077;
xor.b32 %r2080, %r3883, %r1620;
xor.b32 %r1652, %r2080, %r2079;
// inline asm
bfe.u32 %r1639, %r1652, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd585, %r1639, 4;
add.s64 %rd586, %rd98, %rd585;
ld.shared.u32 %r2081, [%rd586];
// inline asm
bfe.u32 %r1643, %r1652, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd587, %r1643, 4;
add.s64 %rd588, %rd102, %rd587;
ld.shared.u32 %r2082, [%rd588];
add.s32 %r2083, %r2082, %r2081;
// inline asm
bfe.u32 %r1647, %r1652, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd589, %r1647, 4;
add.s64 %rd590, %rd106, %rd589;
ld.shared.u32 %r2084, [%rd590];
xor.b32 %r2085, %r2084, %r2083;
// inline asm
bfe.u32 %r1651, %r1652, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd591, %r1651, 4;
add.s64 %rd592, %rd110, %rd591;
ld.shared.u32 %r2086, [%rd592];
add.s32 %r2087, %r2086, %r2085;
xor.b32 %r2088, %r3882, %r1636;
xor.b32 %r1668, %r2088, %r2087;
// inline asm
bfe.u32 %r1655, %r1668, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd593, %r1655, 4;
add.s64 %rd594, %rd98, %rd593;
ld.shared.u32 %r2089, [%rd594];
// inline asm
bfe.u32 %r1659, %r1668, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd595, %r1659, 4;
add.s64 %rd596, %rd102, %rd595;
ld.shared.u32 %r2090, [%rd596];
add.s32 %r2091, %r2090, %r2089;
// inline asm
bfe.u32 %r1663, %r1668, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd597, %r1663, 4;
add.s64 %rd598, %rd106, %rd597;
ld.shared.u32 %r2092, [%rd598];
xor.b32 %r2093, %r2092, %r2091;
// inline asm
bfe.u32 %r1667, %r1668, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd599, %r1667, 4;
add.s64 %rd600, %rd110, %rd599;
ld.shared.u32 %r2094, [%rd600];
add.s32 %r2095, %r2094, %r2093;
xor.b32 %r2096, %r3881, %r1652;
xor.b32 %r1684, %r2096, %r2095;
// inline asm
bfe.u32 %r1671, %r1684, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd601, %r1671, 4;
add.s64 %rd602, %rd98, %rd601;
ld.shared.u32 %r2097, [%rd602];
// inline asm
bfe.u32 %r1675, %r1684, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd603, %r1675, 4;
add.s64 %rd604, %rd102, %rd603;
ld.shared.u32 %r2098, [%rd604];
add.s32 %r2099, %r2098, %r2097;
// inline asm
bfe.u32 %r1679, %r1684, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd605, %r1679, 4;
add.s64 %rd606, %rd106, %rd605;
ld.shared.u32 %r2100, [%rd606];
xor.b32 %r2101, %r2100, %r2099;
// inline asm
bfe.u32 %r1683, %r1684, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd607, %r1683, 4;
add.s64 %rd608, %rd110, %rd607;
ld.shared.u32 %r2102, [%rd608];
add.s32 %r2103, %r2102, %r2101;
xor.b32 %r2104, %r3880, %r1668;
xor.b32 %r1700, %r2104, %r2103;
// inline asm
bfe.u32 %r1687, %r1700, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd609, %r1687, 4;
add.s64 %rd610, %rd98, %rd609;
ld.shared.u32 %r2105, [%rd610];
// inline asm
bfe.u32 %r1691, %r1700, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd611, %r1691, 4;
add.s64 %rd612, %rd102, %rd611;
ld.shared.u32 %r2106, [%rd612];
add.s32 %r2107, %r2106, %r2105;
// inline asm
bfe.u32 %r1695, %r1700, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd613, %r1695, 4;
add.s64 %rd614, %rd106, %rd613;
ld.shared.u32 %r2108, [%rd614];
xor.b32 %r2109, %r2108, %r2107;
// inline asm
bfe.u32 %r1699, %r1700, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd615, %r1699, 4;
add.s64 %rd616, %rd110, %rd615;
ld.shared.u32 %r2110, [%rd616];
add.s32 %r2111, %r2110, %r2109;
xor.b32 %r2112, %r3879, %r1684;
xor.b32 %r1716, %r2112, %r2111;
// inline asm
bfe.u32 %r1703, %r1716, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd617, %r1703, 4;
add.s64 %rd618, %rd98, %rd617;
ld.shared.u32 %r2113, [%rd618];
// inline asm
bfe.u32 %r1707, %r1716, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd619, %r1707, 4;
add.s64 %rd620, %rd102, %rd619;
ld.shared.u32 %r2114, [%rd620];
add.s32 %r2115, %r2114, %r2113;
// inline asm
bfe.u32 %r1711, %r1716, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd621, %r1711, 4;
add.s64 %rd622, %rd106, %rd621;
ld.shared.u32 %r2116, [%rd622];
xor.b32 %r2117, %r2116, %r2115;
// inline asm
bfe.u32 %r1715, %r1716, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd623, %r1715, 4;
add.s64 %rd624, %rd110, %rd623;
ld.shared.u32 %r2118, [%rd624];
add.s32 %r2119, %r2118, %r2117;
xor.b32 %r2120, %r3878, %r1700;
xor.b32 %r1732, %r2120, %r2119;
// inline asm
bfe.u32 %r1719, %r1732, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd625, %r1719, 4;
add.s64 %rd626, %rd98, %rd625;
ld.shared.u32 %r2121, [%rd626];
// inline asm
bfe.u32 %r1723, %r1732, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd627, %r1723, 4;
add.s64 %rd628, %rd102, %rd627;
ld.shared.u32 %r2122, [%rd628];
add.s32 %r2123, %r2122, %r2121;
// inline asm
bfe.u32 %r1727, %r1732, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd629, %r1727, 4;
add.s64 %rd630, %rd106, %rd629;
ld.shared.u32 %r2124, [%rd630];
xor.b32 %r2125, %r2124, %r2123;
// inline asm
bfe.u32 %r1731, %r1732, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd631, %r1731, 4;
add.s64 %rd632, %rd110, %rd631;
ld.shared.u32 %r2126, [%rd632];
add.s32 %r2127, %r2126, %r2125;
xor.b32 %r2128, %r3877, %r1716;
xor.b32 %r1748, %r2128, %r2127;
// inline asm
bfe.u32 %r1735, %r1748, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd633, %r1735, 4;
add.s64 %rd634, %rd98, %rd633;
ld.shared.u32 %r2129, [%rd634];
// inline asm
bfe.u32 %r1739, %r1748, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd635, %r1739, 4;
add.s64 %rd636, %rd102, %rd635;
ld.shared.u32 %r2130, [%rd636];
add.s32 %r2131, %r2130, %r2129;
// inline asm
bfe.u32 %r1743, %r1748, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd637, %r1743, 4;
add.s64 %rd638, %rd106, %rd637;
ld.shared.u32 %r2132, [%rd638];
xor.b32 %r2133, %r2132, %r2131;
// inline asm
bfe.u32 %r1747, %r1748, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd639, %r1747, 4;
add.s64 %rd640, %rd110, %rd639;
ld.shared.u32 %r2134, [%rd640];
add.s32 %r2135, %r2134, %r2133;
xor.b32 %r2136, %r3876, %r1732;
xor.b32 %r2137, %r2136, %r2135;
xor.b32 %r2138, %r3875, %r1748;
st.shared.u32 [%rd1338], %r2138;
st.shared.u32 [%rd1338+4], %r2137;
xor.b32 %r2139, %r2138, %r37;
xor.b32 %r1764, %r2139, %r3892;
// inline asm
bfe.u32 %r1751, %r1764, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd641, %r1751, 4;
add.s64 %rd642, %rd98, %rd641;
ld.shared.u32 %r2140, [%rd642];
// inline asm
bfe.u32 %r1755, %r1764, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd643, %r1755, 4;
add.s64 %rd644, %rd102, %rd643;
ld.shared.u32 %r2141, [%rd644];
add.s32 %r2142, %r2141, %r2140;
// inline asm
bfe.u32 %r1759, %r1764, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd645, %r1759, 4;
add.s64 %rd646, %rd106, %rd645;
ld.shared.u32 %r2143, [%rd646];
xor.b32 %r2144, %r2143, %r2142;
// inline asm
bfe.u32 %r1763, %r1764, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd647, %r1763, 4;
add.s64 %rd648, %rd110, %rd647;
ld.shared.u32 %r2145, [%rd648];
add.s32 %r2146, %r2145, %r2144;
xor.b32 %r2147, %r38, %r2137;
xor.b32 %r2148, %r2147, %r3891;
xor.b32 %r1780, %r2148, %r2146;
// inline asm
bfe.u32 %r1767, %r1780, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd649, %r1767, 4;
add.s64 %rd650, %rd98, %rd649;
ld.shared.u32 %r2149, [%rd650];
// inline asm
bfe.u32 %r1771, %r1780, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd651, %r1771, 4;
add.s64 %rd652, %rd102, %rd651;
ld.shared.u32 %r2150, [%rd652];
add.s32 %r2151, %r2150, %r2149;
// inline asm
bfe.u32 %r1775, %r1780, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd653, %r1775, 4;
add.s64 %rd654, %rd106, %rd653;
ld.shared.u32 %r2152, [%rd654];
xor.b32 %r2153, %r2152, %r2151;
// inline asm
bfe.u32 %r1779, %r1780, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd655, %r1779, 4;
add.s64 %rd656, %rd110, %rd655;
ld.shared.u32 %r2154, [%rd656];
add.s32 %r2155, %r2154, %r2153;
xor.b32 %r2156, %r3890, %r1764;
xor.b32 %r1796, %r2156, %r2155;
// inline asm
bfe.u32 %r1783, %r1796, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd657, %r1783, 4;
add.s64 %rd658, %rd98, %rd657;
ld.shared.u32 %r2157, [%rd658];
// inline asm
bfe.u32 %r1787, %r1796, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd659, %r1787, 4;
add.s64 %rd660, %rd102, %rd659;
ld.shared.u32 %r2158, [%rd660];
add.s32 %r2159, %r2158, %r2157;
// inline asm
bfe.u32 %r1791, %r1796, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd661, %r1791, 4;
add.s64 %rd662, %rd106, %rd661;
ld.shared.u32 %r2160, [%rd662];
xor.b32 %r2161, %r2160, %r2159;
// inline asm
bfe.u32 %r1795, %r1796, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd663, %r1795, 4;
add.s64 %rd664, %rd110, %rd663;
ld.shared.u32 %r2162, [%rd664];
add.s32 %r2163, %r2162, %r2161;
xor.b32 %r2164, %r3889, %r1780;
xor.b32 %r1812, %r2164, %r2163;
// inline asm
bfe.u32 %r1799, %r1812, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd665, %r1799, 4;
add.s64 %rd666, %rd98, %rd665;
ld.shared.u32 %r2165, [%rd666];
// inline asm
bfe.u32 %r1803, %r1812, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd667, %r1803, 4;
add.s64 %rd668, %rd102, %rd667;
ld.shared.u32 %r2166, [%rd668];
add.s32 %r2167, %r2166, %r2165;
// inline asm
bfe.u32 %r1807, %r1812, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd669, %r1807, 4;
add.s64 %rd670, %rd106, %rd669;
ld.shared.u32 %r2168, [%rd670];
xor.b32 %r2169, %r2168, %r2167;
// inline asm
bfe.u32 %r1811, %r1812, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd671, %r1811, 4;
add.s64 %rd672, %rd110, %rd671;
ld.shared.u32 %r2170, [%rd672];
add.s32 %r2171, %r2170, %r2169;
xor.b32 %r2172, %r3888, %r1796;
xor.b32 %r1828, %r2172, %r2171;
// inline asm
bfe.u32 %r1815, %r1828, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd673, %r1815, 4;
add.s64 %rd674, %rd98, %rd673;
ld.shared.u32 %r2173, [%rd674];
// inline asm
bfe.u32 %r1819, %r1828, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd675, %r1819, 4;
add.s64 %rd676, %rd102, %rd675;
ld.shared.u32 %r2174, [%rd676];
add.s32 %r2175, %r2174, %r2173;
// inline asm
bfe.u32 %r1823, %r1828, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd677, %r1823, 4;
add.s64 %rd678, %rd106, %rd677;
ld.shared.u32 %r2176, [%rd678];
xor.b32 %r2177, %r2176, %r2175;
// inline asm
bfe.u32 %r1827, %r1828, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd679, %r1827, 4;
add.s64 %rd680, %rd110, %rd679;
ld.shared.u32 %r2178, [%rd680];
add.s32 %r2179, %r2178, %r2177;
xor.b32 %r2180, %r3887, %r1812;
xor.b32 %r1844, %r2180, %r2179;
// inline asm
bfe.u32 %r1831, %r1844, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd681, %r1831, 4;
add.s64 %rd682, %rd98, %rd681;
ld.shared.u32 %r2181, [%rd682];
// inline asm
bfe.u32 %r1835, %r1844, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd683, %r1835, 4;
add.s64 %rd684, %rd102, %rd683;
ld.shared.u32 %r2182, [%rd684];
add.s32 %r2183, %r2182, %r2181;
// inline asm
bfe.u32 %r1839, %r1844, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd685, %r1839, 4;
add.s64 %rd686, %rd106, %rd685;
ld.shared.u32 %r2184, [%rd686];
xor.b32 %r2185, %r2184, %r2183;
// inline asm
bfe.u32 %r1843, %r1844, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd687, %r1843, 4;
add.s64 %rd688, %rd110, %rd687;
ld.shared.u32 %r2186, [%rd688];
add.s32 %r2187, %r2186, %r2185;
xor.b32 %r2188, %r3886, %r1828;
xor.b32 %r1860, %r2188, %r2187;
// inline asm
bfe.u32 %r1847, %r1860, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd689, %r1847, 4;
add.s64 %rd690, %rd98, %rd689;
ld.shared.u32 %r2189, [%rd690];
// inline asm
bfe.u32 %r1851, %r1860, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd691, %r1851, 4;
add.s64 %rd692, %rd102, %rd691;
ld.shared.u32 %r2190, [%rd692];
add.s32 %r2191, %r2190, %r2189;
// inline asm
bfe.u32 %r1855, %r1860, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd693, %r1855, 4;
add.s64 %rd694, %rd106, %rd693;
ld.shared.u32 %r2192, [%rd694];
xor.b32 %r2193, %r2192, %r2191;
// inline asm
bfe.u32 %r1859, %r1860, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd695, %r1859, 4;
add.s64 %rd696, %rd110, %rd695;
ld.shared.u32 %r2194, [%rd696];
add.s32 %r2195, %r2194, %r2193;
xor.b32 %r2196, %r3885, %r1844;
xor.b32 %r1876, %r2196, %r2195;
// inline asm
bfe.u32 %r1863, %r1876, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd697, %r1863, 4;
add.s64 %rd698, %rd98, %rd697;
ld.shared.u32 %r2197, [%rd698];
// inline asm
bfe.u32 %r1867, %r1876, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd699, %r1867, 4;
add.s64 %rd700, %rd102, %rd699;
ld.shared.u32 %r2198, [%rd700];
add.s32 %r2199, %r2198, %r2197;
// inline asm
bfe.u32 %r1871, %r1876, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd701, %r1871, 4;
add.s64 %rd702, %rd106, %rd701;
ld.shared.u32 %r2200, [%rd702];
xor.b32 %r2201, %r2200, %r2199;
// inline asm
bfe.u32 %r1875, %r1876, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd703, %r1875, 4;
add.s64 %rd704, %rd110, %rd703;
ld.shared.u32 %r2202, [%rd704];
add.s32 %r2203, %r2202, %r2201;
xor.b32 %r2204, %r3884, %r1860;
xor.b32 %r1892, %r2204, %r2203;
// inline asm
bfe.u32 %r1879, %r1892, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd705, %r1879, 4;
add.s64 %rd706, %rd98, %rd705;
ld.shared.u32 %r2205, [%rd706];
// inline asm
bfe.u32 %r1883, %r1892, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd707, %r1883, 4;
add.s64 %rd708, %rd102, %rd707;
ld.shared.u32 %r2206, [%rd708];
add.s32 %r2207, %r2206, %r2205;
// inline asm
bfe.u32 %r1887, %r1892, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd709, %r1887, 4;
add.s64 %rd710, %rd106, %rd709;
ld.shared.u32 %r2208, [%rd710];
xor.b32 %r2209, %r2208, %r2207;
// inline asm
bfe.u32 %r1891, %r1892, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd711, %r1891, 4;
add.s64 %rd712, %rd110, %rd711;
ld.shared.u32 %r2210, [%rd712];
add.s32 %r2211, %r2210, %r2209;
xor.b32 %r2212, %r3883, %r1876;
xor.b32 %r1908, %r2212, %r2211;
// inline asm
bfe.u32 %r1895, %r1908, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd713, %r1895, 4;
add.s64 %rd714, %rd98, %rd713;
ld.shared.u32 %r2213, [%rd714];
// inline asm
bfe.u32 %r1899, %r1908, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd715, %r1899, 4;
add.s64 %rd716, %rd102, %rd715;
ld.shared.u32 %r2214, [%rd716];
add.s32 %r2215, %r2214, %r2213;
// inline asm
bfe.u32 %r1903, %r1908, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd717, %r1903, 4;
add.s64 %rd718, %rd106, %rd717;
ld.shared.u32 %r2216, [%rd718];
xor.b32 %r2217, %r2216, %r2215;
// inline asm
bfe.u32 %r1907, %r1908, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd719, %r1907, 4;
add.s64 %rd720, %rd110, %rd719;
ld.shared.u32 %r2218, [%rd720];
add.s32 %r2219, %r2218, %r2217;
xor.b32 %r2220, %r3882, %r1892;
xor.b32 %r1924, %r2220, %r2219;
// inline asm
bfe.u32 %r1911, %r1924, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd721, %r1911, 4;
add.s64 %rd722, %rd98, %rd721;
ld.shared.u32 %r2221, [%rd722];
// inline asm
bfe.u32 %r1915, %r1924, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd723, %r1915, 4;
add.s64 %rd724, %rd102, %rd723;
ld.shared.u32 %r2222, [%rd724];
add.s32 %r2223, %r2222, %r2221;
// inline asm
bfe.u32 %r1919, %r1924, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd725, %r1919, 4;
add.s64 %rd726, %rd106, %rd725;
ld.shared.u32 %r2224, [%rd726];
xor.b32 %r2225, %r2224, %r2223;
// inline asm
bfe.u32 %r1923, %r1924, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd727, %r1923, 4;
add.s64 %rd728, %rd110, %rd727;
ld.shared.u32 %r2226, [%rd728];
add.s32 %r2227, %r2226, %r2225;
xor.b32 %r2228, %r3881, %r1908;
xor.b32 %r1940, %r2228, %r2227;
// inline asm
bfe.u32 %r1927, %r1940, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd729, %r1927, 4;
add.s64 %rd730, %rd98, %rd729;
ld.shared.u32 %r2229, [%rd730];
// inline asm
bfe.u32 %r1931, %r1940, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd731, %r1931, 4;
add.s64 %rd732, %rd102, %rd731;
ld.shared.u32 %r2230, [%rd732];
add.s32 %r2231, %r2230, %r2229;
// inline asm
bfe.u32 %r1935, %r1940, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd733, %r1935, 4;
add.s64 %rd734, %rd106, %rd733;
ld.shared.u32 %r2232, [%rd734];
xor.b32 %r2233, %r2232, %r2231;
// inline asm
bfe.u32 %r1939, %r1940, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd735, %r1939, 4;
add.s64 %rd736, %rd110, %rd735;
ld.shared.u32 %r2234, [%rd736];
add.s32 %r2235, %r2234, %r2233;
xor.b32 %r2236, %r3880, %r1924;
xor.b32 %r1956, %r2236, %r2235;
// inline asm
bfe.u32 %r1943, %r1956, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd737, %r1943, 4;
add.s64 %rd738, %rd98, %rd737;
ld.shared.u32 %r2237, [%rd738];
// inline asm
bfe.u32 %r1947, %r1956, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd739, %r1947, 4;
add.s64 %rd740, %rd102, %rd739;
ld.shared.u32 %r2238, [%rd740];
add.s32 %r2239, %r2238, %r2237;
// inline asm
bfe.u32 %r1951, %r1956, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd741, %r1951, 4;
add.s64 %rd742, %rd106, %rd741;
ld.shared.u32 %r2240, [%rd742];
xor.b32 %r2241, %r2240, %r2239;
// inline asm
bfe.u32 %r1955, %r1956, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd743, %r1955, 4;
add.s64 %rd744, %rd110, %rd743;
ld.shared.u32 %r2242, [%rd744];
add.s32 %r2243, %r2242, %r2241;
xor.b32 %r2244, %r3879, %r1940;
xor.b32 %r1972, %r2244, %r2243;
// inline asm
bfe.u32 %r1959, %r1972, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd745, %r1959, 4;
add.s64 %rd746, %rd98, %rd745;
ld.shared.u32 %r2245, [%rd746];
// inline asm
bfe.u32 %r1963, %r1972, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd747, %r1963, 4;
add.s64 %rd748, %rd102, %rd747;
ld.shared.u32 %r2246, [%rd748];
add.s32 %r2247, %r2246, %r2245;
// inline asm
bfe.u32 %r1967, %r1972, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd749, %r1967, 4;
add.s64 %rd750, %rd106, %rd749;
ld.shared.u32 %r2248, [%rd750];
xor.b32 %r2249, %r2248, %r2247;
// inline asm
bfe.u32 %r1971, %r1972, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd751, %r1971, 4;
add.s64 %rd752, %rd110, %rd751;
ld.shared.u32 %r2250, [%rd752];
add.s32 %r2251, %r2250, %r2249;
xor.b32 %r2252, %r3878, %r1956;
xor.b32 %r1988, %r2252, %r2251;
// inline asm
bfe.u32 %r1975, %r1988, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd753, %r1975, 4;
add.s64 %rd754, %rd98, %rd753;
ld.shared.u32 %r2253, [%rd754];
// inline asm
bfe.u32 %r1979, %r1988, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd755, %r1979, 4;
add.s64 %rd756, %rd102, %rd755;
ld.shared.u32 %r2254, [%rd756];
add.s32 %r2255, %r2254, %r2253;
// inline asm
bfe.u32 %r1983, %r1988, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd757, %r1983, 4;
add.s64 %rd758, %rd106, %rd757;
ld.shared.u32 %r2256, [%rd758];
xor.b32 %r2257, %r2256, %r2255;
// inline asm
bfe.u32 %r1987, %r1988, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd759, %r1987, 4;
add.s64 %rd760, %rd110, %rd759;
ld.shared.u32 %r2258, [%rd760];
add.s32 %r2259, %r2258, %r2257;
xor.b32 %r2260, %r3877, %r1972;
xor.b32 %r2004, %r2260, %r2259;
// inline asm
bfe.u32 %r1991, %r2004, %r3852, %r3853;
// inline asm
mul.wide.u32 %rd761, %r1991, 4;
add.s64 %rd762, %rd98, %rd761;
ld.shared.u32 %r2261, [%rd762];
// inline asm
bfe.u32 %r1995, %r2004, %r3854, %r3853;
// inline asm
mul.wide.u32 %rd763, %r1995, 4;
add.s64 %rd764, %rd102, %rd763;
ld.shared.u32 %r2262, [%rd764];
add.s32 %r2263, %r2262, %r2261;
// inline asm
bfe.u32 %r1999, %r2004, %r3853, %r3853;
// inline asm
mul.wide.u32 %rd765, %r1999, 4;
add.s64 %rd766, %rd106, %rd765;
ld.shared.u32 %r2264, [%rd766];
xor.b32 %r2265, %r2264, %r2263;
// inline asm
bfe.u32 %r2003, %r2004, %r1494, %r3853;
// inline asm
mul.wide.u32 %rd767, %r2003, 4;
add.s64 %rd768, %rd110, %rd767;
ld.shared.u32 %r2266, [%rd768];
add.s32 %r2267, %r2266, %r2265;
xor.b32 %r2268, %r3876, %r1988;
xor.b32 %r3897, %r2268, %r2267;
xor.b32 %r3898, %r3875, %r2004;
st.shared.u32 [%rd1338+8], %r3898;
st.shared.u32 [%rd1338+12], %r3897;
add.s64 %rd1338, %rd1338, 16;
add.s32 %r3899, %r3899, 4;
setp.lt.u32 %p14, %r3899, 256;
@%p14 bra BB3_24;
// Set-up for the loop at BB3_26: compute this thread's base pointer into
// m03200_init$S2_all and zero-initialise two registers.
//
// %r3864 = this thread's x index within its thread block.
mov.u32 %r3864, %tid.x;
// %rd1323 = tid.x * 1024 as a 64-bit byte offset (signed widening multiply),
// i.e. each thread gets a 1024-byte stride.
mul.wide.s32 %rd1323, %r3864, 1024;
// %rd1321 = address of the symbol m03200_init$S2_all.
mov.u64 %rd1321, m03200_init$S2_all;
// %rd1339 = m03200_init$S2_all + tid.x * 1024. The BB3_26 body writes through
// this pointer with st.shared.u32 at offsets +0 and +4.
add.s64 %rd1339, %rd1321, %rd1323;
// %r2269 = 0. Besides seeding %r3902 below, the BB3_26 body passes it as the
// bit-position operand of bfe.u32 to extract the lowest 8-bit field.
mov.u32 %r2269, 0;
// %r3902 = 0. NOTE(review): its use is not visible in this part of the file;
// presumably the BB3_26 loop counter, by analogy with %r3899 in the
// preceding loop - confirm against the loop's back-edge.
mov.u32 %r3902, %r2269;
BB3_26:
mov.u32 %r3857, 16;
mov.u32 %r3856, 8;
mov.u32 %r3855, 24;
xor.b32 %r2782, %r39, %r3898;
xor.b32 %r2283, %r2782, %r3892;
// inline asm
bfe.u32 %r2270, %r2283, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd774, %r2270, 4;
add.s64 %rd775, %rd98, %rd774;
ld.shared.u32 %r2783, [%rd775];
// inline asm
bfe.u32 %r2274, %r2283, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd778, %r2274, 4;
add.s64 %rd779, %rd102, %rd778;
ld.shared.u32 %r2784, [%rd779];
add.s32 %r2785, %r2784, %r2783;
// inline asm
bfe.u32 %r2278, %r2283, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd782, %r2278, 4;
add.s64 %rd783, %rd106, %rd782;
ld.shared.u32 %r2786, [%rd783];
xor.b32 %r2787, %r2786, %r2785;
// inline asm
bfe.u32 %r2282, %r2283, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd786, %r2282, 4;
add.s64 %rd787, %rd110, %rd786;
ld.shared.u32 %r2788, [%rd787];
add.s32 %r2789, %r2788, %r2787;
xor.b32 %r2790, %r40, %r3897;
xor.b32 %r2791, %r2790, %r3891;
xor.b32 %r2299, %r2791, %r2789;
// inline asm
bfe.u32 %r2286, %r2299, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd788, %r2286, 4;
add.s64 %rd789, %rd98, %rd788;
ld.shared.u32 %r2792, [%rd789];
// inline asm
bfe.u32 %r2290, %r2299, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd790, %r2290, 4;
add.s64 %rd791, %rd102, %rd790;
ld.shared.u32 %r2793, [%rd791];
add.s32 %r2794, %r2793, %r2792;
// inline asm
bfe.u32 %r2294, %r2299, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd792, %r2294, 4;
add.s64 %rd793, %rd106, %rd792;
ld.shared.u32 %r2795, [%rd793];
xor.b32 %r2796, %r2795, %r2794;
// inline asm
bfe.u32 %r2298, %r2299, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd794, %r2298, 4;
add.s64 %rd795, %rd110, %rd794;
ld.shared.u32 %r2797, [%rd795];
add.s32 %r2798, %r2797, %r2796;
xor.b32 %r2799, %r3890, %r2283;
xor.b32 %r2315, %r2799, %r2798;
// inline asm
bfe.u32 %r2302, %r2315, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd796, %r2302, 4;
add.s64 %rd797, %rd98, %rd796;
ld.shared.u32 %r2800, [%rd797];
// inline asm
bfe.u32 %r2306, %r2315, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd798, %r2306, 4;
add.s64 %rd799, %rd102, %rd798;
ld.shared.u32 %r2801, [%rd799];
add.s32 %r2802, %r2801, %r2800;
// inline asm
bfe.u32 %r2310, %r2315, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd800, %r2310, 4;
add.s64 %rd801, %rd106, %rd800;
ld.shared.u32 %r2803, [%rd801];
xor.b32 %r2804, %r2803, %r2802;
// inline asm
bfe.u32 %r2314, %r2315, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd802, %r2314, 4;
add.s64 %rd803, %rd110, %rd802;
ld.shared.u32 %r2805, [%rd803];
add.s32 %r2806, %r2805, %r2804;
xor.b32 %r2807, %r3889, %r2299;
xor.b32 %r2331, %r2807, %r2806;
// inline asm
bfe.u32 %r2318, %r2331, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd804, %r2318, 4;
add.s64 %rd805, %rd98, %rd804;
ld.shared.u32 %r2808, [%rd805];
// inline asm
bfe.u32 %r2322, %r2331, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd806, %r2322, 4;
add.s64 %rd807, %rd102, %rd806;
ld.shared.u32 %r2809, [%rd807];
add.s32 %r2810, %r2809, %r2808;
// inline asm
bfe.u32 %r2326, %r2331, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd808, %r2326, 4;
add.s64 %rd809, %rd106, %rd808;
ld.shared.u32 %r2811, [%rd809];
xor.b32 %r2812, %r2811, %r2810;
// inline asm
bfe.u32 %r2330, %r2331, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd810, %r2330, 4;
add.s64 %rd811, %rd110, %rd810;
ld.shared.u32 %r2813, [%rd811];
add.s32 %r2814, %r2813, %r2812;
xor.b32 %r2815, %r3888, %r2315;
xor.b32 %r2347, %r2815, %r2814;
// inline asm
bfe.u32 %r2334, %r2347, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd812, %r2334, 4;
add.s64 %rd813, %rd98, %rd812;
ld.shared.u32 %r2816, [%rd813];
// inline asm
bfe.u32 %r2338, %r2347, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd814, %r2338, 4;
add.s64 %rd815, %rd102, %rd814;
ld.shared.u32 %r2817, [%rd815];
add.s32 %r2818, %r2817, %r2816;
// inline asm
bfe.u32 %r2342, %r2347, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd816, %r2342, 4;
add.s64 %rd817, %rd106, %rd816;
ld.shared.u32 %r2819, [%rd817];
xor.b32 %r2820, %r2819, %r2818;
// inline asm
bfe.u32 %r2346, %r2347, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd818, %r2346, 4;
add.s64 %rd819, %rd110, %rd818;
ld.shared.u32 %r2821, [%rd819];
add.s32 %r2822, %r2821, %r2820;
xor.b32 %r2823, %r3887, %r2331;
xor.b32 %r2363, %r2823, %r2822;
// inline asm
bfe.u32 %r2350, %r2363, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd820, %r2350, 4;
add.s64 %rd821, %rd98, %rd820;
ld.shared.u32 %r2824, [%rd821];
// inline asm
bfe.u32 %r2354, %r2363, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd822, %r2354, 4;
add.s64 %rd823, %rd102, %rd822;
ld.shared.u32 %r2825, [%rd823];
add.s32 %r2826, %r2825, %r2824;
// inline asm
bfe.u32 %r2358, %r2363, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd824, %r2358, 4;
add.s64 %rd825, %rd106, %rd824;
ld.shared.u32 %r2827, [%rd825];
xor.b32 %r2828, %r2827, %r2826;
// inline asm
bfe.u32 %r2362, %r2363, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd826, %r2362, 4;
add.s64 %rd827, %rd110, %rd826;
ld.shared.u32 %r2829, [%rd827];
add.s32 %r2830, %r2829, %r2828;
xor.b32 %r2831, %r3886, %r2347;
xor.b32 %r2379, %r2831, %r2830;
// inline asm
bfe.u32 %r2366, %r2379, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd828, %r2366, 4;
add.s64 %rd829, %rd98, %rd828;
ld.shared.u32 %r2832, [%rd829];
// inline asm
bfe.u32 %r2370, %r2379, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd830, %r2370, 4;
add.s64 %rd831, %rd102, %rd830;
ld.shared.u32 %r2833, [%rd831];
add.s32 %r2834, %r2833, %r2832;
// inline asm
bfe.u32 %r2374, %r2379, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd832, %r2374, 4;
add.s64 %rd833, %rd106, %rd832;
ld.shared.u32 %r2835, [%rd833];
xor.b32 %r2836, %r2835, %r2834;
// inline asm
bfe.u32 %r2378, %r2379, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd834, %r2378, 4;
add.s64 %rd835, %rd110, %rd834;
ld.shared.u32 %r2837, [%rd835];
add.s32 %r2838, %r2837, %r2836;
xor.b32 %r2839, %r3885, %r2363;
xor.b32 %r2395, %r2839, %r2838;
// inline asm
bfe.u32 %r2382, %r2395, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd836, %r2382, 4;
add.s64 %rd837, %rd98, %rd836;
ld.shared.u32 %r2840, [%rd837];
// inline asm
bfe.u32 %r2386, %r2395, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd838, %r2386, 4;
add.s64 %rd839, %rd102, %rd838;
ld.shared.u32 %r2841, [%rd839];
add.s32 %r2842, %r2841, %r2840;
// inline asm
bfe.u32 %r2390, %r2395, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd840, %r2390, 4;
add.s64 %rd841, %rd106, %rd840;
ld.shared.u32 %r2843, [%rd841];
xor.b32 %r2844, %r2843, %r2842;
// inline asm
bfe.u32 %r2394, %r2395, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd842, %r2394, 4;
add.s64 %rd843, %rd110, %rd842;
ld.shared.u32 %r2845, [%rd843];
add.s32 %r2846, %r2845, %r2844;
xor.b32 %r2847, %r3884, %r2379;
xor.b32 %r2411, %r2847, %r2846;
// inline asm
bfe.u32 %r2398, %r2411, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd844, %r2398, 4;
add.s64 %rd845, %rd98, %rd844;
ld.shared.u32 %r2848, [%rd845];
// inline asm
bfe.u32 %r2402, %r2411, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd846, %r2402, 4;
add.s64 %rd847, %rd102, %rd846;
ld.shared.u32 %r2849, [%rd847];
add.s32 %r2850, %r2849, %r2848;
// inline asm
bfe.u32 %r2406, %r2411, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd848, %r2406, 4;
add.s64 %rd849, %rd106, %rd848;
ld.shared.u32 %r2851, [%rd849];
xor.b32 %r2852, %r2851, %r2850;
// inline asm
bfe.u32 %r2410, %r2411, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd850, %r2410, 4;
add.s64 %rd851, %rd110, %rd850;
ld.shared.u32 %r2853, [%rd851];
add.s32 %r2854, %r2853, %r2852;
xor.b32 %r2855, %r3883, %r2395;
xor.b32 %r2427, %r2855, %r2854;
// inline asm
bfe.u32 %r2414, %r2427, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd852, %r2414, 4;
add.s64 %rd853, %rd98, %rd852;
ld.shared.u32 %r2856, [%rd853];
// inline asm
bfe.u32 %r2418, %r2427, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd854, %r2418, 4;
add.s64 %rd855, %rd102, %rd854;
ld.shared.u32 %r2857, [%rd855];
add.s32 %r2858, %r2857, %r2856;
// inline asm
bfe.u32 %r2422, %r2427, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd856, %r2422, 4;
add.s64 %rd857, %rd106, %rd856;
ld.shared.u32 %r2859, [%rd857];
xor.b32 %r2860, %r2859, %r2858;
// inline asm
bfe.u32 %r2426, %r2427, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd858, %r2426, 4;
add.s64 %rd859, %rd110, %rd858;
ld.shared.u32 %r2861, [%rd859];
add.s32 %r2862, %r2861, %r2860;
xor.b32 %r2863, %r3882, %r2411;
xor.b32 %r2443, %r2863, %r2862;
// inline asm
bfe.u32 %r2430, %r2443, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd860, %r2430, 4;
add.s64 %rd861, %rd98, %rd860;
ld.shared.u32 %r2864, [%rd861];
// inline asm
bfe.u32 %r2434, %r2443, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd862, %r2434, 4;
add.s64 %rd863, %rd102, %rd862;
ld.shared.u32 %r2865, [%rd863];
add.s32 %r2866, %r2865, %r2864;
// inline asm
bfe.u32 %r2438, %r2443, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd864, %r2438, 4;
add.s64 %rd865, %rd106, %rd864;
ld.shared.u32 %r2867, [%rd865];
xor.b32 %r2868, %r2867, %r2866;
// inline asm
bfe.u32 %r2442, %r2443, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd866, %r2442, 4;
add.s64 %rd867, %rd110, %rd866;
ld.shared.u32 %r2869, [%rd867];
add.s32 %r2870, %r2869, %r2868;
xor.b32 %r2871, %r3881, %r2427;
xor.b32 %r2459, %r2871, %r2870;
// inline asm
bfe.u32 %r2446, %r2459, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd868, %r2446, 4;
add.s64 %rd869, %rd98, %rd868;
ld.shared.u32 %r2872, [%rd869];
// inline asm
bfe.u32 %r2450, %r2459, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd870, %r2450, 4;
add.s64 %rd871, %rd102, %rd870;
ld.shared.u32 %r2873, [%rd871];
add.s32 %r2874, %r2873, %r2872;
// inline asm
bfe.u32 %r2454, %r2459, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd872, %r2454, 4;
add.s64 %rd873, %rd106, %rd872;
ld.shared.u32 %r2875, [%rd873];
xor.b32 %r2876, %r2875, %r2874;
// inline asm
bfe.u32 %r2458, %r2459, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd874, %r2458, 4;
add.s64 %rd875, %rd110, %rd874;
ld.shared.u32 %r2877, [%rd875];
add.s32 %r2878, %r2877, %r2876;
xor.b32 %r2879, %r3880, %r2443;
xor.b32 %r2475, %r2879, %r2878;
// inline asm
bfe.u32 %r2462, %r2475, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd876, %r2462, 4;
add.s64 %rd877, %rd98, %rd876;
ld.shared.u32 %r2880, [%rd877];
// inline asm
bfe.u32 %r2466, %r2475, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd878, %r2466, 4;
add.s64 %rd879, %rd102, %rd878;
ld.shared.u32 %r2881, [%rd879];
add.s32 %r2882, %r2881, %r2880;
// inline asm
bfe.u32 %r2470, %r2475, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd880, %r2470, 4;
add.s64 %rd881, %rd106, %rd880;
ld.shared.u32 %r2883, [%rd881];
xor.b32 %r2884, %r2883, %r2882;
// inline asm
bfe.u32 %r2474, %r2475, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd882, %r2474, 4;
add.s64 %rd883, %rd110, %rd882;
ld.shared.u32 %r2885, [%rd883];
add.s32 %r2886, %r2885, %r2884;
xor.b32 %r2887, %r3879, %r2459;
xor.b32 %r2491, %r2887, %r2886;
// inline asm
bfe.u32 %r2478, %r2491, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd884, %r2478, 4;
add.s64 %rd885, %rd98, %rd884;
ld.shared.u32 %r2888, [%rd885];
// inline asm
bfe.u32 %r2482, %r2491, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd886, %r2482, 4;
add.s64 %rd887, %rd102, %rd886;
ld.shared.u32 %r2889, [%rd887];
add.s32 %r2890, %r2889, %r2888;
// inline asm
bfe.u32 %r2486, %r2491, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd888, %r2486, 4;
add.s64 %rd889, %rd106, %rd888;
ld.shared.u32 %r2891, [%rd889];
xor.b32 %r2892, %r2891, %r2890;
// inline asm
bfe.u32 %r2490, %r2491, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd890, %r2490, 4;
add.s64 %rd891, %rd110, %rd890;
ld.shared.u32 %r2893, [%rd891];
add.s32 %r2894, %r2893, %r2892;
xor.b32 %r2895, %r3878, %r2475;
xor.b32 %r2507, %r2895, %r2894;
// inline asm
bfe.u32 %r2494, %r2507, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd892, %r2494, 4;
add.s64 %rd893, %rd98, %rd892;
ld.shared.u32 %r2896, [%rd893];
// inline asm
bfe.u32 %r2498, %r2507, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd894, %r2498, 4;
add.s64 %rd895, %rd102, %rd894;
ld.shared.u32 %r2897, [%rd895];
add.s32 %r2898, %r2897, %r2896;
// inline asm
bfe.u32 %r2502, %r2507, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd896, %r2502, 4;
add.s64 %rd897, %rd106, %rd896;
ld.shared.u32 %r2899, [%rd897];
xor.b32 %r2900, %r2899, %r2898;
// inline asm
bfe.u32 %r2506, %r2507, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd898, %r2506, 4;
add.s64 %rd899, %rd110, %rd898;
ld.shared.u32 %r2901, [%rd899];
add.s32 %r2902, %r2901, %r2900;
xor.b32 %r2903, %r3877, %r2491;
xor.b32 %r2523, %r2903, %r2902;
// inline asm
bfe.u32 %r2510, %r2523, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd900, %r2510, 4;
add.s64 %rd901, %rd98, %rd900;
ld.shared.u32 %r2904, [%rd901];
// inline asm
bfe.u32 %r2514, %r2523, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd902, %r2514, 4;
add.s64 %rd903, %rd102, %rd902;
ld.shared.u32 %r2905, [%rd903];
add.s32 %r2906, %r2905, %r2904;
// inline asm
bfe.u32 %r2518, %r2523, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd904, %r2518, 4;
add.s64 %rd905, %rd106, %rd904;
ld.shared.u32 %r2907, [%rd905];
xor.b32 %r2908, %r2907, %r2906;
// inline asm
bfe.u32 %r2522, %r2523, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd906, %r2522, 4;
add.s64 %rd907, %rd110, %rd906;
ld.shared.u32 %r2909, [%rd907];
add.s32 %r2910, %r2909, %r2908;
xor.b32 %r2911, %r3876, %r2507;
xor.b32 %r2912, %r2911, %r2910;
xor.b32 %r2913, %r3875, %r2523;
st.shared.u32 [%rd1339], %r2913;
st.shared.u32 [%rd1339+4], %r2912;
xor.b32 %r2914, %r2913, %r37;
xor.b32 %r2539, %r2914, %r3892;
// inline asm
bfe.u32 %r2526, %r2539, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd908, %r2526, 4;
add.s64 %rd909, %rd98, %rd908;
ld.shared.u32 %r2915, [%rd909];
// inline asm
bfe.u32 %r2530, %r2539, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd910, %r2530, 4;
add.s64 %rd911, %rd102, %rd910;
ld.shared.u32 %r2916, [%rd911];
add.s32 %r2917, %r2916, %r2915;
// inline asm
bfe.u32 %r2534, %r2539, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd912, %r2534, 4;
add.s64 %rd913, %rd106, %rd912;
ld.shared.u32 %r2918, [%rd913];
xor.b32 %r2919, %r2918, %r2917;
// inline asm
bfe.u32 %r2538, %r2539, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd914, %r2538, 4;
add.s64 %rd915, %rd110, %rd914;
ld.shared.u32 %r2920, [%rd915];
add.s32 %r2921, %r2920, %r2919;
xor.b32 %r2922, %r38, %r2912;
xor.b32 %r2923, %r2922, %r3891;
xor.b32 %r2555, %r2923, %r2921;
// inline asm
bfe.u32 %r2542, %r2555, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd916, %r2542, 4;
add.s64 %rd917, %rd98, %rd916;
ld.shared.u32 %r2924, [%rd917];
// inline asm
bfe.u32 %r2546, %r2555, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd918, %r2546, 4;
add.s64 %rd919, %rd102, %rd918;
ld.shared.u32 %r2925, [%rd919];
add.s32 %r2926, %r2925, %r2924;
// inline asm
bfe.u32 %r2550, %r2555, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd920, %r2550, 4;
add.s64 %rd921, %rd106, %rd920;
ld.shared.u32 %r2927, [%rd921];
xor.b32 %r2928, %r2927, %r2926;
// inline asm
bfe.u32 %r2554, %r2555, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd922, %r2554, 4;
add.s64 %rd923, %rd110, %rd922;
ld.shared.u32 %r2929, [%rd923];
add.s32 %r2930, %r2929, %r2928;
xor.b32 %r2931, %r3890, %r2539;
xor.b32 %r2571, %r2931, %r2930;
// inline asm
bfe.u32 %r2558, %r2571, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd924, %r2558, 4;
add.s64 %rd925, %rd98, %rd924;
ld.shared.u32 %r2932, [%rd925];
// inline asm
bfe.u32 %r2562, %r2571, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd926, %r2562, 4;
add.s64 %rd927, %rd102, %rd926;
ld.shared.u32 %r2933, [%rd927];
add.s32 %r2934, %r2933, %r2932;
// inline asm
bfe.u32 %r2566, %r2571, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd928, %r2566, 4;
add.s64 %rd929, %rd106, %rd928;
ld.shared.u32 %r2935, [%rd929];
xor.b32 %r2936, %r2935, %r2934;
// inline asm
bfe.u32 %r2570, %r2571, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd930, %r2570, 4;
add.s64 %rd931, %rd110, %rd930;
ld.shared.u32 %r2937, [%rd931];
add.s32 %r2938, %r2937, %r2936;
xor.b32 %r2939, %r3889, %r2555;
xor.b32 %r2587, %r2939, %r2938;
// inline asm
bfe.u32 %r2574, %r2587, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd932, %r2574, 4;
add.s64 %rd933, %rd98, %rd932;
ld.shared.u32 %r2940, [%rd933];
// inline asm
bfe.u32 %r2578, %r2587, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd934, %r2578, 4;
add.s64 %rd935, %rd102, %rd934;
ld.shared.u32 %r2941, [%rd935];
add.s32 %r2942, %r2941, %r2940;
// inline asm
bfe.u32 %r2582, %r2587, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd936, %r2582, 4;
add.s64 %rd937, %rd106, %rd936;
ld.shared.u32 %r2943, [%rd937];
xor.b32 %r2944, %r2943, %r2942;
// inline asm
bfe.u32 %r2586, %r2587, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd938, %r2586, 4;
add.s64 %rd939, %rd110, %rd938;
ld.shared.u32 %r2945, [%rd939];
add.s32 %r2946, %r2945, %r2944;
xor.b32 %r2947, %r3888, %r2571;
xor.b32 %r2603, %r2947, %r2946;
// inline asm
bfe.u32 %r2590, %r2603, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd940, %r2590, 4;
add.s64 %rd941, %rd98, %rd940;
ld.shared.u32 %r2948, [%rd941];
// inline asm
bfe.u32 %r2594, %r2603, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd942, %r2594, 4;
add.s64 %rd943, %rd102, %rd942;
ld.shared.u32 %r2949, [%rd943];
add.s32 %r2950, %r2949, %r2948;
// inline asm
bfe.u32 %r2598, %r2603, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd944, %r2598, 4;
add.s64 %rd945, %rd106, %rd944;
ld.shared.u32 %r2951, [%rd945];
xor.b32 %r2952, %r2951, %r2950;
// inline asm
bfe.u32 %r2602, %r2603, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd946, %r2602, 4;
add.s64 %rd947, %rd110, %rd946;
ld.shared.u32 %r2953, [%rd947];
add.s32 %r2954, %r2953, %r2952;
xor.b32 %r2955, %r3887, %r2587;
xor.b32 %r2619, %r2955, %r2954;
// inline asm
bfe.u32 %r2606, %r2619, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd948, %r2606, 4;
add.s64 %rd949, %rd98, %rd948;
ld.shared.u32 %r2956, [%rd949];
// inline asm
bfe.u32 %r2610, %r2619, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd950, %r2610, 4;
add.s64 %rd951, %rd102, %rd950;
ld.shared.u32 %r2957, [%rd951];
add.s32 %r2958, %r2957, %r2956;
// inline asm
bfe.u32 %r2614, %r2619, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd952, %r2614, 4;
add.s64 %rd953, %rd106, %rd952;
ld.shared.u32 %r2959, [%rd953];
xor.b32 %r2960, %r2959, %r2958;
// inline asm
bfe.u32 %r2618, %r2619, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd954, %r2618, 4;
add.s64 %rd955, %rd110, %rd954;
ld.shared.u32 %r2961, [%rd955];
add.s32 %r2962, %r2961, %r2960;
xor.b32 %r2963, %r3886, %r2603;
xor.b32 %r2635, %r2963, %r2962;
// inline asm
bfe.u32 %r2622, %r2635, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd956, %r2622, 4;
add.s64 %rd957, %rd98, %rd956;
ld.shared.u32 %r2964, [%rd957];
// inline asm
bfe.u32 %r2626, %r2635, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd958, %r2626, 4;
add.s64 %rd959, %rd102, %rd958;
ld.shared.u32 %r2965, [%rd959];
add.s32 %r2966, %r2965, %r2964;
// inline asm
bfe.u32 %r2630, %r2635, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd960, %r2630, 4;
add.s64 %rd961, %rd106, %rd960;
ld.shared.u32 %r2967, [%rd961];
xor.b32 %r2968, %r2967, %r2966;
// inline asm
bfe.u32 %r2634, %r2635, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd962, %r2634, 4;
add.s64 %rd963, %rd110, %rd962;
ld.shared.u32 %r2969, [%rd963];
add.s32 %r2970, %r2969, %r2968;
xor.b32 %r2971, %r3885, %r2619;
xor.b32 %r2651, %r2971, %r2970;
// inline asm
bfe.u32 %r2638, %r2651, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd964, %r2638, 4;
add.s64 %rd965, %rd98, %rd964;
ld.shared.u32 %r2972, [%rd965];
// inline asm
bfe.u32 %r2642, %r2651, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd966, %r2642, 4;
add.s64 %rd967, %rd102, %rd966;
ld.shared.u32 %r2973, [%rd967];
add.s32 %r2974, %r2973, %r2972;
// inline asm
bfe.u32 %r2646, %r2651, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd968, %r2646, 4;
add.s64 %rd969, %rd106, %rd968;
ld.shared.u32 %r2975, [%rd969];
xor.b32 %r2976, %r2975, %r2974;
// inline asm
bfe.u32 %r2650, %r2651, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd970, %r2650, 4;
add.s64 %rd971, %rd110, %rd970;
ld.shared.u32 %r2977, [%rd971];
add.s32 %r2978, %r2977, %r2976;
xor.b32 %r2979, %r3884, %r2635;
xor.b32 %r2667, %r2979, %r2978;
// inline asm
bfe.u32 %r2654, %r2667, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd972, %r2654, 4;
add.s64 %rd973, %rd98, %rd972;
ld.shared.u32 %r2980, [%rd973];
// inline asm
bfe.u32 %r2658, %r2667, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd974, %r2658, 4;
add.s64 %rd975, %rd102, %rd974;
ld.shared.u32 %r2981, [%rd975];
add.s32 %r2982, %r2981, %r2980;
// inline asm
bfe.u32 %r2662, %r2667, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd976, %r2662, 4;
add.s64 %rd977, %rd106, %rd976;
ld.shared.u32 %r2983, [%rd977];
xor.b32 %r2984, %r2983, %r2982;
// inline asm
bfe.u32 %r2666, %r2667, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd978, %r2666, 4;
add.s64 %rd979, %rd110, %rd978;
ld.shared.u32 %r2985, [%rd979];
add.s32 %r2986, %r2985, %r2984;
xor.b32 %r2987, %r3883, %r2651;
xor.b32 %r2683, %r2987, %r2986;
// inline asm
bfe.u32 %r2670, %r2683, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd980, %r2670, 4;
add.s64 %rd981, %rd98, %rd980;
ld.shared.u32 %r2988, [%rd981];
// inline asm
bfe.u32 %r2674, %r2683, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd982, %r2674, 4;
add.s64 %rd983, %rd102, %rd982;
ld.shared.u32 %r2989, [%rd983];
add.s32 %r2990, %r2989, %r2988;
// inline asm
bfe.u32 %r2678, %r2683, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd984, %r2678, 4;
add.s64 %rd985, %rd106, %rd984;
ld.shared.u32 %r2991, [%rd985];
xor.b32 %r2992, %r2991, %r2990;
// inline asm
bfe.u32 %r2682, %r2683, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd986, %r2682, 4;
add.s64 %rd987, %rd110, %rd986;
ld.shared.u32 %r2993, [%rd987];
add.s32 %r2994, %r2993, %r2992;
xor.b32 %r2995, %r3882, %r2667;
xor.b32 %r2699, %r2995, %r2994;
// inline asm
bfe.u32 %r2686, %r2699, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd988, %r2686, 4;
add.s64 %rd989, %rd98, %rd988;
ld.shared.u32 %r2996, [%rd989];
// inline asm
bfe.u32 %r2690, %r2699, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd990, %r2690, 4;
add.s64 %rd991, %rd102, %rd990;
ld.shared.u32 %r2997, [%rd991];
add.s32 %r2998, %r2997, %r2996;
// inline asm
bfe.u32 %r2694, %r2699, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd992, %r2694, 4;
add.s64 %rd993, %rd106, %rd992;
ld.shared.u32 %r2999, [%rd993];
xor.b32 %r3000, %r2999, %r2998;
// inline asm
bfe.u32 %r2698, %r2699, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd994, %r2698, 4;
add.s64 %rd995, %rd110, %rd994;
ld.shared.u32 %r3001, [%rd995];
add.s32 %r3002, %r3001, %r3000;
xor.b32 %r3003, %r3881, %r2683;
xor.b32 %r2715, %r3003, %r3002;
// inline asm
bfe.u32 %r2702, %r2715, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd996, %r2702, 4;
add.s64 %rd997, %rd98, %rd996;
ld.shared.u32 %r3004, [%rd997];
// inline asm
bfe.u32 %r2706, %r2715, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd998, %r2706, 4;
add.s64 %rd999, %rd102, %rd998;
ld.shared.u32 %r3005, [%rd999];
add.s32 %r3006, %r3005, %r3004;
// inline asm
bfe.u32 %r2710, %r2715, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd1000, %r2710, 4;
add.s64 %rd1001, %rd106, %rd1000;
ld.shared.u32 %r3007, [%rd1001];
xor.b32 %r3008, %r3007, %r3006;
// inline asm
bfe.u32 %r2714, %r2715, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd1002, %r2714, 4;
add.s64 %rd1003, %rd110, %rd1002;
ld.shared.u32 %r3009, [%rd1003];
add.s32 %r3010, %r3009, %r3008;
xor.b32 %r3011, %r3880, %r2699;
xor.b32 %r2731, %r3011, %r3010;
// inline asm
bfe.u32 %r2718, %r2731, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd1004, %r2718, 4;
add.s64 %rd1005, %rd98, %rd1004;
ld.shared.u32 %r3012, [%rd1005];
// inline asm
bfe.u32 %r2722, %r2731, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd1006, %r2722, 4;
add.s64 %rd1007, %rd102, %rd1006;
ld.shared.u32 %r3013, [%rd1007];
add.s32 %r3014, %r3013, %r3012;
// inline asm
bfe.u32 %r2726, %r2731, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd1008, %r2726, 4;
add.s64 %rd1009, %rd106, %rd1008;
ld.shared.u32 %r3015, [%rd1009];
xor.b32 %r3016, %r3015, %r3014;
// inline asm
bfe.u32 %r2730, %r2731, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd1010, %r2730, 4;
add.s64 %rd1011, %rd110, %rd1010;
ld.shared.u32 %r3017, [%rd1011];
add.s32 %r3018, %r3017, %r3016;
xor.b32 %r3019, %r3879, %r2715;
xor.b32 %r2747, %r3019, %r3018;
// inline asm
bfe.u32 %r2734, %r2747, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd1012, %r2734, 4;
add.s64 %rd1013, %rd98, %rd1012;
ld.shared.u32 %r3020, [%rd1013];
// inline asm
bfe.u32 %r2738, %r2747, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd1014, %r2738, 4;
add.s64 %rd1015, %rd102, %rd1014;
ld.shared.u32 %r3021, [%rd1015];
add.s32 %r3022, %r3021, %r3020;
// inline asm
bfe.u32 %r2742, %r2747, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd1016, %r2742, 4;
add.s64 %rd1017, %rd106, %rd1016;
ld.shared.u32 %r3023, [%rd1017];
xor.b32 %r3024, %r3023, %r3022;
// inline asm
bfe.u32 %r2746, %r2747, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd1018, %r2746, 4;
add.s64 %rd1019, %rd110, %rd1018;
ld.shared.u32 %r3025, [%rd1019];
add.s32 %r3026, %r3025, %r3024;
xor.b32 %r3027, %r3878, %r2731;
xor.b32 %r2763, %r3027, %r3026;
// inline asm
bfe.u32 %r2750, %r2763, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd1020, %r2750, 4;
add.s64 %rd1021, %rd98, %rd1020;
ld.shared.u32 %r3028, [%rd1021];
// inline asm
bfe.u32 %r2754, %r2763, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd1022, %r2754, 4;
add.s64 %rd1023, %rd102, %rd1022;
ld.shared.u32 %r3029, [%rd1023];
add.s32 %r3030, %r3029, %r3028;
// inline asm
bfe.u32 %r2758, %r2763, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd1024, %r2758, 4;
add.s64 %rd1025, %rd106, %rd1024;
ld.shared.u32 %r3031, [%rd1025];
xor.b32 %r3032, %r3031, %r3030;
// inline asm
bfe.u32 %r2762, %r2763, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd1026, %r2762, 4;
add.s64 %rd1027, %rd110, %rd1026;
ld.shared.u32 %r3033, [%rd1027];
add.s32 %r3034, %r3033, %r3032;
xor.b32 %r3035, %r3877, %r2747;
xor.b32 %r2779, %r3035, %r3034;
// inline asm
bfe.u32 %r2766, %r2779, %r3855, %r3856;
// inline asm
mul.wide.u32 %rd1028, %r2766, 4;
add.s64 %rd1029, %rd98, %rd1028;
ld.shared.u32 %r3036, [%rd1029];
// inline asm
bfe.u32 %r2770, %r2779, %r3857, %r3856;
// inline asm
mul.wide.u32 %rd1030, %r2770, 4;
add.s64 %rd1031, %rd102, %rd1030;
ld.shared.u32 %r3037, [%rd1031];
add.s32 %r3038, %r3037, %r3036;
// inline asm
bfe.u32 %r2774, %r2779, %r3856, %r3856;
// inline asm
mul.wide.u32 %rd1032, %r2774, 4;
add.s64 %rd1033, %rd106, %rd1032;
ld.shared.u32 %r3039, [%rd1033];
xor.b32 %r3040, %r3039, %r3038;
// inline asm
bfe.u32 %r2778, %r2779, %r2269, %r3856;
// inline asm
mul.wide.u32 %rd1034, %r2778, 4;
add.s64 %rd1035, %rd110, %rd1034;
ld.shared.u32 %r3041, [%rd1035];
add.s32 %r3042, %r3041, %r3040;
// Final XOR steps of this chain of rounds:
//   %r3897 = %r3876 ^ %r2763 ^ %r3042   (%r3042 is the last four-lookup sum)
//   %r3898 = %r3875 ^ %r2779
xor.b32 %r3043, %r3876, %r2763;
xor.b32 %r3897, %r3043, %r3042;
xor.b32 %r3898, %r3875, %r2779;
// Write the resulting pair to shared memory at byte offsets 8 and 12 from
// the running pointer %rd1339.
// NOTE(review): the 16-byte stride below suggests offsets 0 and 4 are
// written earlier in this loop body, outside this view - confirm.
st.shared.u32 [%rd1339+8], %r3898;
st.shared.u32 [%rd1339+12], %r3897;
// Advance the pointer by 16 bytes and the counter by 4 per iteration.
add.s64 %rd1339, %rd1339, 16;
add.s32 %r3902, %r3902, 4;
// Unsigned compare: branch back to BB3_26 while %r3902 < 256.
setp.lt.u32 %p15, %r3902, 256;
@%p15 bra BB3_26;
// Per-thread start address for the loop that follows:
//   %rd1340 = address of m03200_init$S3_all + tid.x * 1024 (bytes).
// mul.wide.s32 widens the signed 32-bit product to 64 bits.
// NOTE(review): 1024 bytes per thread would be 256 u32 entries - presumably
// one table per thread; confirm against the declaration of
// m03200_init$S3_all.
mov.u32 %r3865, %tid.x;
mul.wide.s32 %rd1324, %r3865, 1024;
mov.u64 %rd1322, m03200_init$S3_all;
add.s64 %rd1340, %rd1322, %rd1324;
// %r3044 = 0 is used in BB3_28 as the bit position of the lowest byte in
// the bfe.u32 extractions. %r3905 also starts at 0 (presumably the BB3_28
// loop counter - its update is outside this view).
mov.u32 %r3044, 0;
mov.u32 %r3905, %r3044;
// Loop head. The rounds in this loop all use the same bit-field operands for
// bfe.u32 (d = bits [pos, pos+len) of a): positions 24, 16, 8 and 0
// (%r3044, set to 0 before the loop) with length 8, i.e. the four bytes of
// a 32-bit word from most to least significant.
BB3_28:
mov.u32 %r3860, 16;
mov.u32 %r3859, 8;
mov.u32 %r3858, 24;
// Input word of the first round: %r3058 = %r39 ^ %r3898 ^ %r3892.
xor.b32 %r3557, %r39, %r3898;
xor.b32 %r3058, %r3557, %r3892;
// One round = four byte-indexed u32 loads from shared memory combined as
//   ((T0[b3] + T1[b2]) ^ T2[b1]) + T3[b0]
// with table bases %rd98, %rd102, %rd106, %rd110; each index is scaled by 4
// (u32 entries). This has the shape of the Blowfish F function.
// NOTE(review): the four bases are defined outside this view - presumably
// this thread's four S-boxes; confirm.
// b3 = bits 31..24 -> T0 lookup.
// inline asm
bfe.u32 %r3045, %r3058, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1041, %r3045, 4;
add.s64 %rd1042, %rd98, %rd1041;
ld.shared.u32 %r3558, [%rd1042];
// b2 = bits 23..16 -> T1 lookup, added to the T0 value.
// inline asm
bfe.u32 %r3049, %r3058, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1045, %r3049, 4;
add.s64 %rd1046, %rd102, %rd1045;
ld.shared.u32 %r3559, [%rd1046];
add.s32 %r3560, %r3559, %r3558;
// b1 = bits 15..8 -> T2 lookup, XORed into the running value.
// inline asm
bfe.u32 %r3053, %r3058, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1049, %r3053, 4;
add.s64 %rd1050, %rd106, %rd1049;
ld.shared.u32 %r3561, [%rd1050];
xor.b32 %r3562, %r3561, %r3560;
// b0 = bits 7..0 -> T3 lookup, added to give the round output %r3564.
// inline asm
bfe.u32 %r3057, %r3058, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1053, %r3057, 4;
add.s64 %rd1054, %rd110, %rd1053;
ld.shared.u32 %r3563, [%rd1054];
add.s32 %r3564, %r3563, %r3562;
// Input word of the next round:
//   %r3074 = %r40 ^ %r3897 ^ %r3891 ^ (round output %r3564).
xor.b32 %r3565, %r40, %r3897;
xor.b32 %r3566, %r3565, %r3891;
xor.b32 %r3074, %r3566, %r3564;
// inline asm
bfe.u32 %r3061, %r3074, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1055, %r3061, 4;
add.s64 %rd1056, %rd98, %rd1055;
ld.shared.u32 %r3567, [%rd1056];
// inline asm
bfe.u32 %r3065, %r3074, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1057, %r3065, 4;
add.s64 %rd1058, %rd102, %rd1057;
ld.shared.u32 %r3568, [%rd1058];
add.s32 %r3569, %r3568, %r3567;
// inline asm
bfe.u32 %r3069, %r3074, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1059, %r3069, 4;
add.s64 %rd1060, %rd106, %rd1059;
ld.shared.u32 %r3570, [%rd1060];
xor.b32 %r3571, %r3570, %r3569;
// inline asm
bfe.u32 %r3073, %r3074, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1061, %r3073, 4;
add.s64 %rd1062, %rd110, %rd1061;
ld.shared.u32 %r3572, [%rd1062];
add.s32 %r3573, %r3572, %r3571;
xor.b32 %r3574, %r3890, %r3058;
xor.b32 %r3090, %r3574, %r3573;
// inline asm
bfe.u32 %r3077, %r3090, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1063, %r3077, 4;
add.s64 %rd1064, %rd98, %rd1063;
ld.shared.u32 %r3575, [%rd1064];
// inline asm
bfe.u32 %r3081, %r3090, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1065, %r3081, 4;
add.s64 %rd1066, %rd102, %rd1065;
ld.shared.u32 %r3576, [%rd1066];
add.s32 %r3577, %r3576, %r3575;
// inline asm
bfe.u32 %r3085, %r3090, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1067, %r3085, 4;
add.s64 %rd1068, %rd106, %rd1067;
ld.shared.u32 %r3578, [%rd1068];
xor.b32 %r3579, %r3578, %r3577;
// inline asm
bfe.u32 %r3089, %r3090, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1069, %r3089, 4;
add.s64 %rd1070, %rd110, %rd1069;
ld.shared.u32 %r3580, [%rd1070];
add.s32 %r3581, %r3580, %r3579;
xor.b32 %r3582, %r3889, %r3074;
xor.b32 %r3106, %r3582, %r3581;
// inline asm
bfe.u32 %r3093, %r3106, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1071, %r3093, 4;
add.s64 %rd1072, %rd98, %rd1071;
ld.shared.u32 %r3583, [%rd1072];
// inline asm
bfe.u32 %r3097, %r3106, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1073, %r3097, 4;
add.s64 %rd1074, %rd102, %rd1073;
ld.shared.u32 %r3584, [%rd1074];
add.s32 %r3585, %r3584, %r3583;
// inline asm
bfe.u32 %r3101, %r3106, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1075, %r3101, 4;
add.s64 %rd1076, %rd106, %rd1075;
ld.shared.u32 %r3586, [%rd1076];
xor.b32 %r3587, %r3586, %r3585;
// inline asm
bfe.u32 %r3105, %r3106, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1077, %r3105, 4;
add.s64 %rd1078, %rd110, %rd1077;
ld.shared.u32 %r3588, [%rd1078];
add.s32 %r3589, %r3588, %r3587;
xor.b32 %r3590, %r3888, %r3090;
xor.b32 %r3122, %r3590, %r3589;
// inline asm
bfe.u32 %r3109, %r3122, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1079, %r3109, 4;
add.s64 %rd1080, %rd98, %rd1079;
ld.shared.u32 %r3591, [%rd1080];
// inline asm
bfe.u32 %r3113, %r3122, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1081, %r3113, 4;
add.s64 %rd1082, %rd102, %rd1081;
ld.shared.u32 %r3592, [%rd1082];
add.s32 %r3593, %r3592, %r3591;
// inline asm
bfe.u32 %r3117, %r3122, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1083, %r3117, 4;
add.s64 %rd1084, %rd106, %rd1083;
ld.shared.u32 %r3594, [%rd1084];
xor.b32 %r3595, %r3594, %r3593;
// inline asm
bfe.u32 %r3121, %r3122, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1085, %r3121, 4;
add.s64 %rd1086, %rd110, %rd1085;
ld.shared.u32 %r3596, [%rd1086];
add.s32 %r3597, %r3596, %r3595;
xor.b32 %r3598, %r3887, %r3106;
xor.b32 %r3138, %r3598, %r3597;
// inline asm
bfe.u32 %r3125, %r3138, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1087, %r3125, 4;
add.s64 %rd1088, %rd98, %rd1087;
ld.shared.u32 %r3599, [%rd1088];
// inline asm
bfe.u32 %r3129, %r3138, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1089, %r3129, 4;
add.s64 %rd1090, %rd102, %rd1089;
ld.shared.u32 %r3600, [%rd1090];
add.s32 %r3601, %r3600, %r3599;
// inline asm
bfe.u32 %r3133, %r3138, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1091, %r3133, 4;
add.s64 %rd1092, %rd106, %rd1091;
ld.shared.u32 %r3602, [%rd1092];
xor.b32 %r3603, %r3602, %r3601;
// inline asm
bfe.u32 %r3137, %r3138, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1093, %r3137, 4;
add.s64 %rd1094, %rd110, %rd1093;
ld.shared.u32 %r3604, [%rd1094];
add.s32 %r3605, %r3604, %r3603;
xor.b32 %r3606, %r3886, %r3122;
xor.b32 %r3154, %r3606, %r3605;
// inline asm
bfe.u32 %r3141, %r3154, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1095, %r3141, 4;
add.s64 %rd1096, %rd98, %rd1095;
ld.shared.u32 %r3607, [%rd1096];
// inline asm
bfe.u32 %r3145, %r3154, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1097, %r3145, 4;
add.s64 %rd1098, %rd102, %rd1097;
ld.shared.u32 %r3608, [%rd1098];
add.s32 %r3609, %r3608, %r3607;
// inline asm
bfe.u32 %r3149, %r3154, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1099, %r3149, 4;
add.s64 %rd1100, %rd106, %rd1099;
ld.shared.u32 %r3610, [%rd1100];
xor.b32 %r3611, %r3610, %r3609;
// inline asm
bfe.u32 %r3153, %r3154, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1101, %r3153, 4;
add.s64 %rd1102, %rd110, %rd1101;
ld.shared.u32 %r3612, [%rd1102];
add.s32 %r3613, %r3612, %r3611;
xor.b32 %r3614, %r3885, %r3138;
xor.b32 %r3170, %r3614, %r3613;
// inline asm
bfe.u32 %r3157, %r3170, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1103, %r3157, 4;
add.s64 %rd1104, %rd98, %rd1103;
ld.shared.u32 %r3615, [%rd1104];
// inline asm
bfe.u32 %r3161, %r3170, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1105, %r3161, 4;
add.s64 %rd1106, %rd102, %rd1105;
ld.shared.u32 %r3616, [%rd1106];
add.s32 %r3617, %r3616, %r3615;
// inline asm
bfe.u32 %r3165, %r3170, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1107, %r3165, 4;
add.s64 %rd1108, %rd106, %rd1107;
ld.shared.u32 %r3618, [%rd1108];
xor.b32 %r3619, %r3618, %r3617;
// inline asm
bfe.u32 %r3169, %r3170, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1109, %r3169, 4;
add.s64 %rd1110, %rd110, %rd1109;
ld.shared.u32 %r3620, [%rd1110];
add.s32 %r3621, %r3620, %r3619;
xor.b32 %r3622, %r3884, %r3154;
xor.b32 %r3186, %r3622, %r3621;
// inline asm
bfe.u32 %r3173, %r3186, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1111, %r3173, 4;
add.s64 %rd1112, %rd98, %rd1111;
ld.shared.u32 %r3623, [%rd1112];
// inline asm
bfe.u32 %r3177, %r3186, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1113, %r3177, 4;
add.s64 %rd1114, %rd102, %rd1113;
ld.shared.u32 %r3624, [%rd1114];
add.s32 %r3625, %r3624, %r3623;
// inline asm
bfe.u32 %r3181, %r3186, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1115, %r3181, 4;
add.s64 %rd1116, %rd106, %rd1115;
ld.shared.u32 %r3626, [%rd1116];
xor.b32 %r3627, %r3626, %r3625;
// inline asm
bfe.u32 %r3185, %r3186, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1117, %r3185, 4;
add.s64 %rd1118, %rd110, %rd1117;
ld.shared.u32 %r3628, [%rd1118];
add.s32 %r3629, %r3628, %r3627;
xor.b32 %r3630, %r3883, %r3170;
xor.b32 %r3202, %r3630, %r3629;
// inline asm
bfe.u32 %r3189, %r3202, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1119, %r3189, 4;
add.s64 %rd1120, %rd98, %rd1119;
ld.shared.u32 %r3631, [%rd1120];
// inline asm
bfe.u32 %r3193, %r3202, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1121, %r3193, 4;
add.s64 %rd1122, %rd102, %rd1121;
ld.shared.u32 %r3632, [%rd1122];
add.s32 %r3633, %r3632, %r3631;
// inline asm
bfe.u32 %r3197, %r3202, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1123, %r3197, 4;
add.s64 %rd1124, %rd106, %rd1123;
ld.shared.u32 %r3634, [%rd1124];
xor.b32 %r3635, %r3634, %r3633;
// inline asm
bfe.u32 %r3201, %r3202, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1125, %r3201, 4;
add.s64 %rd1126, %rd110, %rd1125;
ld.shared.u32 %r3636, [%rd1126];
add.s32 %r3637, %r3636, %r3635;
xor.b32 %r3638, %r3882, %r3186;
xor.b32 %r3218, %r3638, %r3637;
// inline asm
bfe.u32 %r3205, %r3218, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1127, %r3205, 4;
add.s64 %rd1128, %rd98, %rd1127;
ld.shared.u32 %r3639, [%rd1128];
// inline asm
bfe.u32 %r3209, %r3218, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1129, %r3209, 4;
add.s64 %rd1130, %rd102, %rd1129;
ld.shared.u32 %r3640, [%rd1130];
add.s32 %r3641, %r3640, %r3639;
// inline asm
bfe.u32 %r3213, %r3218, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1131, %r3213, 4;
add.s64 %rd1132, %rd106, %rd1131;
ld.shared.u32 %r3642, [%rd1132];
xor.b32 %r3643, %r3642, %r3641;
// inline asm
bfe.u32 %r3217, %r3218, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1133, %r3217, 4;
add.s64 %rd1134, %rd110, %rd1133;
ld.shared.u32 %r3644, [%rd1134];
add.s32 %r3645, %r3644, %r3643;
xor.b32 %r3646, %r3881, %r3202;
xor.b32 %r3234, %r3646, %r3645;
// inline asm
bfe.u32 %r3221, %r3234, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1135, %r3221, 4;
add.s64 %rd1136, %rd98, %rd1135;
ld.shared.u32 %r3647, [%rd1136];
// inline asm
bfe.u32 %r3225, %r3234, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1137, %r3225, 4;
add.s64 %rd1138, %rd102, %rd1137;
ld.shared.u32 %r3648, [%rd1138];
add.s32 %r3649, %r3648, %r3647;
// inline asm
bfe.u32 %r3229, %r3234, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1139, %r3229, 4;
add.s64 %rd1140, %rd106, %rd1139;
ld.shared.u32 %r3650, [%rd1140];
xor.b32 %r3651, %r3650, %r3649;
// inline asm
bfe.u32 %r3233, %r3234, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1141, %r3233, 4;
add.s64 %rd1142, %rd110, %rd1141;
ld.shared.u32 %r3652, [%rd1142];
add.s32 %r3653, %r3652, %r3651;
xor.b32 %r3654, %r3880, %r3218;
xor.b32 %r3250, %r3654, %r3653;
// inline asm
bfe.u32 %r3237, %r3250, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1143, %r3237, 4;
add.s64 %rd1144, %rd98, %rd1143;
ld.shared.u32 %r3655, [%rd1144];
// inline asm
bfe.u32 %r3241, %r3250, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1145, %r3241, 4;
add.s64 %rd1146, %rd102, %rd1145;
ld.shared.u32 %r3656, [%rd1146];
add.s32 %r3657, %r3656, %r3655;
// inline asm
bfe.u32 %r3245, %r3250, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1147, %r3245, 4;
add.s64 %rd1148, %rd106, %rd1147;
ld.shared.u32 %r3658, [%rd1148];
xor.b32 %r3659, %r3658, %r3657;
// inline asm
bfe.u32 %r3249, %r3250, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1149, %r3249, 4;
add.s64 %rd1150, %rd110, %rd1149;
ld.shared.u32 %r3660, [%rd1150];
add.s32 %r3661, %r3660, %r3659;
xor.b32 %r3662, %r3879, %r3234;
xor.b32 %r3266, %r3662, %r3661;
// inline asm
bfe.u32 %r3253, %r3266, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1151, %r3253, 4;
add.s64 %rd1152, %rd98, %rd1151;
ld.shared.u32 %r3663, [%rd1152];
// inline asm
bfe.u32 %r3257, %r3266, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1153, %r3257, 4;
add.s64 %rd1154, %rd102, %rd1153;
ld.shared.u32 %r3664, [%rd1154];
add.s32 %r3665, %r3664, %r3663;
// inline asm
bfe.u32 %r3261, %r3266, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1155, %r3261, 4;
add.s64 %rd1156, %rd106, %rd1155;
ld.shared.u32 %r3666, [%rd1156];
xor.b32 %r3667, %r3666, %r3665;
// inline asm
bfe.u32 %r3265, %r3266, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1157, %r3265, 4;
add.s64 %rd1158, %rd110, %rd1157;
ld.shared.u32 %r3668, [%rd1158];
add.s32 %r3669, %r3668, %r3667;
xor.b32 %r3670, %r3878, %r3250;
xor.b32 %r3282, %r3670, %r3669;
// inline asm
bfe.u32 %r3269, %r3282, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1159, %r3269, 4;
add.s64 %rd1160, %rd98, %rd1159;
ld.shared.u32 %r3671, [%rd1160];
// inline asm
bfe.u32 %r3273, %r3282, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1161, %r3273, 4;
add.s64 %rd1162, %rd102, %rd1161;
ld.shared.u32 %r3672, [%rd1162];
add.s32 %r3673, %r3672, %r3671;
// inline asm
bfe.u32 %r3277, %r3282, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1163, %r3277, 4;
add.s64 %rd1164, %rd106, %rd1163;
ld.shared.u32 %r3674, [%rd1164];
xor.b32 %r3675, %r3674, %r3673;
// inline asm
bfe.u32 %r3281, %r3282, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1165, %r3281, 4;
add.s64 %rd1166, %rd110, %rd1165;
ld.shared.u32 %r3676, [%rd1166];
add.s32 %r3677, %r3676, %r3675;
xor.b32 %r3678, %r3877, %r3266;
xor.b32 %r3298, %r3678, %r3677;
// inline asm
bfe.u32 %r3285, %r3298, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1167, %r3285, 4;
add.s64 %rd1168, %rd98, %rd1167;
ld.shared.u32 %r3679, [%rd1168];
// inline asm
bfe.u32 %r3289, %r3298, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1169, %r3289, 4;
add.s64 %rd1170, %rd102, %rd1169;
ld.shared.u32 %r3680, [%rd1170];
add.s32 %r3681, %r3680, %r3679;
// inline asm
bfe.u32 %r3293, %r3298, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1171, %r3293, 4;
add.s64 %rd1172, %rd106, %rd1171;
ld.shared.u32 %r3682, [%rd1172];
xor.b32 %r3683, %r3682, %r3681;
// inline asm
bfe.u32 %r3297, %r3298, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1173, %r3297, 4;
add.s64 %rd1174, %rd110, %rd1173;
ld.shared.u32 %r3684, [%rd1174];
add.s32 %r3685, %r3684, %r3683;
// Final XOR steps of the first chain of rounds in this loop iteration:
//   %r3687 = %r3876 ^ %r3282 ^ %r3685   (%r3685 is the last four-lookup sum)
//   %r3688 = %r3875 ^ %r3298
xor.b32 %r3686, %r3876, %r3282;
xor.b32 %r3687, %r3686, %r3685;
xor.b32 %r3688, %r3875, %r3298;
// Store the pair at byte offsets 0 and 4 from the shared-memory pointer
// %rd1340.
st.shared.u32 [%rd1340], %r3688;
st.shared.u32 [%rd1340+4], %r3687;
// Seed the next chain of rounds with the value just stored:
//   %r3314 = %r3688 ^ %r37 ^ %r3892.
// The matching second word is built further down from %r38 and %r3687.
xor.b32 %r3689, %r3688, %r37;
xor.b32 %r3314, %r3689, %r3892;
// inline asm
bfe.u32 %r3301, %r3314, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1175, %r3301, 4;
add.s64 %rd1176, %rd98, %rd1175;
ld.shared.u32 %r3690, [%rd1176];
// inline asm
bfe.u32 %r3305, %r3314, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1177, %r3305, 4;
add.s64 %rd1178, %rd102, %rd1177;
ld.shared.u32 %r3691, [%rd1178];
add.s32 %r3692, %r3691, %r3690;
// inline asm
bfe.u32 %r3309, %r3314, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1179, %r3309, 4;
add.s64 %rd1180, %rd106, %rd1179;
ld.shared.u32 %r3693, [%rd1180];
xor.b32 %r3694, %r3693, %r3692;
// inline asm
bfe.u32 %r3313, %r3314, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1181, %r3313, 4;
add.s64 %rd1182, %rd110, %rd1181;
ld.shared.u32 %r3695, [%rd1182];
add.s32 %r3696, %r3695, %r3694;
xor.b32 %r3697, %r38, %r3687;
xor.b32 %r3698, %r3697, %r3891;
xor.b32 %r3330, %r3698, %r3696;
// inline asm
bfe.u32 %r3317, %r3330, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1183, %r3317, 4;
add.s64 %rd1184, %rd98, %rd1183;
ld.shared.u32 %r3699, [%rd1184];
// inline asm
bfe.u32 %r3321, %r3330, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1185, %r3321, 4;
add.s64 %rd1186, %rd102, %rd1185;
ld.shared.u32 %r3700, [%rd1186];
add.s32 %r3701, %r3700, %r3699;
// inline asm
bfe.u32 %r3325, %r3330, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1187, %r3325, 4;
add.s64 %rd1188, %rd106, %rd1187;
ld.shared.u32 %r3702, [%rd1188];
xor.b32 %r3703, %r3702, %r3701;
// inline asm
bfe.u32 %r3329, %r3330, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1189, %r3329, 4;
add.s64 %rd1190, %rd110, %rd1189;
ld.shared.u32 %r3704, [%rd1190];
add.s32 %r3705, %r3704, %r3703;
xor.b32 %r3706, %r3890, %r3314;
xor.b32 %r3346, %r3706, %r3705;
// inline asm
bfe.u32 %r3333, %r3346, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1191, %r3333, 4;
add.s64 %rd1192, %rd98, %rd1191;
ld.shared.u32 %r3707, [%rd1192];
// inline asm
bfe.u32 %r3337, %r3346, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1193, %r3337, 4;
add.s64 %rd1194, %rd102, %rd1193;
ld.shared.u32 %r3708, [%rd1194];
add.s32 %r3709, %r3708, %r3707;
// inline asm
bfe.u32 %r3341, %r3346, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1195, %r3341, 4;
add.s64 %rd1196, %rd106, %rd1195;
ld.shared.u32 %r3710, [%rd1196];
xor.b32 %r3711, %r3710, %r3709;
// inline asm
bfe.u32 %r3345, %r3346, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1197, %r3345, 4;
add.s64 %rd1198, %rd110, %rd1197;
ld.shared.u32 %r3712, [%rd1198];
add.s32 %r3713, %r3712, %r3711;
xor.b32 %r3714, %r3889, %r3330;
xor.b32 %r3362, %r3714, %r3713;
// inline asm
bfe.u32 %r3349, %r3362, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1199, %r3349, 4;
add.s64 %rd1200, %rd98, %rd1199;
ld.shared.u32 %r3715, [%rd1200];
// inline asm
bfe.u32 %r3353, %r3362, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1201, %r3353, 4;
add.s64 %rd1202, %rd102, %rd1201;
ld.shared.u32 %r3716, [%rd1202];
add.s32 %r3717, %r3716, %r3715;
// inline asm
bfe.u32 %r3357, %r3362, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1203, %r3357, 4;
add.s64 %rd1204, %rd106, %rd1203;
ld.shared.u32 %r3718, [%rd1204];
xor.b32 %r3719, %r3718, %r3717;
// inline asm
bfe.u32 %r3361, %r3362, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1205, %r3361, 4;
add.s64 %rd1206, %rd110, %rd1205;
ld.shared.u32 %r3720, [%rd1206];
add.s32 %r3721, %r3720, %r3719;
xor.b32 %r3722, %r3888, %r3346;
xor.b32 %r3378, %r3722, %r3721;
// inline asm
bfe.u32 %r3365, %r3378, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1207, %r3365, 4;
add.s64 %rd1208, %rd98, %rd1207;
ld.shared.u32 %r3723, [%rd1208];
// inline asm
bfe.u32 %r3369, %r3378, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1209, %r3369, 4;
add.s64 %rd1210, %rd102, %rd1209;
ld.shared.u32 %r3724, [%rd1210];
add.s32 %r3725, %r3724, %r3723;
// inline asm
bfe.u32 %r3373, %r3378, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1211, %r3373, 4;
add.s64 %rd1212, %rd106, %rd1211;
ld.shared.u32 %r3726, [%rd1212];
xor.b32 %r3727, %r3726, %r3725;
// inline asm
bfe.u32 %r3377, %r3378, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1213, %r3377, 4;
add.s64 %rd1214, %rd110, %rd1213;
ld.shared.u32 %r3728, [%rd1214];
add.s32 %r3729, %r3728, %r3727;
xor.b32 %r3730, %r3887, %r3362;
xor.b32 %r3394, %r3730, %r3729;
// inline asm
bfe.u32 %r3381, %r3394, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1215, %r3381, 4;
add.s64 %rd1216, %rd98, %rd1215;
ld.shared.u32 %r3731, [%rd1216];
// inline asm
bfe.u32 %r3385, %r3394, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1217, %r3385, 4;
add.s64 %rd1218, %rd102, %rd1217;
ld.shared.u32 %r3732, [%rd1218];
add.s32 %r3733, %r3732, %r3731;
// inline asm
bfe.u32 %r3389, %r3394, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1219, %r3389, 4;
add.s64 %rd1220, %rd106, %rd1219;
ld.shared.u32 %r3734, [%rd1220];
xor.b32 %r3735, %r3734, %r3733;
// inline asm
bfe.u32 %r3393, %r3394, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1221, %r3393, 4;
add.s64 %rd1222, %rd110, %rd1221;
ld.shared.u32 %r3736, [%rd1222];
add.s32 %r3737, %r3736, %r3735;
xor.b32 %r3738, %r3886, %r3378;
xor.b32 %r3410, %r3738, %r3737;
// inline asm
bfe.u32 %r3397, %r3410, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1223, %r3397, 4;
add.s64 %rd1224, %rd98, %rd1223;
ld.shared.u32 %r3739, [%rd1224];
// inline asm
bfe.u32 %r3401, %r3410, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1225, %r3401, 4;
add.s64 %rd1226, %rd102, %rd1225;
ld.shared.u32 %r3740, [%rd1226];
add.s32 %r3741, %r3740, %r3739;
// inline asm
bfe.u32 %r3405, %r3410, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1227, %r3405, 4;
add.s64 %rd1228, %rd106, %rd1227;
ld.shared.u32 %r3742, [%rd1228];
xor.b32 %r3743, %r3742, %r3741;
// inline asm
bfe.u32 %r3409, %r3410, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1229, %r3409, 4;
add.s64 %rd1230, %rd110, %rd1229;
ld.shared.u32 %r3744, [%rd1230];
add.s32 %r3745, %r3744, %r3743;
xor.b32 %r3746, %r3885, %r3394;
xor.b32 %r3426, %r3746, %r3745;
// inline asm
bfe.u32 %r3413, %r3426, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1231, %r3413, 4;
add.s64 %rd1232, %rd98, %rd1231;
ld.shared.u32 %r3747, [%rd1232];
// inline asm
bfe.u32 %r3417, %r3426, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1233, %r3417, 4;
add.s64 %rd1234, %rd102, %rd1233;
ld.shared.u32 %r3748, [%rd1234];
add.s32 %r3749, %r3748, %r3747;
// inline asm
bfe.u32 %r3421, %r3426, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1235, %r3421, 4;
add.s64 %rd1236, %rd106, %rd1235;
ld.shared.u32 %r3750, [%rd1236];
xor.b32 %r3751, %r3750, %r3749;
// inline asm
bfe.u32 %r3425, %r3426, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1237, %r3425, 4;
add.s64 %rd1238, %rd110, %rd1237;
ld.shared.u32 %r3752, [%rd1238];
add.s32 %r3753, %r3752, %r3751;
xor.b32 %r3754, %r3884, %r3410;
xor.b32 %r3442, %r3754, %r3753;
// inline asm
bfe.u32 %r3429, %r3442, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1239, %r3429, 4;
add.s64 %rd1240, %rd98, %rd1239;
ld.shared.u32 %r3755, [%rd1240];
// inline asm
bfe.u32 %r3433, %r3442, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1241, %r3433, 4;
add.s64 %rd1242, %rd102, %rd1241;
ld.shared.u32 %r3756, [%rd1242];
add.s32 %r3757, %r3756, %r3755;
// inline asm
bfe.u32 %r3437, %r3442, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1243, %r3437, 4;
add.s64 %rd1244, %rd106, %rd1243;
ld.shared.u32 %r3758, [%rd1244];
xor.b32 %r3759, %r3758, %r3757;
// inline asm
bfe.u32 %r3441, %r3442, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1245, %r3441, 4;
add.s64 %rd1246, %rd110, %rd1245;
ld.shared.u32 %r3760, [%rd1246];
add.s32 %r3761, %r3760, %r3759;
xor.b32 %r3762, %r3883, %r3426;
xor.b32 %r3458, %r3762, %r3761;
// inline asm
bfe.u32 %r3445, %r3458, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1247, %r3445, 4;
add.s64 %rd1248, %rd98, %rd1247;
ld.shared.u32 %r3763, [%rd1248];
// inline asm
bfe.u32 %r3449, %r3458, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1249, %r3449, 4;
add.s64 %rd1250, %rd102, %rd1249;
ld.shared.u32 %r3764, [%rd1250];
add.s32 %r3765, %r3764, %r3763;
// inline asm
bfe.u32 %r3453, %r3458, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1251, %r3453, 4;
add.s64 %rd1252, %rd106, %rd1251;
ld.shared.u32 %r3766, [%rd1252];
xor.b32 %r3767, %r3766, %r3765;
// inline asm
bfe.u32 %r3457, %r3458, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1253, %r3457, 4;
add.s64 %rd1254, %rd110, %rd1253;
ld.shared.u32 %r3768, [%rd1254];
add.s32 %r3769, %r3768, %r3767;
xor.b32 %r3770, %r3882, %r3442;
xor.b32 %r3474, %r3770, %r3769;
// inline asm
bfe.u32 %r3461, %r3474, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1255, %r3461, 4;
add.s64 %rd1256, %rd98, %rd1255;
ld.shared.u32 %r3771, [%rd1256];
// inline asm
bfe.u32 %r3465, %r3474, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1257, %r3465, 4;
add.s64 %rd1258, %rd102, %rd1257;
ld.shared.u32 %r3772, [%rd1258];
add.s32 %r3773, %r3772, %r3771;
// inline asm
bfe.u32 %r3469, %r3474, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1259, %r3469, 4;
add.s64 %rd1260, %rd106, %rd1259;
ld.shared.u32 %r3774, [%rd1260];
xor.b32 %r3775, %r3774, %r3773;
// inline asm
bfe.u32 %r3473, %r3474, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1261, %r3473, 4;
add.s64 %rd1262, %rd110, %rd1261;
ld.shared.u32 %r3776, [%rd1262];
add.s32 %r3777, %r3776, %r3775;
xor.b32 %r3778, %r3881, %r3458;
xor.b32 %r3490, %r3778, %r3777;
// inline asm
bfe.u32 %r3477, %r3490, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1263, %r3477, 4;
add.s64 %rd1264, %rd98, %rd1263;
ld.shared.u32 %r3779, [%rd1264];
// inline asm
bfe.u32 %r3481, %r3490, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1265, %r3481, 4;
add.s64 %rd1266, %rd102, %rd1265;
ld.shared.u32 %r3780, [%rd1266];
add.s32 %r3781, %r3780, %r3779;
// inline asm
bfe.u32 %r3485, %r3490, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1267, %r3485, 4;
add.s64 %rd1268, %rd106, %rd1267;
ld.shared.u32 %r3782, [%rd1268];
xor.b32 %r3783, %r3782, %r3781;
// inline asm
bfe.u32 %r3489, %r3490, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1269, %r3489, 4;
add.s64 %rd1270, %rd110, %rd1269;
ld.shared.u32 %r3784, [%rd1270];
add.s32 %r3785, %r3784, %r3783;
xor.b32 %r3786, %r3880, %r3474;
xor.b32 %r3506, %r3786, %r3785;
// inline asm
bfe.u32 %r3493, %r3506, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1271, %r3493, 4;
add.s64 %rd1272, %rd98, %rd1271;
ld.shared.u32 %r3787, [%rd1272];
// inline asm
bfe.u32 %r3497, %r3506, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1273, %r3497, 4;
add.s64 %rd1274, %rd102, %rd1273;
ld.shared.u32 %r3788, [%rd1274];
add.s32 %r3789, %r3788, %r3787;
// inline asm
bfe.u32 %r3501, %r3506, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1275, %r3501, 4;
add.s64 %rd1276, %rd106, %rd1275;
ld.shared.u32 %r3790, [%rd1276];
xor.b32 %r3791, %r3790, %r3789;
// inline asm
bfe.u32 %r3505, %r3506, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1277, %r3505, 4;
add.s64 %rd1278, %rd110, %rd1277;
ld.shared.u32 %r3792, [%rd1278];
add.s32 %r3793, %r3792, %r3791;
xor.b32 %r3794, %r3879, %r3490;
xor.b32 %r3522, %r3794, %r3793;
// inline asm
bfe.u32 %r3509, %r3522, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1279, %r3509, 4;
add.s64 %rd1280, %rd98, %rd1279;
ld.shared.u32 %r3795, [%rd1280];
// inline asm
bfe.u32 %r3513, %r3522, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1281, %r3513, 4;
add.s64 %rd1282, %rd102, %rd1281;
ld.shared.u32 %r3796, [%rd1282];
add.s32 %r3797, %r3796, %r3795;
// inline asm
bfe.u32 %r3517, %r3522, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1283, %r3517, 4;
add.s64 %rd1284, %rd106, %rd1283;
ld.shared.u32 %r3798, [%rd1284];
xor.b32 %r3799, %r3798, %r3797;
// inline asm
bfe.u32 %r3521, %r3522, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1285, %r3521, 4;
add.s64 %rd1286, %rd110, %rd1285;
ld.shared.u32 %r3800, [%rd1286];
add.s32 %r3801, %r3800, %r3799;
xor.b32 %r3802, %r3878, %r3506;
xor.b32 %r3538, %r3802, %r3801;
// inline asm
bfe.u32 %r3525, %r3538, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1287, %r3525, 4;
add.s64 %rd1288, %rd98, %rd1287;
ld.shared.u32 %r3803, [%rd1288];
// inline asm
bfe.u32 %r3529, %r3538, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1289, %r3529, 4;
add.s64 %rd1290, %rd102, %rd1289;
ld.shared.u32 %r3804, [%rd1290];
add.s32 %r3805, %r3804, %r3803;
// inline asm
bfe.u32 %r3533, %r3538, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1291, %r3533, 4;
add.s64 %rd1292, %rd106, %rd1291;
ld.shared.u32 %r3806, [%rd1292];
xor.b32 %r3807, %r3806, %r3805;
// inline asm
bfe.u32 %r3537, %r3538, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1293, %r3537, 4;
add.s64 %rd1294, %rd110, %rd1293;
ld.shared.u32 %r3808, [%rd1294];
add.s32 %r3809, %r3808, %r3807;
xor.b32 %r3810, %r3877, %r3522;
xor.b32 %r3554, %r3810, %r3809;
// inline asm
bfe.u32 %r3541, %r3554, %r3858, %r3859;
// inline asm
mul.wide.u32 %rd1295, %r3541, 4;
add.s64 %rd1296, %rd98, %rd1295;
ld.shared.u32 %r3811, [%rd1296];
// inline asm
bfe.u32 %r3545, %r3554, %r3860, %r3859;
// inline asm
mul.wide.u32 %rd1297, %r3545, 4;
add.s64 %rd1298, %rd102, %rd1297;
ld.shared.u32 %r3812, [%rd1298];
add.s32 %r3813, %r3812, %r3811;
// inline asm
bfe.u32 %r3549, %r3554, %r3859, %r3859;
// inline asm
mul.wide.u32 %rd1299, %r3549, 4;
add.s64 %rd1300, %rd106, %rd1299;
ld.shared.u32 %r3814, [%rd1300];
xor.b32 %r3815, %r3814, %r3813;
// inline asm
bfe.u32 %r3553, %r3554, %r3044, %r3859;
// inline asm
mul.wide.u32 %rd1301, %r3553, 4;
add.s64 %rd1302, %rd110, %rd1301;
ld.shared.u32 %r3816, [%rd1302];
add.s32 %r3817, %r3816, %r3815;
xor.b32 %r3818, %r3876, %r3538;
xor.b32 %r3897, %r3818, %r3817;
xor.b32 %r3898, %r3875, %r3554;
st.shared.u32 [%rd1340+8], %r3898;
st.shared.u32 [%rd1340+12], %r3897;
add.s64 %rd1340, %rd1340, 16;
add.s32 %r3905, %r3905, 4;
setp.lt.u32 %p16, %r3905, 256;
@%p16 bra BB3_28;
st.global.u32 [%rd16+72], %r3892;
st.global.u32 [%rd16+76], %r3891;
st.global.u32 [%rd16+80], %r3890;
st.global.u32 [%rd16+84], %r3889;
st.global.u32 [%rd16+88], %r3888;
st.global.u32 [%rd16+92], %r3887;
st.global.u32 [%rd16+96], %r3886;
st.global.u32 [%rd16+100], %r3885;
st.global.u32 [%rd16+104], %r3884;
st.global.u32 [%rd16+108], %r3883;
st.global.u32 [%rd16+112], %r3882;
st.global.u32 [%rd16+116], %r3881;
st.global.u32 [%rd16+120], %r3880;
st.global.u32 [%rd16+124], %r3879;
st.global.u32 [%rd16+128], %r3878;
st.global.u32 [%rd16+132], %r3877;
st.global.u32 [%rd16+136], %r3876;
st.global.u32 [%rd16+140], %r3875;
mov.u32 %r3908, 0;
BB3_30:
mul.wide.u32 %rd1306, %r3908, 4;
add.s64 %rd1307, %rd98, %rd1306;
ld.shared.u32 %r3820, [%rd1307];
add.s64 %rd1310, %rd16, %rd1306;
ld.shared.u32 %r3821, [%rd1307+4];
ld.shared.u32 %r3822, [%rd1307+8];
ld.shared.u32 %r3823, [%rd1307+12];
ld.shared.u32 %r3824, [%rd1307+16];
ld.shared.u32 %r3825, [%rd1307+20];
ld.shared.u32 %r3826, [%rd1307+24];
ld.shared.u32 %r3827, [%rd1307+28];
st.global.u32 [%rd1310+144], %r3820;
add.s64 %rd1313, %rd102, %rd1306;
st.global.u32 [%rd1310+148], %r3821;
st.global.u32 [%rd1310+152], %r3822;
st.global.u32 [%rd1310+156], %r3823;
st.global.u32 [%rd1310+160], %r3824;
st.global.u32 [%rd1310+164], %r3825;
st.global.u32 [%rd1310+168], %r3826;
st.global.u32 [%rd1310+172], %r3827;
ld.shared.u32 %r3828, [%rd1313];
ld.shared.u32 %r3829, [%rd1313+4];
ld.shared.u32 %r3830, [%rd1313+8];
ld.shared.u32 %r3831, [%rd1313+12];
ld.shared.u32 %r3832, [%rd1313+16];
ld.shared.u32 %r3833, [%rd1313+20];
ld.shared.u32 %r3834, [%rd1313+24];
ld.shared.u32 %r3835, [%rd1313+28];
st.global.u32 [%rd1310+1168], %r3828;
add.s64 %rd1316, %rd106, %rd1306;
st.global.u32 [%rd1310+1172], %r3829;
st.global.u32 [%rd1310+1176], %r3830;
st.global.u32 [%rd1310+1180], %r3831;
st.global.u32 [%rd1310+1184], %r3832;
st.global.u32 [%rd1310+1188], %r3833;
st.global.u32 [%rd1310+1192], %r3834;
st.global.u32 [%rd1310+1196], %r3835;
ld.shared.u32 %r3836, [%rd1316];
ld.shared.u32 %r3837, [%rd1316+4];
ld.shared.u32 %r3838, [%rd1316+8];
ld.shared.u32 %r3839, [%rd1316+12];
ld.shared.u32 %r3840, [%rd1316+16];
ld.shared.u32 %r3841, [%rd1316+20];
ld.shared.u32 %r3842, [%rd1316+24];
ld.shared.u32 %r3843, [%rd1316+28];
st.global.u32 [%rd1310+2192], %r3836;
add.s64 %rd1319, %rd110, %rd1306;
st.global.u32 [%rd1310+2196], %r3837;
st.global.u32 [%rd1310+2200], %r3838;
st.global.u32 [%rd1310+2204], %r3839;
st.global.u32 [%rd1310+2208], %r3840;
st.global.u32 [%rd1310+2212], %r3841;
st.global.u32 [%rd1310+2216], %r3842;
st.global.u32 [%rd1310+2220], %r3843;
ld.shared.u32 %r3844, [%rd1319];
ld.shared.u32 %r3845, [%rd1319+4];
ld.shared.u32 %r3846, [%rd1319+8];
ld.shared.u32 %r3847, [%rd1319+12];
ld.shared.u32 %r3848, [%rd1319+16];
ld.shared.u32 %r3849, [%rd1319+20];
ld.shared.u32 %r3850, [%rd1319+24];
ld.shared.u32 %r3851, [%rd1319+28];
st.global.u32 [%rd1310+3216], %r3844;
st.global.u32 [%rd1310+3220], %r3845;
st.global.u32 [%rd1310+3224], %r3846;
st.global.u32 [%rd1310+3228], %r3847;
st.global.u32 [%rd1310+3232], %r3848;
st.global.u32 [%rd1310+3236], %r3849;
st.global.u32 [%rd1310+3240], %r3850;
st.global.u32 [%rd1310+3244], %r3851;
add.s32 %r3908, %r3908, 8;
setp.ne.s32 %p17, %r3908, 256;
@%p17 bra BB3_30;
BB3_31:
ret;
}
// .globl m03200_loop
.entry m03200_loop(
.param .u64 .ptr .global .align 4 m03200_loop_param_0,
.param .u64 .ptr .global .align 4 m03200_loop_param_1,
.param .u64 .ptr .global .align 4 m03200_loop_param_2,
.param .u64 .ptr .global .align 4 m03200_loop_param_3,
.param .u64 .ptr .global .align 4 m03200_loop_param_4,
.param .u64 .ptr .global .align 1 m03200_loop_param_5,
.param .u64 .ptr .global .align 4 m03200_loop_param_6,
.param .u64 .ptr .global .align 4 m03200_loop_param_7,
.param .u64 .ptr .global .align 4 m03200_loop_param_8,
.param .u64 .ptr .global .align 4 m03200_loop_param_9,
.param .u64 .ptr .global .align 4 m03200_loop_param_10,
.param .u64 .ptr .global .align 4 m03200_loop_param_11,
.param .u64 .ptr .global .align 4 m03200_loop_param_12,
.param .u64 .ptr .global .align 4 m03200_loop_param_13,
.param .u64 .ptr .global .align 8 m03200_loop_param_14,
.param .u64 .ptr .global .align 4 m03200_loop_param_15,
.param .u64 .ptr .global .align 4 m03200_loop_param_16,
.param .u64 .ptr .global .align 4 m03200_loop_param_17,
.param .u64 .ptr .global .align 1 m03200_loop_param_18,
.param .u64 .ptr .global .align 4 m03200_loop_param_19,
.param .u64 .ptr .global .align 4 m03200_loop_param_20,
.param .u64 .ptr .global .align 4 m03200_loop_param_21,
.param .u64 .ptr .global .align 4 m03200_loop_param_22,
.param .u64 .ptr .global .align 4 m03200_loop_param_23,
.param .u32 m03200_loop_param_24,
.param .u32 m03200_loop_param_25,
.param .u32 m03200_loop_param_26,
.param .u32 m03200_loop_param_27,
.param .u32 m03200_loop_param_28,
.param .u32 m03200_loop_param_29,
.param .u32 m03200_loop_param_30,
.param .u32 m03200_loop_param_31,
.param .u32 m03200_loop_param_32,
.param .u32 m03200_loop_param_33,
.param .u64 m03200_loop_param_34
)
.reqntid 8, 1, 1
{
.reg .pred %p<14>;
.reg .b32 %r<10353>;
.reg .b64 %rd<3503>;
// demoted variable
.shared .align 4 .b8 m03200_loop$S0_all[8192];
// demoted variable
.shared .align 4 .b8 m03200_loop$S1_all[8192];
// demoted variable
.shared .align 4 .b8 m03200_loop$S2_all[8192];
// demoted variable
.shared .align 4 .b8 m03200_loop$S3_all[8192];
ld.param.u64 %rd31, [m03200_loop_param_4];
ld.param.u64 %rd32, [m03200_loop_param_17];
ld.param.u32 %r169, [m03200_loop_param_27];
ld.param.u32 %r170, [m03200_loop_param_29];
ld.param.u64 %rd33, [m03200_loop_param_34];
mov.b32 %r171, %envreg3;
mov.u32 %r172, %ctaid.x;
mov.u32 %r173, %ntid.x;
mad.lo.s32 %r174, %r172, %r173, %r171;
mov.u32 %r1, %tid.x;
add.s32 %r2, %r174, %r1;
cvt.s64.s32 %rd34, %r2;
cvt.s64.s32 %rd1, %r1;
setp.ge.u64 %p1, %rd34, %rd33;
@%p1 bra BB4_24;
mul.wide.s32 %rd35, %r2, 4240;
add.s64 %rd2, %rd31, %rd35;
ld.global.u32 %r3, [%rd2];
ld.global.u32 %r4, [%rd2+4];
ld.global.u32 %r5, [%rd2+8];
ld.global.u32 %r6, [%rd2+12];
ld.global.u32 %r7, [%rd2+16];
ld.global.u32 %r8, [%rd2+20];
ld.global.u32 %r9, [%rd2+24];
ld.global.u32 %r10, [%rd2+28];
ld.global.u32 %r11, [%rd2+32];
ld.global.u32 %r12, [%rd2+36];
ld.global.u32 %r13, [%rd2+40];
ld.global.u32 %r14, [%rd2+44];
ld.global.u32 %r15, [%rd2+48];
ld.global.u32 %r16, [%rd2+52];
ld.global.u32 %r17, [%rd2+56];
ld.global.u32 %r18, [%rd2+60];
ld.global.u32 %r19, [%rd2+64];
ld.global.u32 %r20, [%rd2+68];
ld.global.u32 %r10351, [%rd2+72];
ld.global.u32 %r10350, [%rd2+76];
ld.global.u32 %r10349, [%rd2+80];
ld.global.u32 %r10348, [%rd2+84];
ld.global.u32 %r10347, [%rd2+88];
ld.global.u32 %r10346, [%rd2+92];
ld.global.u32 %r10345, [%rd2+96];
ld.global.u32 %r10344, [%rd2+100];
ld.global.u32 %r10343, [%rd2+104];
ld.global.u32 %r10342, [%rd2+108];
ld.global.u32 %r10341, [%rd2+112];
ld.global.u32 %r10340, [%rd2+116];
ld.global.u32 %r10339, [%rd2+120];
ld.global.u32 %r10338, [%rd2+124];
ld.global.u32 %r10337, [%rd2+128];
ld.global.u32 %r10336, [%rd2+132];
ld.global.u32 %r10335, [%rd2+136];
ld.global.u32 %r10334, [%rd2+140];
mov.u32 %r10290, 0;
BB4_2:
mul.wide.u32 %rd38, %r10290, 4;
add.s64 %rd39, %rd2, %rd38;
ld.global.u32 %r176, [%rd39+144];
shl.b64 %rd40, %rd1, 10;
mov.u64 %rd41, m03200_loop$S0_all;
add.s64 %rd42, %rd41, %rd40;
add.s64 %rd43, %rd42, %rd38;
ld.global.u32 %r177, [%rd39+1168];
ld.global.u32 %r178, [%rd39+2192];
ld.global.u32 %r179, [%rd39+3216];
ld.global.u32 %r180, [%rd39+148];
ld.global.u32 %r181, [%rd39+1172];
ld.global.u32 %r182, [%rd39+2196];
ld.global.u32 %r183, [%rd39+3220];
ld.global.u32 %r184, [%rd39+152];
ld.global.u32 %r185, [%rd39+1176];
ld.global.u32 %r186, [%rd39+2200];
ld.global.u32 %r187, [%rd39+3224];
ld.global.u32 %r188, [%rd39+156];
ld.global.u32 %r189, [%rd39+1180];
ld.global.u32 %r190, [%rd39+2204];
ld.global.u32 %r191, [%rd39+3228];
ld.global.u32 %r192, [%rd39+160];
ld.global.u32 %r193, [%rd39+1184];
ld.global.u32 %r194, [%rd39+2208];
ld.global.u32 %r195, [%rd39+3232];
ld.global.u32 %r196, [%rd39+164];
ld.global.u32 %r197, [%rd39+1188];
ld.global.u32 %r198, [%rd39+2212];
ld.global.u32 %r199, [%rd39+3236];
ld.global.u32 %r200, [%rd39+168];
ld.global.u32 %r201, [%rd39+1192];
ld.global.u32 %r202, [%rd39+2216];
ld.global.u32 %r203, [%rd39+3240];
ld.global.u32 %r204, [%rd39+172];
ld.global.u32 %r205, [%rd39+1196];
ld.global.u32 %r206, [%rd39+2220];
ld.global.u32 %r207, [%rd39+3244];
st.shared.u32 [%rd43], %r176;
mov.u64 %rd44, m03200_loop$S1_all;
add.s64 %rd45, %rd44, %rd40;
add.s64 %rd46, %rd45, %rd38;
st.shared.u32 [%rd43+4], %r180;
st.shared.u32 [%rd43+8], %r184;
st.shared.u32 [%rd43+12], %r188;
st.shared.u32 [%rd43+16], %r192;
st.shared.u32 [%rd43+20], %r196;
st.shared.u32 [%rd43+24], %r200;
st.shared.u32 [%rd43+28], %r204;
st.shared.u32 [%rd46], %r177;
mov.u64 %rd47, m03200_loop$S2_all;
add.s64 %rd48, %rd47, %rd40;
add.s64 %rd49, %rd48, %rd38;
st.shared.u32 [%rd46+4], %r181;
st.shared.u32 [%rd46+8], %r185;
st.shared.u32 [%rd46+12], %r189;
st.shared.u32 [%rd46+16], %r193;
st.shared.u32 [%rd46+20], %r197;
st.shared.u32 [%rd46+24], %r201;
st.shared.u32 [%rd46+28], %r205;
st.shared.u32 [%rd49], %r178;
mov.u64 %rd50, m03200_loop$S3_all;
add.s64 %rd51, %rd50, %rd40;
add.s64 %rd52, %rd51, %rd38;
st.shared.u32 [%rd49+4], %r182;
st.shared.u32 [%rd49+8], %r186;
st.shared.u32 [%rd49+12], %r190;
st.shared.u32 [%rd49+16], %r194;
st.shared.u32 [%rd49+20], %r198;
st.shared.u32 [%rd49+24], %r202;
st.shared.u32 [%rd49+28], %r206;
st.shared.u32 [%rd52], %r179;
st.shared.u32 [%rd52+4], %r183;
st.shared.u32 [%rd52+8], %r187;
st.shared.u32 [%rd52+12], %r191;
st.shared.u32 [%rd52+16], %r195;
st.shared.u32 [%rd52+20], %r199;
st.shared.u32 [%rd52+24], %r203;
st.shared.u32 [%rd52+28], %r207;
add.s32 %r10290, %r10290, 8;
setp.ne.s32 %p2, %r10290, 256;
@%p2 bra BB4_2;
setp.eq.s32 %p3, %r170, 0;
@%p3 bra BB4_22;
mul.wide.u32 %rd53, %r169, 560;
add.s64 %rd54, %rd32, %rd53;
ld.global.u32 %r41, [%rd54];
ld.global.u32 %r42, [%rd54+4];
ld.global.u32 %r43, [%rd54+8];
ld.global.u32 %r44, [%rd54+12];
mul.wide.s32 %rd55, %r1, 1024;
add.s64 %rd3, %rd41, %rd55;
add.s64 %rd4, %rd44, %rd55;
add.s64 %rd5, %rd47, %rd55;
add.s64 %rd6, %rd50, %rd55;
mov.u32 %r208, 0;
mov.u32 %r10309, %r208;
BB4_5:
xor.b32 %r222, %r10351, %r3;
mov.u32 %r2499, 24;
mov.u32 %r2512, 8;
// inline asm
bfe.u32 %r209, %r222, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd63, %r209, 4;
add.s64 %rd64, %rd42, %rd63;
ld.shared.u32 %r2514, [%rd64];
mov.u32 %r2503, 16;
// inline asm
bfe.u32 %r213, %r222, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd67, %r213, 4;
add.s64 %rd68, %rd45, %rd67;
ld.shared.u32 %r2515, [%rd68];
add.s32 %r2516, %r2515, %r2514;
// inline asm
bfe.u32 %r217, %r222, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd71, %r217, 4;
add.s64 %rd72, %rd48, %rd71;
ld.shared.u32 %r2517, [%rd72];
xor.b32 %r2518, %r2517, %r2516;
// inline asm
bfe.u32 %r221, %r222, %r208, %r2512;
// inline asm
mul.wide.u32 %rd75, %r221, 4;
add.s64 %rd76, %rd51, %rd75;
ld.shared.u32 %r2519, [%rd76];
add.s32 %r2520, %r2519, %r2518;
xor.b32 %r2521, %r10350, %r4;
xor.b32 %r238, %r2521, %r2520;
// inline asm
bfe.u32 %r225, %r238, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd77, %r225, 4;
add.s64 %rd78, %rd42, %rd77;
ld.shared.u32 %r2522, [%rd78];
// inline asm
bfe.u32 %r229, %r238, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd79, %r229, 4;
add.s64 %rd80, %rd45, %rd79;
ld.shared.u32 %r2523, [%rd80];
add.s32 %r2524, %r2523, %r2522;
// inline asm
bfe.u32 %r233, %r238, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd81, %r233, 4;
add.s64 %rd82, %rd48, %rd81;
ld.shared.u32 %r2525, [%rd82];
xor.b32 %r2526, %r2525, %r2524;
// inline asm
bfe.u32 %r237, %r238, %r208, %r2512;
// inline asm
mul.wide.u32 %rd83, %r237, 4;
add.s64 %rd84, %rd51, %rd83;
ld.shared.u32 %r2527, [%rd84];
add.s32 %r2528, %r2527, %r2526;
xor.b32 %r2529, %r10349, %r5;
xor.b32 %r2530, %r2529, %r222;
xor.b32 %r254, %r2530, %r2528;
// inline asm
bfe.u32 %r241, %r254, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd85, %r241, 4;
add.s64 %rd86, %rd42, %rd85;
ld.shared.u32 %r2531, [%rd86];
// inline asm
bfe.u32 %r245, %r254, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd87, %r245, 4;
add.s64 %rd88, %rd45, %rd87;
ld.shared.u32 %r2532, [%rd88];
add.s32 %r2533, %r2532, %r2531;
// inline asm
bfe.u32 %r249, %r254, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd89, %r249, 4;
add.s64 %rd90, %rd48, %rd89;
ld.shared.u32 %r2534, [%rd90];
xor.b32 %r2535, %r2534, %r2533;
// inline asm
bfe.u32 %r253, %r254, %r208, %r2512;
// inline asm
mul.wide.u32 %rd91, %r253, 4;
add.s64 %rd92, %rd51, %rd91;
ld.shared.u32 %r2536, [%rd92];
add.s32 %r2537, %r2536, %r2535;
xor.b32 %r2538, %r10348, %r6;
xor.b32 %r2539, %r2538, %r238;
xor.b32 %r270, %r2539, %r2537;
// inline asm
bfe.u32 %r257, %r270, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd93, %r257, 4;
add.s64 %rd94, %rd42, %rd93;
ld.shared.u32 %r2540, [%rd94];
// inline asm
bfe.u32 %r261, %r270, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd95, %r261, 4;
add.s64 %rd96, %rd45, %rd95;
ld.shared.u32 %r2541, [%rd96];
add.s32 %r2542, %r2541, %r2540;
// inline asm
bfe.u32 %r265, %r270, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd97, %r265, 4;
add.s64 %rd98, %rd48, %rd97;
ld.shared.u32 %r2543, [%rd98];
xor.b32 %r2544, %r2543, %r2542;
// inline asm
bfe.u32 %r269, %r270, %r208, %r2512;
// inline asm
mul.wide.u32 %rd99, %r269, 4;
add.s64 %rd100, %rd51, %rd99;
ld.shared.u32 %r2545, [%rd100];
add.s32 %r2546, %r2545, %r2544;
xor.b32 %r2547, %r10347, %r7;
xor.b32 %r2548, %r2547, %r254;
xor.b32 %r286, %r2548, %r2546;
// inline asm
bfe.u32 %r273, %r286, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd101, %r273, 4;
add.s64 %rd102, %rd42, %rd101;
ld.shared.u32 %r2549, [%rd102];
// inline asm
bfe.u32 %r277, %r286, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd103, %r277, 4;
add.s64 %rd104, %rd45, %rd103;
ld.shared.u32 %r2550, [%rd104];
add.s32 %r2551, %r2550, %r2549;
// inline asm
bfe.u32 %r281, %r286, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd105, %r281, 4;
add.s64 %rd106, %rd48, %rd105;
ld.shared.u32 %r2552, [%rd106];
xor.b32 %r2553, %r2552, %r2551;
// inline asm
bfe.u32 %r285, %r286, %r208, %r2512;
// inline asm
mul.wide.u32 %rd107, %r285, 4;
add.s64 %rd108, %rd51, %rd107;
ld.shared.u32 %r2554, [%rd108];
add.s32 %r2555, %r2554, %r2553;
xor.b32 %r2556, %r10346, %r8;
xor.b32 %r2557, %r2556, %r270;
xor.b32 %r302, %r2557, %r2555;
// inline asm
bfe.u32 %r289, %r302, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd109, %r289, 4;
add.s64 %rd110, %rd42, %rd109;
ld.shared.u32 %r2558, [%rd110];
// inline asm
bfe.u32 %r293, %r302, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd111, %r293, 4;
add.s64 %rd112, %rd45, %rd111;
ld.shared.u32 %r2559, [%rd112];
add.s32 %r2560, %r2559, %r2558;
// inline asm
bfe.u32 %r297, %r302, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd113, %r297, 4;
add.s64 %rd114, %rd48, %rd113;
ld.shared.u32 %r2561, [%rd114];
xor.b32 %r2562, %r2561, %r2560;
// inline asm
bfe.u32 %r301, %r302, %r208, %r2512;
// inline asm
mul.wide.u32 %rd115, %r301, 4;
add.s64 %rd116, %rd51, %rd115;
ld.shared.u32 %r2563, [%rd116];
add.s32 %r2564, %r2563, %r2562;
xor.b32 %r2565, %r10345, %r9;
xor.b32 %r2566, %r2565, %r286;
xor.b32 %r318, %r2566, %r2564;
// inline asm
bfe.u32 %r305, %r318, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd117, %r305, 4;
add.s64 %rd118, %rd42, %rd117;
ld.shared.u32 %r2567, [%rd118];
// inline asm
bfe.u32 %r309, %r318, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd119, %r309, 4;
add.s64 %rd120, %rd45, %rd119;
ld.shared.u32 %r2568, [%rd120];
add.s32 %r2569, %r2568, %r2567;
// inline asm
bfe.u32 %r313, %r318, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd121, %r313, 4;
add.s64 %rd122, %rd48, %rd121;
ld.shared.u32 %r2570, [%rd122];
xor.b32 %r2571, %r2570, %r2569;
// inline asm
bfe.u32 %r317, %r318, %r208, %r2512;
// inline asm
mul.wide.u32 %rd123, %r317, 4;
add.s64 %rd124, %rd51, %rd123;
ld.shared.u32 %r2572, [%rd124];
add.s32 %r2573, %r2572, %r2571;
xor.b32 %r2574, %r10344, %r10;
xor.b32 %r2575, %r2574, %r302;
xor.b32 %r334, %r2575, %r2573;
// inline asm
bfe.u32 %r321, %r334, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd125, %r321, 4;
add.s64 %rd126, %rd42, %rd125;
ld.shared.u32 %r2576, [%rd126];
// inline asm
bfe.u32 %r325, %r334, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd127, %r325, 4;
add.s64 %rd128, %rd45, %rd127;
ld.shared.u32 %r2577, [%rd128];
add.s32 %r2578, %r2577, %r2576;
// inline asm
bfe.u32 %r329, %r334, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd129, %r329, 4;
add.s64 %rd130, %rd48, %rd129;
ld.shared.u32 %r2579, [%rd130];
xor.b32 %r2580, %r2579, %r2578;
// inline asm
bfe.u32 %r333, %r334, %r208, %r2512;
// inline asm
mul.wide.u32 %rd131, %r333, 4;
add.s64 %rd132, %rd51, %rd131;
ld.shared.u32 %r2581, [%rd132];
add.s32 %r2582, %r2581, %r2580;
xor.b32 %r2583, %r10343, %r11;
xor.b32 %r2584, %r2583, %r318;
xor.b32 %r350, %r2584, %r2582;
// inline asm
bfe.u32 %r337, %r350, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd133, %r337, 4;
add.s64 %rd134, %rd42, %rd133;
ld.shared.u32 %r2585, [%rd134];
// inline asm
bfe.u32 %r341, %r350, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd135, %r341, 4;
add.s64 %rd136, %rd45, %rd135;
ld.shared.u32 %r2586, [%rd136];
add.s32 %r2587, %r2586, %r2585;
// inline asm
bfe.u32 %r345, %r350, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd137, %r345, 4;
add.s64 %rd138, %rd48, %rd137;
ld.shared.u32 %r2588, [%rd138];
xor.b32 %r2589, %r2588, %r2587;
// inline asm
bfe.u32 %r349, %r350, %r208, %r2512;
// inline asm
mul.wide.u32 %rd139, %r349, 4;
add.s64 %rd140, %rd51, %rd139;
ld.shared.u32 %r2590, [%rd140];
add.s32 %r2591, %r2590, %r2589;
xor.b32 %r2592, %r10342, %r12;
xor.b32 %r2593, %r2592, %r334;
xor.b32 %r366, %r2593, %r2591;
// inline asm
bfe.u32 %r353, %r366, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd141, %r353, 4;
add.s64 %rd142, %rd42, %rd141;
ld.shared.u32 %r2594, [%rd142];
// inline asm
bfe.u32 %r357, %r366, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd143, %r357, 4;
add.s64 %rd144, %rd45, %rd143;
ld.shared.u32 %r2595, [%rd144];
add.s32 %r2596, %r2595, %r2594;
// inline asm
bfe.u32 %r361, %r366, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd145, %r361, 4;
add.s64 %rd146, %rd48, %rd145;
ld.shared.u32 %r2597, [%rd146];
xor.b32 %r2598, %r2597, %r2596;
// inline asm
bfe.u32 %r365, %r366, %r208, %r2512;
// inline asm
mul.wide.u32 %rd147, %r365, 4;
add.s64 %rd148, %rd51, %rd147;
ld.shared.u32 %r2599, [%rd148];
add.s32 %r2600, %r2599, %r2598;
xor.b32 %r2601, %r10341, %r13;
xor.b32 %r2602, %r2601, %r350;
xor.b32 %r382, %r2602, %r2600;
// inline asm
bfe.u32 %r369, %r382, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd149, %r369, 4;
add.s64 %rd150, %rd42, %rd149;
ld.shared.u32 %r2603, [%rd150];
// inline asm
bfe.u32 %r373, %r382, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd151, %r373, 4;
add.s64 %rd152, %rd45, %rd151;
ld.shared.u32 %r2604, [%rd152];
add.s32 %r2605, %r2604, %r2603;
// inline asm
bfe.u32 %r377, %r382, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd153, %r377, 4;
add.s64 %rd154, %rd48, %rd153;
ld.shared.u32 %r2606, [%rd154];
xor.b32 %r2607, %r2606, %r2605;
// inline asm
bfe.u32 %r381, %r382, %r208, %r2512;
// inline asm
mul.wide.u32 %rd155, %r381, 4;
add.s64 %rd156, %rd51, %rd155;
ld.shared.u32 %r2608, [%rd156];
add.s32 %r2609, %r2608, %r2607;
xor.b32 %r2610, %r10340, %r14;
xor.b32 %r2611, %r2610, %r366;
xor.b32 %r398, %r2611, %r2609;
// inline asm
bfe.u32 %r385, %r398, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd157, %r385, 4;
add.s64 %rd158, %rd42, %rd157;
ld.shared.u32 %r2612, [%rd158];
// inline asm
bfe.u32 %r389, %r398, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd159, %r389, 4;
add.s64 %rd160, %rd45, %rd159;
ld.shared.u32 %r2613, [%rd160];
add.s32 %r2614, %r2613, %r2612;
// inline asm
bfe.u32 %r393, %r398, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd161, %r393, 4;
add.s64 %rd162, %rd48, %rd161;
ld.shared.u32 %r2615, [%rd162];
xor.b32 %r2616, %r2615, %r2614;
// inline asm
bfe.u32 %r397, %r398, %r208, %r2512;
// inline asm
mul.wide.u32 %rd163, %r397, 4;
add.s64 %rd164, %rd51, %rd163;
ld.shared.u32 %r2617, [%rd164];
add.s32 %r2618, %r2617, %r2616;
xor.b32 %r2619, %r10339, %r15;
xor.b32 %r2620, %r2619, %r382;
xor.b32 %r414, %r2620, %r2618;
// inline asm
bfe.u32 %r401, %r414, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd165, %r401, 4;
add.s64 %rd166, %rd42, %rd165;
ld.shared.u32 %r2621, [%rd166];
// inline asm
bfe.u32 %r405, %r414, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd167, %r405, 4;
add.s64 %rd168, %rd45, %rd167;
ld.shared.u32 %r2622, [%rd168];
add.s32 %r2623, %r2622, %r2621;
// inline asm
bfe.u32 %r409, %r414, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd169, %r409, 4;
add.s64 %rd170, %rd48, %rd169;
ld.shared.u32 %r2624, [%rd170];
xor.b32 %r2625, %r2624, %r2623;
// inline asm
bfe.u32 %r413, %r414, %r208, %r2512;
// inline asm
mul.wide.u32 %rd171, %r413, 4;
add.s64 %rd172, %rd51, %rd171;
ld.shared.u32 %r2626, [%rd172];
add.s32 %r2627, %r2626, %r2625;
xor.b32 %r2628, %r10338, %r16;
xor.b32 %r2629, %r2628, %r398;
xor.b32 %r430, %r2629, %r2627;
// inline asm
bfe.u32 %r417, %r430, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd173, %r417, 4;
add.s64 %rd174, %rd42, %rd173;
ld.shared.u32 %r2630, [%rd174];
// inline asm
bfe.u32 %r421, %r430, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd175, %r421, 4;
add.s64 %rd176, %rd45, %rd175;
ld.shared.u32 %r2631, [%rd176];
add.s32 %r2632, %r2631, %r2630;
// inline asm
bfe.u32 %r425, %r430, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd177, %r425, 4;
add.s64 %rd178, %rd48, %rd177;
ld.shared.u32 %r2633, [%rd178];
xor.b32 %r2634, %r2633, %r2632;
// inline asm
bfe.u32 %r429, %r430, %r208, %r2512;
// inline asm
mul.wide.u32 %rd179, %r429, 4;
add.s64 %rd180, %rd51, %rd179;
ld.shared.u32 %r2635, [%rd180];
add.s32 %r2636, %r2635, %r2634;
xor.b32 %r2637, %r10337, %r17;
xor.b32 %r2638, %r2637, %r414;
xor.b32 %r446, %r2638, %r2636;
// inline asm
bfe.u32 %r433, %r446, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd181, %r433, 4;
add.s64 %rd182, %rd42, %rd181;
ld.shared.u32 %r2639, [%rd182];
// inline asm
bfe.u32 %r437, %r446, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd183, %r437, 4;
add.s64 %rd184, %rd45, %rd183;
ld.shared.u32 %r2640, [%rd184];
add.s32 %r2641, %r2640, %r2639;
// inline asm
bfe.u32 %r441, %r446, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd185, %r441, 4;
add.s64 %rd186, %rd48, %rd185;
ld.shared.u32 %r2642, [%rd186];
xor.b32 %r2643, %r2642, %r2641;
// inline asm
bfe.u32 %r445, %r446, %r208, %r2512;
// inline asm
mul.wide.u32 %rd187, %r445, 4;
add.s64 %rd188, %rd51, %rd187;
ld.shared.u32 %r2644, [%rd188];
add.s32 %r2645, %r2644, %r2643;
xor.b32 %r2646, %r10336, %r18;
xor.b32 %r2647, %r2646, %r430;
xor.b32 %r462, %r2647, %r2645;
// inline asm
bfe.u32 %r449, %r462, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd189, %r449, 4;
add.s64 %rd190, %rd42, %rd189;
ld.shared.u32 %r2648, [%rd190];
// inline asm
bfe.u32 %r453, %r462, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd191, %r453, 4;
add.s64 %rd192, %rd45, %rd191;
ld.shared.u32 %r2649, [%rd192];
add.s32 %r2650, %r2649, %r2648;
// inline asm
bfe.u32 %r457, %r462, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd193, %r457, 4;
add.s64 %rd194, %rd48, %rd193;
ld.shared.u32 %r2651, [%rd194];
xor.b32 %r2652, %r2651, %r2650;
// inline asm
bfe.u32 %r461, %r462, %r208, %r2512;
// inline asm
mul.wide.u32 %rd195, %r461, 4;
add.s64 %rd196, %rd51, %rd195;
ld.shared.u32 %r2653, [%rd196];
add.s32 %r2654, %r2653, %r2652;
xor.b32 %r2655, %r10335, %r19;
xor.b32 %r2656, %r2655, %r446;
xor.b32 %r64, %r2656, %r2654;
xor.b32 %r2657, %r10334, %r20;
xor.b32 %r65, %r2657, %r462;
// inline asm
bfe.u32 %r465, %r208, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd197, %r465, 4;
add.s64 %rd198, %rd42, %rd197;
ld.shared.u32 %r2658, [%rd198];
// inline asm
bfe.u32 %r469, %r208, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd199, %r469, 4;
add.s64 %rd200, %rd45, %rd199;
ld.shared.u32 %r2659, [%rd200];
add.s32 %r2660, %r2659, %r2658;
// inline asm
bfe.u32 %r473, %r208, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd201, %r473, 4;
add.s64 %rd202, %rd48, %rd201;
ld.shared.u32 %r2661, [%rd202];
xor.b32 %r2662, %r2661, %r2660;
// inline asm
bfe.u32 %r477, %r208, %r208, %r2512;
// inline asm
mul.wide.u32 %rd203, %r477, 4;
add.s64 %rd204, %rd51, %rd203;
ld.shared.u32 %r2663, [%rd204];
add.s32 %r494, %r2663, %r2662;
// inline asm
bfe.u32 %r481, %r494, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd205, %r481, 4;
add.s64 %rd206, %rd42, %rd205;
ld.shared.u32 %r2664, [%rd206];
// inline asm
bfe.u32 %r485, %r494, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd207, %r485, 4;
add.s64 %rd208, %rd45, %rd207;
ld.shared.u32 %r2665, [%rd208];
add.s32 %r2666, %r2665, %r2664;
// inline asm
bfe.u32 %r489, %r494, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd209, %r489, 4;
add.s64 %rd210, %rd48, %rd209;
ld.shared.u32 %r2667, [%rd210];
xor.b32 %r2668, %r2667, %r2666;
// inline asm
bfe.u32 %r493, %r494, %r208, %r2512;
// inline asm
mul.wide.u32 %rd211, %r493, 4;
add.s64 %rd212, %rd51, %rd211;
ld.shared.u32 %r2669, [%rd212];
add.s32 %r2670, %r2669, %r2668;
xor.b32 %r510, %r2529, %r2670;
// inline asm
bfe.u32 %r497, %r510, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd213, %r497, 4;
add.s64 %rd214, %rd42, %rd213;
ld.shared.u32 %r2671, [%rd214];
// inline asm
bfe.u32 %r501, %r510, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd215, %r501, 4;
add.s64 %rd216, %rd45, %rd215;
ld.shared.u32 %r2672, [%rd216];
add.s32 %r2673, %r2672, %r2671;
// inline asm
bfe.u32 %r505, %r510, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd217, %r505, 4;
add.s64 %rd218, %rd48, %rd217;
ld.shared.u32 %r2674, [%rd218];
xor.b32 %r2675, %r2674, %r2673;
// inline asm
bfe.u32 %r509, %r510, %r208, %r2512;
// inline asm
mul.wide.u32 %rd219, %r509, 4;
add.s64 %rd220, %rd51, %rd219;
ld.shared.u32 %r2676, [%rd220];
add.s32 %r2677, %r2676, %r2675;
xor.b32 %r2678, %r2538, %r494;
xor.b32 %r526, %r2678, %r2677;
// inline asm
bfe.u32 %r513, %r526, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd221, %r513, 4;
add.s64 %rd222, %rd42, %rd221;
ld.shared.u32 %r2679, [%rd222];
// inline asm
bfe.u32 %r517, %r526, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd223, %r517, 4;
add.s64 %rd224, %rd45, %rd223;
ld.shared.u32 %r2680, [%rd224];
add.s32 %r2681, %r2680, %r2679;
// inline asm
bfe.u32 %r521, %r526, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd225, %r521, 4;
add.s64 %rd226, %rd48, %rd225;
ld.shared.u32 %r2682, [%rd226];
xor.b32 %r2683, %r2682, %r2681;
// inline asm
bfe.u32 %r525, %r526, %r208, %r2512;
// inline asm
mul.wide.u32 %rd227, %r525, 4;
add.s64 %rd228, %rd51, %rd227;
ld.shared.u32 %r2684, [%rd228];
add.s32 %r2685, %r2684, %r2683;
xor.b32 %r2686, %r2547, %r510;
xor.b32 %r542, %r2686, %r2685;
// inline asm
bfe.u32 %r529, %r542, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd229, %r529, 4;
add.s64 %rd230, %rd42, %rd229;
ld.shared.u32 %r2687, [%rd230];
// inline asm
bfe.u32 %r533, %r542, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd231, %r533, 4;
add.s64 %rd232, %rd45, %rd231;
ld.shared.u32 %r2688, [%rd232];
add.s32 %r2689, %r2688, %r2687;
// inline asm
bfe.u32 %r537, %r542, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd233, %r537, 4;
add.s64 %rd234, %rd48, %rd233;
ld.shared.u32 %r2690, [%rd234];
xor.b32 %r2691, %r2690, %r2689;
// inline asm
bfe.u32 %r541, %r542, %r208, %r2512;
// inline asm
mul.wide.u32 %rd235, %r541, 4;
add.s64 %rd236, %rd51, %rd235;
ld.shared.u32 %r2692, [%rd236];
add.s32 %r2693, %r2692, %r2691;
xor.b32 %r2694, %r2556, %r526;
xor.b32 %r558, %r2694, %r2693;
// inline asm
bfe.u32 %r545, %r558, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd237, %r545, 4;
add.s64 %rd238, %rd42, %rd237;
ld.shared.u32 %r2695, [%rd238];
// inline asm
bfe.u32 %r549, %r558, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd239, %r549, 4;
add.s64 %rd240, %rd45, %rd239;
ld.shared.u32 %r2696, [%rd240];
add.s32 %r2697, %r2696, %r2695;
// inline asm
bfe.u32 %r553, %r558, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd241, %r553, 4;
add.s64 %rd242, %rd48, %rd241;
ld.shared.u32 %r2698, [%rd242];
xor.b32 %r2699, %r2698, %r2697;
// inline asm
bfe.u32 %r557, %r558, %r208, %r2512;
// inline asm
mul.wide.u32 %rd243, %r557, 4;
add.s64 %rd244, %rd51, %rd243;
ld.shared.u32 %r2700, [%rd244];
add.s32 %r2701, %r2700, %r2699;
xor.b32 %r2702, %r2565, %r542;
xor.b32 %r574, %r2702, %r2701;
// inline asm
bfe.u32 %r561, %r574, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd245, %r561, 4;
add.s64 %rd246, %rd42, %rd245;
ld.shared.u32 %r2703, [%rd246];
// inline asm
bfe.u32 %r565, %r574, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd247, %r565, 4;
add.s64 %rd248, %rd45, %rd247;
ld.shared.u32 %r2704, [%rd248];
add.s32 %r2705, %r2704, %r2703;
// inline asm
bfe.u32 %r569, %r574, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd249, %r569, 4;
add.s64 %rd250, %rd48, %rd249;
ld.shared.u32 %r2706, [%rd250];
xor.b32 %r2707, %r2706, %r2705;
// inline asm
bfe.u32 %r573, %r574, %r208, %r2512;
// inline asm
mul.wide.u32 %rd251, %r573, 4;
add.s64 %rd252, %rd51, %rd251;
ld.shared.u32 %r2708, [%rd252];
add.s32 %r2709, %r2708, %r2707;
xor.b32 %r2710, %r2574, %r558;
xor.b32 %r590, %r2710, %r2709;
// inline asm
bfe.u32 %r577, %r590, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd253, %r577, 4;
add.s64 %rd254, %rd42, %rd253;
ld.shared.u32 %r2711, [%rd254];
// inline asm
bfe.u32 %r581, %r590, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd255, %r581, 4;
add.s64 %rd256, %rd45, %rd255;
ld.shared.u32 %r2712, [%rd256];
add.s32 %r2713, %r2712, %r2711;
// inline asm
bfe.u32 %r585, %r590, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd257, %r585, 4;
add.s64 %rd258, %rd48, %rd257;
ld.shared.u32 %r2714, [%rd258];
xor.b32 %r2715, %r2714, %r2713;
// inline asm
bfe.u32 %r589, %r590, %r208, %r2512;
// inline asm
mul.wide.u32 %rd259, %r589, 4;
add.s64 %rd260, %rd51, %rd259;
ld.shared.u32 %r2716, [%rd260];
add.s32 %r2717, %r2716, %r2715;
xor.b32 %r2718, %r2583, %r574;
xor.b32 %r606, %r2718, %r2717;
// inline asm
bfe.u32 %r593, %r606, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd261, %r593, 4;
add.s64 %rd262, %rd42, %rd261;
ld.shared.u32 %r2719, [%rd262];
// inline asm
bfe.u32 %r597, %r606, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd263, %r597, 4;
add.s64 %rd264, %rd45, %rd263;
ld.shared.u32 %r2720, [%rd264];
add.s32 %r2721, %r2720, %r2719;
// inline asm
bfe.u32 %r601, %r606, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd265, %r601, 4;
add.s64 %rd266, %rd48, %rd265;
ld.shared.u32 %r2722, [%rd266];
xor.b32 %r2723, %r2722, %r2721;
// inline asm
bfe.u32 %r605, %r606, %r208, %r2512;
// inline asm
mul.wide.u32 %rd267, %r605, 4;
add.s64 %rd268, %rd51, %rd267;
ld.shared.u32 %r2724, [%rd268];
add.s32 %r2725, %r2724, %r2723;
xor.b32 %r2726, %r2592, %r590;
xor.b32 %r622, %r2726, %r2725;
// inline asm
bfe.u32 %r609, %r622, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd269, %r609, 4;
add.s64 %rd270, %rd42, %rd269;
ld.shared.u32 %r2727, [%rd270];
// inline asm
bfe.u32 %r613, %r622, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd271, %r613, 4;
add.s64 %rd272, %rd45, %rd271;
ld.shared.u32 %r2728, [%rd272];
add.s32 %r2729, %r2728, %r2727;
// inline asm
bfe.u32 %r617, %r622, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd273, %r617, 4;
add.s64 %rd274, %rd48, %rd273;
ld.shared.u32 %r2730, [%rd274];
xor.b32 %r2731, %r2730, %r2729;
// inline asm
bfe.u32 %r621, %r622, %r208, %r2512;
// inline asm
mul.wide.u32 %rd275, %r621, 4;
add.s64 %rd276, %rd51, %rd275;
ld.shared.u32 %r2732, [%rd276];
add.s32 %r2733, %r2732, %r2731;
xor.b32 %r2734, %r2601, %r606;
xor.b32 %r638, %r2734, %r2733;
// inline asm
bfe.u32 %r625, %r638, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd277, %r625, 4;
add.s64 %rd278, %rd42, %rd277;
ld.shared.u32 %r2735, [%rd278];
// inline asm
bfe.u32 %r629, %r638, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd279, %r629, 4;
add.s64 %rd280, %rd45, %rd279;
ld.shared.u32 %r2736, [%rd280];
add.s32 %r2737, %r2736, %r2735;
// inline asm
bfe.u32 %r633, %r638, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd281, %r633, 4;
add.s64 %rd282, %rd48, %rd281;
ld.shared.u32 %r2738, [%rd282];
xor.b32 %r2739, %r2738, %r2737;
// inline asm
bfe.u32 %r637, %r638, %r208, %r2512;
// inline asm
mul.wide.u32 %rd283, %r637, 4;
add.s64 %rd284, %rd51, %rd283;
ld.shared.u32 %r2740, [%rd284];
add.s32 %r2741, %r2740, %r2739;
xor.b32 %r2742, %r2610, %r622;
xor.b32 %r654, %r2742, %r2741;
// inline asm
bfe.u32 %r641, %r654, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd285, %r641, 4;
add.s64 %rd286, %rd42, %rd285;
ld.shared.u32 %r2743, [%rd286];
// inline asm
bfe.u32 %r645, %r654, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd287, %r645, 4;
add.s64 %rd288, %rd45, %rd287;
ld.shared.u32 %r2744, [%rd288];
add.s32 %r2745, %r2744, %r2743;
// inline asm
bfe.u32 %r649, %r654, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd289, %r649, 4;
add.s64 %rd290, %rd48, %rd289;
ld.shared.u32 %r2746, [%rd290];
xor.b32 %r2747, %r2746, %r2745;
// inline asm
bfe.u32 %r653, %r654, %r208, %r2512;
// inline asm
mul.wide.u32 %rd291, %r653, 4;
add.s64 %rd292, %rd51, %rd291;
ld.shared.u32 %r2748, [%rd292];
add.s32 %r2749, %r2748, %r2747;
xor.b32 %r2750, %r2619, %r638;
xor.b32 %r670, %r2750, %r2749;
// inline asm
bfe.u32 %r657, %r670, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd293, %r657, 4;
add.s64 %rd294, %rd42, %rd293;
ld.shared.u32 %r2751, [%rd294];
// inline asm
bfe.u32 %r661, %r670, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd295, %r661, 4;
add.s64 %rd296, %rd45, %rd295;
ld.shared.u32 %r2752, [%rd296];
add.s32 %r2753, %r2752, %r2751;
// inline asm
bfe.u32 %r665, %r670, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd297, %r665, 4;
add.s64 %rd298, %rd48, %rd297;
ld.shared.u32 %r2754, [%rd298];
xor.b32 %r2755, %r2754, %r2753;
// inline asm
bfe.u32 %r669, %r670, %r208, %r2512;
// inline asm
mul.wide.u32 %rd299, %r669, 4;
add.s64 %rd300, %rd51, %rd299;
ld.shared.u32 %r2756, [%rd300];
add.s32 %r2757, %r2756, %r2755;
xor.b32 %r2758, %r2628, %r654;
xor.b32 %r686, %r2758, %r2757;
// inline asm
bfe.u32 %r673, %r686, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd301, %r673, 4;
add.s64 %rd302, %rd42, %rd301;
ld.shared.u32 %r2759, [%rd302];
// inline asm
bfe.u32 %r677, %r686, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd303, %r677, 4;
add.s64 %rd304, %rd45, %rd303;
ld.shared.u32 %r2760, [%rd304];
add.s32 %r2761, %r2760, %r2759;
// inline asm
bfe.u32 %r681, %r686, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd305, %r681, 4;
add.s64 %rd306, %rd48, %rd305;
ld.shared.u32 %r2762, [%rd306];
xor.b32 %r2763, %r2762, %r2761;
// inline asm
bfe.u32 %r685, %r686, %r208, %r2512;
// inline asm
mul.wide.u32 %rd307, %r685, 4;
add.s64 %rd308, %rd51, %rd307;
ld.shared.u32 %r2764, [%rd308];
add.s32 %r2765, %r2764, %r2763;
xor.b32 %r2766, %r2637, %r670;
xor.b32 %r702, %r2766, %r2765;
// inline asm
bfe.u32 %r689, %r702, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd309, %r689, 4;
add.s64 %rd310, %rd42, %rd309;
ld.shared.u32 %r2767, [%rd310];
// inline asm
bfe.u32 %r693, %r702, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd311, %r693, 4;
add.s64 %rd312, %rd45, %rd311;
ld.shared.u32 %r2768, [%rd312];
add.s32 %r2769, %r2768, %r2767;
// inline asm
bfe.u32 %r697, %r702, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd313, %r697, 4;
add.s64 %rd314, %rd48, %rd313;
ld.shared.u32 %r2770, [%rd314];
xor.b32 %r2771, %r2770, %r2769;
// inline asm
bfe.u32 %r701, %r702, %r208, %r2512;
// inline asm
mul.wide.u32 %rd315, %r701, 4;
add.s64 %rd316, %rd51, %rd315;
ld.shared.u32 %r2772, [%rd316];
add.s32 %r2773, %r2772, %r2771;
xor.b32 %r2774, %r2646, %r686;
xor.b32 %r718, %r2774, %r2773;
// inline asm
bfe.u32 %r705, %r718, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd317, %r705, 4;
add.s64 %rd318, %rd42, %rd317;
ld.shared.u32 %r2775, [%rd318];
// inline asm
bfe.u32 %r709, %r718, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd319, %r709, 4;
add.s64 %rd320, %rd45, %rd319;
ld.shared.u32 %r2776, [%rd320];
add.s32 %r2777, %r2776, %r2775;
// inline asm
bfe.u32 %r713, %r718, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd321, %r713, 4;
add.s64 %rd322, %rd48, %rd321;
ld.shared.u32 %r2778, [%rd322];
xor.b32 %r2779, %r2778, %r2777;
// inline asm
bfe.u32 %r717, %r718, %r208, %r2512;
// inline asm
mul.wide.u32 %rd323, %r717, 4;
add.s64 %rd324, %rd51, %rd323;
ld.shared.u32 %r2780, [%rd324];
add.s32 %r2781, %r2780, %r2779;
xor.b32 %r2782, %r2655, %r702;
xor.b32 %r66, %r2782, %r2781;
xor.b32 %r67, %r2657, %r718;
xor.b32 %r734, %r462, %r718;
// inline asm
bfe.u32 %r721, %r734, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd325, %r721, 4;
add.s64 %rd326, %rd42, %rd325;
ld.shared.u32 %r2783, [%rd326];
// inline asm
bfe.u32 %r725, %r734, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd327, %r725, 4;
add.s64 %rd328, %rd45, %rd327;
ld.shared.u32 %r2784, [%rd328];
add.s32 %r2785, %r2784, %r2783;
// inline asm
bfe.u32 %r729, %r734, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd329, %r729, 4;
add.s64 %rd330, %rd48, %rd329;
ld.shared.u32 %r2786, [%rd330];
xor.b32 %r2787, %r2786, %r2785;
// inline asm
bfe.u32 %r733, %r734, %r208, %r2512;
// inline asm
mul.wide.u32 %rd331, %r733, 4;
add.s64 %rd332, %rd51, %rd331;
ld.shared.u32 %r2788, [%rd332];
add.s32 %r2789, %r2788, %r2787;
xor.b32 %r2790, %r64, %r66;
xor.b32 %r750, %r2790, %r2789;
// inline asm
bfe.u32 %r737, %r750, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd333, %r737, 4;
add.s64 %rd334, %rd42, %rd333;
ld.shared.u32 %r2791, [%rd334];
// inline asm
bfe.u32 %r741, %r750, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd335, %r741, 4;
add.s64 %rd336, %rd45, %rd335;
ld.shared.u32 %r2792, [%rd336];
add.s32 %r2793, %r2792, %r2791;
// inline asm
bfe.u32 %r745, %r750, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd337, %r745, 4;
add.s64 %rd338, %rd48, %rd337;
ld.shared.u32 %r2794, [%rd338];
xor.b32 %r2795, %r2794, %r2793;
// inline asm
bfe.u32 %r749, %r750, %r208, %r2512;
// inline asm
mul.wide.u32 %rd339, %r749, 4;
add.s64 %rd340, %rd51, %rd339;
ld.shared.u32 %r2796, [%rd340];
add.s32 %r2797, %r2796, %r2795;
xor.b32 %r766, %r65, %r2797;
// inline asm
bfe.u32 %r753, %r766, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd341, %r753, 4;
add.s64 %rd342, %rd42, %rd341;
ld.shared.u32 %r2798, [%rd342];
// inline asm
bfe.u32 %r757, %r766, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd343, %r757, 4;
add.s64 %rd344, %rd45, %rd343;
ld.shared.u32 %r2799, [%rd344];
add.s32 %r2800, %r2799, %r2798;
// inline asm
bfe.u32 %r761, %r766, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd345, %r761, 4;
add.s64 %rd346, %rd48, %rd345;
ld.shared.u32 %r2801, [%rd346];
xor.b32 %r2802, %r2801, %r2800;
// inline asm
bfe.u32 %r765, %r766, %r208, %r2512;
// inline asm
mul.wide.u32 %rd347, %r765, 4;
add.s64 %rd348, %rd51, %rd347;
ld.shared.u32 %r2803, [%rd348];
add.s32 %r2804, %r2803, %r2802;
xor.b32 %r2805, %r64, %r2789;
xor.b32 %r782, %r2805, %r2804;
// inline asm
bfe.u32 %r769, %r782, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd349, %r769, 4;
add.s64 %rd350, %rd42, %rd349;
ld.shared.u32 %r2806, [%rd350];
// inline asm
bfe.u32 %r773, %r782, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd351, %r773, 4;
add.s64 %rd352, %rd45, %rd351;
ld.shared.u32 %r2807, [%rd352];
add.s32 %r2808, %r2807, %r2806;
// inline asm
bfe.u32 %r777, %r782, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd353, %r777, 4;
add.s64 %rd354, %rd48, %rd353;
ld.shared.u32 %r2809, [%rd354];
xor.b32 %r2810, %r2809, %r2808;
// inline asm
bfe.u32 %r781, %r782, %r208, %r2512;
// inline asm
mul.wide.u32 %rd355, %r781, 4;
add.s64 %rd356, %rd51, %rd355;
ld.shared.u32 %r2811, [%rd356];
add.s32 %r2812, %r2811, %r2810;
xor.b32 %r2813, %r2547, %r766;
xor.b32 %r798, %r2813, %r2812;
// inline asm
bfe.u32 %r785, %r798, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd357, %r785, 4;
add.s64 %rd358, %rd42, %rd357;
ld.shared.u32 %r2814, [%rd358];
// inline asm
bfe.u32 %r789, %r798, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd359, %r789, 4;
add.s64 %rd360, %rd45, %rd359;
ld.shared.u32 %r2815, [%rd360];
add.s32 %r2816, %r2815, %r2814;
// inline asm
bfe.u32 %r793, %r798, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd361, %r793, 4;
add.s64 %rd362, %rd48, %rd361;
ld.shared.u32 %r2817, [%rd362];
xor.b32 %r2818, %r2817, %r2816;
// inline asm
bfe.u32 %r797, %r798, %r208, %r2512;
// inline asm
mul.wide.u32 %rd363, %r797, 4;
add.s64 %rd364, %rd51, %rd363;
ld.shared.u32 %r2819, [%rd364];
add.s32 %r2820, %r2819, %r2818;
xor.b32 %r2821, %r2556, %r782;
xor.b32 %r814, %r2821, %r2820;
// inline asm
bfe.u32 %r801, %r814, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd365, %r801, 4;
add.s64 %rd366, %rd42, %rd365;
ld.shared.u32 %r2822, [%rd366];
// inline asm
bfe.u32 %r805, %r814, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd367, %r805, 4;
add.s64 %rd368, %rd45, %rd367;
ld.shared.u32 %r2823, [%rd368];
add.s32 %r2824, %r2823, %r2822;
// inline asm
bfe.u32 %r809, %r814, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd369, %r809, 4;
add.s64 %rd370, %rd48, %rd369;
ld.shared.u32 %r2825, [%rd370];
xor.b32 %r2826, %r2825, %r2824;
// inline asm
bfe.u32 %r813, %r814, %r208, %r2512;
// inline asm
mul.wide.u32 %rd371, %r813, 4;
add.s64 %rd372, %rd51, %rd371;
ld.shared.u32 %r2827, [%rd372];
add.s32 %r2828, %r2827, %r2826;
xor.b32 %r2829, %r2565, %r798;
xor.b32 %r830, %r2829, %r2828;
// inline asm
bfe.u32 %r817, %r830, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd373, %r817, 4;
add.s64 %rd374, %rd42, %rd373;
ld.shared.u32 %r2830, [%rd374];
// inline asm
bfe.u32 %r821, %r830, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd375, %r821, 4;
add.s64 %rd376, %rd45, %rd375;
ld.shared.u32 %r2831, [%rd376];
add.s32 %r2832, %r2831, %r2830;
// inline asm
bfe.u32 %r825, %r830, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd377, %r825, 4;
add.s64 %rd378, %rd48, %rd377;
ld.shared.u32 %r2833, [%rd378];
xor.b32 %r2834, %r2833, %r2832;
// inline asm
bfe.u32 %r829, %r830, %r208, %r2512;
// inline asm
mul.wide.u32 %rd379, %r829, 4;
add.s64 %rd380, %rd51, %rd379;
ld.shared.u32 %r2835, [%rd380];
add.s32 %r2836, %r2835, %r2834;
xor.b32 %r2837, %r2574, %r814;
xor.b32 %r846, %r2837, %r2836;
// inline asm
bfe.u32 %r833, %r846, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd381, %r833, 4;
add.s64 %rd382, %rd42, %rd381;
ld.shared.u32 %r2838, [%rd382];
// inline asm
bfe.u32 %r837, %r846, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd383, %r837, 4;
add.s64 %rd384, %rd45, %rd383;
ld.shared.u32 %r2839, [%rd384];
add.s32 %r2840, %r2839, %r2838;
// inline asm
bfe.u32 %r841, %r846, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd385, %r841, 4;
add.s64 %rd386, %rd48, %rd385;
ld.shared.u32 %r2841, [%rd386];
xor.b32 %r2842, %r2841, %r2840;
// inline asm
bfe.u32 %r845, %r846, %r208, %r2512;
// inline asm
mul.wide.u32 %rd387, %r845, 4;
add.s64 %rd388, %rd51, %rd387;
ld.shared.u32 %r2843, [%rd388];
add.s32 %r2844, %r2843, %r2842;
xor.b32 %r2845, %r2583, %r830;
xor.b32 %r862, %r2845, %r2844;
// inline asm
bfe.u32 %r849, %r862, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd389, %r849, 4;
add.s64 %rd390, %rd42, %rd389;
ld.shared.u32 %r2846, [%rd390];
// inline asm
bfe.u32 %r853, %r862, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd391, %r853, 4;
add.s64 %rd392, %rd45, %rd391;
ld.shared.u32 %r2847, [%rd392];
add.s32 %r2848, %r2847, %r2846;
// inline asm
bfe.u32 %r857, %r862, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd393, %r857, 4;
add.s64 %rd394, %rd48, %rd393;
ld.shared.u32 %r2849, [%rd394];
xor.b32 %r2850, %r2849, %r2848;
// inline asm
bfe.u32 %r861, %r862, %r208, %r2512;
// inline asm
mul.wide.u32 %rd395, %r861, 4;
add.s64 %rd396, %rd51, %rd395;
ld.shared.u32 %r2851, [%rd396];
add.s32 %r2852, %r2851, %r2850;
xor.b32 %r2853, %r2592, %r846;
xor.b32 %r878, %r2853, %r2852;
// inline asm
bfe.u32 %r865, %r878, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd397, %r865, 4;
add.s64 %rd398, %rd42, %rd397;
ld.shared.u32 %r2854, [%rd398];
// inline asm
bfe.u32 %r869, %r878, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd399, %r869, 4;
add.s64 %rd400, %rd45, %rd399;
ld.shared.u32 %r2855, [%rd400];
add.s32 %r2856, %r2855, %r2854;
// inline asm
bfe.u32 %r873, %r878, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd401, %r873, 4;
add.s64 %rd402, %rd48, %rd401;
ld.shared.u32 %r2857, [%rd402];
xor.b32 %r2858, %r2857, %r2856;
// inline asm
bfe.u32 %r877, %r878, %r208, %r2512;
// inline asm
mul.wide.u32 %rd403, %r877, 4;
add.s64 %rd404, %rd51, %rd403;
ld.shared.u32 %r2859, [%rd404];
add.s32 %r2860, %r2859, %r2858;
xor.b32 %r2861, %r2601, %r862;
xor.b32 %r894, %r2861, %r2860;
// inline asm
bfe.u32 %r881, %r894, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd405, %r881, 4;
add.s64 %rd406, %rd42, %rd405;
ld.shared.u32 %r2862, [%rd406];
// inline asm
bfe.u32 %r885, %r894, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd407, %r885, 4;
add.s64 %rd408, %rd45, %rd407;
ld.shared.u32 %r2863, [%rd408];
add.s32 %r2864, %r2863, %r2862;
// inline asm
bfe.u32 %r889, %r894, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd409, %r889, 4;
add.s64 %rd410, %rd48, %rd409;
ld.shared.u32 %r2865, [%rd410];
xor.b32 %r2866, %r2865, %r2864;
// inline asm
bfe.u32 %r893, %r894, %r208, %r2512;
// inline asm
mul.wide.u32 %rd411, %r893, 4;
add.s64 %rd412, %rd51, %rd411;
ld.shared.u32 %r2867, [%rd412];
add.s32 %r2868, %r2867, %r2866;
xor.b32 %r2869, %r2610, %r878;
xor.b32 %r910, %r2869, %r2868;
// inline asm
bfe.u32 %r897, %r910, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd413, %r897, 4;
add.s64 %rd414, %rd42, %rd413;
ld.shared.u32 %r2870, [%rd414];
// inline asm
bfe.u32 %r901, %r910, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd415, %r901, 4;
add.s64 %rd416, %rd45, %rd415;
ld.shared.u32 %r2871, [%rd416];
add.s32 %r2872, %r2871, %r2870;
// inline asm
bfe.u32 %r905, %r910, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd417, %r905, 4;
add.s64 %rd418, %rd48, %rd417;
ld.shared.u32 %r2873, [%rd418];
xor.b32 %r2874, %r2873, %r2872;
// inline asm
bfe.u32 %r909, %r910, %r208, %r2512;
// inline asm
mul.wide.u32 %rd419, %r909, 4;
add.s64 %rd420, %rd51, %rd419;
ld.shared.u32 %r2875, [%rd420];
add.s32 %r2876, %r2875, %r2874;
xor.b32 %r2877, %r2619, %r894;
xor.b32 %r926, %r2877, %r2876;
// inline asm
bfe.u32 %r913, %r926, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd421, %r913, 4;
add.s64 %rd422, %rd42, %rd421;
ld.shared.u32 %r2878, [%rd422];
// inline asm
bfe.u32 %r917, %r926, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd423, %r917, 4;
add.s64 %rd424, %rd45, %rd423;
ld.shared.u32 %r2879, [%rd424];
add.s32 %r2880, %r2879, %r2878;
// inline asm
bfe.u32 %r921, %r926, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd425, %r921, 4;
add.s64 %rd426, %rd48, %rd425;
ld.shared.u32 %r2881, [%rd426];
xor.b32 %r2882, %r2881, %r2880;
// inline asm
bfe.u32 %r925, %r926, %r208, %r2512;
// inline asm
mul.wide.u32 %rd427, %r925, 4;
add.s64 %rd428, %rd51, %rd427;
ld.shared.u32 %r2883, [%rd428];
add.s32 %r2884, %r2883, %r2882;
xor.b32 %r2885, %r2628, %r910;
xor.b32 %r942, %r2885, %r2884;
// inline asm
bfe.u32 %r929, %r942, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd429, %r929, 4;
add.s64 %rd430, %rd42, %rd429;
ld.shared.u32 %r2886, [%rd430];
// inline asm
bfe.u32 %r933, %r942, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd431, %r933, 4;
add.s64 %rd432, %rd45, %rd431;
ld.shared.u32 %r2887, [%rd432];
add.s32 %r2888, %r2887, %r2886;
// inline asm
bfe.u32 %r937, %r942, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd433, %r937, 4;
add.s64 %rd434, %rd48, %rd433;
ld.shared.u32 %r2889, [%rd434];
xor.b32 %r2890, %r2889, %r2888;
// inline asm
bfe.u32 %r941, %r942, %r208, %r2512;
// inline asm
mul.wide.u32 %rd435, %r941, 4;
add.s64 %rd436, %rd51, %rd435;
ld.shared.u32 %r2891, [%rd436];
add.s32 %r2892, %r2891, %r2890;
xor.b32 %r2893, %r2637, %r926;
xor.b32 %r958, %r2893, %r2892;
// inline asm
bfe.u32 %r945, %r958, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd437, %r945, 4;
add.s64 %rd438, %rd42, %rd437;
ld.shared.u32 %r2894, [%rd438];
// inline asm
bfe.u32 %r949, %r958, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd439, %r949, 4;
add.s64 %rd440, %rd45, %rd439;
ld.shared.u32 %r2895, [%rd440];
add.s32 %r2896, %r2895, %r2894;
// inline asm
bfe.u32 %r953, %r958, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd441, %r953, 4;
add.s64 %rd442, %rd48, %rd441;
ld.shared.u32 %r2897, [%rd442];
xor.b32 %r2898, %r2897, %r2896;
// inline asm
bfe.u32 %r957, %r958, %r208, %r2512;
// inline asm
mul.wide.u32 %rd443, %r957, 4;
add.s64 %rd444, %rd51, %rd443;
ld.shared.u32 %r2899, [%rd444];
add.s32 %r2900, %r2899, %r2898;
xor.b32 %r2901, %r2646, %r942;
xor.b32 %r974, %r2901, %r2900;
// inline asm
bfe.u32 %r961, %r974, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd445, %r961, 4;
add.s64 %rd446, %rd42, %rd445;
ld.shared.u32 %r2902, [%rd446];
// inline asm
bfe.u32 %r965, %r974, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd447, %r965, 4;
add.s64 %rd448, %rd45, %rd447;
ld.shared.u32 %r2903, [%rd448];
add.s32 %r2904, %r2903, %r2902;
// inline asm
bfe.u32 %r969, %r974, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd449, %r969, 4;
add.s64 %rd450, %rd48, %rd449;
ld.shared.u32 %r2905, [%rd450];
xor.b32 %r2906, %r2905, %r2904;
// inline asm
bfe.u32 %r973, %r974, %r208, %r2512;
// inline asm
mul.wide.u32 %rd451, %r973, 4;
add.s64 %rd452, %rd51, %rd451;
ld.shared.u32 %r2907, [%rd452];
add.s32 %r2908, %r2907, %r2906;
xor.b32 %r2909, %r2655, %r958;
xor.b32 %r68, %r2909, %r2908;
xor.b32 %r69, %r2657, %r974;
xor.b32 %r990, %r462, %r974;
// inline asm
bfe.u32 %r977, %r990, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd453, %r977, 4;
add.s64 %rd454, %rd42, %rd453;
ld.shared.u32 %r2910, [%rd454];
// inline asm
bfe.u32 %r981, %r990, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd455, %r981, 4;
add.s64 %rd456, %rd45, %rd455;
ld.shared.u32 %r2911, [%rd456];
add.s32 %r2912, %r2911, %r2910;
// inline asm
bfe.u32 %r985, %r990, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd457, %r985, 4;
add.s64 %rd458, %rd48, %rd457;
ld.shared.u32 %r2913, [%rd458];
xor.b32 %r2914, %r2913, %r2912;
// inline asm
bfe.u32 %r989, %r990, %r208, %r2512;
// inline asm
mul.wide.u32 %rd459, %r989, 4;
add.s64 %rd460, %rd51, %rd459;
ld.shared.u32 %r2915, [%rd460];
add.s32 %r2916, %r2915, %r2914;
xor.b32 %r2917, %r64, %r68;
xor.b32 %r1006, %r2917, %r2916;
// inline asm
bfe.u32 %r993, %r1006, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd461, %r993, 4;
add.s64 %rd462, %rd42, %rd461;
ld.shared.u32 %r2918, [%rd462];
// inline asm
bfe.u32 %r997, %r1006, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd463, %r997, 4;
add.s64 %rd464, %rd45, %rd463;
ld.shared.u32 %r2919, [%rd464];
add.s32 %r2920, %r2919, %r2918;
// inline asm
bfe.u32 %r1001, %r1006, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd465, %r1001, 4;
add.s64 %rd466, %rd48, %rd465;
ld.shared.u32 %r2921, [%rd466];
xor.b32 %r2922, %r2921, %r2920;
// inline asm
bfe.u32 %r1005, %r1006, %r208, %r2512;
// inline asm
mul.wide.u32 %rd467, %r1005, 4;
add.s64 %rd468, %rd51, %rd467;
ld.shared.u32 %r2923, [%rd468];
add.s32 %r2924, %r2923, %r2922;
xor.b32 %r2925, %r67, %r990;
xor.b32 %r1022, %r2925, %r2924;
// inline asm
bfe.u32 %r1009, %r1022, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd469, %r1009, 4;
add.s64 %rd470, %rd42, %rd469;
ld.shared.u32 %r2926, [%rd470];
// inline asm
bfe.u32 %r1013, %r1022, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd471, %r1013, 4;
add.s64 %rd472, %rd45, %rd471;
ld.shared.u32 %r2927, [%rd472];
add.s32 %r2928, %r2927, %r2926;
// inline asm
bfe.u32 %r1017, %r1022, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd473, %r1017, 4;
add.s64 %rd474, %rd48, %rd473;
ld.shared.u32 %r2929, [%rd474];
xor.b32 %r2930, %r2929, %r2928;
// inline asm
bfe.u32 %r1021, %r1022, %r208, %r2512;
// inline asm
mul.wide.u32 %rd475, %r1021, 4;
add.s64 %rd476, %rd51, %rd475;
ld.shared.u32 %r2931, [%rd476];
add.s32 %r2932, %r2931, %r2930;
xor.b32 %r2933, %r66, %r1006;
xor.b32 %r1038, %r2933, %r2932;
// inline asm
bfe.u32 %r1025, %r1038, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd477, %r1025, 4;
add.s64 %rd478, %rd42, %rd477;
ld.shared.u32 %r2934, [%rd478];
// inline asm
bfe.u32 %r1029, %r1038, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd479, %r1029, 4;
add.s64 %rd480, %rd45, %rd479;
ld.shared.u32 %r2935, [%rd480];
add.s32 %r2936, %r2935, %r2934;
// inline asm
bfe.u32 %r1033, %r1038, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd481, %r1033, 4;
add.s64 %rd482, %rd48, %rd481;
ld.shared.u32 %r2937, [%rd482];
xor.b32 %r2938, %r2937, %r2936;
// inline asm
bfe.u32 %r1037, %r1038, %r208, %r2512;
// inline asm
mul.wide.u32 %rd483, %r1037, 4;
add.s64 %rd484, %rd51, %rd483;
ld.shared.u32 %r2939, [%rd484];
add.s32 %r2940, %r2939, %r2938;
xor.b32 %r2941, %r69, %r1022;
xor.b32 %r1054, %r2941, %r2940;
// inline asm
bfe.u32 %r1041, %r1054, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd485, %r1041, 4;
add.s64 %rd486, %rd42, %rd485;
ld.shared.u32 %r2942, [%rd486];
// inline asm
bfe.u32 %r1045, %r1054, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd487, %r1045, 4;
add.s64 %rd488, %rd45, %rd487;
ld.shared.u32 %r2943, [%rd488];
add.s32 %r2944, %r2943, %r2942;
// inline asm
bfe.u32 %r1049, %r1054, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd489, %r1049, 4;
add.s64 %rd490, %rd48, %rd489;
ld.shared.u32 %r2945, [%rd490];
xor.b32 %r2946, %r2945, %r2944;
// inline asm
bfe.u32 %r1053, %r1054, %r208, %r2512;
// inline asm
mul.wide.u32 %rd491, %r1053, 4;
add.s64 %rd492, %rd51, %rd491;
ld.shared.u32 %r2947, [%rd492];
add.s32 %r2948, %r2947, %r2946;
xor.b32 %r2949, %r68, %r1038;
xor.b32 %r1070, %r2949, %r2948;
// inline asm
bfe.u32 %r1057, %r1070, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd493, %r1057, 4;
add.s64 %rd494, %rd42, %rd493;
ld.shared.u32 %r2950, [%rd494];
// inline asm
bfe.u32 %r1061, %r1070, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd495, %r1061, 4;
add.s64 %rd496, %rd45, %rd495;
ld.shared.u32 %r2951, [%rd496];
add.s32 %r2952, %r2951, %r2950;
// inline asm
bfe.u32 %r1065, %r1070, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd497, %r1065, 4;
add.s64 %rd498, %rd48, %rd497;
ld.shared.u32 %r2953, [%rd498];
xor.b32 %r2954, %r2953, %r2952;
// inline asm
bfe.u32 %r1069, %r1070, %r208, %r2512;
// inline asm
mul.wide.u32 %rd499, %r1069, 4;
add.s64 %rd500, %rd51, %rd499;
ld.shared.u32 %r2955, [%rd500];
add.s32 %r2956, %r2955, %r2954;
xor.b32 %r2957, %r2565, %r1054;
xor.b32 %r1086, %r2957, %r2956;
// inline asm
bfe.u32 %r1073, %r1086, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd501, %r1073, 4;
add.s64 %rd502, %rd42, %rd501;
ld.shared.u32 %r2958, [%rd502];
// inline asm
bfe.u32 %r1077, %r1086, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd503, %r1077, 4;
add.s64 %rd504, %rd45, %rd503;
ld.shared.u32 %r2959, [%rd504];
add.s32 %r2960, %r2959, %r2958;
// inline asm
bfe.u32 %r1081, %r1086, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd505, %r1081, 4;
add.s64 %rd506, %rd48, %rd505;
ld.shared.u32 %r2961, [%rd506];
xor.b32 %r2962, %r2961, %r2960;
// inline asm
bfe.u32 %r1085, %r1086, %r208, %r2512;
// inline asm
mul.wide.u32 %rd507, %r1085, 4;
add.s64 %rd508, %rd51, %rd507;
ld.shared.u32 %r2963, [%rd508];
add.s32 %r2964, %r2963, %r2962;
xor.b32 %r2965, %r2574, %r1070;
xor.b32 %r1102, %r2965, %r2964;
// inline asm
bfe.u32 %r1089, %r1102, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd509, %r1089, 4;
add.s64 %rd510, %rd42, %rd509;
ld.shared.u32 %r2966, [%rd510];
// inline asm
bfe.u32 %r1093, %r1102, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd511, %r1093, 4;
add.s64 %rd512, %rd45, %rd511;
ld.shared.u32 %r2967, [%rd512];
add.s32 %r2968, %r2967, %r2966;
// inline asm
bfe.u32 %r1097, %r1102, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd513, %r1097, 4;
add.s64 %rd514, %rd48, %rd513;
ld.shared.u32 %r2969, [%rd514];
xor.b32 %r2970, %r2969, %r2968;
// inline asm
bfe.u32 %r1101, %r1102, %r208, %r2512;
// inline asm
mul.wide.u32 %rd515, %r1101, 4;
add.s64 %rd516, %rd51, %rd515;
ld.shared.u32 %r2971, [%rd516];
add.s32 %r2972, %r2971, %r2970;
xor.b32 %r2973, %r2583, %r1086;
xor.b32 %r1118, %r2973, %r2972;
// inline asm
bfe.u32 %r1105, %r1118, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd517, %r1105, 4;
add.s64 %rd518, %rd42, %rd517;
ld.shared.u32 %r2974, [%rd518];
// inline asm
bfe.u32 %r1109, %r1118, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd519, %r1109, 4;
add.s64 %rd520, %rd45, %rd519;
ld.shared.u32 %r2975, [%rd520];
add.s32 %r2976, %r2975, %r2974;
// inline asm
bfe.u32 %r1113, %r1118, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd521, %r1113, 4;
add.s64 %rd522, %rd48, %rd521;
ld.shared.u32 %r2977, [%rd522];
xor.b32 %r2978, %r2977, %r2976;
// inline asm
bfe.u32 %r1117, %r1118, %r208, %r2512;
// inline asm
mul.wide.u32 %rd523, %r1117, 4;
add.s64 %rd524, %rd51, %rd523;
ld.shared.u32 %r2979, [%rd524];
add.s32 %r2980, %r2979, %r2978;
xor.b32 %r2981, %r2592, %r1102;
xor.b32 %r1134, %r2981, %r2980;
// inline asm
bfe.u32 %r1121, %r1134, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd525, %r1121, 4;
add.s64 %rd526, %rd42, %rd525;
ld.shared.u32 %r2982, [%rd526];
// inline asm
bfe.u32 %r1125, %r1134, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd527, %r1125, 4;
add.s64 %rd528, %rd45, %rd527;
ld.shared.u32 %r2983, [%rd528];
add.s32 %r2984, %r2983, %r2982;
// inline asm
bfe.u32 %r1129, %r1134, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd529, %r1129, 4;
add.s64 %rd530, %rd48, %rd529;
ld.shared.u32 %r2985, [%rd530];
xor.b32 %r2986, %r2985, %r2984;
// inline asm
bfe.u32 %r1133, %r1134, %r208, %r2512;
// inline asm
mul.wide.u32 %rd531, %r1133, 4;
add.s64 %rd532, %rd51, %rd531;
ld.shared.u32 %r2987, [%rd532];
add.s32 %r2988, %r2987, %r2986;
xor.b32 %r2989, %r2601, %r1118;
xor.b32 %r1150, %r2989, %r2988;
// inline asm
bfe.u32 %r1137, %r1150, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd533, %r1137, 4;
add.s64 %rd534, %rd42, %rd533;
ld.shared.u32 %r2990, [%rd534];
// inline asm
bfe.u32 %r1141, %r1150, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd535, %r1141, 4;
add.s64 %rd536, %rd45, %rd535;
ld.shared.u32 %r2991, [%rd536];
add.s32 %r2992, %r2991, %r2990;
// inline asm
bfe.u32 %r1145, %r1150, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd537, %r1145, 4;
add.s64 %rd538, %rd48, %rd537;
ld.shared.u32 %r2993, [%rd538];
xor.b32 %r2994, %r2993, %r2992;
// inline asm
bfe.u32 %r1149, %r1150, %r208, %r2512;
// inline asm
mul.wide.u32 %rd539, %r1149, 4;
add.s64 %rd540, %rd51, %rd539;
ld.shared.u32 %r2995, [%rd540];
add.s32 %r2996, %r2995, %r2994;
xor.b32 %r2997, %r2610, %r1134;
xor.b32 %r1166, %r2997, %r2996;
// inline asm
bfe.u32 %r1153, %r1166, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd541, %r1153, 4;
add.s64 %rd542, %rd42, %rd541;
ld.shared.u32 %r2998, [%rd542];
// inline asm
bfe.u32 %r1157, %r1166, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd543, %r1157, 4;
add.s64 %rd544, %rd45, %rd543;
ld.shared.u32 %r2999, [%rd544];
add.s32 %r3000, %r2999, %r2998;
// inline asm
bfe.u32 %r1161, %r1166, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd545, %r1161, 4;
add.s64 %rd546, %rd48, %rd545;
ld.shared.u32 %r3001, [%rd546];
xor.b32 %r3002, %r3001, %r3000;
// inline asm
bfe.u32 %r1165, %r1166, %r208, %r2512;
// inline asm
mul.wide.u32 %rd547, %r1165, 4;
add.s64 %rd548, %rd51, %rd547;
ld.shared.u32 %r3003, [%rd548];
add.s32 %r3004, %r3003, %r3002;
xor.b32 %r3005, %r2619, %r1150;
xor.b32 %r1182, %r3005, %r3004;
// inline asm
bfe.u32 %r1169, %r1182, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd549, %r1169, 4;
add.s64 %rd550, %rd42, %rd549;
ld.shared.u32 %r3006, [%rd550];
// inline asm
bfe.u32 %r1173, %r1182, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd551, %r1173, 4;
add.s64 %rd552, %rd45, %rd551;
ld.shared.u32 %r3007, [%rd552];
add.s32 %r3008, %r3007, %r3006;
// inline asm
bfe.u32 %r1177, %r1182, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd553, %r1177, 4;
add.s64 %rd554, %rd48, %rd553;
ld.shared.u32 %r3009, [%rd554];
xor.b32 %r3010, %r3009, %r3008;
// inline asm
bfe.u32 %r1181, %r1182, %r208, %r2512;
// inline asm
mul.wide.u32 %rd555, %r1181, 4;
add.s64 %rd556, %rd51, %rd555;
ld.shared.u32 %r3011, [%rd556];
add.s32 %r3012, %r3011, %r3010;
xor.b32 %r3013, %r2628, %r1166;
xor.b32 %r1198, %r3013, %r3012;
// inline asm
bfe.u32 %r1185, %r1198, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd557, %r1185, 4;
add.s64 %rd558, %rd42, %rd557;
ld.shared.u32 %r3014, [%rd558];
// inline asm
bfe.u32 %r1189, %r1198, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd559, %r1189, 4;
add.s64 %rd560, %rd45, %rd559;
ld.shared.u32 %r3015, [%rd560];
add.s32 %r3016, %r3015, %r3014;
// inline asm
bfe.u32 %r1193, %r1198, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd561, %r1193, 4;
add.s64 %rd562, %rd48, %rd561;
ld.shared.u32 %r3017, [%rd562];
xor.b32 %r3018, %r3017, %r3016;
// inline asm
bfe.u32 %r1197, %r1198, %r208, %r2512;
// inline asm
mul.wide.u32 %rd563, %r1197, 4;
add.s64 %rd564, %rd51, %rd563;
ld.shared.u32 %r3019, [%rd564];
add.s32 %r3020, %r3019, %r3018;
xor.b32 %r3021, %r2637, %r1182;
xor.b32 %r1214, %r3021, %r3020;
// inline asm
bfe.u32 %r1201, %r1214, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd565, %r1201, 4;
add.s64 %rd566, %rd42, %rd565;
ld.shared.u32 %r3022, [%rd566];
// inline asm
bfe.u32 %r1205, %r1214, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd567, %r1205, 4;
add.s64 %rd568, %rd45, %rd567;
ld.shared.u32 %r3023, [%rd568];
add.s32 %r3024, %r3023, %r3022;
// inline asm
bfe.u32 %r1209, %r1214, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd569, %r1209, 4;
add.s64 %rd570, %rd48, %rd569;
ld.shared.u32 %r3025, [%rd570];
xor.b32 %r3026, %r3025, %r3024;
// inline asm
bfe.u32 %r1213, %r1214, %r208, %r2512;
// inline asm
mul.wide.u32 %rd571, %r1213, 4;
add.s64 %rd572, %rd51, %rd571;
ld.shared.u32 %r3027, [%rd572];
add.s32 %r3028, %r3027, %r3026;
xor.b32 %r3029, %r2646, %r1198;
xor.b32 %r1230, %r3029, %r3028;
// inline asm
bfe.u32 %r1217, %r1230, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd573, %r1217, 4;
add.s64 %rd574, %rd42, %rd573;
ld.shared.u32 %r3030, [%rd574];
// inline asm
bfe.u32 %r1221, %r1230, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd575, %r1221, 4;
add.s64 %rd576, %rd45, %rd575;
ld.shared.u32 %r3031, [%rd576];
add.s32 %r3032, %r3031, %r3030;
// inline asm
bfe.u32 %r1225, %r1230, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd577, %r1225, 4;
add.s64 %rd578, %rd48, %rd577;
ld.shared.u32 %r3033, [%rd578];
xor.b32 %r3034, %r3033, %r3032;
// inline asm
bfe.u32 %r1229, %r1230, %r208, %r2512;
// inline asm
mul.wide.u32 %rd579, %r1229, 4;
add.s64 %rd580, %rd51, %rd579;
ld.shared.u32 %r3035, [%rd580];
add.s32 %r3036, %r3035, %r3034;
xor.b32 %r3037, %r2655, %r1214;
xor.b32 %r70, %r3037, %r3036;
xor.b32 %r71, %r2657, %r1230;
xor.b32 %r1246, %r462, %r1230;
// inline asm
bfe.u32 %r1233, %r1246, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd581, %r1233, 4;
add.s64 %rd582, %rd42, %rd581;
ld.shared.u32 %r3038, [%rd582];
// inline asm
bfe.u32 %r1237, %r1246, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd583, %r1237, 4;
add.s64 %rd584, %rd45, %rd583;
ld.shared.u32 %r3039, [%rd584];
add.s32 %r3040, %r3039, %r3038;
// inline asm
bfe.u32 %r1241, %r1246, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd585, %r1241, 4;
add.s64 %rd586, %rd48, %rd585;
ld.shared.u32 %r3041, [%rd586];
xor.b32 %r3042, %r3041, %r3040;
// inline asm
bfe.u32 %r1245, %r1246, %r208, %r2512;
// inline asm
mul.wide.u32 %rd587, %r1245, 4;
add.s64 %rd588, %rd51, %rd587;
ld.shared.u32 %r3043, [%rd588];
add.s32 %r3044, %r3043, %r3042;
xor.b32 %r3045, %r64, %r70;
xor.b32 %r1262, %r3045, %r3044;
// inline asm
bfe.u32 %r1249, %r1262, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd589, %r1249, 4;
add.s64 %rd590, %rd42, %rd589;
ld.shared.u32 %r3046, [%rd590];
// inline asm
bfe.u32 %r1253, %r1262, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd591, %r1253, 4;
add.s64 %rd592, %rd45, %rd591;
ld.shared.u32 %r3047, [%rd592];
add.s32 %r3048, %r3047, %r3046;
// inline asm
bfe.u32 %r1257, %r1262, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd593, %r1257, 4;
add.s64 %rd594, %rd48, %rd593;
ld.shared.u32 %r3049, [%rd594];
xor.b32 %r3050, %r3049, %r3048;
// inline asm
bfe.u32 %r1261, %r1262, %r208, %r2512;
// inline asm
mul.wide.u32 %rd595, %r1261, 4;
add.s64 %rd596, %rd51, %rd595;
ld.shared.u32 %r3051, [%rd596];
add.s32 %r3052, %r3051, %r3050;
xor.b32 %r3053, %r67, %r1246;
xor.b32 %r1278, %r3053, %r3052;
// inline asm
bfe.u32 %r1265, %r1278, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd597, %r1265, 4;
add.s64 %rd598, %rd42, %rd597;
ld.shared.u32 %r3054, [%rd598];
// inline asm
bfe.u32 %r1269, %r1278, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd599, %r1269, 4;
add.s64 %rd600, %rd45, %rd599;
ld.shared.u32 %r3055, [%rd600];
add.s32 %r3056, %r3055, %r3054;
// inline asm
bfe.u32 %r1273, %r1278, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd601, %r1273, 4;
add.s64 %rd602, %rd48, %rd601;
ld.shared.u32 %r3057, [%rd602];
xor.b32 %r3058, %r3057, %r3056;
// inline asm
bfe.u32 %r1277, %r1278, %r208, %r2512;
// inline asm
mul.wide.u32 %rd603, %r1277, 4;
add.s64 %rd604, %rd51, %rd603;
ld.shared.u32 %r3059, [%rd604];
add.s32 %r3060, %r3059, %r3058;
xor.b32 %r3061, %r66, %r1262;
xor.b32 %r1294, %r3061, %r3060;
// inline asm
bfe.u32 %r1281, %r1294, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd605, %r1281, 4;
add.s64 %rd606, %rd42, %rd605;
ld.shared.u32 %r3062, [%rd606];
// inline asm
bfe.u32 %r1285, %r1294, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd607, %r1285, 4;
add.s64 %rd608, %rd45, %rd607;
ld.shared.u32 %r3063, [%rd608];
add.s32 %r3064, %r3063, %r3062;
// inline asm
bfe.u32 %r1289, %r1294, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd609, %r1289, 4;
add.s64 %rd610, %rd48, %rd609;
ld.shared.u32 %r3065, [%rd610];
xor.b32 %r3066, %r3065, %r3064;
// inline asm
bfe.u32 %r1293, %r1294, %r208, %r2512;
// inline asm
mul.wide.u32 %rd611, %r1293, 4;
add.s64 %rd612, %rd51, %rd611;
ld.shared.u32 %r3067, [%rd612];
add.s32 %r3068, %r3067, %r3066;
xor.b32 %r3069, %r69, %r1278;
xor.b32 %r1310, %r3069, %r3068;
// inline asm
bfe.u32 %r1297, %r1310, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd613, %r1297, 4;
add.s64 %rd614, %rd42, %rd613;
ld.shared.u32 %r3070, [%rd614];
// inline asm
bfe.u32 %r1301, %r1310, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd615, %r1301, 4;
add.s64 %rd616, %rd45, %rd615;
ld.shared.u32 %r3071, [%rd616];
add.s32 %r3072, %r3071, %r3070;
// inline asm
bfe.u32 %r1305, %r1310, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd617, %r1305, 4;
add.s64 %rd618, %rd48, %rd617;
ld.shared.u32 %r3073, [%rd618];
xor.b32 %r3074, %r3073, %r3072;
// inline asm
bfe.u32 %r1309, %r1310, %r208, %r2512;
// inline asm
mul.wide.u32 %rd619, %r1309, 4;
add.s64 %rd620, %rd51, %rd619;
ld.shared.u32 %r3075, [%rd620];
add.s32 %r3076, %r3075, %r3074;
xor.b32 %r3077, %r68, %r1294;
xor.b32 %r1326, %r3077, %r3076;
// inline asm
bfe.u32 %r1313, %r1326, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd621, %r1313, 4;
add.s64 %rd622, %rd42, %rd621;
ld.shared.u32 %r3078, [%rd622];
// inline asm
bfe.u32 %r1317, %r1326, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd623, %r1317, 4;
add.s64 %rd624, %rd45, %rd623;
ld.shared.u32 %r3079, [%rd624];
add.s32 %r3080, %r3079, %r3078;
// inline asm
bfe.u32 %r1321, %r1326, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd625, %r1321, 4;
add.s64 %rd626, %rd48, %rd625;
ld.shared.u32 %r3081, [%rd626];
xor.b32 %r3082, %r3081, %r3080;
// inline asm
bfe.u32 %r1325, %r1326, %r208, %r2512;
// inline asm
mul.wide.u32 %rd627, %r1325, 4;
add.s64 %rd628, %rd51, %rd627;
ld.shared.u32 %r3083, [%rd628];
add.s32 %r3084, %r3083, %r3082;
xor.b32 %r3085, %r71, %r1310;
xor.b32 %r1342, %r3085, %r3084;
// inline asm
bfe.u32 %r1329, %r1342, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd629, %r1329, 4;
add.s64 %rd630, %rd42, %rd629;
ld.shared.u32 %r3086, [%rd630];
// inline asm
bfe.u32 %r1333, %r1342, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd631, %r1333, 4;
add.s64 %rd632, %rd45, %rd631;
ld.shared.u32 %r3087, [%rd632];
add.s32 %r3088, %r3087, %r3086;
// inline asm
bfe.u32 %r1337, %r1342, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd633, %r1337, 4;
add.s64 %rd634, %rd48, %rd633;
ld.shared.u32 %r3089, [%rd634];
xor.b32 %r3090, %r3089, %r3088;
// inline asm
bfe.u32 %r1341, %r1342, %r208, %r2512;
// inline asm
mul.wide.u32 %rd635, %r1341, 4;
add.s64 %rd636, %rd51, %rd635;
ld.shared.u32 %r3091, [%rd636];
add.s32 %r3092, %r3091, %r3090;
xor.b32 %r3093, %r70, %r1326;
xor.b32 %r1358, %r3093, %r3092;
// inline asm
bfe.u32 %r1345, %r1358, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd637, %r1345, 4;
add.s64 %rd638, %rd42, %rd637;
ld.shared.u32 %r3094, [%rd638];
// inline asm
bfe.u32 %r1349, %r1358, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd639, %r1349, 4;
add.s64 %rd640, %rd45, %rd639;
ld.shared.u32 %r3095, [%rd640];
add.s32 %r3096, %r3095, %r3094;
// inline asm
bfe.u32 %r1353, %r1358, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd641, %r1353, 4;
add.s64 %rd642, %rd48, %rd641;
ld.shared.u32 %r3097, [%rd642];
xor.b32 %r3098, %r3097, %r3096;
// inline asm
bfe.u32 %r1357, %r1358, %r208, %r2512;
// inline asm
mul.wide.u32 %rd643, %r1357, 4;
add.s64 %rd644, %rd51, %rd643;
ld.shared.u32 %r3099, [%rd644];
add.s32 %r3100, %r3099, %r3098;
xor.b32 %r3101, %r2583, %r1342;
xor.b32 %r1374, %r3101, %r3100;
// inline asm
bfe.u32 %r1361, %r1374, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd645, %r1361, 4;
add.s64 %rd646, %rd42, %rd645;
ld.shared.u32 %r3102, [%rd646];
// inline asm
bfe.u32 %r1365, %r1374, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd647, %r1365, 4;
add.s64 %rd648, %rd45, %rd647;
ld.shared.u32 %r3103, [%rd648];
add.s32 %r3104, %r3103, %r3102;
// inline asm
bfe.u32 %r1369, %r1374, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd649, %r1369, 4;
add.s64 %rd650, %rd48, %rd649;
ld.shared.u32 %r3105, [%rd650];
xor.b32 %r3106, %r3105, %r3104;
// inline asm
bfe.u32 %r1373, %r1374, %r208, %r2512;
// inline asm
mul.wide.u32 %rd651, %r1373, 4;
add.s64 %rd652, %rd51, %rd651;
ld.shared.u32 %r3107, [%rd652];
add.s32 %r3108, %r3107, %r3106;
xor.b32 %r3109, %r2592, %r1358;
xor.b32 %r1390, %r3109, %r3108;
// inline asm
bfe.u32 %r1377, %r1390, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd653, %r1377, 4;
add.s64 %rd654, %rd42, %rd653;
ld.shared.u32 %r3110, [%rd654];
// inline asm
bfe.u32 %r1381, %r1390, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd655, %r1381, 4;
add.s64 %rd656, %rd45, %rd655;
ld.shared.u32 %r3111, [%rd656];
add.s32 %r3112, %r3111, %r3110;
// inline asm
bfe.u32 %r1385, %r1390, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd657, %r1385, 4;
add.s64 %rd658, %rd48, %rd657;
ld.shared.u32 %r3113, [%rd658];
xor.b32 %r3114, %r3113, %r3112;
// inline asm
bfe.u32 %r1389, %r1390, %r208, %r2512;
// inline asm
mul.wide.u32 %rd659, %r1389, 4;
add.s64 %rd660, %rd51, %rd659;
ld.shared.u32 %r3115, [%rd660];
add.s32 %r3116, %r3115, %r3114;
xor.b32 %r3117, %r2601, %r1374;
xor.b32 %r1406, %r3117, %r3116;
// inline asm
bfe.u32 %r1393, %r1406, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd661, %r1393, 4;
add.s64 %rd662, %rd42, %rd661;
ld.shared.u32 %r3118, [%rd662];
// inline asm
bfe.u32 %r1397, %r1406, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd663, %r1397, 4;
add.s64 %rd664, %rd45, %rd663;
ld.shared.u32 %r3119, [%rd664];
add.s32 %r3120, %r3119, %r3118;
// inline asm
bfe.u32 %r1401, %r1406, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd665, %r1401, 4;
add.s64 %rd666, %rd48, %rd665;
ld.shared.u32 %r3121, [%rd666];
xor.b32 %r3122, %r3121, %r3120;
// inline asm
bfe.u32 %r1405, %r1406, %r208, %r2512;
// inline asm
mul.wide.u32 %rd667, %r1405, 4;
add.s64 %rd668, %rd51, %rd667;
ld.shared.u32 %r3123, [%rd668];
add.s32 %r3124, %r3123, %r3122;
xor.b32 %r3125, %r2610, %r1390;
xor.b32 %r1422, %r3125, %r3124;
// inline asm
bfe.u32 %r1409, %r1422, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd669, %r1409, 4;
add.s64 %rd670, %rd42, %rd669;
ld.shared.u32 %r3126, [%rd670];
// inline asm
bfe.u32 %r1413, %r1422, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd671, %r1413, 4;
add.s64 %rd672, %rd45, %rd671;
ld.shared.u32 %r3127, [%rd672];
add.s32 %r3128, %r3127, %r3126;
// inline asm
bfe.u32 %r1417, %r1422, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd673, %r1417, 4;
add.s64 %rd674, %rd48, %rd673;
ld.shared.u32 %r3129, [%rd674];
xor.b32 %r3130, %r3129, %r3128;
// inline asm
bfe.u32 %r1421, %r1422, %r208, %r2512;
// inline asm
mul.wide.u32 %rd675, %r1421, 4;
add.s64 %rd676, %rd51, %rd675;
ld.shared.u32 %r3131, [%rd676];
add.s32 %r3132, %r3131, %r3130;
xor.b32 %r3133, %r2619, %r1406;
xor.b32 %r1438, %r3133, %r3132;
// inline asm
bfe.u32 %r1425, %r1438, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd677, %r1425, 4;
add.s64 %rd678, %rd42, %rd677;
ld.shared.u32 %r3134, [%rd678];
// inline asm
bfe.u32 %r1429, %r1438, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd679, %r1429, 4;
add.s64 %rd680, %rd45, %rd679;
ld.shared.u32 %r3135, [%rd680];
add.s32 %r3136, %r3135, %r3134;
// inline asm
bfe.u32 %r1433, %r1438, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd681, %r1433, 4;
add.s64 %rd682, %rd48, %rd681;
ld.shared.u32 %r3137, [%rd682];
xor.b32 %r3138, %r3137, %r3136;
// inline asm
bfe.u32 %r1437, %r1438, %r208, %r2512;
// inline asm
mul.wide.u32 %rd683, %r1437, 4;
add.s64 %rd684, %rd51, %rd683;
ld.shared.u32 %r3139, [%rd684];
add.s32 %r3140, %r3139, %r3138;
xor.b32 %r3141, %r2628, %r1422;
xor.b32 %r1454, %r3141, %r3140;
// inline asm
bfe.u32 %r1441, %r1454, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd685, %r1441, 4;
add.s64 %rd686, %rd42, %rd685;
ld.shared.u32 %r3142, [%rd686];
// inline asm
bfe.u32 %r1445, %r1454, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd687, %r1445, 4;
add.s64 %rd688, %rd45, %rd687;
ld.shared.u32 %r3143, [%rd688];
add.s32 %r3144, %r3143, %r3142;
// inline asm
bfe.u32 %r1449, %r1454, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd689, %r1449, 4;
add.s64 %rd690, %rd48, %rd689;
ld.shared.u32 %r3145, [%rd690];
xor.b32 %r3146, %r3145, %r3144;
// inline asm
bfe.u32 %r1453, %r1454, %r208, %r2512;
// inline asm
mul.wide.u32 %rd691, %r1453, 4;
add.s64 %rd692, %rd51, %rd691;
ld.shared.u32 %r3147, [%rd692];
add.s32 %r3148, %r3147, %r3146;
xor.b32 %r3149, %r2637, %r1438;
xor.b32 %r1470, %r3149, %r3148;
// inline asm
bfe.u32 %r1457, %r1470, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd693, %r1457, 4;
add.s64 %rd694, %rd42, %rd693;
ld.shared.u32 %r3150, [%rd694];
// inline asm
bfe.u32 %r1461, %r1470, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd695, %r1461, 4;
add.s64 %rd696, %rd45, %rd695;
ld.shared.u32 %r3151, [%rd696];
add.s32 %r3152, %r3151, %r3150;
// inline asm
bfe.u32 %r1465, %r1470, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd697, %r1465, 4;
add.s64 %rd698, %rd48, %rd697;
ld.shared.u32 %r3153, [%rd698];
xor.b32 %r3154, %r3153, %r3152;
// inline asm
bfe.u32 %r1469, %r1470, %r208, %r2512;
// inline asm
mul.wide.u32 %rd699, %r1469, 4;
add.s64 %rd700, %rd51, %rd699;
ld.shared.u32 %r3155, [%rd700];
add.s32 %r3156, %r3155, %r3154;
xor.b32 %r3157, %r2646, %r1454;
xor.b32 %r1486, %r3157, %r3156;
// inline asm
bfe.u32 %r1473, %r1486, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd701, %r1473, 4;
add.s64 %rd702, %rd42, %rd701;
ld.shared.u32 %r3158, [%rd702];
// inline asm
bfe.u32 %r1477, %r1486, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd703, %r1477, 4;
add.s64 %rd704, %rd45, %rd703;
ld.shared.u32 %r3159, [%rd704];
add.s32 %r3160, %r3159, %r3158;
// inline asm
bfe.u32 %r1481, %r1486, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd705, %r1481, 4;
add.s64 %rd706, %rd48, %rd705;
ld.shared.u32 %r3161, [%rd706];
xor.b32 %r3162, %r3161, %r3160;
// inline asm
bfe.u32 %r1485, %r1486, %r208, %r2512;
// inline asm
mul.wide.u32 %rd707, %r1485, 4;
add.s64 %rd708, %rd51, %rd707;
ld.shared.u32 %r3163, [%rd708];
add.s32 %r3164, %r3163, %r3162;
xor.b32 %r3165, %r2655, %r1470;
xor.b32 %r72, %r3165, %r3164;
xor.b32 %r73, %r2657, %r1486;
xor.b32 %r1502, %r462, %r1486;
// inline asm
bfe.u32 %r1489, %r1502, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd709, %r1489, 4;
add.s64 %rd710, %rd42, %rd709;
ld.shared.u32 %r3166, [%rd710];
// inline asm
bfe.u32 %r1493, %r1502, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd711, %r1493, 4;
add.s64 %rd712, %rd45, %rd711;
ld.shared.u32 %r3167, [%rd712];
add.s32 %r3168, %r3167, %r3166;
// inline asm
bfe.u32 %r1497, %r1502, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd713, %r1497, 4;
add.s64 %rd714, %rd48, %rd713;
ld.shared.u32 %r3169, [%rd714];
xor.b32 %r3170, %r3169, %r3168;
// inline asm
bfe.u32 %r1501, %r1502, %r208, %r2512;
// inline asm
mul.wide.u32 %rd715, %r1501, 4;
add.s64 %rd716, %rd51, %rd715;
ld.shared.u32 %r3171, [%rd716];
add.s32 %r3172, %r3171, %r3170;
xor.b32 %r3173, %r64, %r72;
xor.b32 %r1518, %r3173, %r3172;
// inline asm
bfe.u32 %r1505, %r1518, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd717, %r1505, 4;
add.s64 %rd718, %rd42, %rd717;
ld.shared.u32 %r3174, [%rd718];
// inline asm
bfe.u32 %r1509, %r1518, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd719, %r1509, 4;
add.s64 %rd720, %rd45, %rd719;
ld.shared.u32 %r3175, [%rd720];
add.s32 %r3176, %r3175, %r3174;
// inline asm
bfe.u32 %r1513, %r1518, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd721, %r1513, 4;
add.s64 %rd722, %rd48, %rd721;
ld.shared.u32 %r3177, [%rd722];
xor.b32 %r3178, %r3177, %r3176;
// inline asm
bfe.u32 %r1517, %r1518, %r208, %r2512;
// inline asm
mul.wide.u32 %rd723, %r1517, 4;
add.s64 %rd724, %rd51, %rd723;
ld.shared.u32 %r3179, [%rd724];
add.s32 %r3180, %r3179, %r3178;
xor.b32 %r3181, %r67, %r1502;
xor.b32 %r1534, %r3181, %r3180;
// inline asm
bfe.u32 %r1521, %r1534, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd725, %r1521, 4;
add.s64 %rd726, %rd42, %rd725;
ld.shared.u32 %r3182, [%rd726];
// inline asm
bfe.u32 %r1525, %r1534, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd727, %r1525, 4;
add.s64 %rd728, %rd45, %rd727;
ld.shared.u32 %r3183, [%rd728];
add.s32 %r3184, %r3183, %r3182;
// inline asm
bfe.u32 %r1529, %r1534, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd729, %r1529, 4;
add.s64 %rd730, %rd48, %rd729;
ld.shared.u32 %r3185, [%rd730];
xor.b32 %r3186, %r3185, %r3184;
// inline asm
bfe.u32 %r1533, %r1534, %r208, %r2512;
// inline asm
mul.wide.u32 %rd731, %r1533, 4;
add.s64 %rd732, %rd51, %rd731;
ld.shared.u32 %r3187, [%rd732];
add.s32 %r3188, %r3187, %r3186;
xor.b32 %r3189, %r66, %r1518;
xor.b32 %r1550, %r3189, %r3188;
// inline asm
bfe.u32 %r1537, %r1550, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd733, %r1537, 4;
add.s64 %rd734, %rd42, %rd733;
ld.shared.u32 %r3190, [%rd734];
// inline asm
bfe.u32 %r1541, %r1550, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd735, %r1541, 4;
add.s64 %rd736, %rd45, %rd735;
ld.shared.u32 %r3191, [%rd736];
add.s32 %r3192, %r3191, %r3190;
// inline asm
bfe.u32 %r1545, %r1550, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd737, %r1545, 4;
add.s64 %rd738, %rd48, %rd737;
ld.shared.u32 %r3193, [%rd738];
xor.b32 %r3194, %r3193, %r3192;
// inline asm
bfe.u32 %r1549, %r1550, %r208, %r2512;
// inline asm
mul.wide.u32 %rd739, %r1549, 4;
add.s64 %rd740, %rd51, %rd739;
ld.shared.u32 %r3195, [%rd740];
add.s32 %r3196, %r3195, %r3194;
xor.b32 %r3197, %r69, %r1534;
xor.b32 %r1566, %r3197, %r3196;
// inline asm
bfe.u32 %r1553, %r1566, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd741, %r1553, 4;
add.s64 %rd742, %rd42, %rd741;
ld.shared.u32 %r3198, [%rd742];
// inline asm
bfe.u32 %r1557, %r1566, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd743, %r1557, 4;
add.s64 %rd744, %rd45, %rd743;
ld.shared.u32 %r3199, [%rd744];
add.s32 %r3200, %r3199, %r3198;
// inline asm
bfe.u32 %r1561, %r1566, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd745, %r1561, 4;
add.s64 %rd746, %rd48, %rd745;
ld.shared.u32 %r3201, [%rd746];
xor.b32 %r3202, %r3201, %r3200;
// inline asm
bfe.u32 %r1565, %r1566, %r208, %r2512;
// inline asm
mul.wide.u32 %rd747, %r1565, 4;
add.s64 %rd748, %rd51, %rd747;
ld.shared.u32 %r3203, [%rd748];
add.s32 %r3204, %r3203, %r3202;
xor.b32 %r3205, %r68, %r1550;
xor.b32 %r1582, %r3205, %r3204;
// inline asm
bfe.u32 %r1569, %r1582, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd749, %r1569, 4;
add.s64 %rd750, %rd42, %rd749;
ld.shared.u32 %r3206, [%rd750];
// inline asm
bfe.u32 %r1573, %r1582, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd751, %r1573, 4;
add.s64 %rd752, %rd45, %rd751;
ld.shared.u32 %r3207, [%rd752];
add.s32 %r3208, %r3207, %r3206;
// inline asm
bfe.u32 %r1577, %r1582, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd753, %r1577, 4;
add.s64 %rd754, %rd48, %rd753;
ld.shared.u32 %r3209, [%rd754];
xor.b32 %r3210, %r3209, %r3208;
// inline asm
bfe.u32 %r1581, %r1582, %r208, %r2512;
// inline asm
mul.wide.u32 %rd755, %r1581, 4;
add.s64 %rd756, %rd51, %rd755;
ld.shared.u32 %r3211, [%rd756];
add.s32 %r3212, %r3211, %r3210;
xor.b32 %r3213, %r71, %r1566;
xor.b32 %r1598, %r3213, %r3212;
// inline asm
bfe.u32 %r1585, %r1598, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd757, %r1585, 4;
add.s64 %rd758, %rd42, %rd757;
ld.shared.u32 %r3214, [%rd758];
// inline asm
bfe.u32 %r1589, %r1598, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd759, %r1589, 4;
add.s64 %rd760, %rd45, %rd759;
ld.shared.u32 %r3215, [%rd760];
add.s32 %r3216, %r3215, %r3214;
// inline asm
bfe.u32 %r1593, %r1598, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd761, %r1593, 4;
add.s64 %rd762, %rd48, %rd761;
ld.shared.u32 %r3217, [%rd762];
xor.b32 %r3218, %r3217, %r3216;
// inline asm
bfe.u32 %r1597, %r1598, %r208, %r2512;
// inline asm
mul.wide.u32 %rd763, %r1597, 4;
add.s64 %rd764, %rd51, %rd763;
ld.shared.u32 %r3219, [%rd764];
add.s32 %r3220, %r3219, %r3218;
xor.b32 %r3221, %r70, %r1582;
xor.b32 %r1614, %r3221, %r3220;
// inline asm
bfe.u32 %r1601, %r1614, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd765, %r1601, 4;
add.s64 %rd766, %rd42, %rd765;
ld.shared.u32 %r3222, [%rd766];
// inline asm
bfe.u32 %r1605, %r1614, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd767, %r1605, 4;
add.s64 %rd768, %rd45, %rd767;
ld.shared.u32 %r3223, [%rd768];
add.s32 %r3224, %r3223, %r3222;
// inline asm
bfe.u32 %r1609, %r1614, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd769, %r1609, 4;
add.s64 %rd770, %rd48, %rd769;
ld.shared.u32 %r3225, [%rd770];
xor.b32 %r3226, %r3225, %r3224;
// inline asm
bfe.u32 %r1613, %r1614, %r208, %r2512;
// inline asm
mul.wide.u32 %rd771, %r1613, 4;
add.s64 %rd772, %rd51, %rd771;
ld.shared.u32 %r3227, [%rd772];
add.s32 %r3228, %r3227, %r3226;
xor.b32 %r3229, %r73, %r1598;
xor.b32 %r1630, %r3229, %r3228;
// inline asm
bfe.u32 %r1617, %r1630, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd773, %r1617, 4;
add.s64 %rd774, %rd42, %rd773;
ld.shared.u32 %r3230, [%rd774];
// inline asm
bfe.u32 %r1621, %r1630, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd775, %r1621, 4;
add.s64 %rd776, %rd45, %rd775;
ld.shared.u32 %r3231, [%rd776];
add.s32 %r3232, %r3231, %r3230;
// inline asm
bfe.u32 %r1625, %r1630, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd777, %r1625, 4;
add.s64 %rd778, %rd48, %rd777;
ld.shared.u32 %r3233, [%rd778];
xor.b32 %r3234, %r3233, %r3232;
// inline asm
bfe.u32 %r1629, %r1630, %r208, %r2512;
// inline asm
mul.wide.u32 %rd779, %r1629, 4;
add.s64 %rd780, %rd51, %rd779;
ld.shared.u32 %r3235, [%rd780];
add.s32 %r3236, %r3235, %r3234;
xor.b32 %r3237, %r72, %r1614;
xor.b32 %r1646, %r3237, %r3236;
// inline asm
bfe.u32 %r1633, %r1646, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd781, %r1633, 4;
add.s64 %rd782, %rd42, %rd781;
ld.shared.u32 %r3238, [%rd782];
// inline asm
bfe.u32 %r1637, %r1646, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd783, %r1637, 4;
add.s64 %rd784, %rd45, %rd783;
ld.shared.u32 %r3239, [%rd784];
add.s32 %r3240, %r3239, %r3238;
// inline asm
bfe.u32 %r1641, %r1646, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd785, %r1641, 4;
add.s64 %rd786, %rd48, %rd785;
ld.shared.u32 %r3241, [%rd786];
xor.b32 %r3242, %r3241, %r3240;
// inline asm
bfe.u32 %r1645, %r1646, %r208, %r2512;
// inline asm
mul.wide.u32 %rd787, %r1645, 4;
add.s64 %rd788, %rd51, %rd787;
ld.shared.u32 %r3243, [%rd788];
add.s32 %r3244, %r3243, %r3242;
xor.b32 %r3245, %r2601, %r1630;
xor.b32 %r1662, %r3245, %r3244;
// inline asm
bfe.u32 %r1649, %r1662, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd789, %r1649, 4;
add.s64 %rd790, %rd42, %rd789;
ld.shared.u32 %r3246, [%rd790];
// inline asm
bfe.u32 %r1653, %r1662, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd791, %r1653, 4;
add.s64 %rd792, %rd45, %rd791;
ld.shared.u32 %r3247, [%rd792];
add.s32 %r3248, %r3247, %r3246;
// inline asm
bfe.u32 %r1657, %r1662, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd793, %r1657, 4;
add.s64 %rd794, %rd48, %rd793;
ld.shared.u32 %r3249, [%rd794];
xor.b32 %r3250, %r3249, %r3248;
// inline asm
bfe.u32 %r1661, %r1662, %r208, %r2512;
// inline asm
mul.wide.u32 %rd795, %r1661, 4;
add.s64 %rd796, %rd51, %rd795;
ld.shared.u32 %r3251, [%rd796];
add.s32 %r3252, %r3251, %r3250;
xor.b32 %r3253, %r2610, %r1646;
xor.b32 %r1678, %r3253, %r3252;
// inline asm
bfe.u32 %r1665, %r1678, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd797, %r1665, 4;
add.s64 %rd798, %rd42, %rd797;
ld.shared.u32 %r3254, [%rd798];
// inline asm
bfe.u32 %r1669, %r1678, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd799, %r1669, 4;
add.s64 %rd800, %rd45, %rd799;
ld.shared.u32 %r3255, [%rd800];
add.s32 %r3256, %r3255, %r3254;
// inline asm
bfe.u32 %r1673, %r1678, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd801, %r1673, 4;
add.s64 %rd802, %rd48, %rd801;
ld.shared.u32 %r3257, [%rd802];
xor.b32 %r3258, %r3257, %r3256;
// inline asm
bfe.u32 %r1677, %r1678, %r208, %r2512;
// inline asm
mul.wide.u32 %rd803, %r1677, 4;
add.s64 %rd804, %rd51, %rd803;
ld.shared.u32 %r3259, [%rd804];
add.s32 %r3260, %r3259, %r3258;
xor.b32 %r3261, %r2619, %r1662;
xor.b32 %r1694, %r3261, %r3260;
// inline asm
bfe.u32 %r1681, %r1694, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd805, %r1681, 4;
add.s64 %rd806, %rd42, %rd805;
ld.shared.u32 %r3262, [%rd806];
// inline asm
bfe.u32 %r1685, %r1694, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd807, %r1685, 4;
add.s64 %rd808, %rd45, %rd807;
ld.shared.u32 %r3263, [%rd808];
add.s32 %r3264, %r3263, %r3262;
// inline asm
bfe.u32 %r1689, %r1694, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd809, %r1689, 4;
add.s64 %rd810, %rd48, %rd809;
ld.shared.u32 %r3265, [%rd810];
xor.b32 %r3266, %r3265, %r3264;
// inline asm
bfe.u32 %r1693, %r1694, %r208, %r2512;
// inline asm
mul.wide.u32 %rd811, %r1693, 4;
add.s64 %rd812, %rd51, %rd811;
ld.shared.u32 %r3267, [%rd812];
add.s32 %r3268, %r3267, %r3266;
xor.b32 %r3269, %r2628, %r1678;
xor.b32 %r1710, %r3269, %r3268;
// inline asm
bfe.u32 %r1697, %r1710, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd813, %r1697, 4;
add.s64 %rd814, %rd42, %rd813;
ld.shared.u32 %r3270, [%rd814];
// inline asm
bfe.u32 %r1701, %r1710, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd815, %r1701, 4;
add.s64 %rd816, %rd45, %rd815;
ld.shared.u32 %r3271, [%rd816];
add.s32 %r3272, %r3271, %r3270;
// inline asm
bfe.u32 %r1705, %r1710, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd817, %r1705, 4;
add.s64 %rd818, %rd48, %rd817;
ld.shared.u32 %r3273, [%rd818];
xor.b32 %r3274, %r3273, %r3272;
// inline asm
bfe.u32 %r1709, %r1710, %r208, %r2512;
// inline asm
mul.wide.u32 %rd819, %r1709, 4;
add.s64 %rd820, %rd51, %rd819;
ld.shared.u32 %r3275, [%rd820];
add.s32 %r3276, %r3275, %r3274;
xor.b32 %r3277, %r2637, %r1694;
xor.b32 %r1726, %r3277, %r3276;
// inline asm
bfe.u32 %r1713, %r1726, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd821, %r1713, 4;
add.s64 %rd822, %rd42, %rd821;
ld.shared.u32 %r3278, [%rd822];
// inline asm
bfe.u32 %r1717, %r1726, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd823, %r1717, 4;
add.s64 %rd824, %rd45, %rd823;
ld.shared.u32 %r3279, [%rd824];
add.s32 %r3280, %r3279, %r3278;
// inline asm
bfe.u32 %r1721, %r1726, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd825, %r1721, 4;
add.s64 %rd826, %rd48, %rd825;
ld.shared.u32 %r3281, [%rd826];
xor.b32 %r3282, %r3281, %r3280;
// inline asm
bfe.u32 %r1725, %r1726, %r208, %r2512;
// inline asm
mul.wide.u32 %rd827, %r1725, 4;
add.s64 %rd828, %rd51, %rd827;
ld.shared.u32 %r3283, [%rd828];
add.s32 %r3284, %r3283, %r3282;
xor.b32 %r3285, %r2646, %r1710;
xor.b32 %r1742, %r3285, %r3284;
// inline asm
bfe.u32 %r1729, %r1742, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd829, %r1729, 4;
add.s64 %rd830, %rd42, %rd829;
ld.shared.u32 %r3286, [%rd830];
// inline asm
bfe.u32 %r1733, %r1742, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd831, %r1733, 4;
add.s64 %rd832, %rd45, %rd831;
ld.shared.u32 %r3287, [%rd832];
add.s32 %r3288, %r3287, %r3286;
// inline asm
bfe.u32 %r1737, %r1742, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd833, %r1737, 4;
add.s64 %rd834, %rd48, %rd833;
ld.shared.u32 %r3289, [%rd834];
xor.b32 %r3290, %r3289, %r3288;
// inline asm
bfe.u32 %r1741, %r1742, %r208, %r2512;
// inline asm
mul.wide.u32 %rd835, %r1741, 4;
add.s64 %rd836, %rd51, %rd835;
ld.shared.u32 %r3291, [%rd836];
add.s32 %r3292, %r3291, %r3290;
xor.b32 %r3293, %r2655, %r1726;
xor.b32 %r74, %r3293, %r3292;
xor.b32 %r75, %r2657, %r1742;
xor.b32 %r1758, %r462, %r1742;
// inline asm
bfe.u32 %r1745, %r1758, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd837, %r1745, 4;
add.s64 %rd838, %rd42, %rd837;
ld.shared.u32 %r3294, [%rd838];
// inline asm
bfe.u32 %r1749, %r1758, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd839, %r1749, 4;
add.s64 %rd840, %rd45, %rd839;
ld.shared.u32 %r3295, [%rd840];
add.s32 %r3296, %r3295, %r3294;
// inline asm
bfe.u32 %r1753, %r1758, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd841, %r1753, 4;
add.s64 %rd842, %rd48, %rd841;
ld.shared.u32 %r3297, [%rd842];
xor.b32 %r3298, %r3297, %r3296;
// inline asm
bfe.u32 %r1757, %r1758, %r208, %r2512;
// inline asm
mul.wide.u32 %rd843, %r1757, 4;
add.s64 %rd844, %rd51, %rd843;
ld.shared.u32 %r3299, [%rd844];
add.s32 %r3300, %r3299, %r3298;
xor.b32 %r3301, %r64, %r74;
xor.b32 %r1774, %r3301, %r3300;
// inline asm
bfe.u32 %r1761, %r1774, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd845, %r1761, 4;
add.s64 %rd846, %rd42, %rd845;
ld.shared.u32 %r3302, [%rd846];
// inline asm
bfe.u32 %r1765, %r1774, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd847, %r1765, 4;
add.s64 %rd848, %rd45, %rd847;
ld.shared.u32 %r3303, [%rd848];
add.s32 %r3304, %r3303, %r3302;
// inline asm
bfe.u32 %r1769, %r1774, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd849, %r1769, 4;
add.s64 %rd850, %rd48, %rd849;
ld.shared.u32 %r3305, [%rd850];
xor.b32 %r3306, %r3305, %r3304;
// inline asm
bfe.u32 %r1773, %r1774, %r208, %r2512;
// inline asm
mul.wide.u32 %rd851, %r1773, 4;
add.s64 %rd852, %rd51, %rd851;
ld.shared.u32 %r3307, [%rd852];
add.s32 %r3308, %r3307, %r3306;
xor.b32 %r3309, %r67, %r1758;
xor.b32 %r1790, %r3309, %r3308;
// inline asm
bfe.u32 %r1777, %r1790, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd853, %r1777, 4;
add.s64 %rd854, %rd42, %rd853;
ld.shared.u32 %r3310, [%rd854];
// inline asm
bfe.u32 %r1781, %r1790, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd855, %r1781, 4;
add.s64 %rd856, %rd45, %rd855;
ld.shared.u32 %r3311, [%rd856];
add.s32 %r3312, %r3311, %r3310;
// inline asm
bfe.u32 %r1785, %r1790, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd857, %r1785, 4;
add.s64 %rd858, %rd48, %rd857;
ld.shared.u32 %r3313, [%rd858];
xor.b32 %r3314, %r3313, %r3312;
// inline asm
bfe.u32 %r1789, %r1790, %r208, %r2512;
// inline asm
mul.wide.u32 %rd859, %r1789, 4;
add.s64 %rd860, %rd51, %rd859;
ld.shared.u32 %r3315, [%rd860];
add.s32 %r3316, %r3315, %r3314;
xor.b32 %r3317, %r66, %r1774;
xor.b32 %r1806, %r3317, %r3316;
// inline asm
bfe.u32 %r1793, %r1806, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd861, %r1793, 4;
add.s64 %rd862, %rd42, %rd861;
ld.shared.u32 %r3318, [%rd862];
// inline asm
bfe.u32 %r1797, %r1806, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd863, %r1797, 4;
add.s64 %rd864, %rd45, %rd863;
ld.shared.u32 %r3319, [%rd864];
add.s32 %r3320, %r3319, %r3318;
// inline asm
bfe.u32 %r1801, %r1806, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd865, %r1801, 4;
add.s64 %rd866, %rd48, %rd865;
ld.shared.u32 %r3321, [%rd866];
xor.b32 %r3322, %r3321, %r3320;
// inline asm
bfe.u32 %r1805, %r1806, %r208, %r2512;
// inline asm
mul.wide.u32 %rd867, %r1805, 4;
add.s64 %rd868, %rd51, %rd867;
ld.shared.u32 %r3323, [%rd868];
add.s32 %r3324, %r3323, %r3322;
xor.b32 %r3325, %r69, %r1790;
xor.b32 %r1822, %r3325, %r3324;
// inline asm
bfe.u32 %r1809, %r1822, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd869, %r1809, 4;
add.s64 %rd870, %rd42, %rd869;
ld.shared.u32 %r3326, [%rd870];
// inline asm
bfe.u32 %r1813, %r1822, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd871, %r1813, 4;
add.s64 %rd872, %rd45, %rd871;
ld.shared.u32 %r3327, [%rd872];
add.s32 %r3328, %r3327, %r3326;
// inline asm
bfe.u32 %r1817, %r1822, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd873, %r1817, 4;
add.s64 %rd874, %rd48, %rd873;
ld.shared.u32 %r3329, [%rd874];
xor.b32 %r3330, %r3329, %r3328;
// inline asm
bfe.u32 %r1821, %r1822, %r208, %r2512;
// inline asm
mul.wide.u32 %rd875, %r1821, 4;
add.s64 %rd876, %rd51, %rd875;
ld.shared.u32 %r3331, [%rd876];
add.s32 %r3332, %r3331, %r3330;
xor.b32 %r3333, %r68, %r1806;
xor.b32 %r1838, %r3333, %r3332;
// inline asm
bfe.u32 %r1825, %r1838, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd877, %r1825, 4;
add.s64 %rd878, %rd42, %rd877;
ld.shared.u32 %r3334, [%rd878];
// inline asm
bfe.u32 %r1829, %r1838, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd879, %r1829, 4;
add.s64 %rd880, %rd45, %rd879;
ld.shared.u32 %r3335, [%rd880];
add.s32 %r3336, %r3335, %r3334;
// inline asm
bfe.u32 %r1833, %r1838, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd881, %r1833, 4;
add.s64 %rd882, %rd48, %rd881;
ld.shared.u32 %r3337, [%rd882];
xor.b32 %r3338, %r3337, %r3336;
// inline asm
bfe.u32 %r1837, %r1838, %r208, %r2512;
// inline asm
mul.wide.u32 %rd883, %r1837, 4;
add.s64 %rd884, %rd51, %rd883;
ld.shared.u32 %r3339, [%rd884];
add.s32 %r3340, %r3339, %r3338;
xor.b32 %r3341, %r71, %r1822;
xor.b32 %r1854, %r3341, %r3340;
// inline asm
bfe.u32 %r1841, %r1854, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd885, %r1841, 4;
add.s64 %rd886, %rd42, %rd885;
ld.shared.u32 %r3342, [%rd886];
// inline asm
bfe.u32 %r1845, %r1854, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd887, %r1845, 4;
add.s64 %rd888, %rd45, %rd887;
ld.shared.u32 %r3343, [%rd888];
add.s32 %r3344, %r3343, %r3342;
// inline asm
bfe.u32 %r1849, %r1854, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd889, %r1849, 4;
add.s64 %rd890, %rd48, %rd889;
ld.shared.u32 %r3345, [%rd890];
xor.b32 %r3346, %r3345, %r3344;
// inline asm
bfe.u32 %r1853, %r1854, %r208, %r2512;
// inline asm
mul.wide.u32 %rd891, %r1853, 4;
add.s64 %rd892, %rd51, %rd891;
ld.shared.u32 %r3347, [%rd892];
add.s32 %r3348, %r3347, %r3346;
xor.b32 %r3349, %r70, %r1838;
xor.b32 %r1870, %r3349, %r3348;
// inline asm
bfe.u32 %r1857, %r1870, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd893, %r1857, 4;
add.s64 %rd894, %rd42, %rd893;
ld.shared.u32 %r3350, [%rd894];
// inline asm
bfe.u32 %r1861, %r1870, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd895, %r1861, 4;
add.s64 %rd896, %rd45, %rd895;
ld.shared.u32 %r3351, [%rd896];
add.s32 %r3352, %r3351, %r3350;
// inline asm
bfe.u32 %r1865, %r1870, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd897, %r1865, 4;
add.s64 %rd898, %rd48, %rd897;
ld.shared.u32 %r3353, [%rd898];
xor.b32 %r3354, %r3353, %r3352;
// inline asm
bfe.u32 %r1869, %r1870, %r208, %r2512;
// inline asm
mul.wide.u32 %rd899, %r1869, 4;
add.s64 %rd900, %rd51, %rd899;
ld.shared.u32 %r3355, [%rd900];
add.s32 %r3356, %r3355, %r3354;
xor.b32 %r3357, %r73, %r1854;
xor.b32 %r1886, %r3357, %r3356;
// inline asm
bfe.u32 %r1873, %r1886, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd901, %r1873, 4;
add.s64 %rd902, %rd42, %rd901;
ld.shared.u32 %r3358, [%rd902];
// inline asm
bfe.u32 %r1877, %r1886, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd903, %r1877, 4;
add.s64 %rd904, %rd45, %rd903;
ld.shared.u32 %r3359, [%rd904];
add.s32 %r3360, %r3359, %r3358;
// inline asm
bfe.u32 %r1881, %r1886, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd905, %r1881, 4;
add.s64 %rd906, %rd48, %rd905;
ld.shared.u32 %r3361, [%rd906];
xor.b32 %r3362, %r3361, %r3360;
// inline asm
bfe.u32 %r1885, %r1886, %r208, %r2512;
// inline asm
mul.wide.u32 %rd907, %r1885, 4;
add.s64 %rd908, %rd51, %rd907;
ld.shared.u32 %r3363, [%rd908];
add.s32 %r3364, %r3363, %r3362;
xor.b32 %r3365, %r72, %r1870;
xor.b32 %r1902, %r3365, %r3364;
// inline asm
bfe.u32 %r1889, %r1902, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd909, %r1889, 4;
add.s64 %rd910, %rd42, %rd909;
ld.shared.u32 %r3366, [%rd910];
// inline asm
bfe.u32 %r1893, %r1902, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd911, %r1893, 4;
add.s64 %rd912, %rd45, %rd911;
ld.shared.u32 %r3367, [%rd912];
add.s32 %r3368, %r3367, %r3366;
// inline asm
bfe.u32 %r1897, %r1902, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd913, %r1897, 4;
add.s64 %rd914, %rd48, %rd913;
ld.shared.u32 %r3369, [%rd914];
xor.b32 %r3370, %r3369, %r3368;
// inline asm
bfe.u32 %r1901, %r1902, %r208, %r2512;
// inline asm
mul.wide.u32 %rd915, %r1901, 4;
add.s64 %rd916, %rd51, %rd915;
ld.shared.u32 %r3371, [%rd916];
add.s32 %r3372, %r3371, %r3370;
xor.b32 %r3373, %r75, %r1886;
xor.b32 %r1918, %r3373, %r3372;
// inline asm
bfe.u32 %r1905, %r1918, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd917, %r1905, 4;
add.s64 %rd918, %rd42, %rd917;
ld.shared.u32 %r3374, [%rd918];
// inline asm
bfe.u32 %r1909, %r1918, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd919, %r1909, 4;
add.s64 %rd920, %rd45, %rd919;
ld.shared.u32 %r3375, [%rd920];
add.s32 %r3376, %r3375, %r3374;
// inline asm
bfe.u32 %r1913, %r1918, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd921, %r1913, 4;
add.s64 %rd922, %rd48, %rd921;
ld.shared.u32 %r3377, [%rd922];
xor.b32 %r3378, %r3377, %r3376;
// inline asm
bfe.u32 %r1917, %r1918, %r208, %r2512;
// inline asm
mul.wide.u32 %rd923, %r1917, 4;
add.s64 %rd924, %rd51, %rd923;
ld.shared.u32 %r3379, [%rd924];
add.s32 %r3380, %r3379, %r3378;
xor.b32 %r3381, %r74, %r1902;
xor.b32 %r1934, %r3381, %r3380;
// inline asm
bfe.u32 %r1921, %r1934, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd925, %r1921, 4;
add.s64 %rd926, %rd42, %rd925;
ld.shared.u32 %r3382, [%rd926];
// inline asm
bfe.u32 %r1925, %r1934, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd927, %r1925, 4;
add.s64 %rd928, %rd45, %rd927;
ld.shared.u32 %r3383, [%rd928];
add.s32 %r3384, %r3383, %r3382;
// inline asm
bfe.u32 %r1929, %r1934, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd929, %r1929, 4;
add.s64 %rd930, %rd48, %rd929;
ld.shared.u32 %r3385, [%rd930];
xor.b32 %r3386, %r3385, %r3384;
// inline asm
bfe.u32 %r1933, %r1934, %r208, %r2512;
// inline asm
mul.wide.u32 %rd931, %r1933, 4;
add.s64 %rd932, %rd51, %rd931;
ld.shared.u32 %r3387, [%rd932];
add.s32 %r3388, %r3387, %r3386;
xor.b32 %r3389, %r2619, %r1918;
xor.b32 %r1950, %r3389, %r3388;
// inline asm
bfe.u32 %r1937, %r1950, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd933, %r1937, 4;
add.s64 %rd934, %rd42, %rd933;
ld.shared.u32 %r3390, [%rd934];
// inline asm
bfe.u32 %r1941, %r1950, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd935, %r1941, 4;
add.s64 %rd936, %rd45, %rd935;
ld.shared.u32 %r3391, [%rd936];
add.s32 %r3392, %r3391, %r3390;
// inline asm
bfe.u32 %r1945, %r1950, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd937, %r1945, 4;
add.s64 %rd938, %rd48, %rd937;
ld.shared.u32 %r3393, [%rd938];
xor.b32 %r3394, %r3393, %r3392;
// inline asm
bfe.u32 %r1949, %r1950, %r208, %r2512;
// inline asm
mul.wide.u32 %rd939, %r1949, 4;
add.s64 %rd940, %rd51, %rd939;
ld.shared.u32 %r3395, [%rd940];
add.s32 %r3396, %r3395, %r3394;
xor.b32 %r3397, %r2628, %r1934;
xor.b32 %r1966, %r3397, %r3396;
// inline asm
bfe.u32 %r1953, %r1966, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd941, %r1953, 4;
add.s64 %rd942, %rd42, %rd941;
ld.shared.u32 %r3398, [%rd942];
// inline asm
bfe.u32 %r1957, %r1966, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd943, %r1957, 4;
add.s64 %rd944, %rd45, %rd943;
ld.shared.u32 %r3399, [%rd944];
add.s32 %r3400, %r3399, %r3398;
// inline asm
bfe.u32 %r1961, %r1966, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd945, %r1961, 4;
add.s64 %rd946, %rd48, %rd945;
ld.shared.u32 %r3401, [%rd946];
xor.b32 %r3402, %r3401, %r3400;
// inline asm
bfe.u32 %r1965, %r1966, %r208, %r2512;
// inline asm
mul.wide.u32 %rd947, %r1965, 4;
add.s64 %rd948, %rd51, %rd947;
ld.shared.u32 %r3403, [%rd948];
add.s32 %r3404, %r3403, %r3402;
xor.b32 %r3405, %r2637, %r1950;
xor.b32 %r1982, %r3405, %r3404;
// inline asm
bfe.u32 %r1969, %r1982, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd949, %r1969, 4;
add.s64 %rd950, %rd42, %rd949;
ld.shared.u32 %r3406, [%rd950];
// inline asm
bfe.u32 %r1973, %r1982, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd951, %r1973, 4;
add.s64 %rd952, %rd45, %rd951;
ld.shared.u32 %r3407, [%rd952];
add.s32 %r3408, %r3407, %r3406;
// inline asm
bfe.u32 %r1977, %r1982, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd953, %r1977, 4;
add.s64 %rd954, %rd48, %rd953;
ld.shared.u32 %r3409, [%rd954];
xor.b32 %r3410, %r3409, %r3408;
// inline asm
bfe.u32 %r1981, %r1982, %r208, %r2512;
// inline asm
mul.wide.u32 %rd955, %r1981, 4;
add.s64 %rd956, %rd51, %rd955;
ld.shared.u32 %r3411, [%rd956];
add.s32 %r3412, %r3411, %r3410;
xor.b32 %r3413, %r2646, %r1966;
xor.b32 %r1998, %r3413, %r3412;
// inline asm
bfe.u32 %r1985, %r1998, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd957, %r1985, 4;
add.s64 %rd958, %rd42, %rd957;
ld.shared.u32 %r3414, [%rd958];
// inline asm
bfe.u32 %r1989, %r1998, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd959, %r1989, 4;
add.s64 %rd960, %rd45, %rd959;
ld.shared.u32 %r3415, [%rd960];
add.s32 %r3416, %r3415, %r3414;
// inline asm
bfe.u32 %r1993, %r1998, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd961, %r1993, 4;
add.s64 %rd962, %rd48, %rd961;
ld.shared.u32 %r3417, [%rd962];
xor.b32 %r3418, %r3417, %r3416;
// inline asm
bfe.u32 %r1997, %r1998, %r208, %r2512;
// inline asm
mul.wide.u32 %rd963, %r1997, 4;
add.s64 %rd964, %rd51, %rd963;
ld.shared.u32 %r3419, [%rd964];
add.s32 %r3420, %r3419, %r3418;
xor.b32 %r3421, %r2655, %r1982;
xor.b32 %r76, %r3421, %r3420;
xor.b32 %r77, %r2657, %r1998;
xor.b32 %r2014, %r462, %r1998;
// inline asm
bfe.u32 %r2001, %r2014, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd965, %r2001, 4;
add.s64 %rd966, %rd42, %rd965;
ld.shared.u32 %r3422, [%rd966];
// inline asm
bfe.u32 %r2005, %r2014, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd967, %r2005, 4;
add.s64 %rd968, %rd45, %rd967;
ld.shared.u32 %r3423, [%rd968];
add.s32 %r3424, %r3423, %r3422;
// inline asm
bfe.u32 %r2009, %r2014, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd969, %r2009, 4;
add.s64 %rd970, %rd48, %rd969;
ld.shared.u32 %r3425, [%rd970];
xor.b32 %r3426, %r3425, %r3424;
// inline asm
bfe.u32 %r2013, %r2014, %r208, %r2512;
// inline asm
mul.wide.u32 %rd971, %r2013, 4;
add.s64 %rd972, %rd51, %rd971;
ld.shared.u32 %r3427, [%rd972];
add.s32 %r3428, %r3427, %r3426;
xor.b32 %r3429, %r64, %r76;
xor.b32 %r2030, %r3429, %r3428;
// inline asm
bfe.u32 %r2017, %r2030, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd973, %r2017, 4;
add.s64 %rd974, %rd42, %rd973;
ld.shared.u32 %r3430, [%rd974];
// inline asm
bfe.u32 %r2021, %r2030, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd975, %r2021, 4;
add.s64 %rd976, %rd45, %rd975;
ld.shared.u32 %r3431, [%rd976];
add.s32 %r3432, %r3431, %r3430;
// inline asm
bfe.u32 %r2025, %r2030, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd977, %r2025, 4;
add.s64 %rd978, %rd48, %rd977;
ld.shared.u32 %r3433, [%rd978];
xor.b32 %r3434, %r3433, %r3432;
// inline asm
bfe.u32 %r2029, %r2030, %r208, %r2512;
// inline asm
mul.wide.u32 %rd979, %r2029, 4;
add.s64 %rd980, %rd51, %rd979;
ld.shared.u32 %r3435, [%rd980];
add.s32 %r3436, %r3435, %r3434;
xor.b32 %r3437, %r67, %r2014;
xor.b32 %r2046, %r3437, %r3436;
// inline asm
bfe.u32 %r2033, %r2046, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd981, %r2033, 4;
add.s64 %rd982, %rd42, %rd981;
ld.shared.u32 %r3438, [%rd982];
// inline asm
bfe.u32 %r2037, %r2046, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd983, %r2037, 4;
add.s64 %rd984, %rd45, %rd983;
ld.shared.u32 %r3439, [%rd984];
add.s32 %r3440, %r3439, %r3438;
// inline asm
bfe.u32 %r2041, %r2046, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd985, %r2041, 4;
add.s64 %rd986, %rd48, %rd985;
ld.shared.u32 %r3441, [%rd986];
xor.b32 %r3442, %r3441, %r3440;
// inline asm
bfe.u32 %r2045, %r2046, %r208, %r2512;
// inline asm
mul.wide.u32 %rd987, %r2045, 4;
add.s64 %rd988, %rd51, %rd987;
ld.shared.u32 %r3443, [%rd988];
add.s32 %r3444, %r3443, %r3442;
xor.b32 %r3445, %r66, %r2030;
xor.b32 %r2062, %r3445, %r3444;
// inline asm
bfe.u32 %r2049, %r2062, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd989, %r2049, 4;
add.s64 %rd990, %rd42, %rd989;
ld.shared.u32 %r3446, [%rd990];
// inline asm
bfe.u32 %r2053, %r2062, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd991, %r2053, 4;
add.s64 %rd992, %rd45, %rd991;
ld.shared.u32 %r3447, [%rd992];
add.s32 %r3448, %r3447, %r3446;
// inline asm
bfe.u32 %r2057, %r2062, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd993, %r2057, 4;
add.s64 %rd994, %rd48, %rd993;
ld.shared.u32 %r3449, [%rd994];
xor.b32 %r3450, %r3449, %r3448;
// inline asm
bfe.u32 %r2061, %r2062, %r208, %r2512;
// inline asm
mul.wide.u32 %rd995, %r2061, 4;
add.s64 %rd996, %rd51, %rd995;
ld.shared.u32 %r3451, [%rd996];
add.s32 %r3452, %r3451, %r3450;
xor.b32 %r3453, %r69, %r2046;
xor.b32 %r2078, %r3453, %r3452;
// inline asm
bfe.u32 %r2065, %r2078, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd997, %r2065, 4;
add.s64 %rd998, %rd42, %rd997;
ld.shared.u32 %r3454, [%rd998];
// inline asm
bfe.u32 %r2069, %r2078, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd999, %r2069, 4;
add.s64 %rd1000, %rd45, %rd999;
ld.shared.u32 %r3455, [%rd1000];
add.s32 %r3456, %r3455, %r3454;
// inline asm
bfe.u32 %r2073, %r2078, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1001, %r2073, 4;
add.s64 %rd1002, %rd48, %rd1001;
ld.shared.u32 %r3457, [%rd1002];
xor.b32 %r3458, %r3457, %r3456;
// inline asm
bfe.u32 %r2077, %r2078, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1003, %r2077, 4;
add.s64 %rd1004, %rd51, %rd1003;
ld.shared.u32 %r3459, [%rd1004];
add.s32 %r3460, %r3459, %r3458;
xor.b32 %r3461, %r68, %r2062;
xor.b32 %r2094, %r3461, %r3460;
// inline asm
bfe.u32 %r2081, %r2094, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1005, %r2081, 4;
add.s64 %rd1006, %rd42, %rd1005;
ld.shared.u32 %r3462, [%rd1006];
// inline asm
bfe.u32 %r2085, %r2094, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1007, %r2085, 4;
add.s64 %rd1008, %rd45, %rd1007;
ld.shared.u32 %r3463, [%rd1008];
add.s32 %r3464, %r3463, %r3462;
// inline asm
bfe.u32 %r2089, %r2094, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1009, %r2089, 4;
add.s64 %rd1010, %rd48, %rd1009;
ld.shared.u32 %r3465, [%rd1010];
xor.b32 %r3466, %r3465, %r3464;
// inline asm
bfe.u32 %r2093, %r2094, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1011, %r2093, 4;
add.s64 %rd1012, %rd51, %rd1011;
ld.shared.u32 %r3467, [%rd1012];
add.s32 %r3468, %r3467, %r3466;
xor.b32 %r3469, %r71, %r2078;
xor.b32 %r2110, %r3469, %r3468;
// inline asm
bfe.u32 %r2097, %r2110, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1013, %r2097, 4;
add.s64 %rd1014, %rd42, %rd1013;
ld.shared.u32 %r3470, [%rd1014];
// inline asm
bfe.u32 %r2101, %r2110, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1015, %r2101, 4;
add.s64 %rd1016, %rd45, %rd1015;
ld.shared.u32 %r3471, [%rd1016];
add.s32 %r3472, %r3471, %r3470;
// inline asm
bfe.u32 %r2105, %r2110, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1017, %r2105, 4;
add.s64 %rd1018, %rd48, %rd1017;
ld.shared.u32 %r3473, [%rd1018];
xor.b32 %r3474, %r3473, %r3472;
// inline asm
bfe.u32 %r2109, %r2110, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1019, %r2109, 4;
add.s64 %rd1020, %rd51, %rd1019;
ld.shared.u32 %r3475, [%rd1020];
add.s32 %r3476, %r3475, %r3474;
xor.b32 %r3477, %r70, %r2094;
xor.b32 %r2126, %r3477, %r3476;
// inline asm
bfe.u32 %r2113, %r2126, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1021, %r2113, 4;
add.s64 %rd1022, %rd42, %rd1021;
ld.shared.u32 %r3478, [%rd1022];
// inline asm
bfe.u32 %r2117, %r2126, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1023, %r2117, 4;
add.s64 %rd1024, %rd45, %rd1023;
ld.shared.u32 %r3479, [%rd1024];
add.s32 %r3480, %r3479, %r3478;
// inline asm
bfe.u32 %r2121, %r2126, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1025, %r2121, 4;
add.s64 %rd1026, %rd48, %rd1025;
ld.shared.u32 %r3481, [%rd1026];
xor.b32 %r3482, %r3481, %r3480;
// inline asm
bfe.u32 %r2125, %r2126, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1027, %r2125, 4;
add.s64 %rd1028, %rd51, %rd1027;
ld.shared.u32 %r3483, [%rd1028];
add.s32 %r3484, %r3483, %r3482;
xor.b32 %r3485, %r73, %r2110;
xor.b32 %r2142, %r3485, %r3484;
// inline asm
bfe.u32 %r2129, %r2142, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1029, %r2129, 4;
add.s64 %rd1030, %rd42, %rd1029;
ld.shared.u32 %r3486, [%rd1030];
// inline asm
bfe.u32 %r2133, %r2142, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1031, %r2133, 4;
add.s64 %rd1032, %rd45, %rd1031;
ld.shared.u32 %r3487, [%rd1032];
add.s32 %r3488, %r3487, %r3486;
// inline asm
bfe.u32 %r2137, %r2142, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1033, %r2137, 4;
add.s64 %rd1034, %rd48, %rd1033;
ld.shared.u32 %r3489, [%rd1034];
xor.b32 %r3490, %r3489, %r3488;
// inline asm
bfe.u32 %r2141, %r2142, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1035, %r2141, 4;
add.s64 %rd1036, %rd51, %rd1035;
ld.shared.u32 %r3491, [%rd1036];
add.s32 %r3492, %r3491, %r3490;
xor.b32 %r3493, %r72, %r2126;
xor.b32 %r2158, %r3493, %r3492;
// inline asm
bfe.u32 %r2145, %r2158, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1037, %r2145, 4;
add.s64 %rd1038, %rd42, %rd1037;
ld.shared.u32 %r3494, [%rd1038];
// inline asm
bfe.u32 %r2149, %r2158, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1039, %r2149, 4;
add.s64 %rd1040, %rd45, %rd1039;
ld.shared.u32 %r3495, [%rd1040];
add.s32 %r3496, %r3495, %r3494;
// inline asm
bfe.u32 %r2153, %r2158, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1041, %r2153, 4;
add.s64 %rd1042, %rd48, %rd1041;
ld.shared.u32 %r3497, [%rd1042];
xor.b32 %r3498, %r3497, %r3496;
// inline asm
bfe.u32 %r2157, %r2158, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1043, %r2157, 4;
add.s64 %rd1044, %rd51, %rd1043;
ld.shared.u32 %r3499, [%rd1044];
add.s32 %r3500, %r3499, %r3498;
xor.b32 %r3501, %r75, %r2142;
xor.b32 %r2174, %r3501, %r3500;
// inline asm
bfe.u32 %r2161, %r2174, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1045, %r2161, 4;
add.s64 %rd1046, %rd42, %rd1045;
ld.shared.u32 %r3502, [%rd1046];
// inline asm
bfe.u32 %r2165, %r2174, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1047, %r2165, 4;
add.s64 %rd1048, %rd45, %rd1047;
ld.shared.u32 %r3503, [%rd1048];
add.s32 %r3504, %r3503, %r3502;
// inline asm
bfe.u32 %r2169, %r2174, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1049, %r2169, 4;
add.s64 %rd1050, %rd48, %rd1049;
ld.shared.u32 %r3505, [%rd1050];
xor.b32 %r3506, %r3505, %r3504;
// inline asm
bfe.u32 %r2173, %r2174, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1051, %r2173, 4;
add.s64 %rd1052, %rd51, %rd1051;
ld.shared.u32 %r3507, [%rd1052];
add.s32 %r3508, %r3507, %r3506;
xor.b32 %r3509, %r74, %r2158;
xor.b32 %r2190, %r3509, %r3508;
// inline asm
bfe.u32 %r2177, %r2190, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1053, %r2177, 4;
add.s64 %rd1054, %rd42, %rd1053;
ld.shared.u32 %r3510, [%rd1054];
// inline asm
bfe.u32 %r2181, %r2190, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1055, %r2181, 4;
add.s64 %rd1056, %rd45, %rd1055;
ld.shared.u32 %r3511, [%rd1056];
add.s32 %r3512, %r3511, %r3510;
// inline asm
bfe.u32 %r2185, %r2190, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1057, %r2185, 4;
add.s64 %rd1058, %rd48, %rd1057;
ld.shared.u32 %r3513, [%rd1058];
xor.b32 %r3514, %r3513, %r3512;
// inline asm
bfe.u32 %r2189, %r2190, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1059, %r2189, 4;
add.s64 %rd1060, %rd51, %rd1059;
ld.shared.u32 %r3515, [%rd1060];
add.s32 %r3516, %r3515, %r3514;
xor.b32 %r3517, %r77, %r2174;
xor.b32 %r2206, %r3517, %r3516;
// inline asm
bfe.u32 %r2193, %r2206, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1061, %r2193, 4;
add.s64 %rd1062, %rd42, %rd1061;
ld.shared.u32 %r3518, [%rd1062];
// inline asm
bfe.u32 %r2197, %r2206, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1063, %r2197, 4;
add.s64 %rd1064, %rd45, %rd1063;
ld.shared.u32 %r3519, [%rd1064];
add.s32 %r3520, %r3519, %r3518;
// inline asm
bfe.u32 %r2201, %r2206, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1065, %r2201, 4;
add.s64 %rd1066, %rd48, %rd1065;
ld.shared.u32 %r3521, [%rd1066];
xor.b32 %r3522, %r3521, %r3520;
// inline asm
bfe.u32 %r2205, %r2206, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1067, %r2205, 4;
add.s64 %rd1068, %rd51, %rd1067;
ld.shared.u32 %r3523, [%rd1068];
add.s32 %r3524, %r3523, %r3522;
xor.b32 %r3525, %r76, %r2190;
xor.b32 %r2222, %r3525, %r3524;
// inline asm
bfe.u32 %r2209, %r2222, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1069, %r2209, 4;
add.s64 %rd1070, %rd42, %rd1069;
ld.shared.u32 %r3526, [%rd1070];
// inline asm
bfe.u32 %r2213, %r2222, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1071, %r2213, 4;
add.s64 %rd1072, %rd45, %rd1071;
ld.shared.u32 %r3527, [%rd1072];
add.s32 %r3528, %r3527, %r3526;
// inline asm
bfe.u32 %r2217, %r2222, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1073, %r2217, 4;
add.s64 %rd1074, %rd48, %rd1073;
ld.shared.u32 %r3529, [%rd1074];
xor.b32 %r3530, %r3529, %r3528;
// inline asm
bfe.u32 %r2221, %r2222, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1075, %r2221, 4;
add.s64 %rd1076, %rd51, %rd1075;
ld.shared.u32 %r3531, [%rd1076];
add.s32 %r3532, %r3531, %r3530;
xor.b32 %r3533, %r2637, %r2206;
xor.b32 %r2238, %r3533, %r3532;
// inline asm
bfe.u32 %r2225, %r2238, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1077, %r2225, 4;
add.s64 %rd1078, %rd42, %rd1077;
ld.shared.u32 %r3534, [%rd1078];
// inline asm
bfe.u32 %r2229, %r2238, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1079, %r2229, 4;
add.s64 %rd1080, %rd45, %rd1079;
ld.shared.u32 %r3535, [%rd1080];
add.s32 %r3536, %r3535, %r3534;
// inline asm
bfe.u32 %r2233, %r2238, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1081, %r2233, 4;
add.s64 %rd1082, %rd48, %rd1081;
ld.shared.u32 %r3537, [%rd1082];
xor.b32 %r3538, %r3537, %r3536;
// inline asm
bfe.u32 %r2237, %r2238, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1083, %r2237, 4;
add.s64 %rd1084, %rd51, %rd1083;
ld.shared.u32 %r3539, [%rd1084];
add.s32 %r3540, %r3539, %r3538;
xor.b32 %r3541, %r2646, %r2222;
xor.b32 %r2254, %r3541, %r3540;
// inline asm
bfe.u32 %r2241, %r2254, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1085, %r2241, 4;
add.s64 %rd1086, %rd42, %rd1085;
ld.shared.u32 %r3542, [%rd1086];
// inline asm
bfe.u32 %r2245, %r2254, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1087, %r2245, 4;
add.s64 %rd1088, %rd45, %rd1087;
ld.shared.u32 %r3543, [%rd1088];
add.s32 %r3544, %r3543, %r3542;
// inline asm
bfe.u32 %r2249, %r2254, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1089, %r2249, 4;
add.s64 %rd1090, %rd48, %rd1089;
ld.shared.u32 %r3545, [%rd1090];
xor.b32 %r3546, %r3545, %r3544;
// inline asm
bfe.u32 %r2253, %r2254, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1091, %r2253, 4;
add.s64 %rd1092, %rd51, %rd1091;
ld.shared.u32 %r3547, [%rd1092];
add.s32 %r3548, %r3547, %r3546;
xor.b32 %r3549, %r2655, %r2238;
xor.b32 %r78, %r3549, %r3548;
xor.b32 %r79, %r2657, %r2254;
xor.b32 %r2270, %r462, %r2254;
// inline asm
bfe.u32 %r2257, %r2270, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1093, %r2257, 4;
add.s64 %rd1094, %rd42, %rd1093;
ld.shared.u32 %r3550, [%rd1094];
// inline asm
bfe.u32 %r2261, %r2270, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1095, %r2261, 4;
add.s64 %rd1096, %rd45, %rd1095;
ld.shared.u32 %r3551, [%rd1096];
add.s32 %r3552, %r3551, %r3550;
// inline asm
bfe.u32 %r2265, %r2270, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1097, %r2265, 4;
add.s64 %rd1098, %rd48, %rd1097;
ld.shared.u32 %r3553, [%rd1098];
xor.b32 %r3554, %r3553, %r3552;
// inline asm
bfe.u32 %r2269, %r2270, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1099, %r2269, 4;
add.s64 %rd1100, %rd51, %rd1099;
ld.shared.u32 %r3555, [%rd1100];
add.s32 %r3556, %r3555, %r3554;
xor.b32 %r3557, %r64, %r78;
xor.b32 %r2286, %r3557, %r3556;
// inline asm
bfe.u32 %r2273, %r2286, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1101, %r2273, 4;
add.s64 %rd1102, %rd42, %rd1101;
ld.shared.u32 %r3558, [%rd1102];
// inline asm
bfe.u32 %r2277, %r2286, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1103, %r2277, 4;
add.s64 %rd1104, %rd45, %rd1103;
ld.shared.u32 %r3559, [%rd1104];
add.s32 %r3560, %r3559, %r3558;
// inline asm
bfe.u32 %r2281, %r2286, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1105, %r2281, 4;
add.s64 %rd1106, %rd48, %rd1105;
ld.shared.u32 %r3561, [%rd1106];
xor.b32 %r3562, %r3561, %r3560;
// inline asm
bfe.u32 %r2285, %r2286, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1107, %r2285, 4;
add.s64 %rd1108, %rd51, %rd1107;
ld.shared.u32 %r3563, [%rd1108];
add.s32 %r3564, %r3563, %r3562;
xor.b32 %r3565, %r67, %r2270;
xor.b32 %r2302, %r3565, %r3564;
// inline asm
bfe.u32 %r2289, %r2302, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1109, %r2289, 4;
add.s64 %rd1110, %rd42, %rd1109;
ld.shared.u32 %r3566, [%rd1110];
// inline asm
bfe.u32 %r2293, %r2302, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1111, %r2293, 4;
add.s64 %rd1112, %rd45, %rd1111;
ld.shared.u32 %r3567, [%rd1112];
add.s32 %r3568, %r3567, %r3566;
// inline asm
bfe.u32 %r2297, %r2302, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1113, %r2297, 4;
add.s64 %rd1114, %rd48, %rd1113;
ld.shared.u32 %r3569, [%rd1114];
xor.b32 %r3570, %r3569, %r3568;
// inline asm
bfe.u32 %r2301, %r2302, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1115, %r2301, 4;
add.s64 %rd1116, %rd51, %rd1115;
ld.shared.u32 %r3571, [%rd1116];
add.s32 %r3572, %r3571, %r3570;
xor.b32 %r3573, %r66, %r2286;
xor.b32 %r2318, %r3573, %r3572;
// inline asm
bfe.u32 %r2305, %r2318, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1117, %r2305, 4;
add.s64 %rd1118, %rd42, %rd1117;
ld.shared.u32 %r3574, [%rd1118];
// inline asm
bfe.u32 %r2309, %r2318, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1119, %r2309, 4;
add.s64 %rd1120, %rd45, %rd1119;
ld.shared.u32 %r3575, [%rd1120];
add.s32 %r3576, %r3575, %r3574;
// inline asm
bfe.u32 %r2313, %r2318, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1121, %r2313, 4;
add.s64 %rd1122, %rd48, %rd1121;
ld.shared.u32 %r3577, [%rd1122];
xor.b32 %r3578, %r3577, %r3576;
// inline asm
bfe.u32 %r2317, %r2318, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1123, %r2317, 4;
add.s64 %rd1124, %rd51, %rd1123;
ld.shared.u32 %r3579, [%rd1124];
add.s32 %r3580, %r3579, %r3578;
xor.b32 %r3581, %r69, %r2302;
xor.b32 %r2334, %r3581, %r3580;
// inline asm
bfe.u32 %r2321, %r2334, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1125, %r2321, 4;
add.s64 %rd1126, %rd42, %rd1125;
ld.shared.u32 %r3582, [%rd1126];
// inline asm
bfe.u32 %r2325, %r2334, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1127, %r2325, 4;
add.s64 %rd1128, %rd45, %rd1127;
ld.shared.u32 %r3583, [%rd1128];
add.s32 %r3584, %r3583, %r3582;
// inline asm
bfe.u32 %r2329, %r2334, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1129, %r2329, 4;
add.s64 %rd1130, %rd48, %rd1129;
ld.shared.u32 %r3585, [%rd1130];
xor.b32 %r3586, %r3585, %r3584;
// inline asm
bfe.u32 %r2333, %r2334, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1131, %r2333, 4;
add.s64 %rd1132, %rd51, %rd1131;
ld.shared.u32 %r3587, [%rd1132];
add.s32 %r3588, %r3587, %r3586;
xor.b32 %r3589, %r68, %r2318;
xor.b32 %r2350, %r3589, %r3588;
// inline asm
bfe.u32 %r2337, %r2350, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1133, %r2337, 4;
add.s64 %rd1134, %rd42, %rd1133;
ld.shared.u32 %r3590, [%rd1134];
// inline asm
bfe.u32 %r2341, %r2350, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1135, %r2341, 4;
add.s64 %rd1136, %rd45, %rd1135;
ld.shared.u32 %r3591, [%rd1136];
add.s32 %r3592, %r3591, %r3590;
// inline asm
bfe.u32 %r2345, %r2350, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1137, %r2345, 4;
add.s64 %rd1138, %rd48, %rd1137;
ld.shared.u32 %r3593, [%rd1138];
xor.b32 %r3594, %r3593, %r3592;
// inline asm
bfe.u32 %r2349, %r2350, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1139, %r2349, 4;
add.s64 %rd1140, %rd51, %rd1139;
ld.shared.u32 %r3595, [%rd1140];
add.s32 %r3596, %r3595, %r3594;
xor.b32 %r3597, %r71, %r2334;
xor.b32 %r2366, %r3597, %r3596;
// inline asm
bfe.u32 %r2353, %r2366, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1141, %r2353, 4;
add.s64 %rd1142, %rd42, %rd1141;
ld.shared.u32 %r3598, [%rd1142];
// inline asm
bfe.u32 %r2357, %r2366, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1143, %r2357, 4;
add.s64 %rd1144, %rd45, %rd1143;
ld.shared.u32 %r3599, [%rd1144];
add.s32 %r3600, %r3599, %r3598;
// inline asm
bfe.u32 %r2361, %r2366, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1145, %r2361, 4;
add.s64 %rd1146, %rd48, %rd1145;
ld.shared.u32 %r3601, [%rd1146];
xor.b32 %r3602, %r3601, %r3600;
// inline asm
bfe.u32 %r2365, %r2366, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1147, %r2365, 4;
add.s64 %rd1148, %rd51, %rd1147;
ld.shared.u32 %r3603, [%rd1148];
add.s32 %r3604, %r3603, %r3602;
xor.b32 %r3605, %r70, %r2350;
xor.b32 %r2382, %r3605, %r3604;
// inline asm
bfe.u32 %r2369, %r2382, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1149, %r2369, 4;
add.s64 %rd1150, %rd42, %rd1149;
ld.shared.u32 %r3606, [%rd1150];
// inline asm
bfe.u32 %r2373, %r2382, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1151, %r2373, 4;
add.s64 %rd1152, %rd45, %rd1151;
ld.shared.u32 %r3607, [%rd1152];
add.s32 %r3608, %r3607, %r3606;
// inline asm
bfe.u32 %r2377, %r2382, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1153, %r2377, 4;
add.s64 %rd1154, %rd48, %rd1153;
ld.shared.u32 %r3609, [%rd1154];
xor.b32 %r3610, %r3609, %r3608;
// inline asm
bfe.u32 %r2381, %r2382, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1155, %r2381, 4;
add.s64 %rd1156, %rd51, %rd1155;
ld.shared.u32 %r3611, [%rd1156];
add.s32 %r3612, %r3611, %r3610;
xor.b32 %r3613, %r73, %r2366;
xor.b32 %r2398, %r3613, %r3612;
// inline asm
bfe.u32 %r2385, %r2398, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1157, %r2385, 4;
add.s64 %rd1158, %rd42, %rd1157;
ld.shared.u32 %r3614, [%rd1158];
// inline asm
bfe.u32 %r2389, %r2398, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1159, %r2389, 4;
add.s64 %rd1160, %rd45, %rd1159;
ld.shared.u32 %r3615, [%rd1160];
add.s32 %r3616, %r3615, %r3614;
// inline asm
bfe.u32 %r2393, %r2398, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1161, %r2393, 4;
add.s64 %rd1162, %rd48, %rd1161;
ld.shared.u32 %r3617, [%rd1162];
xor.b32 %r3618, %r3617, %r3616;
// inline asm
bfe.u32 %r2397, %r2398, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1163, %r2397, 4;
add.s64 %rd1164, %rd51, %rd1163;
ld.shared.u32 %r3619, [%rd1164];
add.s32 %r3620, %r3619, %r3618;
xor.b32 %r3621, %r72, %r2382;
xor.b32 %r2414, %r3621, %r3620;
// inline asm
bfe.u32 %r2401, %r2414, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1165, %r2401, 4;
add.s64 %rd1166, %rd42, %rd1165;
ld.shared.u32 %r3622, [%rd1166];
// inline asm
bfe.u32 %r2405, %r2414, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1167, %r2405, 4;
add.s64 %rd1168, %rd45, %rd1167;
ld.shared.u32 %r3623, [%rd1168];
add.s32 %r3624, %r3623, %r3622;
// inline asm
bfe.u32 %r2409, %r2414, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1169, %r2409, 4;
add.s64 %rd1170, %rd48, %rd1169;
ld.shared.u32 %r3625, [%rd1170];
xor.b32 %r3626, %r3625, %r3624;
// inline asm
bfe.u32 %r2413, %r2414, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1171, %r2413, 4;
add.s64 %rd1172, %rd51, %rd1171;
ld.shared.u32 %r3627, [%rd1172];
add.s32 %r3628, %r3627, %r3626;
xor.b32 %r3629, %r75, %r2398;
xor.b32 %r2430, %r3629, %r3628;
// inline asm
bfe.u32 %r2417, %r2430, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1173, %r2417, 4;
add.s64 %rd1174, %rd42, %rd1173;
ld.shared.u32 %r3630, [%rd1174];
// inline asm
bfe.u32 %r2421, %r2430, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1175, %r2421, 4;
add.s64 %rd1176, %rd45, %rd1175;
ld.shared.u32 %r3631, [%rd1176];
add.s32 %r3632, %r3631, %r3630;
// inline asm
bfe.u32 %r2425, %r2430, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1177, %r2425, 4;
add.s64 %rd1178, %rd48, %rd1177;
ld.shared.u32 %r3633, [%rd1178];
xor.b32 %r3634, %r3633, %r3632;
// inline asm
bfe.u32 %r2429, %r2430, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1179, %r2429, 4;
add.s64 %rd1180, %rd51, %rd1179;
ld.shared.u32 %r3635, [%rd1180];
add.s32 %r3636, %r3635, %r3634;
xor.b32 %r3637, %r74, %r2414;
xor.b32 %r2446, %r3637, %r3636;
// inline asm
bfe.u32 %r2433, %r2446, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1181, %r2433, 4;
add.s64 %rd1182, %rd42, %rd1181;
ld.shared.u32 %r3638, [%rd1182];
// inline asm
bfe.u32 %r2437, %r2446, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1183, %r2437, 4;
add.s64 %rd1184, %rd45, %rd1183;
ld.shared.u32 %r3639, [%rd1184];
add.s32 %r3640, %r3639, %r3638;
// inline asm
bfe.u32 %r2441, %r2446, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1185, %r2441, 4;
add.s64 %rd1186, %rd48, %rd1185;
ld.shared.u32 %r3641, [%rd1186];
xor.b32 %r3642, %r3641, %r3640;
// inline asm
bfe.u32 %r2445, %r2446, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1187, %r2445, 4;
add.s64 %rd1188, %rd51, %rd1187;
ld.shared.u32 %r3643, [%rd1188];
add.s32 %r3644, %r3643, %r3642;
xor.b32 %r3645, %r77, %r2430;
xor.b32 %r2462, %r3645, %r3644;
// inline asm
bfe.u32 %r2449, %r2462, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1189, %r2449, 4;
add.s64 %rd1190, %rd42, %rd1189;
ld.shared.u32 %r3646, [%rd1190];
// inline asm
bfe.u32 %r2453, %r2462, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1191, %r2453, 4;
add.s64 %rd1192, %rd45, %rd1191;
ld.shared.u32 %r3647, [%rd1192];
add.s32 %r3648, %r3647, %r3646;
// inline asm
bfe.u32 %r2457, %r2462, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1193, %r2457, 4;
add.s64 %rd1194, %rd48, %rd1193;
ld.shared.u32 %r3649, [%rd1194];
xor.b32 %r3650, %r3649, %r3648;
// inline asm
bfe.u32 %r2461, %r2462, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1195, %r2461, 4;
add.s64 %rd1196, %rd51, %rd1195;
ld.shared.u32 %r3651, [%rd1196];
add.s32 %r3652, %r3651, %r3650;
xor.b32 %r3653, %r76, %r2446;
xor.b32 %r2478, %r3653, %r3652;
// inline asm
bfe.u32 %r2465, %r2478, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1197, %r2465, 4;
add.s64 %rd1198, %rd42, %rd1197;
ld.shared.u32 %r3654, [%rd1198];
// inline asm
bfe.u32 %r2469, %r2478, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1199, %r2469, 4;
add.s64 %rd1200, %rd45, %rd1199;
ld.shared.u32 %r3655, [%rd1200];
add.s32 %r3656, %r3655, %r3654;
// inline asm
bfe.u32 %r2473, %r2478, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1201, %r2473, 4;
add.s64 %rd1202, %rd48, %rd1201;
ld.shared.u32 %r3657, [%rd1202];
xor.b32 %r3658, %r3657, %r3656;
// inline asm
bfe.u32 %r2477, %r2478, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1203, %r2477, 4;
add.s64 %rd1204, %rd51, %rd1203;
ld.shared.u32 %r3659, [%rd1204];
add.s32 %r3660, %r3659, %r3658;
xor.b32 %r3661, %r79, %r2462;
xor.b32 %r2494, %r3661, %r3660;
// inline asm
bfe.u32 %r2481, %r2494, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1205, %r2481, 4;
add.s64 %rd1206, %rd42, %rd1205;
ld.shared.u32 %r3662, [%rd1206];
// inline asm
bfe.u32 %r2485, %r2494, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1207, %r2485, 4;
add.s64 %rd1208, %rd45, %rd1207;
ld.shared.u32 %r3663, [%rd1208];
add.s32 %r3664, %r3663, %r3662;
// inline asm
bfe.u32 %r2489, %r2494, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1209, %r2489, 4;
add.s64 %rd1210, %rd48, %rd1209;
ld.shared.u32 %r3665, [%rd1210];
xor.b32 %r3666, %r3665, %r3664;
// inline asm
bfe.u32 %r2493, %r2494, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1211, %r2493, 4;
add.s64 %rd1212, %rd51, %rd1211;
ld.shared.u32 %r3667, [%rd1212];
add.s32 %r3668, %r3667, %r3666;
xor.b32 %r3669, %r78, %r2478;
xor.b32 %r2510, %r3669, %r3668;
// inline asm
bfe.u32 %r2497, %r2510, %r2499, %r2512;
// inline asm
mul.wide.u32 %rd1213, %r2497, 4;
add.s64 %rd1214, %rd42, %rd1213;
ld.shared.u32 %r3670, [%rd1214];
// inline asm
bfe.u32 %r2501, %r2510, %r2503, %r2512;
// inline asm
mul.wide.u32 %rd1215, %r2501, 4;
add.s64 %rd1216, %rd45, %rd1215;
ld.shared.u32 %r3671, [%rd1216];
add.s32 %r3672, %r3671, %r3670;
// inline asm
bfe.u32 %r2505, %r2510, %r2512, %r2512;
// inline asm
mul.wide.u32 %rd1217, %r2505, 4;
add.s64 %rd1218, %rd48, %rd1217;
ld.shared.u32 %r3673, [%rd1218];
xor.b32 %r3674, %r3673, %r3672;
// inline asm
bfe.u32 %r2509, %r2510, %r208, %r2512;
// inline asm
mul.wide.u32 %rd1219, %r2509, 4;
add.s64 %rd1220, %rd51, %rd1219;
ld.shared.u32 %r3675, [%rd1220];
add.s32 %r3676, %r3675, %r3674;
xor.b32 %r3677, %r2655, %r2494;
xor.b32 %r80, %r3677, %r3676;
xor.b32 %r81, %r2657, %r2510;
// ---------------------------------------------------------------------------
// Loop BB4_6: repeatedly runs a 16-round Feistel network over the 64-bit
// state (%r10315, %r10314) and streams each result into shared memory.
//
//   state in  : %r10315 / %r10314, seeded from %r81 / %r80 before the loop
//   round keys: %r64..%r81 (loop-invariant inside this loop)
//   tables    : four shared-memory u32 tables based at %rd42, %rd45, %rd48,
//               %rd51, each indexed by one byte of the round input
//   output    : two u32 words per iteration via %rd3495 (starts at %rd3,
//               advances by 8 bytes)
//
// Round function used below, for a 32-bit input x:
//   F(x) = ((T42[x>>24 & 0xff] + T45[x>>16 & 0xff]) ^ T48[x>>8 & 0xff])
//          + T51[bfe(x, %r208, 8)]
// The additions are 32-bit and wrap around.
//
// NOTE(review): %r208 is defined outside this chunk. It is used both as the
// bit offset of the fourth byte extract and as the initial loop counter, so
// it is presumably 0 -- confirm. With that value the loop runs 128 times
// and writes 256 words.
// NOTE(review): the structure matches a Blowfish encryption used in key
// expansion (the file head declares c_sbox0); whether %rd3 aliases one of
// the lookup tables cannot be seen from here -- confirm against the setup code.
// ---------------------------------------------------------------------------
mov.u64 %rd3495, %rd3;
mov.u32 %r10310, %r208;
mov.u32 %r10314, %r80;
mov.u32 %r10315, %r81;
BB4_6:
// Bit offsets (16, 24) and field width / offset (8) for the bfe byte
// extracts; loop-invariant but re-materialised on every iteration.
mov.u32 %r10277, 16;
mov.u32 %r10276, 8;
mov.u32 %r10275, 24;
// Input whitening: %r3691 = %r65 ^ %r10315
xor.b32 %r3691, %r65, %r10315;
// Round 1: %r3707 = %r64 ^ %r10314 ^ F(%r3691)
// inline asm
bfe.u32 %r3678, %r3691, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1224, %r3678, 4;
add.s64 %rd1225, %rd42, %rd1224;
ld.shared.u32 %r3935, [%rd1225];
// inline asm
bfe.u32 %r3682, %r3691, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1228, %r3682, 4;
add.s64 %rd1229, %rd45, %rd1228;
ld.shared.u32 %r3936, [%rd1229];
add.s32 %r3937, %r3936, %r3935;
// inline asm
bfe.u32 %r3686, %r3691, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1232, %r3686, 4;
add.s64 %rd1233, %rd48, %rd1232;
ld.shared.u32 %r3938, [%rd1233];
xor.b32 %r3939, %r3938, %r3937;
// inline asm
bfe.u32 %r3690, %r3691, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1236, %r3690, 4;
add.s64 %rd1237, %rd51, %rd1236;
ld.shared.u32 %r3940, [%rd1237];
add.s32 %r3941, %r3940, %r3939;
xor.b32 %r3942, %r64, %r10314;
xor.b32 %r3707, %r3942, %r3941;
// Round 2: %r3723 = %r67 ^ %r3691 ^ F(%r3707)
// inline asm
bfe.u32 %r3694, %r3707, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1238, %r3694, 4;
add.s64 %rd1239, %rd42, %rd1238;
ld.shared.u32 %r3943, [%rd1239];
// inline asm
bfe.u32 %r3698, %r3707, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1240, %r3698, 4;
add.s64 %rd1241, %rd45, %rd1240;
ld.shared.u32 %r3944, [%rd1241];
add.s32 %r3945, %r3944, %r3943;
// inline asm
bfe.u32 %r3702, %r3707, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1242, %r3702, 4;
add.s64 %rd1243, %rd48, %rd1242;
ld.shared.u32 %r3946, [%rd1243];
xor.b32 %r3947, %r3946, %r3945;
// inline asm
bfe.u32 %r3706, %r3707, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1244, %r3706, 4;
add.s64 %rd1245, %rd51, %rd1244;
ld.shared.u32 %r3948, [%rd1245];
add.s32 %r3949, %r3948, %r3947;
xor.b32 %r3950, %r67, %r3691;
xor.b32 %r3723, %r3950, %r3949;
// Round 3: %r3739 = %r66 ^ %r3707 ^ F(%r3723)
// inline asm
bfe.u32 %r3710, %r3723, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1246, %r3710, 4;
add.s64 %rd1247, %rd42, %rd1246;
ld.shared.u32 %r3951, [%rd1247];
// inline asm
bfe.u32 %r3714, %r3723, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1248, %r3714, 4;
add.s64 %rd1249, %rd45, %rd1248;
ld.shared.u32 %r3952, [%rd1249];
add.s32 %r3953, %r3952, %r3951;
// inline asm
bfe.u32 %r3718, %r3723, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1250, %r3718, 4;
add.s64 %rd1251, %rd48, %rd1250;
ld.shared.u32 %r3954, [%rd1251];
xor.b32 %r3955, %r3954, %r3953;
// inline asm
bfe.u32 %r3722, %r3723, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1252, %r3722, 4;
add.s64 %rd1253, %rd51, %rd1252;
ld.shared.u32 %r3956, [%rd1253];
add.s32 %r3957, %r3956, %r3955;
xor.b32 %r3958, %r66, %r3707;
xor.b32 %r3739, %r3958, %r3957;
// Round 4: %r3755 = %r69 ^ %r3723 ^ F(%r3739)
// inline asm
bfe.u32 %r3726, %r3739, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1254, %r3726, 4;
add.s64 %rd1255, %rd42, %rd1254;
ld.shared.u32 %r3959, [%rd1255];
// inline asm
bfe.u32 %r3730, %r3739, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1256, %r3730, 4;
add.s64 %rd1257, %rd45, %rd1256;
ld.shared.u32 %r3960, [%rd1257];
add.s32 %r3961, %r3960, %r3959;
// inline asm
bfe.u32 %r3734, %r3739, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1258, %r3734, 4;
add.s64 %rd1259, %rd48, %rd1258;
ld.shared.u32 %r3962, [%rd1259];
xor.b32 %r3963, %r3962, %r3961;
// inline asm
bfe.u32 %r3738, %r3739, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1260, %r3738, 4;
add.s64 %rd1261, %rd51, %rd1260;
ld.shared.u32 %r3964, [%rd1261];
add.s32 %r3965, %r3964, %r3963;
xor.b32 %r3966, %r69, %r3723;
xor.b32 %r3755, %r3966, %r3965;
// Round 5: %r3771 = %r68 ^ %r3739 ^ F(%r3755)
// inline asm
bfe.u32 %r3742, %r3755, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1262, %r3742, 4;
add.s64 %rd1263, %rd42, %rd1262;
ld.shared.u32 %r3967, [%rd1263];
// inline asm
bfe.u32 %r3746, %r3755, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1264, %r3746, 4;
add.s64 %rd1265, %rd45, %rd1264;
ld.shared.u32 %r3968, [%rd1265];
add.s32 %r3969, %r3968, %r3967;
// inline asm
bfe.u32 %r3750, %r3755, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1266, %r3750, 4;
add.s64 %rd1267, %rd48, %rd1266;
ld.shared.u32 %r3970, [%rd1267];
xor.b32 %r3971, %r3970, %r3969;
// inline asm
bfe.u32 %r3754, %r3755, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1268, %r3754, 4;
add.s64 %rd1269, %rd51, %rd1268;
ld.shared.u32 %r3972, [%rd1269];
add.s32 %r3973, %r3972, %r3971;
xor.b32 %r3974, %r68, %r3739;
xor.b32 %r3771, %r3974, %r3973;
// Round 6: %r3787 = %r71 ^ %r3755 ^ F(%r3771)
// inline asm
bfe.u32 %r3758, %r3771, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1270, %r3758, 4;
add.s64 %rd1271, %rd42, %rd1270;
ld.shared.u32 %r3975, [%rd1271];
// inline asm
bfe.u32 %r3762, %r3771, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1272, %r3762, 4;
add.s64 %rd1273, %rd45, %rd1272;
ld.shared.u32 %r3976, [%rd1273];
add.s32 %r3977, %r3976, %r3975;
// inline asm
bfe.u32 %r3766, %r3771, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1274, %r3766, 4;
add.s64 %rd1275, %rd48, %rd1274;
ld.shared.u32 %r3978, [%rd1275];
xor.b32 %r3979, %r3978, %r3977;
// inline asm
bfe.u32 %r3770, %r3771, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1276, %r3770, 4;
add.s64 %rd1277, %rd51, %rd1276;
ld.shared.u32 %r3980, [%rd1277];
add.s32 %r3981, %r3980, %r3979;
xor.b32 %r3982, %r71, %r3755;
xor.b32 %r3787, %r3982, %r3981;
// Round 7: %r3803 = %r70 ^ %r3771 ^ F(%r3787)
// inline asm
bfe.u32 %r3774, %r3787, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1278, %r3774, 4;
add.s64 %rd1279, %rd42, %rd1278;
ld.shared.u32 %r3983, [%rd1279];
// inline asm
bfe.u32 %r3778, %r3787, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1280, %r3778, 4;
add.s64 %rd1281, %rd45, %rd1280;
ld.shared.u32 %r3984, [%rd1281];
add.s32 %r3985, %r3984, %r3983;
// inline asm
bfe.u32 %r3782, %r3787, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1282, %r3782, 4;
add.s64 %rd1283, %rd48, %rd1282;
ld.shared.u32 %r3986, [%rd1283];
xor.b32 %r3987, %r3986, %r3985;
// inline asm
bfe.u32 %r3786, %r3787, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1284, %r3786, 4;
add.s64 %rd1285, %rd51, %rd1284;
ld.shared.u32 %r3988, [%rd1285];
add.s32 %r3989, %r3988, %r3987;
xor.b32 %r3990, %r70, %r3771;
xor.b32 %r3803, %r3990, %r3989;
// Round 8: %r3819 = %r73 ^ %r3787 ^ F(%r3803)
// inline asm
bfe.u32 %r3790, %r3803, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1286, %r3790, 4;
add.s64 %rd1287, %rd42, %rd1286;
ld.shared.u32 %r3991, [%rd1287];
// inline asm
bfe.u32 %r3794, %r3803, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1288, %r3794, 4;
add.s64 %rd1289, %rd45, %rd1288;
ld.shared.u32 %r3992, [%rd1289];
add.s32 %r3993, %r3992, %r3991;
// inline asm
bfe.u32 %r3798, %r3803, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1290, %r3798, 4;
add.s64 %rd1291, %rd48, %rd1290;
ld.shared.u32 %r3994, [%rd1291];
xor.b32 %r3995, %r3994, %r3993;
// inline asm
bfe.u32 %r3802, %r3803, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1292, %r3802, 4;
add.s64 %rd1293, %rd51, %rd1292;
ld.shared.u32 %r3996, [%rd1293];
add.s32 %r3997, %r3996, %r3995;
xor.b32 %r3998, %r73, %r3787;
xor.b32 %r3819, %r3998, %r3997;
// Round 9: %r3835 = %r72 ^ %r3803 ^ F(%r3819)
// inline asm
bfe.u32 %r3806, %r3819, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1294, %r3806, 4;
add.s64 %rd1295, %rd42, %rd1294;
ld.shared.u32 %r3999, [%rd1295];
// inline asm
bfe.u32 %r3810, %r3819, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1296, %r3810, 4;
add.s64 %rd1297, %rd45, %rd1296;
ld.shared.u32 %r4000, [%rd1297];
add.s32 %r4001, %r4000, %r3999;
// inline asm
bfe.u32 %r3814, %r3819, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1298, %r3814, 4;
add.s64 %rd1299, %rd48, %rd1298;
ld.shared.u32 %r4002, [%rd1299];
xor.b32 %r4003, %r4002, %r4001;
// inline asm
bfe.u32 %r3818, %r3819, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1300, %r3818, 4;
add.s64 %rd1301, %rd51, %rd1300;
ld.shared.u32 %r4004, [%rd1301];
add.s32 %r4005, %r4004, %r4003;
xor.b32 %r4006, %r72, %r3803;
xor.b32 %r3835, %r4006, %r4005;
// Round 10: %r3851 = %r75 ^ %r3819 ^ F(%r3835)
// inline asm
bfe.u32 %r3822, %r3835, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1302, %r3822, 4;
add.s64 %rd1303, %rd42, %rd1302;
ld.shared.u32 %r4007, [%rd1303];
// inline asm
bfe.u32 %r3826, %r3835, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1304, %r3826, 4;
add.s64 %rd1305, %rd45, %rd1304;
ld.shared.u32 %r4008, [%rd1305];
add.s32 %r4009, %r4008, %r4007;
// inline asm
bfe.u32 %r3830, %r3835, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1306, %r3830, 4;
add.s64 %rd1307, %rd48, %rd1306;
ld.shared.u32 %r4010, [%rd1307];
xor.b32 %r4011, %r4010, %r4009;
// inline asm
bfe.u32 %r3834, %r3835, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1308, %r3834, 4;
add.s64 %rd1309, %rd51, %rd1308;
ld.shared.u32 %r4012, [%rd1309];
add.s32 %r4013, %r4012, %r4011;
xor.b32 %r4014, %r75, %r3819;
xor.b32 %r3851, %r4014, %r4013;
// Round 11: %r3867 = %r74 ^ %r3835 ^ F(%r3851)
// inline asm
bfe.u32 %r3838, %r3851, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1310, %r3838, 4;
add.s64 %rd1311, %rd42, %rd1310;
ld.shared.u32 %r4015, [%rd1311];
// inline asm
bfe.u32 %r3842, %r3851, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1312, %r3842, 4;
add.s64 %rd1313, %rd45, %rd1312;
ld.shared.u32 %r4016, [%rd1313];
add.s32 %r4017, %r4016, %r4015;
// inline asm
bfe.u32 %r3846, %r3851, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1314, %r3846, 4;
add.s64 %rd1315, %rd48, %rd1314;
ld.shared.u32 %r4018, [%rd1315];
xor.b32 %r4019, %r4018, %r4017;
// inline asm
bfe.u32 %r3850, %r3851, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1316, %r3850, 4;
add.s64 %rd1317, %rd51, %rd1316;
ld.shared.u32 %r4020, [%rd1317];
add.s32 %r4021, %r4020, %r4019;
xor.b32 %r4022, %r74, %r3835;
xor.b32 %r3867, %r4022, %r4021;
// Round 12: %r3883 = %r77 ^ %r3851 ^ F(%r3867)
// inline asm
bfe.u32 %r3854, %r3867, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1318, %r3854, 4;
add.s64 %rd1319, %rd42, %rd1318;
ld.shared.u32 %r4023, [%rd1319];
// inline asm
bfe.u32 %r3858, %r3867, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1320, %r3858, 4;
add.s64 %rd1321, %rd45, %rd1320;
ld.shared.u32 %r4024, [%rd1321];
add.s32 %r4025, %r4024, %r4023;
// inline asm
bfe.u32 %r3862, %r3867, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1322, %r3862, 4;
add.s64 %rd1323, %rd48, %rd1322;
ld.shared.u32 %r4026, [%rd1323];
xor.b32 %r4027, %r4026, %r4025;
// inline asm
bfe.u32 %r3866, %r3867, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1324, %r3866, 4;
add.s64 %rd1325, %rd51, %rd1324;
ld.shared.u32 %r4028, [%rd1325];
add.s32 %r4029, %r4028, %r4027;
xor.b32 %r4030, %r77, %r3851;
xor.b32 %r3883, %r4030, %r4029;
// Round 13: %r3899 = %r76 ^ %r3867 ^ F(%r3883)
// inline asm
bfe.u32 %r3870, %r3883, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1326, %r3870, 4;
add.s64 %rd1327, %rd42, %rd1326;
ld.shared.u32 %r4031, [%rd1327];
// inline asm
bfe.u32 %r3874, %r3883, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1328, %r3874, 4;
add.s64 %rd1329, %rd45, %rd1328;
ld.shared.u32 %r4032, [%rd1329];
add.s32 %r4033, %r4032, %r4031;
// inline asm
bfe.u32 %r3878, %r3883, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1330, %r3878, 4;
add.s64 %rd1331, %rd48, %rd1330;
ld.shared.u32 %r4034, [%rd1331];
xor.b32 %r4035, %r4034, %r4033;
// inline asm
bfe.u32 %r3882, %r3883, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1332, %r3882, 4;
add.s64 %rd1333, %rd51, %rd1332;
ld.shared.u32 %r4036, [%rd1333];
add.s32 %r4037, %r4036, %r4035;
xor.b32 %r4038, %r76, %r3867;
xor.b32 %r3899, %r4038, %r4037;
// Round 14: %r3915 = %r79 ^ %r3883 ^ F(%r3899)
// inline asm
bfe.u32 %r3886, %r3899, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1334, %r3886, 4;
add.s64 %rd1335, %rd42, %rd1334;
ld.shared.u32 %r4039, [%rd1335];
// inline asm
bfe.u32 %r3890, %r3899, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1336, %r3890, 4;
add.s64 %rd1337, %rd45, %rd1336;
ld.shared.u32 %r4040, [%rd1337];
add.s32 %r4041, %r4040, %r4039;
// inline asm
bfe.u32 %r3894, %r3899, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1338, %r3894, 4;
add.s64 %rd1339, %rd48, %rd1338;
ld.shared.u32 %r4042, [%rd1339];
xor.b32 %r4043, %r4042, %r4041;
// inline asm
bfe.u32 %r3898, %r3899, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1340, %r3898, 4;
add.s64 %rd1341, %rd51, %rd1340;
ld.shared.u32 %r4044, [%rd1341];
add.s32 %r4045, %r4044, %r4043;
xor.b32 %r4046, %r79, %r3883;
xor.b32 %r3915, %r4046, %r4045;
// Round 15: %r3931 = %r78 ^ %r3899 ^ F(%r3915)
// inline asm
bfe.u32 %r3902, %r3915, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1342, %r3902, 4;
add.s64 %rd1343, %rd42, %rd1342;
ld.shared.u32 %r4047, [%rd1343];
// inline asm
bfe.u32 %r3906, %r3915, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1344, %r3906, 4;
add.s64 %rd1345, %rd45, %rd1344;
ld.shared.u32 %r4048, [%rd1345];
add.s32 %r4049, %r4048, %r4047;
// inline asm
bfe.u32 %r3910, %r3915, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1346, %r3910, 4;
add.s64 %rd1347, %rd48, %rd1346;
ld.shared.u32 %r4050, [%rd1347];
xor.b32 %r4051, %r4050, %r4049;
// inline asm
bfe.u32 %r3914, %r3915, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1348, %r3914, 4;
add.s64 %rd1349, %rd51, %rd1348;
ld.shared.u32 %r4052, [%rd1349];
add.s32 %r4053, %r4052, %r4051;
xor.b32 %r4054, %r78, %r3899;
xor.b32 %r3931, %r4054, %r4053;
// Round 16: %r10314 = %r81 ^ %r3915 ^ F(%r3931)
// inline asm
bfe.u32 %r3918, %r3931, %r10275, %r10276;
// inline asm
mul.wide.u32 %rd1350, %r3918, 4;
add.s64 %rd1351, %rd42, %rd1350;
ld.shared.u32 %r4055, [%rd1351];
// inline asm
bfe.u32 %r3922, %r3931, %r10277, %r10276;
// inline asm
mul.wide.u32 %rd1352, %r3922, 4;
add.s64 %rd1353, %rd45, %rd1352;
ld.shared.u32 %r4056, [%rd1353];
add.s32 %r4057, %r4056, %r4055;
// inline asm
bfe.u32 %r3926, %r3931, %r10276, %r10276;
// inline asm
mul.wide.u32 %rd1354, %r3926, 4;
add.s64 %rd1355, %rd48, %rd1354;
ld.shared.u32 %r4058, [%rd1355];
xor.b32 %r4059, %r4058, %r4057;
// inline asm
bfe.u32 %r3930, %r3931, %r208, %r10276;
// inline asm
mul.wide.u32 %rd1356, %r3930, 4;
add.s64 %rd1357, %rd51, %rd1356;
ld.shared.u32 %r4060, [%rd1357];
add.s32 %r4061, %r4060, %r4059;
xor.b32 %r4062, %r81, %r3915;
xor.b32 %r10314, %r4062, %r4061;
// Output whitening of the other half: %r10315 = %r80 ^ %r3931
xor.b32 %r10315, %r80, %r3931;
// Emit the new state as two consecutive shared-memory words; the same
// pair is carried into the next iteration as its input.
st.shared.u32 [%rd3495], %r10315;
st.shared.u32 [%rd3495+4], %r10314;
// Advance the output pointer by 8 bytes and the word counter by 2;
// repeat while the counter is below 256 (unsigned compare).
add.s64 %rd3495, %rd3495, 8;
add.s32 %r10310, %r10310, 2;
setp.lt.u32 %p4, %r10310, 256;
@%p4 bra BB4_6;
// ---------------------------------------------------------------------------
// Loop BB4_8: same 16-round Feistel network as the preceding loop, but the
// results are written through %rd3496, which starts at %rd4.
//
//   state in  : %r10315 / %r10314 are NOT re-seeded here; they carry the
//               last values produced by the preceding loop
//   round keys: %r64..%r81 (loop-invariant inside this loop)
//   tables    : four shared-memory u32 tables based at %rd42, %rd45, %rd48,
//               %rd51, each indexed by one byte of the round input
//   output    : two u32 words per iteration, pointer advances by 8 bytes
//
// Round function used below, for a 32-bit input x:
//   F(x) = ((T42[x>>24 & 0xff] + T45[x>>16 & 0xff]) ^ T48[x>>8 & 0xff])
//          + T51[bfe(x, %r208, 8)]
// The additions are 32-bit and wrap around.
//
// NOTE(review): %r208 is defined outside this chunk. It is used both as the
// bit offset of the fourth byte extract and as the initial loop counter, so
// it is presumably 0 -- confirm. With that value the loop runs 128 times
// and writes 256 words.
// NOTE(review): %rd4 presumably addresses the next table to be regenerated;
// its definition is outside this chunk -- confirm against the setup code.
// ---------------------------------------------------------------------------
mov.u64 %rd3496, %rd4;
mov.u32 %r10313, %r208;
BB4_8:
// Bit offsets (16, 24) and field width / offset (8) for the bfe byte
// extracts; loop-invariant but re-materialised on every iteration.
mov.u32 %r10280, 16;
mov.u32 %r10279, 8;
mov.u32 %r10278, 24;
// Input whitening: %r4076 = %r65 ^ %r10315
xor.b32 %r4076, %r65, %r10315;
// Round 1: %r4092 = %r64 ^ %r10314 ^ F(%r4076)
// inline asm
bfe.u32 %r4063, %r4076, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1361, %r4063, 4;
add.s64 %rd1362, %rd42, %rd1361;
ld.shared.u32 %r4320, [%rd1362];
// inline asm
bfe.u32 %r4067, %r4076, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1365, %r4067, 4;
add.s64 %rd1366, %rd45, %rd1365;
ld.shared.u32 %r4321, [%rd1366];
add.s32 %r4322, %r4321, %r4320;
// inline asm
bfe.u32 %r4071, %r4076, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1369, %r4071, 4;
add.s64 %rd1370, %rd48, %rd1369;
ld.shared.u32 %r4323, [%rd1370];
xor.b32 %r4324, %r4323, %r4322;
// inline asm
bfe.u32 %r4075, %r4076, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1373, %r4075, 4;
add.s64 %rd1374, %rd51, %rd1373;
ld.shared.u32 %r4325, [%rd1374];
add.s32 %r4326, %r4325, %r4324;
xor.b32 %r4327, %r64, %r10314;
xor.b32 %r4092, %r4327, %r4326;
// Round 2: %r4108 = %r67 ^ %r4076 ^ F(%r4092)
// inline asm
bfe.u32 %r4079, %r4092, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1375, %r4079, 4;
add.s64 %rd1376, %rd42, %rd1375;
ld.shared.u32 %r4328, [%rd1376];
// inline asm
bfe.u32 %r4083, %r4092, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1377, %r4083, 4;
add.s64 %rd1378, %rd45, %rd1377;
ld.shared.u32 %r4329, [%rd1378];
add.s32 %r4330, %r4329, %r4328;
// inline asm
bfe.u32 %r4087, %r4092, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1379, %r4087, 4;
add.s64 %rd1380, %rd48, %rd1379;
ld.shared.u32 %r4331, [%rd1380];
xor.b32 %r4332, %r4331, %r4330;
// inline asm
bfe.u32 %r4091, %r4092, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1381, %r4091, 4;
add.s64 %rd1382, %rd51, %rd1381;
ld.shared.u32 %r4333, [%rd1382];
add.s32 %r4334, %r4333, %r4332;
xor.b32 %r4335, %r67, %r4076;
xor.b32 %r4108, %r4335, %r4334;
// Round 3: %r4124 = %r66 ^ %r4092 ^ F(%r4108)
// inline asm
bfe.u32 %r4095, %r4108, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1383, %r4095, 4;
add.s64 %rd1384, %rd42, %rd1383;
ld.shared.u32 %r4336, [%rd1384];
// inline asm
bfe.u32 %r4099, %r4108, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1385, %r4099, 4;
add.s64 %rd1386, %rd45, %rd1385;
ld.shared.u32 %r4337, [%rd1386];
add.s32 %r4338, %r4337, %r4336;
// inline asm
bfe.u32 %r4103, %r4108, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1387, %r4103, 4;
add.s64 %rd1388, %rd48, %rd1387;
ld.shared.u32 %r4339, [%rd1388];
xor.b32 %r4340, %r4339, %r4338;
// inline asm
bfe.u32 %r4107, %r4108, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1389, %r4107, 4;
add.s64 %rd1390, %rd51, %rd1389;
ld.shared.u32 %r4341, [%rd1390];
add.s32 %r4342, %r4341, %r4340;
xor.b32 %r4343, %r66, %r4092;
xor.b32 %r4124, %r4343, %r4342;
// Round 4: %r4140 = %r69 ^ %r4108 ^ F(%r4124)
// inline asm
bfe.u32 %r4111, %r4124, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1391, %r4111, 4;
add.s64 %rd1392, %rd42, %rd1391;
ld.shared.u32 %r4344, [%rd1392];
// inline asm
bfe.u32 %r4115, %r4124, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1393, %r4115, 4;
add.s64 %rd1394, %rd45, %rd1393;
ld.shared.u32 %r4345, [%rd1394];
add.s32 %r4346, %r4345, %r4344;
// inline asm
bfe.u32 %r4119, %r4124, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1395, %r4119, 4;
add.s64 %rd1396, %rd48, %rd1395;
ld.shared.u32 %r4347, [%rd1396];
xor.b32 %r4348, %r4347, %r4346;
// inline asm
bfe.u32 %r4123, %r4124, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1397, %r4123, 4;
add.s64 %rd1398, %rd51, %rd1397;
ld.shared.u32 %r4349, [%rd1398];
add.s32 %r4350, %r4349, %r4348;
xor.b32 %r4351, %r69, %r4108;
xor.b32 %r4140, %r4351, %r4350;
// Round 5: %r4156 = %r68 ^ %r4124 ^ F(%r4140)
// inline asm
bfe.u32 %r4127, %r4140, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1399, %r4127, 4;
add.s64 %rd1400, %rd42, %rd1399;
ld.shared.u32 %r4352, [%rd1400];
// inline asm
bfe.u32 %r4131, %r4140, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1401, %r4131, 4;
add.s64 %rd1402, %rd45, %rd1401;
ld.shared.u32 %r4353, [%rd1402];
add.s32 %r4354, %r4353, %r4352;
// inline asm
bfe.u32 %r4135, %r4140, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1403, %r4135, 4;
add.s64 %rd1404, %rd48, %rd1403;
ld.shared.u32 %r4355, [%rd1404];
xor.b32 %r4356, %r4355, %r4354;
// inline asm
bfe.u32 %r4139, %r4140, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1405, %r4139, 4;
add.s64 %rd1406, %rd51, %rd1405;
ld.shared.u32 %r4357, [%rd1406];
add.s32 %r4358, %r4357, %r4356;
xor.b32 %r4359, %r68, %r4124;
xor.b32 %r4156, %r4359, %r4358;
// Round 6: %r4172 = %r71 ^ %r4140 ^ F(%r4156)
// inline asm
bfe.u32 %r4143, %r4156, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1407, %r4143, 4;
add.s64 %rd1408, %rd42, %rd1407;
ld.shared.u32 %r4360, [%rd1408];
// inline asm
bfe.u32 %r4147, %r4156, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1409, %r4147, 4;
add.s64 %rd1410, %rd45, %rd1409;
ld.shared.u32 %r4361, [%rd1410];
add.s32 %r4362, %r4361, %r4360;
// inline asm
bfe.u32 %r4151, %r4156, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1411, %r4151, 4;
add.s64 %rd1412, %rd48, %rd1411;
ld.shared.u32 %r4363, [%rd1412];
xor.b32 %r4364, %r4363, %r4362;
// inline asm
bfe.u32 %r4155, %r4156, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1413, %r4155, 4;
add.s64 %rd1414, %rd51, %rd1413;
ld.shared.u32 %r4365, [%rd1414];
add.s32 %r4366, %r4365, %r4364;
xor.b32 %r4367, %r71, %r4140;
xor.b32 %r4172, %r4367, %r4366;
// Round 7: %r4188 = %r70 ^ %r4156 ^ F(%r4172)
// inline asm
bfe.u32 %r4159, %r4172, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1415, %r4159, 4;
add.s64 %rd1416, %rd42, %rd1415;
ld.shared.u32 %r4368, [%rd1416];
// inline asm
bfe.u32 %r4163, %r4172, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1417, %r4163, 4;
add.s64 %rd1418, %rd45, %rd1417;
ld.shared.u32 %r4369, [%rd1418];
add.s32 %r4370, %r4369, %r4368;
// inline asm
bfe.u32 %r4167, %r4172, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1419, %r4167, 4;
add.s64 %rd1420, %rd48, %rd1419;
ld.shared.u32 %r4371, [%rd1420];
xor.b32 %r4372, %r4371, %r4370;
// inline asm
bfe.u32 %r4171, %r4172, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1421, %r4171, 4;
add.s64 %rd1422, %rd51, %rd1421;
ld.shared.u32 %r4373, [%rd1422];
add.s32 %r4374, %r4373, %r4372;
xor.b32 %r4375, %r70, %r4156;
xor.b32 %r4188, %r4375, %r4374;
// Round 8: %r4204 = %r73 ^ %r4172 ^ F(%r4188)
// inline asm
bfe.u32 %r4175, %r4188, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1423, %r4175, 4;
add.s64 %rd1424, %rd42, %rd1423;
ld.shared.u32 %r4376, [%rd1424];
// inline asm
bfe.u32 %r4179, %r4188, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1425, %r4179, 4;
add.s64 %rd1426, %rd45, %rd1425;
ld.shared.u32 %r4377, [%rd1426];
add.s32 %r4378, %r4377, %r4376;
// inline asm
bfe.u32 %r4183, %r4188, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1427, %r4183, 4;
add.s64 %rd1428, %rd48, %rd1427;
ld.shared.u32 %r4379, [%rd1428];
xor.b32 %r4380, %r4379, %r4378;
// inline asm
bfe.u32 %r4187, %r4188, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1429, %r4187, 4;
add.s64 %rd1430, %rd51, %rd1429;
ld.shared.u32 %r4381, [%rd1430];
add.s32 %r4382, %r4381, %r4380;
xor.b32 %r4383, %r73, %r4172;
xor.b32 %r4204, %r4383, %r4382;
// Round 9: %r4220 = %r72 ^ %r4188 ^ F(%r4204)
// inline asm
bfe.u32 %r4191, %r4204, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1431, %r4191, 4;
add.s64 %rd1432, %rd42, %rd1431;
ld.shared.u32 %r4384, [%rd1432];
// inline asm
bfe.u32 %r4195, %r4204, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1433, %r4195, 4;
add.s64 %rd1434, %rd45, %rd1433;
ld.shared.u32 %r4385, [%rd1434];
add.s32 %r4386, %r4385, %r4384;
// inline asm
bfe.u32 %r4199, %r4204, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1435, %r4199, 4;
add.s64 %rd1436, %rd48, %rd1435;
ld.shared.u32 %r4387, [%rd1436];
xor.b32 %r4388, %r4387, %r4386;
// inline asm
bfe.u32 %r4203, %r4204, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1437, %r4203, 4;
add.s64 %rd1438, %rd51, %rd1437;
ld.shared.u32 %r4389, [%rd1438];
add.s32 %r4390, %r4389, %r4388;
xor.b32 %r4391, %r72, %r4188;
xor.b32 %r4220, %r4391, %r4390;
// Round 10: %r4236 = %r75 ^ %r4204 ^ F(%r4220)
// inline asm
bfe.u32 %r4207, %r4220, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1439, %r4207, 4;
add.s64 %rd1440, %rd42, %rd1439;
ld.shared.u32 %r4392, [%rd1440];
// inline asm
bfe.u32 %r4211, %r4220, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1441, %r4211, 4;
add.s64 %rd1442, %rd45, %rd1441;
ld.shared.u32 %r4393, [%rd1442];
add.s32 %r4394, %r4393, %r4392;
// inline asm
bfe.u32 %r4215, %r4220, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1443, %r4215, 4;
add.s64 %rd1444, %rd48, %rd1443;
ld.shared.u32 %r4395, [%rd1444];
xor.b32 %r4396, %r4395, %r4394;
// inline asm
bfe.u32 %r4219, %r4220, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1445, %r4219, 4;
add.s64 %rd1446, %rd51, %rd1445;
ld.shared.u32 %r4397, [%rd1446];
add.s32 %r4398, %r4397, %r4396;
xor.b32 %r4399, %r75, %r4204;
xor.b32 %r4236, %r4399, %r4398;
// Round 11: %r4252 = %r74 ^ %r4220 ^ F(%r4236)
// inline asm
bfe.u32 %r4223, %r4236, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1447, %r4223, 4;
add.s64 %rd1448, %rd42, %rd1447;
ld.shared.u32 %r4400, [%rd1448];
// inline asm
bfe.u32 %r4227, %r4236, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1449, %r4227, 4;
add.s64 %rd1450, %rd45, %rd1449;
ld.shared.u32 %r4401, [%rd1450];
add.s32 %r4402, %r4401, %r4400;
// inline asm
bfe.u32 %r4231, %r4236, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1451, %r4231, 4;
add.s64 %rd1452, %rd48, %rd1451;
ld.shared.u32 %r4403, [%rd1452];
xor.b32 %r4404, %r4403, %r4402;
// inline asm
bfe.u32 %r4235, %r4236, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1453, %r4235, 4;
add.s64 %rd1454, %rd51, %rd1453;
ld.shared.u32 %r4405, [%rd1454];
add.s32 %r4406, %r4405, %r4404;
xor.b32 %r4407, %r74, %r4220;
xor.b32 %r4252, %r4407, %r4406;
// Round 12: %r4268 = %r77 ^ %r4236 ^ F(%r4252)
// inline asm
bfe.u32 %r4239, %r4252, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1455, %r4239, 4;
add.s64 %rd1456, %rd42, %rd1455;
ld.shared.u32 %r4408, [%rd1456];
// inline asm
bfe.u32 %r4243, %r4252, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1457, %r4243, 4;
add.s64 %rd1458, %rd45, %rd1457;
ld.shared.u32 %r4409, [%rd1458];
add.s32 %r4410, %r4409, %r4408;
// inline asm
bfe.u32 %r4247, %r4252, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1459, %r4247, 4;
add.s64 %rd1460, %rd48, %rd1459;
ld.shared.u32 %r4411, [%rd1460];
xor.b32 %r4412, %r4411, %r4410;
// inline asm
bfe.u32 %r4251, %r4252, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1461, %r4251, 4;
add.s64 %rd1462, %rd51, %rd1461;
ld.shared.u32 %r4413, [%rd1462];
add.s32 %r4414, %r4413, %r4412;
xor.b32 %r4415, %r77, %r4236;
xor.b32 %r4268, %r4415, %r4414;
// Round 13: %r4284 = %r76 ^ %r4252 ^ F(%r4268)
// inline asm
bfe.u32 %r4255, %r4268, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1463, %r4255, 4;
add.s64 %rd1464, %rd42, %rd1463;
ld.shared.u32 %r4416, [%rd1464];
// inline asm
bfe.u32 %r4259, %r4268, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1465, %r4259, 4;
add.s64 %rd1466, %rd45, %rd1465;
ld.shared.u32 %r4417, [%rd1466];
add.s32 %r4418, %r4417, %r4416;
// inline asm
bfe.u32 %r4263, %r4268, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1467, %r4263, 4;
add.s64 %rd1468, %rd48, %rd1467;
ld.shared.u32 %r4419, [%rd1468];
xor.b32 %r4420, %r4419, %r4418;
// inline asm
bfe.u32 %r4267, %r4268, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1469, %r4267, 4;
add.s64 %rd1470, %rd51, %rd1469;
ld.shared.u32 %r4421, [%rd1470];
add.s32 %r4422, %r4421, %r4420;
xor.b32 %r4423, %r76, %r4252;
xor.b32 %r4284, %r4423, %r4422;
// Round 14: %r4300 = %r79 ^ %r4268 ^ F(%r4284)
// inline asm
bfe.u32 %r4271, %r4284, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1471, %r4271, 4;
add.s64 %rd1472, %rd42, %rd1471;
ld.shared.u32 %r4424, [%rd1472];
// inline asm
bfe.u32 %r4275, %r4284, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1473, %r4275, 4;
add.s64 %rd1474, %rd45, %rd1473;
ld.shared.u32 %r4425, [%rd1474];
add.s32 %r4426, %r4425, %r4424;
// inline asm
bfe.u32 %r4279, %r4284, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1475, %r4279, 4;
add.s64 %rd1476, %rd48, %rd1475;
ld.shared.u32 %r4427, [%rd1476];
xor.b32 %r4428, %r4427, %r4426;
// inline asm
bfe.u32 %r4283, %r4284, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1477, %r4283, 4;
add.s64 %rd1478, %rd51, %rd1477;
ld.shared.u32 %r4429, [%rd1478];
add.s32 %r4430, %r4429, %r4428;
xor.b32 %r4431, %r79, %r4268;
xor.b32 %r4300, %r4431, %r4430;
// Round 15: %r4316 = %r78 ^ %r4284 ^ F(%r4300)
// inline asm
bfe.u32 %r4287, %r4300, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1479, %r4287, 4;
add.s64 %rd1480, %rd42, %rd1479;
ld.shared.u32 %r4432, [%rd1480];
// inline asm
bfe.u32 %r4291, %r4300, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1481, %r4291, 4;
add.s64 %rd1482, %rd45, %rd1481;
ld.shared.u32 %r4433, [%rd1482];
add.s32 %r4434, %r4433, %r4432;
// inline asm
bfe.u32 %r4295, %r4300, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1483, %r4295, 4;
add.s64 %rd1484, %rd48, %rd1483;
ld.shared.u32 %r4435, [%rd1484];
xor.b32 %r4436, %r4435, %r4434;
// inline asm
bfe.u32 %r4299, %r4300, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1485, %r4299, 4;
add.s64 %rd1486, %rd51, %rd1485;
ld.shared.u32 %r4437, [%rd1486];
add.s32 %r4438, %r4437, %r4436;
xor.b32 %r4439, %r78, %r4284;
xor.b32 %r4316, %r4439, %r4438;
// Round 16: %r10314 = %r81 ^ %r4300 ^ F(%r4316)
// inline asm
bfe.u32 %r4303, %r4316, %r10278, %r10279;
// inline asm
mul.wide.u32 %rd1487, %r4303, 4;
add.s64 %rd1488, %rd42, %rd1487;
ld.shared.u32 %r4440, [%rd1488];
// inline asm
bfe.u32 %r4307, %r4316, %r10280, %r10279;
// inline asm
mul.wide.u32 %rd1489, %r4307, 4;
add.s64 %rd1490, %rd45, %rd1489;
ld.shared.u32 %r4441, [%rd1490];
add.s32 %r4442, %r4441, %r4440;
// inline asm
bfe.u32 %r4311, %r4316, %r10279, %r10279;
// inline asm
mul.wide.u32 %rd1491, %r4311, 4;
add.s64 %rd1492, %rd48, %rd1491;
ld.shared.u32 %r4443, [%rd1492];
xor.b32 %r4444, %r4443, %r4442;
// inline asm
bfe.u32 %r4315, %r4316, %r208, %r10279;
// inline asm
mul.wide.u32 %rd1493, %r4315, 4;
add.s64 %rd1494, %rd51, %rd1493;
ld.shared.u32 %r4445, [%rd1494];
add.s32 %r4446, %r4445, %r4444;
xor.b32 %r4447, %r81, %r4300;
xor.b32 %r10314, %r4447, %r4446;
// Output whitening of the other half: %r10315 = %r80 ^ %r4316
xor.b32 %r10315, %r80, %r4316;
// Emit the new state as two consecutive shared-memory words; the same
// pair is carried into the next iteration as its input.
st.shared.u32 [%rd3496], %r10315;
st.shared.u32 [%rd3496+4], %r10314;
// Advance the output pointer by 8 bytes and the word counter by 2;
// repeat while the counter is below 256 (unsigned compare).
add.s64 %rd3496, %rd3496, 8;
add.s32 %r10313, %r10313, 2;
setp.lt.u32 %p5, %r10313, 256;
@%p5 bra BB4_8;
// -----------------------------------------------------------------------
// Pre-header and loop BB4_10.
//
// Each iteration transforms the running 64-bit state held in
// (%r10315, %r10314) with a 16-round Feistel network and stores the new
// pair to shared memory at [%rd3497] / [%rd3497+4].  The state is
// loop-carried: the pair written at the bottom of one iteration is the
// input of the next.  On entry the pair holds whatever the preceding code
// left in those registers.
//
// Round function, evaluated 16 times per iteration:
//   F(x) = ((T0[x[31:24]] + T1[x[23:16]]) ^ T2[x[15:8]]) + T3[x{%r208,8}]
// where T0..T3 are tables of 32-bit words in shared memory based at
// %rd42, %rd45, %rd48 and %rd51, each indexed by an 8-bit field of x, and
// x{%r208,8} is the 8-bit field starting at bit %r208.
// NOTE(review): %r208 is defined outside this block; it is presumably 0
// (low byte, and loop counter start) -- confirm.
//
// Round-key registers, in order of use:
//   %r65, %r64, %r67, %r66, %r69, %r68, %r71, %r70, %r73, %r72,
//   %r75, %r74, %r77, %r76, %r79, %r78, %r81, %r80
// NOTE(review): 18 keys + four byte-indexed tables + this F has the shape
// of a Blowfish encipher -- confirm against the kernel's source.
// NOTE(review): if the store target (%rd5 onward) overlaps T0..T3, later
// iterations read words written by earlier ones -- confirm the layout.
// -----------------------------------------------------------------------
// Output cursor starts at %rd5; word counter starts at %r208.
mov.u64 %rd3497, %rd5;
mov.u32 %r10316, %r208;
BB4_10:
// bfe operands: bit positions 16 and 24, and the field width / position 8.
mov.u32 %r10283, 16;
mov.u32 %r10282, 8;
mov.u32 %r10281, 24;
// Input whitening: %r4461 = %r65 ^ %r10315.
xor.b32 %r4461, %r65, %r10315;
// Round 1: %r4477 = %r64 ^ %r10314 ^ F(%r4461).
// inline asm
bfe.u32 %r4448, %r4461, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1498, %r4448, 4;
add.s64 %rd1499, %rd42, %rd1498;
ld.shared.u32 %r4705, [%rd1499];
// inline asm
bfe.u32 %r4452, %r4461, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1502, %r4452, 4;
add.s64 %rd1503, %rd45, %rd1502;
ld.shared.u32 %r4706, [%rd1503];
add.s32 %r4707, %r4706, %r4705;
// inline asm
bfe.u32 %r4456, %r4461, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1506, %r4456, 4;
add.s64 %rd1507, %rd48, %rd1506;
ld.shared.u32 %r4708, [%rd1507];
xor.b32 %r4709, %r4708, %r4707;
// inline asm
bfe.u32 %r4460, %r4461, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1510, %r4460, 4;
add.s64 %rd1511, %rd51, %rd1510;
ld.shared.u32 %r4710, [%rd1511];
add.s32 %r4711, %r4710, %r4709;
xor.b32 %r4712, %r64, %r10314;
xor.b32 %r4477, %r4712, %r4711;
// Round 2: %r4493 = %r67 ^ %r4461 ^ F(%r4477).
// inline asm
bfe.u32 %r4464, %r4477, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1512, %r4464, 4;
add.s64 %rd1513, %rd42, %rd1512;
ld.shared.u32 %r4713, [%rd1513];
// inline asm
bfe.u32 %r4468, %r4477, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1514, %r4468, 4;
add.s64 %rd1515, %rd45, %rd1514;
ld.shared.u32 %r4714, [%rd1515];
add.s32 %r4715, %r4714, %r4713;
// inline asm
bfe.u32 %r4472, %r4477, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1516, %r4472, 4;
add.s64 %rd1517, %rd48, %rd1516;
ld.shared.u32 %r4716, [%rd1517];
xor.b32 %r4717, %r4716, %r4715;
// inline asm
bfe.u32 %r4476, %r4477, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1518, %r4476, 4;
add.s64 %rd1519, %rd51, %rd1518;
ld.shared.u32 %r4718, [%rd1519];
add.s32 %r4719, %r4718, %r4717;
xor.b32 %r4720, %r67, %r4461;
xor.b32 %r4493, %r4720, %r4719;
// Round 3: %r4509 = %r66 ^ %r4477 ^ F(%r4493).
// inline asm
bfe.u32 %r4480, %r4493, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1520, %r4480, 4;
add.s64 %rd1521, %rd42, %rd1520;
ld.shared.u32 %r4721, [%rd1521];
// inline asm
bfe.u32 %r4484, %r4493, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1522, %r4484, 4;
add.s64 %rd1523, %rd45, %rd1522;
ld.shared.u32 %r4722, [%rd1523];
add.s32 %r4723, %r4722, %r4721;
// inline asm
bfe.u32 %r4488, %r4493, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1524, %r4488, 4;
add.s64 %rd1525, %rd48, %rd1524;
ld.shared.u32 %r4724, [%rd1525];
xor.b32 %r4725, %r4724, %r4723;
// inline asm
bfe.u32 %r4492, %r4493, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1526, %r4492, 4;
add.s64 %rd1527, %rd51, %rd1526;
ld.shared.u32 %r4726, [%rd1527];
add.s32 %r4727, %r4726, %r4725;
xor.b32 %r4728, %r66, %r4477;
xor.b32 %r4509, %r4728, %r4727;
// Round 4: %r4525 = %r69 ^ %r4493 ^ F(%r4509).
// inline asm
bfe.u32 %r4496, %r4509, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1528, %r4496, 4;
add.s64 %rd1529, %rd42, %rd1528;
ld.shared.u32 %r4729, [%rd1529];
// inline asm
bfe.u32 %r4500, %r4509, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1530, %r4500, 4;
add.s64 %rd1531, %rd45, %rd1530;
ld.shared.u32 %r4730, [%rd1531];
add.s32 %r4731, %r4730, %r4729;
// inline asm
bfe.u32 %r4504, %r4509, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1532, %r4504, 4;
add.s64 %rd1533, %rd48, %rd1532;
ld.shared.u32 %r4732, [%rd1533];
xor.b32 %r4733, %r4732, %r4731;
// inline asm
bfe.u32 %r4508, %r4509, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1534, %r4508, 4;
add.s64 %rd1535, %rd51, %rd1534;
ld.shared.u32 %r4734, [%rd1535];
add.s32 %r4735, %r4734, %r4733;
xor.b32 %r4736, %r69, %r4493;
xor.b32 %r4525, %r4736, %r4735;
// Round 5: %r4541 = %r68 ^ %r4509 ^ F(%r4525).
// inline asm
bfe.u32 %r4512, %r4525, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1536, %r4512, 4;
add.s64 %rd1537, %rd42, %rd1536;
ld.shared.u32 %r4737, [%rd1537];
// inline asm
bfe.u32 %r4516, %r4525, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1538, %r4516, 4;
add.s64 %rd1539, %rd45, %rd1538;
ld.shared.u32 %r4738, [%rd1539];
add.s32 %r4739, %r4738, %r4737;
// inline asm
bfe.u32 %r4520, %r4525, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1540, %r4520, 4;
add.s64 %rd1541, %rd48, %rd1540;
ld.shared.u32 %r4740, [%rd1541];
xor.b32 %r4741, %r4740, %r4739;
// inline asm
bfe.u32 %r4524, %r4525, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1542, %r4524, 4;
add.s64 %rd1543, %rd51, %rd1542;
ld.shared.u32 %r4742, [%rd1543];
add.s32 %r4743, %r4742, %r4741;
xor.b32 %r4744, %r68, %r4509;
xor.b32 %r4541, %r4744, %r4743;
// Round 6: %r4557 = %r71 ^ %r4525 ^ F(%r4541).
// inline asm
bfe.u32 %r4528, %r4541, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1544, %r4528, 4;
add.s64 %rd1545, %rd42, %rd1544;
ld.shared.u32 %r4745, [%rd1545];
// inline asm
bfe.u32 %r4532, %r4541, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1546, %r4532, 4;
add.s64 %rd1547, %rd45, %rd1546;
ld.shared.u32 %r4746, [%rd1547];
add.s32 %r4747, %r4746, %r4745;
// inline asm
bfe.u32 %r4536, %r4541, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1548, %r4536, 4;
add.s64 %rd1549, %rd48, %rd1548;
ld.shared.u32 %r4748, [%rd1549];
xor.b32 %r4749, %r4748, %r4747;
// inline asm
bfe.u32 %r4540, %r4541, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1550, %r4540, 4;
add.s64 %rd1551, %rd51, %rd1550;
ld.shared.u32 %r4750, [%rd1551];
add.s32 %r4751, %r4750, %r4749;
xor.b32 %r4752, %r71, %r4525;
xor.b32 %r4557, %r4752, %r4751;
// Round 7: %r4573 = %r70 ^ %r4541 ^ F(%r4557).
// inline asm
bfe.u32 %r4544, %r4557, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1552, %r4544, 4;
add.s64 %rd1553, %rd42, %rd1552;
ld.shared.u32 %r4753, [%rd1553];
// inline asm
bfe.u32 %r4548, %r4557, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1554, %r4548, 4;
add.s64 %rd1555, %rd45, %rd1554;
ld.shared.u32 %r4754, [%rd1555];
add.s32 %r4755, %r4754, %r4753;
// inline asm
bfe.u32 %r4552, %r4557, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1556, %r4552, 4;
add.s64 %rd1557, %rd48, %rd1556;
ld.shared.u32 %r4756, [%rd1557];
xor.b32 %r4757, %r4756, %r4755;
// inline asm
bfe.u32 %r4556, %r4557, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1558, %r4556, 4;
add.s64 %rd1559, %rd51, %rd1558;
ld.shared.u32 %r4758, [%rd1559];
add.s32 %r4759, %r4758, %r4757;
xor.b32 %r4760, %r70, %r4541;
xor.b32 %r4573, %r4760, %r4759;
// Round 8: %r4589 = %r73 ^ %r4557 ^ F(%r4573).
// inline asm
bfe.u32 %r4560, %r4573, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1560, %r4560, 4;
add.s64 %rd1561, %rd42, %rd1560;
ld.shared.u32 %r4761, [%rd1561];
// inline asm
bfe.u32 %r4564, %r4573, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1562, %r4564, 4;
add.s64 %rd1563, %rd45, %rd1562;
ld.shared.u32 %r4762, [%rd1563];
add.s32 %r4763, %r4762, %r4761;
// inline asm
bfe.u32 %r4568, %r4573, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1564, %r4568, 4;
add.s64 %rd1565, %rd48, %rd1564;
ld.shared.u32 %r4764, [%rd1565];
xor.b32 %r4765, %r4764, %r4763;
// inline asm
bfe.u32 %r4572, %r4573, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1566, %r4572, 4;
add.s64 %rd1567, %rd51, %rd1566;
ld.shared.u32 %r4766, [%rd1567];
add.s32 %r4767, %r4766, %r4765;
xor.b32 %r4768, %r73, %r4557;
xor.b32 %r4589, %r4768, %r4767;
// Round 9: %r4605 = %r72 ^ %r4573 ^ F(%r4589).
// inline asm
bfe.u32 %r4576, %r4589, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1568, %r4576, 4;
add.s64 %rd1569, %rd42, %rd1568;
ld.shared.u32 %r4769, [%rd1569];
// inline asm
bfe.u32 %r4580, %r4589, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1570, %r4580, 4;
add.s64 %rd1571, %rd45, %rd1570;
ld.shared.u32 %r4770, [%rd1571];
add.s32 %r4771, %r4770, %r4769;
// inline asm
bfe.u32 %r4584, %r4589, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1572, %r4584, 4;
add.s64 %rd1573, %rd48, %rd1572;
ld.shared.u32 %r4772, [%rd1573];
xor.b32 %r4773, %r4772, %r4771;
// inline asm
bfe.u32 %r4588, %r4589, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1574, %r4588, 4;
add.s64 %rd1575, %rd51, %rd1574;
ld.shared.u32 %r4774, [%rd1575];
add.s32 %r4775, %r4774, %r4773;
xor.b32 %r4776, %r72, %r4573;
xor.b32 %r4605, %r4776, %r4775;
// Round 10: %r4621 = %r75 ^ %r4589 ^ F(%r4605).
// inline asm
bfe.u32 %r4592, %r4605, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1576, %r4592, 4;
add.s64 %rd1577, %rd42, %rd1576;
ld.shared.u32 %r4777, [%rd1577];
// inline asm
bfe.u32 %r4596, %r4605, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1578, %r4596, 4;
add.s64 %rd1579, %rd45, %rd1578;
ld.shared.u32 %r4778, [%rd1579];
add.s32 %r4779, %r4778, %r4777;
// inline asm
bfe.u32 %r4600, %r4605, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1580, %r4600, 4;
add.s64 %rd1581, %rd48, %rd1580;
ld.shared.u32 %r4780, [%rd1581];
xor.b32 %r4781, %r4780, %r4779;
// inline asm
bfe.u32 %r4604, %r4605, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1582, %r4604, 4;
add.s64 %rd1583, %rd51, %rd1582;
ld.shared.u32 %r4782, [%rd1583];
add.s32 %r4783, %r4782, %r4781;
xor.b32 %r4784, %r75, %r4589;
xor.b32 %r4621, %r4784, %r4783;
// Round 11: %r4637 = %r74 ^ %r4605 ^ F(%r4621).
// inline asm
bfe.u32 %r4608, %r4621, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1584, %r4608, 4;
add.s64 %rd1585, %rd42, %rd1584;
ld.shared.u32 %r4785, [%rd1585];
// inline asm
bfe.u32 %r4612, %r4621, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1586, %r4612, 4;
add.s64 %rd1587, %rd45, %rd1586;
ld.shared.u32 %r4786, [%rd1587];
add.s32 %r4787, %r4786, %r4785;
// inline asm
bfe.u32 %r4616, %r4621, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1588, %r4616, 4;
add.s64 %rd1589, %rd48, %rd1588;
ld.shared.u32 %r4788, [%rd1589];
xor.b32 %r4789, %r4788, %r4787;
// inline asm
bfe.u32 %r4620, %r4621, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1590, %r4620, 4;
add.s64 %rd1591, %rd51, %rd1590;
ld.shared.u32 %r4790, [%rd1591];
add.s32 %r4791, %r4790, %r4789;
xor.b32 %r4792, %r74, %r4605;
xor.b32 %r4637, %r4792, %r4791;
// Round 12: %r4653 = %r77 ^ %r4621 ^ F(%r4637).
// inline asm
bfe.u32 %r4624, %r4637, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1592, %r4624, 4;
add.s64 %rd1593, %rd42, %rd1592;
ld.shared.u32 %r4793, [%rd1593];
// inline asm
bfe.u32 %r4628, %r4637, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1594, %r4628, 4;
add.s64 %rd1595, %rd45, %rd1594;
ld.shared.u32 %r4794, [%rd1595];
add.s32 %r4795, %r4794, %r4793;
// inline asm
bfe.u32 %r4632, %r4637, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1596, %r4632, 4;
add.s64 %rd1597, %rd48, %rd1596;
ld.shared.u32 %r4796, [%rd1597];
xor.b32 %r4797, %r4796, %r4795;
// inline asm
bfe.u32 %r4636, %r4637, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1598, %r4636, 4;
add.s64 %rd1599, %rd51, %rd1598;
ld.shared.u32 %r4798, [%rd1599];
add.s32 %r4799, %r4798, %r4797;
xor.b32 %r4800, %r77, %r4621;
xor.b32 %r4653, %r4800, %r4799;
// Round 13: %r4669 = %r76 ^ %r4637 ^ F(%r4653).
// inline asm
bfe.u32 %r4640, %r4653, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1600, %r4640, 4;
add.s64 %rd1601, %rd42, %rd1600;
ld.shared.u32 %r4801, [%rd1601];
// inline asm
bfe.u32 %r4644, %r4653, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1602, %r4644, 4;
add.s64 %rd1603, %rd45, %rd1602;
ld.shared.u32 %r4802, [%rd1603];
add.s32 %r4803, %r4802, %r4801;
// inline asm
bfe.u32 %r4648, %r4653, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1604, %r4648, 4;
add.s64 %rd1605, %rd48, %rd1604;
ld.shared.u32 %r4804, [%rd1605];
xor.b32 %r4805, %r4804, %r4803;
// inline asm
bfe.u32 %r4652, %r4653, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1606, %r4652, 4;
add.s64 %rd1607, %rd51, %rd1606;
ld.shared.u32 %r4806, [%rd1607];
add.s32 %r4807, %r4806, %r4805;
xor.b32 %r4808, %r76, %r4637;
xor.b32 %r4669, %r4808, %r4807;
// Round 14: %r4685 = %r79 ^ %r4653 ^ F(%r4669).
// inline asm
bfe.u32 %r4656, %r4669, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1608, %r4656, 4;
add.s64 %rd1609, %rd42, %rd1608;
ld.shared.u32 %r4809, [%rd1609];
// inline asm
bfe.u32 %r4660, %r4669, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1610, %r4660, 4;
add.s64 %rd1611, %rd45, %rd1610;
ld.shared.u32 %r4810, [%rd1611];
add.s32 %r4811, %r4810, %r4809;
// inline asm
bfe.u32 %r4664, %r4669, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1612, %r4664, 4;
add.s64 %rd1613, %rd48, %rd1612;
ld.shared.u32 %r4812, [%rd1613];
xor.b32 %r4813, %r4812, %r4811;
// inline asm
bfe.u32 %r4668, %r4669, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1614, %r4668, 4;
add.s64 %rd1615, %rd51, %rd1614;
ld.shared.u32 %r4814, [%rd1615];
add.s32 %r4815, %r4814, %r4813;
xor.b32 %r4816, %r79, %r4653;
xor.b32 %r4685, %r4816, %r4815;
// Round 15: %r4701 = %r78 ^ %r4669 ^ F(%r4685).
// inline asm
bfe.u32 %r4672, %r4685, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1616, %r4672, 4;
add.s64 %rd1617, %rd42, %rd1616;
ld.shared.u32 %r4817, [%rd1617];
// inline asm
bfe.u32 %r4676, %r4685, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1618, %r4676, 4;
add.s64 %rd1619, %rd45, %rd1618;
ld.shared.u32 %r4818, [%rd1619];
add.s32 %r4819, %r4818, %r4817;
// inline asm
bfe.u32 %r4680, %r4685, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1620, %r4680, 4;
add.s64 %rd1621, %rd48, %rd1620;
ld.shared.u32 %r4820, [%rd1621];
xor.b32 %r4821, %r4820, %r4819;
// inline asm
bfe.u32 %r4684, %r4685, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1622, %r4684, 4;
add.s64 %rd1623, %rd51, %rd1622;
ld.shared.u32 %r4822, [%rd1623];
add.s32 %r4823, %r4822, %r4821;
xor.b32 %r4824, %r78, %r4669;
xor.b32 %r4701, %r4824, %r4823;
// Round 16: new %r10314 = %r81 ^ %r4685 ^ F(%r4701).
// inline asm
bfe.u32 %r4688, %r4701, %r10281, %r10282;
// inline asm
mul.wide.u32 %rd1624, %r4688, 4;
add.s64 %rd1625, %rd42, %rd1624;
ld.shared.u32 %r4825, [%rd1625];
// inline asm
bfe.u32 %r4692, %r4701, %r10283, %r10282;
// inline asm
mul.wide.u32 %rd1626, %r4692, 4;
add.s64 %rd1627, %rd45, %rd1626;
ld.shared.u32 %r4826, [%rd1627];
add.s32 %r4827, %r4826, %r4825;
// inline asm
bfe.u32 %r4696, %r4701, %r10282, %r10282;
// inline asm
mul.wide.u32 %rd1628, %r4696, 4;
add.s64 %rd1629, %rd48, %rd1628;
ld.shared.u32 %r4828, [%rd1629];
xor.b32 %r4829, %r4828, %r4827;
// inline asm
bfe.u32 %r4700, %r4701, %r208, %r10282;
// inline asm
mul.wide.u32 %rd1630, %r4700, 4;
add.s64 %rd1631, %rd51, %rd1630;
ld.shared.u32 %r4830, [%rd1631];
add.s32 %r4831, %r4830, %r4829;
xor.b32 %r4832, %r81, %r4685;
xor.b32 %r10314, %r4832, %r4831;
// Output whitening: new %r10315 = %r80 ^ %r4701 (no F on this half).
xor.b32 %r10315, %r80, %r4701;
// Store the new pair (%r10315 first, %r10314 at +4) and advance the
// cursor by 8 bytes.
st.shared.u32 [%rd3497], %r10315;
st.shared.u32 [%rd3497+4], %r10314;
add.s64 %rd3497, %rd3497, 8;
// Two words were written per iteration; repeat while the word counter is
// below 256 (unsigned compare).
add.s32 %r10316, %r10316, 2;
setp.lt.u32 %p6, %r10316, 256;
@%p6 bra BB4_10;
// -----------------------------------------------------------------------
// Pre-header and loop BB4_12.
//
// Each iteration runs the running 64-bit state (%r10315, %r10314) through
// a 16-round Feistel network and stores the resulting pair to shared
// memory at [%rd3498] / [%rd3498+4].  The pair written at the bottom of an
// iteration is also the input of the next one; on entry it holds whatever
// the preceding code left in those two registers.
//
// Round function, evaluated 16 times per iteration:
//   F(x) = ((T0[x[31:24]] + T1[x[23:16]]) ^ T2[x[15:8]]) + T3[x{%r208,8}]
// T0..T3 are shared-memory tables of 32-bit words based at %rd42, %rd45,
// %rd48 and %rd51; each is indexed by an 8-bit field of x.  x{%r208,8} is
// the 8-bit field that starts at bit %r208.
// NOTE(review): %r208 is defined outside this block; presumably 0 (low
// byte, and the counter's start value) -- confirm.
//
// Round-key registers, in order of use:
//   %r65, %r64, %r67, %r66, %r69, %r68, %r71, %r70, %r73, %r72,
//   %r75, %r74, %r77, %r76, %r79, %r78, %r81, %r80
// NOTE(review): this matches the shape of a Blowfish encipher (18 keys,
// four byte-indexed tables) -- confirm against the kernel's source.
// NOTE(review): if the store target (%rd6 onward) overlaps T0..T3, later
// iterations read words written by earlier ones -- confirm the layout.
// -----------------------------------------------------------------------
// Output cursor starts at %rd6; word counter starts at %r208.
mov.u64 %rd3498, %rd6;
mov.u32 %r10319, %r208;
BB4_12:
// bfe operands: bit positions 16 and 24, and the field width / position 8.
mov.u32 %r10286, 16;
mov.u32 %r10285, 8;
mov.u32 %r10284, 24;
// Input whitening: %r4846 = %r65 ^ %r10315.
xor.b32 %r4846, %r65, %r10315;
// Round 1: %r4862 = %r64 ^ %r10314 ^ F(%r4846).
// inline asm
bfe.u32 %r4833, %r4846, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1635, %r4833, 4;
add.s64 %rd1636, %rd42, %rd1635;
ld.shared.u32 %r5089, [%rd1636];
// inline asm
bfe.u32 %r4837, %r4846, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1639, %r4837, 4;
add.s64 %rd1640, %rd45, %rd1639;
ld.shared.u32 %r5090, [%rd1640];
add.s32 %r5091, %r5090, %r5089;
// inline asm
bfe.u32 %r4841, %r4846, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1643, %r4841, 4;
add.s64 %rd1644, %rd48, %rd1643;
ld.shared.u32 %r5092, [%rd1644];
xor.b32 %r5093, %r5092, %r5091;
// inline asm
bfe.u32 %r4845, %r4846, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1647, %r4845, 4;
add.s64 %rd1648, %rd51, %rd1647;
ld.shared.u32 %r5094, [%rd1648];
add.s32 %r5095, %r5094, %r5093;
xor.b32 %r5096, %r64, %r10314;
xor.b32 %r4862, %r5096, %r5095;
// Round 2: %r4878 = %r67 ^ %r4846 ^ F(%r4862).
// inline asm
bfe.u32 %r4849, %r4862, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1649, %r4849, 4;
add.s64 %rd1650, %rd42, %rd1649;
ld.shared.u32 %r5097, [%rd1650];
// inline asm
bfe.u32 %r4853, %r4862, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1651, %r4853, 4;
add.s64 %rd1652, %rd45, %rd1651;
ld.shared.u32 %r5098, [%rd1652];
add.s32 %r5099, %r5098, %r5097;
// inline asm
bfe.u32 %r4857, %r4862, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1653, %r4857, 4;
add.s64 %rd1654, %rd48, %rd1653;
ld.shared.u32 %r5100, [%rd1654];
xor.b32 %r5101, %r5100, %r5099;
// inline asm
bfe.u32 %r4861, %r4862, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1655, %r4861, 4;
add.s64 %rd1656, %rd51, %rd1655;
ld.shared.u32 %r5102, [%rd1656];
add.s32 %r5103, %r5102, %r5101;
xor.b32 %r5104, %r67, %r4846;
xor.b32 %r4878, %r5104, %r5103;
// Round 3: %r4894 = %r66 ^ %r4862 ^ F(%r4878).
// inline asm
bfe.u32 %r4865, %r4878, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1657, %r4865, 4;
add.s64 %rd1658, %rd42, %rd1657;
ld.shared.u32 %r5105, [%rd1658];
// inline asm
bfe.u32 %r4869, %r4878, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1659, %r4869, 4;
add.s64 %rd1660, %rd45, %rd1659;
ld.shared.u32 %r5106, [%rd1660];
add.s32 %r5107, %r5106, %r5105;
// inline asm
bfe.u32 %r4873, %r4878, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1661, %r4873, 4;
add.s64 %rd1662, %rd48, %rd1661;
ld.shared.u32 %r5108, [%rd1662];
xor.b32 %r5109, %r5108, %r5107;
// inline asm
bfe.u32 %r4877, %r4878, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1663, %r4877, 4;
add.s64 %rd1664, %rd51, %rd1663;
ld.shared.u32 %r5110, [%rd1664];
add.s32 %r5111, %r5110, %r5109;
xor.b32 %r5112, %r66, %r4862;
xor.b32 %r4894, %r5112, %r5111;
// Round 4: %r4910 = %r69 ^ %r4878 ^ F(%r4894).
// inline asm
bfe.u32 %r4881, %r4894, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1665, %r4881, 4;
add.s64 %rd1666, %rd42, %rd1665;
ld.shared.u32 %r5113, [%rd1666];
// inline asm
bfe.u32 %r4885, %r4894, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1667, %r4885, 4;
add.s64 %rd1668, %rd45, %rd1667;
ld.shared.u32 %r5114, [%rd1668];
add.s32 %r5115, %r5114, %r5113;
// inline asm
bfe.u32 %r4889, %r4894, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1669, %r4889, 4;
add.s64 %rd1670, %rd48, %rd1669;
ld.shared.u32 %r5116, [%rd1670];
xor.b32 %r5117, %r5116, %r5115;
// inline asm
bfe.u32 %r4893, %r4894, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1671, %r4893, 4;
add.s64 %rd1672, %rd51, %rd1671;
ld.shared.u32 %r5118, [%rd1672];
add.s32 %r5119, %r5118, %r5117;
xor.b32 %r5120, %r69, %r4878;
xor.b32 %r4910, %r5120, %r5119;
// Round 5: %r4926 = %r68 ^ %r4894 ^ F(%r4910).
// inline asm
bfe.u32 %r4897, %r4910, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1673, %r4897, 4;
add.s64 %rd1674, %rd42, %rd1673;
ld.shared.u32 %r5121, [%rd1674];
// inline asm
bfe.u32 %r4901, %r4910, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1675, %r4901, 4;
add.s64 %rd1676, %rd45, %rd1675;
ld.shared.u32 %r5122, [%rd1676];
add.s32 %r5123, %r5122, %r5121;
// inline asm
bfe.u32 %r4905, %r4910, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1677, %r4905, 4;
add.s64 %rd1678, %rd48, %rd1677;
ld.shared.u32 %r5124, [%rd1678];
xor.b32 %r5125, %r5124, %r5123;
// inline asm
bfe.u32 %r4909, %r4910, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1679, %r4909, 4;
add.s64 %rd1680, %rd51, %rd1679;
ld.shared.u32 %r5126, [%rd1680];
add.s32 %r5127, %r5126, %r5125;
xor.b32 %r5128, %r68, %r4894;
xor.b32 %r4926, %r5128, %r5127;
// Round 6: %r4942 = %r71 ^ %r4910 ^ F(%r4926).
// inline asm
bfe.u32 %r4913, %r4926, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1681, %r4913, 4;
add.s64 %rd1682, %rd42, %rd1681;
ld.shared.u32 %r5129, [%rd1682];
// inline asm
bfe.u32 %r4917, %r4926, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1683, %r4917, 4;
add.s64 %rd1684, %rd45, %rd1683;
ld.shared.u32 %r5130, [%rd1684];
add.s32 %r5131, %r5130, %r5129;
// inline asm
bfe.u32 %r4921, %r4926, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1685, %r4921, 4;
add.s64 %rd1686, %rd48, %rd1685;
ld.shared.u32 %r5132, [%rd1686];
xor.b32 %r5133, %r5132, %r5131;
// inline asm
bfe.u32 %r4925, %r4926, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1687, %r4925, 4;
add.s64 %rd1688, %rd51, %rd1687;
ld.shared.u32 %r5134, [%rd1688];
add.s32 %r5135, %r5134, %r5133;
xor.b32 %r5136, %r71, %r4910;
xor.b32 %r4942, %r5136, %r5135;
// Round 7: %r4958 = %r70 ^ %r4926 ^ F(%r4942).
// inline asm
bfe.u32 %r4929, %r4942, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1689, %r4929, 4;
add.s64 %rd1690, %rd42, %rd1689;
ld.shared.u32 %r5137, [%rd1690];
// inline asm
bfe.u32 %r4933, %r4942, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1691, %r4933, 4;
add.s64 %rd1692, %rd45, %rd1691;
ld.shared.u32 %r5138, [%rd1692];
add.s32 %r5139, %r5138, %r5137;
// inline asm
bfe.u32 %r4937, %r4942, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1693, %r4937, 4;
add.s64 %rd1694, %rd48, %rd1693;
ld.shared.u32 %r5140, [%rd1694];
xor.b32 %r5141, %r5140, %r5139;
// inline asm
bfe.u32 %r4941, %r4942, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1695, %r4941, 4;
add.s64 %rd1696, %rd51, %rd1695;
ld.shared.u32 %r5142, [%rd1696];
add.s32 %r5143, %r5142, %r5141;
xor.b32 %r5144, %r70, %r4926;
xor.b32 %r4958, %r5144, %r5143;
// Round 8: %r4974 = %r73 ^ %r4942 ^ F(%r4958).
// inline asm
bfe.u32 %r4945, %r4958, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1697, %r4945, 4;
add.s64 %rd1698, %rd42, %rd1697;
ld.shared.u32 %r5145, [%rd1698];
// inline asm
bfe.u32 %r4949, %r4958, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1699, %r4949, 4;
add.s64 %rd1700, %rd45, %rd1699;
ld.shared.u32 %r5146, [%rd1700];
add.s32 %r5147, %r5146, %r5145;
// inline asm
bfe.u32 %r4953, %r4958, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1701, %r4953, 4;
add.s64 %rd1702, %rd48, %rd1701;
ld.shared.u32 %r5148, [%rd1702];
xor.b32 %r5149, %r5148, %r5147;
// inline asm
bfe.u32 %r4957, %r4958, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1703, %r4957, 4;
add.s64 %rd1704, %rd51, %rd1703;
ld.shared.u32 %r5150, [%rd1704];
add.s32 %r5151, %r5150, %r5149;
xor.b32 %r5152, %r73, %r4942;
xor.b32 %r4974, %r5152, %r5151;
// Round 9: %r4990 = %r72 ^ %r4958 ^ F(%r4974).
// inline asm
bfe.u32 %r4961, %r4974, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1705, %r4961, 4;
add.s64 %rd1706, %rd42, %rd1705;
ld.shared.u32 %r5153, [%rd1706];
// inline asm
bfe.u32 %r4965, %r4974, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1707, %r4965, 4;
add.s64 %rd1708, %rd45, %rd1707;
ld.shared.u32 %r5154, [%rd1708];
add.s32 %r5155, %r5154, %r5153;
// inline asm
bfe.u32 %r4969, %r4974, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1709, %r4969, 4;
add.s64 %rd1710, %rd48, %rd1709;
ld.shared.u32 %r5156, [%rd1710];
xor.b32 %r5157, %r5156, %r5155;
// inline asm
bfe.u32 %r4973, %r4974, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1711, %r4973, 4;
add.s64 %rd1712, %rd51, %rd1711;
ld.shared.u32 %r5158, [%rd1712];
add.s32 %r5159, %r5158, %r5157;
xor.b32 %r5160, %r72, %r4958;
xor.b32 %r4990, %r5160, %r5159;
// Round 10: %r5006 = %r75 ^ %r4974 ^ F(%r4990).
// inline asm
bfe.u32 %r4977, %r4990, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1713, %r4977, 4;
add.s64 %rd1714, %rd42, %rd1713;
ld.shared.u32 %r5161, [%rd1714];
// inline asm
bfe.u32 %r4981, %r4990, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1715, %r4981, 4;
add.s64 %rd1716, %rd45, %rd1715;
ld.shared.u32 %r5162, [%rd1716];
add.s32 %r5163, %r5162, %r5161;
// inline asm
bfe.u32 %r4985, %r4990, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1717, %r4985, 4;
add.s64 %rd1718, %rd48, %rd1717;
ld.shared.u32 %r5164, [%rd1718];
xor.b32 %r5165, %r5164, %r5163;
// inline asm
bfe.u32 %r4989, %r4990, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1719, %r4989, 4;
add.s64 %rd1720, %rd51, %rd1719;
ld.shared.u32 %r5166, [%rd1720];
add.s32 %r5167, %r5166, %r5165;
xor.b32 %r5168, %r75, %r4974;
xor.b32 %r5006, %r5168, %r5167;
// Round 11: %r5022 = %r74 ^ %r4990 ^ F(%r5006).
// inline asm
bfe.u32 %r4993, %r5006, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1721, %r4993, 4;
add.s64 %rd1722, %rd42, %rd1721;
ld.shared.u32 %r5169, [%rd1722];
// inline asm
bfe.u32 %r4997, %r5006, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1723, %r4997, 4;
add.s64 %rd1724, %rd45, %rd1723;
ld.shared.u32 %r5170, [%rd1724];
add.s32 %r5171, %r5170, %r5169;
// inline asm
bfe.u32 %r5001, %r5006, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1725, %r5001, 4;
add.s64 %rd1726, %rd48, %rd1725;
ld.shared.u32 %r5172, [%rd1726];
xor.b32 %r5173, %r5172, %r5171;
// inline asm
bfe.u32 %r5005, %r5006, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1727, %r5005, 4;
add.s64 %rd1728, %rd51, %rd1727;
ld.shared.u32 %r5174, [%rd1728];
add.s32 %r5175, %r5174, %r5173;
xor.b32 %r5176, %r74, %r4990;
xor.b32 %r5022, %r5176, %r5175;
// Round 12: %r5038 = %r77 ^ %r5006 ^ F(%r5022).
// inline asm
bfe.u32 %r5009, %r5022, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1729, %r5009, 4;
add.s64 %rd1730, %rd42, %rd1729;
ld.shared.u32 %r5177, [%rd1730];
// inline asm
bfe.u32 %r5013, %r5022, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1731, %r5013, 4;
add.s64 %rd1732, %rd45, %rd1731;
ld.shared.u32 %r5178, [%rd1732];
add.s32 %r5179, %r5178, %r5177;
// inline asm
bfe.u32 %r5017, %r5022, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1733, %r5017, 4;
add.s64 %rd1734, %rd48, %rd1733;
ld.shared.u32 %r5180, [%rd1734];
xor.b32 %r5181, %r5180, %r5179;
// inline asm
bfe.u32 %r5021, %r5022, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1735, %r5021, 4;
add.s64 %rd1736, %rd51, %rd1735;
ld.shared.u32 %r5182, [%rd1736];
add.s32 %r5183, %r5182, %r5181;
xor.b32 %r5184, %r77, %r5006;
xor.b32 %r5038, %r5184, %r5183;
// Round 13: %r5054 = %r76 ^ %r5022 ^ F(%r5038).
// inline asm
bfe.u32 %r5025, %r5038, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1737, %r5025, 4;
add.s64 %rd1738, %rd42, %rd1737;
ld.shared.u32 %r5185, [%rd1738];
// inline asm
bfe.u32 %r5029, %r5038, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1739, %r5029, 4;
add.s64 %rd1740, %rd45, %rd1739;
ld.shared.u32 %r5186, [%rd1740];
add.s32 %r5187, %r5186, %r5185;
// inline asm
bfe.u32 %r5033, %r5038, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1741, %r5033, 4;
add.s64 %rd1742, %rd48, %rd1741;
ld.shared.u32 %r5188, [%rd1742];
xor.b32 %r5189, %r5188, %r5187;
// inline asm
bfe.u32 %r5037, %r5038, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1743, %r5037, 4;
add.s64 %rd1744, %rd51, %rd1743;
ld.shared.u32 %r5190, [%rd1744];
add.s32 %r5191, %r5190, %r5189;
xor.b32 %r5192, %r76, %r5022;
xor.b32 %r5054, %r5192, %r5191;
// Round 14: %r5070 = %r79 ^ %r5038 ^ F(%r5054).
// inline asm
bfe.u32 %r5041, %r5054, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1745, %r5041, 4;
add.s64 %rd1746, %rd42, %rd1745;
ld.shared.u32 %r5193, [%rd1746];
// inline asm
bfe.u32 %r5045, %r5054, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1747, %r5045, 4;
add.s64 %rd1748, %rd45, %rd1747;
ld.shared.u32 %r5194, [%rd1748];
add.s32 %r5195, %r5194, %r5193;
// inline asm
bfe.u32 %r5049, %r5054, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1749, %r5049, 4;
add.s64 %rd1750, %rd48, %rd1749;
ld.shared.u32 %r5196, [%rd1750];
xor.b32 %r5197, %r5196, %r5195;
// inline asm
bfe.u32 %r5053, %r5054, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1751, %r5053, 4;
add.s64 %rd1752, %rd51, %rd1751;
ld.shared.u32 %r5198, [%rd1752];
add.s32 %r5199, %r5198, %r5197;
xor.b32 %r5200, %r79, %r5038;
xor.b32 %r5070, %r5200, %r5199;
// Round 15: %r5086 = %r78 ^ %r5054 ^ F(%r5070).
// inline asm
bfe.u32 %r5057, %r5070, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1753, %r5057, 4;
add.s64 %rd1754, %rd42, %rd1753;
ld.shared.u32 %r5201, [%rd1754];
// inline asm
bfe.u32 %r5061, %r5070, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1755, %r5061, 4;
add.s64 %rd1756, %rd45, %rd1755;
ld.shared.u32 %r5202, [%rd1756];
add.s32 %r5203, %r5202, %r5201;
// inline asm
bfe.u32 %r5065, %r5070, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1757, %r5065, 4;
add.s64 %rd1758, %rd48, %rd1757;
ld.shared.u32 %r5204, [%rd1758];
xor.b32 %r5205, %r5204, %r5203;
// inline asm
bfe.u32 %r5069, %r5070, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1759, %r5069, 4;
add.s64 %rd1760, %rd51, %rd1759;
ld.shared.u32 %r5206, [%rd1760];
add.s32 %r5207, %r5206, %r5205;
xor.b32 %r5208, %r78, %r5054;
xor.b32 %r5086, %r5208, %r5207;
// Round 16: new %r10314 = %r81 ^ %r5070 ^ F(%r5086).
// inline asm
bfe.u32 %r5073, %r5086, %r10284, %r10285;
// inline asm
mul.wide.u32 %rd1761, %r5073, 4;
add.s64 %rd1762, %rd42, %rd1761;
ld.shared.u32 %r5209, [%rd1762];
// inline asm
bfe.u32 %r5077, %r5086, %r10286, %r10285;
// inline asm
mul.wide.u32 %rd1763, %r5077, 4;
add.s64 %rd1764, %rd45, %rd1763;
ld.shared.u32 %r5210, [%rd1764];
add.s32 %r5211, %r5210, %r5209;
// inline asm
bfe.u32 %r5081, %r5086, %r10285, %r10285;
// inline asm
mul.wide.u32 %rd1765, %r5081, 4;
add.s64 %rd1766, %rd48, %rd1765;
ld.shared.u32 %r5212, [%rd1766];
xor.b32 %r5213, %r5212, %r5211;
// inline asm
bfe.u32 %r5085, %r5086, %r208, %r10285;
// inline asm
mul.wide.u32 %rd1767, %r5085, 4;
add.s64 %rd1768, %rd51, %rd1767;
ld.shared.u32 %r5214, [%rd1768];
add.s32 %r5215, %r5214, %r5213;
xor.b32 %r5216, %r81, %r5070;
xor.b32 %r10314, %r5216, %r5215;
// Output whitening: new %r10315 = %r80 ^ %r5086 (no F on this half).
xor.b32 %r10315, %r80, %r5086;
// Store the new pair (%r10315 first, %r10314 at +4) and advance the
// cursor by 8 bytes.
st.shared.u32 [%rd3498], %r10315;
st.shared.u32 [%rd3498+4], %r10314;
add.s64 %rd3498, %rd3498, 8;
// Two words were written per iteration; repeat while the word counter is
// below 256 (unsigned compare).
add.s32 %r10319, %r10319, 2;
setp.lt.u32 %p7, %r10319, 256;
@%p7 bra BB4_12;
mov.u32 %r10289, 16;
mov.u32 %r10288, 8;
mov.u32 %r10287, 24;
xor.b32 %r5230, %r65, %r41;
// inline asm
bfe.u32 %r5217, %r5230, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1772, %r5217, 4;
add.s64 %rd1773, %rd42, %rd1772;
ld.shared.u32 %r7522, [%rd1773];
// inline asm
bfe.u32 %r5221, %r5230, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1776, %r5221, 4;
add.s64 %rd1777, %rd45, %rd1776;
ld.shared.u32 %r7523, [%rd1777];
add.s32 %r7524, %r7523, %r7522;
// inline asm
bfe.u32 %r5225, %r5230, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1780, %r5225, 4;
add.s64 %rd1781, %rd48, %rd1780;
ld.shared.u32 %r7525, [%rd1781];
xor.b32 %r7526, %r7525, %r7524;
mov.u32 %r10322, 0;
// inline asm
bfe.u32 %r5229, %r5230, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1784, %r5229, 4;
add.s64 %rd1785, %rd51, %rd1784;
ld.shared.u32 %r7527, [%rd1785];
add.s32 %r7528, %r7527, %r7526;
xor.b32 %r7529, %r64, %r42;
xor.b32 %r5246, %r7529, %r7528;
// inline asm
bfe.u32 %r5233, %r5246, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1786, %r5233, 4;
add.s64 %rd1787, %rd42, %rd1786;
ld.shared.u32 %r7530, [%rd1787];
// inline asm
bfe.u32 %r5237, %r5246, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1788, %r5237, 4;
add.s64 %rd1789, %rd45, %rd1788;
ld.shared.u32 %r7531, [%rd1789];
add.s32 %r7532, %r7531, %r7530;
// inline asm
bfe.u32 %r5241, %r5246, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1790, %r5241, 4;
add.s64 %rd1791, %rd48, %rd1790;
ld.shared.u32 %r7533, [%rd1791];
xor.b32 %r7534, %r7533, %r7532;
// inline asm
bfe.u32 %r5245, %r5246, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1792, %r5245, 4;
add.s64 %rd1793, %rd51, %rd1792;
ld.shared.u32 %r7535, [%rd1793];
add.s32 %r7536, %r7535, %r7534;
xor.b32 %r7537, %r67, %r43;
xor.b32 %r7538, %r7537, %r5230;
xor.b32 %r5262, %r7538, %r7536;
// inline asm
bfe.u32 %r5249, %r5262, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1794, %r5249, 4;
add.s64 %rd1795, %rd42, %rd1794;
ld.shared.u32 %r7539, [%rd1795];
// inline asm
bfe.u32 %r5253, %r5262, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1796, %r5253, 4;
add.s64 %rd1797, %rd45, %rd1796;
ld.shared.u32 %r7540, [%rd1797];
add.s32 %r7541, %r7540, %r7539;
// inline asm
bfe.u32 %r5257, %r5262, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1798, %r5257, 4;
add.s64 %rd1799, %rd48, %rd1798;
ld.shared.u32 %r7542, [%rd1799];
xor.b32 %r7543, %r7542, %r7541;
// inline asm
bfe.u32 %r5261, %r5262, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1800, %r5261, 4;
add.s64 %rd1801, %rd51, %rd1800;
ld.shared.u32 %r7544, [%rd1801];
add.s32 %r7545, %r7544, %r7543;
xor.b32 %r7546, %r66, %r44;
xor.b32 %r7547, %r7546, %r5246;
xor.b32 %r5278, %r7547, %r7545;
// inline asm
bfe.u32 %r5265, %r5278, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1802, %r5265, 4;
add.s64 %rd1803, %rd42, %rd1802;
ld.shared.u32 %r7548, [%rd1803];
// inline asm
bfe.u32 %r5269, %r5278, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1804, %r5269, 4;
add.s64 %rd1805, %rd45, %rd1804;
ld.shared.u32 %r7549, [%rd1805];
add.s32 %r7550, %r7549, %r7548;
// inline asm
bfe.u32 %r5273, %r5278, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1806, %r5273, 4;
add.s64 %rd1807, %rd48, %rd1806;
ld.shared.u32 %r7551, [%rd1807];
xor.b32 %r7552, %r7551, %r7550;
// inline asm
bfe.u32 %r5277, %r5278, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1808, %r5277, 4;
add.s64 %rd1809, %rd51, %rd1808;
ld.shared.u32 %r7553, [%rd1809];
add.s32 %r7554, %r7553, %r7552;
xor.b32 %r7555, %r69, %r41;
xor.b32 %r7556, %r7555, %r5262;
xor.b32 %r5294, %r7556, %r7554;
// inline asm
bfe.u32 %r5281, %r5294, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1810, %r5281, 4;
add.s64 %rd1811, %rd42, %rd1810;
ld.shared.u32 %r7557, [%rd1811];
// inline asm
bfe.u32 %r5285, %r5294, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1812, %r5285, 4;
add.s64 %rd1813, %rd45, %rd1812;
ld.shared.u32 %r7558, [%rd1813];
add.s32 %r7559, %r7558, %r7557;
// inline asm
bfe.u32 %r5289, %r5294, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1814, %r5289, 4;
add.s64 %rd1815, %rd48, %rd1814;
ld.shared.u32 %r7560, [%rd1815];
xor.b32 %r7561, %r7560, %r7559;
// inline asm
bfe.u32 %r5293, %r5294, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1816, %r5293, 4;
add.s64 %rd1817, %rd51, %rd1816;
ld.shared.u32 %r7562, [%rd1817];
add.s32 %r7563, %r7562, %r7561;
xor.b32 %r7564, %r68, %r42;
xor.b32 %r7565, %r7564, %r5278;
xor.b32 %r5310, %r7565, %r7563;
// inline asm
bfe.u32 %r5297, %r5310, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1818, %r5297, 4;
add.s64 %rd1819, %rd42, %rd1818;
ld.shared.u32 %r7566, [%rd1819];
// inline asm
bfe.u32 %r5301, %r5310, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1820, %r5301, 4;
add.s64 %rd1821, %rd45, %rd1820;
ld.shared.u32 %r7567, [%rd1821];
add.s32 %r7568, %r7567, %r7566;
// inline asm
bfe.u32 %r5305, %r5310, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1822, %r5305, 4;
add.s64 %rd1823, %rd48, %rd1822;
ld.shared.u32 %r7569, [%rd1823];
xor.b32 %r7570, %r7569, %r7568;
// inline asm
bfe.u32 %r5309, %r5310, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1824, %r5309, 4;
add.s64 %rd1825, %rd51, %rd1824;
ld.shared.u32 %r7571, [%rd1825];
add.s32 %r7572, %r7571, %r7570;
xor.b32 %r7573, %r71, %r43;
xor.b32 %r7574, %r7573, %r5294;
xor.b32 %r5326, %r7574, %r7572;
// inline asm
bfe.u32 %r5313, %r5326, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1826, %r5313, 4;
add.s64 %rd1827, %rd42, %rd1826;
ld.shared.u32 %r7575, [%rd1827];
// inline asm
bfe.u32 %r5317, %r5326, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1828, %r5317, 4;
add.s64 %rd1829, %rd45, %rd1828;
ld.shared.u32 %r7576, [%rd1829];
add.s32 %r7577, %r7576, %r7575;
// inline asm
bfe.u32 %r5321, %r5326, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1830, %r5321, 4;
add.s64 %rd1831, %rd48, %rd1830;
ld.shared.u32 %r7578, [%rd1831];
xor.b32 %r7579, %r7578, %r7577;
// inline asm
bfe.u32 %r5325, %r5326, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1832, %r5325, 4;
add.s64 %rd1833, %rd51, %rd1832;
ld.shared.u32 %r7580, [%rd1833];
add.s32 %r7581, %r7580, %r7579;
xor.b32 %r7582, %r70, %r44;
xor.b32 %r7583, %r7582, %r5310;
xor.b32 %r5342, %r7583, %r7581;
// Round step: %r5358 = (%r73 ^ %r41) ^ %r5326 ^ G(%r5342).
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5329, %r5342, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1834, %r5329, 4;
add.s64 %rd1835, %rd42, %rd1834;
ld.shared.u32 %r7584, [%rd1835];
// inline asm
bfe.u32 %r5333, %r5342, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1836, %r5333, 4;
add.s64 %rd1837, %rd45, %rd1836;
ld.shared.u32 %r7585, [%rd1837];
add.s32 %r7586, %r7585, %r7584;
// inline asm
bfe.u32 %r5337, %r5342, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1838, %r5337, 4;
add.s64 %rd1839, %rd48, %rd1838;
ld.shared.u32 %r7587, [%rd1839];
xor.b32 %r7588, %r7587, %r7586;
// inline asm
bfe.u32 %r5341, %r5342, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1840, %r5341, 4;
add.s64 %rd1841, %rd51, %rd1840;
ld.shared.u32 %r7589, [%rd1841];
add.s32 %r7590, %r7589, %r7588;
// %r7591 = %r73 ^ %r41 is kept in its own register; later steps read it again.
xor.b32 %r7591, %r73, %r41;
xor.b32 %r7592, %r7591, %r5326;
xor.b32 %r5358, %r7592, %r7590;
// Round step: %r5374 = (%r72 ^ %r42) ^ %r5342 ^ G(%r5358).
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5345, %r5358, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1842, %r5345, 4;
add.s64 %rd1843, %rd42, %rd1842;
ld.shared.u32 %r7593, [%rd1843];
// inline asm
bfe.u32 %r5349, %r5358, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1844, %r5349, 4;
add.s64 %rd1845, %rd45, %rd1844;
ld.shared.u32 %r7594, [%rd1845];
add.s32 %r7595, %r7594, %r7593;
// inline asm
bfe.u32 %r5353, %r5358, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1846, %r5353, 4;
add.s64 %rd1847, %rd48, %rd1846;
ld.shared.u32 %r7596, [%rd1847];
xor.b32 %r7597, %r7596, %r7595;
// inline asm
bfe.u32 %r5357, %r5358, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1848, %r5357, 4;
add.s64 %rd1849, %rd51, %rd1848;
ld.shared.u32 %r7598, [%rd1849];
add.s32 %r7599, %r7598, %r7597;
// %r7600 = %r72 ^ %r42 is kept in its own register; later steps read it again.
xor.b32 %r7600, %r72, %r42;
xor.b32 %r7601, %r7600, %r5342;
xor.b32 %r5374, %r7601, %r7599;
// Round step: %r5390 = (%r75 ^ %r43) ^ %r5358 ^ G(%r5374).
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5361, %r5374, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1850, %r5361, 4;
add.s64 %rd1851, %rd42, %rd1850;
ld.shared.u32 %r7602, [%rd1851];
// inline asm
bfe.u32 %r5365, %r5374, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1852, %r5365, 4;
add.s64 %rd1853, %rd45, %rd1852;
ld.shared.u32 %r7603, [%rd1853];
add.s32 %r7604, %r7603, %r7602;
// inline asm
bfe.u32 %r5369, %r5374, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1854, %r5369, 4;
add.s64 %rd1855, %rd48, %rd1854;
ld.shared.u32 %r7605, [%rd1855];
xor.b32 %r7606, %r7605, %r7604;
// inline asm
bfe.u32 %r5373, %r5374, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1856, %r5373, 4;
add.s64 %rd1857, %rd51, %rd1856;
ld.shared.u32 %r7607, [%rd1857];
add.s32 %r7608, %r7607, %r7606;
// %r7609 = %r75 ^ %r43 is kept in its own register; later steps read it again.
xor.b32 %r7609, %r75, %r43;
xor.b32 %r7610, %r7609, %r5358;
xor.b32 %r5390, %r7610, %r7608;
// Round step: %r5406 = (%r74 ^ %r44) ^ %r5374 ^ G(%r5390).
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5377, %r5390, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1858, %r5377, 4;
add.s64 %rd1859, %rd42, %rd1858;
ld.shared.u32 %r7611, [%rd1859];
// inline asm
bfe.u32 %r5381, %r5390, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1860, %r5381, 4;
add.s64 %rd1861, %rd45, %rd1860;
ld.shared.u32 %r7612, [%rd1861];
add.s32 %r7613, %r7612, %r7611;
// inline asm
bfe.u32 %r5385, %r5390, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1862, %r5385, 4;
add.s64 %rd1863, %rd48, %rd1862;
ld.shared.u32 %r7614, [%rd1863];
xor.b32 %r7615, %r7614, %r7613;
// inline asm
bfe.u32 %r5389, %r5390, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1864, %r5389, 4;
add.s64 %rd1865, %rd51, %rd1864;
ld.shared.u32 %r7616, [%rd1865];
add.s32 %r7617, %r7616, %r7615;
// %r7618 = %r74 ^ %r44 is kept in its own register; later steps read it again.
xor.b32 %r7618, %r74, %r44;
xor.b32 %r7619, %r7618, %r5374;
xor.b32 %r5406, %r7619, %r7617;
// Round step: %r5422 = (%r77 ^ %r41) ^ %r5390 ^ G(%r5406).
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5393, %r5406, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1866, %r5393, 4;
add.s64 %rd1867, %rd42, %rd1866;
ld.shared.u32 %r7620, [%rd1867];
// inline asm
bfe.u32 %r5397, %r5406, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1868, %r5397, 4;
add.s64 %rd1869, %rd45, %rd1868;
ld.shared.u32 %r7621, [%rd1869];
add.s32 %r7622, %r7621, %r7620;
// inline asm
bfe.u32 %r5401, %r5406, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1870, %r5401, 4;
add.s64 %rd1871, %rd48, %rd1870;
ld.shared.u32 %r7623, [%rd1871];
xor.b32 %r7624, %r7623, %r7622;
// inline asm
bfe.u32 %r5405, %r5406, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1872, %r5405, 4;
add.s64 %rd1873, %rd51, %rd1872;
ld.shared.u32 %r7625, [%rd1873];
add.s32 %r7626, %r7625, %r7624;
// %r7627 = %r77 ^ %r41 is kept in its own register; later steps read it again.
xor.b32 %r7627, %r77, %r41;
xor.b32 %r7628, %r7627, %r5390;
xor.b32 %r5422, %r7628, %r7626;
// Round step: %r5438 = (%r76 ^ %r42) ^ %r5406 ^ G(%r5422).
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5409, %r5422, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1874, %r5409, 4;
add.s64 %rd1875, %rd42, %rd1874;
ld.shared.u32 %r7629, [%rd1875];
// inline asm
bfe.u32 %r5413, %r5422, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1876, %r5413, 4;
add.s64 %rd1877, %rd45, %rd1876;
ld.shared.u32 %r7630, [%rd1877];
add.s32 %r7631, %r7630, %r7629;
// inline asm
bfe.u32 %r5417, %r5422, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1878, %r5417, 4;
add.s64 %rd1879, %rd48, %rd1878;
ld.shared.u32 %r7632, [%rd1879];
xor.b32 %r7633, %r7632, %r7631;
// inline asm
bfe.u32 %r5421, %r5422, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1880, %r5421, 4;
add.s64 %rd1881, %rd51, %rd1880;
ld.shared.u32 %r7634, [%rd1881];
add.s32 %r7635, %r7634, %r7633;
// %r7636 = %r76 ^ %r42 is kept in its own register; later steps read it again.
xor.b32 %r7636, %r76, %r42;
xor.b32 %r7637, %r7636, %r5406;
xor.b32 %r5438, %r7637, %r7635;
// Round step: %r5454 = (%r79 ^ %r43) ^ %r5422 ^ G(%r5438).
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5425, %r5438, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1882, %r5425, 4;
add.s64 %rd1883, %rd42, %rd1882;
ld.shared.u32 %r7638, [%rd1883];
// inline asm
bfe.u32 %r5429, %r5438, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1884, %r5429, 4;
add.s64 %rd1885, %rd45, %rd1884;
ld.shared.u32 %r7639, [%rd1885];
add.s32 %r7640, %r7639, %r7638;
// inline asm
bfe.u32 %r5433, %r5438, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1886, %r5433, 4;
add.s64 %rd1887, %rd48, %rd1886;
ld.shared.u32 %r7641, [%rd1887];
xor.b32 %r7642, %r7641, %r7640;
// inline asm
bfe.u32 %r5437, %r5438, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1888, %r5437, 4;
add.s64 %rd1889, %rd51, %rd1888;
ld.shared.u32 %r7643, [%rd1889];
add.s32 %r7644, %r7643, %r7642;
// %r7645 = %r79 ^ %r43 is kept in its own register; later steps read it again.
xor.b32 %r7645, %r79, %r43;
xor.b32 %r7646, %r7645, %r5422;
xor.b32 %r5454, %r7646, %r7644;
// Round step: %r5470 = (%r78 ^ %r44) ^ %r5438 ^ G(%r5454).
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5441, %r5454, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1890, %r5441, 4;
add.s64 %rd1891, %rd42, %rd1890;
ld.shared.u32 %r7647, [%rd1891];
// inline asm
bfe.u32 %r5445, %r5454, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1892, %r5445, 4;
add.s64 %rd1893, %rd45, %rd1892;
ld.shared.u32 %r7648, [%rd1893];
add.s32 %r7649, %r7648, %r7647;
// inline asm
bfe.u32 %r5449, %r5454, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1894, %r5449, 4;
add.s64 %rd1895, %rd48, %rd1894;
ld.shared.u32 %r7650, [%rd1895];
xor.b32 %r7651, %r7650, %r7649;
// inline asm
bfe.u32 %r5453, %r5454, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1896, %r5453, 4;
add.s64 %rd1897, %rd51, %rd1896;
ld.shared.u32 %r7652, [%rd1897];
add.s32 %r7653, %r7652, %r7651;
// %r7654 = %r78 ^ %r44 is kept in its own register; later steps read it again.
xor.b32 %r7654, %r78, %r44;
xor.b32 %r7655, %r7654, %r5438;
xor.b32 %r5470, %r7655, %r7653;
// Last step of this sequence; it produces a pair of output words:
//   %r10350 = (%r81 ^ %r41) ^ %r5454 ^ G(%r5470)
//   %r10351 = (%r80 ^ %r42) ^ %r5470            (no table term on this word)
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5457, %r5470, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1898, %r5457, 4;
add.s64 %rd1899, %rd42, %rd1898;
ld.shared.u32 %r7656, [%rd1899];
// inline asm
bfe.u32 %r5461, %r5470, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1900, %r5461, 4;
add.s64 %rd1901, %rd45, %rd1900;
ld.shared.u32 %r7657, [%rd1901];
add.s32 %r7658, %r7657, %r7656;
// inline asm
bfe.u32 %r5465, %r5470, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1902, %r5465, 4;
add.s64 %rd1903, %rd48, %rd1902;
ld.shared.u32 %r7659, [%rd1903];
xor.b32 %r7660, %r7659, %r7658;
// inline asm
bfe.u32 %r5469, %r5470, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1904, %r5469, 4;
add.s64 %rd1905, %rd51, %rd1904;
ld.shared.u32 %r7661, [%rd1905];
add.s32 %r7662, %r7661, %r7660;
// %r7663 = %r81 ^ %r41 and %r7665 = %r80 ^ %r42 are kept; later code reads them again.
xor.b32 %r7663, %r81, %r41;
xor.b32 %r7664, %r7663, %r5454;
xor.b32 %r10350, %r7664, %r7662;
xor.b32 %r7665, %r80, %r42;
xor.b32 %r10351, %r7665, %r5470;
// %r5502 = G(%r10322): the table function applied directly to %r10322, with no key xor.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// Note that %r10322 serves here both as the data word and as the bit offset of f3.
// NOTE(review): %r10322 is set outside this chunk - presumably the constant 0, which
// would make this the first step on an all-zero input word; confirm.
// inline asm
bfe.u32 %r5473, %r10322, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1906, %r5473, 4;
add.s64 %rd1907, %rd42, %rd1906;
ld.shared.u32 %r7666, [%rd1907];
// inline asm
bfe.u32 %r5477, %r10322, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1908, %r5477, 4;
add.s64 %rd1909, %rd45, %rd1908;
ld.shared.u32 %r7667, [%rd1909];
add.s32 %r7668, %r7667, %r7666;
// inline asm
bfe.u32 %r5481, %r10322, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1910, %r5481, 4;
add.s64 %rd1911, %rd48, %rd1910;
ld.shared.u32 %r7669, [%rd1911];
xor.b32 %r7670, %r7669, %r7668;
// inline asm
bfe.u32 %r5485, %r10322, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1912, %r5485, 4;
add.s64 %rd1913, %rd51, %rd1912;
ld.shared.u32 %r7671, [%rd1913];
add.s32 %r5502, %r7671, %r7670;
// Round step: %r5518 = %r7537 ^ G(%r5502). %r7537 is computed earlier in the kernel.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5489, %r5502, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1914, %r5489, 4;
add.s64 %rd1915, %rd42, %rd1914;
ld.shared.u32 %r7672, [%rd1915];
// inline asm
bfe.u32 %r5493, %r5502, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1916, %r5493, 4;
add.s64 %rd1917, %rd45, %rd1916;
ld.shared.u32 %r7673, [%rd1917];
add.s32 %r7674, %r7673, %r7672;
// inline asm
bfe.u32 %r5497, %r5502, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1918, %r5497, 4;
add.s64 %rd1919, %rd48, %rd1918;
ld.shared.u32 %r7675, [%rd1919];
xor.b32 %r7676, %r7675, %r7674;
// inline asm
bfe.u32 %r5501, %r5502, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1920, %r5501, 4;
add.s64 %rd1921, %rd51, %rd1920;
ld.shared.u32 %r7677, [%rd1921];
add.s32 %r7678, %r7677, %r7676;
xor.b32 %r5518, %r7537, %r7678;
// Round step: %r5534 = (%r7546 ^ %r5502) ^ G(%r5518). %r7546 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5505, %r5518, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1922, %r5505, 4;
add.s64 %rd1923, %rd42, %rd1922;
ld.shared.u32 %r7679, [%rd1923];
// inline asm
bfe.u32 %r5509, %r5518, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1924, %r5509, 4;
add.s64 %rd1925, %rd45, %rd1924;
ld.shared.u32 %r7680, [%rd1925];
add.s32 %r7681, %r7680, %r7679;
// inline asm
bfe.u32 %r5513, %r5518, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1926, %r5513, 4;
add.s64 %rd1927, %rd48, %rd1926;
ld.shared.u32 %r7682, [%rd1927];
xor.b32 %r7683, %r7682, %r7681;
// inline asm
bfe.u32 %r5517, %r5518, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1928, %r5517, 4;
add.s64 %rd1929, %rd51, %rd1928;
ld.shared.u32 %r7684, [%rd1929];
add.s32 %r7685, %r7684, %r7683;
xor.b32 %r7686, %r7546, %r5502;
xor.b32 %r5534, %r7686, %r7685;
// Round step: %r5550 = (%r7555 ^ %r5518) ^ G(%r5534). %r7555 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5521, %r5534, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1930, %r5521, 4;
add.s64 %rd1931, %rd42, %rd1930;
ld.shared.u32 %r7687, [%rd1931];
// inline asm
bfe.u32 %r5525, %r5534, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1932, %r5525, 4;
add.s64 %rd1933, %rd45, %rd1932;
ld.shared.u32 %r7688, [%rd1933];
add.s32 %r7689, %r7688, %r7687;
// inline asm
bfe.u32 %r5529, %r5534, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1934, %r5529, 4;
add.s64 %rd1935, %rd48, %rd1934;
ld.shared.u32 %r7690, [%rd1935];
xor.b32 %r7691, %r7690, %r7689;
// inline asm
bfe.u32 %r5533, %r5534, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1936, %r5533, 4;
add.s64 %rd1937, %rd51, %rd1936;
ld.shared.u32 %r7692, [%rd1937];
add.s32 %r7693, %r7692, %r7691;
xor.b32 %r7694, %r7555, %r5518;
xor.b32 %r5550, %r7694, %r7693;
// Round step: %r5566 = (%r7564 ^ %r5534) ^ G(%r5550). %r7564 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5537, %r5550, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1938, %r5537, 4;
add.s64 %rd1939, %rd42, %rd1938;
ld.shared.u32 %r7695, [%rd1939];
// inline asm
bfe.u32 %r5541, %r5550, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1940, %r5541, 4;
add.s64 %rd1941, %rd45, %rd1940;
ld.shared.u32 %r7696, [%rd1941];
add.s32 %r7697, %r7696, %r7695;
// inline asm
bfe.u32 %r5545, %r5550, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1942, %r5545, 4;
add.s64 %rd1943, %rd48, %rd1942;
ld.shared.u32 %r7698, [%rd1943];
xor.b32 %r7699, %r7698, %r7697;
// inline asm
bfe.u32 %r5549, %r5550, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1944, %r5549, 4;
add.s64 %rd1945, %rd51, %rd1944;
ld.shared.u32 %r7700, [%rd1945];
add.s32 %r7701, %r7700, %r7699;
xor.b32 %r7702, %r7564, %r5534;
xor.b32 %r5566, %r7702, %r7701;
// Round step: %r5582 = (%r7573 ^ %r5550) ^ G(%r5566). %r7573 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5553, %r5566, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1946, %r5553, 4;
add.s64 %rd1947, %rd42, %rd1946;
ld.shared.u32 %r7703, [%rd1947];
// inline asm
bfe.u32 %r5557, %r5566, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1948, %r5557, 4;
add.s64 %rd1949, %rd45, %rd1948;
ld.shared.u32 %r7704, [%rd1949];
add.s32 %r7705, %r7704, %r7703;
// inline asm
bfe.u32 %r5561, %r5566, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1950, %r5561, 4;
add.s64 %rd1951, %rd48, %rd1950;
ld.shared.u32 %r7706, [%rd1951];
xor.b32 %r7707, %r7706, %r7705;
// inline asm
bfe.u32 %r5565, %r5566, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1952, %r5565, 4;
add.s64 %rd1953, %rd51, %rd1952;
ld.shared.u32 %r7708, [%rd1953];
add.s32 %r7709, %r7708, %r7707;
xor.b32 %r7710, %r7573, %r5550;
xor.b32 %r5582, %r7710, %r7709;
// Round step: %r5598 = (%r7582 ^ %r5566) ^ G(%r5582). %r7582 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5569, %r5582, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1954, %r5569, 4;
add.s64 %rd1955, %rd42, %rd1954;
ld.shared.u32 %r7711, [%rd1955];
// inline asm
bfe.u32 %r5573, %r5582, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1956, %r5573, 4;
add.s64 %rd1957, %rd45, %rd1956;
ld.shared.u32 %r7712, [%rd1957];
add.s32 %r7713, %r7712, %r7711;
// inline asm
bfe.u32 %r5577, %r5582, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1958, %r5577, 4;
add.s64 %rd1959, %rd48, %rd1958;
ld.shared.u32 %r7714, [%rd1959];
xor.b32 %r7715, %r7714, %r7713;
// inline asm
bfe.u32 %r5581, %r5582, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1960, %r5581, 4;
add.s64 %rd1961, %rd51, %rd1960;
ld.shared.u32 %r7716, [%rd1961];
add.s32 %r7717, %r7716, %r7715;
xor.b32 %r7718, %r7582, %r5566;
xor.b32 %r5598, %r7718, %r7717;
// Round step: %r5614 = (%r7591 ^ %r5582) ^ G(%r5598). %r7591 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5585, %r5598, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1962, %r5585, 4;
add.s64 %rd1963, %rd42, %rd1962;
ld.shared.u32 %r7719, [%rd1963];
// inline asm
bfe.u32 %r5589, %r5598, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1964, %r5589, 4;
add.s64 %rd1965, %rd45, %rd1964;
ld.shared.u32 %r7720, [%rd1965];
add.s32 %r7721, %r7720, %r7719;
// inline asm
bfe.u32 %r5593, %r5598, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1966, %r5593, 4;
add.s64 %rd1967, %rd48, %rd1966;
ld.shared.u32 %r7722, [%rd1967];
xor.b32 %r7723, %r7722, %r7721;
// inline asm
bfe.u32 %r5597, %r5598, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1968, %r5597, 4;
add.s64 %rd1969, %rd51, %rd1968;
ld.shared.u32 %r7724, [%rd1969];
add.s32 %r7725, %r7724, %r7723;
xor.b32 %r7726, %r7591, %r5582;
xor.b32 %r5614, %r7726, %r7725;
// Round step: %r5630 = (%r7600 ^ %r5598) ^ G(%r5614). %r7600 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5601, %r5614, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1970, %r5601, 4;
add.s64 %rd1971, %rd42, %rd1970;
ld.shared.u32 %r7727, [%rd1971];
// inline asm
bfe.u32 %r5605, %r5614, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1972, %r5605, 4;
add.s64 %rd1973, %rd45, %rd1972;
ld.shared.u32 %r7728, [%rd1973];
add.s32 %r7729, %r7728, %r7727;
// inline asm
bfe.u32 %r5609, %r5614, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1974, %r5609, 4;
add.s64 %rd1975, %rd48, %rd1974;
ld.shared.u32 %r7730, [%rd1975];
xor.b32 %r7731, %r7730, %r7729;
// inline asm
bfe.u32 %r5613, %r5614, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1976, %r5613, 4;
add.s64 %rd1977, %rd51, %rd1976;
ld.shared.u32 %r7732, [%rd1977];
add.s32 %r7733, %r7732, %r7731;
xor.b32 %r7734, %r7600, %r5598;
xor.b32 %r5630, %r7734, %r7733;
// Round step: %r5646 = (%r7609 ^ %r5614) ^ G(%r5630). %r7609 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5617, %r5630, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1978, %r5617, 4;
add.s64 %rd1979, %rd42, %rd1978;
ld.shared.u32 %r7735, [%rd1979];
// inline asm
bfe.u32 %r5621, %r5630, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1980, %r5621, 4;
add.s64 %rd1981, %rd45, %rd1980;
ld.shared.u32 %r7736, [%rd1981];
add.s32 %r7737, %r7736, %r7735;
// inline asm
bfe.u32 %r5625, %r5630, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1982, %r5625, 4;
add.s64 %rd1983, %rd48, %rd1982;
ld.shared.u32 %r7738, [%rd1983];
xor.b32 %r7739, %r7738, %r7737;
// inline asm
bfe.u32 %r5629, %r5630, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1984, %r5629, 4;
add.s64 %rd1985, %rd51, %rd1984;
ld.shared.u32 %r7740, [%rd1985];
add.s32 %r7741, %r7740, %r7739;
xor.b32 %r7742, %r7609, %r5614;
xor.b32 %r5646, %r7742, %r7741;
// Round step: %r5662 = (%r7618 ^ %r5630) ^ G(%r5646). %r7618 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5633, %r5646, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1986, %r5633, 4;
add.s64 %rd1987, %rd42, %rd1986;
ld.shared.u32 %r7743, [%rd1987];
// inline asm
bfe.u32 %r5637, %r5646, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1988, %r5637, 4;
add.s64 %rd1989, %rd45, %rd1988;
ld.shared.u32 %r7744, [%rd1989];
add.s32 %r7745, %r7744, %r7743;
// inline asm
bfe.u32 %r5641, %r5646, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1990, %r5641, 4;
add.s64 %rd1991, %rd48, %rd1990;
ld.shared.u32 %r7746, [%rd1991];
xor.b32 %r7747, %r7746, %r7745;
// inline asm
bfe.u32 %r5645, %r5646, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd1992, %r5645, 4;
add.s64 %rd1993, %rd51, %rd1992;
ld.shared.u32 %r7748, [%rd1993];
add.s32 %r7749, %r7748, %r7747;
xor.b32 %r7750, %r7618, %r5630;
xor.b32 %r5662, %r7750, %r7749;
// Round step: %r5678 = (%r7627 ^ %r5646) ^ G(%r5662). %r7627 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5649, %r5662, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd1994, %r5649, 4;
add.s64 %rd1995, %rd42, %rd1994;
ld.shared.u32 %r7751, [%rd1995];
// inline asm
bfe.u32 %r5653, %r5662, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd1996, %r5653, 4;
add.s64 %rd1997, %rd45, %rd1996;
ld.shared.u32 %r7752, [%rd1997];
add.s32 %r7753, %r7752, %r7751;
// inline asm
bfe.u32 %r5657, %r5662, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd1998, %r5657, 4;
add.s64 %rd1999, %rd48, %rd1998;
ld.shared.u32 %r7754, [%rd1999];
xor.b32 %r7755, %r7754, %r7753;
// inline asm
bfe.u32 %r5661, %r5662, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2000, %r5661, 4;
add.s64 %rd2001, %rd51, %rd2000;
ld.shared.u32 %r7756, [%rd2001];
add.s32 %r7757, %r7756, %r7755;
xor.b32 %r7758, %r7627, %r5646;
xor.b32 %r5678, %r7758, %r7757;
// Round step: %r5694 = (%r7636 ^ %r5662) ^ G(%r5678). %r7636 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5665, %r5678, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2002, %r5665, 4;
add.s64 %rd2003, %rd42, %rd2002;
ld.shared.u32 %r7759, [%rd2003];
// inline asm
bfe.u32 %r5669, %r5678, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2004, %r5669, 4;
add.s64 %rd2005, %rd45, %rd2004;
ld.shared.u32 %r7760, [%rd2005];
add.s32 %r7761, %r7760, %r7759;
// inline asm
bfe.u32 %r5673, %r5678, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2006, %r5673, 4;
add.s64 %rd2007, %rd48, %rd2006;
ld.shared.u32 %r7762, [%rd2007];
xor.b32 %r7763, %r7762, %r7761;
// inline asm
bfe.u32 %r5677, %r5678, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2008, %r5677, 4;
add.s64 %rd2009, %rd51, %rd2008;
ld.shared.u32 %r7764, [%rd2009];
add.s32 %r7765, %r7764, %r7763;
xor.b32 %r7766, %r7636, %r5662;
xor.b32 %r5694, %r7766, %r7765;
// Round step: %r5710 = (%r7645 ^ %r5678) ^ G(%r5694). %r7645 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5681, %r5694, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2010, %r5681, 4;
add.s64 %rd2011, %rd42, %rd2010;
ld.shared.u32 %r7767, [%rd2011];
// inline asm
bfe.u32 %r5685, %r5694, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2012, %r5685, 4;
add.s64 %rd2013, %rd45, %rd2012;
ld.shared.u32 %r7768, [%rd2013];
add.s32 %r7769, %r7768, %r7767;
// inline asm
bfe.u32 %r5689, %r5694, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2014, %r5689, 4;
add.s64 %rd2015, %rd48, %rd2014;
ld.shared.u32 %r7770, [%rd2015];
xor.b32 %r7771, %r7770, %r7769;
// inline asm
bfe.u32 %r5693, %r5694, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2016, %r5693, 4;
add.s64 %rd2017, %rd51, %rd2016;
ld.shared.u32 %r7772, [%rd2017];
add.s32 %r7773, %r7772, %r7771;
xor.b32 %r7774, %r7645, %r5678;
xor.b32 %r5710, %r7774, %r7773;
// Round step: %r5726 = (%r7654 ^ %r5694) ^ G(%r5710). %r7654 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5697, %r5710, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2018, %r5697, 4;
add.s64 %rd2019, %rd42, %rd2018;
ld.shared.u32 %r7775, [%rd2019];
// inline asm
bfe.u32 %r5701, %r5710, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2020, %r5701, 4;
add.s64 %rd2021, %rd45, %rd2020;
ld.shared.u32 %r7776, [%rd2021];
add.s32 %r7777, %r7776, %r7775;
// inline asm
bfe.u32 %r5705, %r5710, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2022, %r5705, 4;
add.s64 %rd2023, %rd48, %rd2022;
ld.shared.u32 %r7778, [%rd2023];
xor.b32 %r7779, %r7778, %r7777;
// inline asm
bfe.u32 %r5709, %r5710, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2024, %r5709, 4;
add.s64 %rd2025, %rd51, %rd2024;
ld.shared.u32 %r7780, [%rd2025];
add.s32 %r7781, %r7780, %r7779;
xor.b32 %r7782, %r7654, %r5694;
xor.b32 %r5726, %r7782, %r7781;
// Last step of this sequence; it produces a pair of output words:
//   %r10348 = (%r7663 ^ %r5710) ^ G(%r5726)
//   %r10349 = %r7665 ^ %r5726                   (no table term on this word)
// %r7663 and %r7665 are computed earlier in the kernel.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5713, %r5726, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2026, %r5713, 4;
add.s64 %rd2027, %rd42, %rd2026;
ld.shared.u32 %r7783, [%rd2027];
// inline asm
bfe.u32 %r5717, %r5726, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2028, %r5717, 4;
add.s64 %rd2029, %rd45, %rd2028;
ld.shared.u32 %r7784, [%rd2029];
add.s32 %r7785, %r7784, %r7783;
// inline asm
bfe.u32 %r5721, %r5726, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2030, %r5721, 4;
add.s64 %rd2031, %rd48, %rd2030;
ld.shared.u32 %r7786, [%rd2031];
xor.b32 %r7787, %r7786, %r7785;
// inline asm
bfe.u32 %r5725, %r5726, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2032, %r5725, 4;
add.s64 %rd2033, %rd51, %rd2032;
ld.shared.u32 %r7788, [%rd2033];
add.s32 %r7789, %r7788, %r7787;
xor.b32 %r7790, %r7663, %r5710;
xor.b32 %r10348, %r7790, %r7789;
xor.b32 %r10349, %r7665, %r5726;
// First step of a new sequence:
//   %r5742 = %r5470 ^ %r5726
//   %r7797 = G(%r5742)                          (kept; a later step reads it again)
//   %r5758 = (%r10350 ^ %r10348) ^ %r7797
// %r5470, %r5726, %r10348 and %r10350 are computed earlier in the kernel.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
xor.b32 %r5742, %r5470, %r5726;
// inline asm
bfe.u32 %r5729, %r5742, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2034, %r5729, 4;
add.s64 %rd2035, %rd42, %rd2034;
ld.shared.u32 %r7791, [%rd2035];
// inline asm
bfe.u32 %r5733, %r5742, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2036, %r5733, 4;
add.s64 %rd2037, %rd45, %rd2036;
ld.shared.u32 %r7792, [%rd2037];
add.s32 %r7793, %r7792, %r7791;
// inline asm
bfe.u32 %r5737, %r5742, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2038, %r5737, 4;
add.s64 %rd2039, %rd48, %rd2038;
ld.shared.u32 %r7794, [%rd2039];
xor.b32 %r7795, %r7794, %r7793;
// inline asm
bfe.u32 %r5741, %r5742, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2040, %r5741, 4;
add.s64 %rd2041, %rd51, %rd2040;
ld.shared.u32 %r7796, [%rd2041];
add.s32 %r7797, %r7796, %r7795;
xor.b32 %r7798, %r10350, %r10348;
xor.b32 %r5758, %r7798, %r7797;
// Round step: %r5774 = %r10351 ^ G(%r5758). %r10351 is computed earlier in the kernel.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5745, %r5758, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2042, %r5745, 4;
add.s64 %rd2043, %rd42, %rd2042;
ld.shared.u32 %r7799, [%rd2043];
// inline asm
bfe.u32 %r5749, %r5758, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2044, %r5749, 4;
add.s64 %rd2045, %rd45, %rd2044;
ld.shared.u32 %r7800, [%rd2045];
add.s32 %r7801, %r7800, %r7799;
// inline asm
bfe.u32 %r5753, %r5758, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2046, %r5753, 4;
add.s64 %rd2047, %rd48, %rd2046;
ld.shared.u32 %r7802, [%rd2047];
xor.b32 %r7803, %r7802, %r7801;
// inline asm
bfe.u32 %r5757, %r5758, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2048, %r5757, 4;
add.s64 %rd2049, %rd51, %rd2048;
ld.shared.u32 %r7804, [%rd2049];
add.s32 %r7805, %r7804, %r7803;
xor.b32 %r5774, %r10351, %r7805;
// Round step: %r5790 = (%r10350 ^ %r7797) ^ G(%r5774). %r10350 and %r7797 are computed
// earlier in the kernel.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5761, %r5774, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2050, %r5761, 4;
add.s64 %rd2051, %rd42, %rd2050;
ld.shared.u32 %r7806, [%rd2051];
// inline asm
bfe.u32 %r5765, %r5774, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2052, %r5765, 4;
add.s64 %rd2053, %rd45, %rd2052;
ld.shared.u32 %r7807, [%rd2053];
add.s32 %r7808, %r7807, %r7806;
// inline asm
bfe.u32 %r5769, %r5774, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2054, %r5769, 4;
add.s64 %rd2055, %rd48, %rd2054;
ld.shared.u32 %r7809, [%rd2055];
xor.b32 %r7810, %r7809, %r7808;
// inline asm
bfe.u32 %r5773, %r5774, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2056, %r5773, 4;
add.s64 %rd2057, %rd51, %rd2056;
ld.shared.u32 %r7811, [%rd2057];
add.s32 %r7812, %r7811, %r7810;
xor.b32 %r7813, %r10350, %r7797;
xor.b32 %r5790, %r7813, %r7812;
// Round step: %r5806 = (%r7555 ^ %r5774) ^ G(%r5790). %r7555 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5777, %r5790, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2058, %r5777, 4;
add.s64 %rd2059, %rd42, %rd2058;
ld.shared.u32 %r7814, [%rd2059];
// inline asm
bfe.u32 %r5781, %r5790, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2060, %r5781, 4;
add.s64 %rd2061, %rd45, %rd2060;
ld.shared.u32 %r7815, [%rd2061];
add.s32 %r7816, %r7815, %r7814;
// inline asm
bfe.u32 %r5785, %r5790, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2062, %r5785, 4;
add.s64 %rd2063, %rd48, %rd2062;
ld.shared.u32 %r7817, [%rd2063];
xor.b32 %r7818, %r7817, %r7816;
// inline asm
bfe.u32 %r5789, %r5790, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2064, %r5789, 4;
add.s64 %rd2065, %rd51, %rd2064;
ld.shared.u32 %r7819, [%rd2065];
add.s32 %r7820, %r7819, %r7818;
xor.b32 %r7821, %r7555, %r5774;
xor.b32 %r5806, %r7821, %r7820;
// Round step: %r5822 = (%r7564 ^ %r5790) ^ G(%r5806). %r7564 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5793, %r5806, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2066, %r5793, 4;
add.s64 %rd2067, %rd42, %rd2066;
ld.shared.u32 %r7822, [%rd2067];
// inline asm
bfe.u32 %r5797, %r5806, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2068, %r5797, 4;
add.s64 %rd2069, %rd45, %rd2068;
ld.shared.u32 %r7823, [%rd2069];
add.s32 %r7824, %r7823, %r7822;
// inline asm
bfe.u32 %r5801, %r5806, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2070, %r5801, 4;
add.s64 %rd2071, %rd48, %rd2070;
ld.shared.u32 %r7825, [%rd2071];
xor.b32 %r7826, %r7825, %r7824;
// inline asm
bfe.u32 %r5805, %r5806, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2072, %r5805, 4;
add.s64 %rd2073, %rd51, %rd2072;
ld.shared.u32 %r7827, [%rd2073];
add.s32 %r7828, %r7827, %r7826;
xor.b32 %r7829, %r7564, %r5790;
xor.b32 %r5822, %r7829, %r7828;
// Round step: %r5838 = (%r7573 ^ %r5806) ^ G(%r5822). %r7573 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5809, %r5822, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2074, %r5809, 4;
add.s64 %rd2075, %rd42, %rd2074;
ld.shared.u32 %r7830, [%rd2075];
// inline asm
bfe.u32 %r5813, %r5822, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2076, %r5813, 4;
add.s64 %rd2077, %rd45, %rd2076;
ld.shared.u32 %r7831, [%rd2077];
add.s32 %r7832, %r7831, %r7830;
// inline asm
bfe.u32 %r5817, %r5822, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2078, %r5817, 4;
add.s64 %rd2079, %rd48, %rd2078;
ld.shared.u32 %r7833, [%rd2079];
xor.b32 %r7834, %r7833, %r7832;
// inline asm
bfe.u32 %r5821, %r5822, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2080, %r5821, 4;
add.s64 %rd2081, %rd51, %rd2080;
ld.shared.u32 %r7835, [%rd2081];
add.s32 %r7836, %r7835, %r7834;
xor.b32 %r7837, %r7573, %r5806;
xor.b32 %r5838, %r7837, %r7836;
// Round step: %r5854 = (%r7582 ^ %r5822) ^ G(%r5838). %r7582 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5825, %r5838, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2082, %r5825, 4;
add.s64 %rd2083, %rd42, %rd2082;
ld.shared.u32 %r7838, [%rd2083];
// inline asm
bfe.u32 %r5829, %r5838, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2084, %r5829, 4;
add.s64 %rd2085, %rd45, %rd2084;
ld.shared.u32 %r7839, [%rd2085];
add.s32 %r7840, %r7839, %r7838;
// inline asm
bfe.u32 %r5833, %r5838, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2086, %r5833, 4;
add.s64 %rd2087, %rd48, %rd2086;
ld.shared.u32 %r7841, [%rd2087];
xor.b32 %r7842, %r7841, %r7840;
// inline asm
bfe.u32 %r5837, %r5838, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2088, %r5837, 4;
add.s64 %rd2089, %rd51, %rd2088;
ld.shared.u32 %r7843, [%rd2089];
add.s32 %r7844, %r7843, %r7842;
xor.b32 %r7845, %r7582, %r5822;
xor.b32 %r5854, %r7845, %r7844;
// Round step: %r5870 = (%r7591 ^ %r5838) ^ G(%r5854). %r7591 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5841, %r5854, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2090, %r5841, 4;
add.s64 %rd2091, %rd42, %rd2090;
ld.shared.u32 %r7846, [%rd2091];
// inline asm
bfe.u32 %r5845, %r5854, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2092, %r5845, 4;
add.s64 %rd2093, %rd45, %rd2092;
ld.shared.u32 %r7847, [%rd2093];
add.s32 %r7848, %r7847, %r7846;
// inline asm
bfe.u32 %r5849, %r5854, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2094, %r5849, 4;
add.s64 %rd2095, %rd48, %rd2094;
ld.shared.u32 %r7849, [%rd2095];
xor.b32 %r7850, %r7849, %r7848;
// inline asm
bfe.u32 %r5853, %r5854, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2096, %r5853, 4;
add.s64 %rd2097, %rd51, %rd2096;
ld.shared.u32 %r7851, [%rd2097];
add.s32 %r7852, %r7851, %r7850;
xor.b32 %r7853, %r7591, %r5838;
xor.b32 %r5870, %r7853, %r7852;
// Round step: %r5886 = (%r7600 ^ %r5854) ^ G(%r5870). %r7600 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5857, %r5870, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2098, %r5857, 4;
add.s64 %rd2099, %rd42, %rd2098;
ld.shared.u32 %r7854, [%rd2099];
// inline asm
bfe.u32 %r5861, %r5870, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2100, %r5861, 4;
add.s64 %rd2101, %rd45, %rd2100;
ld.shared.u32 %r7855, [%rd2101];
add.s32 %r7856, %r7855, %r7854;
// inline asm
bfe.u32 %r5865, %r5870, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2102, %r5865, 4;
add.s64 %rd2103, %rd48, %rd2102;
ld.shared.u32 %r7857, [%rd2103];
xor.b32 %r7858, %r7857, %r7856;
// inline asm
bfe.u32 %r5869, %r5870, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2104, %r5869, 4;
add.s64 %rd2105, %rd51, %rd2104;
ld.shared.u32 %r7859, [%rd2105];
add.s32 %r7860, %r7859, %r7858;
xor.b32 %r7861, %r7600, %r5854;
xor.b32 %r5886, %r7861, %r7860;
// Round step: %r5902 = (%r7609 ^ %r5870) ^ G(%r5886). %r7609 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5873, %r5886, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2106, %r5873, 4;
add.s64 %rd2107, %rd42, %rd2106;
ld.shared.u32 %r7862, [%rd2107];
// inline asm
bfe.u32 %r5877, %r5886, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2108, %r5877, 4;
add.s64 %rd2109, %rd45, %rd2108;
ld.shared.u32 %r7863, [%rd2109];
add.s32 %r7864, %r7863, %r7862;
// inline asm
bfe.u32 %r5881, %r5886, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2110, %r5881, 4;
add.s64 %rd2111, %rd48, %rd2110;
ld.shared.u32 %r7865, [%rd2111];
xor.b32 %r7866, %r7865, %r7864;
// inline asm
bfe.u32 %r5885, %r5886, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2112, %r5885, 4;
add.s64 %rd2113, %rd51, %rd2112;
ld.shared.u32 %r7867, [%rd2113];
add.s32 %r7868, %r7867, %r7866;
xor.b32 %r7869, %r7609, %r5870;
xor.b32 %r5902, %r7869, %r7868;
// Round step: %r5918 = (%r7618 ^ %r5886) ^ G(%r5902). %r7618 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5889, %r5902, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2114, %r5889, 4;
add.s64 %rd2115, %rd42, %rd2114;
ld.shared.u32 %r7870, [%rd2115];
// inline asm
bfe.u32 %r5893, %r5902, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2116, %r5893, 4;
add.s64 %rd2117, %rd45, %rd2116;
ld.shared.u32 %r7871, [%rd2117];
add.s32 %r7872, %r7871, %r7870;
// inline asm
bfe.u32 %r5897, %r5902, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2118, %r5897, 4;
add.s64 %rd2119, %rd48, %rd2118;
ld.shared.u32 %r7873, [%rd2119];
xor.b32 %r7874, %r7873, %r7872;
// inline asm
bfe.u32 %r5901, %r5902, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2120, %r5901, 4;
add.s64 %rd2121, %rd51, %rd2120;
ld.shared.u32 %r7875, [%rd2121];
add.s32 %r7876, %r7875, %r7874;
xor.b32 %r7877, %r7618, %r5886;
xor.b32 %r5918, %r7877, %r7876;
// Round step: %r5934 = (%r7627 ^ %r5902) ^ G(%r5918). %r7627 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5905, %r5918, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2122, %r5905, 4;
add.s64 %rd2123, %rd42, %rd2122;
ld.shared.u32 %r7878, [%rd2123];
// inline asm
bfe.u32 %r5909, %r5918, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2124, %r5909, 4;
add.s64 %rd2125, %rd45, %rd2124;
ld.shared.u32 %r7879, [%rd2125];
add.s32 %r7880, %r7879, %r7878;
// inline asm
bfe.u32 %r5913, %r5918, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2126, %r5913, 4;
add.s64 %rd2127, %rd48, %rd2126;
ld.shared.u32 %r7881, [%rd2127];
xor.b32 %r7882, %r7881, %r7880;
// inline asm
bfe.u32 %r5917, %r5918, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2128, %r5917, 4;
add.s64 %rd2129, %rd51, %rd2128;
ld.shared.u32 %r7883, [%rd2129];
add.s32 %r7884, %r7883, %r7882;
xor.b32 %r7885, %r7627, %r5902;
xor.b32 %r5934, %r7885, %r7884;
// Round step: %r5950 = (%r7636 ^ %r5918) ^ G(%r5934). %r7636 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5921, %r5934, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2130, %r5921, 4;
add.s64 %rd2131, %rd42, %rd2130;
ld.shared.u32 %r7886, [%rd2131];
// inline asm
bfe.u32 %r5925, %r5934, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2132, %r5925, 4;
add.s64 %rd2133, %rd45, %rd2132;
ld.shared.u32 %r7887, [%rd2133];
add.s32 %r7888, %r7887, %r7886;
// inline asm
bfe.u32 %r5929, %r5934, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2134, %r5929, 4;
add.s64 %rd2135, %rd48, %rd2134;
ld.shared.u32 %r7889, [%rd2135];
xor.b32 %r7890, %r7889, %r7888;
// inline asm
bfe.u32 %r5933, %r5934, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2136, %r5933, 4;
add.s64 %rd2137, %rd51, %rd2136;
ld.shared.u32 %r7891, [%rd2137];
add.s32 %r7892, %r7891, %r7890;
xor.b32 %r7893, %r7636, %r5918;
xor.b32 %r5950, %r7893, %r7892;
// Round step: %r5966 = (%r7645 ^ %r5934) ^ G(%r5950). %r7645 is computed earlier.
// G(x) = ((T0[f0] + T1[f1]) ^ T2[f2]) + T3[f3], adds wrapping at 32 bits; T0..T3 are the
// u32 tables read with ld.shared at bases %rd42, %rd45, %rd48, %rd51, and f0..f3 are
// bfe.u32 fields of x at bit offsets %r10287, %r10289, %r10288, %r10322, width %r10288.
// NOTE(review): offset/width registers are set outside this chunk - presumably 24/16/8/0
// and 8 (byte lanes); confirm.
// inline asm
bfe.u32 %r5937, %r5950, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2138, %r5937, 4;
add.s64 %rd2139, %rd42, %rd2138;
ld.shared.u32 %r7894, [%rd2139];
// inline asm
bfe.u32 %r5941, %r5950, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2140, %r5941, 4;
add.s64 %rd2141, %rd45, %rd2140;
ld.shared.u32 %r7895, [%rd2141];
add.s32 %r7896, %r7895, %r7894;
// inline asm
bfe.u32 %r5945, %r5950, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2142, %r5945, 4;
add.s64 %rd2143, %rd48, %rd2142;
ld.shared.u32 %r7897, [%rd2143];
xor.b32 %r7898, %r7897, %r7896;
// inline asm
bfe.u32 %r5949, %r5950, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2144, %r5949, 4;
add.s64 %rd2145, %rd51, %rd2144;
ld.shared.u32 %r7899, [%rd2145];
add.s32 %r7900, %r7899, %r7898;
xor.b32 %r7901, %r7645, %r5934;
xor.b32 %r5966, %r7901, %r7900;
// inline asm
bfe.u32 %r5953, %r5966, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2146, %r5953, 4;
add.s64 %rd2147, %rd42, %rd2146;
ld.shared.u32 %r7902, [%rd2147];
// inline asm
bfe.u32 %r5957, %r5966, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2148, %r5957, 4;
add.s64 %rd2149, %rd45, %rd2148;
ld.shared.u32 %r7903, [%rd2149];
add.s32 %r7904, %r7903, %r7902;
// inline asm
bfe.u32 %r5961, %r5966, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2150, %r5961, 4;
add.s64 %rd2151, %rd48, %rd2150;
ld.shared.u32 %r7905, [%rd2151];
xor.b32 %r7906, %r7905, %r7904;
// inline asm
bfe.u32 %r5965, %r5966, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2152, %r5965, 4;
add.s64 %rd2153, %rd51, %rd2152;
ld.shared.u32 %r7907, [%rd2153];
add.s32 %r7908, %r7907, %r7906;
xor.b32 %r7909, %r7654, %r5950;
xor.b32 %r5982, %r7909, %r7908;
// inline asm
bfe.u32 %r5969, %r5982, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2154, %r5969, 4;
add.s64 %rd2155, %rd42, %rd2154;
ld.shared.u32 %r7910, [%rd2155];
// inline asm
bfe.u32 %r5973, %r5982, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2156, %r5973, 4;
add.s64 %rd2157, %rd45, %rd2156;
ld.shared.u32 %r7911, [%rd2157];
add.s32 %r7912, %r7911, %r7910;
// inline asm
bfe.u32 %r5977, %r5982, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2158, %r5977, 4;
add.s64 %rd2159, %rd48, %rd2158;
ld.shared.u32 %r7913, [%rd2159];
xor.b32 %r7914, %r7913, %r7912;
// inline asm
bfe.u32 %r5981, %r5982, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2160, %r5981, 4;
add.s64 %rd2161, %rd51, %rd2160;
ld.shared.u32 %r7915, [%rd2161];
add.s32 %r7916, %r7915, %r7914;
xor.b32 %r7917, %r7663, %r5966;
xor.b32 %r10346, %r7917, %r7916;
xor.b32 %r10347, %r7665, %r5982;
xor.b32 %r5998, %r5470, %r5982;
// inline asm
bfe.u32 %r5985, %r5998, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2162, %r5985, 4;
add.s64 %rd2163, %rd42, %rd2162;
ld.shared.u32 %r7918, [%rd2163];
// inline asm
bfe.u32 %r5989, %r5998, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2164, %r5989, 4;
add.s64 %rd2165, %rd45, %rd2164;
ld.shared.u32 %r7919, [%rd2165];
add.s32 %r7920, %r7919, %r7918;
// inline asm
bfe.u32 %r5993, %r5998, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2166, %r5993, 4;
add.s64 %rd2167, %rd48, %rd2166;
ld.shared.u32 %r7921, [%rd2167];
xor.b32 %r7922, %r7921, %r7920;
// inline asm
bfe.u32 %r5997, %r5998, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2168, %r5997, 4;
add.s64 %rd2169, %rd51, %rd2168;
ld.shared.u32 %r7923, [%rd2169];
add.s32 %r7924, %r7923, %r7922;
xor.b32 %r7925, %r10350, %r10346;
xor.b32 %r6014, %r7925, %r7924;
// inline asm
bfe.u32 %r6001, %r6014, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2170, %r6001, 4;
add.s64 %rd2171, %rd42, %rd2170;
ld.shared.u32 %r7926, [%rd2171];
// inline asm
bfe.u32 %r6005, %r6014, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2172, %r6005, 4;
add.s64 %rd2173, %rd45, %rd2172;
ld.shared.u32 %r7927, [%rd2173];
add.s32 %r7928, %r7927, %r7926;
// inline asm
bfe.u32 %r6009, %r6014, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2174, %r6009, 4;
add.s64 %rd2175, %rd48, %rd2174;
ld.shared.u32 %r7929, [%rd2175];
xor.b32 %r7930, %r7929, %r7928;
// inline asm
bfe.u32 %r6013, %r6014, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2176, %r6013, 4;
add.s64 %rd2177, %rd51, %rd2176;
ld.shared.u32 %r7931, [%rd2177];
add.s32 %r7932, %r7931, %r7930;
xor.b32 %r7933, %r10349, %r5998;
xor.b32 %r6030, %r7933, %r7932;
// inline asm
bfe.u32 %r6017, %r6030, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2178, %r6017, 4;
add.s64 %rd2179, %rd42, %rd2178;
ld.shared.u32 %r7934, [%rd2179];
// inline asm
bfe.u32 %r6021, %r6030, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2180, %r6021, 4;
add.s64 %rd2181, %rd45, %rd2180;
ld.shared.u32 %r7935, [%rd2181];
add.s32 %r7936, %r7935, %r7934;
// inline asm
bfe.u32 %r6025, %r6030, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2182, %r6025, 4;
add.s64 %rd2183, %rd48, %rd2182;
ld.shared.u32 %r7937, [%rd2183];
xor.b32 %r7938, %r7937, %r7936;
// inline asm
bfe.u32 %r6029, %r6030, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2184, %r6029, 4;
add.s64 %rd2185, %rd51, %rd2184;
ld.shared.u32 %r7939, [%rd2185];
add.s32 %r7940, %r7939, %r7938;
xor.b32 %r7941, %r10348, %r6014;
xor.b32 %r6046, %r7941, %r7940;
// inline asm
bfe.u32 %r6033, %r6046, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2186, %r6033, 4;
add.s64 %rd2187, %rd42, %rd2186;
ld.shared.u32 %r7942, [%rd2187];
// inline asm
bfe.u32 %r6037, %r6046, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2188, %r6037, 4;
add.s64 %rd2189, %rd45, %rd2188;
ld.shared.u32 %r7943, [%rd2189];
add.s32 %r7944, %r7943, %r7942;
// inline asm
bfe.u32 %r6041, %r6046, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2190, %r6041, 4;
add.s64 %rd2191, %rd48, %rd2190;
ld.shared.u32 %r7945, [%rd2191];
xor.b32 %r7946, %r7945, %r7944;
// inline asm
bfe.u32 %r6045, %r6046, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2192, %r6045, 4;
add.s64 %rd2193, %rd51, %rd2192;
ld.shared.u32 %r7947, [%rd2193];
add.s32 %r7948, %r7947, %r7946;
xor.b32 %r7949, %r10347, %r6030;
xor.b32 %r6062, %r7949, %r7948;
// inline asm
bfe.u32 %r6049, %r6062, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2194, %r6049, 4;
add.s64 %rd2195, %rd42, %rd2194;
ld.shared.u32 %r7950, [%rd2195];
// inline asm
bfe.u32 %r6053, %r6062, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2196, %r6053, 4;
add.s64 %rd2197, %rd45, %rd2196;
ld.shared.u32 %r7951, [%rd2197];
add.s32 %r7952, %r7951, %r7950;
// inline asm
bfe.u32 %r6057, %r6062, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2198, %r6057, 4;
add.s64 %rd2199, %rd48, %rd2198;
ld.shared.u32 %r7953, [%rd2199];
xor.b32 %r7954, %r7953, %r7952;
// inline asm
bfe.u32 %r6061, %r6062, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2200, %r6061, 4;
add.s64 %rd2201, %rd51, %rd2200;
ld.shared.u32 %r7955, [%rd2201];
add.s32 %r7956, %r7955, %r7954;
xor.b32 %r7957, %r10346, %r6046;
xor.b32 %r6078, %r7957, %r7956;
// inline asm
bfe.u32 %r6065, %r6078, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2202, %r6065, 4;
add.s64 %rd2203, %rd42, %rd2202;
ld.shared.u32 %r7958, [%rd2203];
// inline asm
bfe.u32 %r6069, %r6078, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2204, %r6069, 4;
add.s64 %rd2205, %rd45, %rd2204;
ld.shared.u32 %r7959, [%rd2205];
add.s32 %r7960, %r7959, %r7958;
// inline asm
bfe.u32 %r6073, %r6078, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2206, %r6073, 4;
add.s64 %rd2207, %rd48, %rd2206;
ld.shared.u32 %r7961, [%rd2207];
xor.b32 %r7962, %r7961, %r7960;
// inline asm
bfe.u32 %r6077, %r6078, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2208, %r6077, 4;
add.s64 %rd2209, %rd51, %rd2208;
ld.shared.u32 %r7963, [%rd2209];
add.s32 %r7964, %r7963, %r7962;
xor.b32 %r7965, %r7573, %r6062;
xor.b32 %r6094, %r7965, %r7964;
// inline asm
bfe.u32 %r6081, %r6094, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2210, %r6081, 4;
add.s64 %rd2211, %rd42, %rd2210;
ld.shared.u32 %r7966, [%rd2211];
// inline asm
bfe.u32 %r6085, %r6094, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2212, %r6085, 4;
add.s64 %rd2213, %rd45, %rd2212;
ld.shared.u32 %r7967, [%rd2213];
add.s32 %r7968, %r7967, %r7966;
// inline asm
bfe.u32 %r6089, %r6094, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2214, %r6089, 4;
add.s64 %rd2215, %rd48, %rd2214;
ld.shared.u32 %r7969, [%rd2215];
xor.b32 %r7970, %r7969, %r7968;
// inline asm
bfe.u32 %r6093, %r6094, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2216, %r6093, 4;
add.s64 %rd2217, %rd51, %rd2216;
ld.shared.u32 %r7971, [%rd2217];
add.s32 %r7972, %r7971, %r7970;
xor.b32 %r7973, %r7582, %r6078;
xor.b32 %r6110, %r7973, %r7972;
// inline asm
bfe.u32 %r6097, %r6110, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2218, %r6097, 4;
add.s64 %rd2219, %rd42, %rd2218;
ld.shared.u32 %r7974, [%rd2219];
// inline asm
bfe.u32 %r6101, %r6110, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2220, %r6101, 4;
add.s64 %rd2221, %rd45, %rd2220;
ld.shared.u32 %r7975, [%rd2221];
add.s32 %r7976, %r7975, %r7974;
// inline asm
bfe.u32 %r6105, %r6110, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2222, %r6105, 4;
add.s64 %rd2223, %rd48, %rd2222;
ld.shared.u32 %r7977, [%rd2223];
xor.b32 %r7978, %r7977, %r7976;
// inline asm
bfe.u32 %r6109, %r6110, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2224, %r6109, 4;
add.s64 %rd2225, %rd51, %rd2224;
ld.shared.u32 %r7979, [%rd2225];
add.s32 %r7980, %r7979, %r7978;
xor.b32 %r7981, %r7591, %r6094;
xor.b32 %r6126, %r7981, %r7980;
// inline asm
bfe.u32 %r6113, %r6126, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2226, %r6113, 4;
add.s64 %rd2227, %rd42, %rd2226;
ld.shared.u32 %r7982, [%rd2227];
// inline asm
bfe.u32 %r6117, %r6126, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2228, %r6117, 4;
add.s64 %rd2229, %rd45, %rd2228;
ld.shared.u32 %r7983, [%rd2229];
add.s32 %r7984, %r7983, %r7982;
// inline asm
bfe.u32 %r6121, %r6126, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2230, %r6121, 4;
add.s64 %rd2231, %rd48, %rd2230;
ld.shared.u32 %r7985, [%rd2231];
xor.b32 %r7986, %r7985, %r7984;
// inline asm
bfe.u32 %r6125, %r6126, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2232, %r6125, 4;
add.s64 %rd2233, %rd51, %rd2232;
ld.shared.u32 %r7987, [%rd2233];
add.s32 %r7988, %r7987, %r7986;
xor.b32 %r7989, %r7600, %r6110;
xor.b32 %r6142, %r7989, %r7988;
// inline asm
bfe.u32 %r6129, %r6142, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2234, %r6129, 4;
add.s64 %rd2235, %rd42, %rd2234;
ld.shared.u32 %r7990, [%rd2235];
// inline asm
bfe.u32 %r6133, %r6142, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2236, %r6133, 4;
add.s64 %rd2237, %rd45, %rd2236;
ld.shared.u32 %r7991, [%rd2237];
add.s32 %r7992, %r7991, %r7990;
// inline asm
bfe.u32 %r6137, %r6142, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2238, %r6137, 4;
add.s64 %rd2239, %rd48, %rd2238;
ld.shared.u32 %r7993, [%rd2239];
xor.b32 %r7994, %r7993, %r7992;
// inline asm
bfe.u32 %r6141, %r6142, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2240, %r6141, 4;
add.s64 %rd2241, %rd51, %rd2240;
ld.shared.u32 %r7995, [%rd2241];
add.s32 %r7996, %r7995, %r7994;
xor.b32 %r7997, %r7609, %r6126;
xor.b32 %r6158, %r7997, %r7996;
// inline asm
bfe.u32 %r6145, %r6158, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2242, %r6145, 4;
add.s64 %rd2243, %rd42, %rd2242;
ld.shared.u32 %r7998, [%rd2243];
// inline asm
bfe.u32 %r6149, %r6158, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2244, %r6149, 4;
add.s64 %rd2245, %rd45, %rd2244;
ld.shared.u32 %r7999, [%rd2245];
add.s32 %r8000, %r7999, %r7998;
// inline asm
bfe.u32 %r6153, %r6158, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2246, %r6153, 4;
add.s64 %rd2247, %rd48, %rd2246;
ld.shared.u32 %r8001, [%rd2247];
xor.b32 %r8002, %r8001, %r8000;
// inline asm
bfe.u32 %r6157, %r6158, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2248, %r6157, 4;
add.s64 %rd2249, %rd51, %rd2248;
ld.shared.u32 %r8003, [%rd2249];
add.s32 %r8004, %r8003, %r8002;
xor.b32 %r8005, %r7618, %r6142;
xor.b32 %r6174, %r8005, %r8004;
// inline asm
bfe.u32 %r6161, %r6174, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2250, %r6161, 4;
add.s64 %rd2251, %rd42, %rd2250;
ld.shared.u32 %r8006, [%rd2251];
// inline asm
bfe.u32 %r6165, %r6174, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2252, %r6165, 4;
add.s64 %rd2253, %rd45, %rd2252;
ld.shared.u32 %r8007, [%rd2253];
add.s32 %r8008, %r8007, %r8006;
// inline asm
bfe.u32 %r6169, %r6174, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2254, %r6169, 4;
add.s64 %rd2255, %rd48, %rd2254;
ld.shared.u32 %r8009, [%rd2255];
xor.b32 %r8010, %r8009, %r8008;
// inline asm
bfe.u32 %r6173, %r6174, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2256, %r6173, 4;
add.s64 %rd2257, %rd51, %rd2256;
ld.shared.u32 %r8011, [%rd2257];
add.s32 %r8012, %r8011, %r8010;
xor.b32 %r8013, %r7627, %r6158;
xor.b32 %r6190, %r8013, %r8012;
// inline asm
bfe.u32 %r6177, %r6190, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2258, %r6177, 4;
add.s64 %rd2259, %rd42, %rd2258;
ld.shared.u32 %r8014, [%rd2259];
// inline asm
bfe.u32 %r6181, %r6190, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2260, %r6181, 4;
add.s64 %rd2261, %rd45, %rd2260;
ld.shared.u32 %r8015, [%rd2261];
add.s32 %r8016, %r8015, %r8014;
// inline asm
bfe.u32 %r6185, %r6190, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2262, %r6185, 4;
add.s64 %rd2263, %rd48, %rd2262;
ld.shared.u32 %r8017, [%rd2263];
xor.b32 %r8018, %r8017, %r8016;
// inline asm
bfe.u32 %r6189, %r6190, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2264, %r6189, 4;
add.s64 %rd2265, %rd51, %rd2264;
ld.shared.u32 %r8019, [%rd2265];
add.s32 %r8020, %r8019, %r8018;
xor.b32 %r8021, %r7636, %r6174;
xor.b32 %r6206, %r8021, %r8020;
// inline asm
bfe.u32 %r6193, %r6206, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2266, %r6193, 4;
add.s64 %rd2267, %rd42, %rd2266;
ld.shared.u32 %r8022, [%rd2267];
// inline asm
bfe.u32 %r6197, %r6206, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2268, %r6197, 4;
add.s64 %rd2269, %rd45, %rd2268;
ld.shared.u32 %r8023, [%rd2269];
add.s32 %r8024, %r8023, %r8022;
// inline asm
bfe.u32 %r6201, %r6206, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2270, %r6201, 4;
add.s64 %rd2271, %rd48, %rd2270;
ld.shared.u32 %r8025, [%rd2271];
xor.b32 %r8026, %r8025, %r8024;
// inline asm
bfe.u32 %r6205, %r6206, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2272, %r6205, 4;
add.s64 %rd2273, %rd51, %rd2272;
ld.shared.u32 %r8027, [%rd2273];
add.s32 %r8028, %r8027, %r8026;
xor.b32 %r8029, %r7645, %r6190;
xor.b32 %r6222, %r8029, %r8028;
// inline asm
bfe.u32 %r6209, %r6222, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2274, %r6209, 4;
add.s64 %rd2275, %rd42, %rd2274;
ld.shared.u32 %r8030, [%rd2275];
// inline asm
bfe.u32 %r6213, %r6222, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2276, %r6213, 4;
add.s64 %rd2277, %rd45, %rd2276;
ld.shared.u32 %r8031, [%rd2277];
add.s32 %r8032, %r8031, %r8030;
// inline asm
bfe.u32 %r6217, %r6222, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2278, %r6217, 4;
add.s64 %rd2279, %rd48, %rd2278;
ld.shared.u32 %r8033, [%rd2279];
xor.b32 %r8034, %r8033, %r8032;
// inline asm
bfe.u32 %r6221, %r6222, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2280, %r6221, 4;
add.s64 %rd2281, %rd51, %rd2280;
ld.shared.u32 %r8035, [%rd2281];
add.s32 %r8036, %r8035, %r8034;
xor.b32 %r8037, %r7654, %r6206;
xor.b32 %r6238, %r8037, %r8036;
// inline asm
bfe.u32 %r6225, %r6238, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2282, %r6225, 4;
add.s64 %rd2283, %rd42, %rd2282;
ld.shared.u32 %r8038, [%rd2283];
// inline asm
bfe.u32 %r6229, %r6238, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2284, %r6229, 4;
add.s64 %rd2285, %rd45, %rd2284;
ld.shared.u32 %r8039, [%rd2285];
add.s32 %r8040, %r8039, %r8038;
// inline asm
bfe.u32 %r6233, %r6238, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2286, %r6233, 4;
add.s64 %rd2287, %rd48, %rd2286;
ld.shared.u32 %r8041, [%rd2287];
xor.b32 %r8042, %r8041, %r8040;
// inline asm
bfe.u32 %r6237, %r6238, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2288, %r6237, 4;
add.s64 %rd2289, %rd51, %rd2288;
ld.shared.u32 %r8043, [%rd2289];
add.s32 %r8044, %r8043, %r8042;
xor.b32 %r8045, %r7663, %r6222;
xor.b32 %r10344, %r8045, %r8044;
xor.b32 %r10345, %r7665, %r6238;
xor.b32 %r6254, %r5470, %r6238;
// inline asm
bfe.u32 %r6241, %r6254, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2290, %r6241, 4;
add.s64 %rd2291, %rd42, %rd2290;
ld.shared.u32 %r8046, [%rd2291];
// inline asm
bfe.u32 %r6245, %r6254, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2292, %r6245, 4;
add.s64 %rd2293, %rd45, %rd2292;
ld.shared.u32 %r8047, [%rd2293];
add.s32 %r8048, %r8047, %r8046;
// inline asm
bfe.u32 %r6249, %r6254, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2294, %r6249, 4;
add.s64 %rd2295, %rd48, %rd2294;
ld.shared.u32 %r8049, [%rd2295];
xor.b32 %r8050, %r8049, %r8048;
// inline asm
bfe.u32 %r6253, %r6254, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2296, %r6253, 4;
add.s64 %rd2297, %rd51, %rd2296;
ld.shared.u32 %r8051, [%rd2297];
add.s32 %r8052, %r8051, %r8050;
xor.b32 %r8053, %r10350, %r10344;
xor.b32 %r6270, %r8053, %r8052;
// inline asm
bfe.u32 %r6257, %r6270, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2298, %r6257, 4;
add.s64 %rd2299, %rd42, %rd2298;
ld.shared.u32 %r8054, [%rd2299];
// inline asm
bfe.u32 %r6261, %r6270, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2300, %r6261, 4;
add.s64 %rd2301, %rd45, %rd2300;
ld.shared.u32 %r8055, [%rd2301];
add.s32 %r8056, %r8055, %r8054;
// inline asm
bfe.u32 %r6265, %r6270, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2302, %r6265, 4;
add.s64 %rd2303, %rd48, %rd2302;
ld.shared.u32 %r8057, [%rd2303];
xor.b32 %r8058, %r8057, %r8056;
// inline asm
bfe.u32 %r6269, %r6270, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2304, %r6269, 4;
add.s64 %rd2305, %rd51, %rd2304;
ld.shared.u32 %r8059, [%rd2305];
add.s32 %r8060, %r8059, %r8058;
xor.b32 %r8061, %r10349, %r6254;
xor.b32 %r6286, %r8061, %r8060;
// inline asm
bfe.u32 %r6273, %r6286, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2306, %r6273, 4;
add.s64 %rd2307, %rd42, %rd2306;
ld.shared.u32 %r8062, [%rd2307];
// inline asm
bfe.u32 %r6277, %r6286, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2308, %r6277, 4;
add.s64 %rd2309, %rd45, %rd2308;
ld.shared.u32 %r8063, [%rd2309];
add.s32 %r8064, %r8063, %r8062;
// inline asm
bfe.u32 %r6281, %r6286, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2310, %r6281, 4;
add.s64 %rd2311, %rd48, %rd2310;
ld.shared.u32 %r8065, [%rd2311];
xor.b32 %r8066, %r8065, %r8064;
// inline asm
bfe.u32 %r6285, %r6286, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2312, %r6285, 4;
add.s64 %rd2313, %rd51, %rd2312;
ld.shared.u32 %r8067, [%rd2313];
add.s32 %r8068, %r8067, %r8066;
xor.b32 %r8069, %r10348, %r6270;
xor.b32 %r6302, %r8069, %r8068;
// inline asm
bfe.u32 %r6289, %r6302, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2314, %r6289, 4;
add.s64 %rd2315, %rd42, %rd2314;
ld.shared.u32 %r8070, [%rd2315];
// inline asm
bfe.u32 %r6293, %r6302, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2316, %r6293, 4;
add.s64 %rd2317, %rd45, %rd2316;
ld.shared.u32 %r8071, [%rd2317];
add.s32 %r8072, %r8071, %r8070;
// inline asm
bfe.u32 %r6297, %r6302, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2318, %r6297, 4;
add.s64 %rd2319, %rd48, %rd2318;
ld.shared.u32 %r8073, [%rd2319];
xor.b32 %r8074, %r8073, %r8072;
// inline asm
bfe.u32 %r6301, %r6302, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2320, %r6301, 4;
add.s64 %rd2321, %rd51, %rd2320;
ld.shared.u32 %r8075, [%rd2321];
add.s32 %r8076, %r8075, %r8074;
xor.b32 %r8077, %r10347, %r6286;
xor.b32 %r6318, %r8077, %r8076;
// inline asm
bfe.u32 %r6305, %r6318, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2322, %r6305, 4;
add.s64 %rd2323, %rd42, %rd2322;
ld.shared.u32 %r8078, [%rd2323];
// inline asm
bfe.u32 %r6309, %r6318, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2324, %r6309, 4;
add.s64 %rd2325, %rd45, %rd2324;
ld.shared.u32 %r8079, [%rd2325];
add.s32 %r8080, %r8079, %r8078;
// inline asm
bfe.u32 %r6313, %r6318, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2326, %r6313, 4;
add.s64 %rd2327, %rd48, %rd2326;
ld.shared.u32 %r8081, [%rd2327];
xor.b32 %r8082, %r8081, %r8080;
// inline asm
bfe.u32 %r6317, %r6318, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2328, %r6317, 4;
add.s64 %rd2329, %rd51, %rd2328;
ld.shared.u32 %r8083, [%rd2329];
add.s32 %r8084, %r8083, %r8082;
xor.b32 %r8085, %r10346, %r6302;
xor.b32 %r6334, %r8085, %r8084;
// inline asm
bfe.u32 %r6321, %r6334, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2330, %r6321, 4;
add.s64 %rd2331, %rd42, %rd2330;
ld.shared.u32 %r8086, [%rd2331];
// inline asm
bfe.u32 %r6325, %r6334, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2332, %r6325, 4;
add.s64 %rd2333, %rd45, %rd2332;
ld.shared.u32 %r8087, [%rd2333];
add.s32 %r8088, %r8087, %r8086;
// inline asm
bfe.u32 %r6329, %r6334, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2334, %r6329, 4;
add.s64 %rd2335, %rd48, %rd2334;
ld.shared.u32 %r8089, [%rd2335];
xor.b32 %r8090, %r8089, %r8088;
// inline asm
bfe.u32 %r6333, %r6334, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2336, %r6333, 4;
add.s64 %rd2337, %rd51, %rd2336;
ld.shared.u32 %r8091, [%rd2337];
add.s32 %r8092, %r8091, %r8090;
xor.b32 %r8093, %r10345, %r6318;
xor.b32 %r6350, %r8093, %r8092;
// inline asm
bfe.u32 %r6337, %r6350, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2338, %r6337, 4;
add.s64 %rd2339, %rd42, %rd2338;
ld.shared.u32 %r8094, [%rd2339];
// inline asm
bfe.u32 %r6341, %r6350, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2340, %r6341, 4;
add.s64 %rd2341, %rd45, %rd2340;
ld.shared.u32 %r8095, [%rd2341];
add.s32 %r8096, %r8095, %r8094;
// inline asm
bfe.u32 %r6345, %r6350, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2342, %r6345, 4;
add.s64 %rd2343, %rd48, %rd2342;
ld.shared.u32 %r8097, [%rd2343];
xor.b32 %r8098, %r8097, %r8096;
// inline asm
bfe.u32 %r6349, %r6350, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2344, %r6349, 4;
add.s64 %rd2345, %rd51, %rd2344;
ld.shared.u32 %r8099, [%rd2345];
add.s32 %r8100, %r8099, %r8098;
xor.b32 %r8101, %r10344, %r6334;
xor.b32 %r6366, %r8101, %r8100;
// inline asm
bfe.u32 %r6353, %r6366, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2346, %r6353, 4;
add.s64 %rd2347, %rd42, %rd2346;
ld.shared.u32 %r8102, [%rd2347];
// inline asm
bfe.u32 %r6357, %r6366, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2348, %r6357, 4;
add.s64 %rd2349, %rd45, %rd2348;
ld.shared.u32 %r8103, [%rd2349];
add.s32 %r8104, %r8103, %r8102;
// inline asm
bfe.u32 %r6361, %r6366, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2350, %r6361, 4;
add.s64 %rd2351, %rd48, %rd2350;
ld.shared.u32 %r8105, [%rd2351];
xor.b32 %r8106, %r8105, %r8104;
// inline asm
bfe.u32 %r6365, %r6366, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2352, %r6365, 4;
add.s64 %rd2353, %rd51, %rd2352;
ld.shared.u32 %r8107, [%rd2353];
add.s32 %r8108, %r8107, %r8106;
xor.b32 %r8109, %r7591, %r6350;
xor.b32 %r6382, %r8109, %r8108;
// inline asm
bfe.u32 %r6369, %r6382, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2354, %r6369, 4;
add.s64 %rd2355, %rd42, %rd2354;
ld.shared.u32 %r8110, [%rd2355];
// inline asm
bfe.u32 %r6373, %r6382, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2356, %r6373, 4;
add.s64 %rd2357, %rd45, %rd2356;
ld.shared.u32 %r8111, [%rd2357];
add.s32 %r8112, %r8111, %r8110;
// inline asm
bfe.u32 %r6377, %r6382, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2358, %r6377, 4;
add.s64 %rd2359, %rd48, %rd2358;
ld.shared.u32 %r8113, [%rd2359];
xor.b32 %r8114, %r8113, %r8112;
// inline asm
bfe.u32 %r6381, %r6382, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2360, %r6381, 4;
add.s64 %rd2361, %rd51, %rd2360;
ld.shared.u32 %r8115, [%rd2361];
add.s32 %r8116, %r8115, %r8114;
xor.b32 %r8117, %r7600, %r6366;
xor.b32 %r6398, %r8117, %r8116;
// inline asm
bfe.u32 %r6385, %r6398, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2362, %r6385, 4;
add.s64 %rd2363, %rd42, %rd2362;
ld.shared.u32 %r8118, [%rd2363];
// inline asm
bfe.u32 %r6389, %r6398, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2364, %r6389, 4;
add.s64 %rd2365, %rd45, %rd2364;
ld.shared.u32 %r8119, [%rd2365];
add.s32 %r8120, %r8119, %r8118;
// inline asm
bfe.u32 %r6393, %r6398, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2366, %r6393, 4;
add.s64 %rd2367, %rd48, %rd2366;
ld.shared.u32 %r8121, [%rd2367];
xor.b32 %r8122, %r8121, %r8120;
// inline asm
bfe.u32 %r6397, %r6398, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2368, %r6397, 4;
add.s64 %rd2369, %rd51, %rd2368;
ld.shared.u32 %r8123, [%rd2369];
add.s32 %r8124, %r8123, %r8122;
xor.b32 %r8125, %r7609, %r6382;
xor.b32 %r6414, %r8125, %r8124;
// inline asm
bfe.u32 %r6401, %r6414, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2370, %r6401, 4;
add.s64 %rd2371, %rd42, %rd2370;
ld.shared.u32 %r8126, [%rd2371];
// inline asm
bfe.u32 %r6405, %r6414, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2372, %r6405, 4;
add.s64 %rd2373, %rd45, %rd2372;
ld.shared.u32 %r8127, [%rd2373];
add.s32 %r8128, %r8127, %r8126;
// inline asm
bfe.u32 %r6409, %r6414, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2374, %r6409, 4;
add.s64 %rd2375, %rd48, %rd2374;
ld.shared.u32 %r8129, [%rd2375];
xor.b32 %r8130, %r8129, %r8128;
// inline asm
bfe.u32 %r6413, %r6414, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2376, %r6413, 4;
add.s64 %rd2377, %rd51, %rd2376;
ld.shared.u32 %r8131, [%rd2377];
add.s32 %r8132, %r8131, %r8130;
xor.b32 %r8133, %r7618, %r6398;
xor.b32 %r6430, %r8133, %r8132;
// inline asm
bfe.u32 %r6417, %r6430, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2378, %r6417, 4;
add.s64 %rd2379, %rd42, %rd2378;
ld.shared.u32 %r8134, [%rd2379];
// inline asm
bfe.u32 %r6421, %r6430, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2380, %r6421, 4;
add.s64 %rd2381, %rd45, %rd2380;
ld.shared.u32 %r8135, [%rd2381];
add.s32 %r8136, %r8135, %r8134;
// inline asm
bfe.u32 %r6425, %r6430, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2382, %r6425, 4;
add.s64 %rd2383, %rd48, %rd2382;
ld.shared.u32 %r8137, [%rd2383];
xor.b32 %r8138, %r8137, %r8136;
// inline asm
bfe.u32 %r6429, %r6430, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2384, %r6429, 4;
add.s64 %rd2385, %rd51, %rd2384;
ld.shared.u32 %r8139, [%rd2385];
add.s32 %r8140, %r8139, %r8138;
xor.b32 %r8141, %r7627, %r6414;
xor.b32 %r6446, %r8141, %r8140;
// inline asm
bfe.u32 %r6433, %r6446, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2386, %r6433, 4;
add.s64 %rd2387, %rd42, %rd2386;
ld.shared.u32 %r8142, [%rd2387];
// inline asm
bfe.u32 %r6437, %r6446, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2388, %r6437, 4;
add.s64 %rd2389, %rd45, %rd2388;
ld.shared.u32 %r8143, [%rd2389];
add.s32 %r8144, %r8143, %r8142;
// inline asm
bfe.u32 %r6441, %r6446, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2390, %r6441, 4;
add.s64 %rd2391, %rd48, %rd2390;
ld.shared.u32 %r8145, [%rd2391];
xor.b32 %r8146, %r8145, %r8144;
// inline asm
bfe.u32 %r6445, %r6446, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2392, %r6445, 4;
add.s64 %rd2393, %rd51, %rd2392;
ld.shared.u32 %r8147, [%rd2393];
add.s32 %r8148, %r8147, %r8146;
xor.b32 %r8149, %r7636, %r6430;
xor.b32 %r6462, %r8149, %r8148;
// inline asm
bfe.u32 %r6449, %r6462, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2394, %r6449, 4;
add.s64 %rd2395, %rd42, %rd2394;
ld.shared.u32 %r8150, [%rd2395];
// inline asm
bfe.u32 %r6453, %r6462, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2396, %r6453, 4;
add.s64 %rd2397, %rd45, %rd2396;
ld.shared.u32 %r8151, [%rd2397];
add.s32 %r8152, %r8151, %r8150;
// inline asm
bfe.u32 %r6457, %r6462, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2398, %r6457, 4;
add.s64 %rd2399, %rd48, %rd2398;
ld.shared.u32 %r8153, [%rd2399];
xor.b32 %r8154, %r8153, %r8152;
// inline asm
bfe.u32 %r6461, %r6462, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2400, %r6461, 4;
add.s64 %rd2401, %rd51, %rd2400;
ld.shared.u32 %r8155, [%rd2401];
add.s32 %r8156, %r8155, %r8154;
xor.b32 %r8157, %r7645, %r6446;
xor.b32 %r6478, %r8157, %r8156;
// inline asm
bfe.u32 %r6465, %r6478, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2402, %r6465, 4;
add.s64 %rd2403, %rd42, %rd2402;
ld.shared.u32 %r8158, [%rd2403];
// inline asm
bfe.u32 %r6469, %r6478, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2404, %r6469, 4;
add.s64 %rd2405, %rd45, %rd2404;
ld.shared.u32 %r8159, [%rd2405];
add.s32 %r8160, %r8159, %r8158;
// inline asm
bfe.u32 %r6473, %r6478, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2406, %r6473, 4;
add.s64 %rd2407, %rd48, %rd2406;
ld.shared.u32 %r8161, [%rd2407];
xor.b32 %r8162, %r8161, %r8160;
// inline asm
bfe.u32 %r6477, %r6478, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2408, %r6477, 4;
add.s64 %rd2409, %rd51, %rd2408;
ld.shared.u32 %r8163, [%rd2409];
add.s32 %r8164, %r8163, %r8162;
xor.b32 %r8165, %r7654, %r6462;
xor.b32 %r6494, %r8165, %r8164;
// inline asm
bfe.u32 %r6481, %r6494, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2410, %r6481, 4;
add.s64 %rd2411, %rd42, %rd2410;
ld.shared.u32 %r8166, [%rd2411];
// inline asm
bfe.u32 %r6485, %r6494, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2412, %r6485, 4;
add.s64 %rd2413, %rd45, %rd2412;
ld.shared.u32 %r8167, [%rd2413];
add.s32 %r8168, %r8167, %r8166;
// inline asm
bfe.u32 %r6489, %r6494, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2414, %r6489, 4;
add.s64 %rd2415, %rd48, %rd2414;
ld.shared.u32 %r8169, [%rd2415];
xor.b32 %r8170, %r8169, %r8168;
// inline asm
bfe.u32 %r6493, %r6494, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2416, %r6493, 4;
add.s64 %rd2417, %rd51, %rd2416;
ld.shared.u32 %r8171, [%rd2417];
add.s32 %r8172, %r8171, %r8170;
xor.b32 %r8173, %r7663, %r6478;
xor.b32 %r10342, %r8173, %r8172;
xor.b32 %r10343, %r7665, %r6494;
xor.b32 %r6510, %r5470, %r6494;
// inline asm
bfe.u32 %r6497, %r6510, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2418, %r6497, 4;
add.s64 %rd2419, %rd42, %rd2418;
ld.shared.u32 %r8174, [%rd2419];
// inline asm
bfe.u32 %r6501, %r6510, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2420, %r6501, 4;
add.s64 %rd2421, %rd45, %rd2420;
ld.shared.u32 %r8175, [%rd2421];
add.s32 %r8176, %r8175, %r8174;
// inline asm
bfe.u32 %r6505, %r6510, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2422, %r6505, 4;
add.s64 %rd2423, %rd48, %rd2422;
ld.shared.u32 %r8177, [%rd2423];
xor.b32 %r8178, %r8177, %r8176;
// inline asm
bfe.u32 %r6509, %r6510, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2424, %r6509, 4;
add.s64 %rd2425, %rd51, %rd2424;
ld.shared.u32 %r8179, [%rd2425];
add.s32 %r8180, %r8179, %r8178;
xor.b32 %r8181, %r10350, %r10342;
xor.b32 %r6526, %r8181, %r8180;
// inline asm
bfe.u32 %r6513, %r6526, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2426, %r6513, 4;
add.s64 %rd2427, %rd42, %rd2426;
ld.shared.u32 %r8182, [%rd2427];
// inline asm
bfe.u32 %r6517, %r6526, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2428, %r6517, 4;
add.s64 %rd2429, %rd45, %rd2428;
ld.shared.u32 %r8183, [%rd2429];
add.s32 %r8184, %r8183, %r8182;
// inline asm
bfe.u32 %r6521, %r6526, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2430, %r6521, 4;
add.s64 %rd2431, %rd48, %rd2430;
ld.shared.u32 %r8185, [%rd2431];
xor.b32 %r8186, %r8185, %r8184;
// inline asm
bfe.u32 %r6525, %r6526, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2432, %r6525, 4;
add.s64 %rd2433, %rd51, %rd2432;
ld.shared.u32 %r8187, [%rd2433];
add.s32 %r8188, %r8187, %r8186;
xor.b32 %r8189, %r10349, %r6510;
xor.b32 %r6542, %r8189, %r8188;
// inline asm
bfe.u32 %r6529, %r6542, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2434, %r6529, 4;
add.s64 %rd2435, %rd42, %rd2434;
ld.shared.u32 %r8190, [%rd2435];
// inline asm
bfe.u32 %r6533, %r6542, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2436, %r6533, 4;
add.s64 %rd2437, %rd45, %rd2436;
ld.shared.u32 %r8191, [%rd2437];
add.s32 %r8192, %r8191, %r8190;
// inline asm
bfe.u32 %r6537, %r6542, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2438, %r6537, 4;
add.s64 %rd2439, %rd48, %rd2438;
ld.shared.u32 %r8193, [%rd2439];
xor.b32 %r8194, %r8193, %r8192;
// inline asm
bfe.u32 %r6541, %r6542, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2440, %r6541, 4;
add.s64 %rd2441, %rd51, %rd2440;
ld.shared.u32 %r8195, [%rd2441];
add.s32 %r8196, %r8195, %r8194;
xor.b32 %r8197, %r10348, %r6526;
xor.b32 %r6558, %r8197, %r8196;
// inline asm
bfe.u32 %r6545, %r6558, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2442, %r6545, 4;
add.s64 %rd2443, %rd42, %rd2442;
ld.shared.u32 %r8198, [%rd2443];
// inline asm
bfe.u32 %r6549, %r6558, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2444, %r6549, 4;
add.s64 %rd2445, %rd45, %rd2444;
ld.shared.u32 %r8199, [%rd2445];
add.s32 %r8200, %r8199, %r8198;
// inline asm
bfe.u32 %r6553, %r6558, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2446, %r6553, 4;
add.s64 %rd2447, %rd48, %rd2446;
ld.shared.u32 %r8201, [%rd2447];
xor.b32 %r8202, %r8201, %r8200;
// inline asm
bfe.u32 %r6557, %r6558, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2448, %r6557, 4;
add.s64 %rd2449, %rd51, %rd2448;
ld.shared.u32 %r8203, [%rd2449];
add.s32 %r8204, %r8203, %r8202;
xor.b32 %r8205, %r10347, %r6542;
xor.b32 %r6574, %r8205, %r8204;
// inline asm
bfe.u32 %r6561, %r6574, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2450, %r6561, 4;
add.s64 %rd2451, %rd42, %rd2450;
ld.shared.u32 %r8206, [%rd2451];
// inline asm
bfe.u32 %r6565, %r6574, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2452, %r6565, 4;
add.s64 %rd2453, %rd45, %rd2452;
ld.shared.u32 %r8207, [%rd2453];
add.s32 %r8208, %r8207, %r8206;
// inline asm
bfe.u32 %r6569, %r6574, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2454, %r6569, 4;
add.s64 %rd2455, %rd48, %rd2454;
ld.shared.u32 %r8209, [%rd2455];
xor.b32 %r8210, %r8209, %r8208;
// inline asm
bfe.u32 %r6573, %r6574, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2456, %r6573, 4;
add.s64 %rd2457, %rd51, %rd2456;
ld.shared.u32 %r8211, [%rd2457];
add.s32 %r8212, %r8211, %r8210;
xor.b32 %r8213, %r10346, %r6558;
xor.b32 %r6590, %r8213, %r8212;
// inline asm
bfe.u32 %r6577, %r6590, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2458, %r6577, 4;
add.s64 %rd2459, %rd42, %rd2458;
ld.shared.u32 %r8214, [%rd2459];
// inline asm
bfe.u32 %r6581, %r6590, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2460, %r6581, 4;
add.s64 %rd2461, %rd45, %rd2460;
ld.shared.u32 %r8215, [%rd2461];
add.s32 %r8216, %r8215, %r8214;
// inline asm
bfe.u32 %r6585, %r6590, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2462, %r6585, 4;
add.s64 %rd2463, %rd48, %rd2462;
ld.shared.u32 %r8217, [%rd2463];
xor.b32 %r8218, %r8217, %r8216;
// inline asm
bfe.u32 %r6589, %r6590, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2464, %r6589, 4;
add.s64 %rd2465, %rd51, %rd2464;
ld.shared.u32 %r8219, [%rd2465];
add.s32 %r8220, %r8219, %r8218;
xor.b32 %r8221, %r10345, %r6574;
xor.b32 %r6606, %r8221, %r8220;
// inline asm
bfe.u32 %r6593, %r6606, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2466, %r6593, 4;
add.s64 %rd2467, %rd42, %rd2466;
ld.shared.u32 %r8222, [%rd2467];
// inline asm
bfe.u32 %r6597, %r6606, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2468, %r6597, 4;
add.s64 %rd2469, %rd45, %rd2468;
ld.shared.u32 %r8223, [%rd2469];
add.s32 %r8224, %r8223, %r8222;
// inline asm
bfe.u32 %r6601, %r6606, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2470, %r6601, 4;
add.s64 %rd2471, %rd48, %rd2470;
ld.shared.u32 %r8225, [%rd2471];
xor.b32 %r8226, %r8225, %r8224;
// inline asm
bfe.u32 %r6605, %r6606, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2472, %r6605, 4;
add.s64 %rd2473, %rd51, %rd2472;
ld.shared.u32 %r8227, [%rd2473];
add.s32 %r8228, %r8227, %r8226;
xor.b32 %r8229, %r10344, %r6590;
xor.b32 %r6622, %r8229, %r8228;
// inline asm
bfe.u32 %r6609, %r6622, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2474, %r6609, 4;
add.s64 %rd2475, %rd42, %rd2474;
ld.shared.u32 %r8230, [%rd2475];
// inline asm
bfe.u32 %r6613, %r6622, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2476, %r6613, 4;
add.s64 %rd2477, %rd45, %rd2476;
ld.shared.u32 %r8231, [%rd2477];
add.s32 %r8232, %r8231, %r8230;
// inline asm
bfe.u32 %r6617, %r6622, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2478, %r6617, 4;
add.s64 %rd2479, %rd48, %rd2478;
ld.shared.u32 %r8233, [%rd2479];
xor.b32 %r8234, %r8233, %r8232;
// inline asm
bfe.u32 %r6621, %r6622, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2480, %r6621, 4;
add.s64 %rd2481, %rd51, %rd2480;
ld.shared.u32 %r8235, [%rd2481];
add.s32 %r8236, %r8235, %r8234;
xor.b32 %r8237, %r10343, %r6606;
xor.b32 %r6638, %r8237, %r8236;
// inline asm
bfe.u32 %r6625, %r6638, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2482, %r6625, 4;
add.s64 %rd2483, %rd42, %rd2482;
ld.shared.u32 %r8238, [%rd2483];
// inline asm
bfe.u32 %r6629, %r6638, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2484, %r6629, 4;
add.s64 %rd2485, %rd45, %rd2484;
ld.shared.u32 %r8239, [%rd2485];
add.s32 %r8240, %r8239, %r8238;
// inline asm
bfe.u32 %r6633, %r6638, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2486, %r6633, 4;
add.s64 %rd2487, %rd48, %rd2486;
ld.shared.u32 %r8241, [%rd2487];
xor.b32 %r8242, %r8241, %r8240;
// inline asm
bfe.u32 %r6637, %r6638, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2488, %r6637, 4;
add.s64 %rd2489, %rd51, %rd2488;
ld.shared.u32 %r8243, [%rd2489];
add.s32 %r8244, %r8243, %r8242;
xor.b32 %r8245, %r10342, %r6622;
xor.b32 %r6654, %r8245, %r8244;
// inline asm
bfe.u32 %r6641, %r6654, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2490, %r6641, 4;
add.s64 %rd2491, %rd42, %rd2490;
ld.shared.u32 %r8246, [%rd2491];
// inline asm
bfe.u32 %r6645, %r6654, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2492, %r6645, 4;
add.s64 %rd2493, %rd45, %rd2492;
ld.shared.u32 %r8247, [%rd2493];
add.s32 %r8248, %r8247, %r8246;
// inline asm
bfe.u32 %r6649, %r6654, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2494, %r6649, 4;
add.s64 %rd2495, %rd48, %rd2494;
ld.shared.u32 %r8249, [%rd2495];
xor.b32 %r8250, %r8249, %r8248;
// inline asm
bfe.u32 %r6653, %r6654, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2496, %r6653, 4;
add.s64 %rd2497, %rd51, %rd2496;
ld.shared.u32 %r8251, [%rd2497];
add.s32 %r8252, %r8251, %r8250;
xor.b32 %r8253, %r7609, %r6638;
xor.b32 %r6670, %r8253, %r8252;
// inline asm
bfe.u32 %r6657, %r6670, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2498, %r6657, 4;
add.s64 %rd2499, %rd42, %rd2498;
ld.shared.u32 %r8254, [%rd2499];
// inline asm
bfe.u32 %r6661, %r6670, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2500, %r6661, 4;
add.s64 %rd2501, %rd45, %rd2500;
ld.shared.u32 %r8255, [%rd2501];
add.s32 %r8256, %r8255, %r8254;
// inline asm
bfe.u32 %r6665, %r6670, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2502, %r6665, 4;
add.s64 %rd2503, %rd48, %rd2502;
ld.shared.u32 %r8257, [%rd2503];
xor.b32 %r8258, %r8257, %r8256;
// inline asm
bfe.u32 %r6669, %r6670, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2504, %r6669, 4;
add.s64 %rd2505, %rd51, %rd2504;
ld.shared.u32 %r8259, [%rd2505];
add.s32 %r8260, %r8259, %r8258;
xor.b32 %r8261, %r7618, %r6654;
xor.b32 %r6686, %r8261, %r8260;
// inline asm
bfe.u32 %r6673, %r6686, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2506, %r6673, 4;
add.s64 %rd2507, %rd42, %rd2506;
ld.shared.u32 %r8262, [%rd2507];
// inline asm
bfe.u32 %r6677, %r6686, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2508, %r6677, 4;
add.s64 %rd2509, %rd45, %rd2508;
ld.shared.u32 %r8263, [%rd2509];
add.s32 %r8264, %r8263, %r8262;
// inline asm
bfe.u32 %r6681, %r6686, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2510, %r6681, 4;
add.s64 %rd2511, %rd48, %rd2510;
ld.shared.u32 %r8265, [%rd2511];
xor.b32 %r8266, %r8265, %r8264;
// inline asm
bfe.u32 %r6685, %r6686, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2512, %r6685, 4;
add.s64 %rd2513, %rd51, %rd2512;
ld.shared.u32 %r8267, [%rd2513];
add.s32 %r8268, %r8267, %r8266;
xor.b32 %r8269, %r7627, %r6670;
xor.b32 %r6702, %r8269, %r8268;
// inline asm
bfe.u32 %r6689, %r6702, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2514, %r6689, 4;
add.s64 %rd2515, %rd42, %rd2514;
ld.shared.u32 %r8270, [%rd2515];
// inline asm
bfe.u32 %r6693, %r6702, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2516, %r6693, 4;
add.s64 %rd2517, %rd45, %rd2516;
ld.shared.u32 %r8271, [%rd2517];
add.s32 %r8272, %r8271, %r8270;
// inline asm
bfe.u32 %r6697, %r6702, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2518, %r6697, 4;
add.s64 %rd2519, %rd48, %rd2518;
ld.shared.u32 %r8273, [%rd2519];
xor.b32 %r8274, %r8273, %r8272;
// inline asm
bfe.u32 %r6701, %r6702, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2520, %r6701, 4;
add.s64 %rd2521, %rd51, %rd2520;
ld.shared.u32 %r8275, [%rd2521];
add.s32 %r8276, %r8275, %r8274;
xor.b32 %r8277, %r7636, %r6686;
xor.b32 %r6718, %r8277, %r8276;
// inline asm
bfe.u32 %r6705, %r6718, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2522, %r6705, 4;
add.s64 %rd2523, %rd42, %rd2522;
ld.shared.u32 %r8278, [%rd2523];
// inline asm
bfe.u32 %r6709, %r6718, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2524, %r6709, 4;
add.s64 %rd2525, %rd45, %rd2524;
ld.shared.u32 %r8279, [%rd2525];
add.s32 %r8280, %r8279, %r8278;
// inline asm
bfe.u32 %r6713, %r6718, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2526, %r6713, 4;
add.s64 %rd2527, %rd48, %rd2526;
ld.shared.u32 %r8281, [%rd2527];
xor.b32 %r8282, %r8281, %r8280;
// inline asm
bfe.u32 %r6717, %r6718, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2528, %r6717, 4;
add.s64 %rd2529, %rd51, %rd2528;
ld.shared.u32 %r8283, [%rd2529];
add.s32 %r8284, %r8283, %r8282;
xor.b32 %r8285, %r7645, %r6702;
xor.b32 %r6734, %r8285, %r8284;
// inline asm
bfe.u32 %r6721, %r6734, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2530, %r6721, 4;
add.s64 %rd2531, %rd42, %rd2530;
ld.shared.u32 %r8286, [%rd2531];
// inline asm
bfe.u32 %r6725, %r6734, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2532, %r6725, 4;
add.s64 %rd2533, %rd45, %rd2532;
ld.shared.u32 %r8287, [%rd2533];
add.s32 %r8288, %r8287, %r8286;
// inline asm
bfe.u32 %r6729, %r6734, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2534, %r6729, 4;
add.s64 %rd2535, %rd48, %rd2534;
ld.shared.u32 %r8289, [%rd2535];
xor.b32 %r8290, %r8289, %r8288;
// inline asm
bfe.u32 %r6733, %r6734, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2536, %r6733, 4;
add.s64 %rd2537, %rd51, %rd2536;
ld.shared.u32 %r8291, [%rd2537];
add.s32 %r8292, %r8291, %r8290;
xor.b32 %r8293, %r7654, %r6718;
xor.b32 %r6750, %r8293, %r8292;
// inline asm
bfe.u32 %r6737, %r6750, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2538, %r6737, 4;
add.s64 %rd2539, %rd42, %rd2538;
ld.shared.u32 %r8294, [%rd2539];
// inline asm
bfe.u32 %r6741, %r6750, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2540, %r6741, 4;
add.s64 %rd2541, %rd45, %rd2540;
ld.shared.u32 %r8295, [%rd2541];
add.s32 %r8296, %r8295, %r8294;
// inline asm
bfe.u32 %r6745, %r6750, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2542, %r6745, 4;
add.s64 %rd2543, %rd48, %rd2542;
ld.shared.u32 %r8297, [%rd2543];
xor.b32 %r8298, %r8297, %r8296;
// inline asm
bfe.u32 %r6749, %r6750, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2544, %r6749, 4;
add.s64 %rd2545, %rd51, %rd2544;
ld.shared.u32 %r8299, [%rd2545];
add.s32 %r8300, %r8299, %r8298;
xor.b32 %r8301, %r7663, %r6734;
xor.b32 %r10340, %r8301, %r8300;
xor.b32 %r10341, %r7665, %r6750;
xor.b32 %r6766, %r5470, %r6750;
// inline asm
bfe.u32 %r6753, %r6766, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2546, %r6753, 4;
add.s64 %rd2547, %rd42, %rd2546;
ld.shared.u32 %r8302, [%rd2547];
// inline asm
bfe.u32 %r6757, %r6766, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2548, %r6757, 4;
add.s64 %rd2549, %rd45, %rd2548;
ld.shared.u32 %r8303, [%rd2549];
add.s32 %r8304, %r8303, %r8302;
// inline asm
bfe.u32 %r6761, %r6766, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2550, %r6761, 4;
add.s64 %rd2551, %rd48, %rd2550;
ld.shared.u32 %r8305, [%rd2551];
xor.b32 %r8306, %r8305, %r8304;
// inline asm
bfe.u32 %r6765, %r6766, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2552, %r6765, 4;
add.s64 %rd2553, %rd51, %rd2552;
ld.shared.u32 %r8307, [%rd2553];
add.s32 %r8308, %r8307, %r8306;
xor.b32 %r8309, %r10350, %r10340;
xor.b32 %r6782, %r8309, %r8308;
// inline asm
bfe.u32 %r6769, %r6782, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2554, %r6769, 4;
add.s64 %rd2555, %rd42, %rd2554;
ld.shared.u32 %r8310, [%rd2555];
// inline asm
bfe.u32 %r6773, %r6782, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2556, %r6773, 4;
add.s64 %rd2557, %rd45, %rd2556;
ld.shared.u32 %r8311, [%rd2557];
add.s32 %r8312, %r8311, %r8310;
// inline asm
bfe.u32 %r6777, %r6782, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2558, %r6777, 4;
add.s64 %rd2559, %rd48, %rd2558;
ld.shared.u32 %r8313, [%rd2559];
xor.b32 %r8314, %r8313, %r8312;
// inline asm
bfe.u32 %r6781, %r6782, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2560, %r6781, 4;
add.s64 %rd2561, %rd51, %rd2560;
ld.shared.u32 %r8315, [%rd2561];
add.s32 %r8316, %r8315, %r8314;
xor.b32 %r8317, %r10349, %r6766;
xor.b32 %r6798, %r8317, %r8316;
// inline asm
bfe.u32 %r6785, %r6798, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2562, %r6785, 4;
add.s64 %rd2563, %rd42, %rd2562;
ld.shared.u32 %r8318, [%rd2563];
// inline asm
bfe.u32 %r6789, %r6798, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2564, %r6789, 4;
add.s64 %rd2565, %rd45, %rd2564;
ld.shared.u32 %r8319, [%rd2565];
add.s32 %r8320, %r8319, %r8318;
// inline asm
bfe.u32 %r6793, %r6798, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2566, %r6793, 4;
add.s64 %rd2567, %rd48, %rd2566;
ld.shared.u32 %r8321, [%rd2567];
xor.b32 %r8322, %r8321, %r8320;
// inline asm
bfe.u32 %r6797, %r6798, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2568, %r6797, 4;
add.s64 %rd2569, %rd51, %rd2568;
ld.shared.u32 %r8323, [%rd2569];
add.s32 %r8324, %r8323, %r8322;
xor.b32 %r8325, %r10348, %r6782;
xor.b32 %r6814, %r8325, %r8324;
// inline asm
bfe.u32 %r6801, %r6814, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2570, %r6801, 4;
add.s64 %rd2571, %rd42, %rd2570;
ld.shared.u32 %r8326, [%rd2571];
// inline asm
bfe.u32 %r6805, %r6814, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2572, %r6805, 4;
add.s64 %rd2573, %rd45, %rd2572;
ld.shared.u32 %r8327, [%rd2573];
add.s32 %r8328, %r8327, %r8326;
// inline asm
bfe.u32 %r6809, %r6814, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2574, %r6809, 4;
add.s64 %rd2575, %rd48, %rd2574;
ld.shared.u32 %r8329, [%rd2575];
xor.b32 %r8330, %r8329, %r8328;
// inline asm
bfe.u32 %r6813, %r6814, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2576, %r6813, 4;
add.s64 %rd2577, %rd51, %rd2576;
ld.shared.u32 %r8331, [%rd2577];
add.s32 %r8332, %r8331, %r8330;
xor.b32 %r8333, %r10347, %r6798;
xor.b32 %r6830, %r8333, %r8332;
// inline asm
bfe.u32 %r6817, %r6830, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2578, %r6817, 4;
add.s64 %rd2579, %rd42, %rd2578;
ld.shared.u32 %r8334, [%rd2579];
// inline asm
bfe.u32 %r6821, %r6830, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2580, %r6821, 4;
add.s64 %rd2581, %rd45, %rd2580;
ld.shared.u32 %r8335, [%rd2581];
add.s32 %r8336, %r8335, %r8334;
// inline asm
bfe.u32 %r6825, %r6830, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2582, %r6825, 4;
add.s64 %rd2583, %rd48, %rd2582;
ld.shared.u32 %r8337, [%rd2583];
xor.b32 %r8338, %r8337, %r8336;
// inline asm
bfe.u32 %r6829, %r6830, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2584, %r6829, 4;
add.s64 %rd2585, %rd51, %rd2584;
ld.shared.u32 %r8339, [%rd2585];
add.s32 %r8340, %r8339, %r8338;
xor.b32 %r8341, %r10346, %r6814;
xor.b32 %r6846, %r8341, %r8340;
// inline asm
bfe.u32 %r6833, %r6846, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2586, %r6833, 4;
add.s64 %rd2587, %rd42, %rd2586;
ld.shared.u32 %r8342, [%rd2587];
// inline asm
bfe.u32 %r6837, %r6846, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2588, %r6837, 4;
add.s64 %rd2589, %rd45, %rd2588;
ld.shared.u32 %r8343, [%rd2589];
add.s32 %r8344, %r8343, %r8342;
// inline asm
bfe.u32 %r6841, %r6846, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2590, %r6841, 4;
add.s64 %rd2591, %rd48, %rd2590;
ld.shared.u32 %r8345, [%rd2591];
xor.b32 %r8346, %r8345, %r8344;
// inline asm
bfe.u32 %r6845, %r6846, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2592, %r6845, 4;
add.s64 %rd2593, %rd51, %rd2592;
ld.shared.u32 %r8347, [%rd2593];
add.s32 %r8348, %r8347, %r8346;
xor.b32 %r8349, %r10345, %r6830;
xor.b32 %r6862, %r8349, %r8348;
// inline asm
bfe.u32 %r6849, %r6862, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2594, %r6849, 4;
add.s64 %rd2595, %rd42, %rd2594;
ld.shared.u32 %r8350, [%rd2595];
// inline asm
bfe.u32 %r6853, %r6862, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2596, %r6853, 4;
add.s64 %rd2597, %rd45, %rd2596;
ld.shared.u32 %r8351, [%rd2597];
add.s32 %r8352, %r8351, %r8350;
// inline asm
bfe.u32 %r6857, %r6862, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2598, %r6857, 4;
add.s64 %rd2599, %rd48, %rd2598;
ld.shared.u32 %r8353, [%rd2599];
xor.b32 %r8354, %r8353, %r8352;
// inline asm
bfe.u32 %r6861, %r6862, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2600, %r6861, 4;
add.s64 %rd2601, %rd51, %rd2600;
ld.shared.u32 %r8355, [%rd2601];
add.s32 %r8356, %r8355, %r8354;
xor.b32 %r8357, %r10344, %r6846;
xor.b32 %r6878, %r8357, %r8356;
// inline asm
bfe.u32 %r6865, %r6878, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2602, %r6865, 4;
add.s64 %rd2603, %rd42, %rd2602;
ld.shared.u32 %r8358, [%rd2603];
// inline asm
bfe.u32 %r6869, %r6878, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2604, %r6869, 4;
add.s64 %rd2605, %rd45, %rd2604;
ld.shared.u32 %r8359, [%rd2605];
add.s32 %r8360, %r8359, %r8358;
// inline asm
bfe.u32 %r6873, %r6878, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2606, %r6873, 4;
add.s64 %rd2607, %rd48, %rd2606;
ld.shared.u32 %r8361, [%rd2607];
xor.b32 %r8362, %r8361, %r8360;
// inline asm
bfe.u32 %r6877, %r6878, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2608, %r6877, 4;
add.s64 %rd2609, %rd51, %rd2608;
ld.shared.u32 %r8363, [%rd2609];
add.s32 %r8364, %r8363, %r8362;
xor.b32 %r8365, %r10343, %r6862;
xor.b32 %r6894, %r8365, %r8364;
// inline asm
bfe.u32 %r6881, %r6894, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2610, %r6881, 4;
add.s64 %rd2611, %rd42, %rd2610;
ld.shared.u32 %r8366, [%rd2611];
// inline asm
bfe.u32 %r6885, %r6894, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2612, %r6885, 4;
add.s64 %rd2613, %rd45, %rd2612;
ld.shared.u32 %r8367, [%rd2613];
add.s32 %r8368, %r8367, %r8366;
// inline asm
bfe.u32 %r6889, %r6894, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2614, %r6889, 4;
add.s64 %rd2615, %rd48, %rd2614;
ld.shared.u32 %r8369, [%rd2615];
xor.b32 %r8370, %r8369, %r8368;
// inline asm
bfe.u32 %r6893, %r6894, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2616, %r6893, 4;
add.s64 %rd2617, %rd51, %rd2616;
ld.shared.u32 %r8371, [%rd2617];
add.s32 %r8372, %r8371, %r8370;
xor.b32 %r8373, %r10342, %r6878;
xor.b32 %r6910, %r8373, %r8372;
// inline asm
bfe.u32 %r6897, %r6910, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2618, %r6897, 4;
add.s64 %rd2619, %rd42, %rd2618;
ld.shared.u32 %r8374, [%rd2619];
// inline asm
bfe.u32 %r6901, %r6910, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2620, %r6901, 4;
add.s64 %rd2621, %rd45, %rd2620;
ld.shared.u32 %r8375, [%rd2621];
add.s32 %r8376, %r8375, %r8374;
// inline asm
bfe.u32 %r6905, %r6910, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2622, %r6905, 4;
add.s64 %rd2623, %rd48, %rd2622;
ld.shared.u32 %r8377, [%rd2623];
xor.b32 %r8378, %r8377, %r8376;
// inline asm
bfe.u32 %r6909, %r6910, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2624, %r6909, 4;
add.s64 %rd2625, %rd51, %rd2624;
ld.shared.u32 %r8379, [%rd2625];
add.s32 %r8380, %r8379, %r8378;
xor.b32 %r8381, %r10341, %r6894;
xor.b32 %r6926, %r8381, %r8380;
// inline asm
bfe.u32 %r6913, %r6926, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2626, %r6913, 4;
add.s64 %rd2627, %rd42, %rd2626;
ld.shared.u32 %r8382, [%rd2627];
// inline asm
bfe.u32 %r6917, %r6926, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2628, %r6917, 4;
add.s64 %rd2629, %rd45, %rd2628;
ld.shared.u32 %r8383, [%rd2629];
add.s32 %r8384, %r8383, %r8382;
// inline asm
bfe.u32 %r6921, %r6926, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2630, %r6921, 4;
add.s64 %rd2631, %rd48, %rd2630;
ld.shared.u32 %r8385, [%rd2631];
xor.b32 %r8386, %r8385, %r8384;
// inline asm
bfe.u32 %r6925, %r6926, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2632, %r6925, 4;
add.s64 %rd2633, %rd51, %rd2632;
ld.shared.u32 %r8387, [%rd2633];
add.s32 %r8388, %r8387, %r8386;
xor.b32 %r8389, %r10340, %r6910;
xor.b32 %r6942, %r8389, %r8388;
// inline asm
bfe.u32 %r6929, %r6942, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2634, %r6929, 4;
add.s64 %rd2635, %rd42, %rd2634;
ld.shared.u32 %r8390, [%rd2635];
// inline asm
bfe.u32 %r6933, %r6942, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2636, %r6933, 4;
add.s64 %rd2637, %rd45, %rd2636;
ld.shared.u32 %r8391, [%rd2637];
add.s32 %r8392, %r8391, %r8390;
// inline asm
bfe.u32 %r6937, %r6942, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2638, %r6937, 4;
add.s64 %rd2639, %rd48, %rd2638;
ld.shared.u32 %r8393, [%rd2639];
xor.b32 %r8394, %r8393, %r8392;
// inline asm
bfe.u32 %r6941, %r6942, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2640, %r6941, 4;
add.s64 %rd2641, %rd51, %rd2640;
ld.shared.u32 %r8395, [%rd2641];
add.s32 %r8396, %r8395, %r8394;
xor.b32 %r8397, %r7627, %r6926;
xor.b32 %r6958, %r8397, %r8396;
// inline asm
bfe.u32 %r6945, %r6958, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2642, %r6945, 4;
add.s64 %rd2643, %rd42, %rd2642;
ld.shared.u32 %r8398, [%rd2643];
// inline asm
bfe.u32 %r6949, %r6958, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2644, %r6949, 4;
add.s64 %rd2645, %rd45, %rd2644;
ld.shared.u32 %r8399, [%rd2645];
add.s32 %r8400, %r8399, %r8398;
// inline asm
bfe.u32 %r6953, %r6958, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2646, %r6953, 4;
add.s64 %rd2647, %rd48, %rd2646;
ld.shared.u32 %r8401, [%rd2647];
xor.b32 %r8402, %r8401, %r8400;
// inline asm
bfe.u32 %r6957, %r6958, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2648, %r6957, 4;
add.s64 %rd2649, %rd51, %rd2648;
ld.shared.u32 %r8403, [%rd2649];
add.s32 %r8404, %r8403, %r8402;
xor.b32 %r8405, %r7636, %r6942;
xor.b32 %r6974, %r8405, %r8404;
// inline asm
bfe.u32 %r6961, %r6974, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2650, %r6961, 4;
add.s64 %rd2651, %rd42, %rd2650;
ld.shared.u32 %r8406, [%rd2651];
// inline asm
bfe.u32 %r6965, %r6974, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2652, %r6965, 4;
add.s64 %rd2653, %rd45, %rd2652;
ld.shared.u32 %r8407, [%rd2653];
add.s32 %r8408, %r8407, %r8406;
// inline asm
bfe.u32 %r6969, %r6974, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2654, %r6969, 4;
add.s64 %rd2655, %rd48, %rd2654;
ld.shared.u32 %r8409, [%rd2655];
xor.b32 %r8410, %r8409, %r8408;
// inline asm
bfe.u32 %r6973, %r6974, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2656, %r6973, 4;
add.s64 %rd2657, %rd51, %rd2656;
ld.shared.u32 %r8411, [%rd2657];
add.s32 %r8412, %r8411, %r8410;
xor.b32 %r8413, %r7645, %r6958;
xor.b32 %r6990, %r8413, %r8412;
// inline asm
bfe.u32 %r6977, %r6990, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2658, %r6977, 4;
add.s64 %rd2659, %rd42, %rd2658;
ld.shared.u32 %r8414, [%rd2659];
// inline asm
bfe.u32 %r6981, %r6990, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2660, %r6981, 4;
add.s64 %rd2661, %rd45, %rd2660;
ld.shared.u32 %r8415, [%rd2661];
add.s32 %r8416, %r8415, %r8414;
// inline asm
bfe.u32 %r6985, %r6990, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2662, %r6985, 4;
add.s64 %rd2663, %rd48, %rd2662;
ld.shared.u32 %r8417, [%rd2663];
xor.b32 %r8418, %r8417, %r8416;
// inline asm
bfe.u32 %r6989, %r6990, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2664, %r6989, 4;
add.s64 %rd2665, %rd51, %rd2664;
ld.shared.u32 %r8419, [%rd2665];
add.s32 %r8420, %r8419, %r8418;
xor.b32 %r8421, %r7654, %r6974;
xor.b32 %r7006, %r8421, %r8420;
// inline asm
bfe.u32 %r6993, %r7006, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2666, %r6993, 4;
add.s64 %rd2667, %rd42, %rd2666;
ld.shared.u32 %r8422, [%rd2667];
// inline asm
bfe.u32 %r6997, %r7006, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2668, %r6997, 4;
add.s64 %rd2669, %rd45, %rd2668;
ld.shared.u32 %r8423, [%rd2669];
add.s32 %r8424, %r8423, %r8422;
// inline asm
bfe.u32 %r7001, %r7006, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2670, %r7001, 4;
add.s64 %rd2671, %rd48, %rd2670;
ld.shared.u32 %r8425, [%rd2671];
xor.b32 %r8426, %r8425, %r8424;
// inline asm
bfe.u32 %r7005, %r7006, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2672, %r7005, 4;
add.s64 %rd2673, %rd51, %rd2672;
ld.shared.u32 %r8427, [%rd2673];
add.s32 %r8428, %r8427, %r8426;
xor.b32 %r8429, %r7663, %r6990;
xor.b32 %r10338, %r8429, %r8428;
xor.b32 %r10339, %r7665, %r7006;
xor.b32 %r7022, %r5470, %r7006;
// inline asm
bfe.u32 %r7009, %r7022, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2674, %r7009, 4;
add.s64 %rd2675, %rd42, %rd2674;
ld.shared.u32 %r8430, [%rd2675];
// inline asm
bfe.u32 %r7013, %r7022, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2676, %r7013, 4;
add.s64 %rd2677, %rd45, %rd2676;
ld.shared.u32 %r8431, [%rd2677];
add.s32 %r8432, %r8431, %r8430;
// inline asm
bfe.u32 %r7017, %r7022, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2678, %r7017, 4;
add.s64 %rd2679, %rd48, %rd2678;
ld.shared.u32 %r8433, [%rd2679];
xor.b32 %r8434, %r8433, %r8432;
// inline asm
bfe.u32 %r7021, %r7022, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2680, %r7021, 4;
add.s64 %rd2681, %rd51, %rd2680;
ld.shared.u32 %r8435, [%rd2681];
add.s32 %r8436, %r8435, %r8434;
xor.b32 %r8437, %r10350, %r10338;
xor.b32 %r7038, %r8437, %r8436;
// inline asm
bfe.u32 %r7025, %r7038, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2682, %r7025, 4;
add.s64 %rd2683, %rd42, %rd2682;
ld.shared.u32 %r8438, [%rd2683];
// inline asm
bfe.u32 %r7029, %r7038, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2684, %r7029, 4;
add.s64 %rd2685, %rd45, %rd2684;
ld.shared.u32 %r8439, [%rd2685];
add.s32 %r8440, %r8439, %r8438;
// inline asm
bfe.u32 %r7033, %r7038, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2686, %r7033, 4;
add.s64 %rd2687, %rd48, %rd2686;
ld.shared.u32 %r8441, [%rd2687];
xor.b32 %r8442, %r8441, %r8440;
// inline asm
bfe.u32 %r7037, %r7038, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2688, %r7037, 4;
add.s64 %rd2689, %rd51, %rd2688;
ld.shared.u32 %r8443, [%rd2689];
add.s32 %r8444, %r8443, %r8442;
xor.b32 %r8445, %r10349, %r7022;
xor.b32 %r7054, %r8445, %r8444;
// inline asm
bfe.u32 %r7041, %r7054, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2690, %r7041, 4;
add.s64 %rd2691, %rd42, %rd2690;
ld.shared.u32 %r8446, [%rd2691];
// inline asm
bfe.u32 %r7045, %r7054, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2692, %r7045, 4;
add.s64 %rd2693, %rd45, %rd2692;
ld.shared.u32 %r8447, [%rd2693];
add.s32 %r8448, %r8447, %r8446;
// inline asm
bfe.u32 %r7049, %r7054, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2694, %r7049, 4;
add.s64 %rd2695, %rd48, %rd2694;
ld.shared.u32 %r8449, [%rd2695];
xor.b32 %r8450, %r8449, %r8448;
// inline asm
bfe.u32 %r7053, %r7054, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2696, %r7053, 4;
add.s64 %rd2697, %rd51, %rd2696;
ld.shared.u32 %r8451, [%rd2697];
add.s32 %r8452, %r8451, %r8450;
xor.b32 %r8453, %r10348, %r7038;
xor.b32 %r7070, %r8453, %r8452;
// inline asm
bfe.u32 %r7057, %r7070, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2698, %r7057, 4;
add.s64 %rd2699, %rd42, %rd2698;
ld.shared.u32 %r8454, [%rd2699];
// inline asm
bfe.u32 %r7061, %r7070, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2700, %r7061, 4;
add.s64 %rd2701, %rd45, %rd2700;
ld.shared.u32 %r8455, [%rd2701];
add.s32 %r8456, %r8455, %r8454;
// inline asm
bfe.u32 %r7065, %r7070, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2702, %r7065, 4;
add.s64 %rd2703, %rd48, %rd2702;
ld.shared.u32 %r8457, [%rd2703];
xor.b32 %r8458, %r8457, %r8456;
// inline asm
bfe.u32 %r7069, %r7070, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2704, %r7069, 4;
add.s64 %rd2705, %rd51, %rd2704;
ld.shared.u32 %r8459, [%rd2705];
add.s32 %r8460, %r8459, %r8458;
xor.b32 %r8461, %r10347, %r7054;
xor.b32 %r7086, %r8461, %r8460;
// inline asm
bfe.u32 %r7073, %r7086, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2706, %r7073, 4;
add.s64 %rd2707, %rd42, %rd2706;
ld.shared.u32 %r8462, [%rd2707];
// inline asm
bfe.u32 %r7077, %r7086, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2708, %r7077, 4;
add.s64 %rd2709, %rd45, %rd2708;
ld.shared.u32 %r8463, [%rd2709];
add.s32 %r8464, %r8463, %r8462;
// inline asm
bfe.u32 %r7081, %r7086, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2710, %r7081, 4;
add.s64 %rd2711, %rd48, %rd2710;
ld.shared.u32 %r8465, [%rd2711];
xor.b32 %r8466, %r8465, %r8464;
// inline asm
bfe.u32 %r7085, %r7086, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2712, %r7085, 4;
add.s64 %rd2713, %rd51, %rd2712;
ld.shared.u32 %r8467, [%rd2713];
add.s32 %r8468, %r8467, %r8466;
xor.b32 %r8469, %r10346, %r7070;
xor.b32 %r7102, %r8469, %r8468;
// inline asm
bfe.u32 %r7089, %r7102, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2714, %r7089, 4;
add.s64 %rd2715, %rd42, %rd2714;
ld.shared.u32 %r8470, [%rd2715];
// inline asm
bfe.u32 %r7093, %r7102, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2716, %r7093, 4;
add.s64 %rd2717, %rd45, %rd2716;
ld.shared.u32 %r8471, [%rd2717];
add.s32 %r8472, %r8471, %r8470;
// inline asm
bfe.u32 %r7097, %r7102, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2718, %r7097, 4;
add.s64 %rd2719, %rd48, %rd2718;
ld.shared.u32 %r8473, [%rd2719];
xor.b32 %r8474, %r8473, %r8472;
// inline asm
bfe.u32 %r7101, %r7102, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2720, %r7101, 4;
add.s64 %rd2721, %rd51, %rd2720;
ld.shared.u32 %r8475, [%rd2721];
add.s32 %r8476, %r8475, %r8474;
xor.b32 %r8477, %r10345, %r7086;
xor.b32 %r7118, %r8477, %r8476;
// inline asm
bfe.u32 %r7105, %r7118, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2722, %r7105, 4;
add.s64 %rd2723, %rd42, %rd2722;
ld.shared.u32 %r8478, [%rd2723];
// inline asm
bfe.u32 %r7109, %r7118, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2724, %r7109, 4;
add.s64 %rd2725, %rd45, %rd2724;
ld.shared.u32 %r8479, [%rd2725];
add.s32 %r8480, %r8479, %r8478;
// inline asm
bfe.u32 %r7113, %r7118, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2726, %r7113, 4;
add.s64 %rd2727, %rd48, %rd2726;
ld.shared.u32 %r8481, [%rd2727];
xor.b32 %r8482, %r8481, %r8480;
// inline asm
bfe.u32 %r7117, %r7118, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2728, %r7117, 4;
add.s64 %rd2729, %rd51, %rd2728;
ld.shared.u32 %r8483, [%rd2729];
add.s32 %r8484, %r8483, %r8482;
xor.b32 %r8485, %r10344, %r7102;
xor.b32 %r7134, %r8485, %r8484;
// inline asm
bfe.u32 %r7121, %r7134, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2730, %r7121, 4;
add.s64 %rd2731, %rd42, %rd2730;
ld.shared.u32 %r8486, [%rd2731];
// inline asm
bfe.u32 %r7125, %r7134, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2732, %r7125, 4;
add.s64 %rd2733, %rd45, %rd2732;
ld.shared.u32 %r8487, [%rd2733];
add.s32 %r8488, %r8487, %r8486;
// inline asm
bfe.u32 %r7129, %r7134, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2734, %r7129, 4;
add.s64 %rd2735, %rd48, %rd2734;
ld.shared.u32 %r8489, [%rd2735];
xor.b32 %r8490, %r8489, %r8488;
// inline asm
bfe.u32 %r7133, %r7134, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2736, %r7133, 4;
add.s64 %rd2737, %rd51, %rd2736;
ld.shared.u32 %r8491, [%rd2737];
add.s32 %r8492, %r8491, %r8490;
xor.b32 %r8493, %r10343, %r7118;
xor.b32 %r7150, %r8493, %r8492;
// inline asm
bfe.u32 %r7137, %r7150, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2738, %r7137, 4;
add.s64 %rd2739, %rd42, %rd2738;
ld.shared.u32 %r8494, [%rd2739];
// inline asm
bfe.u32 %r7141, %r7150, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2740, %r7141, 4;
add.s64 %rd2741, %rd45, %rd2740;
ld.shared.u32 %r8495, [%rd2741];
add.s32 %r8496, %r8495, %r8494;
// inline asm
bfe.u32 %r7145, %r7150, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2742, %r7145, 4;
add.s64 %rd2743, %rd48, %rd2742;
ld.shared.u32 %r8497, [%rd2743];
xor.b32 %r8498, %r8497, %r8496;
// inline asm
bfe.u32 %r7149, %r7150, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2744, %r7149, 4;
add.s64 %rd2745, %rd51, %rd2744;
ld.shared.u32 %r8499, [%rd2745];
add.s32 %r8500, %r8499, %r8498;
xor.b32 %r8501, %r10342, %r7134;
xor.b32 %r7166, %r8501, %r8500;
// inline asm
bfe.u32 %r7153, %r7166, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2746, %r7153, 4;
add.s64 %rd2747, %rd42, %rd2746;
ld.shared.u32 %r8502, [%rd2747];
// inline asm
bfe.u32 %r7157, %r7166, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2748, %r7157, 4;
add.s64 %rd2749, %rd45, %rd2748;
ld.shared.u32 %r8503, [%rd2749];
add.s32 %r8504, %r8503, %r8502;
// inline asm
bfe.u32 %r7161, %r7166, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2750, %r7161, 4;
add.s64 %rd2751, %rd48, %rd2750;
ld.shared.u32 %r8505, [%rd2751];
xor.b32 %r8506, %r8505, %r8504;
// inline asm
bfe.u32 %r7165, %r7166, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2752, %r7165, 4;
add.s64 %rd2753, %rd51, %rd2752;
ld.shared.u32 %r8507, [%rd2753];
add.s32 %r8508, %r8507, %r8506;
xor.b32 %r8509, %r10341, %r7150;
xor.b32 %r7182, %r8509, %r8508;
// inline asm
bfe.u32 %r7169, %r7182, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2754, %r7169, 4;
add.s64 %rd2755, %rd42, %rd2754;
ld.shared.u32 %r8510, [%rd2755];
// inline asm
bfe.u32 %r7173, %r7182, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2756, %r7173, 4;
add.s64 %rd2757, %rd45, %rd2756;
ld.shared.u32 %r8511, [%rd2757];
add.s32 %r8512, %r8511, %r8510;
// inline asm
bfe.u32 %r7177, %r7182, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2758, %r7177, 4;
add.s64 %rd2759, %rd48, %rd2758;
ld.shared.u32 %r8513, [%rd2759];
xor.b32 %r8514, %r8513, %r8512;
// inline asm
bfe.u32 %r7181, %r7182, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2760, %r7181, 4;
add.s64 %rd2761, %rd51, %rd2760;
ld.shared.u32 %r8515, [%rd2761];
add.s32 %r8516, %r8515, %r8514;
xor.b32 %r8517, %r10340, %r7166;
xor.b32 %r7198, %r8517, %r8516;
// inline asm
bfe.u32 %r7185, %r7198, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2762, %r7185, 4;
add.s64 %rd2763, %rd42, %rd2762;
ld.shared.u32 %r8518, [%rd2763];
// inline asm
bfe.u32 %r7189, %r7198, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2764, %r7189, 4;
add.s64 %rd2765, %rd45, %rd2764;
ld.shared.u32 %r8519, [%rd2765];
add.s32 %r8520, %r8519, %r8518;
// inline asm
bfe.u32 %r7193, %r7198, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2766, %r7193, 4;
add.s64 %rd2767, %rd48, %rd2766;
ld.shared.u32 %r8521, [%rd2767];
xor.b32 %r8522, %r8521, %r8520;
// inline asm
bfe.u32 %r7197, %r7198, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2768, %r7197, 4;
add.s64 %rd2769, %rd51, %rd2768;
ld.shared.u32 %r8523, [%rd2769];
add.s32 %r8524, %r8523, %r8522;
xor.b32 %r8525, %r10339, %r7182;
xor.b32 %r7214, %r8525, %r8524;
// inline asm
bfe.u32 %r7201, %r7214, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2770, %r7201, 4;
add.s64 %rd2771, %rd42, %rd2770;
ld.shared.u32 %r8526, [%rd2771];
// inline asm
bfe.u32 %r7205, %r7214, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2772, %r7205, 4;
add.s64 %rd2773, %rd45, %rd2772;
ld.shared.u32 %r8527, [%rd2773];
add.s32 %r8528, %r8527, %r8526;
// inline asm
bfe.u32 %r7209, %r7214, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2774, %r7209, 4;
add.s64 %rd2775, %rd48, %rd2774;
ld.shared.u32 %r8529, [%rd2775];
xor.b32 %r8530, %r8529, %r8528;
// inline asm
bfe.u32 %r7213, %r7214, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2776, %r7213, 4;
add.s64 %rd2777, %rd51, %rd2776;
ld.shared.u32 %r8531, [%rd2777];
add.s32 %r8532, %r8531, %r8530;
xor.b32 %r8533, %r10338, %r7198;
xor.b32 %r7230, %r8533, %r8532;
// inline asm
bfe.u32 %r7217, %r7230, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2778, %r7217, 4;
add.s64 %rd2779, %rd42, %rd2778;
ld.shared.u32 %r8534, [%rd2779];
// inline asm
bfe.u32 %r7221, %r7230, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2780, %r7221, 4;
add.s64 %rd2781, %rd45, %rd2780;
ld.shared.u32 %r8535, [%rd2781];
add.s32 %r8536, %r8535, %r8534;
// inline asm
bfe.u32 %r7225, %r7230, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2782, %r7225, 4;
add.s64 %rd2783, %rd48, %rd2782;
ld.shared.u32 %r8537, [%rd2783];
xor.b32 %r8538, %r8537, %r8536;
// inline asm
bfe.u32 %r7229, %r7230, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2784, %r7229, 4;
add.s64 %rd2785, %rd51, %rd2784;
ld.shared.u32 %r8539, [%rd2785];
add.s32 %r8540, %r8539, %r8538;
xor.b32 %r8541, %r7645, %r7214;
xor.b32 %r7246, %r8541, %r8540;
// inline asm
bfe.u32 %r7233, %r7246, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2786, %r7233, 4;
add.s64 %rd2787, %rd42, %rd2786;
ld.shared.u32 %r8542, [%rd2787];
// inline asm
bfe.u32 %r7237, %r7246, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2788, %r7237, 4;
add.s64 %rd2789, %rd45, %rd2788;
ld.shared.u32 %r8543, [%rd2789];
add.s32 %r8544, %r8543, %r8542;
// inline asm
bfe.u32 %r7241, %r7246, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2790, %r7241, 4;
add.s64 %rd2791, %rd48, %rd2790;
ld.shared.u32 %r8545, [%rd2791];
xor.b32 %r8546, %r8545, %r8544;
// inline asm
bfe.u32 %r7245, %r7246, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2792, %r7245, 4;
add.s64 %rd2793, %rd51, %rd2792;
ld.shared.u32 %r8547, [%rd2793];
add.s32 %r8548, %r8547, %r8546;
xor.b32 %r8549, %r7654, %r7230;
xor.b32 %r7262, %r8549, %r8548;
// inline asm
bfe.u32 %r7249, %r7262, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2794, %r7249, 4;
add.s64 %rd2795, %rd42, %rd2794;
ld.shared.u32 %r8550, [%rd2795];
// inline asm
bfe.u32 %r7253, %r7262, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2796, %r7253, 4;
add.s64 %rd2797, %rd45, %rd2796;
ld.shared.u32 %r8551, [%rd2797];
add.s32 %r8552, %r8551, %r8550;
// inline asm
bfe.u32 %r7257, %r7262, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2798, %r7257, 4;
add.s64 %rd2799, %rd48, %rd2798;
ld.shared.u32 %r8553, [%rd2799];
xor.b32 %r8554, %r8553, %r8552;
// inline asm
bfe.u32 %r7261, %r7262, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2800, %r7261, 4;
add.s64 %rd2801, %rd51, %rd2800;
ld.shared.u32 %r8555, [%rd2801];
add.s32 %r8556, %r8555, %r8554;
xor.b32 %r8557, %r7663, %r7246;
xor.b32 %r10336, %r8557, %r8556;
xor.b32 %r10337, %r7665, %r7262;
xor.b32 %r7278, %r5470, %r7262;
// inline asm
bfe.u32 %r7265, %r7278, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2802, %r7265, 4;
add.s64 %rd2803, %rd42, %rd2802;
ld.shared.u32 %r8558, [%rd2803];
// inline asm
bfe.u32 %r7269, %r7278, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2804, %r7269, 4;
add.s64 %rd2805, %rd45, %rd2804;
ld.shared.u32 %r8559, [%rd2805];
add.s32 %r8560, %r8559, %r8558;
// inline asm
bfe.u32 %r7273, %r7278, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2806, %r7273, 4;
add.s64 %rd2807, %rd48, %rd2806;
ld.shared.u32 %r8561, [%rd2807];
xor.b32 %r8562, %r8561, %r8560;
// inline asm
bfe.u32 %r7277, %r7278, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2808, %r7277, 4;
add.s64 %rd2809, %rd51, %rd2808;
ld.shared.u32 %r8563, [%rd2809];
add.s32 %r8564, %r8563, %r8562;
xor.b32 %r8565, %r10350, %r10336;
xor.b32 %r7294, %r8565, %r8564;
// inline asm
bfe.u32 %r7281, %r7294, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2810, %r7281, 4;
add.s64 %rd2811, %rd42, %rd2810;
ld.shared.u32 %r8566, [%rd2811];
// inline asm
bfe.u32 %r7285, %r7294, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2812, %r7285, 4;
add.s64 %rd2813, %rd45, %rd2812;
ld.shared.u32 %r8567, [%rd2813];
add.s32 %r8568, %r8567, %r8566;
// inline asm
bfe.u32 %r7289, %r7294, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2814, %r7289, 4;
add.s64 %rd2815, %rd48, %rd2814;
ld.shared.u32 %r8569, [%rd2815];
xor.b32 %r8570, %r8569, %r8568;
// inline asm
bfe.u32 %r7293, %r7294, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2816, %r7293, 4;
add.s64 %rd2817, %rd51, %rd2816;
ld.shared.u32 %r8571, [%rd2817];
add.s32 %r8572, %r8571, %r8570;
xor.b32 %r8573, %r10349, %r7278;
xor.b32 %r7310, %r8573, %r8572;
// inline asm
bfe.u32 %r7297, %r7310, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2818, %r7297, 4;
add.s64 %rd2819, %rd42, %rd2818;
ld.shared.u32 %r8574, [%rd2819];
// inline asm
bfe.u32 %r7301, %r7310, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2820, %r7301, 4;
add.s64 %rd2821, %rd45, %rd2820;
ld.shared.u32 %r8575, [%rd2821];
add.s32 %r8576, %r8575, %r8574;
// inline asm
bfe.u32 %r7305, %r7310, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2822, %r7305, 4;
add.s64 %rd2823, %rd48, %rd2822;
ld.shared.u32 %r8577, [%rd2823];
xor.b32 %r8578, %r8577, %r8576;
// inline asm
bfe.u32 %r7309, %r7310, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2824, %r7309, 4;
add.s64 %rd2825, %rd51, %rd2824;
ld.shared.u32 %r8579, [%rd2825];
add.s32 %r8580, %r8579, %r8578;
xor.b32 %r8581, %r10348, %r7294;
xor.b32 %r7326, %r8581, %r8580;
// inline asm
bfe.u32 %r7313, %r7326, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2826, %r7313, 4;
add.s64 %rd2827, %rd42, %rd2826;
ld.shared.u32 %r8582, [%rd2827];
// inline asm
bfe.u32 %r7317, %r7326, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2828, %r7317, 4;
add.s64 %rd2829, %rd45, %rd2828;
ld.shared.u32 %r8583, [%rd2829];
add.s32 %r8584, %r8583, %r8582;
// inline asm
bfe.u32 %r7321, %r7326, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2830, %r7321, 4;
add.s64 %rd2831, %rd48, %rd2830;
ld.shared.u32 %r8585, [%rd2831];
xor.b32 %r8586, %r8585, %r8584;
// inline asm
bfe.u32 %r7325, %r7326, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2832, %r7325, 4;
add.s64 %rd2833, %rd51, %rd2832;
ld.shared.u32 %r8587, [%rd2833];
add.s32 %r8588, %r8587, %r8586;
xor.b32 %r8589, %r10347, %r7310;
xor.b32 %r7342, %r8589, %r8588;
// inline asm
bfe.u32 %r7329, %r7342, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2834, %r7329, 4;
add.s64 %rd2835, %rd42, %rd2834;
ld.shared.u32 %r8590, [%rd2835];
// inline asm
bfe.u32 %r7333, %r7342, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2836, %r7333, 4;
add.s64 %rd2837, %rd45, %rd2836;
ld.shared.u32 %r8591, [%rd2837];
add.s32 %r8592, %r8591, %r8590;
// inline asm
bfe.u32 %r7337, %r7342, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2838, %r7337, 4;
add.s64 %rd2839, %rd48, %rd2838;
ld.shared.u32 %r8593, [%rd2839];
xor.b32 %r8594, %r8593, %r8592;
// inline asm
bfe.u32 %r7341, %r7342, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2840, %r7341, 4;
add.s64 %rd2841, %rd51, %rd2840;
ld.shared.u32 %r8595, [%rd2841];
add.s32 %r8596, %r8595, %r8594;
xor.b32 %r8597, %r10346, %r7326;
xor.b32 %r7358, %r8597, %r8596;
// inline asm
bfe.u32 %r7345, %r7358, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2842, %r7345, 4;
add.s64 %rd2843, %rd42, %rd2842;
ld.shared.u32 %r8598, [%rd2843];
// inline asm
bfe.u32 %r7349, %r7358, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2844, %r7349, 4;
add.s64 %rd2845, %rd45, %rd2844;
ld.shared.u32 %r8599, [%rd2845];
add.s32 %r8600, %r8599, %r8598;
// inline asm
bfe.u32 %r7353, %r7358, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2846, %r7353, 4;
add.s64 %rd2847, %rd48, %rd2846;
ld.shared.u32 %r8601, [%rd2847];
xor.b32 %r8602, %r8601, %r8600;
// inline asm
bfe.u32 %r7357, %r7358, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2848, %r7357, 4;
add.s64 %rd2849, %rd51, %rd2848;
ld.shared.u32 %r8603, [%rd2849];
add.s32 %r8604, %r8603, %r8602;
xor.b32 %r8605, %r10345, %r7342;
xor.b32 %r7374, %r8605, %r8604;
// inline asm
bfe.u32 %r7361, %r7374, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2850, %r7361, 4;
add.s64 %rd2851, %rd42, %rd2850;
ld.shared.u32 %r8606, [%rd2851];
// inline asm
bfe.u32 %r7365, %r7374, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2852, %r7365, 4;
add.s64 %rd2853, %rd45, %rd2852;
ld.shared.u32 %r8607, [%rd2853];
add.s32 %r8608, %r8607, %r8606;
// inline asm
bfe.u32 %r7369, %r7374, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2854, %r7369, 4;
add.s64 %rd2855, %rd48, %rd2854;
ld.shared.u32 %r8609, [%rd2855];
xor.b32 %r8610, %r8609, %r8608;
// inline asm
bfe.u32 %r7373, %r7374, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2856, %r7373, 4;
add.s64 %rd2857, %rd51, %rd2856;
ld.shared.u32 %r8611, [%rd2857];
add.s32 %r8612, %r8611, %r8610;
xor.b32 %r8613, %r10344, %r7358;
xor.b32 %r7390, %r8613, %r8612;
// inline asm
bfe.u32 %r7377, %r7390, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2858, %r7377, 4;
add.s64 %rd2859, %rd42, %rd2858;
ld.shared.u32 %r8614, [%rd2859];
// inline asm
bfe.u32 %r7381, %r7390, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2860, %r7381, 4;
add.s64 %rd2861, %rd45, %rd2860;
ld.shared.u32 %r8615, [%rd2861];
add.s32 %r8616, %r8615, %r8614;
// inline asm
bfe.u32 %r7385, %r7390, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2862, %r7385, 4;
add.s64 %rd2863, %rd48, %rd2862;
ld.shared.u32 %r8617, [%rd2863];
xor.b32 %r8618, %r8617, %r8616;
// inline asm
bfe.u32 %r7389, %r7390, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2864, %r7389, 4;
add.s64 %rd2865, %rd51, %rd2864;
ld.shared.u32 %r8619, [%rd2865];
add.s32 %r8620, %r8619, %r8618;
xor.b32 %r8621, %r10343, %r7374;
xor.b32 %r7406, %r8621, %r8620;
// inline asm
bfe.u32 %r7393, %r7406, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2866, %r7393, 4;
add.s64 %rd2867, %rd42, %rd2866;
ld.shared.u32 %r8622, [%rd2867];
// inline asm
bfe.u32 %r7397, %r7406, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2868, %r7397, 4;
add.s64 %rd2869, %rd45, %rd2868;
ld.shared.u32 %r8623, [%rd2869];
add.s32 %r8624, %r8623, %r8622;
// inline asm
bfe.u32 %r7401, %r7406, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2870, %r7401, 4;
add.s64 %rd2871, %rd48, %rd2870;
ld.shared.u32 %r8625, [%rd2871];
xor.b32 %r8626, %r8625, %r8624;
// inline asm
bfe.u32 %r7405, %r7406, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2872, %r7405, 4;
add.s64 %rd2873, %rd51, %rd2872;
ld.shared.u32 %r8627, [%rd2873];
add.s32 %r8628, %r8627, %r8626;
xor.b32 %r8629, %r10342, %r7390;
xor.b32 %r7422, %r8629, %r8628;
// inline asm
bfe.u32 %r7409, %r7422, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2874, %r7409, 4;
add.s64 %rd2875, %rd42, %rd2874;
ld.shared.u32 %r8630, [%rd2875];
// inline asm
bfe.u32 %r7413, %r7422, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2876, %r7413, 4;
add.s64 %rd2877, %rd45, %rd2876;
ld.shared.u32 %r8631, [%rd2877];
add.s32 %r8632, %r8631, %r8630;
// inline asm
bfe.u32 %r7417, %r7422, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2878, %r7417, 4;
add.s64 %rd2879, %rd48, %rd2878;
ld.shared.u32 %r8633, [%rd2879];
xor.b32 %r8634, %r8633, %r8632;
// inline asm
bfe.u32 %r7421, %r7422, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2880, %r7421, 4;
add.s64 %rd2881, %rd51, %rd2880;
ld.shared.u32 %r8635, [%rd2881];
add.s32 %r8636, %r8635, %r8634;
xor.b32 %r8637, %r10341, %r7406;
xor.b32 %r7438, %r8637, %r8636;
// inline asm
bfe.u32 %r7425, %r7438, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2882, %r7425, 4;
add.s64 %rd2883, %rd42, %rd2882;
ld.shared.u32 %r8638, [%rd2883];
// inline asm
bfe.u32 %r7429, %r7438, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2884, %r7429, 4;
add.s64 %rd2885, %rd45, %rd2884;
ld.shared.u32 %r8639, [%rd2885];
add.s32 %r8640, %r8639, %r8638;
// inline asm
bfe.u32 %r7433, %r7438, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2886, %r7433, 4;
add.s64 %rd2887, %rd48, %rd2886;
ld.shared.u32 %r8641, [%rd2887];
xor.b32 %r8642, %r8641, %r8640;
// inline asm
bfe.u32 %r7437, %r7438, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2888, %r7437, 4;
add.s64 %rd2889, %rd51, %rd2888;
ld.shared.u32 %r8643, [%rd2889];
add.s32 %r8644, %r8643, %r8642;
xor.b32 %r8645, %r10340, %r7422;
xor.b32 %r7454, %r8645, %r8644;
// inline asm
bfe.u32 %r7441, %r7454, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2890, %r7441, 4;
add.s64 %rd2891, %rd42, %rd2890;
ld.shared.u32 %r8646, [%rd2891];
// inline asm
bfe.u32 %r7445, %r7454, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2892, %r7445, 4;
add.s64 %rd2893, %rd45, %rd2892;
ld.shared.u32 %r8647, [%rd2893];
add.s32 %r8648, %r8647, %r8646;
// inline asm
bfe.u32 %r7449, %r7454, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2894, %r7449, 4;
add.s64 %rd2895, %rd48, %rd2894;
ld.shared.u32 %r8649, [%rd2895];
xor.b32 %r8650, %r8649, %r8648;
// inline asm
bfe.u32 %r7453, %r7454, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2896, %r7453, 4;
add.s64 %rd2897, %rd51, %rd2896;
ld.shared.u32 %r8651, [%rd2897];
add.s32 %r8652, %r8651, %r8650;
xor.b32 %r8653, %r10339, %r7438;
xor.b32 %r7470, %r8653, %r8652;
// inline asm
bfe.u32 %r7457, %r7470, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2898, %r7457, 4;
add.s64 %rd2899, %rd42, %rd2898;
ld.shared.u32 %r8654, [%rd2899];
// inline asm
bfe.u32 %r7461, %r7470, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2900, %r7461, 4;
add.s64 %rd2901, %rd45, %rd2900;
ld.shared.u32 %r8655, [%rd2901];
add.s32 %r8656, %r8655, %r8654;
// inline asm
bfe.u32 %r7465, %r7470, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2902, %r7465, 4;
add.s64 %rd2903, %rd48, %rd2902;
ld.shared.u32 %r8657, [%rd2903];
xor.b32 %r8658, %r8657, %r8656;
// inline asm
bfe.u32 %r7469, %r7470, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2904, %r7469, 4;
add.s64 %rd2905, %rd51, %rd2904;
ld.shared.u32 %r8659, [%rd2905];
add.s32 %r8660, %r8659, %r8658;
xor.b32 %r8661, %r10338, %r7454;
xor.b32 %r7486, %r8661, %r8660;
// inline asm
bfe.u32 %r7473, %r7486, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2906, %r7473, 4;
add.s64 %rd2907, %rd42, %rd2906;
ld.shared.u32 %r8662, [%rd2907];
// inline asm
bfe.u32 %r7477, %r7486, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2908, %r7477, 4;
add.s64 %rd2909, %rd45, %rd2908;
ld.shared.u32 %r8663, [%rd2909];
add.s32 %r8664, %r8663, %r8662;
// inline asm
bfe.u32 %r7481, %r7486, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2910, %r7481, 4;
add.s64 %rd2911, %rd48, %rd2910;
ld.shared.u32 %r8665, [%rd2911];
xor.b32 %r8666, %r8665, %r8664;
// inline asm
bfe.u32 %r7485, %r7486, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2912, %r7485, 4;
add.s64 %rd2913, %rd51, %rd2912;
ld.shared.u32 %r8667, [%rd2913];
add.s32 %r8668, %r8667, %r8666;
xor.b32 %r8669, %r10337, %r7470;
xor.b32 %r7502, %r8669, %r8668;
// inline asm
bfe.u32 %r7489, %r7502, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2914, %r7489, 4;
add.s64 %rd2915, %rd42, %rd2914;
ld.shared.u32 %r8670, [%rd2915];
// inline asm
bfe.u32 %r7493, %r7502, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2916, %r7493, 4;
add.s64 %rd2917, %rd45, %rd2916;
ld.shared.u32 %r8671, [%rd2917];
add.s32 %r8672, %r8671, %r8670;
// inline asm
bfe.u32 %r7497, %r7502, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2918, %r7497, 4;
add.s64 %rd2919, %rd48, %rd2918;
ld.shared.u32 %r8673, [%rd2919];
xor.b32 %r8674, %r8673, %r8672;
// inline asm
bfe.u32 %r7501, %r7502, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2920, %r7501, 4;
add.s64 %rd2921, %rd51, %rd2920;
ld.shared.u32 %r8675, [%rd2921];
add.s32 %r8676, %r8675, %r8674;
xor.b32 %r8677, %r10336, %r7486;
xor.b32 %r7518, %r8677, %r8676;
// inline asm
bfe.u32 %r7505, %r7518, %r10287, %r10288;
// inline asm
mul.wide.u32 %rd2922, %r7505, 4;
add.s64 %rd2923, %rd42, %rd2922;
ld.shared.u32 %r8678, [%rd2923];
// inline asm
bfe.u32 %r7509, %r7518, %r10289, %r10288;
// inline asm
mul.wide.u32 %rd2924, %r7509, 4;
add.s64 %rd2925, %rd45, %rd2924;
ld.shared.u32 %r8679, [%rd2925];
add.s32 %r8680, %r8679, %r8678;
// inline asm
bfe.u32 %r7513, %r7518, %r10288, %r10288;
// inline asm
mul.wide.u32 %rd2926, %r7513, 4;
add.s64 %rd2927, %rd48, %rd2926;
ld.shared.u32 %r8681, [%rd2927];
xor.b32 %r8682, %r8681, %r8680;
// inline asm
bfe.u32 %r7517, %r7518, %r10322, %r10288;
// inline asm
mul.wide.u32 %rd2928, %r7517, 4;
add.s64 %rd2929, %rd51, %rd2928;
ld.shared.u32 %r8683, [%rd2929];
add.s32 %r8684, %r8683, %r8682;
xor.b32 %r8685, %r7663, %r7502;
xor.b32 %r10334, %r8685, %r8684;
xor.b32 %r10335, %r7665, %r7518;
mov.u64 %rd3499, %rd3;
mov.u32 %r10326, %r10334;
mov.u32 %r10327, %r10335;
BB4_14:
// Loop BB4_14: each pass runs a 16-round Feistel network over the 64-bit
// state carried in %r10327 / %r10326, then stores the resulting pair to
// shared memory at [%rd3499] and [%rd3499+4] and advances %rd3499 by 8.
//
// Shape of one pass (this matches the Blowfish block-encrypt structure):
//   x0       = %r10351 ^ %r10327                  (initial key XOR)
//   round k  : next = key_k ^ prev ^ F(cur)       (16 rounds, keys %r10350
//                                                  down to %r10335)
//   %r10327  = %r10334 ^ (input of round 16)      (final key XOR)
//   %r10326  = output of round 16
// where
//   F(x) = ((T0[x >> 24] + T1[(x >> 16) & 0xff]) ^ T2[(x >> 8) & 0xff])
//          + T3[x & 0xff]
// and T0..T3 are the u32 tables in shared memory based at %rd42, %rd45,
// %rd48 and %rd51 (byte index * 4 is added to the base for each lookup).
//
// Loop control: %r10322 is incremented by 2 per pass and the loop repeats
// while %r10322 < 256 (unsigned compare).
// NOTE(review): %r10322 is not initialised inside this block; it is used as
// the low-byte bfe offset in the code before the loop, so it is presumably 0
// on entry (=> 128 passes, 256 words written) - confirm.
// NOTE(review): %rd3499 is seeded from %rd3 just before the loop; whether
// that region aliases the table at %rd42 is not visible here. The file name
// (m03200) suggests a bcrypt S-box refill - confirm against the kernel source.
//
// Constants re-materialised on every pass: bit offsets 0, 16, 24 and the
// value 8, which serves both as a bit offset and as the bfe field width.
mov.u32 %r10325, 0;
mov.u32 %r10260, 16;
mov.u32 %r10259, 8;
mov.u32 %r10258, 24;
// Initial key XOR: %r8699 is the input word of round 1.
xor.b32 %r8699, %r10351, %r10327;
// ---- Round 1: %r8715 = %r10350 ^ %r10326 ^ F(%r8699) ----
// Byte 3 (bits 31..24) of the input indexes the table at %rd42.
// inline asm
bfe.u32 %r8686, %r8699, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd2933, %r8686, 4;
add.s64 %rd2934, %rd42, %rd2933;
ld.shared.u32 %r8943, [%rd2934];
// Byte 2 (bits 23..16) indexes the table at %rd45; added to the first lookup.
// inline asm
bfe.u32 %r8690, %r8699, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd2937, %r8690, 4;
add.s64 %rd2938, %rd45, %rd2937;
ld.shared.u32 %r8944, [%rd2938];
add.s32 %r8945, %r8944, %r8943;
// Byte 1 (bits 15..8) indexes the table at %rd48; XORed into the sum.
// inline asm
bfe.u32 %r8694, %r8699, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd2941, %r8694, 4;
add.s64 %rd2942, %rd48, %rd2941;
ld.shared.u32 %r8946, [%rd2942];
xor.b32 %r8947, %r8946, %r8945;
// Byte 0 (bits 7..0) indexes the table at %rd51; added to finish F.
// inline asm
bfe.u32 %r8698, %r8699, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd2945, %r8698, 4;
add.s64 %rd2946, %rd51, %rd2945;
ld.shared.u32 %r8948, [%rd2946];
add.s32 %r8949, %r8948, %r8947;
// Mix the round key and the other state half with F's result.
xor.b32 %r8950, %r10350, %r10326;
xor.b32 %r8715, %r8950, %r8949;
// ---- Round 2: %r8731 = %r10349 ^ %r8699 ^ F(%r8715) ----
// inline asm
bfe.u32 %r8702, %r8715, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd2947, %r8702, 4;
add.s64 %rd2948, %rd42, %rd2947;
ld.shared.u32 %r8951, [%rd2948];
// inline asm
bfe.u32 %r8706, %r8715, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd2949, %r8706, 4;
add.s64 %rd2950, %rd45, %rd2949;
ld.shared.u32 %r8952, [%rd2950];
add.s32 %r8953, %r8952, %r8951;
// inline asm
bfe.u32 %r8710, %r8715, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd2951, %r8710, 4;
add.s64 %rd2952, %rd48, %rd2951;
ld.shared.u32 %r8954, [%rd2952];
xor.b32 %r8955, %r8954, %r8953;
// inline asm
bfe.u32 %r8714, %r8715, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd2953, %r8714, 4;
add.s64 %rd2954, %rd51, %rd2953;
ld.shared.u32 %r8956, [%rd2954];
add.s32 %r8957, %r8956, %r8955;
xor.b32 %r8958, %r10349, %r8699;
xor.b32 %r8731, %r8958, %r8957;
// ---- Round 3: %r8747 = %r10348 ^ %r8715 ^ F(%r8731) ----
// inline asm
bfe.u32 %r8718, %r8731, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd2955, %r8718, 4;
add.s64 %rd2956, %rd42, %rd2955;
ld.shared.u32 %r8959, [%rd2956];
// inline asm
bfe.u32 %r8722, %r8731, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd2957, %r8722, 4;
add.s64 %rd2958, %rd45, %rd2957;
ld.shared.u32 %r8960, [%rd2958];
add.s32 %r8961, %r8960, %r8959;
// inline asm
bfe.u32 %r8726, %r8731, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd2959, %r8726, 4;
add.s64 %rd2960, %rd48, %rd2959;
ld.shared.u32 %r8962, [%rd2960];
xor.b32 %r8963, %r8962, %r8961;
// inline asm
bfe.u32 %r8730, %r8731, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd2961, %r8730, 4;
add.s64 %rd2962, %rd51, %rd2961;
ld.shared.u32 %r8964, [%rd2962];
add.s32 %r8965, %r8964, %r8963;
xor.b32 %r8966, %r10348, %r8715;
xor.b32 %r8747, %r8966, %r8965;
// ---- Round 4: %r8763 = %r10347 ^ %r8731 ^ F(%r8747) ----
// inline asm
bfe.u32 %r8734, %r8747, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd2963, %r8734, 4;
add.s64 %rd2964, %rd42, %rd2963;
ld.shared.u32 %r8967, [%rd2964];
// inline asm
bfe.u32 %r8738, %r8747, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd2965, %r8738, 4;
add.s64 %rd2966, %rd45, %rd2965;
ld.shared.u32 %r8968, [%rd2966];
add.s32 %r8969, %r8968, %r8967;
// inline asm
bfe.u32 %r8742, %r8747, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd2967, %r8742, 4;
add.s64 %rd2968, %rd48, %rd2967;
ld.shared.u32 %r8970, [%rd2968];
xor.b32 %r8971, %r8970, %r8969;
// inline asm
bfe.u32 %r8746, %r8747, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd2969, %r8746, 4;
add.s64 %rd2970, %rd51, %rd2969;
ld.shared.u32 %r8972, [%rd2970];
add.s32 %r8973, %r8972, %r8971;
xor.b32 %r8974, %r10347, %r8731;
xor.b32 %r8763, %r8974, %r8973;
// ---- Round 5: %r8779 = %r10346 ^ %r8747 ^ F(%r8763) ----
// inline asm
bfe.u32 %r8750, %r8763, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd2971, %r8750, 4;
add.s64 %rd2972, %rd42, %rd2971;
ld.shared.u32 %r8975, [%rd2972];
// inline asm
bfe.u32 %r8754, %r8763, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd2973, %r8754, 4;
add.s64 %rd2974, %rd45, %rd2973;
ld.shared.u32 %r8976, [%rd2974];
add.s32 %r8977, %r8976, %r8975;
// inline asm
bfe.u32 %r8758, %r8763, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd2975, %r8758, 4;
add.s64 %rd2976, %rd48, %rd2975;
ld.shared.u32 %r8978, [%rd2976];
xor.b32 %r8979, %r8978, %r8977;
// inline asm
bfe.u32 %r8762, %r8763, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd2977, %r8762, 4;
add.s64 %rd2978, %rd51, %rd2977;
ld.shared.u32 %r8980, [%rd2978];
add.s32 %r8981, %r8980, %r8979;
xor.b32 %r8982, %r10346, %r8747;
xor.b32 %r8779, %r8982, %r8981;
// ---- Round 6: %r8795 = %r10345 ^ %r8763 ^ F(%r8779) ----
// inline asm
bfe.u32 %r8766, %r8779, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd2979, %r8766, 4;
add.s64 %rd2980, %rd42, %rd2979;
ld.shared.u32 %r8983, [%rd2980];
// inline asm
bfe.u32 %r8770, %r8779, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd2981, %r8770, 4;
add.s64 %rd2982, %rd45, %rd2981;
ld.shared.u32 %r8984, [%rd2982];
add.s32 %r8985, %r8984, %r8983;
// inline asm
bfe.u32 %r8774, %r8779, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd2983, %r8774, 4;
add.s64 %rd2984, %rd48, %rd2983;
ld.shared.u32 %r8986, [%rd2984];
xor.b32 %r8987, %r8986, %r8985;
// inline asm
bfe.u32 %r8778, %r8779, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd2985, %r8778, 4;
add.s64 %rd2986, %rd51, %rd2985;
ld.shared.u32 %r8988, [%rd2986];
add.s32 %r8989, %r8988, %r8987;
xor.b32 %r8990, %r10345, %r8763;
xor.b32 %r8795, %r8990, %r8989;
// ---- Round 7: %r8811 = %r10344 ^ %r8779 ^ F(%r8795) ----
// inline asm
bfe.u32 %r8782, %r8795, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd2987, %r8782, 4;
add.s64 %rd2988, %rd42, %rd2987;
ld.shared.u32 %r8991, [%rd2988];
// inline asm
bfe.u32 %r8786, %r8795, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd2989, %r8786, 4;
add.s64 %rd2990, %rd45, %rd2989;
ld.shared.u32 %r8992, [%rd2990];
add.s32 %r8993, %r8992, %r8991;
// inline asm
bfe.u32 %r8790, %r8795, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd2991, %r8790, 4;
add.s64 %rd2992, %rd48, %rd2991;
ld.shared.u32 %r8994, [%rd2992];
xor.b32 %r8995, %r8994, %r8993;
// inline asm
bfe.u32 %r8794, %r8795, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd2993, %r8794, 4;
add.s64 %rd2994, %rd51, %rd2993;
ld.shared.u32 %r8996, [%rd2994];
add.s32 %r8997, %r8996, %r8995;
xor.b32 %r8998, %r10344, %r8779;
xor.b32 %r8811, %r8998, %r8997;
// ---- Round 8: %r8827 = %r10343 ^ %r8795 ^ F(%r8811) ----
// inline asm
bfe.u32 %r8798, %r8811, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd2995, %r8798, 4;
add.s64 %rd2996, %rd42, %rd2995;
ld.shared.u32 %r8999, [%rd2996];
// inline asm
bfe.u32 %r8802, %r8811, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd2997, %r8802, 4;
add.s64 %rd2998, %rd45, %rd2997;
ld.shared.u32 %r9000, [%rd2998];
add.s32 %r9001, %r9000, %r8999;
// inline asm
bfe.u32 %r8806, %r8811, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd2999, %r8806, 4;
add.s64 %rd3000, %rd48, %rd2999;
ld.shared.u32 %r9002, [%rd3000];
xor.b32 %r9003, %r9002, %r9001;
// inline asm
bfe.u32 %r8810, %r8811, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd3001, %r8810, 4;
add.s64 %rd3002, %rd51, %rd3001;
ld.shared.u32 %r9004, [%rd3002];
add.s32 %r9005, %r9004, %r9003;
xor.b32 %r9006, %r10343, %r8795;
xor.b32 %r8827, %r9006, %r9005;
// ---- Round 9: %r8843 = %r10342 ^ %r8811 ^ F(%r8827) ----
// inline asm
bfe.u32 %r8814, %r8827, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd3003, %r8814, 4;
add.s64 %rd3004, %rd42, %rd3003;
ld.shared.u32 %r9007, [%rd3004];
// inline asm
bfe.u32 %r8818, %r8827, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd3005, %r8818, 4;
add.s64 %rd3006, %rd45, %rd3005;
ld.shared.u32 %r9008, [%rd3006];
add.s32 %r9009, %r9008, %r9007;
// inline asm
bfe.u32 %r8822, %r8827, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd3007, %r8822, 4;
add.s64 %rd3008, %rd48, %rd3007;
ld.shared.u32 %r9010, [%rd3008];
xor.b32 %r9011, %r9010, %r9009;
// inline asm
bfe.u32 %r8826, %r8827, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd3009, %r8826, 4;
add.s64 %rd3010, %rd51, %rd3009;
ld.shared.u32 %r9012, [%rd3010];
add.s32 %r9013, %r9012, %r9011;
xor.b32 %r9014, %r10342, %r8811;
xor.b32 %r8843, %r9014, %r9013;
// ---- Round 10: %r8859 = %r10341 ^ %r8827 ^ F(%r8843) ----
// inline asm
bfe.u32 %r8830, %r8843, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd3011, %r8830, 4;
add.s64 %rd3012, %rd42, %rd3011;
ld.shared.u32 %r9015, [%rd3012];
// inline asm
bfe.u32 %r8834, %r8843, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd3013, %r8834, 4;
add.s64 %rd3014, %rd45, %rd3013;
ld.shared.u32 %r9016, [%rd3014];
add.s32 %r9017, %r9016, %r9015;
// inline asm
bfe.u32 %r8838, %r8843, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd3015, %r8838, 4;
add.s64 %rd3016, %rd48, %rd3015;
ld.shared.u32 %r9018, [%rd3016];
xor.b32 %r9019, %r9018, %r9017;
// inline asm
bfe.u32 %r8842, %r8843, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd3017, %r8842, 4;
add.s64 %rd3018, %rd51, %rd3017;
ld.shared.u32 %r9020, [%rd3018];
add.s32 %r9021, %r9020, %r9019;
xor.b32 %r9022, %r10341, %r8827;
xor.b32 %r8859, %r9022, %r9021;
// ---- Round 11: %r8875 = %r10340 ^ %r8843 ^ F(%r8859) ----
// inline asm
bfe.u32 %r8846, %r8859, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd3019, %r8846, 4;
add.s64 %rd3020, %rd42, %rd3019;
ld.shared.u32 %r9023, [%rd3020];
// inline asm
bfe.u32 %r8850, %r8859, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd3021, %r8850, 4;
add.s64 %rd3022, %rd45, %rd3021;
ld.shared.u32 %r9024, [%rd3022];
add.s32 %r9025, %r9024, %r9023;
// inline asm
bfe.u32 %r8854, %r8859, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd3023, %r8854, 4;
add.s64 %rd3024, %rd48, %rd3023;
ld.shared.u32 %r9026, [%rd3024];
xor.b32 %r9027, %r9026, %r9025;
// inline asm
bfe.u32 %r8858, %r8859, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd3025, %r8858, 4;
add.s64 %rd3026, %rd51, %rd3025;
ld.shared.u32 %r9028, [%rd3026];
add.s32 %r9029, %r9028, %r9027;
xor.b32 %r9030, %r10340, %r8843;
xor.b32 %r8875, %r9030, %r9029;
// ---- Round 12: %r8891 = %r10339 ^ %r8859 ^ F(%r8875) ----
// inline asm
bfe.u32 %r8862, %r8875, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd3027, %r8862, 4;
add.s64 %rd3028, %rd42, %rd3027;
ld.shared.u32 %r9031, [%rd3028];
// inline asm
bfe.u32 %r8866, %r8875, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd3029, %r8866, 4;
add.s64 %rd3030, %rd45, %rd3029;
ld.shared.u32 %r9032, [%rd3030];
add.s32 %r9033, %r9032, %r9031;
// inline asm
bfe.u32 %r8870, %r8875, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd3031, %r8870, 4;
add.s64 %rd3032, %rd48, %rd3031;
ld.shared.u32 %r9034, [%rd3032];
xor.b32 %r9035, %r9034, %r9033;
// inline asm
bfe.u32 %r8874, %r8875, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd3033, %r8874, 4;
add.s64 %rd3034, %rd51, %rd3033;
ld.shared.u32 %r9036, [%rd3034];
add.s32 %r9037, %r9036, %r9035;
xor.b32 %r9038, %r10339, %r8859;
xor.b32 %r8891, %r9038, %r9037;
// ---- Round 13: %r8907 = %r10338 ^ %r8875 ^ F(%r8891) ----
// inline asm
bfe.u32 %r8878, %r8891, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd3035, %r8878, 4;
add.s64 %rd3036, %rd42, %rd3035;
ld.shared.u32 %r9039, [%rd3036];
// inline asm
bfe.u32 %r8882, %r8891, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd3037, %r8882, 4;
add.s64 %rd3038, %rd45, %rd3037;
ld.shared.u32 %r9040, [%rd3038];
add.s32 %r9041, %r9040, %r9039;
// inline asm
bfe.u32 %r8886, %r8891, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd3039, %r8886, 4;
add.s64 %rd3040, %rd48, %rd3039;
ld.shared.u32 %r9042, [%rd3040];
xor.b32 %r9043, %r9042, %r9041;
// inline asm
bfe.u32 %r8890, %r8891, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd3041, %r8890, 4;
add.s64 %rd3042, %rd51, %rd3041;
ld.shared.u32 %r9044, [%rd3042];
add.s32 %r9045, %r9044, %r9043;
xor.b32 %r9046, %r10338, %r8875;
xor.b32 %r8907, %r9046, %r9045;
// ---- Round 14: %r8923 = %r10337 ^ %r8891 ^ F(%r8907) ----
// inline asm
bfe.u32 %r8894, %r8907, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd3043, %r8894, 4;
add.s64 %rd3044, %rd42, %rd3043;
ld.shared.u32 %r9047, [%rd3044];
// inline asm
bfe.u32 %r8898, %r8907, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd3045, %r8898, 4;
add.s64 %rd3046, %rd45, %rd3045;
ld.shared.u32 %r9048, [%rd3046];
add.s32 %r9049, %r9048, %r9047;
// inline asm
bfe.u32 %r8902, %r8907, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd3047, %r8902, 4;
add.s64 %rd3048, %rd48, %rd3047;
ld.shared.u32 %r9050, [%rd3048];
xor.b32 %r9051, %r9050, %r9049;
// inline asm
bfe.u32 %r8906, %r8907, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd3049, %r8906, 4;
add.s64 %rd3050, %rd51, %rd3049;
ld.shared.u32 %r9052, [%rd3050];
add.s32 %r9053, %r9052, %r9051;
xor.b32 %r9054, %r10337, %r8891;
xor.b32 %r8923, %r9054, %r9053;
// ---- Round 15: %r8939 = %r10336 ^ %r8907 ^ F(%r8923) ----
// inline asm
bfe.u32 %r8910, %r8923, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd3051, %r8910, 4;
add.s64 %rd3052, %rd42, %rd3051;
ld.shared.u32 %r9055, [%rd3052];
// inline asm
bfe.u32 %r8914, %r8923, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd3053, %r8914, 4;
add.s64 %rd3054, %rd45, %rd3053;
ld.shared.u32 %r9056, [%rd3054];
add.s32 %r9057, %r9056, %r9055;
// inline asm
bfe.u32 %r8918, %r8923, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd3055, %r8918, 4;
add.s64 %rd3056, %rd48, %rd3055;
ld.shared.u32 %r9058, [%rd3056];
xor.b32 %r9059, %r9058, %r9057;
// inline asm
bfe.u32 %r8922, %r8923, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd3057, %r8922, 4;
add.s64 %rd3058, %rd51, %rd3057;
ld.shared.u32 %r9060, [%rd3058];
add.s32 %r9061, %r9060, %r9059;
xor.b32 %r9062, %r10336, %r8907;
xor.b32 %r8939, %r9062, %r9061;
// ---- Round 16: %r10326 = %r10335 ^ %r8923 ^ F(%r8939) ----
// inline asm
bfe.u32 %r8926, %r8939, %r10258, %r10259;
// inline asm
mul.wide.u32 %rd3059, %r8926, 4;
add.s64 %rd3060, %rd42, %rd3059;
ld.shared.u32 %r9063, [%rd3060];
// inline asm
bfe.u32 %r8930, %r8939, %r10260, %r10259;
// inline asm
mul.wide.u32 %rd3061, %r8930, 4;
add.s64 %rd3062, %rd45, %rd3061;
ld.shared.u32 %r9064, [%rd3062];
add.s32 %r9065, %r9064, %r9063;
// inline asm
bfe.u32 %r8934, %r8939, %r10259, %r10259;
// inline asm
mul.wide.u32 %rd3063, %r8934, 4;
add.s64 %rd3064, %rd48, %rd3063;
ld.shared.u32 %r9066, [%rd3064];
xor.b32 %r9067, %r9066, %r9065;
// inline asm
bfe.u32 %r8938, %r8939, %r10325, %r10259;
// inline asm
mul.wide.u32 %rd3065, %r8938, 4;
add.s64 %rd3066, %rd51, %rd3065;
ld.shared.u32 %r9068, [%rd3066];
add.s32 %r9069, %r9068, %r9067;
xor.b32 %r9070, %r10335, %r8923;
xor.b32 %r10326, %r9070, %r9069;
// Final key XOR on the other half; %r10327 / %r10326 are both this pass's
// output and the input state of the next pass.
xor.b32 %r10327, %r10334, %r8939;
// Write the output pair as two consecutive u32 words, then step the
// destination pointer past them.
st.shared.u32 [%rd3499], %r10327;
st.shared.u32 [%rd3499+4], %r10326;
add.s64 %rd3499, %rd3499, 8;
// Two words were produced, so the word counter advances by 2; repeat until
// it reaches 256.
add.s32 %r10322, %r10322, 2;
setp.lt.u32 %p8, %r10322, 256;
@%p8 bra BB4_14;
// ---------------------------------------------------------------------
// Loop BB4_16: fully unrolled 16-round Feistel encryption of the running
// pair (%r10327, %r10326); each iteration writes the resulting pair to
// shared memory. The round structure matches Blowfish block encryption
// (presumably the bcrypt key schedule - confirm against the kernel source).
//
//   F(x) = ((T0[x >> 24] + T1[(x >> 16) & 0xff]) ^ T2[(x >> 8) & 0xff])
//          + T3[x & 0xff]
//   T0..T3 are 32-bit tables in shared memory based at %rd42, %rd45,
//   %rd48 and %rd51 (byte index is scaled by 4 before the load).
//
//   %r10351 .. %r10334 : eighteen 32-bit subkeys, only read in this loop
//   %rd3500            : output cursor, starts at %rd4, +8 bytes/iteration
//   %r10325            : induction variable, +2 per iteration, loop
//                        continues while it is < 256 (unsigned)
// NOTE(review): %r10325 is not initialised in this block. The preceding
// loop passes it to bfe.u32 as a bit offset next to 24/16/8, so it is
// presumably 0 on entry - confirm.
// ---------------------------------------------------------------------
mov.u64 %rd3500, %rd4;
BB4_16:
// bfe.u32 operands: bit offsets 0 / 16 / 8 / 24; %r10263 (8) is also used
// as the field length. These are re-materialised on every iteration.
// NOTE: %r10328 must be 0 when this loop exits - the following loop
// (BB4_18) increments it as its induction variable without initialising it.
mov.u32 %r10328, 0;
mov.u32 %r10264, 16;
mov.u32 %r10263, 8;
mov.u32 %r10262, 24;
// Pre-whitening: %r9084 = first subkey ^ left half.
xor.b32 %r9084, %r10351, %r10327;
// Round 1: %r9100 = %r10350 ^ %r10326 ^ F(%r9084)
// inline asm
bfe.u32 %r9071, %r9084, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3070, %r9071, 4;
add.s64 %rd3071, %rd42, %rd3070;
ld.shared.u32 %r9328, [%rd3071];
// inline asm
bfe.u32 %r9075, %r9084, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3074, %r9075, 4;
add.s64 %rd3075, %rd45, %rd3074;
ld.shared.u32 %r9329, [%rd3075];
add.s32 %r9330, %r9329, %r9328;
// inline asm
bfe.u32 %r9079, %r9084, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3078, %r9079, 4;
add.s64 %rd3079, %rd48, %rd3078;
ld.shared.u32 %r9331, [%rd3079];
xor.b32 %r9332, %r9331, %r9330;
// inline asm
bfe.u32 %r9083, %r9084, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3082, %r9083, 4;
add.s64 %rd3083, %rd51, %rd3082;
ld.shared.u32 %r9333, [%rd3083];
add.s32 %r9334, %r9333, %r9332;
xor.b32 %r9335, %r10350, %r10326;
xor.b32 %r9100, %r9335, %r9334;
// Round 2: %r9116 = %r10349 ^ %r9084 ^ F(%r9100)
// inline asm
bfe.u32 %r9087, %r9100, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3084, %r9087, 4;
add.s64 %rd3085, %rd42, %rd3084;
ld.shared.u32 %r9336, [%rd3085];
// inline asm
bfe.u32 %r9091, %r9100, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3086, %r9091, 4;
add.s64 %rd3087, %rd45, %rd3086;
ld.shared.u32 %r9337, [%rd3087];
add.s32 %r9338, %r9337, %r9336;
// inline asm
bfe.u32 %r9095, %r9100, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3088, %r9095, 4;
add.s64 %rd3089, %rd48, %rd3088;
ld.shared.u32 %r9339, [%rd3089];
xor.b32 %r9340, %r9339, %r9338;
// inline asm
bfe.u32 %r9099, %r9100, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3090, %r9099, 4;
add.s64 %rd3091, %rd51, %rd3090;
ld.shared.u32 %r9341, [%rd3091];
add.s32 %r9342, %r9341, %r9340;
xor.b32 %r9343, %r10349, %r9084;
xor.b32 %r9116, %r9343, %r9342;
// Round 3: %r9132 = %r10348 ^ %r9100 ^ F(%r9116)
// inline asm
bfe.u32 %r9103, %r9116, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3092, %r9103, 4;
add.s64 %rd3093, %rd42, %rd3092;
ld.shared.u32 %r9344, [%rd3093];
// inline asm
bfe.u32 %r9107, %r9116, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3094, %r9107, 4;
add.s64 %rd3095, %rd45, %rd3094;
ld.shared.u32 %r9345, [%rd3095];
add.s32 %r9346, %r9345, %r9344;
// inline asm
bfe.u32 %r9111, %r9116, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3096, %r9111, 4;
add.s64 %rd3097, %rd48, %rd3096;
ld.shared.u32 %r9347, [%rd3097];
xor.b32 %r9348, %r9347, %r9346;
// inline asm
bfe.u32 %r9115, %r9116, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3098, %r9115, 4;
add.s64 %rd3099, %rd51, %rd3098;
ld.shared.u32 %r9349, [%rd3099];
add.s32 %r9350, %r9349, %r9348;
xor.b32 %r9351, %r10348, %r9100;
xor.b32 %r9132, %r9351, %r9350;
// Round 4: %r9148 = %r10347 ^ %r9116 ^ F(%r9132)
// inline asm
bfe.u32 %r9119, %r9132, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3100, %r9119, 4;
add.s64 %rd3101, %rd42, %rd3100;
ld.shared.u32 %r9352, [%rd3101];
// inline asm
bfe.u32 %r9123, %r9132, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3102, %r9123, 4;
add.s64 %rd3103, %rd45, %rd3102;
ld.shared.u32 %r9353, [%rd3103];
add.s32 %r9354, %r9353, %r9352;
// inline asm
bfe.u32 %r9127, %r9132, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3104, %r9127, 4;
add.s64 %rd3105, %rd48, %rd3104;
ld.shared.u32 %r9355, [%rd3105];
xor.b32 %r9356, %r9355, %r9354;
// inline asm
bfe.u32 %r9131, %r9132, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3106, %r9131, 4;
add.s64 %rd3107, %rd51, %rd3106;
ld.shared.u32 %r9357, [%rd3107];
add.s32 %r9358, %r9357, %r9356;
xor.b32 %r9359, %r10347, %r9116;
xor.b32 %r9148, %r9359, %r9358;
// Round 5: %r9164 = %r10346 ^ %r9132 ^ F(%r9148)
// inline asm
bfe.u32 %r9135, %r9148, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3108, %r9135, 4;
add.s64 %rd3109, %rd42, %rd3108;
ld.shared.u32 %r9360, [%rd3109];
// inline asm
bfe.u32 %r9139, %r9148, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3110, %r9139, 4;
add.s64 %rd3111, %rd45, %rd3110;
ld.shared.u32 %r9361, [%rd3111];
add.s32 %r9362, %r9361, %r9360;
// inline asm
bfe.u32 %r9143, %r9148, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3112, %r9143, 4;
add.s64 %rd3113, %rd48, %rd3112;
ld.shared.u32 %r9363, [%rd3113];
xor.b32 %r9364, %r9363, %r9362;
// inline asm
bfe.u32 %r9147, %r9148, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3114, %r9147, 4;
add.s64 %rd3115, %rd51, %rd3114;
ld.shared.u32 %r9365, [%rd3115];
add.s32 %r9366, %r9365, %r9364;
xor.b32 %r9367, %r10346, %r9132;
xor.b32 %r9164, %r9367, %r9366;
// Round 6: %r9180 = %r10345 ^ %r9148 ^ F(%r9164)
// inline asm
bfe.u32 %r9151, %r9164, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3116, %r9151, 4;
add.s64 %rd3117, %rd42, %rd3116;
ld.shared.u32 %r9368, [%rd3117];
// inline asm
bfe.u32 %r9155, %r9164, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3118, %r9155, 4;
add.s64 %rd3119, %rd45, %rd3118;
ld.shared.u32 %r9369, [%rd3119];
add.s32 %r9370, %r9369, %r9368;
// inline asm
bfe.u32 %r9159, %r9164, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3120, %r9159, 4;
add.s64 %rd3121, %rd48, %rd3120;
ld.shared.u32 %r9371, [%rd3121];
xor.b32 %r9372, %r9371, %r9370;
// inline asm
bfe.u32 %r9163, %r9164, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3122, %r9163, 4;
add.s64 %rd3123, %rd51, %rd3122;
ld.shared.u32 %r9373, [%rd3123];
add.s32 %r9374, %r9373, %r9372;
xor.b32 %r9375, %r10345, %r9148;
xor.b32 %r9180, %r9375, %r9374;
// Round 7: %r9196 = %r10344 ^ %r9164 ^ F(%r9180)
// inline asm
bfe.u32 %r9167, %r9180, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3124, %r9167, 4;
add.s64 %rd3125, %rd42, %rd3124;
ld.shared.u32 %r9376, [%rd3125];
// inline asm
bfe.u32 %r9171, %r9180, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3126, %r9171, 4;
add.s64 %rd3127, %rd45, %rd3126;
ld.shared.u32 %r9377, [%rd3127];
add.s32 %r9378, %r9377, %r9376;
// inline asm
bfe.u32 %r9175, %r9180, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3128, %r9175, 4;
add.s64 %rd3129, %rd48, %rd3128;
ld.shared.u32 %r9379, [%rd3129];
xor.b32 %r9380, %r9379, %r9378;
// inline asm
bfe.u32 %r9179, %r9180, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3130, %r9179, 4;
add.s64 %rd3131, %rd51, %rd3130;
ld.shared.u32 %r9381, [%rd3131];
add.s32 %r9382, %r9381, %r9380;
xor.b32 %r9383, %r10344, %r9164;
xor.b32 %r9196, %r9383, %r9382;
// Round 8: %r9212 = %r10343 ^ %r9180 ^ F(%r9196)
// inline asm
bfe.u32 %r9183, %r9196, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3132, %r9183, 4;
add.s64 %rd3133, %rd42, %rd3132;
ld.shared.u32 %r9384, [%rd3133];
// inline asm
bfe.u32 %r9187, %r9196, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3134, %r9187, 4;
add.s64 %rd3135, %rd45, %rd3134;
ld.shared.u32 %r9385, [%rd3135];
add.s32 %r9386, %r9385, %r9384;
// inline asm
bfe.u32 %r9191, %r9196, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3136, %r9191, 4;
add.s64 %rd3137, %rd48, %rd3136;
ld.shared.u32 %r9387, [%rd3137];
xor.b32 %r9388, %r9387, %r9386;
// inline asm
bfe.u32 %r9195, %r9196, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3138, %r9195, 4;
add.s64 %rd3139, %rd51, %rd3138;
ld.shared.u32 %r9389, [%rd3139];
add.s32 %r9390, %r9389, %r9388;
xor.b32 %r9391, %r10343, %r9180;
xor.b32 %r9212, %r9391, %r9390;
// Round 9: %r9228 = %r10342 ^ %r9196 ^ F(%r9212)
// inline asm
bfe.u32 %r9199, %r9212, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3140, %r9199, 4;
add.s64 %rd3141, %rd42, %rd3140;
ld.shared.u32 %r9392, [%rd3141];
// inline asm
bfe.u32 %r9203, %r9212, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3142, %r9203, 4;
add.s64 %rd3143, %rd45, %rd3142;
ld.shared.u32 %r9393, [%rd3143];
add.s32 %r9394, %r9393, %r9392;
// inline asm
bfe.u32 %r9207, %r9212, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3144, %r9207, 4;
add.s64 %rd3145, %rd48, %rd3144;
ld.shared.u32 %r9395, [%rd3145];
xor.b32 %r9396, %r9395, %r9394;
// inline asm
bfe.u32 %r9211, %r9212, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3146, %r9211, 4;
add.s64 %rd3147, %rd51, %rd3146;
ld.shared.u32 %r9397, [%rd3147];
add.s32 %r9398, %r9397, %r9396;
xor.b32 %r9399, %r10342, %r9196;
xor.b32 %r9228, %r9399, %r9398;
// Round 10: %r9244 = %r10341 ^ %r9212 ^ F(%r9228)
// inline asm
bfe.u32 %r9215, %r9228, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3148, %r9215, 4;
add.s64 %rd3149, %rd42, %rd3148;
ld.shared.u32 %r9400, [%rd3149];
// inline asm
bfe.u32 %r9219, %r9228, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3150, %r9219, 4;
add.s64 %rd3151, %rd45, %rd3150;
ld.shared.u32 %r9401, [%rd3151];
add.s32 %r9402, %r9401, %r9400;
// inline asm
bfe.u32 %r9223, %r9228, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3152, %r9223, 4;
add.s64 %rd3153, %rd48, %rd3152;
ld.shared.u32 %r9403, [%rd3153];
xor.b32 %r9404, %r9403, %r9402;
// inline asm
bfe.u32 %r9227, %r9228, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3154, %r9227, 4;
add.s64 %rd3155, %rd51, %rd3154;
ld.shared.u32 %r9405, [%rd3155];
add.s32 %r9406, %r9405, %r9404;
xor.b32 %r9407, %r10341, %r9212;
xor.b32 %r9244, %r9407, %r9406;
// Round 11: %r9260 = %r10340 ^ %r9228 ^ F(%r9244)
// inline asm
bfe.u32 %r9231, %r9244, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3156, %r9231, 4;
add.s64 %rd3157, %rd42, %rd3156;
ld.shared.u32 %r9408, [%rd3157];
// inline asm
bfe.u32 %r9235, %r9244, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3158, %r9235, 4;
add.s64 %rd3159, %rd45, %rd3158;
ld.shared.u32 %r9409, [%rd3159];
add.s32 %r9410, %r9409, %r9408;
// inline asm
bfe.u32 %r9239, %r9244, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3160, %r9239, 4;
add.s64 %rd3161, %rd48, %rd3160;
ld.shared.u32 %r9411, [%rd3161];
xor.b32 %r9412, %r9411, %r9410;
// inline asm
bfe.u32 %r9243, %r9244, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3162, %r9243, 4;
add.s64 %rd3163, %rd51, %rd3162;
ld.shared.u32 %r9413, [%rd3163];
add.s32 %r9414, %r9413, %r9412;
xor.b32 %r9415, %r10340, %r9228;
xor.b32 %r9260, %r9415, %r9414;
// Round 12: %r9276 = %r10339 ^ %r9244 ^ F(%r9260)
// inline asm
bfe.u32 %r9247, %r9260, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3164, %r9247, 4;
add.s64 %rd3165, %rd42, %rd3164;
ld.shared.u32 %r9416, [%rd3165];
// inline asm
bfe.u32 %r9251, %r9260, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3166, %r9251, 4;
add.s64 %rd3167, %rd45, %rd3166;
ld.shared.u32 %r9417, [%rd3167];
add.s32 %r9418, %r9417, %r9416;
// inline asm
bfe.u32 %r9255, %r9260, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3168, %r9255, 4;
add.s64 %rd3169, %rd48, %rd3168;
ld.shared.u32 %r9419, [%rd3169];
xor.b32 %r9420, %r9419, %r9418;
// inline asm
bfe.u32 %r9259, %r9260, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3170, %r9259, 4;
add.s64 %rd3171, %rd51, %rd3170;
ld.shared.u32 %r9421, [%rd3171];
add.s32 %r9422, %r9421, %r9420;
xor.b32 %r9423, %r10339, %r9244;
xor.b32 %r9276, %r9423, %r9422;
// Round 13: %r9292 = %r10338 ^ %r9260 ^ F(%r9276)
// inline asm
bfe.u32 %r9263, %r9276, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3172, %r9263, 4;
add.s64 %rd3173, %rd42, %rd3172;
ld.shared.u32 %r9424, [%rd3173];
// inline asm
bfe.u32 %r9267, %r9276, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3174, %r9267, 4;
add.s64 %rd3175, %rd45, %rd3174;
ld.shared.u32 %r9425, [%rd3175];
add.s32 %r9426, %r9425, %r9424;
// inline asm
bfe.u32 %r9271, %r9276, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3176, %r9271, 4;
add.s64 %rd3177, %rd48, %rd3176;
ld.shared.u32 %r9427, [%rd3177];
xor.b32 %r9428, %r9427, %r9426;
// inline asm
bfe.u32 %r9275, %r9276, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3178, %r9275, 4;
add.s64 %rd3179, %rd51, %rd3178;
ld.shared.u32 %r9429, [%rd3179];
add.s32 %r9430, %r9429, %r9428;
xor.b32 %r9431, %r10338, %r9260;
xor.b32 %r9292, %r9431, %r9430;
// Round 14: %r9308 = %r10337 ^ %r9276 ^ F(%r9292)
// inline asm
bfe.u32 %r9279, %r9292, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3180, %r9279, 4;
add.s64 %rd3181, %rd42, %rd3180;
ld.shared.u32 %r9432, [%rd3181];
// inline asm
bfe.u32 %r9283, %r9292, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3182, %r9283, 4;
add.s64 %rd3183, %rd45, %rd3182;
ld.shared.u32 %r9433, [%rd3183];
add.s32 %r9434, %r9433, %r9432;
// inline asm
bfe.u32 %r9287, %r9292, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3184, %r9287, 4;
add.s64 %rd3185, %rd48, %rd3184;
ld.shared.u32 %r9435, [%rd3185];
xor.b32 %r9436, %r9435, %r9434;
// inline asm
bfe.u32 %r9291, %r9292, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3186, %r9291, 4;
add.s64 %rd3187, %rd51, %rd3186;
ld.shared.u32 %r9437, [%rd3187];
add.s32 %r9438, %r9437, %r9436;
xor.b32 %r9439, %r10337, %r9276;
xor.b32 %r9308, %r9439, %r9438;
// Round 15: %r9324 = %r10336 ^ %r9292 ^ F(%r9308)
// inline asm
bfe.u32 %r9295, %r9308, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3188, %r9295, 4;
add.s64 %rd3189, %rd42, %rd3188;
ld.shared.u32 %r9440, [%rd3189];
// inline asm
bfe.u32 %r9299, %r9308, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3190, %r9299, 4;
add.s64 %rd3191, %rd45, %rd3190;
ld.shared.u32 %r9441, [%rd3191];
add.s32 %r9442, %r9441, %r9440;
// inline asm
bfe.u32 %r9303, %r9308, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3192, %r9303, 4;
add.s64 %rd3193, %rd48, %rd3192;
ld.shared.u32 %r9443, [%rd3193];
xor.b32 %r9444, %r9443, %r9442;
// inline asm
bfe.u32 %r9307, %r9308, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3194, %r9307, 4;
add.s64 %rd3195, %rd51, %rd3194;
ld.shared.u32 %r9445, [%rd3195];
add.s32 %r9446, %r9445, %r9444;
xor.b32 %r9447, %r10336, %r9292;
xor.b32 %r9324, %r9447, %r9446;
// Round 16: %r10326 = %r10335 ^ %r9308 ^ F(%r9324)
// inline asm
bfe.u32 %r9311, %r9324, %r10262, %r10263;
// inline asm
mul.wide.u32 %rd3196, %r9311, 4;
add.s64 %rd3197, %rd42, %rd3196;
ld.shared.u32 %r9448, [%rd3197];
// inline asm
bfe.u32 %r9315, %r9324, %r10264, %r10263;
// inline asm
mul.wide.u32 %rd3198, %r9315, 4;
add.s64 %rd3199, %rd45, %rd3198;
ld.shared.u32 %r9449, [%rd3199];
add.s32 %r9450, %r9449, %r9448;
// inline asm
bfe.u32 %r9319, %r9324, %r10263, %r10263;
// inline asm
mul.wide.u32 %rd3200, %r9319, 4;
add.s64 %rd3201, %rd48, %rd3200;
ld.shared.u32 %r9451, [%rd3201];
xor.b32 %r9452, %r9451, %r9450;
// inline asm
bfe.u32 %r9323, %r9324, %r10328, %r10263;
// inline asm
mul.wide.u32 %rd3202, %r9323, 4;
add.s64 %rd3203, %rd51, %rd3202;
ld.shared.u32 %r9453, [%rd3203];
add.s32 %r9454, %r9453, %r9452;
xor.b32 %r9455, %r10335, %r9308;
xor.b32 %r10326, %r9455, %r9454;
// Post-whitening with the last subkey; (%r10327, %r10326) is both this
// iteration's output and the next iteration's input (chained).
xor.b32 %r10327, %r10334, %r9324;
// Store the pair as two consecutive 32-bit words and advance the cursor.
st.shared.u32 [%rd3500], %r10327;
st.shared.u32 [%rd3500+4], %r10326;
add.s64 %rd3500, %rd3500, 8;
// Two words were produced, so the counter steps by 2; exit at 256.
add.s32 %r10325, %r10325, 2;
setp.lt.u32 %p9, %r10325, 256;
@%p9 bra BB4_16;
// ---------------------------------------------------------------------
// Loop BB4_18: fully unrolled 16-round Feistel encryption of the running
// pair (%r10327, %r10326); each iteration writes the resulting pair to
// shared memory. The round structure matches Blowfish block encryption
// (presumably the bcrypt key schedule - confirm against the kernel source).
//
//   F(x) = ((T0[x >> 24] + T1[(x >> 16) & 0xff]) ^ T2[(x >> 8) & 0xff])
//          + T3[x & 0xff]
//   T0..T3 are 32-bit tables in shared memory based at %rd42, %rd45,
//   %rd48 and %rd51 (byte index is scaled by 4 before the load).
//
//   %r10351 .. %r10334 : eighteen 32-bit subkeys, only read in this loop
//   %rd3501            : output cursor, starts at %rd5, +8 bytes/iteration
//   %r10328            : induction variable, +2 per iteration, loop
//                        continues while it is < 256 (unsigned)
// NOTE: %r10328 is not initialised in this block; the preceding loop
// (BB4_16) writes 0 to it at the top of every iteration, so it is 0 here
// on entry.
// ---------------------------------------------------------------------
mov.u64 %rd3501, %rd5;
BB4_18:
// bfe.u32 operands: bit offsets 0 / 16 / 8 / 24; %r10267 (8) is also used
// as the field length. These are re-materialised on every iteration.
// NOTE(review): %r10331 is presumably consumed as the next loop's counter,
// the same way this loop consumes %r10328 - not visible here; confirm
// before hoisting or removing this write.
mov.u32 %r10331, 0;
mov.u32 %r10268, 16;
mov.u32 %r10267, 8;
mov.u32 %r10266, 24;
// Pre-whitening: %r9469 = first subkey ^ left half.
xor.b32 %r9469, %r10351, %r10327;
// Round 1: %r9485 = %r10350 ^ %r10326 ^ F(%r9469)
// inline asm
bfe.u32 %r9456, %r9469, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3207, %r9456, 4;
add.s64 %rd3208, %rd42, %rd3207;
ld.shared.u32 %r9713, [%rd3208];
// inline asm
bfe.u32 %r9460, %r9469, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3211, %r9460, 4;
add.s64 %rd3212, %rd45, %rd3211;
ld.shared.u32 %r9714, [%rd3212];
add.s32 %r9715, %r9714, %r9713;
// inline asm
bfe.u32 %r9464, %r9469, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3215, %r9464, 4;
add.s64 %rd3216, %rd48, %rd3215;
ld.shared.u32 %r9716, [%rd3216];
xor.b32 %r9717, %r9716, %r9715;
// inline asm
bfe.u32 %r9468, %r9469, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3219, %r9468, 4;
add.s64 %rd3220, %rd51, %rd3219;
ld.shared.u32 %r9718, [%rd3220];
add.s32 %r9719, %r9718, %r9717;
xor.b32 %r9720, %r10350, %r10326;
xor.b32 %r9485, %r9720, %r9719;
// Round 2: %r9501 = %r10349 ^ %r9469 ^ F(%r9485)
// inline asm
bfe.u32 %r9472, %r9485, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3221, %r9472, 4;
add.s64 %rd3222, %rd42, %rd3221;
ld.shared.u32 %r9721, [%rd3222];
// inline asm
bfe.u32 %r9476, %r9485, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3223, %r9476, 4;
add.s64 %rd3224, %rd45, %rd3223;
ld.shared.u32 %r9722, [%rd3224];
add.s32 %r9723, %r9722, %r9721;
// inline asm
bfe.u32 %r9480, %r9485, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3225, %r9480, 4;
add.s64 %rd3226, %rd48, %rd3225;
ld.shared.u32 %r9724, [%rd3226];
xor.b32 %r9725, %r9724, %r9723;
// inline asm
bfe.u32 %r9484, %r9485, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3227, %r9484, 4;
add.s64 %rd3228, %rd51, %rd3227;
ld.shared.u32 %r9726, [%rd3228];
add.s32 %r9727, %r9726, %r9725;
xor.b32 %r9728, %r10349, %r9469;
xor.b32 %r9501, %r9728, %r9727;
// Round 3: %r9517 = %r10348 ^ %r9485 ^ F(%r9501)
// inline asm
bfe.u32 %r9488, %r9501, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3229, %r9488, 4;
add.s64 %rd3230, %rd42, %rd3229;
ld.shared.u32 %r9729, [%rd3230];
// inline asm
bfe.u32 %r9492, %r9501, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3231, %r9492, 4;
add.s64 %rd3232, %rd45, %rd3231;
ld.shared.u32 %r9730, [%rd3232];
add.s32 %r9731, %r9730, %r9729;
// inline asm
bfe.u32 %r9496, %r9501, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3233, %r9496, 4;
add.s64 %rd3234, %rd48, %rd3233;
ld.shared.u32 %r9732, [%rd3234];
xor.b32 %r9733, %r9732, %r9731;
// inline asm
bfe.u32 %r9500, %r9501, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3235, %r9500, 4;
add.s64 %rd3236, %rd51, %rd3235;
ld.shared.u32 %r9734, [%rd3236];
add.s32 %r9735, %r9734, %r9733;
xor.b32 %r9736, %r10348, %r9485;
xor.b32 %r9517, %r9736, %r9735;
// Round 4: %r9533 = %r10347 ^ %r9501 ^ F(%r9517)
// inline asm
bfe.u32 %r9504, %r9517, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3237, %r9504, 4;
add.s64 %rd3238, %rd42, %rd3237;
ld.shared.u32 %r9737, [%rd3238];
// inline asm
bfe.u32 %r9508, %r9517, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3239, %r9508, 4;
add.s64 %rd3240, %rd45, %rd3239;
ld.shared.u32 %r9738, [%rd3240];
add.s32 %r9739, %r9738, %r9737;
// inline asm
bfe.u32 %r9512, %r9517, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3241, %r9512, 4;
add.s64 %rd3242, %rd48, %rd3241;
ld.shared.u32 %r9740, [%rd3242];
xor.b32 %r9741, %r9740, %r9739;
// inline asm
bfe.u32 %r9516, %r9517, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3243, %r9516, 4;
add.s64 %rd3244, %rd51, %rd3243;
ld.shared.u32 %r9742, [%rd3244];
add.s32 %r9743, %r9742, %r9741;
xor.b32 %r9744, %r10347, %r9501;
xor.b32 %r9533, %r9744, %r9743;
// Round 5: %r9549 = %r10346 ^ %r9517 ^ F(%r9533)
// inline asm
bfe.u32 %r9520, %r9533, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3245, %r9520, 4;
add.s64 %rd3246, %rd42, %rd3245;
ld.shared.u32 %r9745, [%rd3246];
// inline asm
bfe.u32 %r9524, %r9533, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3247, %r9524, 4;
add.s64 %rd3248, %rd45, %rd3247;
ld.shared.u32 %r9746, [%rd3248];
add.s32 %r9747, %r9746, %r9745;
// inline asm
bfe.u32 %r9528, %r9533, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3249, %r9528, 4;
add.s64 %rd3250, %rd48, %rd3249;
ld.shared.u32 %r9748, [%rd3250];
xor.b32 %r9749, %r9748, %r9747;
// inline asm
bfe.u32 %r9532, %r9533, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3251, %r9532, 4;
add.s64 %rd3252, %rd51, %rd3251;
ld.shared.u32 %r9750, [%rd3252];
add.s32 %r9751, %r9750, %r9749;
xor.b32 %r9752, %r10346, %r9517;
xor.b32 %r9549, %r9752, %r9751;
// Round 6: %r9565 = %r10345 ^ %r9533 ^ F(%r9549)
// inline asm
bfe.u32 %r9536, %r9549, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3253, %r9536, 4;
add.s64 %rd3254, %rd42, %rd3253;
ld.shared.u32 %r9753, [%rd3254];
// inline asm
bfe.u32 %r9540, %r9549, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3255, %r9540, 4;
add.s64 %rd3256, %rd45, %rd3255;
ld.shared.u32 %r9754, [%rd3256];
add.s32 %r9755, %r9754, %r9753;
// inline asm
bfe.u32 %r9544, %r9549, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3257, %r9544, 4;
add.s64 %rd3258, %rd48, %rd3257;
ld.shared.u32 %r9756, [%rd3258];
xor.b32 %r9757, %r9756, %r9755;
// inline asm
bfe.u32 %r9548, %r9549, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3259, %r9548, 4;
add.s64 %rd3260, %rd51, %rd3259;
ld.shared.u32 %r9758, [%rd3260];
add.s32 %r9759, %r9758, %r9757;
xor.b32 %r9760, %r10345, %r9533;
xor.b32 %r9565, %r9760, %r9759;
// Round 7: %r9581 = %r10344 ^ %r9549 ^ F(%r9565)
// inline asm
bfe.u32 %r9552, %r9565, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3261, %r9552, 4;
add.s64 %rd3262, %rd42, %rd3261;
ld.shared.u32 %r9761, [%rd3262];
// inline asm
bfe.u32 %r9556, %r9565, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3263, %r9556, 4;
add.s64 %rd3264, %rd45, %rd3263;
ld.shared.u32 %r9762, [%rd3264];
add.s32 %r9763, %r9762, %r9761;
// inline asm
bfe.u32 %r9560, %r9565, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3265, %r9560, 4;
add.s64 %rd3266, %rd48, %rd3265;
ld.shared.u32 %r9764, [%rd3266];
xor.b32 %r9765, %r9764, %r9763;
// inline asm
bfe.u32 %r9564, %r9565, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3267, %r9564, 4;
add.s64 %rd3268, %rd51, %rd3267;
ld.shared.u32 %r9766, [%rd3268];
add.s32 %r9767, %r9766, %r9765;
xor.b32 %r9768, %r10344, %r9549;
xor.b32 %r9581, %r9768, %r9767;
// Round 8: %r9597 = %r10343 ^ %r9565 ^ F(%r9581)
// inline asm
bfe.u32 %r9568, %r9581, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3269, %r9568, 4;
add.s64 %rd3270, %rd42, %rd3269;
ld.shared.u32 %r9769, [%rd3270];
// inline asm
bfe.u32 %r9572, %r9581, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3271, %r9572, 4;
add.s64 %rd3272, %rd45, %rd3271;
ld.shared.u32 %r9770, [%rd3272];
add.s32 %r9771, %r9770, %r9769;
// inline asm
bfe.u32 %r9576, %r9581, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3273, %r9576, 4;
add.s64 %rd3274, %rd48, %rd3273;
ld.shared.u32 %r9772, [%rd3274];
xor.b32 %r9773, %r9772, %r9771;
// inline asm
bfe.u32 %r9580, %r9581, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3275, %r9580, 4;
add.s64 %rd3276, %rd51, %rd3275;
ld.shared.u32 %r9774, [%rd3276];
add.s32 %r9775, %r9774, %r9773;
xor.b32 %r9776, %r10343, %r9565;
xor.b32 %r9597, %r9776, %r9775;
// Round 9: %r9613 = %r10342 ^ %r9581 ^ F(%r9597)
// inline asm
bfe.u32 %r9584, %r9597, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3277, %r9584, 4;
add.s64 %rd3278, %rd42, %rd3277;
ld.shared.u32 %r9777, [%rd3278];
// inline asm
bfe.u32 %r9588, %r9597, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3279, %r9588, 4;
add.s64 %rd3280, %rd45, %rd3279;
ld.shared.u32 %r9778, [%rd3280];
add.s32 %r9779, %r9778, %r9777;
// inline asm
bfe.u32 %r9592, %r9597, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3281, %r9592, 4;
add.s64 %rd3282, %rd48, %rd3281;
ld.shared.u32 %r9780, [%rd3282];
xor.b32 %r9781, %r9780, %r9779;
// inline asm
bfe.u32 %r9596, %r9597, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3283, %r9596, 4;
add.s64 %rd3284, %rd51, %rd3283;
ld.shared.u32 %r9782, [%rd3284];
add.s32 %r9783, %r9782, %r9781;
xor.b32 %r9784, %r10342, %r9581;
xor.b32 %r9613, %r9784, %r9783;
// Round 10: %r9629 = %r10341 ^ %r9597 ^ F(%r9613)
// inline asm
bfe.u32 %r9600, %r9613, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3285, %r9600, 4;
add.s64 %rd3286, %rd42, %rd3285;
ld.shared.u32 %r9785, [%rd3286];
// inline asm
bfe.u32 %r9604, %r9613, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3287, %r9604, 4;
add.s64 %rd3288, %rd45, %rd3287;
ld.shared.u32 %r9786, [%rd3288];
add.s32 %r9787, %r9786, %r9785;
// inline asm
bfe.u32 %r9608, %r9613, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3289, %r9608, 4;
add.s64 %rd3290, %rd48, %rd3289;
ld.shared.u32 %r9788, [%rd3290];
xor.b32 %r9789, %r9788, %r9787;
// inline asm
bfe.u32 %r9612, %r9613, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3291, %r9612, 4;
add.s64 %rd3292, %rd51, %rd3291;
ld.shared.u32 %r9790, [%rd3292];
add.s32 %r9791, %r9790, %r9789;
xor.b32 %r9792, %r10341, %r9597;
xor.b32 %r9629, %r9792, %r9791;
// Round 11: %r9645 = %r10340 ^ %r9613 ^ F(%r9629)
// inline asm
bfe.u32 %r9616, %r9629, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3293, %r9616, 4;
add.s64 %rd3294, %rd42, %rd3293;
ld.shared.u32 %r9793, [%rd3294];
// inline asm
bfe.u32 %r9620, %r9629, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3295, %r9620, 4;
add.s64 %rd3296, %rd45, %rd3295;
ld.shared.u32 %r9794, [%rd3296];
add.s32 %r9795, %r9794, %r9793;
// inline asm
bfe.u32 %r9624, %r9629, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3297, %r9624, 4;
add.s64 %rd3298, %rd48, %rd3297;
ld.shared.u32 %r9796, [%rd3298];
xor.b32 %r9797, %r9796, %r9795;
// inline asm
bfe.u32 %r9628, %r9629, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3299, %r9628, 4;
add.s64 %rd3300, %rd51, %rd3299;
ld.shared.u32 %r9798, [%rd3300];
add.s32 %r9799, %r9798, %r9797;
xor.b32 %r9800, %r10340, %r9613;
xor.b32 %r9645, %r9800, %r9799;
// Round 12: %r9661 = %r10339 ^ %r9629 ^ F(%r9645)
// inline asm
bfe.u32 %r9632, %r9645, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3301, %r9632, 4;
add.s64 %rd3302, %rd42, %rd3301;
ld.shared.u32 %r9801, [%rd3302];
// inline asm
bfe.u32 %r9636, %r9645, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3303, %r9636, 4;
add.s64 %rd3304, %rd45, %rd3303;
ld.shared.u32 %r9802, [%rd3304];
add.s32 %r9803, %r9802, %r9801;
// inline asm
bfe.u32 %r9640, %r9645, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3305, %r9640, 4;
add.s64 %rd3306, %rd48, %rd3305;
ld.shared.u32 %r9804, [%rd3306];
xor.b32 %r9805, %r9804, %r9803;
// inline asm
bfe.u32 %r9644, %r9645, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3307, %r9644, 4;
add.s64 %rd3308, %rd51, %rd3307;
ld.shared.u32 %r9806, [%rd3308];
add.s32 %r9807, %r9806, %r9805;
xor.b32 %r9808, %r10339, %r9629;
xor.b32 %r9661, %r9808, %r9807;
// Round 13: %r9677 = %r10338 ^ %r9645 ^ F(%r9661)
// inline asm
bfe.u32 %r9648, %r9661, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3309, %r9648, 4;
add.s64 %rd3310, %rd42, %rd3309;
ld.shared.u32 %r9809, [%rd3310];
// inline asm
bfe.u32 %r9652, %r9661, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3311, %r9652, 4;
add.s64 %rd3312, %rd45, %rd3311;
ld.shared.u32 %r9810, [%rd3312];
add.s32 %r9811, %r9810, %r9809;
// inline asm
bfe.u32 %r9656, %r9661, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3313, %r9656, 4;
add.s64 %rd3314, %rd48, %rd3313;
ld.shared.u32 %r9812, [%rd3314];
xor.b32 %r9813, %r9812, %r9811;
// inline asm
bfe.u32 %r9660, %r9661, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3315, %r9660, 4;
add.s64 %rd3316, %rd51, %rd3315;
ld.shared.u32 %r9814, [%rd3316];
add.s32 %r9815, %r9814, %r9813;
xor.b32 %r9816, %r10338, %r9645;
xor.b32 %r9677, %r9816, %r9815;
// Round 14: %r9693 = %r10337 ^ %r9661 ^ F(%r9677)
// inline asm
bfe.u32 %r9664, %r9677, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3317, %r9664, 4;
add.s64 %rd3318, %rd42, %rd3317;
ld.shared.u32 %r9817, [%rd3318];
// inline asm
bfe.u32 %r9668, %r9677, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3319, %r9668, 4;
add.s64 %rd3320, %rd45, %rd3319;
ld.shared.u32 %r9818, [%rd3320];
add.s32 %r9819, %r9818, %r9817;
// inline asm
bfe.u32 %r9672, %r9677, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3321, %r9672, 4;
add.s64 %rd3322, %rd48, %rd3321;
ld.shared.u32 %r9820, [%rd3322];
xor.b32 %r9821, %r9820, %r9819;
// inline asm
bfe.u32 %r9676, %r9677, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3323, %r9676, 4;
add.s64 %rd3324, %rd51, %rd3323;
ld.shared.u32 %r9822, [%rd3324];
add.s32 %r9823, %r9822, %r9821;
xor.b32 %r9824, %r10337, %r9661;
xor.b32 %r9693, %r9824, %r9823;
// Round 15: %r9709 = %r10336 ^ %r9677 ^ F(%r9693)
// inline asm
bfe.u32 %r9680, %r9693, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3325, %r9680, 4;
add.s64 %rd3326, %rd42, %rd3325;
ld.shared.u32 %r9825, [%rd3326];
// inline asm
bfe.u32 %r9684, %r9693, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3327, %r9684, 4;
add.s64 %rd3328, %rd45, %rd3327;
ld.shared.u32 %r9826, [%rd3328];
add.s32 %r9827, %r9826, %r9825;
// inline asm
bfe.u32 %r9688, %r9693, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3329, %r9688, 4;
add.s64 %rd3330, %rd48, %rd3329;
ld.shared.u32 %r9828, [%rd3330];
xor.b32 %r9829, %r9828, %r9827;
// inline asm
bfe.u32 %r9692, %r9693, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3331, %r9692, 4;
add.s64 %rd3332, %rd51, %rd3331;
ld.shared.u32 %r9830, [%rd3332];
add.s32 %r9831, %r9830, %r9829;
xor.b32 %r9832, %r10336, %r9677;
xor.b32 %r9709, %r9832, %r9831;
// Round 16: %r10326 = %r10335 ^ %r9693 ^ F(%r9709)
// inline asm
bfe.u32 %r9696, %r9709, %r10266, %r10267;
// inline asm
mul.wide.u32 %rd3333, %r9696, 4;
add.s64 %rd3334, %rd42, %rd3333;
ld.shared.u32 %r9833, [%rd3334];
// inline asm
bfe.u32 %r9700, %r9709, %r10268, %r10267;
// inline asm
mul.wide.u32 %rd3335, %r9700, 4;
add.s64 %rd3336, %rd45, %rd3335;
ld.shared.u32 %r9834, [%rd3336];
add.s32 %r9835, %r9834, %r9833;
// inline asm
bfe.u32 %r9704, %r9709, %r10267, %r10267;
// inline asm
mul.wide.u32 %rd3337, %r9704, 4;
add.s64 %rd3338, %rd48, %rd3337;
ld.shared.u32 %r9836, [%rd3338];
xor.b32 %r9837, %r9836, %r9835;
// inline asm
bfe.u32 %r9708, %r9709, %r10331, %r10267;
// inline asm
mul.wide.u32 %rd3339, %r9708, 4;
add.s64 %rd3340, %rd51, %rd3339;
ld.shared.u32 %r9838, [%rd3340];
add.s32 %r9839, %r9838, %r9837;
xor.b32 %r9840, %r10335, %r9693;
xor.b32 %r10326, %r9840, %r9839;
// Post-whitening with the last subkey; (%r10327, %r10326) is both this
// iteration's output and the next iteration's input (chained).
xor.b32 %r10327, %r10334, %r9709;
// Store the pair as two consecutive 32-bit words and advance the cursor.
st.shared.u32 [%rd3501], %r10327;
st.shared.u32 [%rd3501+4], %r10326;
add.s64 %rd3501, %rd3501, 8;
// Two words were produced, so the counter steps by 2; exit at 256.
add.s32 %r10328, %r10328, 2;
setp.lt.u32 %p10, %r10328, 256;
@%p10 bra BB4_18;
mov.u64 %rd3502, %rd6;
BB4_20:
mov.u32 %r10273, 0;
mov.u32 %r10272, 16;
mov.u32 %r10271, 8;
mov.u32 %r10270, 24;
xor.b32 %r9854, %r10351, %r10327;
// inline asm
bfe.u32 %r9841, %r9854, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3344, %r9841, 4;
add.s64 %rd3345, %rd42, %rd3344;
ld.shared.u32 %r10097, [%rd3345];
// inline asm
bfe.u32 %r9845, %r9854, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3348, %r9845, 4;
add.s64 %rd3349, %rd45, %rd3348;
ld.shared.u32 %r10098, [%rd3349];
add.s32 %r10099, %r10098, %r10097;
// inline asm
bfe.u32 %r9849, %r9854, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3352, %r9849, 4;
add.s64 %rd3353, %rd48, %rd3352;
ld.shared.u32 %r10100, [%rd3353];
xor.b32 %r10101, %r10100, %r10099;
// inline asm
bfe.u32 %r9853, %r9854, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3356, %r9853, 4;
add.s64 %rd3357, %rd51, %rd3356;
ld.shared.u32 %r10102, [%rd3357];
add.s32 %r10103, %r10102, %r10101;
xor.b32 %r10104, %r10350, %r10326;
xor.b32 %r9870, %r10104, %r10103;
// inline asm
bfe.u32 %r9857, %r9870, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3358, %r9857, 4;
add.s64 %rd3359, %rd42, %rd3358;
ld.shared.u32 %r10105, [%rd3359];
// inline asm
bfe.u32 %r9861, %r9870, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3360, %r9861, 4;
add.s64 %rd3361, %rd45, %rd3360;
ld.shared.u32 %r10106, [%rd3361];
add.s32 %r10107, %r10106, %r10105;
// inline asm
bfe.u32 %r9865, %r9870, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3362, %r9865, 4;
add.s64 %rd3363, %rd48, %rd3362;
ld.shared.u32 %r10108, [%rd3363];
xor.b32 %r10109, %r10108, %r10107;
// inline asm
bfe.u32 %r9869, %r9870, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3364, %r9869, 4;
add.s64 %rd3365, %rd51, %rd3364;
ld.shared.u32 %r10110, [%rd3365];
add.s32 %r10111, %r10110, %r10109;
xor.b32 %r10112, %r10349, %r9854;
xor.b32 %r9886, %r10112, %r10111;
// inline asm
bfe.u32 %r9873, %r9886, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3366, %r9873, 4;
add.s64 %rd3367, %rd42, %rd3366;
ld.shared.u32 %r10113, [%rd3367];
// inline asm
bfe.u32 %r9877, %r9886, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3368, %r9877, 4;
add.s64 %rd3369, %rd45, %rd3368;
ld.shared.u32 %r10114, [%rd3369];
add.s32 %r10115, %r10114, %r10113;
// inline asm
bfe.u32 %r9881, %r9886, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3370, %r9881, 4;
add.s64 %rd3371, %rd48, %rd3370;
ld.shared.u32 %r10116, [%rd3371];
xor.b32 %r10117, %r10116, %r10115;
// inline asm
bfe.u32 %r9885, %r9886, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3372, %r9885, 4;
add.s64 %rd3373, %rd51, %rd3372;
ld.shared.u32 %r10118, [%rd3373];
add.s32 %r10119, %r10118, %r10117;
xor.b32 %r10120, %r10348, %r9870;
xor.b32 %r9902, %r10120, %r10119;
// inline asm
bfe.u32 %r9889, %r9902, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3374, %r9889, 4;
add.s64 %rd3375, %rd42, %rd3374;
ld.shared.u32 %r10121, [%rd3375];
// inline asm
bfe.u32 %r9893, %r9902, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3376, %r9893, 4;
add.s64 %rd3377, %rd45, %rd3376;
ld.shared.u32 %r10122, [%rd3377];
add.s32 %r10123, %r10122, %r10121;
// inline asm
bfe.u32 %r9897, %r9902, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3378, %r9897, 4;
add.s64 %rd3379, %rd48, %rd3378;
ld.shared.u32 %r10124, [%rd3379];
xor.b32 %r10125, %r10124, %r10123;
// inline asm
bfe.u32 %r9901, %r9902, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3380, %r9901, 4;
add.s64 %rd3381, %rd51, %rd3380;
ld.shared.u32 %r10126, [%rd3381];
add.s32 %r10127, %r10126, %r10125;
xor.b32 %r10128, %r10347, %r9886;
xor.b32 %r9918, %r10128, %r10127;
// inline asm
bfe.u32 %r9905, %r9918, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3382, %r9905, 4;
add.s64 %rd3383, %rd42, %rd3382;
ld.shared.u32 %r10129, [%rd3383];
// inline asm
bfe.u32 %r9909, %r9918, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3384, %r9909, 4;
add.s64 %rd3385, %rd45, %rd3384;
ld.shared.u32 %r10130, [%rd3385];
add.s32 %r10131, %r10130, %r10129;
// inline asm
bfe.u32 %r9913, %r9918, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3386, %r9913, 4;
add.s64 %rd3387, %rd48, %rd3386;
ld.shared.u32 %r10132, [%rd3387];
xor.b32 %r10133, %r10132, %r10131;
// inline asm
bfe.u32 %r9917, %r9918, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3388, %r9917, 4;
add.s64 %rd3389, %rd51, %rd3388;
ld.shared.u32 %r10134, [%rd3389];
add.s32 %r10135, %r10134, %r10133;
xor.b32 %r10136, %r10346, %r9902;
xor.b32 %r9934, %r10136, %r10135;
// inline asm
bfe.u32 %r9921, %r9934, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3390, %r9921, 4;
add.s64 %rd3391, %rd42, %rd3390;
ld.shared.u32 %r10137, [%rd3391];
// inline asm
bfe.u32 %r9925, %r9934, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3392, %r9925, 4;
add.s64 %rd3393, %rd45, %rd3392;
ld.shared.u32 %r10138, [%rd3393];
add.s32 %r10139, %r10138, %r10137;
// inline asm
bfe.u32 %r9929, %r9934, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3394, %r9929, 4;
add.s64 %rd3395, %rd48, %rd3394;
ld.shared.u32 %r10140, [%rd3395];
xor.b32 %r10141, %r10140, %r10139;
// inline asm
bfe.u32 %r9933, %r9934, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3396, %r9933, 4;
add.s64 %rd3397, %rd51, %rd3396;
ld.shared.u32 %r10142, [%rd3397];
add.s32 %r10143, %r10142, %r10141;
xor.b32 %r10144, %r10345, %r9918;
xor.b32 %r9950, %r10144, %r10143;
// inline asm
bfe.u32 %r9937, %r9950, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3398, %r9937, 4;
add.s64 %rd3399, %rd42, %rd3398;
ld.shared.u32 %r10145, [%rd3399];
// inline asm
bfe.u32 %r9941, %r9950, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3400, %r9941, 4;
add.s64 %rd3401, %rd45, %rd3400;
ld.shared.u32 %r10146, [%rd3401];
add.s32 %r10147, %r10146, %r10145;
// inline asm
bfe.u32 %r9945, %r9950, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3402, %r9945, 4;
add.s64 %rd3403, %rd48, %rd3402;
ld.shared.u32 %r10148, [%rd3403];
xor.b32 %r10149, %r10148, %r10147;
// inline asm
bfe.u32 %r9949, %r9950, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3404, %r9949, 4;
add.s64 %rd3405, %rd51, %rd3404;
ld.shared.u32 %r10150, [%rd3405];
add.s32 %r10151, %r10150, %r10149;
xor.b32 %r10152, %r10344, %r9934;
xor.b32 %r9966, %r10152, %r10151;
// inline asm
bfe.u32 %r9953, %r9966, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3406, %r9953, 4;
add.s64 %rd3407, %rd42, %rd3406;
ld.shared.u32 %r10153, [%rd3407];
// inline asm
bfe.u32 %r9957, %r9966, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3408, %r9957, 4;
add.s64 %rd3409, %rd45, %rd3408;
ld.shared.u32 %r10154, [%rd3409];
add.s32 %r10155, %r10154, %r10153;
// inline asm
bfe.u32 %r9961, %r9966, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3410, %r9961, 4;
add.s64 %rd3411, %rd48, %rd3410;
ld.shared.u32 %r10156, [%rd3411];
xor.b32 %r10157, %r10156, %r10155;
// inline asm
bfe.u32 %r9965, %r9966, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3412, %r9965, 4;
add.s64 %rd3413, %rd51, %rd3412;
ld.shared.u32 %r10158, [%rd3413];
add.s32 %r10159, %r10158, %r10157;
xor.b32 %r10160, %r10343, %r9950;
xor.b32 %r9982, %r10160, %r10159;
// inline asm
bfe.u32 %r9969, %r9982, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3414, %r9969, 4;
add.s64 %rd3415, %rd42, %rd3414;
ld.shared.u32 %r10161, [%rd3415];
// inline asm
bfe.u32 %r9973, %r9982, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3416, %r9973, 4;
add.s64 %rd3417, %rd45, %rd3416;
ld.shared.u32 %r10162, [%rd3417];
add.s32 %r10163, %r10162, %r10161;
// inline asm
bfe.u32 %r9977, %r9982, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3418, %r9977, 4;
add.s64 %rd3419, %rd48, %rd3418;
ld.shared.u32 %r10164, [%rd3419];
xor.b32 %r10165, %r10164, %r10163;
// inline asm
bfe.u32 %r9981, %r9982, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3420, %r9981, 4;
add.s64 %rd3421, %rd51, %rd3420;
ld.shared.u32 %r10166, [%rd3421];
add.s32 %r10167, %r10166, %r10165;
xor.b32 %r10168, %r10342, %r9966;
xor.b32 %r9998, %r10168, %r10167;
// inline asm
bfe.u32 %r9985, %r9998, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3422, %r9985, 4;
add.s64 %rd3423, %rd42, %rd3422;
ld.shared.u32 %r10169, [%rd3423];
// inline asm
bfe.u32 %r9989, %r9998, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3424, %r9989, 4;
add.s64 %rd3425, %rd45, %rd3424;
ld.shared.u32 %r10170, [%rd3425];
add.s32 %r10171, %r10170, %r10169;
// inline asm
bfe.u32 %r9993, %r9998, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3426, %r9993, 4;
add.s64 %rd3427, %rd48, %rd3426;
ld.shared.u32 %r10172, [%rd3427];
xor.b32 %r10173, %r10172, %r10171;
// inline asm
bfe.u32 %r9997, %r9998, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3428, %r9997, 4;
add.s64 %rd3429, %rd51, %rd3428;
ld.shared.u32 %r10174, [%rd3429];
add.s32 %r10175, %r10174, %r10173;
xor.b32 %r10176, %r10341, %r9982;
xor.b32 %r10014, %r10176, %r10175;
// inline asm
bfe.u32 %r10001, %r10014, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3430, %r10001, 4;
add.s64 %rd3431, %rd42, %rd3430;
ld.shared.u32 %r10177, [%rd3431];
// inline asm
bfe.u32 %r10005, %r10014, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3432, %r10005, 4;
add.s64 %rd3433, %rd45, %rd3432;
ld.shared.u32 %r10178, [%rd3433];
add.s32 %r10179, %r10178, %r10177;
// inline asm
bfe.u32 %r10009, %r10014, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3434, %r10009, 4;
add.s64 %rd3435, %rd48, %rd3434;
ld.shared.u32 %r10180, [%rd3435];
xor.b32 %r10181, %r10180, %r10179;
// inline asm
bfe.u32 %r10013, %r10014, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3436, %r10013, 4;
add.s64 %rd3437, %rd51, %rd3436;
ld.shared.u32 %r10182, [%rd3437];
add.s32 %r10183, %r10182, %r10181;
xor.b32 %r10184, %r10340, %r9998;
xor.b32 %r10030, %r10184, %r10183;
// inline asm
bfe.u32 %r10017, %r10030, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3438, %r10017, 4;
add.s64 %rd3439, %rd42, %rd3438;
ld.shared.u32 %r10185, [%rd3439];
// inline asm
bfe.u32 %r10021, %r10030, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3440, %r10021, 4;
add.s64 %rd3441, %rd45, %rd3440;
ld.shared.u32 %r10186, [%rd3441];
add.s32 %r10187, %r10186, %r10185;
// inline asm
bfe.u32 %r10025, %r10030, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3442, %r10025, 4;
add.s64 %rd3443, %rd48, %rd3442;
ld.shared.u32 %r10188, [%rd3443];
xor.b32 %r10189, %r10188, %r10187;
// inline asm
bfe.u32 %r10029, %r10030, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3444, %r10029, 4;
add.s64 %rd3445, %rd51, %rd3444;
ld.shared.u32 %r10190, [%rd3445];
add.s32 %r10191, %r10190, %r10189;
xor.b32 %r10192, %r10339, %r10014;
xor.b32 %r10046, %r10192, %r10191;
// inline asm
bfe.u32 %r10033, %r10046, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3446, %r10033, 4;
add.s64 %rd3447, %rd42, %rd3446;
ld.shared.u32 %r10193, [%rd3447];
// inline asm
bfe.u32 %r10037, %r10046, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3448, %r10037, 4;
add.s64 %rd3449, %rd45, %rd3448;
ld.shared.u32 %r10194, [%rd3449];
add.s32 %r10195, %r10194, %r10193;
// inline asm
bfe.u32 %r10041, %r10046, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3450, %r10041, 4;
add.s64 %rd3451, %rd48, %rd3450;
ld.shared.u32 %r10196, [%rd3451];
xor.b32 %r10197, %r10196, %r10195;
// inline asm
bfe.u32 %r10045, %r10046, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3452, %r10045, 4;
add.s64 %rd3453, %rd51, %rd3452;
ld.shared.u32 %r10198, [%rd3453];
add.s32 %r10199, %r10198, %r10197;
xor.b32 %r10200, %r10338, %r10030;
xor.b32 %r10062, %r10200, %r10199;
// inline asm
bfe.u32 %r10049, %r10062, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3454, %r10049, 4;
add.s64 %rd3455, %rd42, %rd3454;
ld.shared.u32 %r10201, [%rd3455];
// inline asm
bfe.u32 %r10053, %r10062, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3456, %r10053, 4;
add.s64 %rd3457, %rd45, %rd3456;
ld.shared.u32 %r10202, [%rd3457];
add.s32 %r10203, %r10202, %r10201;
// inline asm
bfe.u32 %r10057, %r10062, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3458, %r10057, 4;
add.s64 %rd3459, %rd48, %rd3458;
ld.shared.u32 %r10204, [%rd3459];
xor.b32 %r10205, %r10204, %r10203;
// inline asm
bfe.u32 %r10061, %r10062, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3460, %r10061, 4;
add.s64 %rd3461, %rd51, %rd3460;
ld.shared.u32 %r10206, [%rd3461];
add.s32 %r10207, %r10206, %r10205;
xor.b32 %r10208, %r10337, %r10046;
xor.b32 %r10078, %r10208, %r10207;
// inline asm
bfe.u32 %r10065, %r10078, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3462, %r10065, 4;
add.s64 %rd3463, %rd42, %rd3462;
ld.shared.u32 %r10209, [%rd3463];
// inline asm
bfe.u32 %r10069, %r10078, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3464, %r10069, 4;
add.s64 %rd3465, %rd45, %rd3464;
ld.shared.u32 %r10210, [%rd3465];
add.s32 %r10211, %r10210, %r10209;
// inline asm
bfe.u32 %r10073, %r10078, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3466, %r10073, 4;
add.s64 %rd3467, %rd48, %rd3466;
ld.shared.u32 %r10212, [%rd3467];
xor.b32 %r10213, %r10212, %r10211;
// inline asm
bfe.u32 %r10077, %r10078, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3468, %r10077, 4;
add.s64 %rd3469, %rd51, %rd3468;
ld.shared.u32 %r10214, [%rd3469];
add.s32 %r10215, %r10214, %r10213;
xor.b32 %r10216, %r10336, %r10062;
xor.b32 %r10094, %r10216, %r10215;
// inline asm
bfe.u32 %r10081, %r10094, %r10270, %r10271;
// inline asm
mul.wide.u32 %rd3470, %r10081, 4;
add.s64 %rd3471, %rd42, %rd3470;
ld.shared.u32 %r10217, [%rd3471];
// inline asm
bfe.u32 %r10085, %r10094, %r10272, %r10271;
// inline asm
mul.wide.u32 %rd3472, %r10085, 4;
add.s64 %rd3473, %rd45, %rd3472;
ld.shared.u32 %r10218, [%rd3473];
add.s32 %r10219, %r10218, %r10217;
// inline asm
bfe.u32 %r10089, %r10094, %r10271, %r10271;
// inline asm
mul.wide.u32 %rd3474, %r10089, 4;
add.s64 %rd3475, %rd48, %rd3474;
ld.shared.u32 %r10220, [%rd3475];
xor.b32 %r10221, %r10220, %r10219;
// inline asm
bfe.u32 %r10093, %r10094, %r10273, %r10271;
// inline asm
mul.wide.u32 %rd3476, %r10093, 4;
add.s64 %rd3477, %rd51, %rd3476;
ld.shared.u32 %r10222, [%rd3477];
add.s32 %r10223, %r10222, %r10221;
xor.b32 %r10224, %r10335, %r10078;
xor.b32 %r10326, %r10224, %r10223;
xor.b32 %r10327, %r10334, %r10094;
st.shared.u32 [%rd3502], %r10327;
st.shared.u32 [%rd3502+4], %r10326;
add.s64 %rd3502, %rd3502, 8;
add.s32 %r10331, %r10331, 2;
setp.lt.u32 %p11, %r10331, 256;
@%p11 bra BB4_20;
ld.param.u32 %r10274, [m03200_loop_param_29];
add.s32 %r10309, %r10309, 1;
setp.lt.u32 %p12, %r10309, %r10274;
@%p12 bra BB4_5;
BB4_22:
st.global.u32 [%rd2+72], %r10351;
st.global.u32 [%rd2+76], %r10350;
st.global.u32 [%rd2+80], %r10349;
st.global.u32 [%rd2+84], %r10348;
st.global.u32 [%rd2+88], %r10347;
st.global.u32 [%rd2+92], %r10346;
st.global.u32 [%rd2+96], %r10345;
st.global.u32 [%rd2+100], %r10344;
st.global.u32 [%rd2+104], %r10343;
st.global.u32 [%rd2+108], %r10342;
st.global.u32 [%rd2+112], %r10341;
st.global.u32 [%rd2+116], %r10340;
st.global.u32 [%rd2+120], %r10339;
st.global.u32 [%rd2+124], %r10338;
st.global.u32 [%rd2+128], %r10337;
st.global.u32 [%rd2+132], %r10336;
st.global.u32 [%rd2+136], %r10335;
st.global.u32 [%rd2+140], %r10334;
mov.u32 %r10352, 0;
BB4_23:
mul.wide.u32 %rd3481, %r10352, 4;
add.s64 %rd3482, %rd42, %rd3481;
ld.shared.u32 %r10226, [%rd3482];
add.s64 %rd3485, %rd2, %rd3481;
ld.shared.u32 %r10227, [%rd3482+4];
ld.shared.u32 %r10228, [%rd3482+8];
ld.shared.u32 %r10229, [%rd3482+12];
ld.shared.u32 %r10230, [%rd3482+16];
ld.shared.u32 %r10231, [%rd3482+20];
ld.shared.u32 %r10232, [%rd3482+24];
ld.shared.u32 %r10233, [%rd3482+28];
st.global.u32 [%rd3485+144], %r10226;
add.s64 %rd3488, %rd45, %rd3481;
st.global.u32 [%rd3485+148], %r10227;
st.global.u32 [%rd3485+152], %r10228;
st.global.u32 [%rd3485+156], %r10229;
st.global.u32 [%rd3485+160], %r10230;
st.global.u32 [%rd3485+164], %r10231;
st.global.u32 [%rd3485+168], %r10232;
st.global.u32 [%rd3485+172], %r10233;
ld.shared.u32 %r10234, [%rd3488];
ld.shared.u32 %r10235, [%rd3488+4];
ld.shared.u32 %r10236, [%rd3488+8];
ld.shared.u32 %r10237, [%rd3488+12];
ld.shared.u32 %r10238, [%rd3488+16];
ld.shared.u32 %r10239, [%rd3488+20];
ld.shared.u32 %r10240, [%rd3488+24];
ld.shared.u32 %r10241, [%rd3488+28];
st.global.u32 [%rd3485+1168], %r10234;
add.s64 %rd3491, %rd48, %rd3481;
st.global.u32 [%rd3485+1172], %r10235;
st.global.u32 [%rd3485+1176], %r10236;
st.global.u32 [%rd3485+1180], %r10237;
st.global.u32 [%rd3485+1184], %r10238;
st.global.u32 [%rd3485+1188], %r10239;
st.global.u32 [%rd3485+1192], %r10240;
st.global.u32 [%rd3485+1196], %r10241;
ld.shared.u32 %r10242, [%rd3491];
ld.shared.u32 %r10243, [%rd3491+4];
ld.shared.u32 %r10244, [%rd3491+8];
ld.shared.u32 %r10245, [%rd3491+12];
ld.shared.u32 %r10246, [%rd3491+16];
ld.shared.u32 %r10247, [%rd3491+20];
ld.shared.u32 %r10248, [%rd3491+24];
ld.shared.u32 %r10249, [%rd3491+28];
st.global.u32 [%rd3485+2192], %r10242;
add.s64 %rd3494, %rd51, %rd3481;
st.global.u32 [%rd3485+2196], %r10243;
st.global.u32 [%rd3485+2200], %r10244;
st.global.u32 [%rd3485+2204], %r10245;
st.global.u32 [%rd3485+2208], %r10246;
st.global.u32 [%rd3485+2212], %r10247;
st.global.u32 [%rd3485+2216], %r10248;
st.global.u32 [%rd3485+2220], %r10249;
ld.shared.u32 %r10250, [%rd3494];
ld.shared.u32 %r10251, [%rd3494+4];
ld.shared.u32 %r10252, [%rd3494+8];
ld.shared.u32 %r10253, [%rd3494+12];
ld.shared.u32 %r10254, [%rd3494+16];
ld.shared.u32 %r10255, [%rd3494+20];
ld.shared.u32 %r10256, [%rd3494+24];
ld.shared.u32 %r10257, [%rd3494+28];
st.global.u32 [%rd3485+3216], %r10250;
st.global.u32 [%rd3485+3220], %r10251;
st.global.u32 [%rd3485+3224], %r10252;
st.global.u32 [%rd3485+3228], %r10253;
st.global.u32 [%rd3485+3232], %r10254;
st.global.u32 [%rd3485+3236], %r10255;
st.global.u32 [%rd3485+3240], %r10256;
st.global.u32 [%rd3485+3244], %r10257;
add.s32 %r10352, %r10352, 8;
setp.ne.s32 %p13, %r10352, 256;
@%p13 bra BB4_23;
BB4_24:
ret;
}
// .globl m03200_comp
.entry m03200_comp(
.param .u64 .ptr .global .align 4 m03200_comp_param_0,
.param .u64 .ptr .global .align 4 m03200_comp_param_1,
.param .u64 .ptr .global .align 4 m03200_comp_param_2,
.param .u64 .ptr .global .align 4 m03200_comp_param_3,
.param .u64 .ptr .global .align 4 m03200_comp_param_4,
.param .u64 .ptr .global .align 1 m03200_comp_param_5,
.param .u64 .ptr .global .align 4 m03200_comp_param_6,
.param .u64 .ptr .global .align 4 m03200_comp_param_7,
.param .u64 .ptr .global .align 4 m03200_comp_param_8,
.param .u64 .ptr .global .align 4 m03200_comp_param_9,
.param .u64 .ptr .global .align 4 m03200_comp_param_10,
.param .u64 .ptr .global .align 4 m03200_comp_param_11,
.param .u64 .ptr .global .align 4 m03200_comp_param_12,
.param .u64 .ptr .global .align 4 m03200_comp_param_13,
.param .u64 .ptr .global .align 8 m03200_comp_param_14,
.param .u64 .ptr .global .align 4 m03200_comp_param_15,
.param .u64 .ptr .global .align 4 m03200_comp_param_16,
.param .u64 .ptr .global .align 4 m03200_comp_param_17,
.param .u64 .ptr .global .align 1 m03200_comp_param_18,
.param .u64 .ptr .global .align 4 m03200_comp_param_19,
.param .u64 .ptr .global .align 4 m03200_comp_param_20,
.param .u64 .ptr .global .align 4 m03200_comp_param_21,
.param .u64 .ptr .global .align 4 m03200_comp_param_22,
.param .u64 .ptr .global .align 4 m03200_comp_param_23,
.param .u32 m03200_comp_param_24,
.param .u32 m03200_comp_param_25,
.param .u32 m03200_comp_param_26,
.param .u32 m03200_comp_param_27,
.param .u32 m03200_comp_param_28,
.param .u32 m03200_comp_param_29,
.param .u32 m03200_comp_param_30,
.param .u32 m03200_comp_param_31,
.param .u32 m03200_comp_param_32,
.param .u32 m03200_comp_param_33,
.param .u64 m03200_comp_param_34
)
.reqntid 8, 1, 1
{
.reg .pred %p<28>;
.reg .b32 %r<971>;
.reg .b64 %rd<376>;
// demoted variable
.shared .align 4 .b8 m03200_comp$S0_all[8192];
// demoted variable
.shared .align 4 .b8 m03200_comp$S1_all[8192];
// demoted variable
.shared .align 4 .b8 m03200_comp$S2_all[8192];
// demoted variable
.shared .align 4 .b8 m03200_comp$S3_all[8192];
ld.param.u64 %rd5, [m03200_comp_param_4];
ld.param.u64 %rd18, [m03200_comp_param_34];
mov.b32 %r61, %envreg3;
mov.u32 %r62, %ctaid.x;
mov.u32 %r63, %ntid.x;
mad.lo.s32 %r64, %r62, %r63, %r61;
mov.u32 %r65, %tid.x;
add.s32 %r1, %r64, %r65;
cvt.s64.s32 %rd1, %r1;
cvt.s64.s32 %rd2, %r65;
setp.ge.u64 %p1, %rd1, %rd18;
@%p1 bra BB5_34;
mul.wide.s32 %rd19, %r1, 4240;
add.s64 %rd20, %rd5, %rd19;
ld.global.u32 %r2, [%rd20+72];
ld.global.u32 %r3, [%rd20+76];
ld.global.u32 %r4, [%rd20+80];
ld.global.u32 %r5, [%rd20+84];
ld.global.u32 %r6, [%rd20+88];
ld.global.u32 %r7, [%rd20+92];
ld.global.u32 %r8, [%rd20+96];
ld.global.u32 %r9, [%rd20+100];
ld.global.u32 %r10, [%rd20+104];
ld.global.u32 %r11, [%rd20+108];
ld.global.u32 %r12, [%rd20+112];
ld.global.u32 %r13, [%rd20+116];
ld.global.u32 %r14, [%rd20+120];
ld.global.u32 %r15, [%rd20+124];
ld.global.u32 %r16, [%rd20+128];
ld.global.u32 %r17, [%rd20+132];
ld.global.u32 %r18, [%rd20+136];
ld.global.u32 %r19, [%rd20+140];
mov.u32 %r960, 0;
BB5_2:
mul.wide.u32 %rd23, %r960, 4;
add.s64 %rd24, %rd20, %rd23;
ld.global.u32 %r70, [%rd24+144];
shl.b64 %rd25, %rd2, 10;
mov.u64 %rd26, m03200_comp$S0_all;
add.s64 %rd27, %rd26, %rd25;
add.s64 %rd28, %rd27, %rd23;
ld.global.u32 %r71, [%rd24+1168];
ld.global.u32 %r72, [%rd24+2192];
ld.global.u32 %r73, [%rd24+3216];
ld.global.u32 %r74, [%rd24+148];
ld.global.u32 %r75, [%rd24+1172];
ld.global.u32 %r76, [%rd24+2196];
ld.global.u32 %r77, [%rd24+3220];
ld.global.u32 %r78, [%rd24+152];
ld.global.u32 %r79, [%rd24+1176];
ld.global.u32 %r80, [%rd24+2200];
ld.global.u32 %r81, [%rd24+3224];
ld.global.u32 %r82, [%rd24+156];
ld.global.u32 %r83, [%rd24+1180];
ld.global.u32 %r84, [%rd24+2204];
ld.global.u32 %r85, [%rd24+3228];
ld.global.u32 %r86, [%rd24+160];
ld.global.u32 %r87, [%rd24+1184];
ld.global.u32 %r88, [%rd24+2208];
ld.global.u32 %r89, [%rd24+3232];
ld.global.u32 %r90, [%rd24+164];
ld.global.u32 %r91, [%rd24+1188];
ld.global.u32 %r92, [%rd24+2212];
ld.global.u32 %r93, [%rd24+3236];
ld.global.u32 %r94, [%rd24+168];
ld.global.u32 %r95, [%rd24+1192];
ld.global.u32 %r96, [%rd24+2216];
ld.global.u32 %r97, [%rd24+3240];
ld.global.u32 %r98, [%rd24+172];
ld.global.u32 %r99, [%rd24+1196];
ld.global.u32 %r100, [%rd24+2220];
ld.global.u32 %r101, [%rd24+3244];
st.shared.u32 [%rd28], %r70;
mov.u64 %rd29, m03200_comp$S1_all;
add.s64 %rd30, %rd29, %rd25;
add.s64 %rd31, %rd30, %rd23;
st.shared.u32 [%rd28+4], %r74;
st.shared.u32 [%rd28+8], %r78;
st.shared.u32 [%rd28+12], %r82;
st.shared.u32 [%rd28+16], %r86;
st.shared.u32 [%rd28+20], %r90;
st.shared.u32 [%rd28+24], %r94;
st.shared.u32 [%rd28+28], %r98;
st.shared.u32 [%rd31], %r71;
mov.u64 %rd32, m03200_comp$S2_all;
add.s64 %rd33, %rd32, %rd25;
add.s64 %rd34, %rd33, %rd23;
st.shared.u32 [%rd31+4], %r75;
st.shared.u32 [%rd31+8], %r79;
st.shared.u32 [%rd31+12], %r83;
st.shared.u32 [%rd31+16], %r87;
st.shared.u32 [%rd31+20], %r91;
st.shared.u32 [%rd31+24], %r95;
st.shared.u32 [%rd31+28], %r99;
st.shared.u32 [%rd34], %r72;
mov.u64 %rd35, m03200_comp$S3_all;
add.s64 %rd36, %rd35, %rd25;
add.s64 %rd37, %rd36, %rd23;
st.shared.u32 [%rd34+4], %r76;
st.shared.u32 [%rd34+8], %r80;
st.shared.u32 [%rd34+12], %r84;
st.shared.u32 [%rd34+16], %r88;
st.shared.u32 [%rd34+20], %r92;
st.shared.u32 [%rd34+24], %r96;
st.shared.u32 [%rd34+28], %r100;
st.shared.u32 [%rd37], %r73;
st.shared.u32 [%rd37+4], %r77;
st.shared.u32 [%rd37+8], %r81;
st.shared.u32 [%rd37+12], %r85;
st.shared.u32 [%rd37+16], %r89;
st.shared.u32 [%rd37+20], %r93;
st.shared.u32 [%rd37+24], %r97;
st.shared.u32 [%rd37+28], %r101;
add.s32 %r960, %r960, 8;
mov.u32 %r963, 1332899944;
mov.u32 %r962, 1700884034;
mov.u32 %r961, -64;
setp.ne.s32 %p2, %r960, 256;
@%p2 bra BB5_2;
BB5_3:
mov.u32 %r959, %tid.x;
cvt.s64.s32 %rd375, %r959;
shl.b64 %rd374, %rd375, 10;
mov.u64 %rd373, m03200_comp$S3_all;
add.s64 %rd372, %rd373, %rd374;
mov.u32 %r957, %tid.x;
cvt.s64.s32 %rd367, %r957;
shl.b64 %rd366, %rd367, 10;
mov.u64 %rd365, m03200_comp$S2_all;
add.s64 %rd364, %rd365, %rd366;
mov.u64 %rd363, m03200_comp$S1_all;
add.s64 %rd362, %rd363, %rd366;
mov.u32 %r955, %tid.x;
cvt.s64.s32 %rd355, %r955;
shl.b64 %rd354, %rd355, 10;
mov.u64 %rd353, m03200_comp$S0_all;
add.s64 %rd352, %rd353, %rd354;
xor.b32 %r115, %r2, %r963;
mov.u32 %r344, 24;
mov.u32 %r357, 8;
// inline asm
bfe.u32 %r102, %r115, %r344, %r357;
// inline asm
mul.wide.u32 %rd41, %r102, 4;
add.s64 %rd42, %rd352, %rd41;
ld.shared.u32 %r361, [%rd42];
mov.u32 %r348, 16;
// inline asm
bfe.u32 %r106, %r115, %r348, %r357;
// inline asm
mul.wide.u32 %rd45, %r106, 4;
add.s64 %rd46, %rd362, %rd45;
ld.shared.u32 %r362, [%rd46];
add.s32 %r363, %r362, %r361;
// inline asm
bfe.u32 %r110, %r115, %r357, %r357;
// inline asm
mul.wide.u32 %rd49, %r110, 4;
add.s64 %rd50, %rd364, %rd49;
ld.shared.u32 %r364, [%rd50];
xor.b32 %r365, %r364, %r363;
mov.u32 %r356, 0;
// inline asm
bfe.u32 %r114, %r115, %r356, %r357;
// inline asm
mul.wide.u32 %rd53, %r114, 4;
add.s64 %rd54, %rd372, %rd53;
ld.shared.u32 %r366, [%rd54];
add.s32 %r367, %r366, %r365;
xor.b32 %r368, %r3, %r962;
xor.b32 %r131, %r368, %r367;
// inline asm
bfe.u32 %r118, %r131, %r344, %r357;
// inline asm
mul.wide.u32 %rd55, %r118, 4;
add.s64 %rd56, %rd352, %rd55;
ld.shared.u32 %r369, [%rd56];
// inline asm
bfe.u32 %r122, %r131, %r348, %r357;
// inline asm
mul.wide.u32 %rd57, %r122, 4;
add.s64 %rd58, %rd362, %rd57;
ld.shared.u32 %r370, [%rd58];
add.s32 %r371, %r370, %r369;
// inline asm
bfe.u32 %r126, %r131, %r357, %r357;
// inline asm
mul.wide.u32 %rd59, %r126, 4;
add.s64 %rd60, %rd364, %rd59;
ld.shared.u32 %r372, [%rd60];
xor.b32 %r373, %r372, %r371;
// inline asm
bfe.u32 %r130, %r131, %r356, %r357;
// inline asm
mul.wide.u32 %rd61, %r130, 4;
add.s64 %rd62, %rd372, %rd61;
ld.shared.u32 %r374, [%rd62];
add.s32 %r375, %r374, %r373;
xor.b32 %r376, %r4, %r115;
xor.b32 %r147, %r376, %r375;
// inline asm
bfe.u32 %r134, %r147, %r344, %r357;
// inline asm
mul.wide.u32 %rd63, %r134, 4;
add.s64 %rd64, %rd352, %rd63;
ld.shared.u32 %r377, [%rd64];
// inline asm
bfe.u32 %r138, %r147, %r348, %r357;
// inline asm
mul.wide.u32 %rd65, %r138, 4;
add.s64 %rd66, %rd362, %rd65;
ld.shared.u32 %r378, [%rd66];
add.s32 %r379, %r378, %r377;
// inline asm
bfe.u32 %r142, %r147, %r357, %r357;
// inline asm
mul.wide.u32 %rd67, %r142, 4;
add.s64 %rd68, %rd364, %rd67;
ld.shared.u32 %r380, [%rd68];
xor.b32 %r381, %r380, %r379;
// inline asm
bfe.u32 %r146, %r147, %r356, %r357;
// inline asm
mul.wide.u32 %rd69, %r146, 4;
add.s64 %rd70, %rd372, %rd69;
ld.shared.u32 %r382, [%rd70];
add.s32 %r383, %r382, %r381;
xor.b32 %r384, %r5, %r131;
xor.b32 %r163, %r384, %r383;
// inline asm
bfe.u32 %r150, %r163, %r344, %r357;
// inline asm
mul.wide.u32 %rd71, %r150, 4;
add.s64 %rd72, %rd352, %rd71;
ld.shared.u32 %r385, [%rd72];
// inline asm
bfe.u32 %r154, %r163, %r348, %r357;
// inline asm
mul.wide.u32 %rd73, %r154, 4;
add.s64 %rd74, %rd362, %rd73;
ld.shared.u32 %r386, [%rd74];
add.s32 %r387, %r386, %r385;
// inline asm
bfe.u32 %r158, %r163, %r357, %r357;
// inline asm
mul.wide.u32 %rd75, %r158, 4;
add.s64 %rd76, %rd364, %rd75;
ld.shared.u32 %r388, [%rd76];
xor.b32 %r389, %r388, %r387;
// inline asm
bfe.u32 %r162, %r163, %r356, %r357;
// inline asm
mul.wide.u32 %rd77, %r162, 4;
add.s64 %rd78, %rd372, %rd77;
ld.shared.u32 %r390, [%rd78];
add.s32 %r391, %r390, %r389;
xor.b32 %r392, %r6, %r147;
xor.b32 %r179, %r392, %r391;
// inline asm
bfe.u32 %r166, %r179, %r344, %r357;
// inline asm
mul.wide.u32 %rd79, %r166, 4;
add.s64 %rd80, %rd352, %rd79;
ld.shared.u32 %r393, [%rd80];
// inline asm
bfe.u32 %r170, %r179, %r348, %r357;
// inline asm
mul.wide.u32 %rd81, %r170, 4;
add.s64 %rd82, %rd362, %rd81;
ld.shared.u32 %r394, [%rd82];
add.s32 %r395, %r394, %r393;
// inline asm
bfe.u32 %r174, %r179, %r357, %r357;
// inline asm
mul.wide.u32 %rd83, %r174, 4;
add.s64 %rd84, %rd364, %rd83;
ld.shared.u32 %r396, [%rd84];
xor.b32 %r397, %r396, %r395;
// inline asm
bfe.u32 %r178, %r179, %r356, %r357;
// inline asm
mul.wide.u32 %rd85, %r178, 4;
add.s64 %rd86, %rd372, %rd85;
ld.shared.u32 %r398, [%rd86];
add.s32 %r399, %r398, %r397;
xor.b32 %r400, %r7, %r163;
xor.b32 %r195, %r400, %r399;
// inline asm
bfe.u32 %r182, %r195, %r344, %r357;
// inline asm
mul.wide.u32 %rd87, %r182, 4;
add.s64 %rd88, %rd352, %rd87;
ld.shared.u32 %r401, [%rd88];
// inline asm
bfe.u32 %r186, %r195, %r348, %r357;
// inline asm
mul.wide.u32 %rd89, %r186, 4;
add.s64 %rd90, %rd362, %rd89;
ld.shared.u32 %r402, [%rd90];
add.s32 %r403, %r402, %r401;
// inline asm
bfe.u32 %r190, %r195, %r357, %r357;
// inline asm
mul.wide.u32 %rd91, %r190, 4;
add.s64 %rd92, %rd364, %rd91;
ld.shared.u32 %r404, [%rd92];
xor.b32 %r405, %r404, %r403;
// inline asm
bfe.u32 %r194, %r195, %r356, %r357;
// inline asm
mul.wide.u32 %rd93, %r194, 4;
add.s64 %rd94, %rd372, %rd93;
ld.shared.u32 %r406, [%rd94];
add.s32 %r407, %r406, %r405;
xor.b32 %r408, %r8, %r179;
xor.b32 %r211, %r408, %r407;
// inline asm
bfe.u32 %r198, %r211, %r344, %r357;
// inline asm
mul.wide.u32 %rd95, %r198, 4;
add.s64 %rd96, %rd352, %rd95;
ld.shared.u32 %r409, [%rd96];
// inline asm
bfe.u32 %r202, %r211, %r348, %r357;
// inline asm
mul.wide.u32 %rd97, %r202, 4;
add.s64 %rd98, %rd362, %rd97;
ld.shared.u32 %r410, [%rd98];
add.s32 %r411, %r410, %r409;
// inline asm
bfe.u32 %r206, %r211, %r357, %r357;
// inline asm
mul.wide.u32 %rd99, %r206, 4;
add.s64 %rd100, %rd364, %rd99;
ld.shared.u32 %r412, [%rd100];
xor.b32 %r413, %r412, %r411;
// inline asm
bfe.u32 %r210, %r211, %r356, %r357;
// inline asm
mul.wide.u32 %rd101, %r210, 4;
add.s64 %rd102, %rd372, %rd101;
ld.shared.u32 %r414, [%rd102];
add.s32 %r415, %r414, %r413;
xor.b32 %r416, %r9, %r195;
xor.b32 %r227, %r416, %r415;
// inline asm
bfe.u32 %r214, %r227, %r344, %r357;
// inline asm
mul.wide.u32 %rd103, %r214, 4;
add.s64 %rd104, %rd352, %rd103;
ld.shared.u32 %r417, [%rd104];
// inline asm
bfe.u32 %r218, %r227, %r348, %r357;
// inline asm
mul.wide.u32 %rd105, %r218, 4;
add.s64 %rd106, %rd362, %rd105;
ld.shared.u32 %r418, [%rd106];
add.s32 %r419, %r418, %r417;
// inline asm
bfe.u32 %r222, %r227, %r357, %r357;
// inline asm
mul.wide.u32 %rd107, %r222, 4;
add.s64 %rd108, %rd364, %rd107;
ld.shared.u32 %r420, [%rd108];
xor.b32 %r421, %r420, %r419;
// inline asm
bfe.u32 %r226, %r227, %r356, %r357;
// inline asm
mul.wide.u32 %rd109, %r226, 4;
add.s64 %rd110, %rd372, %rd109;
ld.shared.u32 %r422, [%rd110];
add.s32 %r423, %r422, %r421;
xor.b32 %r424, %r10, %r211;
xor.b32 %r243, %r424, %r423;
// inline asm
bfe.u32 %r230, %r243, %r344, %r357;
// inline asm
mul.wide.u32 %rd111, %r230, 4;
add.s64 %rd112, %rd352, %rd111;
ld.shared.u32 %r425, [%rd112];
// inline asm
bfe.u32 %r234, %r243, %r348, %r357;
// inline asm
mul.wide.u32 %rd113, %r234, 4;
add.s64 %rd114, %rd362, %rd113;
ld.shared.u32 %r426, [%rd114];
add.s32 %r427, %r426, %r425;
// inline asm
bfe.u32 %r238, %r243, %r357, %r357;
// inline asm
mul.wide.u32 %rd115, %r238, 4;
add.s64 %rd116, %rd364, %rd115;
ld.shared.u32 %r428, [%rd116];
xor.b32 %r429, %r428, %r427;
// inline asm
bfe.u32 %r242, %r243, %r356, %r357;
// inline asm
mul.wide.u32 %rd117, %r242, 4;
add.s64 %rd118, %rd372, %rd117;
ld.shared.u32 %r430, [%rd118];
add.s32 %r431, %r430, %r429;
xor.b32 %r432, %r11, %r227;
xor.b32 %r259, %r432, %r431;
// inline asm
bfe.u32 %r246, %r259, %r344, %r357;
// inline asm
mul.wide.u32 %rd119, %r246, 4;
add.s64 %rd120, %rd352, %rd119;
ld.shared.u32 %r433, [%rd120];
// inline asm
bfe.u32 %r250, %r259, %r348, %r357;
// inline asm
mul.wide.u32 %rd121, %r250, 4;
add.s64 %rd122, %rd362, %rd121;
ld.shared.u32 %r434, [%rd122];
add.s32 %r435, %r434, %r433;
// inline asm
bfe.u32 %r254, %r259, %r357, %r357;
// inline asm
mul.wide.u32 %rd123, %r254, 4;
add.s64 %rd124, %rd364, %rd123;
ld.shared.u32 %r436, [%rd124];
xor.b32 %r437, %r436, %r435;
// inline asm
bfe.u32 %r258, %r259, %r356, %r357;
// inline asm
mul.wide.u32 %rd125, %r258, 4;
add.s64 %rd126, %rd372, %rd125;
ld.shared.u32 %r438, [%rd126];
add.s32 %r439, %r438, %r437;
xor.b32 %r440, %r12, %r243;
xor.b32 %r275, %r440, %r439;
// inline asm
bfe.u32 %r262, %r275, %r344, %r357;
// inline asm
mul.wide.u32 %rd127, %r262, 4;
add.s64 %rd128, %rd352, %rd127;
ld.shared.u32 %r441, [%rd128];
// inline asm
bfe.u32 %r266, %r275, %r348, %r357;
// inline asm
mul.wide.u32 %rd129, %r266, 4;
add.s64 %rd130, %rd362, %rd129;
ld.shared.u32 %r442, [%rd130];
add.s32 %r443, %r442, %r441;
// inline asm
bfe.u32 %r270, %r275, %r357, %r357;
// inline asm
mul.wide.u32 %rd131, %r270, 4;
add.s64 %rd132, %rd364, %rd131;
ld.shared.u32 %r444, [%rd132];
xor.b32 %r445, %r444, %r443;
// inline asm
bfe.u32 %r274, %r275, %r356, %r357;
// inline asm
mul.wide.u32 %rd133, %r274, 4;
add.s64 %rd134, %rd372, %rd133;
ld.shared.u32 %r446, [%rd134];
add.s32 %r447, %r446, %r445;
xor.b32 %r448, %r13, %r259;
xor.b32 %r291, %r448, %r447;
// inline asm
bfe.u32 %r278, %r291, %r344, %r357;
// inline asm
mul.wide.u32 %rd135, %r278, 4;
add.s64 %rd136, %rd352, %rd135;
ld.shared.u32 %r449, [%rd136];
// inline asm
bfe.u32 %r282, %r291, %r348, %r357;
// inline asm
mul.wide.u32 %rd137, %r282, 4;
add.s64 %rd138, %rd362, %rd137;
ld.shared.u32 %r450, [%rd138];
add.s32 %r451, %r450, %r449;
// inline asm
bfe.u32 %r286, %r291, %r357, %r357;
// inline asm
mul.wide.u32 %rd139, %r286, 4;
add.s64 %rd140, %rd364, %rd139;
ld.shared.u32 %r452, [%rd140];
xor.b32 %r453, %r452, %r451;
// inline asm
bfe.u32 %r290, %r291, %r356, %r357;
// inline asm
mul.wide.u32 %rd141, %r290, 4;
add.s64 %rd142, %rd372, %rd141;
ld.shared.u32 %r454, [%rd142];
add.s32 %r455, %r454, %r453;
xor.b32 %r456, %r14, %r275;
xor.b32 %r307, %r456, %r455;
// inline asm
bfe.u32 %r294, %r307, %r344, %r357;
// inline asm
mul.wide.u32 %rd143, %r294, 4;
add.s64 %rd144, %rd352, %rd143;
ld.shared.u32 %r457, [%rd144];
// inline asm
bfe.u32 %r298, %r307, %r348, %r357;
// inline asm
mul.wide.u32 %rd145, %r298, 4;
add.s64 %rd146, %rd362, %rd145;
ld.shared.u32 %r458, [%rd146];
add.s32 %r459, %r458, %r457;
// inline asm
bfe.u32 %r302, %r307, %r357, %r357;
// inline asm
mul.wide.u32 %rd147, %r302, 4;
add.s64 %rd148, %rd364, %rd147;
ld.shared.u32 %r460, [%rd148];
xor.b32 %r461, %r460, %r459;
// inline asm
bfe.u32 %r306, %r307, %r356, %r357;
// inline asm
mul.wide.u32 %rd149, %r306, 4;
add.s64 %rd150, %rd372, %rd149;
ld.shared.u32 %r462, [%rd150];
add.s32 %r463, %r462, %r461;
xor.b32 %r464, %r15, %r291;
xor.b32 %r323, %r464, %r463;
// inline asm
bfe.u32 %r310, %r323, %r344, %r357;
// inline asm
mul.wide.u32 %rd151, %r310, 4;
add.s64 %rd152, %rd352, %rd151;
ld.shared.u32 %r465, [%rd152];
// inline asm
bfe.u32 %r314, %r323, %r348, %r357;
// inline asm
mul.wide.u32 %rd153, %r314, 4;
add.s64 %rd154, %rd362, %rd153;
ld.shared.u32 %r466, [%rd154];
add.s32 %r467, %r466, %r465;
// inline asm
bfe.u32 %r318, %r323, %r357, %r357;
// inline asm
mul.wide.u32 %rd155, %r318, 4;
add.s64 %rd156, %rd364, %rd155;
ld.shared.u32 %r468, [%rd156];
xor.b32 %r469, %r468, %r467;
// inline asm
bfe.u32 %r322, %r323, %r356, %r357;
// inline asm
mul.wide.u32 %rd157, %r322, 4;
add.s64 %rd158, %rd372, %rd157;
ld.shared.u32 %r470, [%rd158];
add.s32 %r471, %r470, %r469;
xor.b32 %r472, %r16, %r307;
xor.b32 %r339, %r472, %r471;
// inline asm
bfe.u32 %r326, %r339, %r344, %r357;
// inline asm
mul.wide.u32 %rd159, %r326, 4;
add.s64 %rd160, %rd352, %rd159;
ld.shared.u32 %r473, [%rd160];
// inline asm
bfe.u32 %r330, %r339, %r348, %r357;
// inline asm
mul.wide.u32 %rd161, %r330, 4;
add.s64 %rd162, %rd362, %rd161;
ld.shared.u32 %r474, [%rd162];
add.s32 %r475, %r474, %r473;
// inline asm
bfe.u32 %r334, %r339, %r357, %r357;
// inline asm
mul.wide.u32 %rd163, %r334, 4;
add.s64 %rd164, %rd364, %rd163;
ld.shared.u32 %r476, [%rd164];
xor.b32 %r477, %r476, %r475;
// inline asm
bfe.u32 %r338, %r339, %r356, %r357;
// inline asm
mul.wide.u32 %rd165, %r338, 4;
add.s64 %rd166, %rd372, %rd165;
ld.shared.u32 %r478, [%rd166];
add.s32 %r479, %r478, %r477;
xor.b32 %r480, %r17, %r323;
xor.b32 %r355, %r480, %r479;
// inline asm
bfe.u32 %r342, %r355, %r344, %r357;
// inline asm
mul.wide.u32 %rd167, %r342, 4;
add.s64 %rd168, %rd352, %rd167;
ld.shared.u32 %r481, [%rd168];
// inline asm
bfe.u32 %r346, %r355, %r348, %r357;
// inline asm
mul.wide.u32 %rd169, %r346, 4;
add.s64 %rd170, %rd362, %rd169;
ld.shared.u32 %r482, [%rd170];
add.s32 %r483, %r482, %r481;
// inline asm
bfe.u32 %r350, %r355, %r357, %r357;
// inline asm
mul.wide.u32 %rd171, %r350, 4;
add.s64 %rd172, %rd364, %rd171;
ld.shared.u32 %r484, [%rd172];
xor.b32 %r485, %r484, %r483;
// inline asm
bfe.u32 %r354, %r355, %r356, %r357;
// inline asm
mul.wide.u32 %rd173, %r354, 4;
add.s64 %rd174, %rd372, %rd173;
ld.shared.u32 %r486, [%rd174];
add.s32 %r487, %r486, %r485;
xor.b32 %r488, %r18, %r339;
xor.b32 %r962, %r488, %r487;
xor.b32 %r963, %r19, %r355;
add.s32 %r961, %r961, 1;
setp.ne.s32 %p3, %r961, 0;
@%p3 bra BB5_3;
mov.u32 %r966, 1701343084;
mov.u32 %r965, 1684370003;
mov.u32 %r964, -64;
BB5_5:
mov.u32 %r958, %tid.x;
cvt.s64.s32 %rd371, %r958;
shl.b64 %rd370, %rd371, 10;
mov.u64 %rd369, m03200_comp$S3_all;
add.s64 %rd368, %rd369, %rd370;
mov.u32 %r956, %tid.x;
cvt.s64.s32 %rd361, %r956;
shl.b64 %rd360, %rd361, 10;
mov.u64 %rd359, m03200_comp$S2_all;
add.s64 %rd358, %rd359, %rd360;
mov.u64 %rd357, m03200_comp$S1_all;
add.s64 %rd356, %rd357, %rd360;
mov.u32 %r943, %tid.x;
cvt.s64.s32 %rd347, %r943;
shl.b64 %rd346, %rd347, 10;
mov.u64 %rd345, m03200_comp$S0_all;
add.s64 %rd344, %rd345, %rd346;
mov.u32 %r936, 0;
mov.u32 %r935, 16;
mov.u32 %r934, 8;
mov.u32 %r933, 24;
xor.b32 %r502, %r2, %r966;
// inline asm
bfe.u32 %r489, %r502, %r933, %r934;
// inline asm
mul.wide.u32 %rd178, %r489, 4;
add.s64 %rd179, %rd344, %rd178;
ld.shared.u32 %r745, [%rd179];
// inline asm
bfe.u32 %r493, %r502, %r935, %r934;
// inline asm
mul.wide.u32 %rd182, %r493, 4;
add.s64 %rd183, %rd356, %rd182;
ld.shared.u32 %r746, [%rd183];
add.s32 %r747, %r746, %r745;
// inline asm
bfe.u32 %r497, %r502, %r934, %r934;
// inline asm
mul.wide.u32 %rd186, %r497, 4;
add.s64 %rd187, %rd358, %rd186;
ld.shared.u32 %r748, [%rd187];
xor.b32 %r749, %r748, %r747;
// inline asm
bfe.u32 %r501, %r502, %r936, %r934;
// inline asm
mul.wide.u32 %rd190, %r501, 4;
add.s64 %rd191, %rd368, %rd190;
ld.shared.u32 %r750, [%rd191];
add.s32 %r751, %r750, %r749;
xor.b32 %r752, %r3, %r965;
xor.b32 %r518, %r752, %r751;
// inline asm
bfe.u32 %r505, %r518, %r933, %r934;
// inline asm
mul.wide.u32 %rd192, %r505, 4;
add.s64 %rd193, %rd344, %rd192;
ld.shared.u32 %r753, [%rd193];
// inline asm
bfe.u32 %r509, %r518, %r935, %r934;
// inline asm
mul.wide.u32 %rd194, %r509, 4;
add.s64 %rd195, %rd356, %rd194;
ld.shared.u32 %r754, [%rd195];
add.s32 %r755, %r754, %r753;
// inline asm
bfe.u32 %r513, %r518, %r934, %r934;
// inline asm
mul.wide.u32 %rd196, %r513, 4;
add.s64 %rd197, %rd358, %rd196;
ld.shared.u32 %r756, [%rd197];
xor.b32 %r757, %r756, %r755;
// inline asm
bfe.u32 %r517, %r518, %r936, %r934;
// inline asm
mul.wide.u32 %rd198, %r517, 4;
add.s64 %rd199, %rd368, %rd198;
ld.shared.u32 %r758, [%rd199];
add.s32 %r759, %r758, %r757;
xor.b32 %r760, %r4, %r502;
xor.b32 %r534, %r760, %r759;
// inline asm
bfe.u32 %r521, %r534, %r933, %r934;
// inline asm
mul.wide.u32 %rd200, %r521, 4;
add.s64 %rd201, %rd344, %rd200;
ld.shared.u32 %r761, [%rd201];
// inline asm
bfe.u32 %r525, %r534, %r935, %r934;
// inline asm
mul.wide.u32 %rd202, %r525, 4;
add.s64 %rd203, %rd356, %rd202;
ld.shared.u32 %r762, [%rd203];
add.s32 %r763, %r762, %r761;
// inline asm
bfe.u32 %r529, %r534, %r934, %r934;
// inline asm
mul.wide.u32 %rd204, %r529, 4;
add.s64 %rd205, %rd358, %rd204;
ld.shared.u32 %r764, [%rd205];
xor.b32 %r765, %r764, %r763;
// inline asm
bfe.u32 %r533, %r534, %r936, %r934;
// inline asm
mul.wide.u32 %rd206, %r533, 4;
add.s64 %rd207, %rd368, %rd206;
ld.shared.u32 %r766, [%rd207];
add.s32 %r767, %r766, %r765;
xor.b32 %r768, %r5, %r518;
xor.b32 %r550, %r768, %r767;
// inline asm
bfe.u32 %r537, %r550, %r933, %r934;
// inline asm
mul.wide.u32 %rd208, %r537, 4;
add.s64 %rd209, %rd344, %rd208;
ld.shared.u32 %r769, [%rd209];
// inline asm
bfe.u32 %r541, %r550, %r935, %r934;
// inline asm
mul.wide.u32 %rd210, %r541, 4;
add.s64 %rd211, %rd356, %rd210;
ld.shared.u32 %r770, [%rd211];
add.s32 %r771, %r770, %r769;
// inline asm
bfe.u32 %r545, %r550, %r934, %r934;
// inline asm
mul.wide.u32 %rd212, %r545, 4;
add.s64 %rd213, %rd358, %rd212;
ld.shared.u32 %r772, [%rd213];
xor.b32 %r773, %r772, %r771;
// inline asm
bfe.u32 %r549, %r550, %r936, %r934;
// inline asm
mul.wide.u32 %rd214, %r549, 4;
add.s64 %rd215, %rd368, %rd214;
ld.shared.u32 %r774, [%rd215];
add.s32 %r775, %r774, %r773;
xor.b32 %r776, %r6, %r534;
xor.b32 %r566, %r776, %r775;
// inline asm
bfe.u32 %r553, %r566, %r933, %r934;
// inline asm
mul.wide.u32 %rd216, %r553, 4;
add.s64 %rd217, %rd344, %rd216;
ld.shared.u32 %r777, [%rd217];
// inline asm
bfe.u32 %r557, %r566, %r935, %r934;
// inline asm
mul.wide.u32 %rd218, %r557, 4;
add.s64 %rd219, %rd356, %rd218;
ld.shared.u32 %r778, [%rd219];
add.s32 %r779, %r778, %r777;
// inline asm
bfe.u32 %r561, %r566, %r934, %r934;
// inline asm
mul.wide.u32 %rd220, %r561, 4;
add.s64 %rd221, %rd358, %rd220;
ld.shared.u32 %r780, [%rd221];
xor.b32 %r781, %r780, %r779;
// inline asm
bfe.u32 %r565, %r566, %r936, %r934;
// inline asm
mul.wide.u32 %rd222, %r565, 4;
add.s64 %rd223, %rd368, %rd222;
ld.shared.u32 %r782, [%rd223];
add.s32 %r783, %r782, %r781;
xor.b32 %r784, %r7, %r550;
xor.b32 %r582, %r784, %r783;
// inline asm
bfe.u32 %r569, %r582, %r933, %r934;
// inline asm
mul.wide.u32 %rd224, %r569, 4;
add.s64 %rd225, %rd344, %rd224;
ld.shared.u32 %r785, [%rd225];
// inline asm
bfe.u32 %r573, %r582, %r935, %r934;
// inline asm
mul.wide.u32 %rd226, %r573, 4;
add.s64 %rd227, %rd356, %rd226;
ld.shared.u32 %r786, [%rd227];
add.s32 %r787, %r786, %r785;
// inline asm
bfe.u32 %r577, %r582, %r934, %r934;
// inline asm
mul.wide.u32 %rd228, %r577, 4;
add.s64 %rd229, %rd358, %rd228;
ld.shared.u32 %r788, [%rd229];
xor.b32 %r789, %r788, %r787;
// inline asm
bfe.u32 %r581, %r582, %r936, %r934;
// inline asm
mul.wide.u32 %rd230, %r581, 4;
add.s64 %rd231, %rd368, %rd230;
ld.shared.u32 %r790, [%rd231];
add.s32 %r791, %r790, %r789;
xor.b32 %r792, %r8, %r566;
xor.b32 %r598, %r792, %r791;
// inline asm
bfe.u32 %r585, %r598, %r933, %r934;
// inline asm
mul.wide.u32 %rd232, %r585, 4;
add.s64 %rd233, %rd344, %rd232;
ld.shared.u32 %r793, [%rd233];
// inline asm
bfe.u32 %r589, %r598, %r935, %r934;
// inline asm
mul.wide.u32 %rd234, %r589, 4;
add.s64 %rd235, %rd356, %rd234;
ld.shared.u32 %r794, [%rd235];
add.s32 %r795, %r794, %r793;
// inline asm
bfe.u32 %r593, %r598, %r934, %r934;
// inline asm
mul.wide.u32 %rd236, %r593, 4;
add.s64 %rd237, %rd358, %rd236;
ld.shared.u32 %r796, [%rd237];
xor.b32 %r797, %r796, %r795;
// inline asm
bfe.u32 %r597, %r598, %r936, %r934;
// inline asm
mul.wide.u32 %rd238, %r597, 4;
add.s64 %rd239, %rd368, %rd238;
ld.shared.u32 %r798, [%rd239];
add.s32 %r799, %r798, %r797;
xor.b32 %r800, %r9, %r582;
xor.b32 %r614, %r800, %r799;
// inline asm
bfe.u32 %r601, %r614, %r933, %r934;
// inline asm
mul.wide.u32 %rd240, %r601, 4;
add.s64 %rd241, %rd344, %rd240;
ld.shared.u32 %r801, [%rd241];
// inline asm
bfe.u32 %r605, %r614, %r935, %r934;
// inline asm
mul.wide.u32 %rd242, %r605, 4;
add.s64 %rd243, %rd356, %rd242;
ld.shared.u32 %r802, [%rd243];
add.s32 %r803, %r802, %r801;
// inline asm
bfe.u32 %r609, %r614, %r934, %r934;
// inline asm
mul.wide.u32 %rd244, %r609, 4;
add.s64 %rd245, %rd358, %rd244;
ld.shared.u32 %r804, [%rd245];
xor.b32 %r805, %r804, %r803;
// inline asm
bfe.u32 %r613, %r614, %r936, %r934;
// inline asm
mul.wide.u32 %rd246, %r613, 4;
add.s64 %rd247, %rd368, %rd246;
ld.shared.u32 %r806, [%rd247];
add.s32 %r807, %r806, %r805;
xor.b32 %r808, %r10, %r598;
xor.b32 %r630, %r808, %r807;
// inline asm
bfe.u32 %r617, %r630, %r933, %r934;
// inline asm
mul.wide.u32 %rd248, %r617, 4;
add.s64 %rd249, %rd344, %rd248;
ld.shared.u32 %r809, [%rd249];
// inline asm
bfe.u32 %r621, %r630, %r935, %r934;
// inline asm
mul.wide.u32 %rd250, %r621, 4;
add.s64 %rd251, %rd356, %rd250;
ld.shared.u32 %r810, [%rd251];
add.s32 %r811, %r810, %r809;
// inline asm
bfe.u32 %r625, %r630, %r934, %r934;
// inline asm
mul.wide.u32 %rd252, %r625, 4;
add.s64 %rd253, %rd358, %rd252;
ld.shared.u32 %r812, [%rd253];
xor.b32 %r813, %r812, %r811;
// inline asm
bfe.u32 %r629, %r630, %r936, %r934;
// inline asm
mul.wide.u32 %rd254, %r629, 4;
add.s64 %rd255, %rd368, %rd254;
ld.shared.u32 %r814, [%rd255];
add.s32 %r815, %r814, %r813;
xor.b32 %r816, %r11, %r614;
xor.b32 %r646, %r816, %r815;
// inline asm
bfe.u32 %r633, %r646, %r933, %r934;
// inline asm
mul.wide.u32 %rd256, %r633, 4;
add.s64 %rd257, %rd344, %rd256;
ld.shared.u32 %r817, [%rd257];
// inline asm
bfe.u32 %r637, %r646, %r935, %r934;
// inline asm
mul.wide.u32 %rd258, %r637, 4;
add.s64 %rd259, %rd356, %rd258;
ld.shared.u32 %r818, [%rd259];
add.s32 %r819, %r818, %r817;
// inline asm
bfe.u32 %r641, %r646, %r934, %r934;
// inline asm
mul.wide.u32 %rd260, %r641, 4;
add.s64 %rd261, %rd358, %rd260;
ld.shared.u32 %r820, [%rd261];
xor.b32 %r821, %r820, %r819;
// inline asm
bfe.u32 %r645, %r646, %r936, %r934;
// inline asm
mul.wide.u32 %rd262, %r645, 4;
add.s64 %rd263, %rd368, %rd262;
ld.shared.u32 %r822, [%rd263];
add.s32 %r823, %r822, %r821;
xor.b32 %r824, %r12, %r630;
xor.b32 %r662, %r824, %r823;
// inline asm
bfe.u32 %r649, %r662, %r933, %r934;
// inline asm
mul.wide.u32 %rd264, %r649, 4;
add.s64 %rd265, %rd344, %rd264;
ld.shared.u32 %r825, [%rd265];
// inline asm
bfe.u32 %r653, %r662, %r935, %r934;
// inline asm
mul.wide.u32 %rd266, %r653, 4;
add.s64 %rd267, %rd356, %rd266;
ld.shared.u32 %r826, [%rd267];
add.s32 %r827, %r826, %r825;
// inline asm
bfe.u32 %r657, %r662, %r934, %r934;
// inline asm
mul.wide.u32 %rd268, %r657, 4;
add.s64 %rd269, %rd358, %rd268;
ld.shared.u32 %r828, [%rd269];
xor.b32 %r829, %r828, %r827;
// inline asm
bfe.u32 %r661, %r662, %r936, %r934;
// inline asm
mul.wide.u32 %rd270, %r661, 4;
add.s64 %rd271, %rd368, %rd270;
ld.shared.u32 %r830, [%rd271];
add.s32 %r831, %r830, %r829;
xor.b32 %r832, %r13, %r646;
xor.b32 %r678, %r832, %r831;
// inline asm
bfe.u32 %r665, %r678, %r933, %r934;
// inline asm
mul.wide.u32 %rd272, %r665, 4;
add.s64 %rd273, %rd344, %rd272;
ld.shared.u32 %r833, [%rd273];
// inline asm
bfe.u32 %r669, %r678, %r935, %r934;
// inline asm
mul.wide.u32 %rd274, %r669, 4;
add.s64 %rd275, %rd356, %rd274;
ld.shared.u32 %r834, [%rd275];
add.s32 %r835, %r834, %r833;
// inline asm
bfe.u32 %r673, %r678, %r934, %r934;
// inline asm
mul.wide.u32 %rd276, %r673, 4;
add.s64 %rd277, %rd358, %rd276;
ld.shared.u32 %r836, [%rd277];
xor.b32 %r837, %r836, %r835;
// inline asm
bfe.u32 %r677, %r678, %r936, %r934;
// inline asm
mul.wide.u32 %rd278, %r677, 4;
add.s64 %rd279, %rd368, %rd278;
ld.shared.u32 %r838, [%rd279];
add.s32 %r839, %r838, %r837;
xor.b32 %r840, %r14, %r662;
xor.b32 %r694, %r840, %r839;
// inline asm
bfe.u32 %r681, %r694, %r933, %r934;
// inline asm
mul.wide.u32 %rd280, %r681, 4;
add.s64 %rd281, %rd344, %rd280;
ld.shared.u32 %r841, [%rd281];
// inline asm
bfe.u32 %r685, %r694, %r935, %r934;
// inline asm
mul.wide.u32 %rd282, %r685, 4;
add.s64 %rd283, %rd356, %rd282;
ld.shared.u32 %r842, [%rd283];
add.s32 %r843, %r842, %r841;
// inline asm
bfe.u32 %r689, %r694, %r934, %r934;
// inline asm
mul.wide.u32 %rd284, %r689, 4;
add.s64 %rd285, %rd358, %rd284;
ld.shared.u32 %r844, [%rd285];
xor.b32 %r845, %r844, %r843;
// inline asm
bfe.u32 %r693, %r694, %r936, %r934;
// inline asm
mul.wide.u32 %rd286, %r693, 4;
add.s64 %rd287, %rd368, %rd286;
ld.shared.u32 %r846, [%rd287];
add.s32 %r847, %r846, %r845;
xor.b32 %r848, %r15, %r678;
xor.b32 %r710, %r848, %r847;
// inline asm
bfe.u32 %r697, %r710, %r933, %r934;
// inline asm
mul.wide.u32 %rd288, %r697, 4;
add.s64 %rd289, %rd344, %rd288;
ld.shared.u32 %r849, [%rd289];
// inline asm
bfe.u32 %r701, %r710, %r935, %r934;
// inline asm
mul.wide.u32 %rd290, %r701, 4;
add.s64 %rd291, %rd356, %rd290;
ld.shared.u32 %r850, [%rd291];
add.s32 %r851, %r850, %r849;
// inline asm
bfe.u32 %r705, %r710, %r934, %r934;
// inline asm
mul.wide.u32 %rd292, %r705, 4;
add.s64 %rd293, %rd358, %rd292;
ld.shared.u32 %r852, [%rd293];
xor.b32 %r853, %r852, %r851;
// inline asm
bfe.u32 %r709, %r710, %r936, %r934;
// inline asm
mul.wide.u32 %rd294, %r709, 4;
add.s64 %rd295, %rd368, %rd294;
ld.shared.u32 %r854, [%rd295];
add.s32 %r855, %r854, %r853;
xor.b32 %r856, %r16, %r694;
xor.b32 %r726, %r856, %r855;
// inline asm
bfe.u32 %r713, %r726, %r933, %r934;
// inline asm
mul.wide.u32 %rd296, %r713, 4;
add.s64 %rd297, %rd344, %rd296;
ld.shared.u32 %r857, [%rd297];
// inline asm
bfe.u32 %r717, %r726, %r935, %r934;
// inline asm
mul.wide.u32 %rd298, %r717, 4;
add.s64 %rd299, %rd356, %rd298;
ld.shared.u32 %r858, [%rd299];
add.s32 %r859, %r858, %r857;
// inline asm
bfe.u32 %r721, %r726, %r934, %r934;
// inline asm
mul.wide.u32 %rd300, %r721, 4;
add.s64 %rd301, %rd358, %rd300;
ld.shared.u32 %r860, [%rd301];
xor.b32 %r861, %r860, %r859;
// inline asm
bfe.u32 %r725, %r726, %r936, %r934;
// inline asm
mul.wide.u32 %rd302, %r725, 4;
add.s64 %rd303, %rd368, %rd302;
ld.shared.u32 %r862, [%rd303];
add.s32 %r863, %r862, %r861;
xor.b32 %r864, %r17, %r710;
xor.b32 %r742, %r864, %r863;
// inline asm
bfe.u32 %r729, %r742, %r933, %r934;
// inline asm
mul.wide.u32 %rd304, %r729, 4;
add.s64 %rd305, %rd344, %rd304;
ld.shared.u32 %r865, [%rd305];
// inline asm
bfe.u32 %r733, %r742, %r935, %r934;
// inline asm
mul.wide.u32 %rd306, %r733, 4;
add.s64 %rd307, %rd356, %rd306;
ld.shared.u32 %r866, [%rd307];
add.s32 %r867, %r866, %r865;
// inline asm
bfe.u32 %r737, %r742, %r934, %r934;
// inline asm
mul.wide.u32 %rd308, %r737, 4;
add.s64 %rd309, %rd358, %rd308;
ld.shared.u32 %r868, [%rd309];
xor.b32 %r869, %r868, %r867;
// inline asm
bfe.u32 %r741, %r742, %r936, %r934;
// inline asm
mul.wide.u32 %rd310, %r741, 4;
add.s64 %rd311, %rd368, %rd310;
ld.shared.u32 %r870, [%rd311];
add.s32 %r871, %r870, %r869;
xor.b32 %r872, %r18, %r726;
xor.b32 %r965, %r872, %r871;
xor.b32 %r966, %r19, %r742;
add.s32 %r964, %r964, 1;
setp.ne.s32 %p4, %r964, 0;
@%p4 bra BB5_5;
ld.param.u32 %r944, [m03200_comp_param_24];
ld.param.u64 %rd335, [m03200_comp_param_6];
ld.param.u32 %r937, [m03200_comp_param_25];
and.b32 %r34, %r937, 31;
shr.u32 %r873, %r963, %r34;
and.b32 %r874, %r873, %r944;
mul.wide.u32 %rd312, %r874, 4;
add.s64 %rd313, %rd335, %rd312;
and.b32 %r875, %r963, 31;
mov.u32 %r876, 1;
shl.b32 %r35, %r876, %r875;
ld.global.u32 %r877, [%rd313];
and.b32 %r878, %r877, %r35;
setp.eq.s32 %p5, %r878, 0;
@%p5 bra BB5_34;
ld.param.u64 %rd348, [m03200_comp_param_7];
ld.param.u32 %r945, [m03200_comp_param_24];
shr.u32 %r879, %r962, %r34;
and.b32 %r880, %r879, %r945;
mul.wide.u32 %rd314, %r880, 4;
add.s64 %rd315, %rd348, %rd314;
and.b32 %r881, %r962, 31;
shl.b32 %r36, %r876, %r881;
ld.global.u32 %r883, [%rd315];
and.b32 %r884, %r883, %r36;
setp.eq.s32 %p6, %r884, 0;
@%p6 bra BB5_34;
ld.param.u32 %r946, [m03200_comp_param_24];
ld.param.u64 %rd336, [m03200_comp_param_8];
shr.u32 %r885, %r966, %r34;
and.b32 %r886, %r885, %r946;
mul.wide.u32 %rd316, %r886, 4;
add.s64 %rd317, %rd336, %rd316;
and.b32 %r887, %r966, 31;
shl.b32 %r37, %r876, %r887;
ld.global.u32 %r889, [%rd317];
and.b32 %r890, %r889, %r37;
setp.eq.s32 %p7, %r890, 0;
@%p7 bra BB5_34;
ld.param.u32 %r947, [m03200_comp_param_24];
ld.param.u64 %rd337, [m03200_comp_param_9];
shr.u32 %r891, %r965, %r34;
and.b32 %r892, %r891, %r947;
mul.wide.u32 %rd318, %r892, 4;
add.s64 %rd319, %rd337, %rd318;
and.b32 %r893, %r965, 31;
shl.b32 %r38, %r876, %r893;
ld.global.u32 %r895, [%rd319];
and.b32 %r896, %r895, %r38;
setp.eq.s32 %p8, %r896, 0;
@%p8 bra BB5_34;
ld.param.u32 %r948, [m03200_comp_param_24];
ld.param.u64 %rd338, [m03200_comp_param_10];
ld.param.u32 %r938, [m03200_comp_param_26];
and.b32 %r39, %r938, 31;
shr.u32 %r897, %r963, %r39;
and.b32 %r898, %r897, %r948;
mul.wide.u32 %rd320, %r898, 4;
add.s64 %rd321, %rd338, %rd320;
ld.global.u32 %r899, [%rd321];
and.b32 %r900, %r899, %r35;
setp.eq.s32 %p9, %r900, 0;
@%p9 bra BB5_34;
ld.param.u32 %r949, [m03200_comp_param_24];
ld.param.u64 %rd339, [m03200_comp_param_11];
shr.u32 %r901, %r962, %r39;
and.b32 %r902, %r901, %r949;
mul.wide.u32 %rd322, %r902, 4;
add.s64 %rd323, %rd339, %rd322;
ld.global.u32 %r903, [%rd323];
and.b32 %r904, %r903, %r36;
setp.eq.s32 %p10, %r904, 0;
@%p10 bra BB5_34;
ld.param.u32 %r950, [m03200_comp_param_24];
ld.param.u64 %rd340, [m03200_comp_param_12];
shr.u32 %r905, %r966, %r39;
and.b32 %r906, %r905, %r950;
mul.wide.u32 %rd324, %r906, 4;
add.s64 %rd325, %rd340, %rd324;
ld.global.u32 %r907, [%rd325];
and.b32 %r908, %r907, %r37;
setp.eq.s32 %p11, %r908, 0;
@%p11 bra BB5_34;
ld.param.u32 %r951, [m03200_comp_param_24];
ld.param.u64 %rd341, [m03200_comp_param_13];
shr.u32 %r909, %r965, %r39;
and.b32 %r910, %r909, %r951;
mul.wide.u32 %rd326, %r910, 4;
add.s64 %rd327, %rd341, %rd326;
ld.global.u32 %r911, [%rd327];
and.b32 %r912, %r911, %r38;
setp.eq.s32 %p12, %r912, 0;
@%p12 bra BB5_34;
ld.param.u32 %r952, [m03200_comp_param_31];
ld.param.u32 %r939, [m03200_comp_param_32];
cvt.u64.u32 %rd3, %r939;
setp.eq.s32 %p13, %r952, 0;
mov.u32 %r913, -1;
@%p13 bra BB5_28;
ld.param.u32 %r967, [m03200_comp_param_31];
mov.u32 %r968, 0;
BB5_16:
ld.param.u64 %rd342, [m03200_comp_param_15];
shr.u32 %r42, %r967, 1;
add.s32 %r970, %r42, %r968;
cvt.u64.u32 %rd328, %r970;
add.s64 %rd329, %rd328, %rd3;
mul.lo.s64 %rd330, %rd329, 24;
add.s64 %rd4, %rd342, %rd330;
ld.global.u32 %r44, [%rd4+12];
setp.gt.u32 %p14, %r965, %r44;
mov.u32 %r969, %r876;
@%p14 bra BB5_26;
setp.lt.u32 %p15, %r965, %r44;
mov.u32 %r916, -1;
@%p15 bra BB5_18;
bra.uni BB5_19;
BB5_18:
mov.u32 %r969, %r916;
bra.uni BB5_26;
BB5_19:
ld.global.u32 %r45, [%rd4+8];
setp.gt.u32 %p16, %r966, %r45;
mov.u32 %r969, %r876;
@%p16 bra BB5_26;
setp.lt.u32 %p17, %r966, %r45;
@%p17 bra BB5_21;
bra.uni BB5_22;
BB5_21:
mov.u32 %r969, %r916;
bra.uni BB5_26;
BB5_22:
ld.global.u32 %r46, [%rd4+4];
setp.gt.u32 %p18, %r962, %r46;
mov.u32 %r969, %r876;
@%p18 bra BB5_26;
setp.lt.u32 %p19, %r962, %r46;
mov.u32 %r969, %r916;
@%p19 bra BB5_26;
ld.global.u32 %r47, [%rd4];
setp.gt.u32 %p20, %r963, %r47;
mov.u32 %r969, %r876;
@%p20 bra BB5_26;
setp.lt.u32 %p21, %r963, %r47;
selp.b32 %r969, -1, 0, %p21;
BB5_26:
add.s32 %r922, %r42, 1;
setp.gt.s32 %p22, %r969, 0;
selp.b32 %r923, %r922, 0, %p22;
add.s32 %r968, %r923, %r968;
selp.b32 %r924, -1, 0, %p22;
add.s32 %r925, %r924, %r967;
shr.u32 %r967, %r925, 1;
setp.eq.s32 %p23, %r969, 0;
@%p23 bra BB5_29;
setp.ne.s32 %p24, %r967, 0;
@%p24 bra BB5_16;
BB5_28:
mov.u32 %r970, %r913;
BB5_29:
setp.eq.s32 %p25, %r970, -1;
@%p25 bra BB5_34;
ld.param.u64 %rd343, [m03200_comp_param_16];
ld.param.u32 %r941, [m03200_comp_param_32];
add.s32 %r53, %r970, %r941;
mul.wide.u32 %rd331, %r53, 4;
add.s64 %rd332, %rd343, %rd331;
atom.global.add.u32 %r927, [%rd332], 1;
setp.ne.s32 %p26, %r927, 0;
@%p26 bra BB5_34;
ld.param.u64 %rd349, [m03200_comp_param_19];
ld.param.u32 %r954, [m03200_comp_param_31];
atom.global.add.u32 %r54, [%rd349], 1;
setp.lt.u32 %p27, %r54, %r954;
@%p27 bra BB5_33;
bra.uni BB5_32;
BB5_33:
ld.param.u64 %rd351, [m03200_comp_param_14];
ld.param.u32 %r942, [m03200_comp_param_27];
mul.wide.u32 %rd333, %r54, 24;
add.s64 %rd334, %rd351, %rd333;
st.global.v2.u32 [%rd334+16], {%r970, %r53};
mov.u32 %r929, 0;
st.global.v2.u32 [%rd334+8], {%r929, %r942};
st.global.u64 [%rd334], %rd1;
bra.uni BB5_34;
BB5_32:
ld.param.u64 %rd350, [m03200_comp_param_19];
atom.global.add.u32 %r928, [%rd350], -1;
BB5_34:
ret;
}
// Per-kernel attribute table for this module: one numbered entry for each of
// the three kernels named below (m03200_init, m03200_loop, m03200_comp).
// Every entry is a triple of strings: the tag "cl_kernel_attributes", the
// kernel name, and the attribute text. All three entries carry the same
// attribute text, reqd_work_group_size(8,1,1).
// NOTE(review): presumably this records the OpenCL reqd_work_group_size kernel
// attribute, i.e. each kernel expects to be launched with a work-group of
// exactly 8x1x1 work-items -- confirm against the host-side launch code
// before changing the work-group size.
.metadata_section {
// Entry 0: attributes recorded for kernel m03200_init.
.metadata 0 {
"cl_kernel_attributes",
"m03200_init",
"reqd_work_group_size(8,1,1)"
}
// Entry 1: attributes recorded for kernel m03200_loop.
.metadata 1 {
"cl_kernel_attributes",
"m03200_loop",
"reqd_work_group_size(8,1,1)"
}
// Entry 2: attributes recorded for kernel m03200_comp.
.metadata 2 {
"cl_kernel_attributes",
"m03200_comp",
"reqd_work_group_size(8,1,1)"
}
} // end of .metadata_section