// // Generated by NVIDIA NVVM Compiler // // Compiler Build ID: UNKNOWN // Unknown Toolkit Version // Based on LLVM 3.4svn // .version 6.3 .target sm_52, texmode_independent .address_size 64 // .globl gpu_decompress .const .align 8 .b8 c_sbox0[1024] = {166, 11, 49, 209, 172, 181, 223, 152, 219, 114, 253, 47, 183, 223, 26, 208, 237, 175, 225, 184, 150, 126, 38, 106, 69, 144, 124, 186, 153, 127, 44, 241, 71, 153, 161, 36, 247, 108, 145, 179, 226, 242, 1, 8, 22, 252, 142, 133, 216, 32, 105, 99, 105, 78, 87, 113, 163, 254, 88, 164, 126, 61, 147, 244, 143, 116, 149, 13, 88, 182, 142, 114, 88, 205, 139, 113, 238, 74, 21, 130, 29, 164, 84, 123, 181, 89, 90, 194, 57, 213, 48, 156, 19, 96, 242, 42, 35, 176, 209, 197, 240, 133, 96, 40, 24, 121, 65, 202, 239, 56, 219, 184, 176, 220, 121, 142, 14, 24, 58, 96, 139, 14, 158, 108, 62, 138, 30, 176, 193, 119, 21, 215, 39, 75, 49, 189, 218, 47, 175, 120, 96, 92, 96, 85, 243, 37, 85, 230, 148, 171, 85, 170, 98, 152, 72, 87, 64, 20, 232, 99, 106, 57, 202, 85, 182, 16, 171, 42, 52, 92, 204, 180, 206, 232, 65, 17, 175, 134, 84, 161, 147, 233, 114, 124, 17, 20, 238, 179, 42, 188, 111, 99, 93, 197, 169, 43, 246, 49, 24, 116, 22, 62, 92, 206, 30, 147, 135, 155, 51, 186, 214, 175, 92, 207, 36, 108, 129, 83, 50, 122, 119, 134, 149, 40, 152, 72, 143, 59, 175, 185, 75, 107, 27, 232, 191, 196, 147, 33, 40, 102, 204, 9, 216, 97, 145, 169, 33, 251, 96, 172, 124, 72, 50, 128, 236, 93, 93, 93, 132, 239, 177, 117, 133, 233, 2, 35, 38, 220, 136, 27, 101, 235, 129, 62, 137, 35, 197, 172, 150, 211, 243, 111, 109, 15, 57, 66, 244, 131, 130, 68, 11, 46, 4, 32, 132, 164, 74, 240, 200, 105, 94, 155, 31, 158, 66, 104, 198, 33, 154, 108, 233, 246, 97, 156, 12, 103, 240, 136, 211, 171, 210, 160, 81, 106, 104, 47, 84, 216, 40, 167, 15, 150, 163, 51, 81, 171, 108, 11, 239, 110, 228, 59, 122, 19, 80, 240, 59, 186, 152, 42, 251, 126, 29, 101, 241, 161, 118, 1, 175, 57, 62, 89, 202, 102, 136, 14, 67, 130, 25, 134, 238, 140, 180, 159, 111, 69, 195, 165, 132, 125, 
190, 94, 139, 59, 216, 117, 111, 224, 115, 32, 193, 133, 159, 68, 26, 64, 166, 106, 193, 86, 98, 170, 211, 78, 6, 119, 63, 54, 114, 223, 254, 27, 61, 2, 155, 66, 36, 215, 208, 55, 72, 18, 10, 208, 211, 234, 15, 219, 155, 192, 241, 73, 201, 114, 83, 7, 123, 27, 153, 128, 216, 121, 212, 37, 247, 222, 232, 246, 26, 80, 254, 227, 59, 76, 121, 182, 189, 224, 108, 151, 186, 6, 192, 4, 182, 79, 169, 193, 196, 96, 159, 64, 194, 158, 92, 94, 99, 36, 106, 25, 175, 111, 251, 104, 181, 83, 108, 62, 235, 178, 57, 19, 111, 236, 82, 59, 31, 81, 252, 109, 44, 149, 48, 155, 68, 69, 129, 204, 9, 189, 94, 175, 4, 208, 227, 190, 253, 74, 51, 222, 7, 40, 15, 102, 179, 75, 46, 25, 87, 168, 203, 192, 15, 116, 200, 69, 57, 95, 11, 210, 219, 251, 211, 185, 189, 192, 121, 85, 10, 50, 96, 26, 198, 0, 161, 214, 121, 114, 44, 64, 254, 37, 159, 103, 204, 163, 31, 251, 248, 233, 165, 142, 248, 34, 50, 219, 223, 22, 117, 60, 21, 107, 97, 253, 200, 30, 80, 47, 171, 82, 5, 173, 250, 181, 61, 50, 96, 135, 35, 253, 72, 123, 49, 83, 130, 223, 0, 62, 187, 87, 92, 158, 160, 140, 111, 202, 46, 86, 135, 26, 219, 105, 23, 223, 246, 168, 66, 213, 195, 255, 126, 40, 198, 50, 103, 172, 115, 85, 79, 140, 176, 39, 91, 105, 200, 88, 202, 187, 93, 163, 255, 225, 160, 17, 240, 184, 152, 61, 250, 16, 184, 131, 33, 253, 108, 181, 252, 74, 91, 211, 209, 45, 121, 228, 83, 154, 101, 69, 248, 182, 188, 73, 142, 210, 144, 151, 251, 75, 218, 242, 221, 225, 51, 126, 203, 164, 65, 19, 251, 98, 232, 198, 228, 206, 218, 202, 32, 239, 1, 76, 119, 54, 254, 158, 126, 208, 180, 31, 241, 43, 77, 218, 219, 149, 152, 145, 144, 174, 113, 142, 173, 234, 160, 213, 147, 107, 208, 209, 142, 208, 224, 37, 199, 175, 47, 91, 60, 142, 183, 148, 117, 142, 251, 226, 246, 143, 100, 43, 18, 242, 18, 184, 136, 136, 28, 240, 13, 144, 160, 94, 173, 79, 28, 195, 143, 104, 145, 241, 207, 209, 173, 193, 168, 179, 24, 34, 47, 47, 119, 23, 14, 190, 254, 45, 117, 234, 161, 31, 2, 139, 15, 204, 160, 229, 232, 116, 111, 181, 214, 243, 172, 24, 153, 226, 
137, 206, 224, 79, 168, 180, 183, 224, 19, 253, 129, 59, 196, 124, 217, 168, 173, 210, 102, 162, 95, 22, 5, 119, 149, 128, 20, 115, 204, 147, 119, 20, 26, 33, 101, 32, 173, 230, 134, 250, 181, 119, 245, 66, 84, 199, 207, 53, 157, 251, 12, 175, 205, 235, 160, 137, 62, 123, 211, 27, 65, 214, 73, 126, 30, 174, 45, 14, 37, 0, 94, 179, 113, 32, 187, 0, 104, 34, 175, 224, 184, 87, 155, 54, 100, 36, 30, 185, 9, 240, 29, 145, 99, 85, 170, 166, 223, 89, 137, 67, 193, 120, 127, 83, 90, 217, 162, 91, 125, 32, 197, 185, 229, 2, 118, 3, 38, 131, 169, 207, 149, 98, 104, 25, 200, 17, 65, 74, 115, 78, 202, 45, 71, 179, 74, 169, 20, 123, 82, 0, 81, 27, 21, 41, 83, 154, 63, 87, 15, 214, 228, 198, 155, 188, 118, 164, 96, 43, 0, 116, 230, 129, 181, 111, 186, 8, 31, 233, 27, 87, 107, 236, 150, 242, 21, 217, 13, 42, 33, 101, 99, 182, 182, 249, 185, 231, 46, 5, 52, 255, 100, 86, 133, 197, 93, 45, 176, 83, 161, 143, 159, 169, 153, 71, 186, 8, 106, 7, 133, 110}; .const .align 8 .b8 c_sbox1[1024] = {233, 112, 122, 75, 68, 41, 179, 181, 46, 9, 117, 219, 35, 38, 25, 196, 176, 166, 110, 173, 125, 223, 167, 73, 184, 96, 238, 156, 102, 178, 237, 143, 113, 140, 170, 236, 255, 23, 154, 105, 108, 82, 100, 86, 225, 158, 177, 194, 165, 2, 54, 25, 41, 76, 9, 117, 64, 19, 89, 160, 62, 58, 24, 228, 154, 152, 84, 63, 101, 157, 66, 91, 214, 228, 143, 107, 214, 63, 247, 153, 7, 156, 210, 161, 245, 48, 232, 239, 230, 56, 45, 77, 193, 93, 37, 240, 134, 32, 221, 76, 38, 235, 112, 132, 198, 233, 130, 99, 94, 204, 30, 2, 63, 107, 104, 9, 201, 239, 186, 62, 20, 24, 151, 60, 161, 112, 106, 107, 132, 53, 127, 104, 134, 226, 160, 82, 5, 83, 156, 183, 55, 7, 80, 170, 28, 132, 7, 62, 92, 174, 222, 127, 236, 68, 125, 142, 184, 242, 22, 87, 55, 218, 58, 176, 13, 12, 80, 240, 4, 31, 28, 240, 255, 179, 0, 2, 26, 245, 12, 174, 178, 116, 181, 60, 88, 122, 131, 37, 189, 33, 9, 220, 249, 19, 145, 209, 246, 47, 169, 124, 115, 71, 50, 148, 1, 71, 245, 34, 129, 229, 229, 58, 220, 218, 194, 55, 52, 118, 181, 200, 167, 221, 243, 
154, 70, 97, 68, 169, 14, 3, 208, 15, 62, 199, 200, 236, 65, 30, 117, 164, 153, 205, 56, 226, 47, 14, 234, 59, 161, 187, 128, 50, 49, 179, 62, 24, 56, 139, 84, 78, 8, 185, 109, 79, 3, 13, 66, 111, 191, 4, 10, 246, 144, 18, 184, 44, 121, 124, 151, 36, 114, 176, 121, 86, 175, 137, 175, 188, 31, 119, 154, 222, 16, 8, 147, 217, 18, 174, 139, 179, 46, 63, 207, 220, 31, 114, 18, 85, 36, 113, 107, 46, 230, 221, 26, 80, 135, 205, 132, 159, 24, 71, 88, 122, 23, 218, 8, 116, 188, 154, 159, 188, 140, 125, 75, 233, 58, 236, 122, 236, 250, 29, 133, 219, 102, 67, 9, 99, 210, 195, 100, 196, 71, 24, 28, 239, 8, 217, 21, 50, 55, 59, 67, 221, 22, 186, 194, 36, 67, 77, 161, 18, 81, 196, 101, 42, 2, 0, 148, 80, 221, 228, 58, 19, 158, 248, 223, 113, 85, 78, 49, 16, 214, 119, 172, 129, 155, 25, 17, 95, 241, 86, 53, 4, 107, 199, 163, 215, 59, 24, 17, 60, 9, 165, 36, 89, 237, 230, 143, 242, 250, 251, 241, 151, 44, 191, 186, 158, 110, 60, 21, 30, 112, 69, 227, 134, 177, 111, 233, 234, 10, 94, 14, 134, 179, 42, 62, 90, 28, 231, 31, 119, 250, 6, 61, 78, 185, 220, 101, 41, 15, 29, 231, 153, 214, 137, 62, 128, 37, 200, 102, 82, 120, 201, 76, 46, 106, 179, 16, 156, 186, 14, 21, 198, 120, 234, 226, 148, 83, 60, 252, 165, 244, 45, 10, 30, 167, 78, 247, 242, 61, 43, 29, 54, 15, 38, 57, 25, 96, 121, 194, 25, 8, 167, 35, 82, 182, 18, 19, 247, 110, 254, 173, 235, 102, 31, 195, 234, 149, 69, 188, 227, 131, 200, 123, 166, 209, 55, 127, 177, 40, 255, 140, 1, 239, 221, 50, 195, 165, 90, 108, 190, 133, 33, 88, 101, 2, 152, 171, 104, 15, 165, 206, 238, 59, 149, 47, 219, 173, 125, 239, 42, 132, 47, 110, 91, 40, 182, 33, 21, 112, 97, 7, 41, 117, 71, 221, 236, 16, 21, 159, 97, 48, 168, 204, 19, 150, 189, 97, 235, 30, 254, 52, 3, 207, 99, 3, 170, 144, 92, 115, 181, 57, 162, 112, 76, 11, 158, 158, 213, 20, 222, 170, 203, 188, 134, 204, 238, 167, 44, 98, 96, 171, 92, 171, 156, 110, 132, 243, 178, 175, 30, 139, 100, 202, 240, 189, 25, 185, 105, 35, 160, 80, 187, 90, 101, 50, 90, 104, 64, 179, 180, 42, 60, 213, 
233, 158, 49, 247, 184, 33, 192, 25, 11, 84, 155, 153, 160, 95, 135, 126, 153, 247, 149, 168, 125, 61, 98, 154, 136, 55, 248, 119, 45, 227, 151, 95, 147, 237, 17, 129, 18, 104, 22, 41, 136, 53, 14, 214, 31, 230, 199, 161, 223, 222, 150, 153, 186, 88, 120, 165, 132, 245, 87, 99, 114, 34, 27, 255, 195, 131, 155, 150, 70, 194, 26, 235, 10, 179, 205, 84, 48, 46, 83, 228, 72, 217, 143, 40, 49, 188, 109, 239, 242, 235, 88, 234, 255, 198, 52, 97, 237, 40, 254, 115, 60, 124, 238, 217, 20, 74, 93, 227, 183, 100, 232, 20, 93, 16, 66, 224, 19, 62, 32, 182, 226, 238, 69, 234, 171, 170, 163, 21, 79, 108, 219, 208, 79, 203, 250, 66, 244, 66, 199, 181, 187, 106, 239, 29, 59, 79, 101, 5, 33, 205, 65, 158, 121, 30, 216, 199, 77, 133, 134, 106, 71, 75, 228, 80, 98, 129, 61, 242, 161, 98, 207, 70, 38, 141, 91, 160, 131, 136, 252, 163, 182, 199, 193, 195, 36, 21, 127, 146, 116, 203, 105, 11, 138, 132, 71, 133, 178, 146, 86, 0, 191, 91, 9, 157, 72, 25, 173, 116, 177, 98, 20, 0, 14, 130, 35, 42, 141, 66, 88, 234, 245, 85, 12, 62, 244, 173, 29, 97, 112, 63, 35, 146, 240, 114, 51, 65, 126, 147, 141, 241, 236, 95, 214, 219, 59, 34, 108, 89, 55, 222, 124, 96, 116, 238, 203, 167, 242, 133, 64, 110, 50, 119, 206, 132, 128, 7, 166, 158, 80, 248, 25, 85, 216, 239, 232, 53, 151, 217, 97, 170, 167, 105, 169, 194, 6, 12, 197, 252, 171, 4, 90, 220, 202, 11, 128, 46, 122, 68, 158, 132, 52, 69, 195, 5, 103, 213, 253, 201, 158, 30, 14, 211, 219, 115, 219, 205, 136, 85, 16, 121, 218, 95, 103, 64, 67, 103, 227, 101, 52, 196, 197, 216, 56, 62, 113, 158, 248, 40, 61, 32, 255, 109, 241, 231, 33, 62, 21, 74, 61, 176, 143, 43, 159, 227, 230, 247, 173, 131, 219}; .const .align 8 .b8 c_sbox2[1024] = {104, 90, 61, 233, 247, 64, 129, 148, 28, 38, 76, 246, 52, 41, 105, 148, 247, 32, 21, 65, 247, 212, 2, 118, 46, 107, 244, 188, 104, 0, 162, 212, 113, 36, 8, 212, 106, 244, 32, 51, 183, 212, 183, 67, 175, 97, 0, 80, 46, 246, 57, 30, 70, 69, 36, 151, 116, 79, 33, 20, 64, 136, 139, 191, 29, 252, 149, 77, 175, 145, 
181, 150, 211, 221, 244, 112, 69, 47, 160, 102, 236, 9, 188, 191, 133, 151, 189, 3, 208, 109, 172, 127, 4, 133, 203, 49, 179, 39, 235, 150, 65, 57, 253, 85, 230, 71, 37, 218, 154, 10, 202, 171, 37, 120, 80, 40, 244, 41, 4, 83, 218, 134, 44, 10, 251, 109, 182, 233, 98, 20, 220, 104, 0, 105, 72, 215, 164, 192, 14, 104, 238, 141, 161, 39, 162, 254, 63, 79, 140, 173, 135, 232, 6, 224, 140, 181, 182, 214, 244, 122, 124, 30, 206, 170, 236, 95, 55, 211, 153, 163, 120, 206, 66, 42, 107, 64, 53, 158, 254, 32, 185, 133, 243, 217, 171, 215, 57, 238, 139, 78, 18, 59, 247, 250, 201, 29, 86, 24, 109, 75, 49, 102, 163, 38, 178, 151, 227, 234, 116, 250, 110, 58, 50, 67, 91, 221, 247, 231, 65, 104, 251, 32, 120, 202, 78, 245, 10, 251, 151, 179, 254, 216, 172, 86, 64, 69, 39, 149, 72, 186, 58, 58, 83, 85, 135, 141, 131, 32, 183, 169, 107, 254, 75, 149, 150, 208, 188, 103, 168, 85, 88, 154, 21, 161, 99, 41, 169, 204, 51, 219, 225, 153, 86, 74, 42, 166, 249, 37, 49, 63, 28, 126, 244, 94, 124, 49, 41, 144, 2, 232, 248, 253, 112, 47, 39, 4, 92, 21, 187, 128, 227, 44, 40, 5, 72, 21, 193, 149, 34, 109, 198, 228, 63, 19, 193, 72, 220, 134, 15, 199, 238, 201, 249, 7, 15, 31, 4, 65, 164, 121, 71, 64, 23, 110, 136, 93, 235, 81, 95, 50, 209, 192, 155, 213, 143, 193, 188, 242, 100, 53, 17, 65, 52, 120, 123, 37, 96, 156, 42, 96, 163, 232, 248, 223, 27, 108, 99, 31, 194, 180, 18, 14, 158, 50, 225, 2, 209, 79, 102, 175, 21, 129, 209, 202, 224, 149, 35, 107, 225, 146, 62, 51, 98, 11, 36, 59, 34, 185, 190, 238, 14, 162, 178, 133, 153, 13, 186, 230, 140, 12, 114, 222, 40, 247, 162, 45, 69, 120, 18, 208, 253, 148, 183, 149, 98, 8, 125, 100, 240, 245, 204, 231, 111, 163, 73, 84, 250, 72, 125, 135, 39, 253, 157, 195, 30, 141, 62, 243, 65, 99, 71, 10, 116, 255, 46, 153, 171, 110, 111, 58, 55, 253, 248, 244, 96, 220, 18, 168, 248, 221, 235, 161, 76, 225, 27, 153, 13, 107, 110, 219, 16, 85, 123, 198, 55, 44, 103, 109, 59, 212, 101, 39, 4, 232, 208, 220, 199, 13, 41, 241, 163, 255, 0, 204, 146, 15, 57, 181, 
11, 237, 15, 105, 251, 159, 123, 102, 156, 125, 219, 206, 11, 207, 145, 160, 163, 94, 21, 217, 136, 47, 19, 187, 36, 173, 91, 81, 191, 121, 148, 123, 235, 214, 59, 118, 179, 46, 57, 55, 121, 89, 17, 204, 151, 226, 38, 128, 45, 49, 46, 244, 167, 173, 66, 104, 59, 43, 106, 198, 204, 76, 117, 18, 28, 241, 46, 120, 55, 66, 18, 106, 231, 81, 146, 183, 230, 187, 161, 6, 80, 99, 251, 75, 24, 16, 107, 26, 250, 237, 202, 17, 216, 189, 37, 61, 201, 195, 225, 226, 89, 22, 66, 68, 134, 19, 18, 10, 110, 236, 12, 217, 42, 234, 171, 213, 78, 103, 175, 100, 95, 168, 134, 218, 136, 233, 191, 190, 254, 195, 228, 100, 87, 128, 188, 157, 134, 192, 247, 240, 248, 123, 120, 96, 77, 96, 3, 96, 70, 131, 253, 209, 176, 31, 56, 246, 4, 174, 69, 119, 204, 252, 54, 215, 51, 107, 66, 131, 113, 171, 30, 240, 135, 65, 128, 176, 95, 94, 0, 60, 190, 87, 160, 119, 36, 174, 232, 189, 153, 66, 70, 85, 97, 46, 88, 191, 143, 244, 88, 78, 162, 253, 221, 242, 56, 239, 116, 244, 194, 189, 137, 135, 195, 249, 102, 83, 116, 142, 179, 200, 85, 242, 117, 180, 185, 217, 252, 70, 97, 38, 235, 122, 132, 223, 29, 139, 121, 14, 106, 132, 226, 149, 95, 145, 142, 89, 110, 70, 112, 87, 180, 32, 145, 85, 213, 140, 76, 222, 2, 201, 225, 172, 11, 185, 208, 5, 130, 187, 72, 98, 168, 17, 158, 169, 116, 117, 182, 25, 127, 183, 9, 220, 169, 224, 161, 9, 45, 102, 51, 70, 50, 196, 2, 31, 90, 232, 140, 190, 240, 9, 37, 160, 153, 74, 16, 254, 110, 29, 29, 61, 185, 26, 223, 164, 165, 11, 15, 242, 134, 161, 105, 241, 104, 40, 131, 218, 183, 220, 254, 6, 57, 87, 155, 206, 226, 161, 82, 127, 205, 79, 1, 94, 17, 80, 250, 131, 6, 167, 196, 181, 2, 160, 39, 208, 230, 13, 39, 140, 248, 154, 65, 134, 63, 119, 6, 76, 96, 195, 181, 6, 168, 97, 40, 122, 23, 240, 224, 134, 245, 192, 170, 88, 96, 0, 98, 125, 220, 48, 215, 158, 230, 17, 99, 234, 56, 35, 148, 221, 194, 83, 52, 22, 194, 194, 86, 238, 203, 187, 222, 182, 188, 144, 161, 125, 252, 235, 118, 29, 89, 206, 9, 228, 5, 111, 136, 1, 124, 75, 61, 10, 114, 57, 36, 124, 146, 124, 95, 114, 
227, 134, 185, 157, 77, 114, 180, 91, 193, 26, 252, 184, 158, 211, 120, 85, 84, 237, 181, 165, 252, 8, 211, 124, 61, 216, 196, 15, 173, 77, 94, 239, 80, 30, 248, 230, 97, 177, 217, 20, 133, 162, 60, 19, 81, 108, 231, 199, 213, 111, 196, 78, 225, 86, 206, 191, 42, 54, 55, 200, 198, 221, 52, 50, 154, 215, 18, 130, 99, 146, 142, 250, 14, 103, 224, 0, 96, 64}; .const .align 8 .b8 c_sbox3[1024] = {55, 206, 57, 58, 207, 245, 250, 211, 55, 119, 194, 171, 27, 45, 197, 90, 158, 103, 176, 92, 66, 55, 163, 79, 64, 39, 130, 211, 190, 155, 188, 153, 157, 142, 17, 213, 21, 115, 15, 191, 126, 28, 45, 214, 123, 196, 0, 199, 107, 27, 140, 183, 69, 144, 161, 33, 190, 177, 110, 178, 180, 110, 54, 106, 47, 171, 72, 87, 121, 110, 148, 188, 210, 118, 163, 198, 200, 194, 73, 101, 238, 248, 15, 83, 125, 222, 141, 70, 29, 10, 115, 213, 198, 77, 208, 76, 219, 187, 57, 41, 80, 70, 186, 169, 232, 38, 149, 172, 4, 227, 94, 190, 240, 213, 250, 161, 154, 81, 45, 106, 226, 140, 239, 99, 34, 238, 134, 154, 184, 194, 137, 192, 246, 46, 36, 67, 170, 3, 30, 165, 164, 208, 242, 156, 186, 97, 192, 131, 77, 106, 233, 155, 80, 21, 229, 143, 214, 91, 100, 186, 249, 162, 38, 40, 225, 58, 58, 167, 134, 149, 169, 75, 233, 98, 85, 239, 211, 239, 47, 199, 218, 247, 82, 247, 105, 111, 4, 63, 89, 10, 250, 119, 21, 169, 228, 128, 1, 134, 176, 135, 173, 230, 9, 155, 147, 229, 62, 59, 90, 253, 144, 233, 151, 215, 52, 158, 217, 183, 240, 44, 81, 139, 43, 2, 58, 172, 213, 150, 125, 166, 125, 1, 214, 62, 207, 209, 40, 45, 125, 124, 207, 37, 159, 31, 155, 184, 242, 173, 114, 180, 214, 90, 76, 245, 136, 90, 113, 172, 41, 224, 230, 165, 25, 224, 253, 172, 176, 71, 155, 250, 147, 237, 141, 196, 211, 232, 204, 87, 59, 40, 41, 102, 213, 248, 40, 46, 19, 121, 145, 1, 95, 120, 85, 96, 117, 237, 68, 14, 150, 247, 140, 94, 211, 227, 212, 109, 5, 21, 186, 109, 244, 136, 37, 97, 161, 3, 189, 240, 100, 5, 21, 158, 235, 195, 162, 87, 144, 60, 236, 26, 39, 151, 42, 7, 58, 169, 155, 109, 63, 27, 245, 33, 99, 30, 251, 102, 156, 245, 
25, 243, 220, 38, 40, 217, 51, 117, 245, 253, 85, 177, 130, 52, 86, 3, 187, 60, 186, 138, 17, 119, 81, 40, 248, 217, 10, 194, 103, 81, 204, 171, 95, 146, 173, 204, 81, 23, 232, 77, 142, 220, 48, 56, 98, 88, 157, 55, 145, 249, 32, 147, 194, 144, 122, 234, 206, 123, 62, 251, 100, 206, 33, 81, 50, 190, 79, 119, 126, 227, 182, 168, 70, 61, 41, 195, 105, 83, 222, 72, 128, 230, 19, 100, 16, 8, 174, 162, 36, 178, 109, 221, 253, 45, 133, 105, 102, 33, 7, 9, 10, 70, 154, 179, 221, 192, 69, 100, 207, 222, 108, 88, 174, 200, 32, 28, 221, 247, 190, 91, 64, 141, 88, 27, 127, 1, 210, 204, 187, 227, 180, 107, 126, 106, 162, 221, 69, 255, 89, 58, 68, 10, 53, 62, 213, 205, 180, 188, 168, 206, 234, 114, 187, 132, 100, 250, 174, 18, 102, 141, 71, 111, 60, 191, 99, 228, 155, 210, 158, 93, 47, 84, 27, 119, 194, 174, 112, 99, 78, 246, 141, 13, 14, 116, 87, 19, 91, 231, 113, 22, 114, 248, 93, 125, 83, 175, 8, 203, 64, 64, 204, 226, 180, 78, 106, 70, 210, 52, 132, 175, 21, 1, 40, 4, 176, 225, 29, 58, 152, 149, 180, 159, 184, 6, 72, 160, 110, 206, 130, 59, 63, 111, 130, 171, 32, 53, 75, 29, 26, 1, 248, 39, 114, 39, 177, 96, 21, 97, 220, 63, 147, 231, 43, 121, 58, 187, 189, 37, 69, 52, 225, 57, 136, 160, 75, 121, 206, 81, 183, 201, 50, 47, 201, 186, 31, 160, 126, 200, 28, 224, 246, 209, 199, 188, 195, 17, 1, 207, 199, 170, 232, 161, 73, 135, 144, 26, 154, 189, 79, 212, 203, 222, 218, 208, 56, 218, 10, 213, 42, 195, 57, 3, 103, 54, 145, 198, 124, 49, 249, 141, 79, 43, 177, 224, 183, 89, 158, 247, 58, 187, 245, 67, 255, 25, 213, 242, 156, 69, 217, 39, 44, 34, 151, 191, 42, 252, 230, 21, 113, 252, 145, 15, 37, 21, 148, 155, 97, 147, 229, 250, 235, 156, 182, 206, 89, 100, 168, 194, 209, 168, 186, 18, 94, 7, 193, 182, 12, 106, 5, 227, 101, 80, 210, 16, 66, 164, 3, 203, 14, 110, 236, 224, 59, 219, 152, 22, 190, 160, 152, 76, 100, 233, 120, 50, 50, 149, 31, 159, 223, 146, 211, 224, 43, 52, 160, 211, 30, 242, 113, 137, 65, 116, 10, 27, 140, 52, 163, 75, 32, 113, 190, 197, 216, 50, 118, 195, 141, 
159, 53, 223, 46, 47, 153, 155, 71, 111, 11, 230, 29, 241, 227, 15, 84, 218, 76, 229, 145, 216, 218, 30, 207, 121, 98, 206, 111, 126, 62, 205, 102, 177, 24, 22, 5, 29, 44, 253, 197, 210, 143, 132, 153, 34, 251, 246, 87, 243, 35, 245, 35, 118, 50, 166, 49, 53, 168, 147, 2, 205, 204, 86, 98, 129, 240, 172, 181, 235, 117, 90, 151, 54, 22, 110, 204, 115, 210, 136, 146, 98, 150, 222, 208, 73, 185, 129, 27, 144, 80, 76, 20, 86, 198, 113, 189, 199, 198, 230, 10, 20, 122, 50, 6, 208, 225, 69, 154, 123, 242, 195, 253, 83, 170, 201, 0, 15, 168, 98, 226, 191, 37, 187, 246, 210, 189, 53, 5, 105, 18, 113, 34, 2, 4, 178, 124, 207, 203, 182, 43, 156, 118, 205, 192, 62, 17, 83, 211, 227, 64, 22, 96, 189, 171, 56, 240, 173, 71, 37, 156, 32, 56, 186, 118, 206, 70, 247, 197, 161, 175, 119, 96, 96, 117, 32, 78, 254, 203, 133, 216, 141, 232, 138, 176, 249, 170, 122, 126, 170, 249, 76, 92, 194, 72, 25, 140, 138, 251, 2, 228, 106, 195, 1, 249, 225, 235, 214, 105, 248, 212, 144, 160, 222, 92, 166, 45, 37, 9, 63, 159, 230, 8, 194, 50, 97, 78, 183, 91, 226, 119, 206, 227, 223, 143, 87, 230, 114, 195, 58}; // m03200_init$S0_all has been demoted // m03200_init$S1_all has been demoted // m03200_init$S2_all has been demoted // m03200_init$S3_all has been demoted .const .align 4 .b8 m03200_init$P[72] = {136, 106, 63, 36, 211, 8, 163, 133, 46, 138, 25, 19, 68, 115, 112, 3, 34, 56, 9, 164, 208, 49, 159, 41, 152, 250, 46, 8, 137, 108, 78, 236, 230, 33, 40, 69, 119, 19, 208, 56, 207, 102, 84, 190, 108, 12, 233, 52, 183, 41, 172, 192, 221, 80, 124, 201, 181, 213, 132, 63, 23, 9, 71, 181, 217, 213, 22, 146, 27, 251, 121, 137}; // m03200_loop$S0_all has been demoted // m03200_loop$S1_all has been demoted // m03200_loop$S2_all has been demoted // m03200_loop$S3_all has been demoted // m03200_comp$S0_all has been demoted // m03200_comp$S1_all has been demoted // m03200_comp$S2_all has been demoted // m03200_comp$S3_all has been demoted .entry gpu_decompress( .param .u64 .ptr .global .align 4 
gpu_decompress_param_0,
    .param .u64 .ptr .global .align 4 gpu_decompress_param_1,
    .param .u64 .ptr .global .align 4 gpu_decompress_param_2,
    .param .u64 gpu_decompress_param_3
)
{
    // gpu_decompress
    //   param_0 : array of 12-byte index records, three u32 each, read at
    //             +0 (first source word index), +4 (word count), +8 (value
    //             stored at +256 of the output record).
    //             NOTE(review): presumably (offset, count, length) of a packed
    //             candidate -- confirm against the host code.
    //   param_1 : packed u32 source words, indexed by the +0 field.
    //   param_2 : array of 260-byte output records (64 data words + 1 word).
    //   param_3 : number of work items; threads with id >= param_3 exit.
    //
    // Each thread builds its 260-byte record in local memory (zero-filled,
    // then `count` words copied in) and finally copies all 65 words to
    // param_2 + id * 260.
    .local .align 4 .b8 __local_depot0[260];
    .reg .b64 %SP;
    .reg .b64 %SPL;
    .reg .pred %p<9>;
    .reg .b32 %r<58>;
    .reg .b64 %rd<45>;

    mov.u64 %SPL, __local_depot0;
    ld.param.u64 %rd7, [gpu_decompress_param_0];
    ld.param.u64 %rd8, [gpu_decompress_param_1];
    ld.param.u64 %rd9, [gpu_decompress_param_2];
    ld.param.u64 %rd10, [gpu_decompress_param_3];
    add.u64 %rd43, %SPL, 0;                 // %rd43 = base of the local record

    // Thread id = ctaid.x * ntid.x + envreg3 + tid.x, sign-extended to 64 bit.
    // NOTE(review): %envreg3 is presumably the OpenCL global work offset -- confirm.
    mov.u32 %r24, %ctaid.x;
    mov.u32 %r25, %ntid.x;
    mov.b32 %r26, %envreg3;
    mad.lo.s32 %r1, %r24, %r25, %r26;
    mov.u32 %r27, %tid.x;
    add.s32 %r2, %r1, %r27;
    cvt.s64.s32 %rd12, %r2;
    setp.ge.u64 %p1, %rd12, %rd10;
    @%p1 bra BB0_12;                        // out of range: nothing to do

    // Load the three fields of this thread's 12-byte index record.
    mul.wide.s32 %rd13, %r2, 12;
    add.s64 %rd14, %rd7, %rd13;
    ld.global.u32 %r3, [%rd14];             // %r3 = first source word index
    ld.global.u32 %r4, [%rd14+4];           // %r4 = number of words to copy
    ld.global.u32 %r5, [%rd14+8];           // %r5 = value for record word 64

    // Clamp the word count to the 64 data words the local record can hold.
    // Without this, a count > 64 makes the copy loops below store past word
    // 63: first over the slot at +256, then outside the 260-byte depot.
    // Counts <= 64 are unaffected.
    min.u32 %r4, %r4, 64;

    // Zero the 64 data words (offsets 0..252) of the local record.
    // The 64-bit zero in %rd15 is truncated by the 32-bit stores.
    mov.u64 %rd15, 0;
    st.local.u32 [%rd43+4], %rd15;      st.local.u32 [%rd43], %rd15;
    st.local.u32 [%rd43+12], %rd15;     st.local.u32 [%rd43+8], %rd15;
    st.local.u32 [%rd43+20], %rd15;     st.local.u32 [%rd43+16], %rd15;
    st.local.u32 [%rd43+28], %rd15;     st.local.u32 [%rd43+24], %rd15;
    st.local.u32 [%rd43+36], %rd15;     st.local.u32 [%rd43+32], %rd15;
    st.local.u32 [%rd43+44], %rd15;     st.local.u32 [%rd43+40], %rd15;
    st.local.u32 [%rd43+52], %rd15;     st.local.u32 [%rd43+48], %rd15;
    st.local.u32 [%rd43+60], %rd15;     st.local.u32 [%rd43+56], %rd15;
    st.local.u32 [%rd43+68], %rd15;     st.local.u32 [%rd43+64], %rd15;
    st.local.u32 [%rd43+76], %rd15;     st.local.u32 [%rd43+72], %rd15;
    st.local.u32 [%rd43+84], %rd15;     st.local.u32 [%rd43+80], %rd15;
    st.local.u32 [%rd43+92], %rd15;     st.local.u32 [%rd43+88], %rd15;
    st.local.u32 [%rd43+100], %rd15;    st.local.u32 [%rd43+96], %rd15;
    st.local.u32 [%rd43+108], %rd15;    st.local.u32 [%rd43+104], %rd15;
    st.local.u32 [%rd43+116], %rd15;    st.local.u32 [%rd43+112], %rd15;
    st.local.u32 [%rd43+124], %rd15;    st.local.u32 [%rd43+120], %rd15;
    st.local.u32 [%rd43+132], %rd15;    st.local.u32 [%rd43+128], %rd15;
    st.local.u32 [%rd43+140], %rd15;    st.local.u32 [%rd43+136], %rd15;
    st.local.u32 [%rd43+148], %rd15;    st.local.u32 [%rd43+144], %rd15;
    st.local.u32 [%rd43+156], %rd15;    st.local.u32 [%rd43+152], %rd15;
    st.local.u32 [%rd43+164], %rd15;    st.local.u32 [%rd43+160], %rd15;
    st.local.u32 [%rd43+172], %rd15;    st.local.u32 [%rd43+168], %rd15;
    st.local.u32 [%rd43+180], %rd15;    st.local.u32 [%rd43+176], %rd15;
    st.local.u32 [%rd43+188], %rd15;    st.local.u32 [%rd43+184], %rd15;
    st.local.u32 [%rd43+196], %rd15;    st.local.u32 [%rd43+192], %rd15;
    st.local.u32 [%rd43+204], %rd15;    st.local.u32 [%rd43+200], %rd15;
    st.local.u32 [%rd43+212], %rd15;    st.local.u32 [%rd43+208], %rd15;
    st.local.u32 [%rd43+220], %rd15;    st.local.u32 [%rd43+216], %rd15;
    st.local.u32 [%rd43+228], %rd15;    st.local.u32 [%rd43+224], %rd15;
    st.local.u32 [%rd43+236], %rd15;    st.local.u32 [%rd43+232], %rd15;
    st.local.u32 [%rd43+244], %rd15;    st.local.u32 [%rd43+240], %rd15;
    st.local.u32 [%rd43+252], %rd15;    st.local.u32 [%rd43+248], %rd15;

    setp.eq.s32 %p2, %r4, 0;
    @%p2 bra BB0_10;                        // count == 0: skip the copy entirely

    // The copy loop is unrolled by four. First peel off (count & 3) words,
    // falling through BB0_6 / BB0_7 so that 3, 2 or 1 words are copied.
    and.b32 %r6, %r4, 3;
    setp.eq.s32 %p3, %r6, 0;
    mov.u32 %r56, 0;
    @%p3 bra BB0_8;
    setp.eq.s32 %p4, %r6, 1;
    mov.u32 %r52, 0;
    @%p4 bra BB0_7;
    setp.eq.s32 %p5, %r6, 2;
    mov.u32 %r50, 0;
    @%p5 bra BB0_6;

    // remainder == 3: copy record word 0
    mul.wide.u32 %rd16, %r3, 4;
    add.s64 %rd17, %rd8, %rd16;
    ld.global.u32 %r32, [%rd17];
    st.local.u32 [%rd43], %r32;
    add.s32 %r3, %r3, 1;
    mov.u32 %r50, 1;

BB0_6:
    // remainder >= 2: copy record word %r50
    mul.wide.u32 %rd18, %r3, 4;
    add.s64 %rd19, %rd8, %rd18;
    ld.global.u32 %r33, [%rd19];
    mul.wide.u32 %rd20, %r50, 4;
    add.s64 %rd21, %rd43, %rd20;
    st.local.u32 [%rd21], %r33;
    add.s32 %r52, %r50, 1;
    add.s32 %r3, %r3, 1;

BB0_7:
    // remainder >= 1: copy record word %r52
    mul.wide.u32 %rd22, %r3, 4;
    add.s64 %rd23, %rd8, %rd22;
    ld.global.u32 %r34, [%rd23];
    mul.wide.u32 %rd24, %r52, 4;
    add.s64 %rd25, %rd43, %rd24;
    st.local.u32 [%rd25], %r34;
    add.s32 %r56, %r52, 1;
    add.s32 %r3, %r3, 1;

BB0_8:
    // Fewer than four words in total means the peeled part was everything.
    setp.lt.u32 %p6, %r4, 4;
    @%p6 bra BB0_10;

BB0_9:
    // Main loop: four words per iteration.
    // %r3 = next source word index, %r56 = next record word index.
    mul.wide.u32 %rd26, %r3, 4;
    add.s64 %rd27, %rd8, %rd26;
    ld.global.u32 %r35, [%rd27];
    mul.wide.u32 %rd28, %r56, 4;
    add.s64 %rd29, %rd43, %rd28;
    st.local.u32 [%rd29], %r35;

    add.s32 %r36, %r3, 1;
    mul.wide.u32 %rd30, %r36, 4;
    add.s64 %rd31, %rd8, %rd30;
    ld.global.u32 %r37, [%rd31];
    add.s32 %r38, %r56, 1;
    mul.wide.u32 %rd32, %r38, 4;
    add.s64 %rd33, %rd43, %rd32;
    st.local.u32 [%rd33], %r37;

    add.s32 %r39, %r3, 2;
    mul.wide.u32 %rd34, %r39, 4;
    add.s64 %rd35, %rd8, %rd34;
    ld.global.u32 %r40, [%rd35];
    add.s32 %r41, %r56, 2;
    mul.wide.u32 %rd36, %r41, 4;
    add.s64 %rd37, %rd43, %rd36;
    st.local.u32 [%rd37], %r40;

    add.s32 %r42, %r3, 3;
    mul.wide.u32 %rd38, %r42, 4;
    add.s64 %rd39, %rd8, %rd38;
    ld.global.u32 %r43, [%rd39];
    add.s32 %r44, %r56, 3;
    mul.wide.u32 %rd40, %r44, 4;
    add.s64 %rd41, %rd43, %rd40;
    st.local.u32 [%rd41], %r43;

    add.s32 %r3, %r3, 4;
    add.s32 %r56, %r56, 4;
    setp.lt.u32 %p7, %r56, %r4;
    @%p7 bra BB0_9;

BB0_10:
    // Fill the last word of the record, then locate this thread's output slot.
    st.local.u32 [%rd43+256], %r5;
    mul.wide.s32 %rd42, %r2, 260;
    add.s64 %rd44, %rd9, %rd42;
    mov.u32 %r57, 0;

BB0_11:
    // Copy all 65 words of the local record to global memory.
    ld.local.u32 %r48, [%rd43];
    st.global.u32 [%rd44], %r48;
    add.s64 %rd44, %rd44, 4;
    add.s64 %rd43, %rd43, 4;
    add.s32 %r57, %r57, 1;
    setp.lt.u32 %p8, %r57, 65;
    @%p8 bra BB0_11;

BB0_12:
    ret;
}

// .globl gpu_memset
// gpu_memset
//   param_0 : destination buffer, written in 16-byte units.
//   param_1 : 32-bit fill value.
//   param_2 : number of 16-byte units; threads with id >= param_2 exit.
// Each in-range thread stores the fill value four times at param_0 + id * 16.
.entry gpu_memset(
    .param .u64 .ptr .global .align 16 gpu_memset_param_0,
    .param .u32 gpu_memset_param_1,
    .param .u64 gpu_memset_param_2
)
{
    .reg .pred %p<2>;
    .reg .b32 %r<8>;
    .reg .b64 %rd<6>;

    ld.param.u64 %rd1, [gpu_memset_param_0];
    ld.param.u32 %r2, [gpu_memset_param_1];
    ld.param.u64 %rd2, [gpu_memset_param_2];

    // Thread id = ctaid.x * ntid.x + envreg3 + tid.x, sign-extended to 64 bit.
    mov.b32 %r3, %envreg3;
    mov.u32 %r4, %ctaid.x;
    mov.u32 %r5, %ntid.x;
    mad.lo.s32 %r6, %r4, %r5, %r3;
    mov.u32 %r7, %tid.x;
    add.s32 %r1, %r6, %r7;
    cvt.s64.s32 %rd3, %r1;
    setp.ge.u64 %p1, %rd3, %rd2;
    @%p1 bra BB1_2;

    // One 128-bit vector store per thread.
    mul.wide.s32 %rd4, %r1, 16;
    add.s64 %rd5, %rd1, %rd4;
    st.global.v4.u32 [%rd5], {%r2, %r2, %r2, %r2};

BB1_2:
    ret;
}

// .globl gpu_atinit
// gpu_atinit
//   param_0 : array of 260-byte records (65 u32 words each).
//   param_1 : number of records; threads with id >= param_1 exit.
// Each in-range thread writes its record as:
//   word 0      = low  32 bits of id XOR 1549556828 (0x5C5C5C5C)
//   word 1      = high 32 bits of id XOR 909522486  (0x36363636)
//   words 2..63 = 0
//   word 64     = 7
// NOTE(review): presumably this generates synthetic 7-byte candidates for
// autotuning, with word 64 being the length -- confirm against the host code.
.entry gpu_atinit(
    .param .u64 .ptr .global .align 4 gpu_atinit_param_0,
    .param .u64 gpu_atinit_param_1
)
{
    .reg .pred %p<2>;
    .reg .b32 %r<13>;
    .reg .b64 %rd<7>;

    ld.param.u64 %rd2, [gpu_atinit_param_0];
    ld.param.u64 %rd3, [gpu_atinit_param_1];

    // Thread id = ctaid.x * ntid.x + envreg3 + tid.x, sign-extended to 64 bit.
    mov.b32 %r2, %envreg3;
    mov.u32 %r3, %ctaid.x;
    mov.u32 %r4, %ntid.x;
    mad.lo.s32 %r5, %r3, %r4, %r2;
    mov.u32 %r6, %tid.x;
    add.s32 %r1, %r5, %r6;
    cvt.s64.s32 %rd1, %r1;
    setp.ge.u64 %p1, %rd1, %rd3;
    @%p1 bra BB2_2;

    // Split the 64-bit id into halves and mix each with its constant.
    cvt.u32.u64 %r7, %rd1;
    shr.u64 %rd4, %rd1, 32;
    cvt.u32.u64 %r8, %rd4;
    xor.b32 %r9, %r7, 1549556828;
    xor.b32 %r10, %r8, 909522486;

    // %rd6 = address of this thread's 260-byte record.
    mul.wide.s32 %rd5, %r1, 260;
    add.s64 %rd6, %rd2, %rd5;
    st.global.u32 [%rd6], %r9;
    st.global.u32 [%rd6+4], %r10;

    // Zero words 2..63 (byte offsets 8..252).
    mov.u32 %r11, 0;
    st.global.u32 [%rd6+8], %r11;       st.global.u32 [%rd6+12], %r11;
    st.global.u32 [%rd6+16], %r11;      st.global.u32 [%rd6+20], %r11;
    st.global.u32 [%rd6+24], %r11;      st.global.u32 [%rd6+28], %r11;
    st.global.u32 [%rd6+32], %r11;      st.global.u32 [%rd6+36], %r11;
    st.global.u32 [%rd6+40], %r11;      st.global.u32 [%rd6+44], %r11;
    st.global.u32 [%rd6+48], %r11;      st.global.u32 [%rd6+52], %r11;
    st.global.u32 [%rd6+56], %r11;      st.global.u32 [%rd6+60], %r11;
    st.global.u32 [%rd6+64], %r11;      st.global.u32 [%rd6+68], %r11;
    st.global.u32 [%rd6+72], %r11;      st.global.u32 [%rd6+76], %r11;
    st.global.u32 [%rd6+80], %r11;      st.global.u32 [%rd6+84], %r11;
    st.global.u32 [%rd6+88], %r11;      st.global.u32 [%rd6+92], %r11;
    st.global.u32 [%rd6+96], %r11;      st.global.u32 [%rd6+100], %r11;
    st.global.u32 [%rd6+104], %r11;     st.global.u32 [%rd6+108], %r11;
    st.global.u32 [%rd6+112], %r11;     st.global.u32 [%rd6+116], %r11;
    st.global.u32 [%rd6+120], %r11;     st.global.u32 [%rd6+124], %r11;
    st.global.u32 [%rd6+128], %r11;     st.global.u32 [%rd6+132], %r11;
    st.global.u32 [%rd6+136], %r11;     st.global.u32 [%rd6+140], %r11;
    st.global.u32 [%rd6+144], %r11;     st.global.u32 [%rd6+148], %r11;
    st.global.u32 [%rd6+152], %r11;     st.global.u32 [%rd6+156], %r11;
    st.global.u32 [%rd6+160], %r11;     st.global.u32 [%rd6+164], %r11;
    st.global.u32 [%rd6+168], %r11;     st.global.u32 [%rd6+172], %r11;
    st.global.u32 [%rd6+176], %r11;     st.global.u32 [%rd6+180], %r11;
    st.global.u32 [%rd6+184], %r11;     st.global.u32 [%rd6+188], %r11;
    st.global.u32 [%rd6+192], %r11;     st.global.u32 [%rd6+196], %r11;
    st.global.u32 [%rd6+200], %r11;     st.global.u32 [%rd6+204], %r11;
    st.global.u32 [%rd6+208], %r11;     st.global.u32 [%rd6+212], %r11;
    st.global.u32 [%rd6+216], %r11;     st.global.u32 [%rd6+220], %r11;
    st.global.u32 [%rd6+224], %r11;     st.global.u32 [%rd6+228], %r11;
    st.global.u32 [%rd6+232], %r11;     st.global.u32 [%rd6+236], %r11;
    st.global.u32 [%rd6+240], %r11;     st.global.u32 [%rd6+244], %r11;
    st.global.u32 [%rd6+248], %r11;     st.global.u32 [%rd6+252], %r11;

    // Last word of the record.
    mov.u32 %r12, 7;
    st.global.u32 [%rd6+256], %r12;

BB2_2:
    ret;
}

// .globl m03200_init
.entry m03200_init(
    .param .u64 .ptr .global .align 4 m03200_init_param_0,
    .param .u64 .ptr .global .align 4 m03200_init_param_1,
    .param .u64 .ptr .global .align 4 m03200_init_param_2,
    .param .u64 .ptr .global .align 4 m03200_init_param_3,
    .param .u64 .ptr .global .align 4 m03200_init_param_4,
    .param .u64 .ptr .global .align 1 m03200_init_param_5,
    .param .u64 .ptr .global .align 4 m03200_init_param_6,
    .param .u64 .ptr .global .align 4 m03200_init_param_7,
    .param .u64 .ptr .global .align 4 m03200_init_param_8,
    .param .u64 .ptr .global .align 4 m03200_init_param_9,
    .param .u64 .ptr .global .align 4 m03200_init_param_10,
    .param .u64 .ptr .global .align 4 m03200_init_param_11,
    .param .u64 .ptr .global .align 4 m03200_init_param_12,
    .param .u64 .ptr .global .align 4 m03200_init_param_13,
    .param .u64 .ptr .global .align 8 m03200_init_param_14,
    .param .u64 .ptr .global .align 4 m03200_init_param_15,
    .param .u64 .ptr .global .align 4 m03200_init_param_16,
    .param .u64 .ptr .global .align 4 m03200_init_param_17,
    .param .u64 .ptr .global .align 1 m03200_init_param_18,
    .param .u64 .ptr .global .align 4 m03200_init_param_19,
    .param .u64 .ptr .global .align 4 m03200_init_param_20,
    .param .u64 .ptr .global .align 4 m03200_init_param_21,
    .param .u64 .ptr .global .align 4 m03200_init_param_22,
    .param .u64 .ptr .global .align 4 m03200_init_param_23,
    .param .u32 m03200_init_param_24,
    .param .u32
m03200_init_param_25, .param .u32 m03200_init_param_26, .param .u32 m03200_init_param_27, .param .u32 m03200_init_param_28, .param .u32 m03200_init_param_29, .param .u32 m03200_init_param_30, .param .u32 m03200_init_param_31, .param .u32 m03200_init_param_32, .param .u32 m03200_init_param_33, .param .u64 m03200_init_param_34 ) .reqntid 8, 1, 1 { .local .align 16 .b8 __local_depot3[240]; .reg .b64 %SP; .reg .b64 %SPL; .reg .pred %p<18>; .reg .b16 %rs<8>; .reg .b32 %r<3909>; .reg .b64 %rd<1341>; // demoted variable .shared .align 4 .b8 m03200_init$S0_all[8192]; // demoted variable .shared .align 4 .b8 m03200_init$S1_all[8192]; // demoted variable .shared .align 4 .b8 m03200_init$S2_all[8192]; // demoted variable .shared .align 4 .b8 m03200_init$S3_all[8192]; mov.u64 %SPL, __local_depot3; cvta.local.u64 %SP, %SPL; ld.param.u64 %rd48, [m03200_init_param_0]; ld.param.u64 %rd49, [m03200_init_param_4]; ld.param.u64 %rd50, [m03200_init_param_17]; ld.param.u32 %r131, [m03200_init_param_27]; ld.param.u64 %rd51, [m03200_init_param_34]; add.u64 %rd52, %SP, 96; add.u64 %rd1336, %SPL, 96; mov.u32 %r132, %ctaid.x; mov.u32 %r133, %ntid.x; mov.b32 %r134, %envreg3; mad.lo.s32 %r135, %r132, %r133, %r134; mov.u32 %r1, %tid.x; add.s32 %r2, %r135, %r1; cvt.s64.s32 %rd53, %r2; cvt.s64.s32 %rd2, %r1; setp.ge.u64 %p1, %rd53, %rd51; @%p1 bra BB3_31; mul.wide.s32 %rd54, %r2, 260; add.s64 %rd55, %rd48, %rd54; ld.global.u32 %r3, [%rd55+256]; mov.u32 %r136, 0; ld.global.u32 %r137, [%rd55]; ld.global.u32 %r138, [%rd55+4]; ld.global.u32 %r139, [%rd55+8]; ld.global.u32 %r140, [%rd55+12]; add.u64 %rd57, %SPL, 0; ld.global.u32 %r141, [%rd55+16]; ld.global.u32 %r142, [%rd55+20]; ld.global.u32 %r143, [%rd55+24]; ld.global.u32 %r144, [%rd55+28]; ld.global.u32 %r145, [%rd55+32]; ld.global.u32 %r146, [%rd55+36]; ld.global.u32 %r147, [%rd55+40]; ld.global.u32 %r148, [%rd55+44]; ld.global.u32 %r149, [%rd55+48]; ld.global.u32 %r150, [%rd55+52]; ld.global.u32 %r151, [%rd55+56]; ld.global.u32 %r152, 
[%rd55+60]; ld.global.u32 %r153, [%rd55+64]; ld.global.u32 %r154, [%rd55+68]; st.local.v2.u32 [%rd57], {%r137, %r138}; st.local.v2.u32 [%rd57+8], {%r139, %r140}; st.local.v2.u32 [%rd57+16], {%r141, %r142}; st.local.v2.u32 [%rd57+24], {%r143, %r144}; st.local.v2.u32 [%rd57+32], {%r145, %r146}; st.local.v2.u32 [%rd57+40], {%r147, %r148}; st.local.v2.u32 [%rd57+48], {%r149, %r150}; st.local.v2.u32 [%rd57+56], {%r151, %r152}; st.local.v2.u32 [%rd57+64], {%r153, %r154}; add.u64 %rd6, %SPL, 168; add.u64 %rd5, %SPL, 80; mov.u64 %rd1326, %rd6; mov.u32 %r3867, %r136; BB3_2: st.local.u32 [%rd1326], %r136; add.s64 %rd1326, %rd1326, 4; add.s32 %r3867, %r3867, 1; setp.lt.u32 %p2, %r3867, 18; @%p2 bra BB3_2; not.b32 %r6, %r3; mov.u32 %r3868, 0; BB3_4: mov.u32 %r157, 72; sub.s32 %r158, %r157, %r3868; min.s32 %r8, %r3, %r158; setp.lt.s32 %p3, %r8, 1; @%p3 bra BB3_14; add.s32 %r160, %r3868, -73; max.s32 %r9, %r6, %r160; not.b32 %r161, %r9; and.b32 %r10, %r161, 3; setp.eq.s32 %p4, %r10, 0; mov.u32 %r3872, 0; @%p4 bra BB3_11; setp.eq.s32 %p5, %r10, 1; mov.u32 %r3870, 0; @%p5 bra BB3_10; setp.eq.s32 %p6, %r10, 2; mov.u32 %r3869, 0; @%p6 bra BB3_9; ld.local.u8 %rs1, [%rd57]; cvt.s64.s32 %rd61, %r3868; add.s64 %rd62, %rd6, %rd61; st.local.u8 [%rd62], %rs1; mov.u32 %r3869, 1; BB3_9: cvt.u64.u32 %rd63, %r3869; add.s64 %rd64, %rd57, %rd63; ld.local.u8 %rs2, [%rd64]; add.s32 %r165, %r3869, %r3868; cvt.s64.s32 %rd65, %r165; add.s64 %rd66, %rd6, %rd65; st.local.u8 [%rd66], %rs2; add.s32 %r3870, %r3869, 1; BB3_10: cvt.s64.s32 %rd67, %r3870; add.s64 %rd68, %rd57, %rd67; ld.local.u8 %rs3, [%rd68]; add.s32 %r166, %r3870, %r3868; cvt.s64.s32 %rd69, %r166; add.s64 %rd70, %rd6, %rd69; st.local.u8 [%rd70], %rs3; add.s32 %r3872, %r3870, 1; BB3_11: setp.gt.u32 %p7, %r9, -5; @%p7 bra BB3_14; add.s32 %r167, %r3872, %r3868; cvt.s64.s32 %rd71, %r167; add.s64 %rd1328, %rd6, %rd71; cvt.s64.s32 %rd72, %r3872; add.s64 %rd1327, %rd57, %rd72; BB3_13: ld.local.u8 %rs4, [%rd1327]; ld.local.u8 %rs5, [%rd1327+1]; 
ld.local.u8 %rs6, [%rd1327+2]; ld.local.u8 %rs7, [%rd1327+3]; st.local.u8 [%rd1328], %rs4; st.local.u8 [%rd1328+1], %rs5; st.local.u8 [%rd1328+2], %rs6; st.local.u8 [%rd1328+3], %rs7; add.s64 %rd1328, %rd1328, 4; add.s64 %rd1327, %rd1327, 4; add.s32 %r3872, %r3872, 4; setp.lt.s32 %p8, %r3872, %r8; @%p8 bra BB3_13; BB3_14: add.s32 %r168, %r3868, %r8; add.s32 %r3868, %r168, 1; setp.lt.s32 %p9, %r3868, 72; @%p9 bra BB3_4; ld.local.u32 %r170, [%rd6]; // inline asm prmt.b32 %r169, %r170, 0, 0x0123; // inline asm st.local.u32 [%rd6], %r169; ld.local.u32 %r172, [%rd6+4]; // inline asm prmt.b32 %r171, %r172, 0, 0x0123; // inline asm st.local.u32 [%rd6+4], %r171; ld.local.u32 %r174, [%rd6+8]; // inline asm prmt.b32 %r173, %r174, 0, 0x0123; // inline asm st.local.u32 [%rd6+8], %r173; ld.local.u32 %r176, [%rd6+12]; // inline asm prmt.b32 %r175, %r176, 0, 0x0123; // inline asm st.local.u32 [%rd6+12], %r175; ld.local.u32 %r178, [%rd6+16]; // inline asm prmt.b32 %r177, %r178, 0, 0x0123; // inline asm st.local.u32 [%rd6+16], %r177; ld.local.u32 %r180, [%rd6+20]; // inline asm prmt.b32 %r179, %r180, 0, 0x0123; // inline asm st.local.u32 [%rd6+20], %r179; ld.local.u32 %r182, [%rd6+24]; // inline asm prmt.b32 %r181, %r182, 0, 0x0123; // inline asm st.local.u32 [%rd6+24], %r181; ld.local.u32 %r184, [%rd6+28]; // inline asm prmt.b32 %r183, %r184, 0, 0x0123; // inline asm st.local.u32 [%rd6+28], %r183; ld.local.u32 %r186, [%rd6+32]; // inline asm prmt.b32 %r185, %r186, 0, 0x0123; // inline asm st.local.u32 [%rd6+32], %r185; ld.local.u32 %r188, [%rd6+36]; // inline asm prmt.b32 %r187, %r188, 0, 0x0123; // inline asm st.local.u32 [%rd6+36], %r187; ld.local.u32 %r190, [%rd6+40]; // inline asm prmt.b32 %r189, %r190, 0, 0x0123; // inline asm st.local.u32 [%rd6+40], %r189; ld.local.u32 %r192, [%rd6+44]; // inline asm prmt.b32 %r191, %r192, 0, 0x0123; // inline asm st.local.u32 [%rd6+44], %r191; ld.local.u32 %r194, [%rd6+48]; // inline asm prmt.b32 %r193, %r194, 0, 0x0123; // inline asm 
st.local.u32 [%rd6+48], %r193; ld.local.u32 %r196, [%rd6+52]; // inline asm prmt.b32 %r195, %r196, 0, 0x0123; // inline asm st.local.u32 [%rd6+52], %r195; ld.local.u32 %r198, [%rd6+56]; // inline asm prmt.b32 %r197, %r198, 0, 0x0123; // inline asm st.local.u32 [%rd6+56], %r197; ld.local.u32 %r200, [%rd6+60]; // inline asm prmt.b32 %r199, %r200, 0, 0x0123; // inline asm st.local.u32 [%rd6+60], %r199; ld.local.u32 %r202, [%rd6+64]; // inline asm prmt.b32 %r201, %r202, 0, 0x0123; // inline asm st.local.u32 [%rd6+64], %r201; ld.local.u32 %r204, [%rd6+68]; // inline asm prmt.b32 %r203, %r204, 0, 0x0123; // inline asm st.local.u32 [%rd6+68], %r203; mul.wide.s32 %rd74, %r2, 4240; add.s64 %rd16, %rd49, %rd74; st.global.u32 [%rd16], %r169; st.global.u32 [%rd16+4], %r171; st.global.u32 [%rd16+8], %r173; st.global.u32 [%rd16+12], %r175; st.global.u32 [%rd16+16], %r177; st.global.u32 [%rd16+20], %r179; st.global.u32 [%rd16+24], %r181; st.global.u32 [%rd16+28], %r183; st.global.u32 [%rd16+32], %r185; st.global.u32 [%rd16+36], %r187; st.global.u32 [%rd16+40], %r189; st.global.u32 [%rd16+44], %r191; st.global.u32 [%rd16+48], %r193; st.global.u32 [%rd16+52], %r195; st.global.u32 [%rd16+56], %r197; st.global.u32 [%rd16+60], %r199; st.global.u32 [%rd16+64], %r201; st.global.u32 [%rd16+68], %r203; mul.wide.u32 %rd75, %r131, 560; add.s64 %rd76, %rd50, %rd75; ld.global.u32 %r37, [%rd76]; mov.u32 %r3873, 0; ld.global.u32 %r38, [%rd76+4]; ld.global.u32 %r39, [%rd76+8]; ld.global.u32 %r40, [%rd76+12]; st.local.v4.u32 [%rd5], {%r37, %r38, %r39, %r40}; cvta.to.local.u64 %rd1330, %rd52; mov.u64 %rd1329, m03200_init$P; BB3_16: ld.const.u32 %r206, [%rd1329]; st.local.u32 [%rd1330], %r206; add.s64 %rd1330, %rd1330, 4; add.s64 %rd1329, %rd1329, 4; add.s32 %r3873, %r3873, 1; setp.lt.u32 %p10, %r3873, 18; @%p10 bra BB3_16; cvt.u32.u64 %r208, %rd2; mul.wide.s32 %rd1335, %r208, 1024; mov.u32 %r3874, -256; mov.u64 %rd1334, c_sbox3; mov.u64 %rd1333, c_sbox2; mov.u64 %rd1332, c_sbox1; mov.u64 %rd1331, 
c_sbox0; BB3_18: ld.const.v2.u32 {%r209, %r210}, [%rd1331]; mov.u64 %rd84, m03200_init$S0_all; add.s64 %rd85, %rd84, %rd1335; st.shared.u32 [%rd85], %r209; st.shared.u32 [%rd85+4], %r210; ld.const.v2.u32 {%r213, %r214}, [%rd1332]; mov.u64 %rd86, m03200_init$S1_all; add.s64 %rd87, %rd86, %rd1335; st.shared.u32 [%rd87], %r213; st.shared.u32 [%rd87+4], %r214; ld.const.v2.u32 {%r217, %r218}, [%rd1333]; mov.u64 %rd88, m03200_init$S2_all; add.s64 %rd89, %rd88, %rd1335; st.shared.u32 [%rd89], %r217; st.shared.u32 [%rd89+4], %r218; ld.const.v2.u32 {%r221, %r222}, [%rd1334]; mov.u64 %rd90, m03200_init$S3_all; add.s64 %rd91, %rd90, %rd1335; st.shared.u32 [%rd91], %r221; st.shared.u32 [%rd91+4], %r222; ld.const.v2.u32 {%r225, %r226}, [%rd1331+8]; st.shared.u32 [%rd85+8], %r225; st.shared.u32 [%rd85+12], %r226; ld.const.v2.u32 {%r229, %r230}, [%rd1332+8]; st.shared.u32 [%rd87+8], %r229; st.shared.u32 [%rd87+12], %r230; ld.const.v2.u32 {%r233, %r234}, [%rd1333+8]; st.shared.u32 [%rd89+8], %r233; st.shared.u32 [%rd89+12], %r234; ld.const.v2.u32 {%r237, %r238}, [%rd1334+8]; st.shared.u32 [%rd91+8], %r237; st.shared.u32 [%rd91+12], %r238; ld.const.v2.u32 {%r241, %r242}, [%rd1331+16]; st.shared.u32 [%rd85+16], %r241; st.shared.u32 [%rd85+20], %r242; ld.const.v2.u32 {%r245, %r246}, [%rd1332+16]; st.shared.u32 [%rd87+16], %r245; st.shared.u32 [%rd87+20], %r246; ld.const.v2.u32 {%r249, %r250}, [%rd1333+16]; st.shared.u32 [%rd89+16], %r249; st.shared.u32 [%rd89+20], %r250; ld.const.v2.u32 {%r253, %r254}, [%rd1334+16]; st.shared.u32 [%rd91+16], %r253; st.shared.u32 [%rd91+20], %r254; ld.const.v2.u32 {%r257, %r258}, [%rd1331+24]; st.shared.u32 [%rd85+24], %r257; st.shared.u32 [%rd85+28], %r258; ld.const.v2.u32 {%r261, %r262}, [%rd1332+24]; st.shared.u32 [%rd87+24], %r261; st.shared.u32 [%rd87+28], %r262; ld.const.v2.u32 {%r265, %r266}, [%rd1333+24]; st.shared.u32 [%rd89+24], %r265; st.shared.u32 [%rd89+28], %r266; ld.const.v2.u32 {%r269, %r270}, [%rd1334+24]; st.shared.u32 [%rd91+24], 
%r269; st.shared.u32 [%rd91+28], %r270; add.s64 %rd1335, %rd1335, 32; add.s64 %rd1334, %rd1334, 32; add.s64 %rd1333, %rd1333, 32; add.s64 %rd1332, %rd1332, 32; add.s64 %rd1331, %rd1331, 32; add.s32 %r3874, %r3874, 8; setp.ne.s32 %p11, %r3874, 0; @%p11 bra BB3_18; cvta.to.local.u64 %rd93, %rd52; ld.local.v2.u32 {%r276, %r277}, [%rd93]; mov.u32 %r275, 0; xor.b32 %r3892, %r276, %r169; xor.b32 %r3891, %r277, %r171; st.local.v2.u32 [%rd93], {%r3892, %r3891}; add.s64 %rd33, %rd1336, 8; ld.local.v2.u32 {%r280, %r281}, [%rd1336+8]; xor.b32 %r3890, %r280, %r173; xor.b32 %r3889, %r281, %r175; st.local.v2.u32 [%rd1336+8], {%r3890, %r3889}; ld.local.v2.u32 {%r284, %r285}, [%rd1336+16]; xor.b32 %r3888, %r284, %r177; xor.b32 %r3887, %r285, %r179; st.local.v2.u32 [%rd1336+16], {%r3888, %r3887}; ld.local.v2.u32 {%r288, %r289}, [%rd1336+24]; xor.b32 %r3886, %r288, %r181; xor.b32 %r3885, %r289, %r183; st.local.v2.u32 [%rd1336+24], {%r3886, %r3885}; ld.local.v2.u32 {%r292, %r293}, [%rd1336+32]; xor.b32 %r3884, %r292, %r185; xor.b32 %r3883, %r293, %r187; st.local.v2.u32 [%rd1336+32], {%r3884, %r3883}; ld.local.v2.u32 {%r296, %r297}, [%rd1336+40]; xor.b32 %r3882, %r296, %r189; xor.b32 %r3881, %r297, %r191; st.local.v2.u32 [%rd1336+40], {%r3882, %r3881}; ld.local.v2.u32 {%r300, %r301}, [%rd1336+48]; xor.b32 %r3880, %r300, %r193; xor.b32 %r3879, %r301, %r195; st.local.v2.u32 [%rd1336+48], {%r3880, %r3879}; ld.local.v2.u32 {%r304, %r305}, [%rd1336+56]; xor.b32 %r3878, %r304, %r197; xor.b32 %r3877, %r305, %r199; st.local.v2.u32 [%rd1336+56], {%r3878, %r3877}; ld.local.v2.u32 {%r308, %r309}, [%rd1336+64]; xor.b32 %r3876, %r308, %r201; xor.b32 %r3875, %r309, %r203; st.local.v2.u32 [%rd1336+64], {%r3876, %r3875}; mov.u32 %r3893, %r275; mov.u32 %r3897, %r275; mov.u32 %r3898, %r275; BB3_20: and.b32 %r568, %r3893, 2; mul.wide.u32 %rd94, %r568, 4; add.s64 %rd95, %rd5, %rd94; ld.local.u32 %r569, [%rd95]; xor.b32 %r570, %r569, %r3898; ld.local.u32 %r571, [%rd95+4]; xor.b32 %r325, %r570, %r3892; 
mov.u32 %r554, 24; mov.u32 %r567, 8; // inline asm bfe.u32 %r312, %r325, %r554, %r567; // inline asm shl.b64 %rd96, %rd2, 10; add.s64 %rd98, %rd84, %rd96; mul.wide.u32 %rd99, %r312, 4; add.s64 %rd100, %rd98, %rd99; ld.shared.u32 %r572, [%rd100]; mov.u32 %r558, 16; // inline asm bfe.u32 %r316, %r325, %r558, %r567; // inline asm add.s64 %rd102, %rd86, %rd96; mul.wide.u32 %rd103, %r316, 4; add.s64 %rd104, %rd102, %rd103; ld.shared.u32 %r573, [%rd104]; add.s32 %r574, %r573, %r572; // inline asm bfe.u32 %r320, %r325, %r567, %r567; // inline asm add.s64 %rd106, %rd88, %rd96; mul.wide.u32 %rd107, %r320, 4; add.s64 %rd108, %rd106, %rd107; ld.shared.u32 %r575, [%rd108]; xor.b32 %r576, %r575, %r574; // inline asm bfe.u32 %r324, %r325, %r275, %r567; // inline asm add.s64 %rd110, %rd90, %rd96; mul.wide.u32 %rd111, %r324, 4; add.s64 %rd112, %rd110, %rd111; ld.shared.u32 %r577, [%rd112]; add.s32 %r578, %r577, %r576; xor.b32 %r579, %r571, %r3897; xor.b32 %r580, %r579, %r3891; xor.b32 %r341, %r580, %r578; // inline asm bfe.u32 %r328, %r341, %r554, %r567; // inline asm mul.wide.u32 %rd113, %r328, 4; add.s64 %rd114, %rd98, %rd113; ld.shared.u32 %r581, [%rd114]; // inline asm bfe.u32 %r332, %r341, %r558, %r567; // inline asm mul.wide.u32 %rd115, %r332, 4; add.s64 %rd116, %rd102, %rd115; ld.shared.u32 %r582, [%rd116]; add.s32 %r583, %r582, %r581; // inline asm bfe.u32 %r336, %r341, %r567, %r567; // inline asm mul.wide.u32 %rd117, %r336, 4; add.s64 %rd118, %rd106, %rd117; ld.shared.u32 %r584, [%rd118]; xor.b32 %r585, %r584, %r583; // inline asm bfe.u32 %r340, %r341, %r275, %r567; // inline asm mul.wide.u32 %rd119, %r340, 4; add.s64 %rd120, %rd110, %rd119; ld.shared.u32 %r586, [%rd120]; add.s32 %r587, %r586, %r585; xor.b32 %r588, %r3890, %r325; xor.b32 %r357, %r588, %r587; // inline asm bfe.u32 %r344, %r357, %r554, %r567; // inline asm mul.wide.u32 %rd121, %r344, 4; add.s64 %rd122, %rd98, %rd121; ld.shared.u32 %r589, [%rd122]; // inline asm bfe.u32 %r348, %r357, %r558, %r567; // inline 
asm mul.wide.u32 %rd123, %r348, 4; add.s64 %rd124, %rd102, %rd123; ld.shared.u32 %r590, [%rd124]; add.s32 %r591, %r590, %r589; // inline asm bfe.u32 %r352, %r357, %r567, %r567; // inline asm mul.wide.u32 %rd125, %r352, 4; add.s64 %rd126, %rd106, %rd125; ld.shared.u32 %r592, [%rd126]; xor.b32 %r593, %r592, %r591; // inline asm bfe.u32 %r356, %r357, %r275, %r567; // inline asm mul.wide.u32 %rd127, %r356, 4; add.s64 %rd128, %rd110, %rd127; ld.shared.u32 %r594, [%rd128]; add.s32 %r595, %r594, %r593; xor.b32 %r596, %r3889, %r341; xor.b32 %r373, %r596, %r595; // inline asm bfe.u32 %r360, %r373, %r554, %r567; // inline asm mul.wide.u32 %rd129, %r360, 4; add.s64 %rd130, %rd98, %rd129; ld.shared.u32 %r597, [%rd130]; // inline asm bfe.u32 %r364, %r373, %r558, %r567; // inline asm mul.wide.u32 %rd131, %r364, 4; add.s64 %rd132, %rd102, %rd131; ld.shared.u32 %r598, [%rd132]; add.s32 %r599, %r598, %r597; // inline asm bfe.u32 %r368, %r373, %r567, %r567; // inline asm mul.wide.u32 %rd133, %r368, 4; add.s64 %rd134, %rd106, %rd133; ld.shared.u32 %r600, [%rd134]; xor.b32 %r601, %r600, %r599; // inline asm bfe.u32 %r372, %r373, %r275, %r567; // inline asm mul.wide.u32 %rd135, %r372, 4; add.s64 %rd136, %rd110, %rd135; ld.shared.u32 %r602, [%rd136]; add.s32 %r603, %r602, %r601; xor.b32 %r604, %r3888, %r357; xor.b32 %r389, %r604, %r603; // inline asm bfe.u32 %r376, %r389, %r554, %r567; // inline asm mul.wide.u32 %rd137, %r376, 4; add.s64 %rd138, %rd98, %rd137; ld.shared.u32 %r605, [%rd138]; // inline asm bfe.u32 %r380, %r389, %r558, %r567; // inline asm mul.wide.u32 %rd139, %r380, 4; add.s64 %rd140, %rd102, %rd139; ld.shared.u32 %r606, [%rd140]; add.s32 %r607, %r606, %r605; // inline asm bfe.u32 %r384, %r389, %r567, %r567; // inline asm mul.wide.u32 %rd141, %r384, 4; add.s64 %rd142, %rd106, %rd141; ld.shared.u32 %r608, [%rd142]; xor.b32 %r609, %r608, %r607; // inline asm bfe.u32 %r388, %r389, %r275, %r567; // inline asm mul.wide.u32 %rd143, %r388, 4; add.s64 %rd144, %rd110, %rd143; 
ld.shared.u32 %r610, [%rd144]; add.s32 %r611, %r610, %r609; xor.b32 %r612, %r3887, %r373; xor.b32 %r405, %r612, %r611; // inline asm bfe.u32 %r392, %r405, %r554, %r567; // inline asm mul.wide.u32 %rd145, %r392, 4; add.s64 %rd146, %rd98, %rd145; ld.shared.u32 %r613, [%rd146]; // inline asm bfe.u32 %r396, %r405, %r558, %r567; // inline asm mul.wide.u32 %rd147, %r396, 4; add.s64 %rd148, %rd102, %rd147; ld.shared.u32 %r614, [%rd148]; add.s32 %r615, %r614, %r613; // inline asm bfe.u32 %r400, %r405, %r567, %r567; // inline asm mul.wide.u32 %rd149, %r400, 4; add.s64 %rd150, %rd106, %rd149; ld.shared.u32 %r616, [%rd150]; xor.b32 %r617, %r616, %r615; // inline asm bfe.u32 %r404, %r405, %r275, %r567; // inline asm mul.wide.u32 %rd151, %r404, 4; add.s64 %rd152, %rd110, %rd151; ld.shared.u32 %r618, [%rd152]; add.s32 %r619, %r618, %r617; xor.b32 %r620, %r3886, %r389; xor.b32 %r421, %r620, %r619; // inline asm bfe.u32 %r408, %r421, %r554, %r567; // inline asm mul.wide.u32 %rd153, %r408, 4; add.s64 %rd154, %rd98, %rd153; ld.shared.u32 %r621, [%rd154]; // inline asm bfe.u32 %r412, %r421, %r558, %r567; // inline asm mul.wide.u32 %rd155, %r412, 4; add.s64 %rd156, %rd102, %rd155; ld.shared.u32 %r622, [%rd156]; add.s32 %r623, %r622, %r621; // inline asm bfe.u32 %r416, %r421, %r567, %r567; // inline asm mul.wide.u32 %rd157, %r416, 4; add.s64 %rd158, %rd106, %rd157; ld.shared.u32 %r624, [%rd158]; xor.b32 %r625, %r624, %r623; // inline asm bfe.u32 %r420, %r421, %r275, %r567; // inline asm mul.wide.u32 %rd159, %r420, 4; add.s64 %rd160, %rd110, %rd159; ld.shared.u32 %r626, [%rd160]; add.s32 %r627, %r626, %r625; xor.b32 %r628, %r3885, %r405; xor.b32 %r437, %r628, %r627; // inline asm bfe.u32 %r424, %r437, %r554, %r567; // inline asm mul.wide.u32 %rd161, %r424, 4; add.s64 %rd162, %rd98, %rd161; ld.shared.u32 %r629, [%rd162]; // inline asm bfe.u32 %r428, %r437, %r558, %r567; // inline asm mul.wide.u32 %rd163, %r428, 4; add.s64 %rd164, %rd102, %rd163; ld.shared.u32 %r630, [%rd164]; add.s32 
%r631, %r630, %r629; // inline asm bfe.u32 %r432, %r437, %r567, %r567; // inline asm mul.wide.u32 %rd165, %r432, 4; add.s64 %rd166, %rd106, %rd165; ld.shared.u32 %r632, [%rd166]; xor.b32 %r633, %r632, %r631; // inline asm bfe.u32 %r436, %r437, %r275, %r567; // inline asm mul.wide.u32 %rd167, %r436, 4; add.s64 %rd168, %rd110, %rd167; ld.shared.u32 %r634, [%rd168]; add.s32 %r635, %r634, %r633; xor.b32 %r636, %r3884, %r421; xor.b32 %r453, %r636, %r635; // inline asm bfe.u32 %r440, %r453, %r554, %r567; // inline asm mul.wide.u32 %rd169, %r440, 4; add.s64 %rd170, %rd98, %rd169; ld.shared.u32 %r637, [%rd170]; // inline asm bfe.u32 %r444, %r453, %r558, %r567; // inline asm mul.wide.u32 %rd171, %r444, 4; add.s64 %rd172, %rd102, %rd171; ld.shared.u32 %r638, [%rd172]; add.s32 %r639, %r638, %r637; // inline asm bfe.u32 %r448, %r453, %r567, %r567; // inline asm mul.wide.u32 %rd173, %r448, 4; add.s64 %rd174, %rd106, %rd173; ld.shared.u32 %r640, [%rd174]; xor.b32 %r641, %r640, %r639; // inline asm bfe.u32 %r452, %r453, %r275, %r567; // inline asm mul.wide.u32 %rd175, %r452, 4; add.s64 %rd176, %rd110, %rd175; ld.shared.u32 %r642, [%rd176]; add.s32 %r643, %r642, %r641; xor.b32 %r644, %r3883, %r437; xor.b32 %r469, %r644, %r643; // inline asm bfe.u32 %r456, %r469, %r554, %r567; // inline asm mul.wide.u32 %rd177, %r456, 4; add.s64 %rd178, %rd98, %rd177; ld.shared.u32 %r645, [%rd178]; // inline asm bfe.u32 %r460, %r469, %r558, %r567; // inline asm mul.wide.u32 %rd179, %r460, 4; add.s64 %rd180, %rd102, %rd179; ld.shared.u32 %r646, [%rd180]; add.s32 %r647, %r646, %r645; // inline asm bfe.u32 %r464, %r469, %r567, %r567; // inline asm mul.wide.u32 %rd181, %r464, 4; add.s64 %rd182, %rd106, %rd181; ld.shared.u32 %r648, [%rd182]; xor.b32 %r649, %r648, %r647; // inline asm bfe.u32 %r468, %r469, %r275, %r567; // inline asm mul.wide.u32 %rd183, %r468, 4; add.s64 %rd184, %rd110, %rd183; ld.shared.u32 %r650, [%rd184]; add.s32 %r651, %r650, %r649; xor.b32 %r652, %r3882, %r453; xor.b32 %r485, 
%r652, %r651; // inline asm bfe.u32 %r472, %r485, %r554, %r567; // inline asm mul.wide.u32 %rd185, %r472, 4; add.s64 %rd186, %rd98, %rd185; ld.shared.u32 %r653, [%rd186]; // inline asm bfe.u32 %r476, %r485, %r558, %r567; // inline asm mul.wide.u32 %rd187, %r476, 4; add.s64 %rd188, %rd102, %rd187; ld.shared.u32 %r654, [%rd188]; add.s32 %r655, %r654, %r653; // inline asm bfe.u32 %r480, %r485, %r567, %r567; // inline asm mul.wide.u32 %rd189, %r480, 4; add.s64 %rd190, %rd106, %rd189; ld.shared.u32 %r656, [%rd190]; xor.b32 %r657, %r656, %r655; // inline asm bfe.u32 %r484, %r485, %r275, %r567; // inline asm mul.wide.u32 %rd191, %r484, 4; add.s64 %rd192, %rd110, %rd191; ld.shared.u32 %r658, [%rd192]; add.s32 %r659, %r658, %r657; xor.b32 %r660, %r3881, %r469; xor.b32 %r501, %r660, %r659; // inline asm bfe.u32 %r488, %r501, %r554, %r567; // inline asm mul.wide.u32 %rd193, %r488, 4; add.s64 %rd194, %rd98, %rd193; ld.shared.u32 %r661, [%rd194]; // inline asm bfe.u32 %r492, %r501, %r558, %r567; // inline asm mul.wide.u32 %rd195, %r492, 4; add.s64 %rd196, %rd102, %rd195; ld.shared.u32 %r662, [%rd196]; add.s32 %r663, %r662, %r661; // inline asm bfe.u32 %r496, %r501, %r567, %r567; // inline asm mul.wide.u32 %rd197, %r496, 4; add.s64 %rd198, %rd106, %rd197; ld.shared.u32 %r664, [%rd198]; xor.b32 %r665, %r664, %r663; // inline asm bfe.u32 %r500, %r501, %r275, %r567; // inline asm mul.wide.u32 %rd199, %r500, 4; add.s64 %rd200, %rd110, %rd199; ld.shared.u32 %r666, [%rd200]; add.s32 %r667, %r666, %r665; xor.b32 %r668, %r3880, %r485; xor.b32 %r517, %r668, %r667; // inline asm bfe.u32 %r504, %r517, %r554, %r567; // inline asm mul.wide.u32 %rd201, %r504, 4; add.s64 %rd202, %rd98, %rd201; ld.shared.u32 %r669, [%rd202]; // inline asm bfe.u32 %r508, %r517, %r558, %r567; // inline asm mul.wide.u32 %rd203, %r508, 4; add.s64 %rd204, %rd102, %rd203; ld.shared.u32 %r670, [%rd204]; add.s32 %r671, %r670, %r669; // inline asm bfe.u32 %r512, %r517, %r567, %r567; // inline asm mul.wide.u32 %rd205, 
%r512, 4; add.s64 %rd206, %rd106, %rd205; ld.shared.u32 %r672, [%rd206]; xor.b32 %r673, %r672, %r671; // inline asm bfe.u32 %r516, %r517, %r275, %r567; // inline asm mul.wide.u32 %rd207, %r516, 4; add.s64 %rd208, %rd110, %rd207; ld.shared.u32 %r674, [%rd208]; add.s32 %r675, %r674, %r673; xor.b32 %r676, %r3879, %r501; xor.b32 %r533, %r676, %r675; // inline asm bfe.u32 %r520, %r533, %r554, %r567; // inline asm mul.wide.u32 %rd209, %r520, 4; add.s64 %rd210, %rd98, %rd209; ld.shared.u32 %r677, [%rd210]; // inline asm bfe.u32 %r524, %r533, %r558, %r567; // inline asm mul.wide.u32 %rd211, %r524, 4; add.s64 %rd212, %rd102, %rd211; ld.shared.u32 %r678, [%rd212]; add.s32 %r679, %r678, %r677; // inline asm bfe.u32 %r528, %r533, %r567, %r567; // inline asm mul.wide.u32 %rd213, %r528, 4; add.s64 %rd214, %rd106, %rd213; ld.shared.u32 %r680, [%rd214]; xor.b32 %r681, %r680, %r679; // inline asm bfe.u32 %r532, %r533, %r275, %r567; // inline asm mul.wide.u32 %rd215, %r532, 4; add.s64 %rd216, %rd110, %rd215; ld.shared.u32 %r682, [%rd216]; add.s32 %r683, %r682, %r681; xor.b32 %r684, %r3878, %r517; xor.b32 %r549, %r684, %r683; // inline asm bfe.u32 %r536, %r549, %r554, %r567; // inline asm mul.wide.u32 %rd217, %r536, 4; add.s64 %rd218, %rd98, %rd217; ld.shared.u32 %r685, [%rd218]; // inline asm bfe.u32 %r540, %r549, %r558, %r567; // inline asm mul.wide.u32 %rd219, %r540, 4; add.s64 %rd220, %rd102, %rd219; ld.shared.u32 %r686, [%rd220]; add.s32 %r687, %r686, %r685; // inline asm bfe.u32 %r544, %r549, %r567, %r567; // inline asm mul.wide.u32 %rd221, %r544, 4; add.s64 %rd222, %rd106, %rd221; ld.shared.u32 %r688, [%rd222]; xor.b32 %r689, %r688, %r687; // inline asm bfe.u32 %r548, %r549, %r275, %r567; // inline asm mul.wide.u32 %rd223, %r548, 4; add.s64 %rd224, %rd110, %rd223; ld.shared.u32 %r690, [%rd224]; add.s32 %r691, %r690, %r689; xor.b32 %r692, %r3877, %r533; xor.b32 %r565, %r692, %r691; // inline asm bfe.u32 %r552, %r565, %r554, %r567; // inline asm mul.wide.u32 %rd225, %r552, 4; 
add.s64 %rd226, %rd98, %rd225; ld.shared.u32 %r693, [%rd226]; // inline asm bfe.u32 %r556, %r565, %r558, %r567; // inline asm mul.wide.u32 %rd227, %r556, 4; add.s64 %rd228, %rd102, %rd227; ld.shared.u32 %r694, [%rd228]; add.s32 %r695, %r694, %r693; // inline asm bfe.u32 %r560, %r565, %r567, %r567; // inline asm mul.wide.u32 %rd229, %r560, 4; add.s64 %rd230, %rd106, %rd229; ld.shared.u32 %r696, [%rd230]; xor.b32 %r697, %r696, %r695; // inline asm bfe.u32 %r564, %r565, %r275, %r567; // inline asm mul.wide.u32 %rd231, %r564, 4; add.s64 %rd232, %rd110, %rd231; ld.shared.u32 %r698, [%rd232]; add.s32 %r699, %r698, %r697; xor.b32 %r700, %r3876, %r549; xor.b32 %r3897, %r700, %r699; xor.b32 %r3898, %r3875, %r565; st.local.v2.u32 [%rd1336], {%r3898, %r3897}; ld.local.v2.u32 {%r3892, %r3891}, [%rd93]; ld.local.v2.u32 {%r3890, %r3889}, [%rd33]; ld.local.v2.u32 {%r3888, %r3887}, [%rd33+8]; ld.local.v2.u32 {%r3886, %r3885}, [%rd33+16]; ld.local.v2.u32 {%r3884, %r3883}, [%rd33+24]; ld.local.v2.u32 {%r3882, %r3881}, [%rd33+32]; ld.local.v2.u32 {%r3880, %r3879}, [%rd33+40]; ld.local.v2.u32 {%r3878, %r3877}, [%rd33+48]; ld.local.v2.u32 {%r3876, %r3875}, [%rd33+56]; add.s64 %rd1336, %rd1336, 8; add.s32 %r3893, %r3893, 2; setp.lt.u32 %p12, %r3893, 18; @%p12 bra BB3_20; mul.wide.s32 %rd235, %r1, 1024; add.s64 %rd1337, %rd84, %rd235; mov.u32 %r719, 0; mov.u32 %r3896, %r719; BB3_22: mov.u32 %r3863, 16; mov.u32 %r3862, 8; mov.u32 %r3861, 24; xor.b32 %r1232, %r39, %r3898; xor.b32 %r733, %r1232, %r3892; // inline asm bfe.u32 %r720, %r733, %r3861, %r3862; // inline asm mul.wide.u32 %rd240, %r720, 4; add.s64 %rd241, %rd98, %rd240; ld.shared.u32 %r1233, [%rd241]; // inline asm bfe.u32 %r724, %r733, %r3863, %r3862; // inline asm mul.wide.u32 %rd244, %r724, 4; add.s64 %rd245, %rd102, %rd244; ld.shared.u32 %r1234, [%rd245]; add.s32 %r1235, %r1234, %r1233; // inline asm bfe.u32 %r728, %r733, %r3862, %r3862; // inline asm mul.wide.u32 %rd248, %r728, 4; add.s64 %rd249, %rd106, %rd248; ld.shared.u32 
%r1236, [%rd249]; xor.b32 %r1237, %r1236, %r1235; // inline asm bfe.u32 %r732, %r733, %r719, %r3862; // inline asm mul.wide.u32 %rd252, %r732, 4; add.s64 %rd253, %rd110, %rd252; ld.shared.u32 %r1238, [%rd253]; add.s32 %r1239, %r1238, %r1237; xor.b32 %r1240, %r40, %r3897; xor.b32 %r1241, %r1240, %r3891; xor.b32 %r749, %r1241, %r1239; // inline asm bfe.u32 %r736, %r749, %r3861, %r3862; // inline asm mul.wide.u32 %rd254, %r736, 4; add.s64 %rd255, %rd98, %rd254; ld.shared.u32 %r1242, [%rd255]; // inline asm bfe.u32 %r740, %r749, %r3863, %r3862; // inline asm mul.wide.u32 %rd256, %r740, 4; add.s64 %rd257, %rd102, %rd256; ld.shared.u32 %r1243, [%rd257]; add.s32 %r1244, %r1243, %r1242; // inline asm bfe.u32 %r744, %r749, %r3862, %r3862; // inline asm mul.wide.u32 %rd258, %r744, 4; add.s64 %rd259, %rd106, %rd258; ld.shared.u32 %r1245, [%rd259]; xor.b32 %r1246, %r1245, %r1244; // inline asm bfe.u32 %r748, %r749, %r719, %r3862; // inline asm mul.wide.u32 %rd260, %r748, 4; add.s64 %rd261, %rd110, %rd260; ld.shared.u32 %r1247, [%rd261]; add.s32 %r1248, %r1247, %r1246; xor.b32 %r1249, %r3890, %r733; xor.b32 %r765, %r1249, %r1248; // inline asm bfe.u32 %r752, %r765, %r3861, %r3862; // inline asm mul.wide.u32 %rd262, %r752, 4; add.s64 %rd263, %rd98, %rd262; ld.shared.u32 %r1250, [%rd263]; // inline asm bfe.u32 %r756, %r765, %r3863, %r3862; // inline asm mul.wide.u32 %rd264, %r756, 4; add.s64 %rd265, %rd102, %rd264; ld.shared.u32 %r1251, [%rd265]; add.s32 %r1252, %r1251, %r1250; // inline asm bfe.u32 %r760, %r765, %r3862, %r3862; // inline asm mul.wide.u32 %rd266, %r760, 4; add.s64 %rd267, %rd106, %rd266; ld.shared.u32 %r1253, [%rd267]; xor.b32 %r1254, %r1253, %r1252; // inline asm bfe.u32 %r764, %r765, %r719, %r3862; // inline asm mul.wide.u32 %rd268, %r764, 4; add.s64 %rd269, %rd110, %rd268; ld.shared.u32 %r1255, [%rd269]; add.s32 %r1256, %r1255, %r1254; xor.b32 %r1257, %r3889, %r749; xor.b32 %r781, %r1257, %r1256; // inline asm bfe.u32 %r768, %r781, %r3861, %r3862; // inline 
asm mul.wide.u32 %rd270, %r768, 4; add.s64 %rd271, %rd98, %rd270; ld.shared.u32 %r1258, [%rd271]; // inline asm bfe.u32 %r772, %r781, %r3863, %r3862; // inline asm mul.wide.u32 %rd272, %r772, 4; add.s64 %rd273, %rd102, %rd272; ld.shared.u32 %r1259, [%rd273]; add.s32 %r1260, %r1259, %r1258; // inline asm bfe.u32 %r776, %r781, %r3862, %r3862; // inline asm mul.wide.u32 %rd274, %r776, 4; add.s64 %rd275, %rd106, %rd274; ld.shared.u32 %r1261, [%rd275]; xor.b32 %r1262, %r1261, %r1260; // inline asm bfe.u32 %r780, %r781, %r719, %r3862; // inline asm mul.wide.u32 %rd276, %r780, 4; add.s64 %rd277, %rd110, %rd276; ld.shared.u32 %r1263, [%rd277]; add.s32 %r1264, %r1263, %r1262; xor.b32 %r1265, %r3888, %r765; xor.b32 %r797, %r1265, %r1264; // inline asm bfe.u32 %r784, %r797, %r3861, %r3862; // inline asm mul.wide.u32 %rd278, %r784, 4; add.s64 %rd279, %rd98, %rd278; ld.shared.u32 %r1266, [%rd279]; // inline asm bfe.u32 %r788, %r797, %r3863, %r3862; // inline asm mul.wide.u32 %rd280, %r788, 4; add.s64 %rd281, %rd102, %rd280; ld.shared.u32 %r1267, [%rd281]; add.s32 %r1268, %r1267, %r1266; // inline asm bfe.u32 %r792, %r797, %r3862, %r3862; // inline asm mul.wide.u32 %rd282, %r792, 4; add.s64 %rd283, %rd106, %rd282; ld.shared.u32 %r1269, [%rd283]; xor.b32 %r1270, %r1269, %r1268; // inline asm bfe.u32 %r796, %r797, %r719, %r3862; // inline asm mul.wide.u32 %rd284, %r796, 4; add.s64 %rd285, %rd110, %rd284; ld.shared.u32 %r1271, [%rd285]; add.s32 %r1272, %r1271, %r1270; xor.b32 %r1273, %r3887, %r781; xor.b32 %r813, %r1273, %r1272; // inline asm bfe.u32 %r800, %r813, %r3861, %r3862; // inline asm mul.wide.u32 %rd286, %r800, 4; add.s64 %rd287, %rd98, %rd286; ld.shared.u32 %r1274, [%rd287]; // inline asm bfe.u32 %r804, %r813, %r3863, %r3862; // inline asm mul.wide.u32 %rd288, %r804, 4; add.s64 %rd289, %rd102, %rd288; ld.shared.u32 %r1275, [%rd289]; add.s32 %r1276, %r1275, %r1274; // inline asm bfe.u32 %r808, %r813, %r3862, %r3862; // inline asm mul.wide.u32 %rd290, %r808, 4; add.s64 
%rd291, %rd106, %rd290; ld.shared.u32 %r1277, [%rd291]; xor.b32 %r1278, %r1277, %r1276; // inline asm bfe.u32 %r812, %r813, %r719, %r3862; // inline asm mul.wide.u32 %rd292, %r812, 4; add.s64 %rd293, %rd110, %rd292; ld.shared.u32 %r1279, [%rd293]; add.s32 %r1280, %r1279, %r1278; xor.b32 %r1281, %r3886, %r797; xor.b32 %r829, %r1281, %r1280; // inline asm bfe.u32 %r816, %r829, %r3861, %r3862; // inline asm mul.wide.u32 %rd294, %r816, 4; add.s64 %rd295, %rd98, %rd294; ld.shared.u32 %r1282, [%rd295]; // inline asm bfe.u32 %r820, %r829, %r3863, %r3862; // inline asm mul.wide.u32 %rd296, %r820, 4; add.s64 %rd297, %rd102, %rd296; ld.shared.u32 %r1283, [%rd297]; add.s32 %r1284, %r1283, %r1282; // inline asm bfe.u32 %r824, %r829, %r3862, %r3862; // inline asm mul.wide.u32 %rd298, %r824, 4; add.s64 %rd299, %rd106, %rd298; ld.shared.u32 %r1285, [%rd299]; xor.b32 %r1286, %r1285, %r1284; // inline asm bfe.u32 %r828, %r829, %r719, %r3862; // inline asm mul.wide.u32 %rd300, %r828, 4; add.s64 %rd301, %rd110, %rd300; ld.shared.u32 %r1287, [%rd301]; add.s32 %r1288, %r1287, %r1286; xor.b32 %r1289, %r3885, %r813; xor.b32 %r845, %r1289, %r1288; // inline asm bfe.u32 %r832, %r845, %r3861, %r3862; // inline asm mul.wide.u32 %rd302, %r832, 4; add.s64 %rd303, %rd98, %rd302; ld.shared.u32 %r1290, [%rd303]; // inline asm bfe.u32 %r836, %r845, %r3863, %r3862; // inline asm mul.wide.u32 %rd304, %r836, 4; add.s64 %rd305, %rd102, %rd304; ld.shared.u32 %r1291, [%rd305]; add.s32 %r1292, %r1291, %r1290; // inline asm bfe.u32 %r840, %r845, %r3862, %r3862; // inline asm mul.wide.u32 %rd306, %r840, 4; add.s64 %rd307, %rd106, %rd306; ld.shared.u32 %r1293, [%rd307]; xor.b32 %r1294, %r1293, %r1292; // inline asm bfe.u32 %r844, %r845, %r719, %r3862; // inline asm mul.wide.u32 %rd308, %r844, 4; add.s64 %rd309, %rd110, %rd308; ld.shared.u32 %r1295, [%rd309]; add.s32 %r1296, %r1295, %r1294; xor.b32 %r1297, %r3884, %r829; xor.b32 %r861, %r1297, %r1296; // inline asm bfe.u32 %r848, %r861, %r3861, %r3862; // 
inline asm mul.wide.u32 %rd310, %r848, 4; add.s64 %rd311, %rd98, %rd310; ld.shared.u32 %r1298, [%rd311]; // inline asm bfe.u32 %r852, %r861, %r3863, %r3862; // inline asm mul.wide.u32 %rd312, %r852, 4; add.s64 %rd313, %rd102, %rd312; ld.shared.u32 %r1299, [%rd313]; add.s32 %r1300, %r1299, %r1298; // inline asm bfe.u32 %r856, %r861, %r3862, %r3862; // inline asm mul.wide.u32 %rd314, %r856, 4; add.s64 %rd315, %rd106, %rd314; ld.shared.u32 %r1301, [%rd315]; xor.b32 %r1302, %r1301, %r1300; // inline asm bfe.u32 %r860, %r861, %r719, %r3862; // inline asm mul.wide.u32 %rd316, %r860, 4; add.s64 %rd317, %rd110, %rd316; ld.shared.u32 %r1303, [%rd317]; add.s32 %r1304, %r1303, %r1302; xor.b32 %r1305, %r3883, %r845; xor.b32 %r877, %r1305, %r1304; // inline asm bfe.u32 %r864, %r877, %r3861, %r3862; // inline asm mul.wide.u32 %rd318, %r864, 4; add.s64 %rd319, %rd98, %rd318; ld.shared.u32 %r1306, [%rd319]; // inline asm bfe.u32 %r868, %r877, %r3863, %r3862; // inline asm mul.wide.u32 %rd320, %r868, 4; add.s64 %rd321, %rd102, %rd320; ld.shared.u32 %r1307, [%rd321]; add.s32 %r1308, %r1307, %r1306; // inline asm bfe.u32 %r872, %r877, %r3862, %r3862; // inline asm mul.wide.u32 %rd322, %r872, 4; add.s64 %rd323, %rd106, %rd322; ld.shared.u32 %r1309, [%rd323]; xor.b32 %r1310, %r1309, %r1308; // inline asm bfe.u32 %r876, %r877, %r719, %r3862; // inline asm mul.wide.u32 %rd324, %r876, 4; add.s64 %rd325, %rd110, %rd324; ld.shared.u32 %r1311, [%rd325]; add.s32 %r1312, %r1311, %r1310; xor.b32 %r1313, %r3882, %r861; xor.b32 %r893, %r1313, %r1312; // inline asm bfe.u32 %r880, %r893, %r3861, %r3862; // inline asm mul.wide.u32 %rd326, %r880, 4; add.s64 %rd327, %rd98, %rd326; ld.shared.u32 %r1314, [%rd327]; // inline asm bfe.u32 %r884, %r893, %r3863, %r3862; // inline asm mul.wide.u32 %rd328, %r884, 4; add.s64 %rd329, %rd102, %rd328; ld.shared.u32 %r1315, [%rd329]; add.s32 %r1316, %r1315, %r1314; // inline asm bfe.u32 %r888, %r893, %r3862, %r3862; // inline asm mul.wide.u32 %rd330, %r888, 4; 
add.s64 %rd331, %rd106, %rd330; ld.shared.u32 %r1317, [%rd331]; xor.b32 %r1318, %r1317, %r1316; // inline asm bfe.u32 %r892, %r893, %r719, %r3862; // inline asm mul.wide.u32 %rd332, %r892, 4; add.s64 %rd333, %rd110, %rd332; ld.shared.u32 %r1319, [%rd333]; add.s32 %r1320, %r1319, %r1318; xor.b32 %r1321, %r3881, %r877; xor.b32 %r909, %r1321, %r1320; // inline asm bfe.u32 %r896, %r909, %r3861, %r3862; // inline asm mul.wide.u32 %rd334, %r896, 4; add.s64 %rd335, %rd98, %rd334; ld.shared.u32 %r1322, [%rd335]; // inline asm bfe.u32 %r900, %r909, %r3863, %r3862; // inline asm mul.wide.u32 %rd336, %r900, 4; add.s64 %rd337, %rd102, %rd336; ld.shared.u32 %r1323, [%rd337]; add.s32 %r1324, %r1323, %r1322; // inline asm bfe.u32 %r904, %r909, %r3862, %r3862; // inline asm mul.wide.u32 %rd338, %r904, 4; add.s64 %rd339, %rd106, %rd338; ld.shared.u32 %r1325, [%rd339]; xor.b32 %r1326, %r1325, %r1324; // inline asm bfe.u32 %r908, %r909, %r719, %r3862; // inline asm mul.wide.u32 %rd340, %r908, 4; add.s64 %rd341, %rd110, %rd340; ld.shared.u32 %r1327, [%rd341]; add.s32 %r1328, %r1327, %r1326; xor.b32 %r1329, %r3880, %r893; xor.b32 %r925, %r1329, %r1328; // inline asm bfe.u32 %r912, %r925, %r3861, %r3862; // inline asm mul.wide.u32 %rd342, %r912, 4; add.s64 %rd343, %rd98, %rd342; ld.shared.u32 %r1330, [%rd343]; // inline asm bfe.u32 %r916, %r925, %r3863, %r3862; // inline asm mul.wide.u32 %rd344, %r916, 4; add.s64 %rd345, %rd102, %rd344; ld.shared.u32 %r1331, [%rd345]; add.s32 %r1332, %r1331, %r1330; // inline asm bfe.u32 %r920, %r925, %r3862, %r3862; // inline asm mul.wide.u32 %rd346, %r920, 4; add.s64 %rd347, %rd106, %rd346; ld.shared.u32 %r1333, [%rd347]; xor.b32 %r1334, %r1333, %r1332; // inline asm bfe.u32 %r924, %r925, %r719, %r3862; // inline asm mul.wide.u32 %rd348, %r924, 4; add.s64 %rd349, %rd110, %rd348; ld.shared.u32 %r1335, [%rd349]; add.s32 %r1336, %r1335, %r1334; xor.b32 %r1337, %r3879, %r909; xor.b32 %r941, %r1337, %r1336; // inline asm bfe.u32 %r928, %r941, %r3861, 
%r3862; // inline asm mul.wide.u32 %rd350, %r928, 4; add.s64 %rd351, %rd98, %rd350; ld.shared.u32 %r1338, [%rd351]; // inline asm bfe.u32 %r932, %r941, %r3863, %r3862; // inline asm mul.wide.u32 %rd352, %r932, 4; add.s64 %rd353, %rd102, %rd352; ld.shared.u32 %r1339, [%rd353]; add.s32 %r1340, %r1339, %r1338; // inline asm bfe.u32 %r936, %r941, %r3862, %r3862; // inline asm mul.wide.u32 %rd354, %r936, 4; add.s64 %rd355, %rd106, %rd354; ld.shared.u32 %r1341, [%rd355]; xor.b32 %r1342, %r1341, %r1340; // inline asm bfe.u32 %r940, %r941, %r719, %r3862; // inline asm mul.wide.u32 %rd356, %r940, 4; add.s64 %rd357, %rd110, %rd356; ld.shared.u32 %r1343, [%rd357]; add.s32 %r1344, %r1343, %r1342; xor.b32 %r1345, %r3878, %r925; xor.b32 %r957, %r1345, %r1344; // inline asm bfe.u32 %r944, %r957, %r3861, %r3862; // inline asm mul.wide.u32 %rd358, %r944, 4; add.s64 %rd359, %rd98, %rd358; ld.shared.u32 %r1346, [%rd359]; // inline asm bfe.u32 %r948, %r957, %r3863, %r3862; // inline asm mul.wide.u32 %rd360, %r948, 4; add.s64 %rd361, %rd102, %rd360; ld.shared.u32 %r1347, [%rd361]; add.s32 %r1348, %r1347, %r1346; // inline asm bfe.u32 %r952, %r957, %r3862, %r3862; // inline asm mul.wide.u32 %rd362, %r952, 4; add.s64 %rd363, %rd106, %rd362; ld.shared.u32 %r1349, [%rd363]; xor.b32 %r1350, %r1349, %r1348; // inline asm bfe.u32 %r956, %r957, %r719, %r3862; // inline asm mul.wide.u32 %rd364, %r956, 4; add.s64 %rd365, %rd110, %rd364; ld.shared.u32 %r1351, [%rd365]; add.s32 %r1352, %r1351, %r1350; xor.b32 %r1353, %r3877, %r941; xor.b32 %r973, %r1353, %r1352; // inline asm bfe.u32 %r960, %r973, %r3861, %r3862; // inline asm mul.wide.u32 %rd366, %r960, 4; add.s64 %rd367, %rd98, %rd366; ld.shared.u32 %r1354, [%rd367]; // inline asm bfe.u32 %r964, %r973, %r3863, %r3862; // inline asm mul.wide.u32 %rd368, %r964, 4; add.s64 %rd369, %rd102, %rd368; ld.shared.u32 %r1355, [%rd369]; add.s32 %r1356, %r1355, %r1354; // inline asm bfe.u32 %r968, %r973, %r3862, %r3862; // inline asm mul.wide.u32 %rd370, 
%r968, 4; add.s64 %rd371, %rd106, %rd370; ld.shared.u32 %r1357, [%rd371]; xor.b32 %r1358, %r1357, %r1356; // inline asm bfe.u32 %r972, %r973, %r719, %r3862; // inline asm mul.wide.u32 %rd372, %r972, 4; add.s64 %rd373, %rd110, %rd372; ld.shared.u32 %r1359, [%rd373]; add.s32 %r1360, %r1359, %r1358; xor.b32 %r1361, %r3876, %r957; xor.b32 %r1362, %r1361, %r1360; xor.b32 %r1363, %r3875, %r973; st.shared.u32 [%rd1337], %r1363; st.shared.u32 [%rd1337+4], %r1362; xor.b32 %r1364, %r1363, %r37; xor.b32 %r989, %r1364, %r3892; // inline asm bfe.u32 %r976, %r989, %r3861, %r3862; // inline asm mul.wide.u32 %rd374, %r976, 4; add.s64 %rd375, %rd98, %rd374; ld.shared.u32 %r1365, [%rd375]; // inline asm bfe.u32 %r980, %r989, %r3863, %r3862; // inline asm mul.wide.u32 %rd376, %r980, 4; add.s64 %rd377, %rd102, %rd376; ld.shared.u32 %r1366, [%rd377]; add.s32 %r1367, %r1366, %r1365; // inline asm bfe.u32 %r984, %r989, %r3862, %r3862; // inline asm mul.wide.u32 %rd378, %r984, 4; add.s64 %rd379, %rd106, %rd378; ld.shared.u32 %r1368, [%rd379]; xor.b32 %r1369, %r1368, %r1367; // inline asm bfe.u32 %r988, %r989, %r719, %r3862; // inline asm mul.wide.u32 %rd380, %r988, 4; add.s64 %rd381, %rd110, %rd380; ld.shared.u32 %r1370, [%rd381]; add.s32 %r1371, %r1370, %r1369; xor.b32 %r1372, %r38, %r1362; xor.b32 %r1373, %r1372, %r3891; xor.b32 %r1005, %r1373, %r1371; // inline asm bfe.u32 %r992, %r1005, %r3861, %r3862; // inline asm mul.wide.u32 %rd382, %r992, 4; add.s64 %rd383, %rd98, %rd382; ld.shared.u32 %r1374, [%rd383]; // inline asm bfe.u32 %r996, %r1005, %r3863, %r3862; // inline asm mul.wide.u32 %rd384, %r996, 4; add.s64 %rd385, %rd102, %rd384; ld.shared.u32 %r1375, [%rd385]; add.s32 %r1376, %r1375, %r1374; // inline asm bfe.u32 %r1000, %r1005, %r3862, %r3862; // inline asm mul.wide.u32 %rd386, %r1000, 4; add.s64 %rd387, %rd106, %rd386; ld.shared.u32 %r1377, [%rd387]; xor.b32 %r1378, %r1377, %r1376; // inline asm bfe.u32 %r1004, %r1005, %r719, %r3862; // inline asm mul.wide.u32 %rd388, %r1004, 
4; add.s64 %rd389, %rd110, %rd388; ld.shared.u32 %r1379, [%rd389]; add.s32 %r1380, %r1379, %r1378; xor.b32 %r1381, %r3890, %r989; xor.b32 %r1021, %r1381, %r1380; // inline asm bfe.u32 %r1008, %r1021, %r3861, %r3862; // inline asm mul.wide.u32 %rd390, %r1008, 4; add.s64 %rd391, %rd98, %rd390; ld.shared.u32 %r1382, [%rd391]; // inline asm bfe.u32 %r1012, %r1021, %r3863, %r3862; // inline asm mul.wide.u32 %rd392, %r1012, 4; add.s64 %rd393, %rd102, %rd392; ld.shared.u32 %r1383, [%rd393]; add.s32 %r1384, %r1383, %r1382; // inline asm bfe.u32 %r1016, %r1021, %r3862, %r3862; // inline asm mul.wide.u32 %rd394, %r1016, 4; add.s64 %rd395, %rd106, %rd394; ld.shared.u32 %r1385, [%rd395]; xor.b32 %r1386, %r1385, %r1384; // inline asm bfe.u32 %r1020, %r1021, %r719, %r3862; // inline asm mul.wide.u32 %rd396, %r1020, 4; add.s64 %rd397, %rd110, %rd396; ld.shared.u32 %r1387, [%rd397]; add.s32 %r1388, %r1387, %r1386; xor.b32 %r1389, %r3889, %r1005; xor.b32 %r1037, %r1389, %r1388; // inline asm bfe.u32 %r1024, %r1037, %r3861, %r3862; // inline asm mul.wide.u32 %rd398, %r1024, 4; add.s64 %rd399, %rd98, %rd398; ld.shared.u32 %r1390, [%rd399]; // inline asm bfe.u32 %r1028, %r1037, %r3863, %r3862; // inline asm mul.wide.u32 %rd400, %r1028, 4; add.s64 %rd401, %rd102, %rd400; ld.shared.u32 %r1391, [%rd401]; add.s32 %r1392, %r1391, %r1390; // inline asm bfe.u32 %r1032, %r1037, %r3862, %r3862; // inline asm mul.wide.u32 %rd402, %r1032, 4; add.s64 %rd403, %rd106, %rd402; ld.shared.u32 %r1393, [%rd403]; xor.b32 %r1394, %r1393, %r1392; // inline asm bfe.u32 %r1036, %r1037, %r719, %r3862; // inline asm mul.wide.u32 %rd404, %r1036, 4; add.s64 %rd405, %rd110, %rd404; ld.shared.u32 %r1395, [%rd405]; add.s32 %r1396, %r1395, %r1394; xor.b32 %r1397, %r3888, %r1021; xor.b32 %r1053, %r1397, %r1396; // inline asm bfe.u32 %r1040, %r1053, %r3861, %r3862; // inline asm mul.wide.u32 %rd406, %r1040, 4; add.s64 %rd407, %rd98, %rd406; ld.shared.u32 %r1398, [%rd407]; // inline asm bfe.u32 %r1044, %r1053, %r3863, 
%r3862; // inline asm mul.wide.u32 %rd408, %r1044, 4; add.s64 %rd409, %rd102, %rd408; ld.shared.u32 %r1399, [%rd409]; add.s32 %r1400, %r1399, %r1398; // inline asm bfe.u32 %r1048, %r1053, %r3862, %r3862; // inline asm mul.wide.u32 %rd410, %r1048, 4; add.s64 %rd411, %rd106, %rd410; ld.shared.u32 %r1401, [%rd411]; xor.b32 %r1402, %r1401, %r1400; // inline asm bfe.u32 %r1052, %r1053, %r719, %r3862; // inline asm mul.wide.u32 %rd412, %r1052, 4; add.s64 %rd413, %rd110, %rd412; ld.shared.u32 %r1403, [%rd413]; add.s32 %r1404, %r1403, %r1402; xor.b32 %r1405, %r3887, %r1037; xor.b32 %r1069, %r1405, %r1404; // inline asm bfe.u32 %r1056, %r1069, %r3861, %r3862; // inline asm mul.wide.u32 %rd414, %r1056, 4; add.s64 %rd415, %rd98, %rd414; ld.shared.u32 %r1406, [%rd415]; // inline asm bfe.u32 %r1060, %r1069, %r3863, %r3862; // inline asm mul.wide.u32 %rd416, %r1060, 4; add.s64 %rd417, %rd102, %rd416; ld.shared.u32 %r1407, [%rd417]; add.s32 %r1408, %r1407, %r1406; // inline asm bfe.u32 %r1064, %r1069, %r3862, %r3862; // inline asm mul.wide.u32 %rd418, %r1064, 4; add.s64 %rd419, %rd106, %rd418; ld.shared.u32 %r1409, [%rd419]; xor.b32 %r1410, %r1409, %r1408; // inline asm bfe.u32 %r1068, %r1069, %r719, %r3862; // inline asm mul.wide.u32 %rd420, %r1068, 4; add.s64 %rd421, %rd110, %rd420; ld.shared.u32 %r1411, [%rd421]; add.s32 %r1412, %r1411, %r1410; xor.b32 %r1413, %r3886, %r1053; xor.b32 %r1085, %r1413, %r1412; // inline asm bfe.u32 %r1072, %r1085, %r3861, %r3862; // inline asm mul.wide.u32 %rd422, %r1072, 4; add.s64 %rd423, %rd98, %rd422; ld.shared.u32 %r1414, [%rd423]; // inline asm bfe.u32 %r1076, %r1085, %r3863, %r3862; // inline asm mul.wide.u32 %rd424, %r1076, 4; add.s64 %rd425, %rd102, %rd424; ld.shared.u32 %r1415, [%rd425]; add.s32 %r1416, %r1415, %r1414; // inline asm bfe.u32 %r1080, %r1085, %r3862, %r3862; // inline asm mul.wide.u32 %rd426, %r1080, 4; add.s64 %rd427, %rd106, %rd426; ld.shared.u32 %r1417, [%rd427]; xor.b32 %r1418, %r1417, %r1416; // inline asm bfe.u32 
%r1084, %r1085, %r719, %r3862; // inline asm mul.wide.u32 %rd428, %r1084, 4; add.s64 %rd429, %rd110, %rd428; ld.shared.u32 %r1419, [%rd429]; add.s32 %r1420, %r1419, %r1418; xor.b32 %r1421, %r3885, %r1069; xor.b32 %r1101, %r1421, %r1420; // inline asm bfe.u32 %r1088, %r1101, %r3861, %r3862; // inline asm mul.wide.u32 %rd430, %r1088, 4; add.s64 %rd431, %rd98, %rd430; ld.shared.u32 %r1422, [%rd431]; // inline asm bfe.u32 %r1092, %r1101, %r3863, %r3862; // inline asm mul.wide.u32 %rd432, %r1092, 4; add.s64 %rd433, %rd102, %rd432; ld.shared.u32 %r1423, [%rd433]; add.s32 %r1424, %r1423, %r1422; // inline asm bfe.u32 %r1096, %r1101, %r3862, %r3862; // inline asm mul.wide.u32 %rd434, %r1096, 4; add.s64 %rd435, %rd106, %rd434; ld.shared.u32 %r1425, [%rd435]; xor.b32 %r1426, %r1425, %r1424; // inline asm bfe.u32 %r1100, %r1101, %r719, %r3862; // inline asm mul.wide.u32 %rd436, %r1100, 4; add.s64 %rd437, %rd110, %rd436; ld.shared.u32 %r1427, [%rd437]; add.s32 %r1428, %r1427, %r1426; xor.b32 %r1429, %r3884, %r1085; xor.b32 %r1117, %r1429, %r1428; // inline asm bfe.u32 %r1104, %r1117, %r3861, %r3862; // inline asm mul.wide.u32 %rd438, %r1104, 4; add.s64 %rd439, %rd98, %rd438; ld.shared.u32 %r1430, [%rd439]; // inline asm bfe.u32 %r1108, %r1117, %r3863, %r3862; // inline asm mul.wide.u32 %rd440, %r1108, 4; add.s64 %rd441, %rd102, %rd440; ld.shared.u32 %r1431, [%rd441]; add.s32 %r1432, %r1431, %r1430; // inline asm bfe.u32 %r1112, %r1117, %r3862, %r3862; // inline asm mul.wide.u32 %rd442, %r1112, 4; add.s64 %rd443, %rd106, %rd442; ld.shared.u32 %r1433, [%rd443]; xor.b32 %r1434, %r1433, %r1432; // inline asm bfe.u32 %r1116, %r1117, %r719, %r3862; // inline asm mul.wide.u32 %rd444, %r1116, 4; add.s64 %rd445, %rd110, %rd444; ld.shared.u32 %r1435, [%rd445]; add.s32 %r1436, %r1435, %r1434; xor.b32 %r1437, %r3883, %r1101; xor.b32 %r1133, %r1437, %r1436; // inline asm bfe.u32 %r1120, %r1133, %r3861, %r3862; // inline asm mul.wide.u32 %rd446, %r1120, 4; add.s64 %rd447, %rd98, %rd446; 
ld.shared.u32 %r1438, [%rd447]; // inline asm bfe.u32 %r1124, %r1133, %r3863, %r3862; // inline asm mul.wide.u32 %rd448, %r1124, 4; add.s64 %rd449, %rd102, %rd448; ld.shared.u32 %r1439, [%rd449]; add.s32 %r1440, %r1439, %r1438; // inline asm bfe.u32 %r1128, %r1133, %r3862, %r3862; // inline asm mul.wide.u32 %rd450, %r1128, 4; add.s64 %rd451, %rd106, %rd450; ld.shared.u32 %r1441, [%rd451]; xor.b32 %r1442, %r1441, %r1440; // inline asm bfe.u32 %r1132, %r1133, %r719, %r3862; // inline asm mul.wide.u32 %rd452, %r1132, 4; add.s64 %rd453, %rd110, %rd452; ld.shared.u32 %r1443, [%rd453]; add.s32 %r1444, %r1443, %r1442; xor.b32 %r1445, %r3882, %r1117; xor.b32 %r1149, %r1445, %r1444; // inline asm bfe.u32 %r1136, %r1149, %r3861, %r3862; // inline asm mul.wide.u32 %rd454, %r1136, 4; add.s64 %rd455, %rd98, %rd454; ld.shared.u32 %r1446, [%rd455]; // inline asm bfe.u32 %r1140, %r1149, %r3863, %r3862; // inline asm mul.wide.u32 %rd456, %r1140, 4; add.s64 %rd457, %rd102, %rd456; ld.shared.u32 %r1447, [%rd457]; add.s32 %r1448, %r1447, %r1446; // inline asm bfe.u32 %r1144, %r1149, %r3862, %r3862; // inline asm mul.wide.u32 %rd458, %r1144, 4; add.s64 %rd459, %rd106, %rd458; ld.shared.u32 %r1449, [%rd459]; xor.b32 %r1450, %r1449, %r1448; // inline asm bfe.u32 %r1148, %r1149, %r719, %r3862; // inline asm mul.wide.u32 %rd460, %r1148, 4; add.s64 %rd461, %rd110, %rd460; ld.shared.u32 %r1451, [%rd461]; add.s32 %r1452, %r1451, %r1450; xor.b32 %r1453, %r3881, %r1133; xor.b32 %r1165, %r1453, %r1452; // inline asm bfe.u32 %r1152, %r1165, %r3861, %r3862; // inline asm mul.wide.u32 %rd462, %r1152, 4; add.s64 %rd463, %rd98, %rd462; ld.shared.u32 %r1454, [%rd463]; // inline asm bfe.u32 %r1156, %r1165, %r3863, %r3862; // inline asm mul.wide.u32 %rd464, %r1156, 4; add.s64 %rd465, %rd102, %rd464; ld.shared.u32 %r1455, [%rd465]; add.s32 %r1456, %r1455, %r1454; // inline asm bfe.u32 %r1160, %r1165, %r3862, %r3862; // inline asm mul.wide.u32 %rd466, %r1160, 4; add.s64 %rd467, %rd106, %rd466; 
ld.shared.u32 %r1457, [%rd467]; xor.b32 %r1458, %r1457, %r1456; // inline asm bfe.u32 %r1164, %r1165, %r719, %r3862; // inline asm mul.wide.u32 %rd468, %r1164, 4; add.s64 %rd469, %rd110, %rd468; ld.shared.u32 %r1459, [%rd469]; add.s32 %r1460, %r1459, %r1458; xor.b32 %r1461, %r3880, %r1149; xor.b32 %r1181, %r1461, %r1460; // inline asm bfe.u32 %r1168, %r1181, %r3861, %r3862; // inline asm mul.wide.u32 %rd470, %r1168, 4; add.s64 %rd471, %rd98, %rd470; ld.shared.u32 %r1462, [%rd471]; // inline asm bfe.u32 %r1172, %r1181, %r3863, %r3862; // inline asm mul.wide.u32 %rd472, %r1172, 4; add.s64 %rd473, %rd102, %rd472; ld.shared.u32 %r1463, [%rd473]; add.s32 %r1464, %r1463, %r1462; // inline asm bfe.u32 %r1176, %r1181, %r3862, %r3862; // inline asm mul.wide.u32 %rd474, %r1176, 4; add.s64 %rd475, %rd106, %rd474; ld.shared.u32 %r1465, [%rd475]; xor.b32 %r1466, %r1465, %r1464; // inline asm bfe.u32 %r1180, %r1181, %r719, %r3862; // inline asm mul.wide.u32 %rd476, %r1180, 4; add.s64 %rd477, %rd110, %rd476; ld.shared.u32 %r1467, [%rd477]; add.s32 %r1468, %r1467, %r1466; xor.b32 %r1469, %r3879, %r1165; xor.b32 %r1197, %r1469, %r1468; // inline asm bfe.u32 %r1184, %r1197, %r3861, %r3862; // inline asm mul.wide.u32 %rd478, %r1184, 4; add.s64 %rd479, %rd98, %rd478; ld.shared.u32 %r1470, [%rd479]; // inline asm bfe.u32 %r1188, %r1197, %r3863, %r3862; // inline asm mul.wide.u32 %rd480, %r1188, 4; add.s64 %rd481, %rd102, %rd480; ld.shared.u32 %r1471, [%rd481]; add.s32 %r1472, %r1471, %r1470; // inline asm bfe.u32 %r1192, %r1197, %r3862, %r3862; // inline asm mul.wide.u32 %rd482, %r1192, 4; add.s64 %rd483, %rd106, %rd482; ld.shared.u32 %r1473, [%rd483]; xor.b32 %r1474, %r1473, %r1472; // inline asm bfe.u32 %r1196, %r1197, %r719, %r3862; // inline asm mul.wide.u32 %rd484, %r1196, 4; add.s64 %rd485, %rd110, %rd484; ld.shared.u32 %r1475, [%rd485]; add.s32 %r1476, %r1475, %r1474; xor.b32 %r1477, %r3878, %r1181; xor.b32 %r1213, %r1477, %r1476; // inline asm bfe.u32 %r1200, %r1213, %r3861, 
%r3862; // inline asm mul.wide.u32 %rd486, %r1200, 4; add.s64 %rd487, %rd98, %rd486; ld.shared.u32 %r1478, [%rd487]; // inline asm bfe.u32 %r1204, %r1213, %r3863, %r3862; // inline asm mul.wide.u32 %rd488, %r1204, 4; add.s64 %rd489, %rd102, %rd488; ld.shared.u32 %r1479, [%rd489]; add.s32 %r1480, %r1479, %r1478; // inline asm bfe.u32 %r1208, %r1213, %r3862, %r3862; // inline asm mul.wide.u32 %rd490, %r1208, 4; add.s64 %rd491, %rd106, %rd490; ld.shared.u32 %r1481, [%rd491]; xor.b32 %r1482, %r1481, %r1480; // inline asm bfe.u32 %r1212, %r1213, %r719, %r3862; // inline asm mul.wide.u32 %rd492, %r1212, 4; add.s64 %rd493, %rd110, %rd492; ld.shared.u32 %r1483, [%rd493]; add.s32 %r1484, %r1483, %r1482; xor.b32 %r1485, %r3877, %r1197; xor.b32 %r1229, %r1485, %r1484; // inline asm bfe.u32 %r1216, %r1229, %r3861, %r3862; // inline asm mul.wide.u32 %rd494, %r1216, 4; add.s64 %rd495, %rd98, %rd494; ld.shared.u32 %r1486, [%rd495]; // inline asm bfe.u32 %r1220, %r1229, %r3863, %r3862; // inline asm mul.wide.u32 %rd496, %r1220, 4; add.s64 %rd497, %rd102, %rd496; ld.shared.u32 %r1487, [%rd497]; add.s32 %r1488, %r1487, %r1486; // inline asm bfe.u32 %r1224, %r1229, %r3862, %r3862; // inline asm mul.wide.u32 %rd498, %r1224, 4; add.s64 %rd499, %rd106, %rd498; ld.shared.u32 %r1489, [%rd499]; xor.b32 %r1490, %r1489, %r1488; // inline asm bfe.u32 %r1228, %r1229, %r719, %r3862; // inline asm mul.wide.u32 %rd500, %r1228, 4; add.s64 %rd501, %rd110, %rd500; ld.shared.u32 %r1491, [%rd501]; add.s32 %r1492, %r1491, %r1490; xor.b32 %r1493, %r3876, %r1213; xor.b32 %r3897, %r1493, %r1492; xor.b32 %r3898, %r3875, %r1229; st.shared.u32 [%rd1337+8], %r3898; st.shared.u32 [%rd1337+12], %r3897; add.s64 %rd1337, %rd1337, 16; add.s32 %r3896, %r3896, 4; setp.lt.u32 %p13, %r3896, 256; @%p13 bra BB3_22; mov.u32 %r3866, %tid.x; mul.wide.s32 %rd1325, %r3866, 1024; mov.u64 %rd1320, m03200_init$S1_all; add.s64 %rd1338, %rd1320, %rd1325; mov.u32 %r1494, 0; mov.u32 %r3899, %r1494; BB3_24: mov.u32 %r3854, 16; 
mov.u32 %r3853, 8; mov.u32 %r3852, 24; xor.b32 %r2007, %r39, %r3898; xor.b32 %r1508, %r2007, %r3892; // inline asm bfe.u32 %r1495, %r1508, %r3852, %r3853; // inline asm mul.wide.u32 %rd507, %r1495, 4; add.s64 %rd508, %rd98, %rd507; ld.shared.u32 %r2008, [%rd508]; // inline asm bfe.u32 %r1499, %r1508, %r3854, %r3853; // inline asm mul.wide.u32 %rd511, %r1499, 4; add.s64 %rd512, %rd102, %rd511; ld.shared.u32 %r2009, [%rd512]; add.s32 %r2010, %r2009, %r2008; // inline asm bfe.u32 %r1503, %r1508, %r3853, %r3853; // inline asm mul.wide.u32 %rd515, %r1503, 4; add.s64 %rd516, %rd106, %rd515; ld.shared.u32 %r2011, [%rd516]; xor.b32 %r2012, %r2011, %r2010; // inline asm bfe.u32 %r1507, %r1508, %r1494, %r3853; // inline asm mul.wide.u32 %rd519, %r1507, 4; add.s64 %rd520, %rd110, %rd519; ld.shared.u32 %r2013, [%rd520]; add.s32 %r2014, %r2013, %r2012; xor.b32 %r2015, %r40, %r3897; xor.b32 %r2016, %r2015, %r3891; xor.b32 %r1524, %r2016, %r2014; // inline asm bfe.u32 %r1511, %r1524, %r3852, %r3853; // inline asm mul.wide.u32 %rd521, %r1511, 4; add.s64 %rd522, %rd98, %rd521; ld.shared.u32 %r2017, [%rd522]; // inline asm bfe.u32 %r1515, %r1524, %r3854, %r3853; // inline asm mul.wide.u32 %rd523, %r1515, 4; add.s64 %rd524, %rd102, %rd523; ld.shared.u32 %r2018, [%rd524]; add.s32 %r2019, %r2018, %r2017; // inline asm bfe.u32 %r1519, %r1524, %r3853, %r3853; // inline asm mul.wide.u32 %rd525, %r1519, 4; add.s64 %rd526, %rd106, %rd525; ld.shared.u32 %r2020, [%rd526]; xor.b32 %r2021, %r2020, %r2019; // inline asm bfe.u32 %r1523, %r1524, %r1494, %r3853; // inline asm mul.wide.u32 %rd527, %r1523, 4; add.s64 %rd528, %rd110, %rd527; ld.shared.u32 %r2022, [%rd528]; add.s32 %r2023, %r2022, %r2021; xor.b32 %r2024, %r3890, %r1508; xor.b32 %r1540, %r2024, %r2023; // inline asm bfe.u32 %r1527, %r1540, %r3852, %r3853; // inline asm mul.wide.u32 %rd529, %r1527, 4; add.s64 %rd530, %rd98, %rd529; ld.shared.u32 %r2025, [%rd530]; // inline asm bfe.u32 %r1531, %r1540, %r3854, %r3853; // inline asm 
mul.wide.u32 %rd531, %r1531, 4; add.s64 %rd532, %rd102, %rd531; ld.shared.u32 %r2026, [%rd532]; add.s32 %r2027, %r2026, %r2025; // inline asm bfe.u32 %r1535, %r1540, %r3853, %r3853; // inline asm mul.wide.u32 %rd533, %r1535, 4; add.s64 %rd534, %rd106, %rd533; ld.shared.u32 %r2028, [%rd534]; xor.b32 %r2029, %r2028, %r2027; // inline asm bfe.u32 %r1539, %r1540, %r1494, %r3853; // inline asm mul.wide.u32 %rd535, %r1539, 4; add.s64 %rd536, %rd110, %rd535; ld.shared.u32 %r2030, [%rd536]; add.s32 %r2031, %r2030, %r2029; xor.b32 %r2032, %r3889, %r1524; xor.b32 %r1556, %r2032, %r2031; // inline asm bfe.u32 %r1543, %r1556, %r3852, %r3853; // inline asm mul.wide.u32 %rd537, %r1543, 4; add.s64 %rd538, %rd98, %rd537; ld.shared.u32 %r2033, [%rd538]; // inline asm bfe.u32 %r1547, %r1556, %r3854, %r3853; // inline asm mul.wide.u32 %rd539, %r1547, 4; add.s64 %rd540, %rd102, %rd539; ld.shared.u32 %r2034, [%rd540]; add.s32 %r2035, %r2034, %r2033; // inline asm bfe.u32 %r1551, %r1556, %r3853, %r3853; // inline asm mul.wide.u32 %rd541, %r1551, 4; add.s64 %rd542, %rd106, %rd541; ld.shared.u32 %r2036, [%rd542]; xor.b32 %r2037, %r2036, %r2035; // inline asm bfe.u32 %r1555, %r1556, %r1494, %r3853; // inline asm mul.wide.u32 %rd543, %r1555, 4; add.s64 %rd544, %rd110, %rd543; ld.shared.u32 %r2038, [%rd544]; add.s32 %r2039, %r2038, %r2037; xor.b32 %r2040, %r3888, %r1540; xor.b32 %r1572, %r2040, %r2039; // inline asm bfe.u32 %r1559, %r1572, %r3852, %r3853; // inline asm mul.wide.u32 %rd545, %r1559, 4; add.s64 %rd546, %rd98, %rd545; ld.shared.u32 %r2041, [%rd546]; // inline asm bfe.u32 %r1563, %r1572, %r3854, %r3853; // inline asm mul.wide.u32 %rd547, %r1563, 4; add.s64 %rd548, %rd102, %rd547; ld.shared.u32 %r2042, [%rd548]; add.s32 %r2043, %r2042, %r2041; // inline asm bfe.u32 %r1567, %r1572, %r3853, %r3853; // inline asm mul.wide.u32 %rd549, %r1567, 4; add.s64 %rd550, %rd106, %rd549; ld.shared.u32 %r2044, [%rd550]; xor.b32 %r2045, %r2044, %r2043; // inline asm bfe.u32 %r1571, %r1572, %r1494, 
%r3853; // inline asm mul.wide.u32 %rd551, %r1571, 4; add.s64 %rd552, %rd110, %rd551; ld.shared.u32 %r2046, [%rd552]; add.s32 %r2047, %r2046, %r2045; xor.b32 %r2048, %r3887, %r1556; xor.b32 %r1588, %r2048, %r2047; // inline asm bfe.u32 %r1575, %r1588, %r3852, %r3853; // inline asm mul.wide.u32 %rd553, %r1575, 4; add.s64 %rd554, %rd98, %rd553; ld.shared.u32 %r2049, [%rd554]; // inline asm bfe.u32 %r1579, %r1588, %r3854, %r3853; // inline asm mul.wide.u32 %rd555, %r1579, 4; add.s64 %rd556, %rd102, %rd555; ld.shared.u32 %r2050, [%rd556]; add.s32 %r2051, %r2050, %r2049; // inline asm bfe.u32 %r1583, %r1588, %r3853, %r3853; // inline asm mul.wide.u32 %rd557, %r1583, 4; add.s64 %rd558, %rd106, %rd557; ld.shared.u32 %r2052, [%rd558]; xor.b32 %r2053, %r2052, %r2051; // inline asm bfe.u32 %r1587, %r1588, %r1494, %r3853; // inline asm mul.wide.u32 %rd559, %r1587, 4; add.s64 %rd560, %rd110, %rd559; ld.shared.u32 %r2054, [%rd560]; add.s32 %r2055, %r2054, %r2053; xor.b32 %r2056, %r3886, %r1572; xor.b32 %r1604, %r2056, %r2055; // inline asm bfe.u32 %r1591, %r1604, %r3852, %r3853; // inline asm mul.wide.u32 %rd561, %r1591, 4; add.s64 %rd562, %rd98, %rd561; ld.shared.u32 %r2057, [%rd562]; // inline asm bfe.u32 %r1595, %r1604, %r3854, %r3853; // inline asm mul.wide.u32 %rd563, %r1595, 4; add.s64 %rd564, %rd102, %rd563; ld.shared.u32 %r2058, [%rd564]; add.s32 %r2059, %r2058, %r2057; // inline asm bfe.u32 %r1599, %r1604, %r3853, %r3853; // inline asm mul.wide.u32 %rd565, %r1599, 4; add.s64 %rd566, %rd106, %rd565; ld.shared.u32 %r2060, [%rd566]; xor.b32 %r2061, %r2060, %r2059; // inline asm bfe.u32 %r1603, %r1604, %r1494, %r3853; // inline asm mul.wide.u32 %rd567, %r1603, 4; add.s64 %rd568, %rd110, %rd567; ld.shared.u32 %r2062, [%rd568]; add.s32 %r2063, %r2062, %r2061; xor.b32 %r2064, %r3885, %r1588; xor.b32 %r1620, %r2064, %r2063; // inline asm bfe.u32 %r1607, %r1620, %r3852, %r3853; // inline asm mul.wide.u32 %rd569, %r1607, 4; add.s64 %rd570, %rd98, %rd569; ld.shared.u32 %r2065, 
[%rd570]; // inline asm bfe.u32 %r1611, %r1620, %r3854, %r3853; // inline asm mul.wide.u32 %rd571, %r1611, 4; add.s64 %rd572, %rd102, %rd571; ld.shared.u32 %r2066, [%rd572]; add.s32 %r2067, %r2066, %r2065; // inline asm bfe.u32 %r1615, %r1620, %r3853, %r3853; // inline asm mul.wide.u32 %rd573, %r1615, 4; add.s64 %rd574, %rd106, %rd573; ld.shared.u32 %r2068, [%rd574]; xor.b32 %r2069, %r2068, %r2067; // inline asm bfe.u32 %r1619, %r1620, %r1494, %r3853; // inline asm mul.wide.u32 %rd575, %r1619, 4; add.s64 %rd576, %rd110, %rd575; ld.shared.u32 %r2070, [%rd576]; add.s32 %r2071, %r2070, %r2069; xor.b32 %r2072, %r3884, %r1604; xor.b32 %r1636, %r2072, %r2071; // inline asm bfe.u32 %r1623, %r1636, %r3852, %r3853; // inline asm mul.wide.u32 %rd577, %r1623, 4; add.s64 %rd578, %rd98, %rd577; ld.shared.u32 %r2073, [%rd578]; // inline asm bfe.u32 %r1627, %r1636, %r3854, %r3853; // inline asm mul.wide.u32 %rd579, %r1627, 4; add.s64 %rd580, %rd102, %rd579; ld.shared.u32 %r2074, [%rd580]; add.s32 %r2075, %r2074, %r2073; // inline asm bfe.u32 %r1631, %r1636, %r3853, %r3853; // inline asm mul.wide.u32 %rd581, %r1631, 4; add.s64 %rd582, %rd106, %rd581; ld.shared.u32 %r2076, [%rd582]; xor.b32 %r2077, %r2076, %r2075; // inline asm bfe.u32 %r1635, %r1636, %r1494, %r3853; // inline asm mul.wide.u32 %rd583, %r1635, 4; add.s64 %rd584, %rd110, %rd583; ld.shared.u32 %r2078, [%rd584]; add.s32 %r2079, %r2078, %r2077; xor.b32 %r2080, %r3883, %r1620; xor.b32 %r1652, %r2080, %r2079; // inline asm bfe.u32 %r1639, %r1652, %r3852, %r3853; // inline asm mul.wide.u32 %rd585, %r1639, 4; add.s64 %rd586, %rd98, %rd585; ld.shared.u32 %r2081, [%rd586]; // inline asm bfe.u32 %r1643, %r1652, %r3854, %r3853; // inline asm mul.wide.u32 %rd587, %r1643, 4; add.s64 %rd588, %rd102, %rd587; ld.shared.u32 %r2082, [%rd588]; add.s32 %r2083, %r2082, %r2081; // inline asm bfe.u32 %r1647, %r1652, %r3853, %r3853; // inline asm mul.wide.u32 %rd589, %r1647, 4; add.s64 %rd590, %rd106, %rd589; ld.shared.u32 %r2084, [%rd590]; 
xor.b32 %r2085, %r2084, %r2083; // inline asm bfe.u32 %r1651, %r1652, %r1494, %r3853; // inline asm mul.wide.u32 %rd591, %r1651, 4; add.s64 %rd592, %rd110, %rd591; ld.shared.u32 %r2086, [%rd592]; add.s32 %r2087, %r2086, %r2085; xor.b32 %r2088, %r3882, %r1636; xor.b32 %r1668, %r2088, %r2087; // inline asm bfe.u32 %r1655, %r1668, %r3852, %r3853; // inline asm mul.wide.u32 %rd593, %r1655, 4; add.s64 %rd594, %rd98, %rd593; ld.shared.u32 %r2089, [%rd594]; // inline asm bfe.u32 %r1659, %r1668, %r3854, %r3853; // inline asm mul.wide.u32 %rd595, %r1659, 4; add.s64 %rd596, %rd102, %rd595; ld.shared.u32 %r2090, [%rd596]; add.s32 %r2091, %r2090, %r2089; // inline asm bfe.u32 %r1663, %r1668, %r3853, %r3853; // inline asm mul.wide.u32 %rd597, %r1663, 4; add.s64 %rd598, %rd106, %rd597; ld.shared.u32 %r2092, [%rd598]; xor.b32 %r2093, %r2092, %r2091; // inline asm bfe.u32 %r1667, %r1668, %r1494, %r3853; // inline asm mul.wide.u32 %rd599, %r1667, 4; add.s64 %rd600, %rd110, %rd599; ld.shared.u32 %r2094, [%rd600]; add.s32 %r2095, %r2094, %r2093; xor.b32 %r2096, %r3881, %r1652; xor.b32 %r1684, %r2096, %r2095; // inline asm bfe.u32 %r1671, %r1684, %r3852, %r3853; // inline asm mul.wide.u32 %rd601, %r1671, 4; add.s64 %rd602, %rd98, %rd601; ld.shared.u32 %r2097, [%rd602]; // inline asm bfe.u32 %r1675, %r1684, %r3854, %r3853; // inline asm mul.wide.u32 %rd603, %r1675, 4; add.s64 %rd604, %rd102, %rd603; ld.shared.u32 %r2098, [%rd604]; add.s32 %r2099, %r2098, %r2097; // inline asm bfe.u32 %r1679, %r1684, %r3853, %r3853; // inline asm mul.wide.u32 %rd605, %r1679, 4; add.s64 %rd606, %rd106, %rd605; ld.shared.u32 %r2100, [%rd606]; xor.b32 %r2101, %r2100, %r2099; // inline asm bfe.u32 %r1683, %r1684, %r1494, %r3853; // inline asm mul.wide.u32 %rd607, %r1683, 4; add.s64 %rd608, %rd110, %rd607; ld.shared.u32 %r2102, [%rd608]; add.s32 %r2103, %r2102, %r2101; xor.b32 %r2104, %r3880, %r1668; xor.b32 %r1700, %r2104, %r2103; // inline asm bfe.u32 %r1687, %r1700, %r3852, %r3853; // inline asm 
mul.wide.u32 %rd609, %r1687, 4; add.s64 %rd610, %rd98, %rd609; ld.shared.u32 %r2105, [%rd610]; // inline asm bfe.u32 %r1691, %r1700, %r3854, %r3853; // inline asm mul.wide.u32 %rd611, %r1691, 4; add.s64 %rd612, %rd102, %rd611; ld.shared.u32 %r2106, [%rd612]; add.s32 %r2107, %r2106, %r2105; // inline asm bfe.u32 %r1695, %r1700, %r3853, %r3853; // inline asm mul.wide.u32 %rd613, %r1695, 4; add.s64 %rd614, %rd106, %rd613; ld.shared.u32 %r2108, [%rd614]; xor.b32 %r2109, %r2108, %r2107; // inline asm bfe.u32 %r1699, %r1700, %r1494, %r3853; // inline asm mul.wide.u32 %rd615, %r1699, 4; add.s64 %rd616, %rd110, %rd615; ld.shared.u32 %r2110, [%rd616]; add.s32 %r2111, %r2110, %r2109; xor.b32 %r2112, %r3879, %r1684; xor.b32 %r1716, %r2112, %r2111; // inline asm bfe.u32 %r1703, %r1716, %r3852, %r3853; // inline asm mul.wide.u32 %rd617, %r1703, 4; add.s64 %rd618, %rd98, %rd617; ld.shared.u32 %r2113, [%rd618]; // inline asm bfe.u32 %r1707, %r1716, %r3854, %r3853; // inline asm mul.wide.u32 %rd619, %r1707, 4; add.s64 %rd620, %rd102, %rd619; ld.shared.u32 %r2114, [%rd620]; add.s32 %r2115, %r2114, %r2113; // inline asm bfe.u32 %r1711, %r1716, %r3853, %r3853; // inline asm mul.wide.u32 %rd621, %r1711, 4; add.s64 %rd622, %rd106, %rd621; ld.shared.u32 %r2116, [%rd622]; xor.b32 %r2117, %r2116, %r2115; // inline asm bfe.u32 %r1715, %r1716, %r1494, %r3853; // inline asm mul.wide.u32 %rd623, %r1715, 4; add.s64 %rd624, %rd110, %rd623; ld.shared.u32 %r2118, [%rd624]; add.s32 %r2119, %r2118, %r2117; xor.b32 %r2120, %r3878, %r1700; xor.b32 %r1732, %r2120, %r2119; // inline asm bfe.u32 %r1719, %r1732, %r3852, %r3853; // inline asm mul.wide.u32 %rd625, %r1719, 4; add.s64 %rd626, %rd98, %rd625; ld.shared.u32 %r2121, [%rd626]; // inline asm bfe.u32 %r1723, %r1732, %r3854, %r3853; // inline asm mul.wide.u32 %rd627, %r1723, 4; add.s64 %rd628, %rd102, %rd627; ld.shared.u32 %r2122, [%rd628]; add.s32 %r2123, %r2122, %r2121; // inline asm bfe.u32 %r1727, %r1732, %r3853, %r3853; // inline asm 
mul.wide.u32 %rd629, %r1727, 4; add.s64 %rd630, %rd106, %rd629; ld.shared.u32 %r2124, [%rd630]; xor.b32 %r2125, %r2124, %r2123; // inline asm bfe.u32 %r1731, %r1732, %r1494, %r3853; // inline asm mul.wide.u32 %rd631, %r1731, 4; add.s64 %rd632, %rd110, %rd631; ld.shared.u32 %r2126, [%rd632]; add.s32 %r2127, %r2126, %r2125; xor.b32 %r2128, %r3877, %r1716; xor.b32 %r1748, %r2128, %r2127; // inline asm bfe.u32 %r1735, %r1748, %r3852, %r3853; // inline asm mul.wide.u32 %rd633, %r1735, 4; add.s64 %rd634, %rd98, %rd633; ld.shared.u32 %r2129, [%rd634]; // inline asm bfe.u32 %r1739, %r1748, %r3854, %r3853; // inline asm mul.wide.u32 %rd635, %r1739, 4; add.s64 %rd636, %rd102, %rd635; ld.shared.u32 %r2130, [%rd636]; add.s32 %r2131, %r2130, %r2129; // inline asm bfe.u32 %r1743, %r1748, %r3853, %r3853; // inline asm mul.wide.u32 %rd637, %r1743, 4; add.s64 %rd638, %rd106, %rd637; ld.shared.u32 %r2132, [%rd638]; xor.b32 %r2133, %r2132, %r2131; // inline asm bfe.u32 %r1747, %r1748, %r1494, %r3853; // inline asm mul.wide.u32 %rd639, %r1747, 4; add.s64 %rd640, %rd110, %rd639; ld.shared.u32 %r2134, [%rd640]; add.s32 %r2135, %r2134, %r2133; xor.b32 %r2136, %r3876, %r1732; xor.b32 %r2137, %r2136, %r2135; xor.b32 %r2138, %r3875, %r1748; st.shared.u32 [%rd1338], %r2138; st.shared.u32 [%rd1338+4], %r2137; xor.b32 %r2139, %r2138, %r37; xor.b32 %r1764, %r2139, %r3892; // inline asm bfe.u32 %r1751, %r1764, %r3852, %r3853; // inline asm mul.wide.u32 %rd641, %r1751, 4; add.s64 %rd642, %rd98, %rd641; ld.shared.u32 %r2140, [%rd642]; // inline asm bfe.u32 %r1755, %r1764, %r3854, %r3853; // inline asm mul.wide.u32 %rd643, %r1755, 4; add.s64 %rd644, %rd102, %rd643; ld.shared.u32 %r2141, [%rd644]; add.s32 %r2142, %r2141, %r2140; // inline asm bfe.u32 %r1759, %r1764, %r3853, %r3853; // inline asm mul.wide.u32 %rd645, %r1759, 4; add.s64 %rd646, %rd106, %rd645; ld.shared.u32 %r2143, [%rd646]; xor.b32 %r2144, %r2143, %r2142; // inline asm bfe.u32 %r1763, %r1764, %r1494, %r3853; // inline asm 
mul.wide.u32 %rd647, %r1763, 4; add.s64 %rd648, %rd110, %rd647; ld.shared.u32 %r2145, [%rd648]; add.s32 %r2146, %r2145, %r2144; xor.b32 %r2147, %r38, %r2137; xor.b32 %r2148, %r2147, %r3891; xor.b32 %r1780, %r2148, %r2146; // inline asm bfe.u32 %r1767, %r1780, %r3852, %r3853; // inline asm mul.wide.u32 %rd649, %r1767, 4; add.s64 %rd650, %rd98, %rd649; ld.shared.u32 %r2149, [%rd650]; // inline asm bfe.u32 %r1771, %r1780, %r3854, %r3853; // inline asm mul.wide.u32 %rd651, %r1771, 4; add.s64 %rd652, %rd102, %rd651; ld.shared.u32 %r2150, [%rd652]; add.s32 %r2151, %r2150, %r2149; // inline asm bfe.u32 %r1775, %r1780, %r3853, %r3853; // inline asm mul.wide.u32 %rd653, %r1775, 4; add.s64 %rd654, %rd106, %rd653; ld.shared.u32 %r2152, [%rd654]; xor.b32 %r2153, %r2152, %r2151; // inline asm bfe.u32 %r1779, %r1780, %r1494, %r3853; // inline asm mul.wide.u32 %rd655, %r1779, 4; add.s64 %rd656, %rd110, %rd655; ld.shared.u32 %r2154, [%rd656]; add.s32 %r2155, %r2154, %r2153; xor.b32 %r2156, %r3890, %r1764; xor.b32 %r1796, %r2156, %r2155; // inline asm bfe.u32 %r1783, %r1796, %r3852, %r3853; // inline asm mul.wide.u32 %rd657, %r1783, 4; add.s64 %rd658, %rd98, %rd657; ld.shared.u32 %r2157, [%rd658]; // inline asm bfe.u32 %r1787, %r1796, %r3854, %r3853; // inline asm mul.wide.u32 %rd659, %r1787, 4; add.s64 %rd660, %rd102, %rd659; ld.shared.u32 %r2158, [%rd660]; add.s32 %r2159, %r2158, %r2157; // inline asm bfe.u32 %r1791, %r1796, %r3853, %r3853; // inline asm mul.wide.u32 %rd661, %r1791, 4; add.s64 %rd662, %rd106, %rd661; ld.shared.u32 %r2160, [%rd662]; xor.b32 %r2161, %r2160, %r2159; // inline asm bfe.u32 %r1795, %r1796, %r1494, %r3853; // inline asm mul.wide.u32 %rd663, %r1795, 4; add.s64 %rd664, %rd110, %rd663; ld.shared.u32 %r2162, [%rd664]; add.s32 %r2163, %r2162, %r2161; xor.b32 %r2164, %r3889, %r1780; xor.b32 %r1812, %r2164, %r2163; // inline asm bfe.u32 %r1799, %r1812, %r3852, %r3853; // inline asm mul.wide.u32 %rd665, %r1799, 4; add.s64 %rd666, %rd98, %rd665; ld.shared.u32 
%r2165, [%rd666]; // inline asm bfe.u32 %r1803, %r1812, %r3854, %r3853; // inline asm mul.wide.u32 %rd667, %r1803, 4; add.s64 %rd668, %rd102, %rd667; ld.shared.u32 %r2166, [%rd668]; add.s32 %r2167, %r2166, %r2165; // inline asm bfe.u32 %r1807, %r1812, %r3853, %r3853; // inline asm mul.wide.u32 %rd669, %r1807, 4; add.s64 %rd670, %rd106, %rd669; ld.shared.u32 %r2168, [%rd670]; xor.b32 %r2169, %r2168, %r2167; // inline asm bfe.u32 %r1811, %r1812, %r1494, %r3853; // inline asm mul.wide.u32 %rd671, %r1811, 4; add.s64 %rd672, %rd110, %rd671; ld.shared.u32 %r2170, [%rd672]; add.s32 %r2171, %r2170, %r2169; xor.b32 %r2172, %r3888, %r1796; xor.b32 %r1828, %r2172, %r2171; // inline asm bfe.u32 %r1815, %r1828, %r3852, %r3853; // inline asm mul.wide.u32 %rd673, %r1815, 4; add.s64 %rd674, %rd98, %rd673; ld.shared.u32 %r2173, [%rd674]; // inline asm bfe.u32 %r1819, %r1828, %r3854, %r3853; // inline asm mul.wide.u32 %rd675, %r1819, 4; add.s64 %rd676, %rd102, %rd675; ld.shared.u32 %r2174, [%rd676]; add.s32 %r2175, %r2174, %r2173; // inline asm bfe.u32 %r1823, %r1828, %r3853, %r3853; // inline asm mul.wide.u32 %rd677, %r1823, 4; add.s64 %rd678, %rd106, %rd677; ld.shared.u32 %r2176, [%rd678]; xor.b32 %r2177, %r2176, %r2175; // inline asm bfe.u32 %r1827, %r1828, %r1494, %r3853; // inline asm mul.wide.u32 %rd679, %r1827, 4; add.s64 %rd680, %rd110, %rd679; ld.shared.u32 %r2178, [%rd680]; add.s32 %r2179, %r2178, %r2177; xor.b32 %r2180, %r3887, %r1812; xor.b32 %r1844, %r2180, %r2179; // inline asm bfe.u32 %r1831, %r1844, %r3852, %r3853; // inline asm mul.wide.u32 %rd681, %r1831, 4; add.s64 %rd682, %rd98, %rd681; ld.shared.u32 %r2181, [%rd682]; // inline asm bfe.u32 %r1835, %r1844, %r3854, %r3853; // inline asm mul.wide.u32 %rd683, %r1835, 4; add.s64 %rd684, %rd102, %rd683; ld.shared.u32 %r2182, [%rd684]; add.s32 %r2183, %r2182, %r2181; // inline asm bfe.u32 %r1839, %r1844, %r3853, %r3853; // inline asm mul.wide.u32 %rd685, %r1839, 4; add.s64 %rd686, %rd106, %rd685; ld.shared.u32 %r2184, 
[%rd686]; xor.b32 %r2185, %r2184, %r2183; // inline asm bfe.u32 %r1843, %r1844, %r1494, %r3853; // inline asm mul.wide.u32 %rd687, %r1843, 4; add.s64 %rd688, %rd110, %rd687; ld.shared.u32 %r2186, [%rd688]; add.s32 %r2187, %r2186, %r2185; xor.b32 %r2188, %r3886, %r1828; xor.b32 %r1860, %r2188, %r2187; // inline asm bfe.u32 %r1847, %r1860, %r3852, %r3853; // inline asm mul.wide.u32 %rd689, %r1847, 4; add.s64 %rd690, %rd98, %rd689; ld.shared.u32 %r2189, [%rd690]; // inline asm bfe.u32 %r1851, %r1860, %r3854, %r3853; // inline asm mul.wide.u32 %rd691, %r1851, 4; add.s64 %rd692, %rd102, %rd691; ld.shared.u32 %r2190, [%rd692]; add.s32 %r2191, %r2190, %r2189; // inline asm bfe.u32 %r1855, %r1860, %r3853, %r3853; // inline asm mul.wide.u32 %rd693, %r1855, 4; add.s64 %rd694, %rd106, %rd693; ld.shared.u32 %r2192, [%rd694]; xor.b32 %r2193, %r2192, %r2191; // inline asm bfe.u32 %r1859, %r1860, %r1494, %r3853; // inline asm mul.wide.u32 %rd695, %r1859, 4; add.s64 %rd696, %rd110, %rd695; ld.shared.u32 %r2194, [%rd696]; add.s32 %r2195, %r2194, %r2193; xor.b32 %r2196, %r3885, %r1844; xor.b32 %r1876, %r2196, %r2195; // inline asm bfe.u32 %r1863, %r1876, %r3852, %r3853; // inline asm mul.wide.u32 %rd697, %r1863, 4; add.s64 %rd698, %rd98, %rd697; ld.shared.u32 %r2197, [%rd698]; // inline asm bfe.u32 %r1867, %r1876, %r3854, %r3853; // inline asm mul.wide.u32 %rd699, %r1867, 4; add.s64 %rd700, %rd102, %rd699; ld.shared.u32 %r2198, [%rd700]; add.s32 %r2199, %r2198, %r2197; // inline asm bfe.u32 %r1871, %r1876, %r3853, %r3853; // inline asm mul.wide.u32 %rd701, %r1871, 4; add.s64 %rd702, %rd106, %rd701; ld.shared.u32 %r2200, [%rd702]; xor.b32 %r2201, %r2200, %r2199; // inline asm bfe.u32 %r1875, %r1876, %r1494, %r3853; // inline asm mul.wide.u32 %rd703, %r1875, 4; add.s64 %rd704, %rd110, %rd703; ld.shared.u32 %r2202, [%rd704]; add.s32 %r2203, %r2202, %r2201; xor.b32 %r2204, %r3884, %r1860; xor.b32 %r1892, %r2204, %r2203; // inline asm bfe.u32 %r1879, %r1892, %r3852, %r3853; // inline asm 
mul.wide.u32 %rd705, %r1879, 4; add.s64 %rd706, %rd98, %rd705; ld.shared.u32 %r2205, [%rd706]; // inline asm bfe.u32 %r1883, %r1892, %r3854, %r3853; // inline asm mul.wide.u32 %rd707, %r1883, 4; add.s64 %rd708, %rd102, %rd707; ld.shared.u32 %r2206, [%rd708]; add.s32 %r2207, %r2206, %r2205; // inline asm bfe.u32 %r1887, %r1892, %r3853, %r3853; // inline asm mul.wide.u32 %rd709, %r1887, 4; add.s64 %rd710, %rd106, %rd709; ld.shared.u32 %r2208, [%rd710]; xor.b32 %r2209, %r2208, %r2207; // inline asm bfe.u32 %r1891, %r1892, %r1494, %r3853; // inline asm mul.wide.u32 %rd711, %r1891, 4; add.s64 %rd712, %rd110, %rd711; ld.shared.u32 %r2210, [%rd712]; add.s32 %r2211, %r2210, %r2209; xor.b32 %r2212, %r3883, %r1876; xor.b32 %r1908, %r2212, %r2211; // inline asm bfe.u32 %r1895, %r1908, %r3852, %r3853; // inline asm mul.wide.u32 %rd713, %r1895, 4; add.s64 %rd714, %rd98, %rd713; ld.shared.u32 %r2213, [%rd714]; // inline asm bfe.u32 %r1899, %r1908, %r3854, %r3853; // inline asm mul.wide.u32 %rd715, %r1899, 4; add.s64 %rd716, %rd102, %rd715; ld.shared.u32 %r2214, [%rd716]; add.s32 %r2215, %r2214, %r2213; // inline asm bfe.u32 %r1903, %r1908, %r3853, %r3853; // inline asm mul.wide.u32 %rd717, %r1903, 4; add.s64 %rd718, %rd106, %rd717; ld.shared.u32 %r2216, [%rd718]; xor.b32 %r2217, %r2216, %r2215; // inline asm bfe.u32 %r1907, %r1908, %r1494, %r3853; // inline asm mul.wide.u32 %rd719, %r1907, 4; add.s64 %rd720, %rd110, %rd719; ld.shared.u32 %r2218, [%rd720]; add.s32 %r2219, %r2218, %r2217; xor.b32 %r2220, %r3882, %r1892; xor.b32 %r1924, %r2220, %r2219; // inline asm bfe.u32 %r1911, %r1924, %r3852, %r3853; // inline asm mul.wide.u32 %rd721, %r1911, 4; add.s64 %rd722, %rd98, %rd721; ld.shared.u32 %r2221, [%rd722]; // inline asm bfe.u32 %r1915, %r1924, %r3854, %r3853; // inline asm mul.wide.u32 %rd723, %r1915, 4; add.s64 %rd724, %rd102, %rd723; ld.shared.u32 %r2222, [%rd724]; add.s32 %r2223, %r2222, %r2221; // inline asm bfe.u32 %r1919, %r1924, %r3853, %r3853; // inline asm 
mul.wide.u32 %rd725, %r1919, 4; add.s64 %rd726, %rd106, %rd725; ld.shared.u32 %r2224, [%rd726]; xor.b32 %r2225, %r2224, %r2223; // inline asm bfe.u32 %r1923, %r1924, %r1494, %r3853; // inline asm mul.wide.u32 %rd727, %r1923, 4; add.s64 %rd728, %rd110, %rd727; ld.shared.u32 %r2226, [%rd728]; add.s32 %r2227, %r2226, %r2225; xor.b32 %r2228, %r3881, %r1908; xor.b32 %r1940, %r2228, %r2227; // inline asm bfe.u32 %r1927, %r1940, %r3852, %r3853; // inline asm mul.wide.u32 %rd729, %r1927, 4; add.s64 %rd730, %rd98, %rd729; ld.shared.u32 %r2229, [%rd730]; // inline asm bfe.u32 %r1931, %r1940, %r3854, %r3853; // inline asm mul.wide.u32 %rd731, %r1931, 4; add.s64 %rd732, %rd102, %rd731; ld.shared.u32 %r2230, [%rd732]; add.s32 %r2231, %r2230, %r2229; // inline asm bfe.u32 %r1935, %r1940, %r3853, %r3853; // inline asm mul.wide.u32 %rd733, %r1935, 4; add.s64 %rd734, %rd106, %rd733; ld.shared.u32 %r2232, [%rd734]; xor.b32 %r2233, %r2232, %r2231; // inline asm bfe.u32 %r1939, %r1940, %r1494, %r3853; // inline asm mul.wide.u32 %rd735, %r1939, 4; add.s64 %rd736, %rd110, %rd735; ld.shared.u32 %r2234, [%rd736]; add.s32 %r2235, %r2234, %r2233; xor.b32 %r2236, %r3880, %r1924; xor.b32 %r1956, %r2236, %r2235; // inline asm bfe.u32 %r1943, %r1956, %r3852, %r3853; // inline asm mul.wide.u32 %rd737, %r1943, 4; add.s64 %rd738, %rd98, %rd737; ld.shared.u32 %r2237, [%rd738]; // inline asm bfe.u32 %r1947, %r1956, %r3854, %r3853; // inline asm mul.wide.u32 %rd739, %r1947, 4; add.s64 %rd740, %rd102, %rd739; ld.shared.u32 %r2238, [%rd740]; add.s32 %r2239, %r2238, %r2237; // inline asm bfe.u32 %r1951, %r1956, %r3853, %r3853; // inline asm mul.wide.u32 %rd741, %r1951, 4; add.s64 %rd742, %rd106, %rd741; ld.shared.u32 %r2240, [%rd742]; xor.b32 %r2241, %r2240, %r2239; // inline asm bfe.u32 %r1955, %r1956, %r1494, %r3853; // inline asm mul.wide.u32 %rd743, %r1955, 4; add.s64 %rd744, %rd110, %rd743; ld.shared.u32 %r2242, [%rd744]; add.s32 %r2243, %r2242, %r2241; xor.b32 %r2244, %r3879, %r1940; xor.b32 
%r1972, %r2244, %r2243; // inline asm bfe.u32 %r1959, %r1972, %r3852, %r3853; // inline asm mul.wide.u32 %rd745, %r1959, 4; add.s64 %rd746, %rd98, %rd745; ld.shared.u32 %r2245, [%rd746]; // inline asm bfe.u32 %r1963, %r1972, %r3854, %r3853; // inline asm mul.wide.u32 %rd747, %r1963, 4; add.s64 %rd748, %rd102, %rd747; ld.shared.u32 %r2246, [%rd748]; add.s32 %r2247, %r2246, %r2245; // inline asm bfe.u32 %r1967, %r1972, %r3853, %r3853; // inline asm mul.wide.u32 %rd749, %r1967, 4; add.s64 %rd750, %rd106, %rd749; ld.shared.u32 %r2248, [%rd750]; xor.b32 %r2249, %r2248, %r2247; // inline asm bfe.u32 %r1971, %r1972, %r1494, %r3853; // inline asm mul.wide.u32 %rd751, %r1971, 4; add.s64 %rd752, %rd110, %rd751; ld.shared.u32 %r2250, [%rd752]; add.s32 %r2251, %r2250, %r2249; xor.b32 %r2252, %r3878, %r1956; xor.b32 %r1988, %r2252, %r2251; // inline asm bfe.u32 %r1975, %r1988, %r3852, %r3853; // inline asm mul.wide.u32 %rd753, %r1975, 4; add.s64 %rd754, %rd98, %rd753; ld.shared.u32 %r2253, [%rd754]; // inline asm bfe.u32 %r1979, %r1988, %r3854, %r3853; // inline asm mul.wide.u32 %rd755, %r1979, 4; add.s64 %rd756, %rd102, %rd755; ld.shared.u32 %r2254, [%rd756]; add.s32 %r2255, %r2254, %r2253; // inline asm bfe.u32 %r1983, %r1988, %r3853, %r3853; // inline asm mul.wide.u32 %rd757, %r1983, 4; add.s64 %rd758, %rd106, %rd757; ld.shared.u32 %r2256, [%rd758]; xor.b32 %r2257, %r2256, %r2255; // inline asm bfe.u32 %r1987, %r1988, %r1494, %r3853; // inline asm mul.wide.u32 %rd759, %r1987, 4; add.s64 %rd760, %rd110, %rd759; ld.shared.u32 %r2258, [%rd760]; add.s32 %r2259, %r2258, %r2257; xor.b32 %r2260, %r3877, %r1972; xor.b32 %r2004, %r2260, %r2259; // inline asm bfe.u32 %r1991, %r2004, %r3852, %r3853; // inline asm mul.wide.u32 %rd761, %r1991, 4; add.s64 %rd762, %rd98, %rd761; ld.shared.u32 %r2261, [%rd762]; // inline asm bfe.u32 %r1995, %r2004, %r3854, %r3853; // inline asm mul.wide.u32 %rd763, %r1995, 4; add.s64 %rd764, %rd102, %rd763; ld.shared.u32 %r2262, [%rd764]; add.s32 %r2263, 
%r2262, %r2261; // inline asm bfe.u32 %r1999, %r2004, %r3853, %r3853; // inline asm mul.wide.u32 %rd765, %r1999, 4; add.s64 %rd766, %rd106, %rd765; ld.shared.u32 %r2264, [%rd766]; xor.b32 %r2265, %r2264, %r2263; // inline asm bfe.u32 %r2003, %r2004, %r1494, %r3853; // inline asm mul.wide.u32 %rd767, %r2003, 4; add.s64 %rd768, %rd110, %rd767; ld.shared.u32 %r2266, [%rd768]; add.s32 %r2267, %r2266, %r2265; xor.b32 %r2268, %r3876, %r1988; xor.b32 %r3897, %r2268, %r2267; xor.b32 %r3898, %r3875, %r2004; st.shared.u32 [%rd1338+8], %r3898; st.shared.u32 [%rd1338+12], %r3897; add.s64 %rd1338, %rd1338, 16; add.s32 %r3899, %r3899, 4; setp.lt.u32 %p14, %r3899, 256; @%p14 bra BB3_24; mov.u32 %r3864, %tid.x; mul.wide.s32 %rd1323, %r3864, 1024; mov.u64 %rd1321, m03200_init$S2_all; add.s64 %rd1339, %rd1321, %rd1323; mov.u32 %r2269, 0; mov.u32 %r3902, %r2269; BB3_26: mov.u32 %r3857, 16; mov.u32 %r3856, 8; mov.u32 %r3855, 24; xor.b32 %r2782, %r39, %r3898; xor.b32 %r2283, %r2782, %r3892; // inline asm bfe.u32 %r2270, %r2283, %r3855, %r3856; // inline asm mul.wide.u32 %rd774, %r2270, 4; add.s64 %rd775, %rd98, %rd774; ld.shared.u32 %r2783, [%rd775]; // inline asm bfe.u32 %r2274, %r2283, %r3857, %r3856; // inline asm mul.wide.u32 %rd778, %r2274, 4; add.s64 %rd779, %rd102, %rd778; ld.shared.u32 %r2784, [%rd779]; add.s32 %r2785, %r2784, %r2783; // inline asm bfe.u32 %r2278, %r2283, %r3856, %r3856; // inline asm mul.wide.u32 %rd782, %r2278, 4; add.s64 %rd783, %rd106, %rd782; ld.shared.u32 %r2786, [%rd783]; xor.b32 %r2787, %r2786, %r2785; // inline asm bfe.u32 %r2282, %r2283, %r2269, %r3856; // inline asm mul.wide.u32 %rd786, %r2282, 4; add.s64 %rd787, %rd110, %rd786; ld.shared.u32 %r2788, [%rd787]; add.s32 %r2789, %r2788, %r2787; xor.b32 %r2790, %r40, %r3897; xor.b32 %r2791, %r2790, %r3891; xor.b32 %r2299, %r2791, %r2789; // inline asm bfe.u32 %r2286, %r2299, %r3855, %r3856; // inline asm mul.wide.u32 %rd788, %r2286, 4; add.s64 %rd789, %rd98, %rd788; ld.shared.u32 %r2792, [%rd789]; // 
inline asm bfe.u32 %r2290, %r2299, %r3857, %r3856; // inline asm mul.wide.u32 %rd790, %r2290, 4; add.s64 %rd791, %rd102, %rd790; ld.shared.u32 %r2793, [%rd791]; add.s32 %r2794, %r2793, %r2792; // inline asm bfe.u32 %r2294, %r2299, %r3856, %r3856; // inline asm mul.wide.u32 %rd792, %r2294, 4; add.s64 %rd793, %rd106, %rd792; ld.shared.u32 %r2795, [%rd793]; xor.b32 %r2796, %r2795, %r2794; // inline asm bfe.u32 %r2298, %r2299, %r2269, %r3856; // inline asm mul.wide.u32 %rd794, %r2298, 4; add.s64 %rd795, %rd110, %rd794; ld.shared.u32 %r2797, [%rd795]; add.s32 %r2798, %r2797, %r2796; xor.b32 %r2799, %r3890, %r2283; xor.b32 %r2315, %r2799, %r2798; // inline asm bfe.u32 %r2302, %r2315, %r3855, %r3856; // inline asm mul.wide.u32 %rd796, %r2302, 4; add.s64 %rd797, %rd98, %rd796; ld.shared.u32 %r2800, [%rd797]; // inline asm bfe.u32 %r2306, %r2315, %r3857, %r3856; // inline asm mul.wide.u32 %rd798, %r2306, 4; add.s64 %rd799, %rd102, %rd798; ld.shared.u32 %r2801, [%rd799]; add.s32 %r2802, %r2801, %r2800; // inline asm bfe.u32 %r2310, %r2315, %r3856, %r3856; // inline asm mul.wide.u32 %rd800, %r2310, 4; add.s64 %rd801, %rd106, %rd800; ld.shared.u32 %r2803, [%rd801]; xor.b32 %r2804, %r2803, %r2802; // inline asm bfe.u32 %r2314, %r2315, %r2269, %r3856; // inline asm mul.wide.u32 %rd802, %r2314, 4; add.s64 %rd803, %rd110, %rd802; ld.shared.u32 %r2805, [%rd803]; add.s32 %r2806, %r2805, %r2804; xor.b32 %r2807, %r3889, %r2299; xor.b32 %r2331, %r2807, %r2806; // inline asm bfe.u32 %r2318, %r2331, %r3855, %r3856; // inline asm mul.wide.u32 %rd804, %r2318, 4; add.s64 %rd805, %rd98, %rd804; ld.shared.u32 %r2808, [%rd805]; // inline asm bfe.u32 %r2322, %r2331, %r3857, %r3856; // inline asm mul.wide.u32 %rd806, %r2322, 4; add.s64 %rd807, %rd102, %rd806; ld.shared.u32 %r2809, [%rd807]; add.s32 %r2810, %r2809, %r2808; // inline asm bfe.u32 %r2326, %r2331, %r3856, %r3856; // inline asm mul.wide.u32 %rd808, %r2326, 4; add.s64 %rd809, %rd106, %rd808; ld.shared.u32 %r2811, [%rd809]; xor.b32 
%r2812, %r2811, %r2810; // inline asm bfe.u32 %r2330, %r2331, %r2269, %r3856; // inline asm mul.wide.u32 %rd810, %r2330, 4; add.s64 %rd811, %rd110, %rd810; ld.shared.u32 %r2813, [%rd811]; add.s32 %r2814, %r2813, %r2812; xor.b32 %r2815, %r3888, %r2315; xor.b32 %r2347, %r2815, %r2814; // inline asm bfe.u32 %r2334, %r2347, %r3855, %r3856; // inline asm mul.wide.u32 %rd812, %r2334, 4; add.s64 %rd813, %rd98, %rd812; ld.shared.u32 %r2816, [%rd813]; // inline asm bfe.u32 %r2338, %r2347, %r3857, %r3856; // inline asm mul.wide.u32 %rd814, %r2338, 4; add.s64 %rd815, %rd102, %rd814; ld.shared.u32 %r2817, [%rd815]; add.s32 %r2818, %r2817, %r2816; // inline asm bfe.u32 %r2342, %r2347, %r3856, %r3856; // inline asm mul.wide.u32 %rd816, %r2342, 4; add.s64 %rd817, %rd106, %rd816; ld.shared.u32 %r2819, [%rd817]; xor.b32 %r2820, %r2819, %r2818; // inline asm bfe.u32 %r2346, %r2347, %r2269, %r3856; // inline asm mul.wide.u32 %rd818, %r2346, 4; add.s64 %rd819, %rd110, %rd818; ld.shared.u32 %r2821, [%rd819]; add.s32 %r2822, %r2821, %r2820; xor.b32 %r2823, %r3887, %r2331; xor.b32 %r2363, %r2823, %r2822; // inline asm bfe.u32 %r2350, %r2363, %r3855, %r3856; // inline asm mul.wide.u32 %rd820, %r2350, 4; add.s64 %rd821, %rd98, %rd820; ld.shared.u32 %r2824, [%rd821]; // inline asm bfe.u32 %r2354, %r2363, %r3857, %r3856; // inline asm mul.wide.u32 %rd822, %r2354, 4; add.s64 %rd823, %rd102, %rd822; ld.shared.u32 %r2825, [%rd823]; add.s32 %r2826, %r2825, %r2824; // inline asm bfe.u32 %r2358, %r2363, %r3856, %r3856; // inline asm mul.wide.u32 %rd824, %r2358, 4; add.s64 %rd825, %rd106, %rd824; ld.shared.u32 %r2827, [%rd825]; xor.b32 %r2828, %r2827, %r2826; // inline asm bfe.u32 %r2362, %r2363, %r2269, %r3856; // inline asm mul.wide.u32 %rd826, %r2362, 4; add.s64 %rd827, %rd110, %rd826; ld.shared.u32 %r2829, [%rd827]; add.s32 %r2830, %r2829, %r2828; xor.b32 %r2831, %r3886, %r2347; xor.b32 %r2379, %r2831, %r2830; // inline asm bfe.u32 %r2366, %r2379, %r3855, %r3856; // inline asm mul.wide.u32 
%rd828, %r2366, 4; add.s64 %rd829, %rd98, %rd828; ld.shared.u32 %r2832, [%rd829]; // inline asm bfe.u32 %r2370, %r2379, %r3857, %r3856; // inline asm mul.wide.u32 %rd830, %r2370, 4; add.s64 %rd831, %rd102, %rd830; ld.shared.u32 %r2833, [%rd831]; add.s32 %r2834, %r2833, %r2832; // inline asm bfe.u32 %r2374, %r2379, %r3856, %r3856; // inline asm mul.wide.u32 %rd832, %r2374, 4; add.s64 %rd833, %rd106, %rd832; ld.shared.u32 %r2835, [%rd833]; xor.b32 %r2836, %r2835, %r2834; // inline asm bfe.u32 %r2378, %r2379, %r2269, %r3856; // inline asm mul.wide.u32 %rd834, %r2378, 4; add.s64 %rd835, %rd110, %rd834; ld.shared.u32 %r2837, [%rd835]; add.s32 %r2838, %r2837, %r2836; xor.b32 %r2839, %r3885, %r2363; xor.b32 %r2395, %r2839, %r2838; // inline asm bfe.u32 %r2382, %r2395, %r3855, %r3856; // inline asm mul.wide.u32 %rd836, %r2382, 4; add.s64 %rd837, %rd98, %rd836; ld.shared.u32 %r2840, [%rd837]; // inline asm bfe.u32 %r2386, %r2395, %r3857, %r3856; // inline asm mul.wide.u32 %rd838, %r2386, 4; add.s64 %rd839, %rd102, %rd838; ld.shared.u32 %r2841, [%rd839]; add.s32 %r2842, %r2841, %r2840; // inline asm bfe.u32 %r2390, %r2395, %r3856, %r3856; // inline asm mul.wide.u32 %rd840, %r2390, 4; add.s64 %rd841, %rd106, %rd840; ld.shared.u32 %r2843, [%rd841]; xor.b32 %r2844, %r2843, %r2842; // inline asm bfe.u32 %r2394, %r2395, %r2269, %r3856; // inline asm mul.wide.u32 %rd842, %r2394, 4; add.s64 %rd843, %rd110, %rd842; ld.shared.u32 %r2845, [%rd843]; add.s32 %r2846, %r2845, %r2844; xor.b32 %r2847, %r3884, %r2379; xor.b32 %r2411, %r2847, %r2846; // inline asm bfe.u32 %r2398, %r2411, %r3855, %r3856; // inline asm mul.wide.u32 %rd844, %r2398, 4; add.s64 %rd845, %rd98, %rd844; ld.shared.u32 %r2848, [%rd845]; // inline asm bfe.u32 %r2402, %r2411, %r3857, %r3856; // inline asm mul.wide.u32 %rd846, %r2402, 4; add.s64 %rd847, %rd102, %rd846; ld.shared.u32 %r2849, [%rd847]; add.s32 %r2850, %r2849, %r2848; // inline asm bfe.u32 %r2406, %r2411, %r3856, %r3856; // inline asm mul.wide.u32 %rd848, 
%r2406, 4; add.s64 %rd849, %rd106, %rd848; ld.shared.u32 %r2851, [%rd849]; xor.b32 %r2852, %r2851, %r2850; // inline asm bfe.u32 %r2410, %r2411, %r2269, %r3856; // inline asm mul.wide.u32 %rd850, %r2410, 4; add.s64 %rd851, %rd110, %rd850; ld.shared.u32 %r2853, [%rd851]; add.s32 %r2854, %r2853, %r2852; xor.b32 %r2855, %r3883, %r2395; xor.b32 %r2427, %r2855, %r2854; // inline asm bfe.u32 %r2414, %r2427, %r3855, %r3856; // inline asm mul.wide.u32 %rd852, %r2414, 4; add.s64 %rd853, %rd98, %rd852; ld.shared.u32 %r2856, [%rd853]; // inline asm bfe.u32 %r2418, %r2427, %r3857, %r3856; // inline asm mul.wide.u32 %rd854, %r2418, 4; add.s64 %rd855, %rd102, %rd854; ld.shared.u32 %r2857, [%rd855]; add.s32 %r2858, %r2857, %r2856; // inline asm bfe.u32 %r2422, %r2427, %r3856, %r3856; // inline asm mul.wide.u32 %rd856, %r2422, 4; add.s64 %rd857, %rd106, %rd856; ld.shared.u32 %r2859, [%rd857]; xor.b32 %r2860, %r2859, %r2858; // inline asm bfe.u32 %r2426, %r2427, %r2269, %r3856; // inline asm mul.wide.u32 %rd858, %r2426, 4; add.s64 %rd859, %rd110, %rd858; ld.shared.u32 %r2861, [%rd859]; add.s32 %r2862, %r2861, %r2860; xor.b32 %r2863, %r3882, %r2411; xor.b32 %r2443, %r2863, %r2862; // inline asm bfe.u32 %r2430, %r2443, %r3855, %r3856; // inline asm mul.wide.u32 %rd860, %r2430, 4; add.s64 %rd861, %rd98, %rd860; ld.shared.u32 %r2864, [%rd861]; // inline asm bfe.u32 %r2434, %r2443, %r3857, %r3856; // inline asm mul.wide.u32 %rd862, %r2434, 4; add.s64 %rd863, %rd102, %rd862; ld.shared.u32 %r2865, [%rd863]; add.s32 %r2866, %r2865, %r2864; // inline asm bfe.u32 %r2438, %r2443, %r3856, %r3856; // inline asm mul.wide.u32 %rd864, %r2438, 4; add.s64 %rd865, %rd106, %rd864; ld.shared.u32 %r2867, [%rd865]; xor.b32 %r2868, %r2867, %r2866; // inline asm bfe.u32 %r2442, %r2443, %r2269, %r3856; // inline asm mul.wide.u32 %rd866, %r2442, 4; add.s64 %rd867, %rd110, %rd866; ld.shared.u32 %r2869, [%rd867]; add.s32 %r2870, %r2869, %r2868; xor.b32 %r2871, %r3881, %r2427; xor.b32 %r2459, %r2871, %r2870; // 
inline asm bfe.u32 %r2446, %r2459, %r3855, %r3856; // inline asm mul.wide.u32 %rd868, %r2446, 4; add.s64 %rd869, %rd98, %rd868; ld.shared.u32 %r2872, [%rd869]; // inline asm bfe.u32 %r2450, %r2459, %r3857, %r3856; // inline asm mul.wide.u32 %rd870, %r2450, 4; add.s64 %rd871, %rd102, %rd870; ld.shared.u32 %r2873, [%rd871]; add.s32 %r2874, %r2873, %r2872; // inline asm bfe.u32 %r2454, %r2459, %r3856, %r3856; // inline asm mul.wide.u32 %rd872, %r2454, 4; add.s64 %rd873, %rd106, %rd872; ld.shared.u32 %r2875, [%rd873]; xor.b32 %r2876, %r2875, %r2874; // inline asm bfe.u32 %r2458, %r2459, %r2269, %r3856; // inline asm mul.wide.u32 %rd874, %r2458, 4; add.s64 %rd875, %rd110, %rd874; ld.shared.u32 %r2877, [%rd875]; add.s32 %r2878, %r2877, %r2876; xor.b32 %r2879, %r3880, %r2443; xor.b32 %r2475, %r2879, %r2878; // inline asm bfe.u32 %r2462, %r2475, %r3855, %r3856; // inline asm mul.wide.u32 %rd876, %r2462, 4; add.s64 %rd877, %rd98, %rd876; ld.shared.u32 %r2880, [%rd877]; // inline asm bfe.u32 %r2466, %r2475, %r3857, %r3856; // inline asm mul.wide.u32 %rd878, %r2466, 4; add.s64 %rd879, %rd102, %rd878; ld.shared.u32 %r2881, [%rd879]; add.s32 %r2882, %r2881, %r2880; // inline asm bfe.u32 %r2470, %r2475, %r3856, %r3856; // inline asm mul.wide.u32 %rd880, %r2470, 4; add.s64 %rd881, %rd106, %rd880; ld.shared.u32 %r2883, [%rd881]; xor.b32 %r2884, %r2883, %r2882; // inline asm bfe.u32 %r2474, %r2475, %r2269, %r3856; // inline asm mul.wide.u32 %rd882, %r2474, 4; add.s64 %rd883, %rd110, %rd882; ld.shared.u32 %r2885, [%rd883]; add.s32 %r2886, %r2885, %r2884; xor.b32 %r2887, %r3879, %r2459; xor.b32 %r2491, %r2887, %r2886; // inline asm bfe.u32 %r2478, %r2491, %r3855, %r3856; // inline asm mul.wide.u32 %rd884, %r2478, 4; add.s64 %rd885, %rd98, %rd884; ld.shared.u32 %r2888, [%rd885]; // inline asm bfe.u32 %r2482, %r2491, %r3857, %r3856; // inline asm mul.wide.u32 %rd886, %r2482, 4; add.s64 %rd887, %rd102, %rd886; ld.shared.u32 %r2889, [%rd887]; add.s32 %r2890, %r2889, %r2888; // inline asm 
bfe.u32 %r2486, %r2491, %r3856, %r3856; // inline asm mul.wide.u32 %rd888, %r2486, 4; add.s64 %rd889, %rd106, %rd888; ld.shared.u32 %r2891, [%rd889]; xor.b32 %r2892, %r2891, %r2890; // inline asm bfe.u32 %r2490, %r2491, %r2269, %r3856; // inline asm mul.wide.u32 %rd890, %r2490, 4; add.s64 %rd891, %rd110, %rd890; ld.shared.u32 %r2893, [%rd891]; add.s32 %r2894, %r2893, %r2892; xor.b32 %r2895, %r3878, %r2475; xor.b32 %r2507, %r2895, %r2894; // inline asm bfe.u32 %r2494, %r2507, %r3855, %r3856; // inline asm mul.wide.u32 %rd892, %r2494, 4; add.s64 %rd893, %rd98, %rd892; ld.shared.u32 %r2896, [%rd893]; // inline asm bfe.u32 %r2498, %r2507, %r3857, %r3856; // inline asm mul.wide.u32 %rd894, %r2498, 4; add.s64 %rd895, %rd102, %rd894; ld.shared.u32 %r2897, [%rd895]; add.s32 %r2898, %r2897, %r2896; // inline asm bfe.u32 %r2502, %r2507, %r3856, %r3856; // inline asm mul.wide.u32 %rd896, %r2502, 4; add.s64 %rd897, %rd106, %rd896; ld.shared.u32 %r2899, [%rd897]; xor.b32 %r2900, %r2899, %r2898; // inline asm bfe.u32 %r2506, %r2507, %r2269, %r3856; // inline asm mul.wide.u32 %rd898, %r2506, 4; add.s64 %rd899, %rd110, %rd898; ld.shared.u32 %r2901, [%rd899]; add.s32 %r2902, %r2901, %r2900; xor.b32 %r2903, %r3877, %r2491; xor.b32 %r2523, %r2903, %r2902; // inline asm bfe.u32 %r2510, %r2523, %r3855, %r3856; // inline asm mul.wide.u32 %rd900, %r2510, 4; add.s64 %rd901, %rd98, %rd900; ld.shared.u32 %r2904, [%rd901]; // inline asm bfe.u32 %r2514, %r2523, %r3857, %r3856; // inline asm mul.wide.u32 %rd902, %r2514, 4; add.s64 %rd903, %rd102, %rd902; ld.shared.u32 %r2905, [%rd903]; add.s32 %r2906, %r2905, %r2904; // inline asm bfe.u32 %r2518, %r2523, %r3856, %r3856; // inline asm mul.wide.u32 %rd904, %r2518, 4; add.s64 %rd905, %rd106, %rd904; ld.shared.u32 %r2907, [%rd905]; xor.b32 %r2908, %r2907, %r2906; // inline asm bfe.u32 %r2522, %r2523, %r2269, %r3856; // inline asm mul.wide.u32 %rd906, %r2522, 4; add.s64 %rd907, %rd110, %rd906; ld.shared.u32 %r2909, [%rd907]; add.s32 %r2910, %r2909, 
%r2908; xor.b32 %r2911, %r3876, %r2507; xor.b32 %r2912, %r2911, %r2910; xor.b32 %r2913, %r3875, %r2523; st.shared.u32 [%rd1339], %r2913; st.shared.u32 [%rd1339+4], %r2912; xor.b32 %r2914, %r2913, %r37; xor.b32 %r2539, %r2914, %r3892; // inline asm bfe.u32 %r2526, %r2539, %r3855, %r3856; // inline asm mul.wide.u32 %rd908, %r2526, 4; add.s64 %rd909, %rd98, %rd908; ld.shared.u32 %r2915, [%rd909]; // inline asm bfe.u32 %r2530, %r2539, %r3857, %r3856; // inline asm mul.wide.u32 %rd910, %r2530, 4; add.s64 %rd911, %rd102, %rd910; ld.shared.u32 %r2916, [%rd911]; add.s32 %r2917, %r2916, %r2915; // inline asm bfe.u32 %r2534, %r2539, %r3856, %r3856; // inline asm mul.wide.u32 %rd912, %r2534, 4; add.s64 %rd913, %rd106, %rd912; ld.shared.u32 %r2918, [%rd913]; xor.b32 %r2919, %r2918, %r2917; // inline asm bfe.u32 %r2538, %r2539, %r2269, %r3856; // inline asm mul.wide.u32 %rd914, %r2538, 4; add.s64 %rd915, %rd110, %rd914; ld.shared.u32 %r2920, [%rd915]; add.s32 %r2921, %r2920, %r2919; xor.b32 %r2922, %r38, %r2912; xor.b32 %r2923, %r2922, %r3891; xor.b32 %r2555, %r2923, %r2921; // inline asm bfe.u32 %r2542, %r2555, %r3855, %r3856; // inline asm mul.wide.u32 %rd916, %r2542, 4; add.s64 %rd917, %rd98, %rd916; ld.shared.u32 %r2924, [%rd917]; // inline asm bfe.u32 %r2546, %r2555, %r3857, %r3856; // inline asm mul.wide.u32 %rd918, %r2546, 4; add.s64 %rd919, %rd102, %rd918; ld.shared.u32 %r2925, [%rd919]; add.s32 %r2926, %r2925, %r2924; // inline asm bfe.u32 %r2550, %r2555, %r3856, %r3856; // inline asm mul.wide.u32 %rd920, %r2550, 4; add.s64 %rd921, %rd106, %rd920; ld.shared.u32 %r2927, [%rd921]; xor.b32 %r2928, %r2927, %r2926; // inline asm bfe.u32 %r2554, %r2555, %r2269, %r3856; // inline asm mul.wide.u32 %rd922, %r2554, 4; add.s64 %rd923, %rd110, %rd922; ld.shared.u32 %r2929, [%rd923]; add.s32 %r2930, %r2929, %r2928; xor.b32 %r2931, %r3890, %r2539; xor.b32 %r2571, %r2931, %r2930; // inline asm bfe.u32 %r2558, %r2571, %r3855, %r3856; // inline asm mul.wide.u32 %rd924, %r2558, 4; 
add.s64 %rd925, %rd98, %rd924; ld.shared.u32 %r2932, [%rd925]; // inline asm bfe.u32 %r2562, %r2571, %r3857, %r3856; // inline asm mul.wide.u32 %rd926, %r2562, 4; add.s64 %rd927, %rd102, %rd926; ld.shared.u32 %r2933, [%rd927]; add.s32 %r2934, %r2933, %r2932; // inline asm bfe.u32 %r2566, %r2571, %r3856, %r3856; // inline asm mul.wide.u32 %rd928, %r2566, 4; add.s64 %rd929, %rd106, %rd928; ld.shared.u32 %r2935, [%rd929]; xor.b32 %r2936, %r2935, %r2934; // inline asm bfe.u32 %r2570, %r2571, %r2269, %r3856; // inline asm mul.wide.u32 %rd930, %r2570, 4; add.s64 %rd931, %rd110, %rd930; ld.shared.u32 %r2937, [%rd931]; add.s32 %r2938, %r2937, %r2936; xor.b32 %r2939, %r3889, %r2555; xor.b32 %r2587, %r2939, %r2938; // inline asm bfe.u32 %r2574, %r2587, %r3855, %r3856; // inline asm mul.wide.u32 %rd932, %r2574, 4; add.s64 %rd933, %rd98, %rd932; ld.shared.u32 %r2940, [%rd933]; // inline asm bfe.u32 %r2578, %r2587, %r3857, %r3856; // inline asm mul.wide.u32 %rd934, %r2578, 4; add.s64 %rd935, %rd102, %rd934; ld.shared.u32 %r2941, [%rd935]; add.s32 %r2942, %r2941, %r2940; // inline asm bfe.u32 %r2582, %r2587, %r3856, %r3856; // inline asm mul.wide.u32 %rd936, %r2582, 4; add.s64 %rd937, %rd106, %rd936; ld.shared.u32 %r2943, [%rd937]; xor.b32 %r2944, %r2943, %r2942; // inline asm bfe.u32 %r2586, %r2587, %r2269, %r3856; // inline asm mul.wide.u32 %rd938, %r2586, 4; add.s64 %rd939, %rd110, %rd938; ld.shared.u32 %r2945, [%rd939]; add.s32 %r2946, %r2945, %r2944; xor.b32 %r2947, %r3888, %r2571; xor.b32 %r2603, %r2947, %r2946; // inline asm bfe.u32 %r2590, %r2603, %r3855, %r3856; // inline asm mul.wide.u32 %rd940, %r2590, 4; add.s64 %rd941, %rd98, %rd940; ld.shared.u32 %r2948, [%rd941]; // inline asm bfe.u32 %r2594, %r2603, %r3857, %r3856; // inline asm mul.wide.u32 %rd942, %r2594, 4; add.s64 %rd943, %rd102, %rd942; ld.shared.u32 %r2949, [%rd943]; add.s32 %r2950, %r2949, %r2948; // inline asm bfe.u32 %r2598, %r2603, %r3856, %r3856; // inline asm mul.wide.u32 %rd944, %r2598, 4; add.s64 
%rd945, %rd106, %rd944; ld.shared.u32 %r2951, [%rd945]; xor.b32 %r2952, %r2951, %r2950; // inline asm bfe.u32 %r2602, %r2603, %r2269, %r3856; // inline asm mul.wide.u32 %rd946, %r2602, 4; add.s64 %rd947, %rd110, %rd946; ld.shared.u32 %r2953, [%rd947]; add.s32 %r2954, %r2953, %r2952; xor.b32 %r2955, %r3887, %r2587; xor.b32 %r2619, %r2955, %r2954; // inline asm bfe.u32 %r2606, %r2619, %r3855, %r3856; // inline asm mul.wide.u32 %rd948, %r2606, 4; add.s64 %rd949, %rd98, %rd948; ld.shared.u32 %r2956, [%rd949]; // inline asm bfe.u32 %r2610, %r2619, %r3857, %r3856; // inline asm mul.wide.u32 %rd950, %r2610, 4; add.s64 %rd951, %rd102, %rd950; ld.shared.u32 %r2957, [%rd951]; add.s32 %r2958, %r2957, %r2956; // inline asm bfe.u32 %r2614, %r2619, %r3856, %r3856; // inline asm mul.wide.u32 %rd952, %r2614, 4; add.s64 %rd953, %rd106, %rd952; ld.shared.u32 %r2959, [%rd953]; xor.b32 %r2960, %r2959, %r2958; // inline asm bfe.u32 %r2618, %r2619, %r2269, %r3856; // inline asm mul.wide.u32 %rd954, %r2618, 4; add.s64 %rd955, %rd110, %rd954; ld.shared.u32 %r2961, [%rd955]; add.s32 %r2962, %r2961, %r2960; xor.b32 %r2963, %r3886, %r2603; xor.b32 %r2635, %r2963, %r2962; // inline asm bfe.u32 %r2622, %r2635, %r3855, %r3856; // inline asm mul.wide.u32 %rd956, %r2622, 4; add.s64 %rd957, %rd98, %rd956; ld.shared.u32 %r2964, [%rd957]; // inline asm bfe.u32 %r2626, %r2635, %r3857, %r3856; // inline asm mul.wide.u32 %rd958, %r2626, 4; add.s64 %rd959, %rd102, %rd958; ld.shared.u32 %r2965, [%rd959]; add.s32 %r2966, %r2965, %r2964; // inline asm bfe.u32 %r2630, %r2635, %r3856, %r3856; // inline asm mul.wide.u32 %rd960, %r2630, 4; add.s64 %rd961, %rd106, %rd960; ld.shared.u32 %r2967, [%rd961]; xor.b32 %r2968, %r2967, %r2966; // inline asm bfe.u32 %r2634, %r2635, %r2269, %r3856; // inline asm mul.wide.u32 %rd962, %r2634, 4; add.s64 %rd963, %rd110, %rd962; ld.shared.u32 %r2969, [%rd963]; add.s32 %r2970, %r2969, %r2968; xor.b32 %r2971, %r3885, %r2619; xor.b32 %r2651, %r2971, %r2970; // inline asm bfe.u32 
%r2638, %r2651, %r3855, %r3856; // inline asm mul.wide.u32 %rd964, %r2638, 4; add.s64 %rd965, %rd98, %rd964; ld.shared.u32 %r2972, [%rd965]; // inline asm bfe.u32 %r2642, %r2651, %r3857, %r3856; // inline asm mul.wide.u32 %rd966, %r2642, 4; add.s64 %rd967, %rd102, %rd966; ld.shared.u32 %r2973, [%rd967]; add.s32 %r2974, %r2973, %r2972; // inline asm bfe.u32 %r2646, %r2651, %r3856, %r3856; // inline asm mul.wide.u32 %rd968, %r2646, 4; add.s64 %rd969, %rd106, %rd968; ld.shared.u32 %r2975, [%rd969]; xor.b32 %r2976, %r2975, %r2974; // inline asm bfe.u32 %r2650, %r2651, %r2269, %r3856; // inline asm mul.wide.u32 %rd970, %r2650, 4; add.s64 %rd971, %rd110, %rd970; ld.shared.u32 %r2977, [%rd971]; add.s32 %r2978, %r2977, %r2976; xor.b32 %r2979, %r3884, %r2635; xor.b32 %r2667, %r2979, %r2978; // inline asm bfe.u32 %r2654, %r2667, %r3855, %r3856; // inline asm mul.wide.u32 %rd972, %r2654, 4; add.s64 %rd973, %rd98, %rd972; ld.shared.u32 %r2980, [%rd973]; // inline asm bfe.u32 %r2658, %r2667, %r3857, %r3856; // inline asm mul.wide.u32 %rd974, %r2658, 4; add.s64 %rd975, %rd102, %rd974; ld.shared.u32 %r2981, [%rd975]; add.s32 %r2982, %r2981, %r2980; // inline asm bfe.u32 %r2662, %r2667, %r3856, %r3856; // inline asm mul.wide.u32 %rd976, %r2662, 4; add.s64 %rd977, %rd106, %rd976; ld.shared.u32 %r2983, [%rd977]; xor.b32 %r2984, %r2983, %r2982; // inline asm bfe.u32 %r2666, %r2667, %r2269, %r3856; // inline asm mul.wide.u32 %rd978, %r2666, 4; add.s64 %rd979, %rd110, %rd978; ld.shared.u32 %r2985, [%rd979]; add.s32 %r2986, %r2985, %r2984; xor.b32 %r2987, %r3883, %r2651; xor.b32 %r2683, %r2987, %r2986; // inline asm bfe.u32 %r2670, %r2683, %r3855, %r3856; // inline asm mul.wide.u32 %rd980, %r2670, 4; add.s64 %rd981, %rd98, %rd980; ld.shared.u32 %r2988, [%rd981]; // inline asm bfe.u32 %r2674, %r2683, %r3857, %r3856; // inline asm mul.wide.u32 %rd982, %r2674, 4; add.s64 %rd983, %rd102, %rd982; ld.shared.u32 %r2989, [%rd983]; add.s32 %r2990, %r2989, %r2988; // inline asm bfe.u32 %r2678, 
%r2683, %r3856, %r3856; // inline asm mul.wide.u32 %rd984, %r2678, 4; add.s64 %rd985, %rd106, %rd984; ld.shared.u32 %r2991, [%rd985]; xor.b32 %r2992, %r2991, %r2990; // inline asm bfe.u32 %r2682, %r2683, %r2269, %r3856; // inline asm mul.wide.u32 %rd986, %r2682, 4; add.s64 %rd987, %rd110, %rd986; ld.shared.u32 %r2993, [%rd987]; add.s32 %r2994, %r2993, %r2992; xor.b32 %r2995, %r3882, %r2667; xor.b32 %r2699, %r2995, %r2994; // inline asm bfe.u32 %r2686, %r2699, %r3855, %r3856; // inline asm mul.wide.u32 %rd988, %r2686, 4; add.s64 %rd989, %rd98, %rd988; ld.shared.u32 %r2996, [%rd989]; // inline asm bfe.u32 %r2690, %r2699, %r3857, %r3856; // inline asm mul.wide.u32 %rd990, %r2690, 4; add.s64 %rd991, %rd102, %rd990; ld.shared.u32 %r2997, [%rd991]; add.s32 %r2998, %r2997, %r2996; // inline asm bfe.u32 %r2694, %r2699, %r3856, %r3856; // inline asm mul.wide.u32 %rd992, %r2694, 4; add.s64 %rd993, %rd106, %rd992; ld.shared.u32 %r2999, [%rd993]; xor.b32 %r3000, %r2999, %r2998; // inline asm bfe.u32 %r2698, %r2699, %r2269, %r3856; // inline asm mul.wide.u32 %rd994, %r2698, 4; add.s64 %rd995, %rd110, %rd994; ld.shared.u32 %r3001, [%rd995]; add.s32 %r3002, %r3001, %r3000; xor.b32 %r3003, %r3881, %r2683; xor.b32 %r2715, %r3003, %r3002; // inline asm bfe.u32 %r2702, %r2715, %r3855, %r3856; // inline asm mul.wide.u32 %rd996, %r2702, 4; add.s64 %rd997, %rd98, %rd996; ld.shared.u32 %r3004, [%rd997]; // inline asm bfe.u32 %r2706, %r2715, %r3857, %r3856; // inline asm mul.wide.u32 %rd998, %r2706, 4; add.s64 %rd999, %rd102, %rd998; ld.shared.u32 %r3005, [%rd999]; add.s32 %r3006, %r3005, %r3004; // inline asm bfe.u32 %r2710, %r2715, %r3856, %r3856; // inline asm mul.wide.u32 %rd1000, %r2710, 4; add.s64 %rd1001, %rd106, %rd1000; ld.shared.u32 %r3007, [%rd1001]; xor.b32 %r3008, %r3007, %r3006; // inline asm bfe.u32 %r2714, %r2715, %r2269, %r3856; // inline asm mul.wide.u32 %rd1002, %r2714, 4; add.s64 %rd1003, %rd110, %rd1002; ld.shared.u32 %r3009, [%rd1003]; add.s32 %r3010, %r3009, %r3008; 
xor.b32 %r3011, %r3880, %r2699; xor.b32 %r2731, %r3011, %r3010; // inline asm bfe.u32 %r2718, %r2731, %r3855, %r3856; // inline asm mul.wide.u32 %rd1004, %r2718, 4; add.s64 %rd1005, %rd98, %rd1004; ld.shared.u32 %r3012, [%rd1005]; // inline asm bfe.u32 %r2722, %r2731, %r3857, %r3856; // inline asm mul.wide.u32 %rd1006, %r2722, 4; add.s64 %rd1007, %rd102, %rd1006; ld.shared.u32 %r3013, [%rd1007]; add.s32 %r3014, %r3013, %r3012; // inline asm bfe.u32 %r2726, %r2731, %r3856, %r3856; // inline asm mul.wide.u32 %rd1008, %r2726, 4; add.s64 %rd1009, %rd106, %rd1008; ld.shared.u32 %r3015, [%rd1009]; xor.b32 %r3016, %r3015, %r3014; // inline asm bfe.u32 %r2730, %r2731, %r2269, %r3856; // inline asm mul.wide.u32 %rd1010, %r2730, 4; add.s64 %rd1011, %rd110, %rd1010; ld.shared.u32 %r3017, [%rd1011]; add.s32 %r3018, %r3017, %r3016; xor.b32 %r3019, %r3879, %r2715; xor.b32 %r2747, %r3019, %r3018; // inline asm bfe.u32 %r2734, %r2747, %r3855, %r3856; // inline asm mul.wide.u32 %rd1012, %r2734, 4; add.s64 %rd1013, %rd98, %rd1012; ld.shared.u32 %r3020, [%rd1013]; // inline asm bfe.u32 %r2738, %r2747, %r3857, %r3856; // inline asm mul.wide.u32 %rd1014, %r2738, 4; add.s64 %rd1015, %rd102, %rd1014; ld.shared.u32 %r3021, [%rd1015]; add.s32 %r3022, %r3021, %r3020; // inline asm bfe.u32 %r2742, %r2747, %r3856, %r3856; // inline asm mul.wide.u32 %rd1016, %r2742, 4; add.s64 %rd1017, %rd106, %rd1016; ld.shared.u32 %r3023, [%rd1017]; xor.b32 %r3024, %r3023, %r3022; // inline asm bfe.u32 %r2746, %r2747, %r2269, %r3856; // inline asm mul.wide.u32 %rd1018, %r2746, 4; add.s64 %rd1019, %rd110, %rd1018; ld.shared.u32 %r3025, [%rd1019]; add.s32 %r3026, %r3025, %r3024; xor.b32 %r3027, %r3878, %r2731; xor.b32 %r2763, %r3027, %r3026; // inline asm bfe.u32 %r2750, %r2763, %r3855, %r3856; // inline asm mul.wide.u32 %rd1020, %r2750, 4; add.s64 %rd1021, %rd98, %rd1020; ld.shared.u32 %r3028, [%rd1021]; // inline asm bfe.u32 %r2754, %r2763, %r3857, %r3856; // inline asm mul.wide.u32 %rd1022, %r2754, 4; 
add.s64 %rd1023, %rd102, %rd1022; ld.shared.u32 %r3029, [%rd1023]; add.s32 %r3030, %r3029, %r3028; // inline asm bfe.u32 %r2758, %r2763, %r3856, %r3856; // inline asm mul.wide.u32 %rd1024, %r2758, 4; add.s64 %rd1025, %rd106, %rd1024; ld.shared.u32 %r3031, [%rd1025]; xor.b32 %r3032, %r3031, %r3030; // inline asm bfe.u32 %r2762, %r2763, %r2269, %r3856; // inline asm mul.wide.u32 %rd1026, %r2762, 4; add.s64 %rd1027, %rd110, %rd1026; ld.shared.u32 %r3033, [%rd1027]; add.s32 %r3034, %r3033, %r3032; xor.b32 %r3035, %r3877, %r2747; xor.b32 %r2779, %r3035, %r3034; // inline asm bfe.u32 %r2766, %r2779, %r3855, %r3856; // inline asm mul.wide.u32 %rd1028, %r2766, 4; add.s64 %rd1029, %rd98, %rd1028; ld.shared.u32 %r3036, [%rd1029]; // inline asm bfe.u32 %r2770, %r2779, %r3857, %r3856; // inline asm mul.wide.u32 %rd1030, %r2770, 4; add.s64 %rd1031, %rd102, %rd1030; ld.shared.u32 %r3037, [%rd1031]; add.s32 %r3038, %r3037, %r3036; // inline asm bfe.u32 %r2774, %r2779, %r3856, %r3856; // inline asm mul.wide.u32 %rd1032, %r2774, 4; add.s64 %rd1033, %rd106, %rd1032; ld.shared.u32 %r3039, [%rd1033]; xor.b32 %r3040, %r3039, %r3038; // inline asm bfe.u32 %r2778, %r2779, %r2269, %r3856; // inline asm mul.wide.u32 %rd1034, %r2778, 4; add.s64 %rd1035, %rd110, %rd1034; ld.shared.u32 %r3041, [%rd1035]; add.s32 %r3042, %r3041, %r3040; xor.b32 %r3043, %r3876, %r2763; xor.b32 %r3897, %r3043, %r3042; xor.b32 %r3898, %r3875, %r2779; st.shared.u32 [%rd1339+8], %r3898; st.shared.u32 [%rd1339+12], %r3897; add.s64 %rd1339, %rd1339, 16; add.s32 %r3902, %r3902, 4; setp.lt.u32 %p15, %r3902, 256; @%p15 bra BB3_26; mov.u32 %r3865, %tid.x; mul.wide.s32 %rd1324, %r3865, 1024; mov.u64 %rd1322, m03200_init$S3_all; add.s64 %rd1340, %rd1322, %rd1324; mov.u32 %r3044, 0; mov.u32 %r3905, %r3044; BB3_28: mov.u32 %r3860, 16; mov.u32 %r3859, 8; mov.u32 %r3858, 24; xor.b32 %r3557, %r39, %r3898; xor.b32 %r3058, %r3557, %r3892; // inline asm bfe.u32 %r3045, %r3058, %r3858, %r3859; // inline asm mul.wide.u32 %rd1041, 
%r3045, 4; add.s64 %rd1042, %rd98, %rd1041; ld.shared.u32 %r3558, [%rd1042]; // inline asm bfe.u32 %r3049, %r3058, %r3860, %r3859; // inline asm mul.wide.u32 %rd1045, %r3049, 4; add.s64 %rd1046, %rd102, %rd1045; ld.shared.u32 %r3559, [%rd1046]; add.s32 %r3560, %r3559, %r3558; // inline asm bfe.u32 %r3053, %r3058, %r3859, %r3859; // inline asm mul.wide.u32 %rd1049, %r3053, 4; add.s64 %rd1050, %rd106, %rd1049; ld.shared.u32 %r3561, [%rd1050]; xor.b32 %r3562, %r3561, %r3560; // inline asm bfe.u32 %r3057, %r3058, %r3044, %r3859; // inline asm mul.wide.u32 %rd1053, %r3057, 4; add.s64 %rd1054, %rd110, %rd1053; ld.shared.u32 %r3563, [%rd1054]; add.s32 %r3564, %r3563, %r3562; xor.b32 %r3565, %r40, %r3897; xor.b32 %r3566, %r3565, %r3891; xor.b32 %r3074, %r3566, %r3564; // inline asm bfe.u32 %r3061, %r3074, %r3858, %r3859; // inline asm mul.wide.u32 %rd1055, %r3061, 4; add.s64 %rd1056, %rd98, %rd1055; ld.shared.u32 %r3567, [%rd1056]; // inline asm bfe.u32 %r3065, %r3074, %r3860, %r3859; // inline asm mul.wide.u32 %rd1057, %r3065, 4; add.s64 %rd1058, %rd102, %rd1057; ld.shared.u32 %r3568, [%rd1058]; add.s32 %r3569, %r3568, %r3567; // inline asm bfe.u32 %r3069, %r3074, %r3859, %r3859; // inline asm mul.wide.u32 %rd1059, %r3069, 4; add.s64 %rd1060, %rd106, %rd1059; ld.shared.u32 %r3570, [%rd1060]; xor.b32 %r3571, %r3570, %r3569; // inline asm bfe.u32 %r3073, %r3074, %r3044, %r3859; // inline asm mul.wide.u32 %rd1061, %r3073, 4; add.s64 %rd1062, %rd110, %rd1061; ld.shared.u32 %r3572, [%rd1062]; add.s32 %r3573, %r3572, %r3571; xor.b32 %r3574, %r3890, %r3058; xor.b32 %r3090, %r3574, %r3573; // inline asm bfe.u32 %r3077, %r3090, %r3858, %r3859; // inline asm mul.wide.u32 %rd1063, %r3077, 4; add.s64 %rd1064, %rd98, %rd1063; ld.shared.u32 %r3575, [%rd1064]; // inline asm bfe.u32 %r3081, %r3090, %r3860, %r3859; // inline asm mul.wide.u32 %rd1065, %r3081, 4; add.s64 %rd1066, %rd102, %rd1065; ld.shared.u32 %r3576, [%rd1066]; add.s32 %r3577, %r3576, %r3575; // inline asm bfe.u32 %r3085, 
%r3090, %r3859, %r3859; // inline asm mul.wide.u32 %rd1067, %r3085, 4; add.s64 %rd1068, %rd106, %rd1067; ld.shared.u32 %r3578, [%rd1068]; xor.b32 %r3579, %r3578, %r3577; // inline asm bfe.u32 %r3089, %r3090, %r3044, %r3859; // inline asm mul.wide.u32 %rd1069, %r3089, 4; add.s64 %rd1070, %rd110, %rd1069; ld.shared.u32 %r3580, [%rd1070]; add.s32 %r3581, %r3580, %r3579; xor.b32 %r3582, %r3889, %r3074; xor.b32 %r3106, %r3582, %r3581; // inline asm bfe.u32 %r3093, %r3106, %r3858, %r3859; // inline asm mul.wide.u32 %rd1071, %r3093, 4; add.s64 %rd1072, %rd98, %rd1071; ld.shared.u32 %r3583, [%rd1072]; // inline asm bfe.u32 %r3097, %r3106, %r3860, %r3859; // inline asm mul.wide.u32 %rd1073, %r3097, 4; add.s64 %rd1074, %rd102, %rd1073; ld.shared.u32 %r3584, [%rd1074]; add.s32 %r3585, %r3584, %r3583; // inline asm bfe.u32 %r3101, %r3106, %r3859, %r3859; // inline asm mul.wide.u32 %rd1075, %r3101, 4; add.s64 %rd1076, %rd106, %rd1075; ld.shared.u32 %r3586, [%rd1076]; xor.b32 %r3587, %r3586, %r3585; // inline asm bfe.u32 %r3105, %r3106, %r3044, %r3859; // inline asm mul.wide.u32 %rd1077, %r3105, 4; add.s64 %rd1078, %rd110, %rd1077; ld.shared.u32 %r3588, [%rd1078]; add.s32 %r3589, %r3588, %r3587; xor.b32 %r3590, %r3888, %r3090; xor.b32 %r3122, %r3590, %r3589; // inline asm bfe.u32 %r3109, %r3122, %r3858, %r3859; // inline asm mul.wide.u32 %rd1079, %r3109, 4; add.s64 %rd1080, %rd98, %rd1079; ld.shared.u32 %r3591, [%rd1080]; // inline asm bfe.u32 %r3113, %r3122, %r3860, %r3859; // inline asm mul.wide.u32 %rd1081, %r3113, 4; add.s64 %rd1082, %rd102, %rd1081; ld.shared.u32 %r3592, [%rd1082]; add.s32 %r3593, %r3592, %r3591; // inline asm bfe.u32 %r3117, %r3122, %r3859, %r3859; // inline asm mul.wide.u32 %rd1083, %r3117, 4; add.s64 %rd1084, %rd106, %rd1083; ld.shared.u32 %r3594, [%rd1084]; xor.b32 %r3595, %r3594, %r3593; // inline asm bfe.u32 %r3121, %r3122, %r3044, %r3859; // inline asm mul.wide.u32 %rd1085, %r3121, 4; add.s64 %rd1086, %rd110, %rd1085; ld.shared.u32 %r3596, [%rd1086]; 
add.s32 %r3597, %r3596, %r3595; xor.b32 %r3598, %r3887, %r3106; xor.b32 %r3138, %r3598, %r3597; // inline asm bfe.u32 %r3125, %r3138, %r3858, %r3859; // inline asm mul.wide.u32 %rd1087, %r3125, 4; add.s64 %rd1088, %rd98, %rd1087; ld.shared.u32 %r3599, [%rd1088]; // inline asm bfe.u32 %r3129, %r3138, %r3860, %r3859; // inline asm mul.wide.u32 %rd1089, %r3129, 4; add.s64 %rd1090, %rd102, %rd1089; ld.shared.u32 %r3600, [%rd1090]; add.s32 %r3601, %r3600, %r3599; // inline asm bfe.u32 %r3133, %r3138, %r3859, %r3859; // inline asm mul.wide.u32 %rd1091, %r3133, 4; add.s64 %rd1092, %rd106, %rd1091; ld.shared.u32 %r3602, [%rd1092]; xor.b32 %r3603, %r3602, %r3601; // inline asm bfe.u32 %r3137, %r3138, %r3044, %r3859; // inline asm mul.wide.u32 %rd1093, %r3137, 4; add.s64 %rd1094, %rd110, %rd1093; ld.shared.u32 %r3604, [%rd1094]; add.s32 %r3605, %r3604, %r3603; xor.b32 %r3606, %r3886, %r3122; xor.b32 %r3154, %r3606, %r3605; // inline asm bfe.u32 %r3141, %r3154, %r3858, %r3859; // inline asm mul.wide.u32 %rd1095, %r3141, 4; add.s64 %rd1096, %rd98, %rd1095; ld.shared.u32 %r3607, [%rd1096]; // inline asm bfe.u32 %r3145, %r3154, %r3860, %r3859; // inline asm mul.wide.u32 %rd1097, %r3145, 4; add.s64 %rd1098, %rd102, %rd1097; ld.shared.u32 %r3608, [%rd1098]; add.s32 %r3609, %r3608, %r3607; // inline asm bfe.u32 %r3149, %r3154, %r3859, %r3859; // inline asm mul.wide.u32 %rd1099, %r3149, 4; add.s64 %rd1100, %rd106, %rd1099; ld.shared.u32 %r3610, [%rd1100]; xor.b32 %r3611, %r3610, %r3609; // inline asm bfe.u32 %r3153, %r3154, %r3044, %r3859; // inline asm mul.wide.u32 %rd1101, %r3153, 4; add.s64 %rd1102, %rd110, %rd1101; ld.shared.u32 %r3612, [%rd1102]; add.s32 %r3613, %r3612, %r3611; xor.b32 %r3614, %r3885, %r3138; xor.b32 %r3170, %r3614, %r3613; // inline asm bfe.u32 %r3157, %r3170, %r3858, %r3859; // inline asm mul.wide.u32 %rd1103, %r3157, 4; add.s64 %rd1104, %rd98, %rd1103; ld.shared.u32 %r3615, [%rd1104]; // inline asm bfe.u32 %r3161, %r3170, %r3860, %r3859; // inline asm 
mul.wide.u32 %rd1105, %r3161, 4; add.s64 %rd1106, %rd102, %rd1105; ld.shared.u32 %r3616, [%rd1106]; add.s32 %r3617, %r3616, %r3615; // inline asm bfe.u32 %r3165, %r3170, %r3859, %r3859; // inline asm mul.wide.u32 %rd1107, %r3165, 4; add.s64 %rd1108, %rd106, %rd1107; ld.shared.u32 %r3618, [%rd1108]; xor.b32 %r3619, %r3618, %r3617; // inline asm bfe.u32 %r3169, %r3170, %r3044, %r3859; // inline asm mul.wide.u32 %rd1109, %r3169, 4; add.s64 %rd1110, %rd110, %rd1109; ld.shared.u32 %r3620, [%rd1110]; add.s32 %r3621, %r3620, %r3619; xor.b32 %r3622, %r3884, %r3154; xor.b32 %r3186, %r3622, %r3621; // inline asm bfe.u32 %r3173, %r3186, %r3858, %r3859; // inline asm mul.wide.u32 %rd1111, %r3173, 4; add.s64 %rd1112, %rd98, %rd1111; ld.shared.u32 %r3623, [%rd1112]; // inline asm bfe.u32 %r3177, %r3186, %r3860, %r3859; // inline asm mul.wide.u32 %rd1113, %r3177, 4; add.s64 %rd1114, %rd102, %rd1113; ld.shared.u32 %r3624, [%rd1114]; add.s32 %r3625, %r3624, %r3623; // inline asm bfe.u32 %r3181, %r3186, %r3859, %r3859; // inline asm mul.wide.u32 %rd1115, %r3181, 4; add.s64 %rd1116, %rd106, %rd1115; ld.shared.u32 %r3626, [%rd1116]; xor.b32 %r3627, %r3626, %r3625; // inline asm bfe.u32 %r3185, %r3186, %r3044, %r3859; // inline asm mul.wide.u32 %rd1117, %r3185, 4; add.s64 %rd1118, %rd110, %rd1117; ld.shared.u32 %r3628, [%rd1118]; add.s32 %r3629, %r3628, %r3627; xor.b32 %r3630, %r3883, %r3170; xor.b32 %r3202, %r3630, %r3629; // inline asm bfe.u32 %r3189, %r3202, %r3858, %r3859; // inline asm mul.wide.u32 %rd1119, %r3189, 4; add.s64 %rd1120, %rd98, %rd1119; ld.shared.u32 %r3631, [%rd1120]; // inline asm bfe.u32 %r3193, %r3202, %r3860, %r3859; // inline asm mul.wide.u32 %rd1121, %r3193, 4; add.s64 %rd1122, %rd102, %rd1121; ld.shared.u32 %r3632, [%rd1122]; add.s32 %r3633, %r3632, %r3631; // inline asm bfe.u32 %r3197, %r3202, %r3859, %r3859; // inline asm mul.wide.u32 %rd1123, %r3197, 4; add.s64 %rd1124, %rd106, %rd1123; ld.shared.u32 %r3634, [%rd1124]; xor.b32 %r3635, %r3634, %r3633; // 
inline asm bfe.u32 %r3201, %r3202, %r3044, %r3859; // inline asm mul.wide.u32 %rd1125, %r3201, 4; add.s64 %rd1126, %rd110, %rd1125; ld.shared.u32 %r3636, [%rd1126]; add.s32 %r3637, %r3636, %r3635; xor.b32 %r3638, %r3882, %r3186; xor.b32 %r3218, %r3638, %r3637; // inline asm bfe.u32 %r3205, %r3218, %r3858, %r3859; // inline asm mul.wide.u32 %rd1127, %r3205, 4; add.s64 %rd1128, %rd98, %rd1127; ld.shared.u32 %r3639, [%rd1128]; // inline asm bfe.u32 %r3209, %r3218, %r3860, %r3859; // inline asm mul.wide.u32 %rd1129, %r3209, 4; add.s64 %rd1130, %rd102, %rd1129; ld.shared.u32 %r3640, [%rd1130]; add.s32 %r3641, %r3640, %r3639; // inline asm bfe.u32 %r3213, %r3218, %r3859, %r3859; // inline asm mul.wide.u32 %rd1131, %r3213, 4; add.s64 %rd1132, %rd106, %rd1131; ld.shared.u32 %r3642, [%rd1132]; xor.b32 %r3643, %r3642, %r3641; // inline asm bfe.u32 %r3217, %r3218, %r3044, %r3859; // inline asm mul.wide.u32 %rd1133, %r3217, 4; add.s64 %rd1134, %rd110, %rd1133; ld.shared.u32 %r3644, [%rd1134]; add.s32 %r3645, %r3644, %r3643; xor.b32 %r3646, %r3881, %r3202; xor.b32 %r3234, %r3646, %r3645; // inline asm bfe.u32 %r3221, %r3234, %r3858, %r3859; // inline asm mul.wide.u32 %rd1135, %r3221, 4; add.s64 %rd1136, %rd98, %rd1135; ld.shared.u32 %r3647, [%rd1136]; // inline asm bfe.u32 %r3225, %r3234, %r3860, %r3859; // inline asm mul.wide.u32 %rd1137, %r3225, 4; add.s64 %rd1138, %rd102, %rd1137; ld.shared.u32 %r3648, [%rd1138]; add.s32 %r3649, %r3648, %r3647; // inline asm bfe.u32 %r3229, %r3234, %r3859, %r3859; // inline asm mul.wide.u32 %rd1139, %r3229, 4; add.s64 %rd1140, %rd106, %rd1139; ld.shared.u32 %r3650, [%rd1140]; xor.b32 %r3651, %r3650, %r3649; // inline asm bfe.u32 %r3233, %r3234, %r3044, %r3859; // inline asm mul.wide.u32 %rd1141, %r3233, 4; add.s64 %rd1142, %rd110, %rd1141; ld.shared.u32 %r3652, [%rd1142]; add.s32 %r3653, %r3652, %r3651; xor.b32 %r3654, %r3880, %r3218; xor.b32 %r3250, %r3654, %r3653; // inline asm bfe.u32 %r3237, %r3250, %r3858, %r3859; // inline asm 
mul.wide.u32 %rd1143, %r3237, 4; add.s64 %rd1144, %rd98, %rd1143; ld.shared.u32 %r3655, [%rd1144]; // inline asm bfe.u32 %r3241, %r3250, %r3860, %r3859; // inline asm mul.wide.u32 %rd1145, %r3241, 4; add.s64 %rd1146, %rd102, %rd1145; ld.shared.u32 %r3656, [%rd1146]; add.s32 %r3657, %r3656, %r3655; // inline asm bfe.u32 %r3245, %r3250, %r3859, %r3859; // inline asm mul.wide.u32 %rd1147, %r3245, 4; add.s64 %rd1148, %rd106, %rd1147; ld.shared.u32 %r3658, [%rd1148]; xor.b32 %r3659, %r3658, %r3657; // inline asm bfe.u32 %r3249, %r3250, %r3044, %r3859; // inline asm mul.wide.u32 %rd1149, %r3249, 4; add.s64 %rd1150, %rd110, %rd1149; ld.shared.u32 %r3660, [%rd1150]; add.s32 %r3661, %r3660, %r3659; xor.b32 %r3662, %r3879, %r3234; xor.b32 %r3266, %r3662, %r3661; // inline asm bfe.u32 %r3253, %r3266, %r3858, %r3859; // inline asm mul.wide.u32 %rd1151, %r3253, 4; add.s64 %rd1152, %rd98, %rd1151; ld.shared.u32 %r3663, [%rd1152]; // inline asm bfe.u32 %r3257, %r3266, %r3860, %r3859; // inline asm mul.wide.u32 %rd1153, %r3257, 4; add.s64 %rd1154, %rd102, %rd1153; ld.shared.u32 %r3664, [%rd1154]; add.s32 %r3665, %r3664, %r3663; // inline asm bfe.u32 %r3261, %r3266, %r3859, %r3859; // inline asm mul.wide.u32 %rd1155, %r3261, 4; add.s64 %rd1156, %rd106, %rd1155; ld.shared.u32 %r3666, [%rd1156]; xor.b32 %r3667, %r3666, %r3665; // inline asm bfe.u32 %r3265, %r3266, %r3044, %r3859; // inline asm mul.wide.u32 %rd1157, %r3265, 4; add.s64 %rd1158, %rd110, %rd1157; ld.shared.u32 %r3668, [%rd1158]; add.s32 %r3669, %r3668, %r3667; xor.b32 %r3670, %r3878, %r3250; xor.b32 %r3282, %r3670, %r3669; // inline asm bfe.u32 %r3269, %r3282, %r3858, %r3859; // inline asm mul.wide.u32 %rd1159, %r3269, 4; add.s64 %rd1160, %rd98, %rd1159; ld.shared.u32 %r3671, [%rd1160]; // inline asm bfe.u32 %r3273, %r3282, %r3860, %r3859; // inline asm mul.wide.u32 %rd1161, %r3273, 4; add.s64 %rd1162, %rd102, %rd1161; ld.shared.u32 %r3672, [%rd1162]; add.s32 %r3673, %r3672, %r3671; // inline asm bfe.u32 %r3277, %r3282, 
%r3859, %r3859; // inline asm mul.wide.u32 %rd1163, %r3277, 4; add.s64 %rd1164, %rd106, %rd1163; ld.shared.u32 %r3674, [%rd1164]; xor.b32 %r3675, %r3674, %r3673; // inline asm bfe.u32 %r3281, %r3282, %r3044, %r3859; // inline asm mul.wide.u32 %rd1165, %r3281, 4; add.s64 %rd1166, %rd110, %rd1165; ld.shared.u32 %r3676, [%rd1166]; add.s32 %r3677, %r3676, %r3675; xor.b32 %r3678, %r3877, %r3266; xor.b32 %r3298, %r3678, %r3677; // inline asm bfe.u32 %r3285, %r3298, %r3858, %r3859; // inline asm mul.wide.u32 %rd1167, %r3285, 4; add.s64 %rd1168, %rd98, %rd1167; ld.shared.u32 %r3679, [%rd1168]; // inline asm bfe.u32 %r3289, %r3298, %r3860, %r3859; // inline asm mul.wide.u32 %rd1169, %r3289, 4; add.s64 %rd1170, %rd102, %rd1169; ld.shared.u32 %r3680, [%rd1170]; add.s32 %r3681, %r3680, %r3679; // inline asm bfe.u32 %r3293, %r3298, %r3859, %r3859; // inline asm mul.wide.u32 %rd1171, %r3293, 4; add.s64 %rd1172, %rd106, %rd1171; ld.shared.u32 %r3682, [%rd1172]; xor.b32 %r3683, %r3682, %r3681; // inline asm bfe.u32 %r3297, %r3298, %r3044, %r3859; // inline asm mul.wide.u32 %rd1173, %r3297, 4; add.s64 %rd1174, %rd110, %rd1173; ld.shared.u32 %r3684, [%rd1174]; add.s32 %r3685, %r3684, %r3683; xor.b32 %r3686, %r3876, %r3282; xor.b32 %r3687, %r3686, %r3685; xor.b32 %r3688, %r3875, %r3298; st.shared.u32 [%rd1340], %r3688; st.shared.u32 [%rd1340+4], %r3687; xor.b32 %r3689, %r3688, %r37; xor.b32 %r3314, %r3689, %r3892; // inline asm bfe.u32 %r3301, %r3314, %r3858, %r3859; // inline asm mul.wide.u32 %rd1175, %r3301, 4; add.s64 %rd1176, %rd98, %rd1175; ld.shared.u32 %r3690, [%rd1176]; // inline asm bfe.u32 %r3305, %r3314, %r3860, %r3859; // inline asm mul.wide.u32 %rd1177, %r3305, 4; add.s64 %rd1178, %rd102, %rd1177; ld.shared.u32 %r3691, [%rd1178]; add.s32 %r3692, %r3691, %r3690; // inline asm bfe.u32 %r3309, %r3314, %r3859, %r3859; // inline asm mul.wide.u32 %rd1179, %r3309, 4; add.s64 %rd1180, %rd106, %rd1179; ld.shared.u32 %r3693, [%rd1180]; xor.b32 %r3694, %r3693, %r3692; // inline asm 
bfe.u32 %r3313, %r3314, %r3044, %r3859; // inline asm mul.wide.u32 %rd1181, %r3313, 4; add.s64 %rd1182, %rd110, %rd1181; ld.shared.u32 %r3695, [%rd1182]; add.s32 %r3696, %r3695, %r3694; xor.b32 %r3697, %r38, %r3687; xor.b32 %r3698, %r3697, %r3891; xor.b32 %r3330, %r3698, %r3696; // inline asm bfe.u32 %r3317, %r3330, %r3858, %r3859; // inline asm mul.wide.u32 %rd1183, %r3317, 4; add.s64 %rd1184, %rd98, %rd1183; ld.shared.u32 %r3699, [%rd1184]; // inline asm bfe.u32 %r3321, %r3330, %r3860, %r3859; // inline asm mul.wide.u32 %rd1185, %r3321, 4; add.s64 %rd1186, %rd102, %rd1185; ld.shared.u32 %r3700, [%rd1186]; add.s32 %r3701, %r3700, %r3699; // inline asm bfe.u32 %r3325, %r3330, %r3859, %r3859; // inline asm mul.wide.u32 %rd1187, %r3325, 4; add.s64 %rd1188, %rd106, %rd1187; ld.shared.u32 %r3702, [%rd1188]; xor.b32 %r3703, %r3702, %r3701; // inline asm bfe.u32 %r3329, %r3330, %r3044, %r3859; // inline asm mul.wide.u32 %rd1189, %r3329, 4; add.s64 %rd1190, %rd110, %rd1189; ld.shared.u32 %r3704, [%rd1190]; add.s32 %r3705, %r3704, %r3703; xor.b32 %r3706, %r3890, %r3314; xor.b32 %r3346, %r3706, %r3705; // inline asm bfe.u32 %r3333, %r3346, %r3858, %r3859; // inline asm mul.wide.u32 %rd1191, %r3333, 4; add.s64 %rd1192, %rd98, %rd1191; ld.shared.u32 %r3707, [%rd1192]; // inline asm bfe.u32 %r3337, %r3346, %r3860, %r3859; // inline asm mul.wide.u32 %rd1193, %r3337, 4; add.s64 %rd1194, %rd102, %rd1193; ld.shared.u32 %r3708, [%rd1194]; add.s32 %r3709, %r3708, %r3707; // inline asm bfe.u32 %r3341, %r3346, %r3859, %r3859; // inline asm mul.wide.u32 %rd1195, %r3341, 4; add.s64 %rd1196, %rd106, %rd1195; ld.shared.u32 %r3710, [%rd1196]; xor.b32 %r3711, %r3710, %r3709; // inline asm bfe.u32 %r3345, %r3346, %r3044, %r3859; // inline asm mul.wide.u32 %rd1197, %r3345, 4; add.s64 %rd1198, %rd110, %rd1197; ld.shared.u32 %r3712, [%rd1198]; add.s32 %r3713, %r3712, %r3711; xor.b32 %r3714, %r3889, %r3330; xor.b32 %r3362, %r3714, %r3713; // inline asm bfe.u32 %r3349, %r3362, %r3858, %r3859; // 
inline asm mul.wide.u32 %rd1199, %r3349, 4; add.s64 %rd1200, %rd98, %rd1199; ld.shared.u32 %r3715, [%rd1200]; // inline asm bfe.u32 %r3353, %r3362, %r3860, %r3859; // inline asm mul.wide.u32 %rd1201, %r3353, 4; add.s64 %rd1202, %rd102, %rd1201; ld.shared.u32 %r3716, [%rd1202]; add.s32 %r3717, %r3716, %r3715; // inline asm bfe.u32 %r3357, %r3362, %r3859, %r3859; // inline asm mul.wide.u32 %rd1203, %r3357, 4; add.s64 %rd1204, %rd106, %rd1203; ld.shared.u32 %r3718, [%rd1204]; xor.b32 %r3719, %r3718, %r3717; // inline asm bfe.u32 %r3361, %r3362, %r3044, %r3859; // inline asm mul.wide.u32 %rd1205, %r3361, 4; add.s64 %rd1206, %rd110, %rd1205; ld.shared.u32 %r3720, [%rd1206]; add.s32 %r3721, %r3720, %r3719; xor.b32 %r3722, %r3888, %r3346; xor.b32 %r3378, %r3722, %r3721; // inline asm bfe.u32 %r3365, %r3378, %r3858, %r3859; // inline asm mul.wide.u32 %rd1207, %r3365, 4; add.s64 %rd1208, %rd98, %rd1207; ld.shared.u32 %r3723, [%rd1208]; // inline asm bfe.u32 %r3369, %r3378, %r3860, %r3859; // inline asm mul.wide.u32 %rd1209, %r3369, 4; add.s64 %rd1210, %rd102, %rd1209; ld.shared.u32 %r3724, [%rd1210]; add.s32 %r3725, %r3724, %r3723; // inline asm bfe.u32 %r3373, %r3378, %r3859, %r3859; // inline asm mul.wide.u32 %rd1211, %r3373, 4; add.s64 %rd1212, %rd106, %rd1211; ld.shared.u32 %r3726, [%rd1212]; xor.b32 %r3727, %r3726, %r3725; // inline asm bfe.u32 %r3377, %r3378, %r3044, %r3859; // inline asm mul.wide.u32 %rd1213, %r3377, 4; add.s64 %rd1214, %rd110, %rd1213; ld.shared.u32 %r3728, [%rd1214]; add.s32 %r3729, %r3728, %r3727; xor.b32 %r3730, %r3887, %r3362; xor.b32 %r3394, %r3730, %r3729; // inline asm bfe.u32 %r3381, %r3394, %r3858, %r3859; // inline asm mul.wide.u32 %rd1215, %r3381, 4; add.s64 %rd1216, %rd98, %rd1215; ld.shared.u32 %r3731, [%rd1216]; // inline asm bfe.u32 %r3385, %r3394, %r3860, %r3859; // inline asm mul.wide.u32 %rd1217, %r3385, 4; add.s64 %rd1218, %rd102, %rd1217; ld.shared.u32 %r3732, [%rd1218]; add.s32 %r3733, %r3732, %r3731; // inline asm bfe.u32 
%r3389, %r3394, %r3859, %r3859; // inline asm mul.wide.u32 %rd1219, %r3389, 4; add.s64 %rd1220, %rd106, %rd1219; ld.shared.u32 %r3734, [%rd1220]; xor.b32 %r3735, %r3734, %r3733; // inline asm bfe.u32 %r3393, %r3394, %r3044, %r3859; // inline asm mul.wide.u32 %rd1221, %r3393, 4; add.s64 %rd1222, %rd110, %rd1221; ld.shared.u32 %r3736, [%rd1222]; add.s32 %r3737, %r3736, %r3735; xor.b32 %r3738, %r3886, %r3378; xor.b32 %r3410, %r3738, %r3737; // inline asm bfe.u32 %r3397, %r3410, %r3858, %r3859; // inline asm mul.wide.u32 %rd1223, %r3397, 4; add.s64 %rd1224, %rd98, %rd1223; ld.shared.u32 %r3739, [%rd1224]; // inline asm bfe.u32 %r3401, %r3410, %r3860, %r3859; // inline asm mul.wide.u32 %rd1225, %r3401, 4; add.s64 %rd1226, %rd102, %rd1225; ld.shared.u32 %r3740, [%rd1226]; add.s32 %r3741, %r3740, %r3739; // inline asm bfe.u32 %r3405, %r3410, %r3859, %r3859; // inline asm mul.wide.u32 %rd1227, %r3405, 4; add.s64 %rd1228, %rd106, %rd1227; ld.shared.u32 %r3742, [%rd1228]; xor.b32 %r3743, %r3742, %r3741; // inline asm bfe.u32 %r3409, %r3410, %r3044, %r3859; // inline asm mul.wide.u32 %rd1229, %r3409, 4; add.s64 %rd1230, %rd110, %rd1229; ld.shared.u32 %r3744, [%rd1230]; add.s32 %r3745, %r3744, %r3743; xor.b32 %r3746, %r3885, %r3394; xor.b32 %r3426, %r3746, %r3745; // inline asm bfe.u32 %r3413, %r3426, %r3858, %r3859; // inline asm mul.wide.u32 %rd1231, %r3413, 4; add.s64 %rd1232, %rd98, %rd1231; ld.shared.u32 %r3747, [%rd1232]; // inline asm bfe.u32 %r3417, %r3426, %r3860, %r3859; // inline asm mul.wide.u32 %rd1233, %r3417, 4; add.s64 %rd1234, %rd102, %rd1233; ld.shared.u32 %r3748, [%rd1234]; add.s32 %r3749, %r3748, %r3747; // inline asm bfe.u32 %r3421, %r3426, %r3859, %r3859; // inline asm mul.wide.u32 %rd1235, %r3421, 4; add.s64 %rd1236, %rd106, %rd1235; ld.shared.u32 %r3750, [%rd1236]; xor.b32 %r3751, %r3750, %r3749; // inline asm bfe.u32 %r3425, %r3426, %r3044, %r3859; // inline asm mul.wide.u32 %rd1237, %r3425, 4; add.s64 %rd1238, %rd110, %rd1237; ld.shared.u32 %r3752, 
[%rd1238]; add.s32 %r3753, %r3752, %r3751; xor.b32 %r3754, %r3884, %r3410; xor.b32 %r3442, %r3754, %r3753; // inline asm bfe.u32 %r3429, %r3442, %r3858, %r3859; // inline asm mul.wide.u32 %rd1239, %r3429, 4; add.s64 %rd1240, %rd98, %rd1239; ld.shared.u32 %r3755, [%rd1240]; // inline asm bfe.u32 %r3433, %r3442, %r3860, %r3859; // inline asm mul.wide.u32 %rd1241, %r3433, 4; add.s64 %rd1242, %rd102, %rd1241; ld.shared.u32 %r3756, [%rd1242]; add.s32 %r3757, %r3756, %r3755; // inline asm bfe.u32 %r3437, %r3442, %r3859, %r3859; // inline asm mul.wide.u32 %rd1243, %r3437, 4; add.s64 %rd1244, %rd106, %rd1243; ld.shared.u32 %r3758, [%rd1244]; xor.b32 %r3759, %r3758, %r3757; // inline asm bfe.u32 %r3441, %r3442, %r3044, %r3859; // inline asm mul.wide.u32 %rd1245, %r3441, 4; add.s64 %rd1246, %rd110, %rd1245; ld.shared.u32 %r3760, [%rd1246]; add.s32 %r3761, %r3760, %r3759; xor.b32 %r3762, %r3883, %r3426; xor.b32 %r3458, %r3762, %r3761; // inline asm bfe.u32 %r3445, %r3458, %r3858, %r3859; // inline asm mul.wide.u32 %rd1247, %r3445, 4; add.s64 %rd1248, %rd98, %rd1247; ld.shared.u32 %r3763, [%rd1248]; // inline asm bfe.u32 %r3449, %r3458, %r3860, %r3859; // inline asm mul.wide.u32 %rd1249, %r3449, 4; add.s64 %rd1250, %rd102, %rd1249; ld.shared.u32 %r3764, [%rd1250]; add.s32 %r3765, %r3764, %r3763; // inline asm bfe.u32 %r3453, %r3458, %r3859, %r3859; // inline asm mul.wide.u32 %rd1251, %r3453, 4; add.s64 %rd1252, %rd106, %rd1251; ld.shared.u32 %r3766, [%rd1252]; xor.b32 %r3767, %r3766, %r3765; // inline asm bfe.u32 %r3457, %r3458, %r3044, %r3859; // inline asm mul.wide.u32 %rd1253, %r3457, 4; add.s64 %rd1254, %rd110, %rd1253; ld.shared.u32 %r3768, [%rd1254]; add.s32 %r3769, %r3768, %r3767; xor.b32 %r3770, %r3882, %r3442; xor.b32 %r3474, %r3770, %r3769; // inline asm bfe.u32 %r3461, %r3474, %r3858, %r3859; // inline asm mul.wide.u32 %rd1255, %r3461, 4; add.s64 %rd1256, %rd98, %rd1255; ld.shared.u32 %r3771, [%rd1256]; // inline asm bfe.u32 %r3465, %r3474, %r3860, %r3859; // inline 
asm mul.wide.u32 %rd1257, %r3465, 4; add.s64 %rd1258, %rd102, %rd1257; ld.shared.u32 %r3772, [%rd1258]; add.s32 %r3773, %r3772, %r3771; // inline asm bfe.u32 %r3469, %r3474, %r3859, %r3859; // inline asm mul.wide.u32 %rd1259, %r3469, 4; add.s64 %rd1260, %rd106, %rd1259; ld.shared.u32 %r3774, [%rd1260]; xor.b32 %r3775, %r3774, %r3773; // inline asm bfe.u32 %r3473, %r3474, %r3044, %r3859; // inline asm mul.wide.u32 %rd1261, %r3473, 4; add.s64 %rd1262, %rd110, %rd1261; ld.shared.u32 %r3776, [%rd1262]; add.s32 %r3777, %r3776, %r3775; xor.b32 %r3778, %r3881, %r3458; xor.b32 %r3490, %r3778, %r3777; // inline asm bfe.u32 %r3477, %r3490, %r3858, %r3859; // inline asm mul.wide.u32 %rd1263, %r3477, 4; add.s64 %rd1264, %rd98, %rd1263; ld.shared.u32 %r3779, [%rd1264]; // inline asm bfe.u32 %r3481, %r3490, %r3860, %r3859; // inline asm mul.wide.u32 %rd1265, %r3481, 4; add.s64 %rd1266, %rd102, %rd1265; ld.shared.u32 %r3780, [%rd1266]; add.s32 %r3781, %r3780, %r3779; // inline asm bfe.u32 %r3485, %r3490, %r3859, %r3859; // inline asm mul.wide.u32 %rd1267, %r3485, 4; add.s64 %rd1268, %rd106, %rd1267; ld.shared.u32 %r3782, [%rd1268]; xor.b32 %r3783, %r3782, %r3781; // inline asm bfe.u32 %r3489, %r3490, %r3044, %r3859; // inline asm mul.wide.u32 %rd1269, %r3489, 4; add.s64 %rd1270, %rd110, %rd1269; ld.shared.u32 %r3784, [%rd1270]; add.s32 %r3785, %r3784, %r3783; xor.b32 %r3786, %r3880, %r3474; xor.b32 %r3506, %r3786, %r3785; // inline asm bfe.u32 %r3493, %r3506, %r3858, %r3859; // inline asm mul.wide.u32 %rd1271, %r3493, 4; add.s64 %rd1272, %rd98, %rd1271; ld.shared.u32 %r3787, [%rd1272]; // inline asm bfe.u32 %r3497, %r3506, %r3860, %r3859; // inline asm mul.wide.u32 %rd1273, %r3497, 4; add.s64 %rd1274, %rd102, %rd1273; ld.shared.u32 %r3788, [%rd1274]; add.s32 %r3789, %r3788, %r3787; // inline asm bfe.u32 %r3501, %r3506, %r3859, %r3859; // inline asm mul.wide.u32 %rd1275, %r3501, 4; add.s64 %rd1276, %rd106, %rd1275; ld.shared.u32 %r3790, [%rd1276]; xor.b32 %r3791, %r3790, %r3789; 
// inline asm bfe.u32 %r3505, %r3506, %r3044, %r3859; // inline asm mul.wide.u32 %rd1277, %r3505, 4; add.s64 %rd1278, %rd110, %rd1277; ld.shared.u32 %r3792, [%rd1278]; add.s32 %r3793, %r3792, %r3791; xor.b32 %r3794, %r3879, %r3490; xor.b32 %r3522, %r3794, %r3793; // inline asm bfe.u32 %r3509, %r3522, %r3858, %r3859; // inline asm mul.wide.u32 %rd1279, %r3509, 4; add.s64 %rd1280, %rd98, %rd1279; ld.shared.u32 %r3795, [%rd1280]; // inline asm bfe.u32 %r3513, %r3522, %r3860, %r3859; // inline asm mul.wide.u32 %rd1281, %r3513, 4; add.s64 %rd1282, %rd102, %rd1281; ld.shared.u32 %r3796, [%rd1282]; add.s32 %r3797, %r3796, %r3795; // inline asm bfe.u32 %r3517, %r3522, %r3859, %r3859; // inline asm mul.wide.u32 %rd1283, %r3517, 4; add.s64 %rd1284, %rd106, %rd1283; ld.shared.u32 %r3798, [%rd1284]; xor.b32 %r3799, %r3798, %r3797; // inline asm bfe.u32 %r3521, %r3522, %r3044, %r3859; // inline asm mul.wide.u32 %rd1285, %r3521, 4; add.s64 %rd1286, %rd110, %rd1285; ld.shared.u32 %r3800, [%rd1286]; add.s32 %r3801, %r3800, %r3799; xor.b32 %r3802, %r3878, %r3506; xor.b32 %r3538, %r3802, %r3801; // inline asm bfe.u32 %r3525, %r3538, %r3858, %r3859; // inline asm mul.wide.u32 %rd1287, %r3525, 4; add.s64 %rd1288, %rd98, %rd1287; ld.shared.u32 %r3803, [%rd1288]; // inline asm bfe.u32 %r3529, %r3538, %r3860, %r3859; // inline asm mul.wide.u32 %rd1289, %r3529, 4; add.s64 %rd1290, %rd102, %rd1289; ld.shared.u32 %r3804, [%rd1290]; add.s32 %r3805, %r3804, %r3803; // inline asm bfe.u32 %r3533, %r3538, %r3859, %r3859; // inline asm mul.wide.u32 %rd1291, %r3533, 4; add.s64 %rd1292, %rd106, %rd1291; ld.shared.u32 %r3806, [%rd1292]; xor.b32 %r3807, %r3806, %r3805; // inline asm bfe.u32 %r3537, %r3538, %r3044, %r3859; // inline asm mul.wide.u32 %rd1293, %r3537, 4; add.s64 %rd1294, %rd110, %rd1293; ld.shared.u32 %r3808, [%rd1294]; add.s32 %r3809, %r3808, %r3807; xor.b32 %r3810, %r3877, %r3522; xor.b32 %r3554, %r3810, %r3809; // inline asm bfe.u32 %r3541, %r3554, %r3858, %r3859; // inline asm 
mul.wide.u32 %rd1295, %r3541, 4; add.s64 %rd1296, %rd98, %rd1295; ld.shared.u32 %r3811, [%rd1296]; // inline asm bfe.u32 %r3545, %r3554, %r3860, %r3859; // inline asm mul.wide.u32 %rd1297, %r3545, 4; add.s64 %rd1298, %rd102, %rd1297; ld.shared.u32 %r3812, [%rd1298]; add.s32 %r3813, %r3812, %r3811; // inline asm bfe.u32 %r3549, %r3554, %r3859, %r3859; // inline asm mul.wide.u32 %rd1299, %r3549, 4; add.s64 %rd1300, %rd106, %rd1299; ld.shared.u32 %r3814, [%rd1300]; xor.b32 %r3815, %r3814, %r3813; // inline asm bfe.u32 %r3553, %r3554, %r3044, %r3859; // inline asm mul.wide.u32 %rd1301, %r3553, 4; add.s64 %rd1302, %rd110, %rd1301; ld.shared.u32 %r3816, [%rd1302]; add.s32 %r3817, %r3816, %r3815; xor.b32 %r3818, %r3876, %r3538; xor.b32 %r3897, %r3818, %r3817; xor.b32 %r3898, %r3875, %r3554; st.shared.u32 [%rd1340+8], %r3898; st.shared.u32 [%rd1340+12], %r3897; add.s64 %rd1340, %rd1340, 16; add.s32 %r3905, %r3905, 4; setp.lt.u32 %p16, %r3905, 256; @%p16 bra BB3_28; st.global.u32 [%rd16+72], %r3892; st.global.u32 [%rd16+76], %r3891; st.global.u32 [%rd16+80], %r3890; st.global.u32 [%rd16+84], %r3889; st.global.u32 [%rd16+88], %r3888; st.global.u32 [%rd16+92], %r3887; st.global.u32 [%rd16+96], %r3886; st.global.u32 [%rd16+100], %r3885; st.global.u32 [%rd16+104], %r3884; st.global.u32 [%rd16+108], %r3883; st.global.u32 [%rd16+112], %r3882; st.global.u32 [%rd16+116], %r3881; st.global.u32 [%rd16+120], %r3880; st.global.u32 [%rd16+124], %r3879; st.global.u32 [%rd16+128], %r3878; st.global.u32 [%rd16+132], %r3877; st.global.u32 [%rd16+136], %r3876; st.global.u32 [%rd16+140], %r3875; mov.u32 %r3908, 0; BB3_30: mul.wide.u32 %rd1306, %r3908, 4; add.s64 %rd1307, %rd98, %rd1306; ld.shared.u32 %r3820, [%rd1307]; add.s64 %rd1310, %rd16, %rd1306; ld.shared.u32 %r3821, [%rd1307+4]; ld.shared.u32 %r3822, [%rd1307+8]; ld.shared.u32 %r3823, [%rd1307+12]; ld.shared.u32 %r3824, [%rd1307+16]; ld.shared.u32 %r3825, [%rd1307+20]; ld.shared.u32 %r3826, [%rd1307+24]; ld.shared.u32 %r3827, 
[%rd1307+28]; st.global.u32 [%rd1310+144], %r3820; add.s64 %rd1313, %rd102, %rd1306; st.global.u32 [%rd1310+148], %r3821; st.global.u32 [%rd1310+152], %r3822; st.global.u32 [%rd1310+156], %r3823; st.global.u32 [%rd1310+160], %r3824; st.global.u32 [%rd1310+164], %r3825; st.global.u32 [%rd1310+168], %r3826; st.global.u32 [%rd1310+172], %r3827; ld.shared.u32 %r3828, [%rd1313]; ld.shared.u32 %r3829, [%rd1313+4]; ld.shared.u32 %r3830, [%rd1313+8]; ld.shared.u32 %r3831, [%rd1313+12]; ld.shared.u32 %r3832, [%rd1313+16]; ld.shared.u32 %r3833, [%rd1313+20]; ld.shared.u32 %r3834, [%rd1313+24]; ld.shared.u32 %r3835, [%rd1313+28]; st.global.u32 [%rd1310+1168], %r3828; add.s64 %rd1316, %rd106, %rd1306; st.global.u32 [%rd1310+1172], %r3829; st.global.u32 [%rd1310+1176], %r3830; st.global.u32 [%rd1310+1180], %r3831; st.global.u32 [%rd1310+1184], %r3832; st.global.u32 [%rd1310+1188], %r3833; st.global.u32 [%rd1310+1192], %r3834; st.global.u32 [%rd1310+1196], %r3835; ld.shared.u32 %r3836, [%rd1316]; ld.shared.u32 %r3837, [%rd1316+4]; ld.shared.u32 %r3838, [%rd1316+8]; ld.shared.u32 %r3839, [%rd1316+12]; ld.shared.u32 %r3840, [%rd1316+16]; ld.shared.u32 %r3841, [%rd1316+20]; ld.shared.u32 %r3842, [%rd1316+24]; ld.shared.u32 %r3843, [%rd1316+28]; st.global.u32 [%rd1310+2192], %r3836; add.s64 %rd1319, %rd110, %rd1306; st.global.u32 [%rd1310+2196], %r3837; st.global.u32 [%rd1310+2200], %r3838; st.global.u32 [%rd1310+2204], %r3839; st.global.u32 [%rd1310+2208], %r3840; st.global.u32 [%rd1310+2212], %r3841; st.global.u32 [%rd1310+2216], %r3842; st.global.u32 [%rd1310+2220], %r3843; ld.shared.u32 %r3844, [%rd1319]; ld.shared.u32 %r3845, [%rd1319+4]; ld.shared.u32 %r3846, [%rd1319+8]; ld.shared.u32 %r3847, [%rd1319+12]; ld.shared.u32 %r3848, [%rd1319+16]; ld.shared.u32 %r3849, [%rd1319+20]; ld.shared.u32 %r3850, [%rd1319+24]; ld.shared.u32 %r3851, [%rd1319+28]; st.global.u32 [%rd1310+3216], %r3844; st.global.u32 [%rd1310+3220], %r3845; st.global.u32 [%rd1310+3224], %r3846; st.global.u32 
[%rd1310+3228], %r3847; st.global.u32 [%rd1310+3232], %r3848; st.global.u32 [%rd1310+3236], %r3849; st.global.u32 [%rd1310+3240], %r3850; st.global.u32 [%rd1310+3244], %r3851; add.s32 %r3908, %r3908, 8; setp.ne.s32 %p17, %r3908, 256; @%p17 bra BB3_30; BB3_31: ret; } // .globl m03200_loop .entry m03200_loop( .param .u64 .ptr .global .align 4 m03200_loop_param_0, .param .u64 .ptr .global .align 4 m03200_loop_param_1, .param .u64 .ptr .global .align 4 m03200_loop_param_2, .param .u64 .ptr .global .align 4 m03200_loop_param_3, .param .u64 .ptr .global .align 4 m03200_loop_param_4, .param .u64 .ptr .global .align 1 m03200_loop_param_5, .param .u64 .ptr .global .align 4 m03200_loop_param_6, .param .u64 .ptr .global .align 4 m03200_loop_param_7, .param .u64 .ptr .global .align 4 m03200_loop_param_8, .param .u64 .ptr .global .align 4 m03200_loop_param_9, .param .u64 .ptr .global .align 4 m03200_loop_param_10, .param .u64 .ptr .global .align 4 m03200_loop_param_11, .param .u64 .ptr .global .align 4 m03200_loop_param_12, .param .u64 .ptr .global .align 4 m03200_loop_param_13, .param .u64 .ptr .global .align 8 m03200_loop_param_14, .param .u64 .ptr .global .align 4 m03200_loop_param_15, .param .u64 .ptr .global .align 4 m03200_loop_param_16, .param .u64 .ptr .global .align 4 m03200_loop_param_17, .param .u64 .ptr .global .align 1 m03200_loop_param_18, .param .u64 .ptr .global .align 4 m03200_loop_param_19, .param .u64 .ptr .global .align 4 m03200_loop_param_20, .param .u64 .ptr .global .align 4 m03200_loop_param_21, .param .u64 .ptr .global .align 4 m03200_loop_param_22, .param .u64 .ptr .global .align 4 m03200_loop_param_23, .param .u32 m03200_loop_param_24, .param .u32 m03200_loop_param_25, .param .u32 m03200_loop_param_26, .param .u32 m03200_loop_param_27, .param .u32 m03200_loop_param_28, .param .u32 m03200_loop_param_29, .param .u32 m03200_loop_param_30, .param .u32 m03200_loop_param_31, .param .u32 m03200_loop_param_32, .param .u32 m03200_loop_param_33, .param .u64 
m03200_loop_param_34 ) .reqntid 8, 1, 1 { .reg .pred %p<14>; .reg .b32 %r<10353>; .reg .b64 %rd<3503>; // demoted variable .shared .align 4 .b8 m03200_loop$S0_all[8192]; // demoted variable .shared .align 4 .b8 m03200_loop$S1_all[8192]; // demoted variable .shared .align 4 .b8 m03200_loop$S2_all[8192]; // demoted variable .shared .align 4 .b8 m03200_loop$S3_all[8192]; ld.param.u64 %rd31, [m03200_loop_param_4]; ld.param.u64 %rd32, [m03200_loop_param_17]; ld.param.u32 %r169, [m03200_loop_param_27]; ld.param.u32 %r170, [m03200_loop_param_29]; ld.param.u64 %rd33, [m03200_loop_param_34]; mov.b32 %r171, %envreg3; mov.u32 %r172, %ctaid.x; mov.u32 %r173, %ntid.x; mad.lo.s32 %r174, %r172, %r173, %r171; mov.u32 %r1, %tid.x; add.s32 %r2, %r174, %r1; cvt.s64.s32 %rd34, %r2; cvt.s64.s32 %rd1, %r1; setp.ge.u64 %p1, %rd34, %rd33; @%p1 bra BB4_24; mul.wide.s32 %rd35, %r2, 4240; add.s64 %rd2, %rd31, %rd35; ld.global.u32 %r3, [%rd2]; ld.global.u32 %r4, [%rd2+4]; ld.global.u32 %r5, [%rd2+8]; ld.global.u32 %r6, [%rd2+12]; ld.global.u32 %r7, [%rd2+16]; ld.global.u32 %r8, [%rd2+20]; ld.global.u32 %r9, [%rd2+24]; ld.global.u32 %r10, [%rd2+28]; ld.global.u32 %r11, [%rd2+32]; ld.global.u32 %r12, [%rd2+36]; ld.global.u32 %r13, [%rd2+40]; ld.global.u32 %r14, [%rd2+44]; ld.global.u32 %r15, [%rd2+48]; ld.global.u32 %r16, [%rd2+52]; ld.global.u32 %r17, [%rd2+56]; ld.global.u32 %r18, [%rd2+60]; ld.global.u32 %r19, [%rd2+64]; ld.global.u32 %r20, [%rd2+68]; ld.global.u32 %r10351, [%rd2+72]; ld.global.u32 %r10350, [%rd2+76]; ld.global.u32 %r10349, [%rd2+80]; ld.global.u32 %r10348, [%rd2+84]; ld.global.u32 %r10347, [%rd2+88]; ld.global.u32 %r10346, [%rd2+92]; ld.global.u32 %r10345, [%rd2+96]; ld.global.u32 %r10344, [%rd2+100]; ld.global.u32 %r10343, [%rd2+104]; ld.global.u32 %r10342, [%rd2+108]; ld.global.u32 %r10341, [%rd2+112]; ld.global.u32 %r10340, [%rd2+116]; ld.global.u32 %r10339, [%rd2+120]; ld.global.u32 %r10338, [%rd2+124]; ld.global.u32 %r10337, [%rd2+128]; ld.global.u32 %r10336, 
[%rd2+132]; ld.global.u32 %r10335, [%rd2+136]; ld.global.u32 %r10334, [%rd2+140]; mov.u32 %r10290, 0; BB4_2: mul.wide.u32 %rd38, %r10290, 4; add.s64 %rd39, %rd2, %rd38; ld.global.u32 %r176, [%rd39+144]; shl.b64 %rd40, %rd1, 10; mov.u64 %rd41, m03200_loop$S0_all; add.s64 %rd42, %rd41, %rd40; add.s64 %rd43, %rd42, %rd38; ld.global.u32 %r177, [%rd39+1168]; ld.global.u32 %r178, [%rd39+2192]; ld.global.u32 %r179, [%rd39+3216]; ld.global.u32 %r180, [%rd39+148]; ld.global.u32 %r181, [%rd39+1172]; ld.global.u32 %r182, [%rd39+2196]; ld.global.u32 %r183, [%rd39+3220]; ld.global.u32 %r184, [%rd39+152]; ld.global.u32 %r185, [%rd39+1176]; ld.global.u32 %r186, [%rd39+2200]; ld.global.u32 %r187, [%rd39+3224]; ld.global.u32 %r188, [%rd39+156]; ld.global.u32 %r189, [%rd39+1180]; ld.global.u32 %r190, [%rd39+2204]; ld.global.u32 %r191, [%rd39+3228]; ld.global.u32 %r192, [%rd39+160]; ld.global.u32 %r193, [%rd39+1184]; ld.global.u32 %r194, [%rd39+2208]; ld.global.u32 %r195, [%rd39+3232]; ld.global.u32 %r196, [%rd39+164]; ld.global.u32 %r197, [%rd39+1188]; ld.global.u32 %r198, [%rd39+2212]; ld.global.u32 %r199, [%rd39+3236]; ld.global.u32 %r200, [%rd39+168]; ld.global.u32 %r201, [%rd39+1192]; ld.global.u32 %r202, [%rd39+2216]; ld.global.u32 %r203, [%rd39+3240]; ld.global.u32 %r204, [%rd39+172]; ld.global.u32 %r205, [%rd39+1196]; ld.global.u32 %r206, [%rd39+2220]; ld.global.u32 %r207, [%rd39+3244]; st.shared.u32 [%rd43], %r176; mov.u64 %rd44, m03200_loop$S1_all; add.s64 %rd45, %rd44, %rd40; add.s64 %rd46, %rd45, %rd38; st.shared.u32 [%rd43+4], %r180; st.shared.u32 [%rd43+8], %r184; st.shared.u32 [%rd43+12], %r188; st.shared.u32 [%rd43+16], %r192; st.shared.u32 [%rd43+20], %r196; st.shared.u32 [%rd43+24], %r200; st.shared.u32 [%rd43+28], %r204; st.shared.u32 [%rd46], %r177; mov.u64 %rd47, m03200_loop$S2_all; add.s64 %rd48, %rd47, %rd40; add.s64 %rd49, %rd48, %rd38; st.shared.u32 [%rd46+4], %r181; st.shared.u32 [%rd46+8], %r185; st.shared.u32 [%rd46+12], %r189; st.shared.u32 [%rd46+16], 
%r193; st.shared.u32 [%rd46+20], %r197; st.shared.u32 [%rd46+24], %r201; st.shared.u32 [%rd46+28], %r205; st.shared.u32 [%rd49], %r178; mov.u64 %rd50, m03200_loop$S3_all; add.s64 %rd51, %rd50, %rd40; add.s64 %rd52, %rd51, %rd38; st.shared.u32 [%rd49+4], %r182; st.shared.u32 [%rd49+8], %r186; st.shared.u32 [%rd49+12], %r190; st.shared.u32 [%rd49+16], %r194; st.shared.u32 [%rd49+20], %r198; st.shared.u32 [%rd49+24], %r202; st.shared.u32 [%rd49+28], %r206; st.shared.u32 [%rd52], %r179; st.shared.u32 [%rd52+4], %r183; st.shared.u32 [%rd52+8], %r187; st.shared.u32 [%rd52+12], %r191; st.shared.u32 [%rd52+16], %r195; st.shared.u32 [%rd52+20], %r199; st.shared.u32 [%rd52+24], %r203; st.shared.u32 [%rd52+28], %r207; add.s32 %r10290, %r10290, 8; setp.ne.s32 %p2, %r10290, 256; @%p2 bra BB4_2; setp.eq.s32 %p3, %r170, 0; @%p3 bra BB4_22; mul.wide.u32 %rd53, %r169, 560; add.s64 %rd54, %rd32, %rd53; ld.global.u32 %r41, [%rd54]; ld.global.u32 %r42, [%rd54+4]; ld.global.u32 %r43, [%rd54+8]; ld.global.u32 %r44, [%rd54+12]; mul.wide.s32 %rd55, %r1, 1024; add.s64 %rd3, %rd41, %rd55; add.s64 %rd4, %rd44, %rd55; add.s64 %rd5, %rd47, %rd55; add.s64 %rd6, %rd50, %rd55; mov.u32 %r208, 0; mov.u32 %r10309, %r208; BB4_5: xor.b32 %r222, %r10351, %r3; mov.u32 %r2499, 24; mov.u32 %r2512, 8; // inline asm bfe.u32 %r209, %r222, %r2499, %r2512; // inline asm mul.wide.u32 %rd63, %r209, 4; add.s64 %rd64, %rd42, %rd63; ld.shared.u32 %r2514, [%rd64]; mov.u32 %r2503, 16; // inline asm bfe.u32 %r213, %r222, %r2503, %r2512; // inline asm mul.wide.u32 %rd67, %r213, 4; add.s64 %rd68, %rd45, %rd67; ld.shared.u32 %r2515, [%rd68]; add.s32 %r2516, %r2515, %r2514; // inline asm bfe.u32 %r217, %r222, %r2512, %r2512; // inline asm mul.wide.u32 %rd71, %r217, 4; add.s64 %rd72, %rd48, %rd71; ld.shared.u32 %r2517, [%rd72]; xor.b32 %r2518, %r2517, %r2516; // inline asm bfe.u32 %r221, %r222, %r208, %r2512; // inline asm mul.wide.u32 %rd75, %r221, 4; add.s64 %rd76, %rd51, %rd75; ld.shared.u32 %r2519, [%rd76]; add.s32 
%r2520, %r2519, %r2518; xor.b32 %r2521, %r10350, %r4; xor.b32 %r238, %r2521, %r2520; // inline asm bfe.u32 %r225, %r238, %r2499, %r2512; // inline asm mul.wide.u32 %rd77, %r225, 4; add.s64 %rd78, %rd42, %rd77; ld.shared.u32 %r2522, [%rd78]; // inline asm bfe.u32 %r229, %r238, %r2503, %r2512; // inline asm mul.wide.u32 %rd79, %r229, 4; add.s64 %rd80, %rd45, %rd79; ld.shared.u32 %r2523, [%rd80]; add.s32 %r2524, %r2523, %r2522; // inline asm bfe.u32 %r233, %r238, %r2512, %r2512; // inline asm mul.wide.u32 %rd81, %r233, 4; add.s64 %rd82, %rd48, %rd81; ld.shared.u32 %r2525, [%rd82]; xor.b32 %r2526, %r2525, %r2524; // inline asm bfe.u32 %r237, %r238, %r208, %r2512; // inline asm mul.wide.u32 %rd83, %r237, 4; add.s64 %rd84, %rd51, %rd83; ld.shared.u32 %r2527, [%rd84]; add.s32 %r2528, %r2527, %r2526; xor.b32 %r2529, %r10349, %r5; xor.b32 %r2530, %r2529, %r222; xor.b32 %r254, %r2530, %r2528; // inline asm bfe.u32 %r241, %r254, %r2499, %r2512; // inline asm mul.wide.u32 %rd85, %r241, 4; add.s64 %rd86, %rd42, %rd85; ld.shared.u32 %r2531, [%rd86]; // inline asm bfe.u32 %r245, %r254, %r2503, %r2512; // inline asm mul.wide.u32 %rd87, %r245, 4; add.s64 %rd88, %rd45, %rd87; ld.shared.u32 %r2532, [%rd88]; add.s32 %r2533, %r2532, %r2531; // inline asm bfe.u32 %r249, %r254, %r2512, %r2512; // inline asm mul.wide.u32 %rd89, %r249, 4; add.s64 %rd90, %rd48, %rd89; ld.shared.u32 %r2534, [%rd90]; xor.b32 %r2535, %r2534, %r2533; // inline asm bfe.u32 %r253, %r254, %r208, %r2512; // inline asm mul.wide.u32 %rd91, %r253, 4; add.s64 %rd92, %rd51, %rd91; ld.shared.u32 %r2536, [%rd92]; add.s32 %r2537, %r2536, %r2535; xor.b32 %r2538, %r10348, %r6; xor.b32 %r2539, %r2538, %r238; xor.b32 %r270, %r2539, %r2537; // inline asm bfe.u32 %r257, %r270, %r2499, %r2512; // inline asm mul.wide.u32 %rd93, %r257, 4; add.s64 %rd94, %rd42, %rd93; ld.shared.u32 %r2540, [%rd94]; // inline asm bfe.u32 %r261, %r270, %r2503, %r2512; // inline asm mul.wide.u32 %rd95, %r261, 4; add.s64 %rd96, %rd45, %rd95; 
ld.shared.u32 %r2541, [%rd96]; add.s32 %r2542, %r2541, %r2540; // inline asm bfe.u32 %r265, %r270, %r2512, %r2512; // inline asm mul.wide.u32 %rd97, %r265, 4; add.s64 %rd98, %rd48, %rd97; ld.shared.u32 %r2543, [%rd98]; xor.b32 %r2544, %r2543, %r2542; // inline asm bfe.u32 %r269, %r270, %r208, %r2512; // inline asm mul.wide.u32 %rd99, %r269, 4; add.s64 %rd100, %rd51, %rd99; ld.shared.u32 %r2545, [%rd100]; add.s32 %r2546, %r2545, %r2544; xor.b32 %r2547, %r10347, %r7; xor.b32 %r2548, %r2547, %r254; xor.b32 %r286, %r2548, %r2546; // inline asm bfe.u32 %r273, %r286, %r2499, %r2512; // inline asm mul.wide.u32 %rd101, %r273, 4; add.s64 %rd102, %rd42, %rd101; ld.shared.u32 %r2549, [%rd102]; // inline asm bfe.u32 %r277, %r286, %r2503, %r2512; // inline asm mul.wide.u32 %rd103, %r277, 4; add.s64 %rd104, %rd45, %rd103; ld.shared.u32 %r2550, [%rd104]; add.s32 %r2551, %r2550, %r2549; // inline asm bfe.u32 %r281, %r286, %r2512, %r2512; // inline asm mul.wide.u32 %rd105, %r281, 4; add.s64 %rd106, %rd48, %rd105; ld.shared.u32 %r2552, [%rd106]; xor.b32 %r2553, %r2552, %r2551; // inline asm bfe.u32 %r285, %r286, %r208, %r2512; // inline asm mul.wide.u32 %rd107, %r285, 4; add.s64 %rd108, %rd51, %rd107; ld.shared.u32 %r2554, [%rd108]; add.s32 %r2555, %r2554, %r2553; xor.b32 %r2556, %r10346, %r8; xor.b32 %r2557, %r2556, %r270; xor.b32 %r302, %r2557, %r2555; // inline asm bfe.u32 %r289, %r302, %r2499, %r2512; // inline asm mul.wide.u32 %rd109, %r289, 4; add.s64 %rd110, %rd42, %rd109; ld.shared.u32 %r2558, [%rd110]; // inline asm bfe.u32 %r293, %r302, %r2503, %r2512; // inline asm mul.wide.u32 %rd111, %r293, 4; add.s64 %rd112, %rd45, %rd111; ld.shared.u32 %r2559, [%rd112]; add.s32 %r2560, %r2559, %r2558; // inline asm bfe.u32 %r297, %r302, %r2512, %r2512; // inline asm mul.wide.u32 %rd113, %r297, 4; add.s64 %rd114, %rd48, %rd113; ld.shared.u32 %r2561, [%rd114]; xor.b32 %r2562, %r2561, %r2560; // inline asm bfe.u32 %r301, %r302, %r208, %r2512; // inline asm mul.wide.u32 %rd115, %r301, 4; 
add.s64 %rd116, %rd51, %rd115; ld.shared.u32 %r2563, [%rd116]; add.s32 %r2564, %r2563, %r2562; xor.b32 %r2565, %r10345, %r9; xor.b32 %r2566, %r2565, %r286; xor.b32 %r318, %r2566, %r2564; // inline asm bfe.u32 %r305, %r318, %r2499, %r2512; // inline asm mul.wide.u32 %rd117, %r305, 4; add.s64 %rd118, %rd42, %rd117; ld.shared.u32 %r2567, [%rd118]; // inline asm bfe.u32 %r309, %r318, %r2503, %r2512; // inline asm mul.wide.u32 %rd119, %r309, 4; add.s64 %rd120, %rd45, %rd119; ld.shared.u32 %r2568, [%rd120]; add.s32 %r2569, %r2568, %r2567; // inline asm bfe.u32 %r313, %r318, %r2512, %r2512; // inline asm mul.wide.u32 %rd121, %r313, 4; add.s64 %rd122, %rd48, %rd121; ld.shared.u32 %r2570, [%rd122]; xor.b32 %r2571, %r2570, %r2569; // inline asm bfe.u32 %r317, %r318, %r208, %r2512; // inline asm mul.wide.u32 %rd123, %r317, 4; add.s64 %rd124, %rd51, %rd123; ld.shared.u32 %r2572, [%rd124]; add.s32 %r2573, %r2572, %r2571; xor.b32 %r2574, %r10344, %r10; xor.b32 %r2575, %r2574, %r302; xor.b32 %r334, %r2575, %r2573; // inline asm bfe.u32 %r321, %r334, %r2499, %r2512; // inline asm mul.wide.u32 %rd125, %r321, 4; add.s64 %rd126, %rd42, %rd125; ld.shared.u32 %r2576, [%rd126]; // inline asm bfe.u32 %r325, %r334, %r2503, %r2512; // inline asm mul.wide.u32 %rd127, %r325, 4; add.s64 %rd128, %rd45, %rd127; ld.shared.u32 %r2577, [%rd128]; add.s32 %r2578, %r2577, %r2576; // inline asm bfe.u32 %r329, %r334, %r2512, %r2512; // inline asm mul.wide.u32 %rd129, %r329, 4; add.s64 %rd130, %rd48, %rd129; ld.shared.u32 %r2579, [%rd130]; xor.b32 %r2580, %r2579, %r2578; // inline asm bfe.u32 %r333, %r334, %r208, %r2512; // inline asm mul.wide.u32 %rd131, %r333, 4; add.s64 %rd132, %rd51, %rd131; ld.shared.u32 %r2581, [%rd132]; add.s32 %r2582, %r2581, %r2580; xor.b32 %r2583, %r10343, %r11; xor.b32 %r2584, %r2583, %r318; xor.b32 %r350, %r2584, %r2582; // inline asm bfe.u32 %r337, %r350, %r2499, %r2512; // inline asm mul.wide.u32 %rd133, %r337, 4; add.s64 %rd134, %rd42, %rd133; ld.shared.u32 %r2585, 
[%rd134]; // inline asm bfe.u32 %r341, %r350, %r2503, %r2512; // inline asm mul.wide.u32 %rd135, %r341, 4; add.s64 %rd136, %rd45, %rd135; ld.shared.u32 %r2586, [%rd136]; add.s32 %r2587, %r2586, %r2585; // inline asm bfe.u32 %r345, %r350, %r2512, %r2512; // inline asm mul.wide.u32 %rd137, %r345, 4; add.s64 %rd138, %rd48, %rd137; ld.shared.u32 %r2588, [%rd138]; xor.b32 %r2589, %r2588, %r2587; // inline asm bfe.u32 %r349, %r350, %r208, %r2512; // inline asm mul.wide.u32 %rd139, %r349, 4; add.s64 %rd140, %rd51, %rd139; ld.shared.u32 %r2590, [%rd140]; add.s32 %r2591, %r2590, %r2589; xor.b32 %r2592, %r10342, %r12; xor.b32 %r2593, %r2592, %r334; xor.b32 %r366, %r2593, %r2591; // inline asm bfe.u32 %r353, %r366, %r2499, %r2512; // inline asm mul.wide.u32 %rd141, %r353, 4; add.s64 %rd142, %rd42, %rd141; ld.shared.u32 %r2594, [%rd142]; // inline asm bfe.u32 %r357, %r366, %r2503, %r2512; // inline asm mul.wide.u32 %rd143, %r357, 4; add.s64 %rd144, %rd45, %rd143; ld.shared.u32 %r2595, [%rd144]; add.s32 %r2596, %r2595, %r2594; // inline asm bfe.u32 %r361, %r366, %r2512, %r2512; // inline asm mul.wide.u32 %rd145, %r361, 4; add.s64 %rd146, %rd48, %rd145; ld.shared.u32 %r2597, [%rd146]; xor.b32 %r2598, %r2597, %r2596; // inline asm bfe.u32 %r365, %r366, %r208, %r2512; // inline asm mul.wide.u32 %rd147, %r365, 4; add.s64 %rd148, %rd51, %rd147; ld.shared.u32 %r2599, [%rd148]; add.s32 %r2600, %r2599, %r2598; xor.b32 %r2601, %r10341, %r13; xor.b32 %r2602, %r2601, %r350; xor.b32 %r382, %r2602, %r2600; // inline asm bfe.u32 %r369, %r382, %r2499, %r2512; // inline asm mul.wide.u32 %rd149, %r369, 4; add.s64 %rd150, %rd42, %rd149; ld.shared.u32 %r2603, [%rd150]; // inline asm bfe.u32 %r373, %r382, %r2503, %r2512; // inline asm mul.wide.u32 %rd151, %r373, 4; add.s64 %rd152, %rd45, %rd151; ld.shared.u32 %r2604, [%rd152]; add.s32 %r2605, %r2604, %r2603; // inline asm bfe.u32 %r377, %r382, %r2512, %r2512; // inline asm mul.wide.u32 %rd153, %r377, 4; add.s64 %rd154, %rd48, %rd153; ld.shared.u32 
%r2606, [%rd154]; xor.b32 %r2607, %r2606, %r2605; // inline asm bfe.u32 %r381, %r382, %r208, %r2512; // inline asm mul.wide.u32 %rd155, %r381, 4; add.s64 %rd156, %rd51, %rd155; ld.shared.u32 %r2608, [%rd156]; add.s32 %r2609, %r2608, %r2607; xor.b32 %r2610, %r10340, %r14; xor.b32 %r2611, %r2610, %r366; xor.b32 %r398, %r2611, %r2609; // inline asm bfe.u32 %r385, %r398, %r2499, %r2512; // inline asm mul.wide.u32 %rd157, %r385, 4; add.s64 %rd158, %rd42, %rd157; ld.shared.u32 %r2612, [%rd158]; // inline asm bfe.u32 %r389, %r398, %r2503, %r2512; // inline asm mul.wide.u32 %rd159, %r389, 4; add.s64 %rd160, %rd45, %rd159; ld.shared.u32 %r2613, [%rd160]; add.s32 %r2614, %r2613, %r2612; // inline asm bfe.u32 %r393, %r398, %r2512, %r2512; // inline asm mul.wide.u32 %rd161, %r393, 4; add.s64 %rd162, %rd48, %rd161; ld.shared.u32 %r2615, [%rd162]; xor.b32 %r2616, %r2615, %r2614; // inline asm bfe.u32 %r397, %r398, %r208, %r2512; // inline asm mul.wide.u32 %rd163, %r397, 4; add.s64 %rd164, %rd51, %rd163; ld.shared.u32 %r2617, [%rd164]; add.s32 %r2618, %r2617, %r2616; xor.b32 %r2619, %r10339, %r15; xor.b32 %r2620, %r2619, %r382; xor.b32 %r414, %r2620, %r2618; // inline asm bfe.u32 %r401, %r414, %r2499, %r2512; // inline asm mul.wide.u32 %rd165, %r401, 4; add.s64 %rd166, %rd42, %rd165; ld.shared.u32 %r2621, [%rd166]; // inline asm bfe.u32 %r405, %r414, %r2503, %r2512; // inline asm mul.wide.u32 %rd167, %r405, 4; add.s64 %rd168, %rd45, %rd167; ld.shared.u32 %r2622, [%rd168]; add.s32 %r2623, %r2622, %r2621; // inline asm bfe.u32 %r409, %r414, %r2512, %r2512; // inline asm mul.wide.u32 %rd169, %r409, 4; add.s64 %rd170, %rd48, %rd169; ld.shared.u32 %r2624, [%rd170]; xor.b32 %r2625, %r2624, %r2623; // inline asm bfe.u32 %r413, %r414, %r208, %r2512; // inline asm mul.wide.u32 %rd171, %r413, 4; add.s64 %rd172, %rd51, %rd171; ld.shared.u32 %r2626, [%rd172]; add.s32 %r2627, %r2626, %r2625; xor.b32 %r2628, %r10338, %r16; xor.b32 %r2629, %r2628, %r398; xor.b32 %r430, %r2629, %r2627; // inline 
asm bfe.u32 %r417, %r430, %r2499, %r2512; // inline asm mul.wide.u32 %rd173, %r417, 4; add.s64 %rd174, %rd42, %rd173; ld.shared.u32 %r2630, [%rd174]; // inline asm bfe.u32 %r421, %r430, %r2503, %r2512; // inline asm mul.wide.u32 %rd175, %r421, 4; add.s64 %rd176, %rd45, %rd175; ld.shared.u32 %r2631, [%rd176]; add.s32 %r2632, %r2631, %r2630; // inline asm bfe.u32 %r425, %r430, %r2512, %r2512; // inline asm mul.wide.u32 %rd177, %r425, 4; add.s64 %rd178, %rd48, %rd177; ld.shared.u32 %r2633, [%rd178]; xor.b32 %r2634, %r2633, %r2632; // inline asm bfe.u32 %r429, %r430, %r208, %r2512; // inline asm mul.wide.u32 %rd179, %r429, 4; add.s64 %rd180, %rd51, %rd179; ld.shared.u32 %r2635, [%rd180]; add.s32 %r2636, %r2635, %r2634; xor.b32 %r2637, %r10337, %r17; xor.b32 %r2638, %r2637, %r414; xor.b32 %r446, %r2638, %r2636; // inline asm bfe.u32 %r433, %r446, %r2499, %r2512; // inline asm mul.wide.u32 %rd181, %r433, 4; add.s64 %rd182, %rd42, %rd181; ld.shared.u32 %r2639, [%rd182]; // inline asm bfe.u32 %r437, %r446, %r2503, %r2512; // inline asm mul.wide.u32 %rd183, %r437, 4; add.s64 %rd184, %rd45, %rd183; ld.shared.u32 %r2640, [%rd184]; add.s32 %r2641, %r2640, %r2639; // inline asm bfe.u32 %r441, %r446, %r2512, %r2512; // inline asm mul.wide.u32 %rd185, %r441, 4; add.s64 %rd186, %rd48, %rd185; ld.shared.u32 %r2642, [%rd186]; xor.b32 %r2643, %r2642, %r2641; // inline asm bfe.u32 %r445, %r446, %r208, %r2512; // inline asm mul.wide.u32 %rd187, %r445, 4; add.s64 %rd188, %rd51, %rd187; ld.shared.u32 %r2644, [%rd188]; add.s32 %r2645, %r2644, %r2643; xor.b32 %r2646, %r10336, %r18; xor.b32 %r2647, %r2646, %r430; xor.b32 %r462, %r2647, %r2645; // inline asm bfe.u32 %r449, %r462, %r2499, %r2512; // inline asm mul.wide.u32 %rd189, %r449, 4; add.s64 %rd190, %rd42, %rd189; ld.shared.u32 %r2648, [%rd190]; // inline asm bfe.u32 %r453, %r462, %r2503, %r2512; // inline asm mul.wide.u32 %rd191, %r453, 4; add.s64 %rd192, %rd45, %rd191; ld.shared.u32 %r2649, [%rd192]; add.s32 %r2650, %r2649, %r2648; 
// inline asm bfe.u32 %r457, %r462, %r2512, %r2512; // inline asm mul.wide.u32 %rd193, %r457, 4; add.s64 %rd194, %rd48, %rd193; ld.shared.u32 %r2651, [%rd194]; xor.b32 %r2652, %r2651, %r2650; // inline asm bfe.u32 %r461, %r462, %r208, %r2512; // inline asm mul.wide.u32 %rd195, %r461, 4; add.s64 %rd196, %rd51, %rd195; ld.shared.u32 %r2653, [%rd196]; add.s32 %r2654, %r2653, %r2652; xor.b32 %r2655, %r10335, %r19; xor.b32 %r2656, %r2655, %r446; xor.b32 %r64, %r2656, %r2654; xor.b32 %r2657, %r10334, %r20; xor.b32 %r65, %r2657, %r462; // inline asm bfe.u32 %r465, %r208, %r2499, %r2512; // inline asm mul.wide.u32 %rd197, %r465, 4; add.s64 %rd198, %rd42, %rd197; ld.shared.u32 %r2658, [%rd198]; // inline asm bfe.u32 %r469, %r208, %r2503, %r2512; // inline asm mul.wide.u32 %rd199, %r469, 4; add.s64 %rd200, %rd45, %rd199; ld.shared.u32 %r2659, [%rd200]; add.s32 %r2660, %r2659, %r2658; // inline asm bfe.u32 %r473, %r208, %r2512, %r2512; // inline asm mul.wide.u32 %rd201, %r473, 4; add.s64 %rd202, %rd48, %rd201; ld.shared.u32 %r2661, [%rd202]; xor.b32 %r2662, %r2661, %r2660; // inline asm bfe.u32 %r477, %r208, %r208, %r2512; // inline asm mul.wide.u32 %rd203, %r477, 4; add.s64 %rd204, %rd51, %rd203; ld.shared.u32 %r2663, [%rd204]; add.s32 %r494, %r2663, %r2662; // inline asm bfe.u32 %r481, %r494, %r2499, %r2512; // inline asm mul.wide.u32 %rd205, %r481, 4; add.s64 %rd206, %rd42, %rd205; ld.shared.u32 %r2664, [%rd206]; // inline asm bfe.u32 %r485, %r494, %r2503, %r2512; // inline asm mul.wide.u32 %rd207, %r485, 4; add.s64 %rd208, %rd45, %rd207; ld.shared.u32 %r2665, [%rd208]; add.s32 %r2666, %r2665, %r2664; // inline asm bfe.u32 %r489, %r494, %r2512, %r2512; // inline asm mul.wide.u32 %rd209, %r489, 4; add.s64 %rd210, %rd48, %rd209; ld.shared.u32 %r2667, [%rd210]; xor.b32 %r2668, %r2667, %r2666; // inline asm bfe.u32 %r493, %r494, %r208, %r2512; // inline asm mul.wide.u32 %rd211, %r493, 4; add.s64 %rd212, %rd51, %rd211; ld.shared.u32 %r2669, [%rd212]; add.s32 %r2670, %r2669, 
%r2668; xor.b32 %r510, %r2529, %r2670; // inline asm bfe.u32 %r497, %r510, %r2499, %r2512; // inline asm mul.wide.u32 %rd213, %r497, 4; add.s64 %rd214, %rd42, %rd213; ld.shared.u32 %r2671, [%rd214]; // inline asm bfe.u32 %r501, %r510, %r2503, %r2512; // inline asm mul.wide.u32 %rd215, %r501, 4; add.s64 %rd216, %rd45, %rd215; ld.shared.u32 %r2672, [%rd216]; add.s32 %r2673, %r2672, %r2671; // inline asm bfe.u32 %r505, %r510, %r2512, %r2512; // inline asm mul.wide.u32 %rd217, %r505, 4; add.s64 %rd218, %rd48, %rd217; ld.shared.u32 %r2674, [%rd218]; xor.b32 %r2675, %r2674, %r2673; // inline asm bfe.u32 %r509, %r510, %r208, %r2512; // inline asm mul.wide.u32 %rd219, %r509, 4; add.s64 %rd220, %rd51, %rd219; ld.shared.u32 %r2676, [%rd220]; add.s32 %r2677, %r2676, %r2675; xor.b32 %r2678, %r2538, %r494; xor.b32 %r526, %r2678, %r2677; // inline asm bfe.u32 %r513, %r526, %r2499, %r2512; // inline asm mul.wide.u32 %rd221, %r513, 4; add.s64 %rd222, %rd42, %rd221; ld.shared.u32 %r2679, [%rd222]; // inline asm bfe.u32 %r517, %r526, %r2503, %r2512; // inline asm mul.wide.u32 %rd223, %r517, 4; add.s64 %rd224, %rd45, %rd223; ld.shared.u32 %r2680, [%rd224]; add.s32 %r2681, %r2680, %r2679; // inline asm bfe.u32 %r521, %r526, %r2512, %r2512; // inline asm mul.wide.u32 %rd225, %r521, 4; add.s64 %rd226, %rd48, %rd225; ld.shared.u32 %r2682, [%rd226]; xor.b32 %r2683, %r2682, %r2681; // inline asm bfe.u32 %r525, %r526, %r208, %r2512; // inline asm mul.wide.u32 %rd227, %r525, 4; add.s64 %rd228, %rd51, %rd227; ld.shared.u32 %r2684, [%rd228]; add.s32 %r2685, %r2684, %r2683; xor.b32 %r2686, %r2547, %r510; xor.b32 %r542, %r2686, %r2685; // inline asm bfe.u32 %r529, %r542, %r2499, %r2512; // inline asm mul.wide.u32 %rd229, %r529, 4; add.s64 %rd230, %rd42, %rd229; ld.shared.u32 %r2687, [%rd230]; // inline asm bfe.u32 %r533, %r542, %r2503, %r2512; // inline asm mul.wide.u32 %rd231, %r533, 4; add.s64 %rd232, %rd45, %rd231; ld.shared.u32 %r2688, [%rd232]; add.s32 %r2689, %r2688, %r2687; // inline asm 
bfe.u32 %r537, %r542, %r2512, %r2512; // inline asm mul.wide.u32 %rd233, %r537, 4; add.s64 %rd234, %rd48, %rd233; ld.shared.u32 %r2690, [%rd234]; xor.b32 %r2691, %r2690, %r2689; // inline asm bfe.u32 %r541, %r542, %r208, %r2512; // inline asm mul.wide.u32 %rd235, %r541, 4; add.s64 %rd236, %rd51, %rd235; ld.shared.u32 %r2692, [%rd236]; add.s32 %r2693, %r2692, %r2691; xor.b32 %r2694, %r2556, %r526; xor.b32 %r558, %r2694, %r2693; // inline asm bfe.u32 %r545, %r558, %r2499, %r2512; // inline asm mul.wide.u32 %rd237, %r545, 4; add.s64 %rd238, %rd42, %rd237; ld.shared.u32 %r2695, [%rd238]; // inline asm bfe.u32 %r549, %r558, %r2503, %r2512; // inline asm mul.wide.u32 %rd239, %r549, 4; add.s64 %rd240, %rd45, %rd239; ld.shared.u32 %r2696, [%rd240]; add.s32 %r2697, %r2696, %r2695; // inline asm bfe.u32 %r553, %r558, %r2512, %r2512; // inline asm mul.wide.u32 %rd241, %r553, 4; add.s64 %rd242, %rd48, %rd241; ld.shared.u32 %r2698, [%rd242]; xor.b32 %r2699, %r2698, %r2697; // inline asm bfe.u32 %r557, %r558, %r208, %r2512; // inline asm mul.wide.u32 %rd243, %r557, 4; add.s64 %rd244, %rd51, %rd243; ld.shared.u32 %r2700, [%rd244]; add.s32 %r2701, %r2700, %r2699; xor.b32 %r2702, %r2565, %r542; xor.b32 %r574, %r2702, %r2701; // inline asm bfe.u32 %r561, %r574, %r2499, %r2512; // inline asm mul.wide.u32 %rd245, %r561, 4; add.s64 %rd246, %rd42, %rd245; ld.shared.u32 %r2703, [%rd246]; // inline asm bfe.u32 %r565, %r574, %r2503, %r2512; // inline asm mul.wide.u32 %rd247, %r565, 4; add.s64 %rd248, %rd45, %rd247; ld.shared.u32 %r2704, [%rd248]; add.s32 %r2705, %r2704, %r2703; // inline asm bfe.u32 %r569, %r574, %r2512, %r2512; // inline asm mul.wide.u32 %rd249, %r569, 4; add.s64 %rd250, %rd48, %rd249; ld.shared.u32 %r2706, [%rd250]; xor.b32 %r2707, %r2706, %r2705; // inline asm bfe.u32 %r573, %r574, %r208, %r2512; // inline asm mul.wide.u32 %rd251, %r573, 4; add.s64 %rd252, %rd51, %rd251; ld.shared.u32 %r2708, [%rd252]; add.s32 %r2709, %r2708, %r2707; xor.b32 %r2710, %r2574, %r558; 
xor.b32 %r590, %r2710, %r2709; // inline asm bfe.u32 %r577, %r590, %r2499, %r2512; // inline asm mul.wide.u32 %rd253, %r577, 4; add.s64 %rd254, %rd42, %rd253; ld.shared.u32 %r2711, [%rd254]; // inline asm bfe.u32 %r581, %r590, %r2503, %r2512; // inline asm mul.wide.u32 %rd255, %r581, 4; add.s64 %rd256, %rd45, %rd255; ld.shared.u32 %r2712, [%rd256]; add.s32 %r2713, %r2712, %r2711; // inline asm bfe.u32 %r585, %r590, %r2512, %r2512; // inline asm mul.wide.u32 %rd257, %r585, 4; add.s64 %rd258, %rd48, %rd257; ld.shared.u32 %r2714, [%rd258]; xor.b32 %r2715, %r2714, %r2713; // inline asm bfe.u32 %r589, %r590, %r208, %r2512; // inline asm mul.wide.u32 %rd259, %r589, 4; add.s64 %rd260, %rd51, %rd259; ld.shared.u32 %r2716, [%rd260]; add.s32 %r2717, %r2716, %r2715; xor.b32 %r2718, %r2583, %r574; xor.b32 %r606, %r2718, %r2717; // inline asm bfe.u32 %r593, %r606, %r2499, %r2512; // inline asm mul.wide.u32 %rd261, %r593, 4; add.s64 %rd262, %rd42, %rd261; ld.shared.u32 %r2719, [%rd262]; // inline asm bfe.u32 %r597, %r606, %r2503, %r2512; // inline asm mul.wide.u32 %rd263, %r597, 4; add.s64 %rd264, %rd45, %rd263; ld.shared.u32 %r2720, [%rd264]; add.s32 %r2721, %r2720, %r2719; // inline asm bfe.u32 %r601, %r606, %r2512, %r2512; // inline asm mul.wide.u32 %rd265, %r601, 4; add.s64 %rd266, %rd48, %rd265; ld.shared.u32 %r2722, [%rd266]; xor.b32 %r2723, %r2722, %r2721; // inline asm bfe.u32 %r605, %r606, %r208, %r2512; // inline asm mul.wide.u32 %rd267, %r605, 4; add.s64 %rd268, %rd51, %rd267; ld.shared.u32 %r2724, [%rd268]; add.s32 %r2725, %r2724, %r2723; xor.b32 %r2726, %r2592, %r590; xor.b32 %r622, %r2726, %r2725; // inline asm bfe.u32 %r609, %r622, %r2499, %r2512; // inline asm mul.wide.u32 %rd269, %r609, 4; add.s64 %rd270, %rd42, %rd269; ld.shared.u32 %r2727, [%rd270]; // inline asm bfe.u32 %r613, %r622, %r2503, %r2512; // inline asm mul.wide.u32 %rd271, %r613, 4; add.s64 %rd272, %rd45, %rd271; ld.shared.u32 %r2728, [%rd272]; add.s32 %r2729, %r2728, %r2727; // inline asm bfe.u32 
%r617, %r622, %r2512, %r2512; // inline asm mul.wide.u32 %rd273, %r617, 4; add.s64 %rd274, %rd48, %rd273; ld.shared.u32 %r2730, [%rd274]; xor.b32 %r2731, %r2730, %r2729; // inline asm bfe.u32 %r621, %r622, %r208, %r2512; // inline asm mul.wide.u32 %rd275, %r621, 4; add.s64 %rd276, %rd51, %rd275; ld.shared.u32 %r2732, [%rd276]; add.s32 %r2733, %r2732, %r2731; xor.b32 %r2734, %r2601, %r606; xor.b32 %r638, %r2734, %r2733; // inline asm bfe.u32 %r625, %r638, %r2499, %r2512; // inline asm mul.wide.u32 %rd277, %r625, 4; add.s64 %rd278, %rd42, %rd277; ld.shared.u32 %r2735, [%rd278]; // inline asm bfe.u32 %r629, %r638, %r2503, %r2512; // inline asm mul.wide.u32 %rd279, %r629, 4; add.s64 %rd280, %rd45, %rd279; ld.shared.u32 %r2736, [%rd280]; add.s32 %r2737, %r2736, %r2735; // inline asm bfe.u32 %r633, %r638, %r2512, %r2512; // inline asm mul.wide.u32 %rd281, %r633, 4; add.s64 %rd282, %rd48, %rd281; ld.shared.u32 %r2738, [%rd282]; xor.b32 %r2739, %r2738, %r2737; // inline asm bfe.u32 %r637, %r638, %r208, %r2512; // inline asm mul.wide.u32 %rd283, %r637, 4; add.s64 %rd284, %rd51, %rd283; ld.shared.u32 %r2740, [%rd284]; add.s32 %r2741, %r2740, %r2739; xor.b32 %r2742, %r2610, %r622; xor.b32 %r654, %r2742, %r2741; // inline asm bfe.u32 %r641, %r654, %r2499, %r2512; // inline asm mul.wide.u32 %rd285, %r641, 4; add.s64 %rd286, %rd42, %rd285; ld.shared.u32 %r2743, [%rd286]; // inline asm bfe.u32 %r645, %r654, %r2503, %r2512; // inline asm mul.wide.u32 %rd287, %r645, 4; add.s64 %rd288, %rd45, %rd287; ld.shared.u32 %r2744, [%rd288]; add.s32 %r2745, %r2744, %r2743; // inline asm bfe.u32 %r649, %r654, %r2512, %r2512; // inline asm mul.wide.u32 %rd289, %r649, 4; add.s64 %rd290, %rd48, %rd289; ld.shared.u32 %r2746, [%rd290]; xor.b32 %r2747, %r2746, %r2745; // inline asm bfe.u32 %r653, %r654, %r208, %r2512; // inline asm mul.wide.u32 %rd291, %r653, 4; add.s64 %rd292, %rd51, %rd291; ld.shared.u32 %r2748, [%rd292]; add.s32 %r2749, %r2748, %r2747; xor.b32 %r2750, %r2619, %r638; xor.b32 
%r670, %r2750, %r2749; // inline asm bfe.u32 %r657, %r670, %r2499, %r2512; // inline asm mul.wide.u32 %rd293, %r657, 4; add.s64 %rd294, %rd42, %rd293; ld.shared.u32 %r2751, [%rd294]; // inline asm bfe.u32 %r661, %r670, %r2503, %r2512; // inline asm mul.wide.u32 %rd295, %r661, 4; add.s64 %rd296, %rd45, %rd295; ld.shared.u32 %r2752, [%rd296]; add.s32 %r2753, %r2752, %r2751; // inline asm bfe.u32 %r665, %r670, %r2512, %r2512; // inline asm mul.wide.u32 %rd297, %r665, 4; add.s64 %rd298, %rd48, %rd297; ld.shared.u32 %r2754, [%rd298]; xor.b32 %r2755, %r2754, %r2753; // inline asm bfe.u32 %r669, %r670, %r208, %r2512; // inline asm mul.wide.u32 %rd299, %r669, 4; add.s64 %rd300, %rd51, %rd299; ld.shared.u32 %r2756, [%rd300]; add.s32 %r2757, %r2756, %r2755; xor.b32 %r2758, %r2628, %r654; xor.b32 %r686, %r2758, %r2757; // inline asm bfe.u32 %r673, %r686, %r2499, %r2512; // inline asm mul.wide.u32 %rd301, %r673, 4; add.s64 %rd302, %rd42, %rd301; ld.shared.u32 %r2759, [%rd302]; // inline asm bfe.u32 %r677, %r686, %r2503, %r2512; // inline asm mul.wide.u32 %rd303, %r677, 4; add.s64 %rd304, %rd45, %rd303; ld.shared.u32 %r2760, [%rd304]; add.s32 %r2761, %r2760, %r2759; // inline asm bfe.u32 %r681, %r686, %r2512, %r2512; // inline asm mul.wide.u32 %rd305, %r681, 4; add.s64 %rd306, %rd48, %rd305; ld.shared.u32 %r2762, [%rd306]; xor.b32 %r2763, %r2762, %r2761; // inline asm bfe.u32 %r685, %r686, %r208, %r2512; // inline asm mul.wide.u32 %rd307, %r685, 4; add.s64 %rd308, %rd51, %rd307; ld.shared.u32 %r2764, [%rd308]; add.s32 %r2765, %r2764, %r2763; xor.b32 %r2766, %r2637, %r670; xor.b32 %r702, %r2766, %r2765; // inline asm bfe.u32 %r689, %r702, %r2499, %r2512; // inline asm mul.wide.u32 %rd309, %r689, 4; add.s64 %rd310, %rd42, %rd309; ld.shared.u32 %r2767, [%rd310]; // inline asm bfe.u32 %r693, %r702, %r2503, %r2512; // inline asm mul.wide.u32 %rd311, %r693, 4; add.s64 %rd312, %rd45, %rd311; ld.shared.u32 %r2768, [%rd312]; add.s32 %r2769, %r2768, %r2767; // inline asm bfe.u32 %r697, 
%r702, %r2512, %r2512; // inline asm mul.wide.u32 %rd313, %r697, 4; add.s64 %rd314, %rd48, %rd313; ld.shared.u32 %r2770, [%rd314]; xor.b32 %r2771, %r2770, %r2769; // inline asm bfe.u32 %r701, %r702, %r208, %r2512; // inline asm mul.wide.u32 %rd315, %r701, 4; add.s64 %rd316, %rd51, %rd315; ld.shared.u32 %r2772, [%rd316]; add.s32 %r2773, %r2772, %r2771; xor.b32 %r2774, %r2646, %r686; xor.b32 %r718, %r2774, %r2773; // inline asm bfe.u32 %r705, %r718, %r2499, %r2512; // inline asm mul.wide.u32 %rd317, %r705, 4; add.s64 %rd318, %rd42, %rd317; ld.shared.u32 %r2775, [%rd318]; // inline asm bfe.u32 %r709, %r718, %r2503, %r2512; // inline asm mul.wide.u32 %rd319, %r709, 4; add.s64 %rd320, %rd45, %rd319; ld.shared.u32 %r2776, [%rd320]; add.s32 %r2777, %r2776, %r2775; // inline asm bfe.u32 %r713, %r718, %r2512, %r2512; // inline asm mul.wide.u32 %rd321, %r713, 4; add.s64 %rd322, %rd48, %rd321; ld.shared.u32 %r2778, [%rd322]; xor.b32 %r2779, %r2778, %r2777; // inline asm bfe.u32 %r717, %r718, %r208, %r2512; // inline asm mul.wide.u32 %rd323, %r717, 4; add.s64 %rd324, %rd51, %rd323; ld.shared.u32 %r2780, [%rd324]; add.s32 %r2781, %r2780, %r2779; xor.b32 %r2782, %r2655, %r702; xor.b32 %r66, %r2782, %r2781; xor.b32 %r67, %r2657, %r718; xor.b32 %r734, %r462, %r718; // inline asm bfe.u32 %r721, %r734, %r2499, %r2512; // inline asm mul.wide.u32 %rd325, %r721, 4; add.s64 %rd326, %rd42, %rd325; ld.shared.u32 %r2783, [%rd326]; // inline asm bfe.u32 %r725, %r734, %r2503, %r2512; // inline asm mul.wide.u32 %rd327, %r725, 4; add.s64 %rd328, %rd45, %rd327; ld.shared.u32 %r2784, [%rd328]; add.s32 %r2785, %r2784, %r2783; // inline asm bfe.u32 %r729, %r734, %r2512, %r2512; // inline asm mul.wide.u32 %rd329, %r729, 4; add.s64 %rd330, %rd48, %rd329; ld.shared.u32 %r2786, [%rd330]; xor.b32 %r2787, %r2786, %r2785; // inline asm bfe.u32 %r733, %r734, %r208, %r2512; // inline asm mul.wide.u32 %rd331, %r733, 4; add.s64 %rd332, %rd51, %rd331; ld.shared.u32 %r2788, [%rd332]; add.s32 %r2789, %r2788, 
%r2787; xor.b32 %r2790, %r64, %r66; xor.b32 %r750, %r2790, %r2789; // inline asm bfe.u32 %r737, %r750, %r2499, %r2512; // inline asm mul.wide.u32 %rd333, %r737, 4; add.s64 %rd334, %rd42, %rd333; ld.shared.u32 %r2791, [%rd334]; // inline asm bfe.u32 %r741, %r750, %r2503, %r2512; // inline asm mul.wide.u32 %rd335, %r741, 4; add.s64 %rd336, %rd45, %rd335; ld.shared.u32 %r2792, [%rd336]; add.s32 %r2793, %r2792, %r2791; // inline asm bfe.u32 %r745, %r750, %r2512, %r2512; // inline asm mul.wide.u32 %rd337, %r745, 4; add.s64 %rd338, %rd48, %rd337; ld.shared.u32 %r2794, [%rd338]; xor.b32 %r2795, %r2794, %r2793; // inline asm bfe.u32 %r749, %r750, %r208, %r2512; // inline asm mul.wide.u32 %rd339, %r749, 4; add.s64 %rd340, %rd51, %rd339; ld.shared.u32 %r2796, [%rd340]; add.s32 %r2797, %r2796, %r2795; xor.b32 %r766, %r65, %r2797; // inline asm bfe.u32 %r753, %r766, %r2499, %r2512; // inline asm mul.wide.u32 %rd341, %r753, 4; add.s64 %rd342, %rd42, %rd341; ld.shared.u32 %r2798, [%rd342]; // inline asm bfe.u32 %r757, %r766, %r2503, %r2512; // inline asm mul.wide.u32 %rd343, %r757, 4; add.s64 %rd344, %rd45, %rd343; ld.shared.u32 %r2799, [%rd344]; add.s32 %r2800, %r2799, %r2798; // inline asm bfe.u32 %r761, %r766, %r2512, %r2512; // inline asm mul.wide.u32 %rd345, %r761, 4; add.s64 %rd346, %rd48, %rd345; ld.shared.u32 %r2801, [%rd346]; xor.b32 %r2802, %r2801, %r2800; // inline asm bfe.u32 %r765, %r766, %r208, %r2512; // inline asm mul.wide.u32 %rd347, %r765, 4; add.s64 %rd348, %rd51, %rd347; ld.shared.u32 %r2803, [%rd348]; add.s32 %r2804, %r2803, %r2802; xor.b32 %r2805, %r64, %r2789; xor.b32 %r782, %r2805, %r2804; // inline asm bfe.u32 %r769, %r782, %r2499, %r2512; // inline asm mul.wide.u32 %rd349, %r769, 4; add.s64 %rd350, %rd42, %rd349; ld.shared.u32 %r2806, [%rd350]; // inline asm bfe.u32 %r773, %r782, %r2503, %r2512; // inline asm mul.wide.u32 %rd351, %r773, 4; add.s64 %rd352, %rd45, %rd351; ld.shared.u32 %r2807, [%rd352]; add.s32 %r2808, %r2807, %r2806; // inline asm 
bfe.u32 %r777, %r782, %r2512, %r2512; // inline asm mul.wide.u32 %rd353, %r777, 4; add.s64 %rd354, %rd48, %rd353; ld.shared.u32 %r2809, [%rd354]; xor.b32 %r2810, %r2809, %r2808; // inline asm bfe.u32 %r781, %r782, %r208, %r2512; // inline asm mul.wide.u32 %rd355, %r781, 4; add.s64 %rd356, %rd51, %rd355; ld.shared.u32 %r2811, [%rd356]; add.s32 %r2812, %r2811, %r2810; xor.b32 %r2813, %r2547, %r766; xor.b32 %r798, %r2813, %r2812; // inline asm bfe.u32 %r785, %r798, %r2499, %r2512; // inline asm mul.wide.u32 %rd357, %r785, 4; add.s64 %rd358, %rd42, %rd357; ld.shared.u32 %r2814, [%rd358]; // inline asm bfe.u32 %r789, %r798, %r2503, %r2512; // inline asm mul.wide.u32 %rd359, %r789, 4; add.s64 %rd360, %rd45, %rd359; ld.shared.u32 %r2815, [%rd360]; add.s32 %r2816, %r2815, %r2814; // inline asm bfe.u32 %r793, %r798, %r2512, %r2512; // inline asm mul.wide.u32 %rd361, %r793, 4; add.s64 %rd362, %rd48, %rd361; ld.shared.u32 %r2817, [%rd362]; xor.b32 %r2818, %r2817, %r2816; // inline asm bfe.u32 %r797, %r798, %r208, %r2512; // inline asm mul.wide.u32 %rd363, %r797, 4; add.s64 %rd364, %rd51, %rd363; ld.shared.u32 %r2819, [%rd364]; add.s32 %r2820, %r2819, %r2818; xor.b32 %r2821, %r2556, %r782; xor.b32 %r814, %r2821, %r2820; // inline asm bfe.u32 %r801, %r814, %r2499, %r2512; // inline asm mul.wide.u32 %rd365, %r801, 4; add.s64 %rd366, %rd42, %rd365; ld.shared.u32 %r2822, [%rd366]; // inline asm bfe.u32 %r805, %r814, %r2503, %r2512; // inline asm mul.wide.u32 %rd367, %r805, 4; add.s64 %rd368, %rd45, %rd367; ld.shared.u32 %r2823, [%rd368]; add.s32 %r2824, %r2823, %r2822; // inline asm bfe.u32 %r809, %r814, %r2512, %r2512; // inline asm mul.wide.u32 %rd369, %r809, 4; add.s64 %rd370, %rd48, %rd369; ld.shared.u32 %r2825, [%rd370]; xor.b32 %r2826, %r2825, %r2824; // inline asm bfe.u32 %r813, %r814, %r208, %r2512; // inline asm mul.wide.u32 %rd371, %r813, 4; add.s64 %rd372, %rd51, %rd371; ld.shared.u32 %r2827, [%rd372]; add.s32 %r2828, %r2827, %r2826; xor.b32 %r2829, %r2565, %r798; 
xor.b32 %r830, %r2829, %r2828; // inline asm bfe.u32 %r817, %r830, %r2499, %r2512; // inline asm mul.wide.u32 %rd373, %r817, 4; add.s64 %rd374, %rd42, %rd373; ld.shared.u32 %r2830, [%rd374]; // inline asm bfe.u32 %r821, %r830, %r2503, %r2512; // inline asm mul.wide.u32 %rd375, %r821, 4; add.s64 %rd376, %rd45, %rd375; ld.shared.u32 %r2831, [%rd376]; add.s32 %r2832, %r2831, %r2830; // inline asm bfe.u32 %r825, %r830, %r2512, %r2512; // inline asm mul.wide.u32 %rd377, %r825, 4; add.s64 %rd378, %rd48, %rd377; ld.shared.u32 %r2833, [%rd378]; xor.b32 %r2834, %r2833, %r2832; // inline asm bfe.u32 %r829, %r830, %r208, %r2512; // inline asm mul.wide.u32 %rd379, %r829, 4; add.s64 %rd380, %rd51, %rd379; ld.shared.u32 %r2835, [%rd380]; add.s32 %r2836, %r2835, %r2834; xor.b32 %r2837, %r2574, %r814; xor.b32 %r846, %r2837, %r2836; // inline asm bfe.u32 %r833, %r846, %r2499, %r2512; // inline asm mul.wide.u32 %rd381, %r833, 4; add.s64 %rd382, %rd42, %rd381; ld.shared.u32 %r2838, [%rd382]; // inline asm bfe.u32 %r837, %r846, %r2503, %r2512; // inline asm mul.wide.u32 %rd383, %r837, 4; add.s64 %rd384, %rd45, %rd383; ld.shared.u32 %r2839, [%rd384]; add.s32 %r2840, %r2839, %r2838; // inline asm bfe.u32 %r841, %r846, %r2512, %r2512; // inline asm mul.wide.u32 %rd385, %r841, 4; add.s64 %rd386, %rd48, %rd385; ld.shared.u32 %r2841, [%rd386]; xor.b32 %r2842, %r2841, %r2840; // inline asm bfe.u32 %r845, %r846, %r208, %r2512; // inline asm mul.wide.u32 %rd387, %r845, 4; add.s64 %rd388, %rd51, %rd387; ld.shared.u32 %r2843, [%rd388]; add.s32 %r2844, %r2843, %r2842; xor.b32 %r2845, %r2583, %r830; xor.b32 %r862, %r2845, %r2844; // inline asm bfe.u32 %r849, %r862, %r2499, %r2512; // inline asm mul.wide.u32 %rd389, %r849, 4; add.s64 %rd390, %rd42, %rd389; ld.shared.u32 %r2846, [%rd390]; // inline asm bfe.u32 %r853, %r862, %r2503, %r2512; // inline asm mul.wide.u32 %rd391, %r853, 4; add.s64 %rd392, %rd45, %rd391; ld.shared.u32 %r2847, [%rd392]; add.s32 %r2848, %r2847, %r2846; // inline asm bfe.u32 
%r857, %r862, %r2512, %r2512; // inline asm mul.wide.u32 %rd393, %r857, 4; add.s64 %rd394, %rd48, %rd393; ld.shared.u32 %r2849, [%rd394]; xor.b32 %r2850, %r2849, %r2848; // inline asm bfe.u32 %r861, %r862, %r208, %r2512; // inline asm mul.wide.u32 %rd395, %r861, 4; add.s64 %rd396, %rd51, %rd395; ld.shared.u32 %r2851, [%rd396]; add.s32 %r2852, %r2851, %r2850; xor.b32 %r2853, %r2592, %r846; xor.b32 %r878, %r2853, %r2852; // inline asm bfe.u32 %r865, %r878, %r2499, %r2512; // inline asm mul.wide.u32 %rd397, %r865, 4; add.s64 %rd398, %rd42, %rd397; ld.shared.u32 %r2854, [%rd398]; // inline asm bfe.u32 %r869, %r878, %r2503, %r2512; // inline asm mul.wide.u32 %rd399, %r869, 4; add.s64 %rd400, %rd45, %rd399; ld.shared.u32 %r2855, [%rd400]; add.s32 %r2856, %r2855, %r2854; // inline asm bfe.u32 %r873, %r878, %r2512, %r2512; // inline asm mul.wide.u32 %rd401, %r873, 4; add.s64 %rd402, %rd48, %rd401; ld.shared.u32 %r2857, [%rd402]; xor.b32 %r2858, %r2857, %r2856; // inline asm bfe.u32 %r877, %r878, %r208, %r2512; // inline asm mul.wide.u32 %rd403, %r877, 4; add.s64 %rd404, %rd51, %rd403; ld.shared.u32 %r2859, [%rd404]; add.s32 %r2860, %r2859, %r2858; xor.b32 %r2861, %r2601, %r862; xor.b32 %r894, %r2861, %r2860; // inline asm bfe.u32 %r881, %r894, %r2499, %r2512; // inline asm mul.wide.u32 %rd405, %r881, 4; add.s64 %rd406, %rd42, %rd405; ld.shared.u32 %r2862, [%rd406]; // inline asm bfe.u32 %r885, %r894, %r2503, %r2512; // inline asm mul.wide.u32 %rd407, %r885, 4; add.s64 %rd408, %rd45, %rd407; ld.shared.u32 %r2863, [%rd408]; add.s32 %r2864, %r2863, %r2862; // inline asm bfe.u32 %r889, %r894, %r2512, %r2512; // inline asm mul.wide.u32 %rd409, %r889, 4; add.s64 %rd410, %rd48, %rd409; ld.shared.u32 %r2865, [%rd410]; xor.b32 %r2866, %r2865, %r2864; // inline asm bfe.u32 %r893, %r894, %r208, %r2512; // inline asm mul.wide.u32 %rd411, %r893, 4; add.s64 %rd412, %rd51, %rd411; ld.shared.u32 %r2867, [%rd412]; add.s32 %r2868, %r2867, %r2866; xor.b32 %r2869, %r2610, %r878; xor.b32 
%r910, %r2869, %r2868; // inline asm bfe.u32 %r897, %r910, %r2499, %r2512; // inline asm mul.wide.u32 %rd413, %r897, 4; add.s64 %rd414, %rd42, %rd413; ld.shared.u32 %r2870, [%rd414]; // inline asm bfe.u32 %r901, %r910, %r2503, %r2512; // inline asm mul.wide.u32 %rd415, %r901, 4; add.s64 %rd416, %rd45, %rd415; ld.shared.u32 %r2871, [%rd416]; add.s32 %r2872, %r2871, %r2870; // inline asm bfe.u32 %r905, %r910, %r2512, %r2512; // inline asm mul.wide.u32 %rd417, %r905, 4; add.s64 %rd418, %rd48, %rd417; ld.shared.u32 %r2873, [%rd418]; xor.b32 %r2874, %r2873, %r2872; // inline asm bfe.u32 %r909, %r910, %r208, %r2512; // inline asm mul.wide.u32 %rd419, %r909, 4; add.s64 %rd420, %rd51, %rd419; ld.shared.u32 %r2875, [%rd420]; add.s32 %r2876, %r2875, %r2874; xor.b32 %r2877, %r2619, %r894; xor.b32 %r926, %r2877, %r2876; // inline asm bfe.u32 %r913, %r926, %r2499, %r2512; // inline asm mul.wide.u32 %rd421, %r913, 4; add.s64 %rd422, %rd42, %rd421; ld.shared.u32 %r2878, [%rd422]; // inline asm bfe.u32 %r917, %r926, %r2503, %r2512; // inline asm mul.wide.u32 %rd423, %r917, 4; add.s64 %rd424, %rd45, %rd423; ld.shared.u32 %r2879, [%rd424]; add.s32 %r2880, %r2879, %r2878; // inline asm bfe.u32 %r921, %r926, %r2512, %r2512; // inline asm mul.wide.u32 %rd425, %r921, 4; add.s64 %rd426, %rd48, %rd425; ld.shared.u32 %r2881, [%rd426]; xor.b32 %r2882, %r2881, %r2880; // inline asm bfe.u32 %r925, %r926, %r208, %r2512; // inline asm mul.wide.u32 %rd427, %r925, 4; add.s64 %rd428, %rd51, %rd427; ld.shared.u32 %r2883, [%rd428]; add.s32 %r2884, %r2883, %r2882; xor.b32 %r2885, %r2628, %r910; xor.b32 %r942, %r2885, %r2884; // inline asm bfe.u32 %r929, %r942, %r2499, %r2512; // inline asm mul.wide.u32 %rd429, %r929, 4; add.s64 %rd430, %rd42, %rd429; ld.shared.u32 %r2886, [%rd430]; // inline asm bfe.u32 %r933, %r942, %r2503, %r2512; // inline asm mul.wide.u32 %rd431, %r933, 4; add.s64 %rd432, %rd45, %rd431; ld.shared.u32 %r2887, [%rd432]; add.s32 %r2888, %r2887, %r2886; // inline asm bfe.u32 %r937, 
%r942, %r2512, %r2512; // inline asm mul.wide.u32 %rd433, %r937, 4; add.s64 %rd434, %rd48, %rd433; ld.shared.u32 %r2889, [%rd434]; xor.b32 %r2890, %r2889, %r2888; // inline asm bfe.u32 %r941, %r942, %r208, %r2512; // inline asm mul.wide.u32 %rd435, %r941, 4; add.s64 %rd436, %rd51, %rd435; ld.shared.u32 %r2891, [%rd436]; add.s32 %r2892, %r2891, %r2890; xor.b32 %r2893, %r2637, %r926; xor.b32 %r958, %r2893, %r2892; // inline asm bfe.u32 %r945, %r958, %r2499, %r2512; // inline asm mul.wide.u32 %rd437, %r945, 4; add.s64 %rd438, %rd42, %rd437; ld.shared.u32 %r2894, [%rd438]; // inline asm bfe.u32 %r949, %r958, %r2503, %r2512; // inline asm mul.wide.u32 %rd439, %r949, 4; add.s64 %rd440, %rd45, %rd439; ld.shared.u32 %r2895, [%rd440]; add.s32 %r2896, %r2895, %r2894; // inline asm bfe.u32 %r953, %r958, %r2512, %r2512; // inline asm mul.wide.u32 %rd441, %r953, 4; add.s64 %rd442, %rd48, %rd441; ld.shared.u32 %r2897, [%rd442]; xor.b32 %r2898, %r2897, %r2896; // inline asm bfe.u32 %r957, %r958, %r208, %r2512; // inline asm mul.wide.u32 %rd443, %r957, 4; add.s64 %rd444, %rd51, %rd443; ld.shared.u32 %r2899, [%rd444]; add.s32 %r2900, %r2899, %r2898; xor.b32 %r2901, %r2646, %r942; xor.b32 %r974, %r2901, %r2900; // inline asm bfe.u32 %r961, %r974, %r2499, %r2512; // inline asm mul.wide.u32 %rd445, %r961, 4; add.s64 %rd446, %rd42, %rd445; ld.shared.u32 %r2902, [%rd446]; // inline asm bfe.u32 %r965, %r974, %r2503, %r2512; // inline asm mul.wide.u32 %rd447, %r965, 4; add.s64 %rd448, %rd45, %rd447; ld.shared.u32 %r2903, [%rd448]; add.s32 %r2904, %r2903, %r2902; // inline asm bfe.u32 %r969, %r974, %r2512, %r2512; // inline asm mul.wide.u32 %rd449, %r969, 4; add.s64 %rd450, %rd48, %rd449; ld.shared.u32 %r2905, [%rd450]; xor.b32 %r2906, %r2905, %r2904; // inline asm bfe.u32 %r973, %r974, %r208, %r2512; // inline asm mul.wide.u32 %rd451, %r973, 4; add.s64 %rd452, %rd51, %rd451; ld.shared.u32 %r2907, [%rd452]; add.s32 %r2908, %r2907, %r2906; xor.b32 %r2909, %r2655, %r958; xor.b32 %r68, 
%r2909, %r2908; xor.b32 %r69, %r2657, %r974; xor.b32 %r990, %r462, %r974; // inline asm bfe.u32 %r977, %r990, %r2499, %r2512; // inline asm mul.wide.u32 %rd453, %r977, 4; add.s64 %rd454, %rd42, %rd453; ld.shared.u32 %r2910, [%rd454]; // inline asm bfe.u32 %r981, %r990, %r2503, %r2512; // inline asm mul.wide.u32 %rd455, %r981, 4; add.s64 %rd456, %rd45, %rd455; ld.shared.u32 %r2911, [%rd456]; add.s32 %r2912, %r2911, %r2910; // inline asm bfe.u32 %r985, %r990, %r2512, %r2512; // inline asm mul.wide.u32 %rd457, %r985, 4; add.s64 %rd458, %rd48, %rd457; ld.shared.u32 %r2913, [%rd458]; xor.b32 %r2914, %r2913, %r2912; // inline asm bfe.u32 %r989, %r990, %r208, %r2512; // inline asm mul.wide.u32 %rd459, %r989, 4; add.s64 %rd460, %rd51, %rd459; ld.shared.u32 %r2915, [%rd460]; add.s32 %r2916, %r2915, %r2914; xor.b32 %r2917, %r64, %r68; xor.b32 %r1006, %r2917, %r2916; // inline asm bfe.u32 %r993, %r1006, %r2499, %r2512; // inline asm mul.wide.u32 %rd461, %r993, 4; add.s64 %rd462, %rd42, %rd461; ld.shared.u32 %r2918, [%rd462]; // inline asm bfe.u32 %r997, %r1006, %r2503, %r2512; // inline asm mul.wide.u32 %rd463, %r997, 4; add.s64 %rd464, %rd45, %rd463; ld.shared.u32 %r2919, [%rd464]; add.s32 %r2920, %r2919, %r2918; // inline asm bfe.u32 %r1001, %r1006, %r2512, %r2512; // inline asm mul.wide.u32 %rd465, %r1001, 4; add.s64 %rd466, %rd48, %rd465; ld.shared.u32 %r2921, [%rd466]; xor.b32 %r2922, %r2921, %r2920; // inline asm bfe.u32 %r1005, %r1006, %r208, %r2512; // inline asm mul.wide.u32 %rd467, %r1005, 4; add.s64 %rd468, %rd51, %rd467; ld.shared.u32 %r2923, [%rd468]; add.s32 %r2924, %r2923, %r2922; xor.b32 %r2925, %r67, %r990; xor.b32 %r1022, %r2925, %r2924; // inline asm bfe.u32 %r1009, %r1022, %r2499, %r2512; // inline asm mul.wide.u32 %rd469, %r1009, 4; add.s64 %rd470, %rd42, %rd469; ld.shared.u32 %r2926, [%rd470]; // inline asm bfe.u32 %r1013, %r1022, %r2503, %r2512; // inline asm mul.wide.u32 %rd471, %r1013, 4; add.s64 %rd472, %rd45, %rd471; ld.shared.u32 %r2927, [%rd472]; 
add.s32 %r2928, %r2927, %r2926; // inline asm bfe.u32 %r1017, %r1022, %r2512, %r2512; // inline asm mul.wide.u32 %rd473, %r1017, 4; add.s64 %rd474, %rd48, %rd473; ld.shared.u32 %r2929, [%rd474]; xor.b32 %r2930, %r2929, %r2928; // inline asm bfe.u32 %r1021, %r1022, %r208, %r2512; // inline asm mul.wide.u32 %rd475, %r1021, 4; add.s64 %rd476, %rd51, %rd475; ld.shared.u32 %r2931, [%rd476]; add.s32 %r2932, %r2931, %r2930; xor.b32 %r2933, %r66, %r1006; xor.b32 %r1038, %r2933, %r2932; // inline asm bfe.u32 %r1025, %r1038, %r2499, %r2512; // inline asm mul.wide.u32 %rd477, %r1025, 4; add.s64 %rd478, %rd42, %rd477; ld.shared.u32 %r2934, [%rd478]; // inline asm bfe.u32 %r1029, %r1038, %r2503, %r2512; // inline asm mul.wide.u32 %rd479, %r1029, 4; add.s64 %rd480, %rd45, %rd479; ld.shared.u32 %r2935, [%rd480]; add.s32 %r2936, %r2935, %r2934; // inline asm bfe.u32 %r1033, %r1038, %r2512, %r2512; // inline asm mul.wide.u32 %rd481, %r1033, 4; add.s64 %rd482, %rd48, %rd481; ld.shared.u32 %r2937, [%rd482]; xor.b32 %r2938, %r2937, %r2936; // inline asm bfe.u32 %r1037, %r1038, %r208, %r2512; // inline asm mul.wide.u32 %rd483, %r1037, 4; add.s64 %rd484, %rd51, %rd483; ld.shared.u32 %r2939, [%rd484]; add.s32 %r2940, %r2939, %r2938; xor.b32 %r2941, %r69, %r1022; xor.b32 %r1054, %r2941, %r2940; // inline asm bfe.u32 %r1041, %r1054, %r2499, %r2512; // inline asm mul.wide.u32 %rd485, %r1041, 4; add.s64 %rd486, %rd42, %rd485; ld.shared.u32 %r2942, [%rd486]; // inline asm bfe.u32 %r1045, %r1054, %r2503, %r2512; // inline asm mul.wide.u32 %rd487, %r1045, 4; add.s64 %rd488, %rd45, %rd487; ld.shared.u32 %r2943, [%rd488]; add.s32 %r2944, %r2943, %r2942; // inline asm bfe.u32 %r1049, %r1054, %r2512, %r2512; // inline asm mul.wide.u32 %rd489, %r1049, 4; add.s64 %rd490, %rd48, %rd489; ld.shared.u32 %r2945, [%rd490]; xor.b32 %r2946, %r2945, %r2944; // inline asm bfe.u32 %r1053, %r1054, %r208, %r2512; // inline asm mul.wide.u32 %rd491, %r1053, 4; add.s64 %rd492, %rd51, %rd491; ld.shared.u32 %r2947, 
[%rd492]; add.s32 %r2948, %r2947, %r2946; xor.b32 %r2949, %r68, %r1038; xor.b32 %r1070, %r2949, %r2948; // inline asm bfe.u32 %r1057, %r1070, %r2499, %r2512; // inline asm mul.wide.u32 %rd493, %r1057, 4; add.s64 %rd494, %rd42, %rd493; ld.shared.u32 %r2950, [%rd494]; // inline asm bfe.u32 %r1061, %r1070, %r2503, %r2512; // inline asm mul.wide.u32 %rd495, %r1061, 4; add.s64 %rd496, %rd45, %rd495; ld.shared.u32 %r2951, [%rd496]; add.s32 %r2952, %r2951, %r2950; // inline asm bfe.u32 %r1065, %r1070, %r2512, %r2512; // inline asm mul.wide.u32 %rd497, %r1065, 4; add.s64 %rd498, %rd48, %rd497; ld.shared.u32 %r2953, [%rd498]; xor.b32 %r2954, %r2953, %r2952; // inline asm bfe.u32 %r1069, %r1070, %r208, %r2512; // inline asm mul.wide.u32 %rd499, %r1069, 4; add.s64 %rd500, %rd51, %rd499; ld.shared.u32 %r2955, [%rd500]; add.s32 %r2956, %r2955, %r2954; xor.b32 %r2957, %r2565, %r1054; xor.b32 %r1086, %r2957, %r2956; // inline asm bfe.u32 %r1073, %r1086, %r2499, %r2512; // inline asm mul.wide.u32 %rd501, %r1073, 4; add.s64 %rd502, %rd42, %rd501; ld.shared.u32 %r2958, [%rd502]; // inline asm bfe.u32 %r1077, %r1086, %r2503, %r2512; // inline asm mul.wide.u32 %rd503, %r1077, 4; add.s64 %rd504, %rd45, %rd503; ld.shared.u32 %r2959, [%rd504]; add.s32 %r2960, %r2959, %r2958; // inline asm bfe.u32 %r1081, %r1086, %r2512, %r2512; // inline asm mul.wide.u32 %rd505, %r1081, 4; add.s64 %rd506, %rd48, %rd505; ld.shared.u32 %r2961, [%rd506]; xor.b32 %r2962, %r2961, %r2960; // inline asm bfe.u32 %r1085, %r1086, %r208, %r2512; // inline asm mul.wide.u32 %rd507, %r1085, 4; add.s64 %rd508, %rd51, %rd507; ld.shared.u32 %r2963, [%rd508]; add.s32 %r2964, %r2963, %r2962; xor.b32 %r2965, %r2574, %r1070; xor.b32 %r1102, %r2965, %r2964; // inline asm bfe.u32 %r1089, %r1102, %r2499, %r2512; // inline asm mul.wide.u32 %rd509, %r1089, 4; add.s64 %rd510, %rd42, %rd509; ld.shared.u32 %r2966, [%rd510]; // inline asm bfe.u32 %r1093, %r1102, %r2503, %r2512; // inline asm mul.wide.u32 %rd511, %r1093, 4; add.s64 
%rd512, %rd45, %rd511; ld.shared.u32 %r2967, [%rd512]; add.s32 %r2968, %r2967, %r2966; // inline asm bfe.u32 %r1097, %r1102, %r2512, %r2512; // inline asm mul.wide.u32 %rd513, %r1097, 4; add.s64 %rd514, %rd48, %rd513; ld.shared.u32 %r2969, [%rd514]; xor.b32 %r2970, %r2969, %r2968; // inline asm bfe.u32 %r1101, %r1102, %r208, %r2512; // inline asm mul.wide.u32 %rd515, %r1101, 4; add.s64 %rd516, %rd51, %rd515; ld.shared.u32 %r2971, [%rd516]; add.s32 %r2972, %r2971, %r2970; xor.b32 %r2973, %r2583, %r1086; xor.b32 %r1118, %r2973, %r2972; // inline asm bfe.u32 %r1105, %r1118, %r2499, %r2512; // inline asm mul.wide.u32 %rd517, %r1105, 4; add.s64 %rd518, %rd42, %rd517; ld.shared.u32 %r2974, [%rd518]; // inline asm bfe.u32 %r1109, %r1118, %r2503, %r2512; // inline asm mul.wide.u32 %rd519, %r1109, 4; add.s64 %rd520, %rd45, %rd519; ld.shared.u32 %r2975, [%rd520]; add.s32 %r2976, %r2975, %r2974; // inline asm bfe.u32 %r1113, %r1118, %r2512, %r2512; // inline asm mul.wide.u32 %rd521, %r1113, 4; add.s64 %rd522, %rd48, %rd521; ld.shared.u32 %r2977, [%rd522]; xor.b32 %r2978, %r2977, %r2976; // inline asm bfe.u32 %r1117, %r1118, %r208, %r2512; // inline asm mul.wide.u32 %rd523, %r1117, 4; add.s64 %rd524, %rd51, %rd523; ld.shared.u32 %r2979, [%rd524]; add.s32 %r2980, %r2979, %r2978; xor.b32 %r2981, %r2592, %r1102; xor.b32 %r1134, %r2981, %r2980; // inline asm bfe.u32 %r1121, %r1134, %r2499, %r2512; // inline asm mul.wide.u32 %rd525, %r1121, 4; add.s64 %rd526, %rd42, %rd525; ld.shared.u32 %r2982, [%rd526]; // inline asm bfe.u32 %r1125, %r1134, %r2503, %r2512; // inline asm mul.wide.u32 %rd527, %r1125, 4; add.s64 %rd528, %rd45, %rd527; ld.shared.u32 %r2983, [%rd528]; add.s32 %r2984, %r2983, %r2982; // inline asm bfe.u32 %r1129, %r1134, %r2512, %r2512; // inline asm mul.wide.u32 %rd529, %r1129, 4; add.s64 %rd530, %rd48, %rd529; ld.shared.u32 %r2985, [%rd530]; xor.b32 %r2986, %r2985, %r2984; // inline asm bfe.u32 %r1133, %r1134, %r208, %r2512; // inline asm mul.wide.u32 %rd531, %r1133, 
4; add.s64 %rd532, %rd51, %rd531; ld.shared.u32 %r2987, [%rd532]; add.s32 %r2988, %r2987, %r2986; xor.b32 %r2989, %r2601, %r1118; xor.b32 %r1150, %r2989, %r2988; // inline asm bfe.u32 %r1137, %r1150, %r2499, %r2512; // inline asm mul.wide.u32 %rd533, %r1137, 4; add.s64 %rd534, %rd42, %rd533; ld.shared.u32 %r2990, [%rd534]; // inline asm bfe.u32 %r1141, %r1150, %r2503, %r2512; // inline asm mul.wide.u32 %rd535, %r1141, 4; add.s64 %rd536, %rd45, %rd535; ld.shared.u32 %r2991, [%rd536]; add.s32 %r2992, %r2991, %r2990; // inline asm bfe.u32 %r1145, %r1150, %r2512, %r2512; // inline asm mul.wide.u32 %rd537, %r1145, 4; add.s64 %rd538, %rd48, %rd537; ld.shared.u32 %r2993, [%rd538]; xor.b32 %r2994, %r2993, %r2992; // inline asm bfe.u32 %r1149, %r1150, %r208, %r2512; // inline asm mul.wide.u32 %rd539, %r1149, 4; add.s64 %rd540, %rd51, %rd539; ld.shared.u32 %r2995, [%rd540]; add.s32 %r2996, %r2995, %r2994; xor.b32 %r2997, %r2610, %r1134; xor.b32 %r1166, %r2997, %r2996; // inline asm bfe.u32 %r1153, %r1166, %r2499, %r2512; // inline asm mul.wide.u32 %rd541, %r1153, 4; add.s64 %rd542, %rd42, %rd541; ld.shared.u32 %r2998, [%rd542]; // inline asm bfe.u32 %r1157, %r1166, %r2503, %r2512; // inline asm mul.wide.u32 %rd543, %r1157, 4; add.s64 %rd544, %rd45, %rd543; ld.shared.u32 %r2999, [%rd544]; add.s32 %r3000, %r2999, %r2998; // inline asm bfe.u32 %r1161, %r1166, %r2512, %r2512; // inline asm mul.wide.u32 %rd545, %r1161, 4; add.s64 %rd546, %rd48, %rd545; ld.shared.u32 %r3001, [%rd546]; xor.b32 %r3002, %r3001, %r3000; // inline asm bfe.u32 %r1165, %r1166, %r208, %r2512; // inline asm mul.wide.u32 %rd547, %r1165, 4; add.s64 %rd548, %rd51, %rd547; ld.shared.u32 %r3003, [%rd548]; add.s32 %r3004, %r3003, %r3002; xor.b32 %r3005, %r2619, %r1150; xor.b32 %r1182, %r3005, %r3004; // inline asm bfe.u32 %r1169, %r1182, %r2499, %r2512; // inline asm mul.wide.u32 %rd549, %r1169, 4; add.s64 %rd550, %rd42, %rd549; ld.shared.u32 %r3006, [%rd550]; // inline asm bfe.u32 %r1173, %r1182, %r2503, 
%r2512; // inline asm mul.wide.u32 %rd551, %r1173, 4; add.s64 %rd552, %rd45, %rd551; ld.shared.u32 %r3007, [%rd552]; add.s32 %r3008, %r3007, %r3006; // inline asm bfe.u32 %r1177, %r1182, %r2512, %r2512; // inline asm mul.wide.u32 %rd553, %r1177, 4; add.s64 %rd554, %rd48, %rd553; ld.shared.u32 %r3009, [%rd554]; xor.b32 %r3010, %r3009, %r3008; // inline asm bfe.u32 %r1181, %r1182, %r208, %r2512; // inline asm mul.wide.u32 %rd555, %r1181, 4; add.s64 %rd556, %rd51, %rd555; ld.shared.u32 %r3011, [%rd556]; add.s32 %r3012, %r3011, %r3010; xor.b32 %r3013, %r2628, %r1166; xor.b32 %r1198, %r3013, %r3012; // inline asm bfe.u32 %r1185, %r1198, %r2499, %r2512; // inline asm mul.wide.u32 %rd557, %r1185, 4; add.s64 %rd558, %rd42, %rd557; ld.shared.u32 %r3014, [%rd558]; // inline asm bfe.u32 %r1189, %r1198, %r2503, %r2512; // inline asm mul.wide.u32 %rd559, %r1189, 4; add.s64 %rd560, %rd45, %rd559; ld.shared.u32 %r3015, [%rd560]; add.s32 %r3016, %r3015, %r3014; // inline asm bfe.u32 %r1193, %r1198, %r2512, %r2512; // inline asm mul.wide.u32 %rd561, %r1193, 4; add.s64 %rd562, %rd48, %rd561; ld.shared.u32 %r3017, [%rd562]; xor.b32 %r3018, %r3017, %r3016; // inline asm bfe.u32 %r1197, %r1198, %r208, %r2512; // inline asm mul.wide.u32 %rd563, %r1197, 4; add.s64 %rd564, %rd51, %rd563; ld.shared.u32 %r3019, [%rd564]; add.s32 %r3020, %r3019, %r3018; xor.b32 %r3021, %r2637, %r1182; xor.b32 %r1214, %r3021, %r3020; // inline asm bfe.u32 %r1201, %r1214, %r2499, %r2512; // inline asm mul.wide.u32 %rd565, %r1201, 4; add.s64 %rd566, %rd42, %rd565; ld.shared.u32 %r3022, [%rd566]; // inline asm bfe.u32 %r1205, %r1214, %r2503, %r2512; // inline asm mul.wide.u32 %rd567, %r1205, 4; add.s64 %rd568, %rd45, %rd567; ld.shared.u32 %r3023, [%rd568]; add.s32 %r3024, %r3023, %r3022; // inline asm bfe.u32 %r1209, %r1214, %r2512, %r2512; // inline asm mul.wide.u32 %rd569, %r1209, 4; add.s64 %rd570, %rd48, %rd569; ld.shared.u32 %r3025, [%rd570]; xor.b32 %r3026, %r3025, %r3024; // inline asm bfe.u32 %r1213, 
%r1214, %r208, %r2512; // inline asm mul.wide.u32 %rd571, %r1213, 4; add.s64 %rd572, %rd51, %rd571; ld.shared.u32 %r3027, [%rd572]; add.s32 %r3028, %r3027, %r3026; xor.b32 %r3029, %r2646, %r1198; xor.b32 %r1230, %r3029, %r3028; // inline asm bfe.u32 %r1217, %r1230, %r2499, %r2512; // inline asm mul.wide.u32 %rd573, %r1217, 4; add.s64 %rd574, %rd42, %rd573; ld.shared.u32 %r3030, [%rd574]; // inline asm bfe.u32 %r1221, %r1230, %r2503, %r2512; // inline asm mul.wide.u32 %rd575, %r1221, 4; add.s64 %rd576, %rd45, %rd575; ld.shared.u32 %r3031, [%rd576]; add.s32 %r3032, %r3031, %r3030; // inline asm bfe.u32 %r1225, %r1230, %r2512, %r2512; // inline asm mul.wide.u32 %rd577, %r1225, 4; add.s64 %rd578, %rd48, %rd577; ld.shared.u32 %r3033, [%rd578]; xor.b32 %r3034, %r3033, %r3032; // inline asm bfe.u32 %r1229, %r1230, %r208, %r2512; // inline asm mul.wide.u32 %rd579, %r1229, 4; add.s64 %rd580, %rd51, %rd579; ld.shared.u32 %r3035, [%rd580]; add.s32 %r3036, %r3035, %r3034; xor.b32 %r3037, %r2655, %r1214; xor.b32 %r70, %r3037, %r3036; xor.b32 %r71, %r2657, %r1230; xor.b32 %r1246, %r462, %r1230; // inline asm bfe.u32 %r1233, %r1246, %r2499, %r2512; // inline asm mul.wide.u32 %rd581, %r1233, 4; add.s64 %rd582, %rd42, %rd581; ld.shared.u32 %r3038, [%rd582]; // inline asm bfe.u32 %r1237, %r1246, %r2503, %r2512; // inline asm mul.wide.u32 %rd583, %r1237, 4; add.s64 %rd584, %rd45, %rd583; ld.shared.u32 %r3039, [%rd584]; add.s32 %r3040, %r3039, %r3038; // inline asm bfe.u32 %r1241, %r1246, %r2512, %r2512; // inline asm mul.wide.u32 %rd585, %r1241, 4; add.s64 %rd586, %rd48, %rd585; ld.shared.u32 %r3041, [%rd586]; xor.b32 %r3042, %r3041, %r3040; // inline asm bfe.u32 %r1245, %r1246, %r208, %r2512; // inline asm mul.wide.u32 %rd587, %r1245, 4; add.s64 %rd588, %rd51, %rd587; ld.shared.u32 %r3043, [%rd588]; add.s32 %r3044, %r3043, %r3042; xor.b32 %r3045, %r64, %r70; xor.b32 %r1262, %r3045, %r3044; // inline asm bfe.u32 %r1249, %r1262, %r2499, %r2512; // inline asm mul.wide.u32 %rd589, 
%r1249, 4; add.s64 %rd590, %rd42, %rd589; ld.shared.u32 %r3046, [%rd590]; // inline asm bfe.u32 %r1253, %r1262, %r2503, %r2512; // inline asm mul.wide.u32 %rd591, %r1253, 4; add.s64 %rd592, %rd45, %rd591; ld.shared.u32 %r3047, [%rd592]; add.s32 %r3048, %r3047, %r3046; // inline asm bfe.u32 %r1257, %r1262, %r2512, %r2512; // inline asm mul.wide.u32 %rd593, %r1257, 4; add.s64 %rd594, %rd48, %rd593; ld.shared.u32 %r3049, [%rd594]; xor.b32 %r3050, %r3049, %r3048; // inline asm bfe.u32 %r1261, %r1262, %r208, %r2512; // inline asm mul.wide.u32 %rd595, %r1261, 4; add.s64 %rd596, %rd51, %rd595; ld.shared.u32 %r3051, [%rd596]; add.s32 %r3052, %r3051, %r3050; xor.b32 %r3053, %r67, %r1246; xor.b32 %r1278, %r3053, %r3052; // inline asm bfe.u32 %r1265, %r1278, %r2499, %r2512; // inline asm mul.wide.u32 %rd597, %r1265, 4; add.s64 %rd598, %rd42, %rd597; ld.shared.u32 %r3054, [%rd598]; // inline asm bfe.u32 %r1269, %r1278, %r2503, %r2512; // inline asm mul.wide.u32 %rd599, %r1269, 4; add.s64 %rd600, %rd45, %rd599; ld.shared.u32 %r3055, [%rd600]; add.s32 %r3056, %r3055, %r3054; // inline asm bfe.u32 %r1273, %r1278, %r2512, %r2512; // inline asm mul.wide.u32 %rd601, %r1273, 4; add.s64 %rd602, %rd48, %rd601; ld.shared.u32 %r3057, [%rd602]; xor.b32 %r3058, %r3057, %r3056; // inline asm bfe.u32 %r1277, %r1278, %r208, %r2512; // inline asm mul.wide.u32 %rd603, %r1277, 4; add.s64 %rd604, %rd51, %rd603; ld.shared.u32 %r3059, [%rd604]; add.s32 %r3060, %r3059, %r3058; xor.b32 %r3061, %r66, %r1262; xor.b32 %r1294, %r3061, %r3060; // inline asm bfe.u32 %r1281, %r1294, %r2499, %r2512; // inline asm mul.wide.u32 %rd605, %r1281, 4; add.s64 %rd606, %rd42, %rd605; ld.shared.u32 %r3062, [%rd606]; // inline asm bfe.u32 %r1285, %r1294, %r2503, %r2512; // inline asm mul.wide.u32 %rd607, %r1285, 4; add.s64 %rd608, %rd45, %rd607; ld.shared.u32 %r3063, [%rd608]; add.s32 %r3064, %r3063, %r3062; // inline asm bfe.u32 %r1289, %r1294, %r2512, %r2512; // inline asm mul.wide.u32 %rd609, %r1289, 4; add.s64 
%rd610, %rd48, %rd609; ld.shared.u32 %r3065, [%rd610]; xor.b32 %r3066, %r3065, %r3064; // inline asm bfe.u32 %r1293, %r1294, %r208, %r2512; // inline asm mul.wide.u32 %rd611, %r1293, 4; add.s64 %rd612, %rd51, %rd611; ld.shared.u32 %r3067, [%rd612]; add.s32 %r3068, %r3067, %r3066; xor.b32 %r3069, %r69, %r1278; xor.b32 %r1310, %r3069, %r3068; // inline asm bfe.u32 %r1297, %r1310, %r2499, %r2512; // inline asm mul.wide.u32 %rd613, %r1297, 4; add.s64 %rd614, %rd42, %rd613; ld.shared.u32 %r3070, [%rd614]; // inline asm bfe.u32 %r1301, %r1310, %r2503, %r2512; // inline asm mul.wide.u32 %rd615, %r1301, 4; add.s64 %rd616, %rd45, %rd615; ld.shared.u32 %r3071, [%rd616]; add.s32 %r3072, %r3071, %r3070; // inline asm bfe.u32 %r1305, %r1310, %r2512, %r2512; // inline asm mul.wide.u32 %rd617, %r1305, 4; add.s64 %rd618, %rd48, %rd617; ld.shared.u32 %r3073, [%rd618]; xor.b32 %r3074, %r3073, %r3072; // inline asm bfe.u32 %r1309, %r1310, %r208, %r2512; // inline asm mul.wide.u32 %rd619, %r1309, 4; add.s64 %rd620, %rd51, %rd619; ld.shared.u32 %r3075, [%rd620]; add.s32 %r3076, %r3075, %r3074; xor.b32 %r3077, %r68, %r1294; xor.b32 %r1326, %r3077, %r3076; // inline asm bfe.u32 %r1313, %r1326, %r2499, %r2512; // inline asm mul.wide.u32 %rd621, %r1313, 4; add.s64 %rd622, %rd42, %rd621; ld.shared.u32 %r3078, [%rd622]; // inline asm bfe.u32 %r1317, %r1326, %r2503, %r2512; // inline asm mul.wide.u32 %rd623, %r1317, 4; add.s64 %rd624, %rd45, %rd623; ld.shared.u32 %r3079, [%rd624]; add.s32 %r3080, %r3079, %r3078; // inline asm bfe.u32 %r1321, %r1326, %r2512, %r2512; // inline asm mul.wide.u32 %rd625, %r1321, 4; add.s64 %rd626, %rd48, %rd625; ld.shared.u32 %r3081, [%rd626]; xor.b32 %r3082, %r3081, %r3080; // inline asm bfe.u32 %r1325, %r1326, %r208, %r2512; // inline asm mul.wide.u32 %rd627, %r1325, 4; add.s64 %rd628, %rd51, %rd627; ld.shared.u32 %r3083, [%rd628]; add.s32 %r3084, %r3083, %r3082; xor.b32 %r3085, %r71, %r1310; xor.b32 %r1342, %r3085, %r3084; // inline asm bfe.u32 %r1329, %r1342, 
%r2499, %r2512; // inline asm mul.wide.u32 %rd629, %r1329, 4; add.s64 %rd630, %rd42, %rd629; ld.shared.u32 %r3086, [%rd630]; // inline asm bfe.u32 %r1333, %r1342, %r2503, %r2512; // inline asm mul.wide.u32 %rd631, %r1333, 4; add.s64 %rd632, %rd45, %rd631; ld.shared.u32 %r3087, [%rd632]; add.s32 %r3088, %r3087, %r3086; // inline asm bfe.u32 %r1337, %r1342, %r2512, %r2512; // inline asm mul.wide.u32 %rd633, %r1337, 4; add.s64 %rd634, %rd48, %rd633; ld.shared.u32 %r3089, [%rd634]; xor.b32 %r3090, %r3089, %r3088; // inline asm bfe.u32 %r1341, %r1342, %r208, %r2512; // inline asm mul.wide.u32 %rd635, %r1341, 4; add.s64 %rd636, %rd51, %rd635; ld.shared.u32 %r3091, [%rd636]; add.s32 %r3092, %r3091, %r3090; xor.b32 %r3093, %r70, %r1326; xor.b32 %r1358, %r3093, %r3092; // inline asm bfe.u32 %r1345, %r1358, %r2499, %r2512; // inline asm mul.wide.u32 %rd637, %r1345, 4; add.s64 %rd638, %rd42, %rd637; ld.shared.u32 %r3094, [%rd638]; // inline asm bfe.u32 %r1349, %r1358, %r2503, %r2512; // inline asm mul.wide.u32 %rd639, %r1349, 4; add.s64 %rd640, %rd45, %rd639; ld.shared.u32 %r3095, [%rd640]; add.s32 %r3096, %r3095, %r3094; // inline asm bfe.u32 %r1353, %r1358, %r2512, %r2512; // inline asm mul.wide.u32 %rd641, %r1353, 4; add.s64 %rd642, %rd48, %rd641; ld.shared.u32 %r3097, [%rd642]; xor.b32 %r3098, %r3097, %r3096; // inline asm bfe.u32 %r1357, %r1358, %r208, %r2512; // inline asm mul.wide.u32 %rd643, %r1357, 4; add.s64 %rd644, %rd51, %rd643; ld.shared.u32 %r3099, [%rd644]; add.s32 %r3100, %r3099, %r3098; xor.b32 %r3101, %r2583, %r1342; xor.b32 %r1374, %r3101, %r3100; // inline asm bfe.u32 %r1361, %r1374, %r2499, %r2512; // inline asm mul.wide.u32 %rd645, %r1361, 4; add.s64 %rd646, %rd42, %rd645; ld.shared.u32 %r3102, [%rd646]; // inline asm bfe.u32 %r1365, %r1374, %r2503, %r2512; // inline asm mul.wide.u32 %rd647, %r1365, 4; add.s64 %rd648, %rd45, %rd647; ld.shared.u32 %r3103, [%rd648]; add.s32 %r3104, %r3103, %r3102; // inline asm bfe.u32 %r1369, %r1374, %r2512, %r2512; // 
inline asm mul.wide.u32 %rd649, %r1369, 4; add.s64 %rd650, %rd48, %rd649; ld.shared.u32 %r3105, [%rd650]; xor.b32 %r3106, %r3105, %r3104; // inline asm bfe.u32 %r1373, %r1374, %r208, %r2512; // inline asm mul.wide.u32 %rd651, %r1373, 4; add.s64 %rd652, %rd51, %rd651; ld.shared.u32 %r3107, [%rd652]; add.s32 %r3108, %r3107, %r3106; xor.b32 %r3109, %r2592, %r1358; xor.b32 %r1390, %r3109, %r3108; // inline asm bfe.u32 %r1377, %r1390, %r2499, %r2512; // inline asm mul.wide.u32 %rd653, %r1377, 4; add.s64 %rd654, %rd42, %rd653; ld.shared.u32 %r3110, [%rd654]; // inline asm bfe.u32 %r1381, %r1390, %r2503, %r2512; // inline asm mul.wide.u32 %rd655, %r1381, 4; add.s64 %rd656, %rd45, %rd655; ld.shared.u32 %r3111, [%rd656]; add.s32 %r3112, %r3111, %r3110; // inline asm bfe.u32 %r1385, %r1390, %r2512, %r2512; // inline asm mul.wide.u32 %rd657, %r1385, 4; add.s64 %rd658, %rd48, %rd657; ld.shared.u32 %r3113, [%rd658]; xor.b32 %r3114, %r3113, %r3112; // inline asm bfe.u32 %r1389, %r1390, %r208, %r2512; // inline asm mul.wide.u32 %rd659, %r1389, 4; add.s64 %rd660, %rd51, %rd659; ld.shared.u32 %r3115, [%rd660]; add.s32 %r3116, %r3115, %r3114; xor.b32 %r3117, %r2601, %r1374; xor.b32 %r1406, %r3117, %r3116; // inline asm bfe.u32 %r1393, %r1406, %r2499, %r2512; // inline asm mul.wide.u32 %rd661, %r1393, 4; add.s64 %rd662, %rd42, %rd661; ld.shared.u32 %r3118, [%rd662]; // inline asm bfe.u32 %r1397, %r1406, %r2503, %r2512; // inline asm mul.wide.u32 %rd663, %r1397, 4; add.s64 %rd664, %rd45, %rd663; ld.shared.u32 %r3119, [%rd664]; add.s32 %r3120, %r3119, %r3118; // inline asm bfe.u32 %r1401, %r1406, %r2512, %r2512; // inline asm mul.wide.u32 %rd665, %r1401, 4; add.s64 %rd666, %rd48, %rd665; ld.shared.u32 %r3121, [%rd666]; xor.b32 %r3122, %r3121, %r3120; // inline asm bfe.u32 %r1405, %r1406, %r208, %r2512; // inline asm mul.wide.u32 %rd667, %r1405, 4; add.s64 %rd668, %rd51, %rd667; ld.shared.u32 %r3123, [%rd668]; add.s32 %r3124, %r3123, %r3122; xor.b32 %r3125, %r2610, %r1390; xor.b32 
%r1422, %r3125, %r3124; // inline asm bfe.u32 %r1409, %r1422, %r2499, %r2512; // inline asm mul.wide.u32 %rd669, %r1409, 4; add.s64 %rd670, %rd42, %rd669; ld.shared.u32 %r3126, [%rd670]; // inline asm bfe.u32 %r1413, %r1422, %r2503, %r2512; // inline asm mul.wide.u32 %rd671, %r1413, 4; add.s64 %rd672, %rd45, %rd671; ld.shared.u32 %r3127, [%rd672]; add.s32 %r3128, %r3127, %r3126; // inline asm bfe.u32 %r1417, %r1422, %r2512, %r2512; // inline asm mul.wide.u32 %rd673, %r1417, 4; add.s64 %rd674, %rd48, %rd673; ld.shared.u32 %r3129, [%rd674]; xor.b32 %r3130, %r3129, %r3128; // inline asm bfe.u32 %r1421, %r1422, %r208, %r2512; // inline asm mul.wide.u32 %rd675, %r1421, 4; add.s64 %rd676, %rd51, %rd675; ld.shared.u32 %r3131, [%rd676]; add.s32 %r3132, %r3131, %r3130; xor.b32 %r3133, %r2619, %r1406; xor.b32 %r1438, %r3133, %r3132; // inline asm bfe.u32 %r1425, %r1438, %r2499, %r2512; // inline asm mul.wide.u32 %rd677, %r1425, 4; add.s64 %rd678, %rd42, %rd677; ld.shared.u32 %r3134, [%rd678]; // inline asm bfe.u32 %r1429, %r1438, %r2503, %r2512; // inline asm mul.wide.u32 %rd679, %r1429, 4; add.s64 %rd680, %rd45, %rd679; ld.shared.u32 %r3135, [%rd680]; add.s32 %r3136, %r3135, %r3134; // inline asm bfe.u32 %r1433, %r1438, %r2512, %r2512; // inline asm mul.wide.u32 %rd681, %r1433, 4; add.s64 %rd682, %rd48, %rd681; ld.shared.u32 %r3137, [%rd682]; xor.b32 %r3138, %r3137, %r3136; // inline asm bfe.u32 %r1437, %r1438, %r208, %r2512; // inline asm mul.wide.u32 %rd683, %r1437, 4; add.s64 %rd684, %rd51, %rd683; ld.shared.u32 %r3139, [%rd684]; add.s32 %r3140, %r3139, %r3138; xor.b32 %r3141, %r2628, %r1422; xor.b32 %r1454, %r3141, %r3140; // inline asm bfe.u32 %r1441, %r1454, %r2499, %r2512; // inline asm mul.wide.u32 %rd685, %r1441, 4; add.s64 %rd686, %rd42, %rd685; ld.shared.u32 %r3142, [%rd686]; // inline asm bfe.u32 %r1445, %r1454, %r2503, %r2512; // inline asm mul.wide.u32 %rd687, %r1445, 4; add.s64 %rd688, %rd45, %rd687; ld.shared.u32 %r3143, [%rd688]; add.s32 %r3144, %r3143, 
%r3142; // inline asm bfe.u32 %r1449, %r1454, %r2512, %r2512; // inline asm mul.wide.u32 %rd689, %r1449, 4; add.s64 %rd690, %rd48, %rd689; ld.shared.u32 %r3145, [%rd690]; xor.b32 %r3146, %r3145, %r3144; // inline asm bfe.u32 %r1453, %r1454, %r208, %r2512; // inline asm mul.wide.u32 %rd691, %r1453, 4; add.s64 %rd692, %rd51, %rd691; ld.shared.u32 %r3147, [%rd692]; add.s32 %r3148, %r3147, %r3146; xor.b32 %r3149, %r2637, %r1438; xor.b32 %r1470, %r3149, %r3148; // inline asm bfe.u32 %r1457, %r1470, %r2499, %r2512; // inline asm mul.wide.u32 %rd693, %r1457, 4; add.s64 %rd694, %rd42, %rd693; ld.shared.u32 %r3150, [%rd694]; // inline asm bfe.u32 %r1461, %r1470, %r2503, %r2512; // inline asm mul.wide.u32 %rd695, %r1461, 4; add.s64 %rd696, %rd45, %rd695; ld.shared.u32 %r3151, [%rd696]; add.s32 %r3152, %r3151, %r3150; // inline asm bfe.u32 %r1465, %r1470, %r2512, %r2512; // inline asm mul.wide.u32 %rd697, %r1465, 4; add.s64 %rd698, %rd48, %rd697; ld.shared.u32 %r3153, [%rd698]; xor.b32 %r3154, %r3153, %r3152; // inline asm bfe.u32 %r1469, %r1470, %r208, %r2512; // inline asm mul.wide.u32 %rd699, %r1469, 4; add.s64 %rd700, %rd51, %rd699; ld.shared.u32 %r3155, [%rd700]; add.s32 %r3156, %r3155, %r3154; xor.b32 %r3157, %r2646, %r1454; xor.b32 %r1486, %r3157, %r3156; // inline asm bfe.u32 %r1473, %r1486, %r2499, %r2512; // inline asm mul.wide.u32 %rd701, %r1473, 4; add.s64 %rd702, %rd42, %rd701; ld.shared.u32 %r3158, [%rd702]; // inline asm bfe.u32 %r1477, %r1486, %r2503, %r2512; // inline asm mul.wide.u32 %rd703, %r1477, 4; add.s64 %rd704, %rd45, %rd703; ld.shared.u32 %r3159, [%rd704]; add.s32 %r3160, %r3159, %r3158; // inline asm bfe.u32 %r1481, %r1486, %r2512, %r2512; // inline asm mul.wide.u32 %rd705, %r1481, 4; add.s64 %rd706, %rd48, %rd705; ld.shared.u32 %r3161, [%rd706]; xor.b32 %r3162, %r3161, %r3160; // inline asm bfe.u32 %r1485, %r1486, %r208, %r2512; // inline asm mul.wide.u32 %rd707, %r1485, 4; add.s64 %rd708, %rd51, %rd707; ld.shared.u32 %r3163, [%rd708]; add.s32 
%r3164, %r3163, %r3162; xor.b32 %r3165, %r2655, %r1470; xor.b32 %r72, %r3165, %r3164; xor.b32 %r73, %r2657, %r1486; xor.b32 %r1502, %r462, %r1486; // inline asm bfe.u32 %r1489, %r1502, %r2499, %r2512; // inline asm mul.wide.u32 %rd709, %r1489, 4; add.s64 %rd710, %rd42, %rd709; ld.shared.u32 %r3166, [%rd710]; // inline asm bfe.u32 %r1493, %r1502, %r2503, %r2512; // inline asm mul.wide.u32 %rd711, %r1493, 4; add.s64 %rd712, %rd45, %rd711; ld.shared.u32 %r3167, [%rd712]; add.s32 %r3168, %r3167, %r3166; // inline asm bfe.u32 %r1497, %r1502, %r2512, %r2512; // inline asm mul.wide.u32 %rd713, %r1497, 4; add.s64 %rd714, %rd48, %rd713; ld.shared.u32 %r3169, [%rd714]; xor.b32 %r3170, %r3169, %r3168; // inline asm bfe.u32 %r1501, %r1502, %r208, %r2512; // inline asm mul.wide.u32 %rd715, %r1501, 4; add.s64 %rd716, %rd51, %rd715; ld.shared.u32 %r3171, [%rd716]; add.s32 %r3172, %r3171, %r3170; xor.b32 %r3173, %r64, %r72; xor.b32 %r1518, %r3173, %r3172; // inline asm bfe.u32 %r1505, %r1518, %r2499, %r2512; // inline asm mul.wide.u32 %rd717, %r1505, 4; add.s64 %rd718, %rd42, %rd717; ld.shared.u32 %r3174, [%rd718]; // inline asm bfe.u32 %r1509, %r1518, %r2503, %r2512; // inline asm mul.wide.u32 %rd719, %r1509, 4; add.s64 %rd720, %rd45, %rd719; ld.shared.u32 %r3175, [%rd720]; add.s32 %r3176, %r3175, %r3174; // inline asm bfe.u32 %r1513, %r1518, %r2512, %r2512; // inline asm mul.wide.u32 %rd721, %r1513, 4; add.s64 %rd722, %rd48, %rd721; ld.shared.u32 %r3177, [%rd722]; xor.b32 %r3178, %r3177, %r3176; // inline asm bfe.u32 %r1517, %r1518, %r208, %r2512; // inline asm mul.wide.u32 %rd723, %r1517, 4; add.s64 %rd724, %rd51, %rd723; ld.shared.u32 %r3179, [%rd724]; add.s32 %r3180, %r3179, %r3178; xor.b32 %r3181, %r67, %r1502; xor.b32 %r1534, %r3181, %r3180; // inline asm bfe.u32 %r1521, %r1534, %r2499, %r2512; // inline asm mul.wide.u32 %rd725, %r1521, 4; add.s64 %rd726, %rd42, %rd725; ld.shared.u32 %r3182, [%rd726]; // inline asm bfe.u32 %r1525, %r1534, %r2503, %r2512; // inline asm 
mul.wide.u32 %rd727, %r1525, 4; add.s64 %rd728, %rd45, %rd727; ld.shared.u32 %r3183, [%rd728]; add.s32 %r3184, %r3183, %r3182; // inline asm bfe.u32 %r1529, %r1534, %r2512, %r2512; // inline asm mul.wide.u32 %rd729, %r1529, 4; add.s64 %rd730, %rd48, %rd729; ld.shared.u32 %r3185, [%rd730]; xor.b32 %r3186, %r3185, %r3184; // inline asm bfe.u32 %r1533, %r1534, %r208, %r2512; // inline asm mul.wide.u32 %rd731, %r1533, 4; add.s64 %rd732, %rd51, %rd731; ld.shared.u32 %r3187, [%rd732]; add.s32 %r3188, %r3187, %r3186; xor.b32 %r3189, %r66, %r1518; xor.b32 %r1550, %r3189, %r3188; // inline asm bfe.u32 %r1537, %r1550, %r2499, %r2512; // inline asm mul.wide.u32 %rd733, %r1537, 4; add.s64 %rd734, %rd42, %rd733; ld.shared.u32 %r3190, [%rd734]; // inline asm bfe.u32 %r1541, %r1550, %r2503, %r2512; // inline asm mul.wide.u32 %rd735, %r1541, 4; add.s64 %rd736, %rd45, %rd735; ld.shared.u32 %r3191, [%rd736]; add.s32 %r3192, %r3191, %r3190; // inline asm bfe.u32 %r1545, %r1550, %r2512, %r2512; // inline asm mul.wide.u32 %rd737, %r1545, 4; add.s64 %rd738, %rd48, %rd737; ld.shared.u32 %r3193, [%rd738]; xor.b32 %r3194, %r3193, %r3192; // inline asm bfe.u32 %r1549, %r1550, %r208, %r2512; // inline asm mul.wide.u32 %rd739, %r1549, 4; add.s64 %rd740, %rd51, %rd739; ld.shared.u32 %r3195, [%rd740]; add.s32 %r3196, %r3195, %r3194; xor.b32 %r3197, %r69, %r1534; xor.b32 %r1566, %r3197, %r3196; // inline asm bfe.u32 %r1553, %r1566, %r2499, %r2512; // inline asm mul.wide.u32 %rd741, %r1553, 4; add.s64 %rd742, %rd42, %rd741; ld.shared.u32 %r3198, [%rd742]; // inline asm bfe.u32 %r1557, %r1566, %r2503, %r2512; // inline asm mul.wide.u32 %rd743, %r1557, 4; add.s64 %rd744, %rd45, %rd743; ld.shared.u32 %r3199, [%rd744]; add.s32 %r3200, %r3199, %r3198; // inline asm bfe.u32 %r1561, %r1566, %r2512, %r2512; // inline asm mul.wide.u32 %rd745, %r1561, 4; add.s64 %rd746, %rd48, %rd745; ld.shared.u32 %r3201, [%rd746]; xor.b32 %r3202, %r3201, %r3200; // inline asm bfe.u32 %r1565, %r1566, %r208, %r2512; // 
inline asm mul.wide.u32 %rd747, %r1565, 4; add.s64 %rd748, %rd51, %rd747; ld.shared.u32 %r3203, [%rd748]; add.s32 %r3204, %r3203, %r3202; xor.b32 %r3205, %r68, %r1550; xor.b32 %r1582, %r3205, %r3204; // inline asm bfe.u32 %r1569, %r1582, %r2499, %r2512; // inline asm mul.wide.u32 %rd749, %r1569, 4; add.s64 %rd750, %rd42, %rd749; ld.shared.u32 %r3206, [%rd750]; // inline asm bfe.u32 %r1573, %r1582, %r2503, %r2512; // inline asm mul.wide.u32 %rd751, %r1573, 4; add.s64 %rd752, %rd45, %rd751; ld.shared.u32 %r3207, [%rd752]; add.s32 %r3208, %r3207, %r3206; // inline asm bfe.u32 %r1577, %r1582, %r2512, %r2512; // inline asm mul.wide.u32 %rd753, %r1577, 4; add.s64 %rd754, %rd48, %rd753; ld.shared.u32 %r3209, [%rd754]; xor.b32 %r3210, %r3209, %r3208; // inline asm bfe.u32 %r1581, %r1582, %r208, %r2512; // inline asm mul.wide.u32 %rd755, %r1581, 4; add.s64 %rd756, %rd51, %rd755; ld.shared.u32 %r3211, [%rd756]; add.s32 %r3212, %r3211, %r3210; xor.b32 %r3213, %r71, %r1566; xor.b32 %r1598, %r3213, %r3212; // inline asm bfe.u32 %r1585, %r1598, %r2499, %r2512; // inline asm mul.wide.u32 %rd757, %r1585, 4; add.s64 %rd758, %rd42, %rd757; ld.shared.u32 %r3214, [%rd758]; // inline asm bfe.u32 %r1589, %r1598, %r2503, %r2512; // inline asm mul.wide.u32 %rd759, %r1589, 4; add.s64 %rd760, %rd45, %rd759; ld.shared.u32 %r3215, [%rd760]; add.s32 %r3216, %r3215, %r3214; // inline asm bfe.u32 %r1593, %r1598, %r2512, %r2512; // inline asm mul.wide.u32 %rd761, %r1593, 4; add.s64 %rd762, %rd48, %rd761; ld.shared.u32 %r3217, [%rd762]; xor.b32 %r3218, %r3217, %r3216; // inline asm bfe.u32 %r1597, %r1598, %r208, %r2512; // inline asm mul.wide.u32 %rd763, %r1597, 4; add.s64 %rd764, %rd51, %rd763; ld.shared.u32 %r3219, [%rd764]; add.s32 %r3220, %r3219, %r3218; xor.b32 %r3221, %r70, %r1582; xor.b32 %r1614, %r3221, %r3220; // inline asm bfe.u32 %r1601, %r1614, %r2499, %r2512; // inline asm mul.wide.u32 %rd765, %r1601, 4; add.s64 %rd766, %rd42, %rd765; ld.shared.u32 %r3222, [%rd766]; // inline asm 
bfe.u32 %r1605, %r1614, %r2503, %r2512; // inline asm mul.wide.u32 %rd767, %r1605, 4; add.s64 %rd768, %rd45, %rd767; ld.shared.u32 %r3223, [%rd768]; add.s32 %r3224, %r3223, %r3222; // inline asm bfe.u32 %r1609, %r1614, %r2512, %r2512; // inline asm mul.wide.u32 %rd769, %r1609, 4; add.s64 %rd770, %rd48, %rd769; ld.shared.u32 %r3225, [%rd770]; xor.b32 %r3226, %r3225, %r3224; // inline asm bfe.u32 %r1613, %r1614, %r208, %r2512; // inline asm mul.wide.u32 %rd771, %r1613, 4; add.s64 %rd772, %rd51, %rd771; ld.shared.u32 %r3227, [%rd772]; add.s32 %r3228, %r3227, %r3226; xor.b32 %r3229, %r73, %r1598; xor.b32 %r1630, %r3229, %r3228; // inline asm bfe.u32 %r1617, %r1630, %r2499, %r2512; // inline asm mul.wide.u32 %rd773, %r1617, 4; add.s64 %rd774, %rd42, %rd773; ld.shared.u32 %r3230, [%rd774]; // inline asm bfe.u32 %r1621, %r1630, %r2503, %r2512; // inline asm mul.wide.u32 %rd775, %r1621, 4; add.s64 %rd776, %rd45, %rd775; ld.shared.u32 %r3231, [%rd776]; add.s32 %r3232, %r3231, %r3230; // inline asm bfe.u32 %r1625, %r1630, %r2512, %r2512; // inline asm mul.wide.u32 %rd777, %r1625, 4; add.s64 %rd778, %rd48, %rd777; ld.shared.u32 %r3233, [%rd778]; xor.b32 %r3234, %r3233, %r3232; // inline asm bfe.u32 %r1629, %r1630, %r208, %r2512; // inline asm mul.wide.u32 %rd779, %r1629, 4; add.s64 %rd780, %rd51, %rd779; ld.shared.u32 %r3235, [%rd780]; add.s32 %r3236, %r3235, %r3234; xor.b32 %r3237, %r72, %r1614; xor.b32 %r1646, %r3237, %r3236; // inline asm bfe.u32 %r1633, %r1646, %r2499, %r2512; // inline asm mul.wide.u32 %rd781, %r1633, 4; add.s64 %rd782, %rd42, %rd781; ld.shared.u32 %r3238, [%rd782]; // inline asm bfe.u32 %r1637, %r1646, %r2503, %r2512; // inline asm mul.wide.u32 %rd783, %r1637, 4; add.s64 %rd784, %rd45, %rd783; ld.shared.u32 %r3239, [%rd784]; add.s32 %r3240, %r3239, %r3238; // inline asm bfe.u32 %r1641, %r1646, %r2512, %r2512; // inline asm mul.wide.u32 %rd785, %r1641, 4; add.s64 %rd786, %rd48, %rd785; ld.shared.u32 %r3241, [%rd786]; xor.b32 %r3242, %r3241, %r3240; // 
inline asm bfe.u32 %r1645, %r1646, %r208, %r2512; // inline asm mul.wide.u32 %rd787, %r1645, 4; add.s64 %rd788, %rd51, %rd787; ld.shared.u32 %r3243, [%rd788]; add.s32 %r3244, %r3243, %r3242; xor.b32 %r3245, %r2601, %r1630; xor.b32 %r1662, %r3245, %r3244; // inline asm bfe.u32 %r1649, %r1662, %r2499, %r2512; // inline asm mul.wide.u32 %rd789, %r1649, 4; add.s64 %rd790, %rd42, %rd789; ld.shared.u32 %r3246, [%rd790]; // inline asm bfe.u32 %r1653, %r1662, %r2503, %r2512; // inline asm mul.wide.u32 %rd791, %r1653, 4; add.s64 %rd792, %rd45, %rd791; ld.shared.u32 %r3247, [%rd792]; add.s32 %r3248, %r3247, %r3246; // inline asm bfe.u32 %r1657, %r1662, %r2512, %r2512; // inline asm mul.wide.u32 %rd793, %r1657, 4; add.s64 %rd794, %rd48, %rd793; ld.shared.u32 %r3249, [%rd794]; xor.b32 %r3250, %r3249, %r3248; // inline asm bfe.u32 %r1661, %r1662, %r208, %r2512; // inline asm mul.wide.u32 %rd795, %r1661, 4; add.s64 %rd796, %rd51, %rd795; ld.shared.u32 %r3251, [%rd796]; add.s32 %r3252, %r3251, %r3250; xor.b32 %r3253, %r2610, %r1646; xor.b32 %r1678, %r3253, %r3252; // inline asm bfe.u32 %r1665, %r1678, %r2499, %r2512; // inline asm mul.wide.u32 %rd797, %r1665, 4; add.s64 %rd798, %rd42, %rd797; ld.shared.u32 %r3254, [%rd798]; // inline asm bfe.u32 %r1669, %r1678, %r2503, %r2512; // inline asm mul.wide.u32 %rd799, %r1669, 4; add.s64 %rd800, %rd45, %rd799; ld.shared.u32 %r3255, [%rd800]; add.s32 %r3256, %r3255, %r3254; // inline asm bfe.u32 %r1673, %r1678, %r2512, %r2512; // inline asm mul.wide.u32 %rd801, %r1673, 4; add.s64 %rd802, %rd48, %rd801; ld.shared.u32 %r3257, [%rd802]; xor.b32 %r3258, %r3257, %r3256; // inline asm bfe.u32 %r1677, %r1678, %r208, %r2512; // inline asm mul.wide.u32 %rd803, %r1677, 4; add.s64 %rd804, %rd51, %rd803; ld.shared.u32 %r3259, [%rd804]; add.s32 %r3260, %r3259, %r3258; xor.b32 %r3261, %r2619, %r1662; xor.b32 %r1694, %r3261, %r3260; // inline asm bfe.u32 %r1681, %r1694, %r2499, %r2512; // inline asm mul.wide.u32 %rd805, %r1681, 4; add.s64 %rd806, %rd42, 
%rd805; ld.shared.u32 %r3262, [%rd806]; // inline asm bfe.u32 %r1685, %r1694, %r2503, %r2512; // inline asm mul.wide.u32 %rd807, %r1685, 4; add.s64 %rd808, %rd45, %rd807; ld.shared.u32 %r3263, [%rd808]; add.s32 %r3264, %r3263, %r3262; // inline asm bfe.u32 %r1689, %r1694, %r2512, %r2512; // inline asm mul.wide.u32 %rd809, %r1689, 4; add.s64 %rd810, %rd48, %rd809; ld.shared.u32 %r3265, [%rd810]; xor.b32 %r3266, %r3265, %r3264; // inline asm bfe.u32 %r1693, %r1694, %r208, %r2512; // inline asm mul.wide.u32 %rd811, %r1693, 4; add.s64 %rd812, %rd51, %rd811; ld.shared.u32 %r3267, [%rd812]; add.s32 %r3268, %r3267, %r3266; xor.b32 %r3269, %r2628, %r1678; xor.b32 %r1710, %r3269, %r3268; // inline asm bfe.u32 %r1697, %r1710, %r2499, %r2512; // inline asm mul.wide.u32 %rd813, %r1697, 4; add.s64 %rd814, %rd42, %rd813; ld.shared.u32 %r3270, [%rd814]; // inline asm bfe.u32 %r1701, %r1710, %r2503, %r2512; // inline asm mul.wide.u32 %rd815, %r1701, 4; add.s64 %rd816, %rd45, %rd815; ld.shared.u32 %r3271, [%rd816]; add.s32 %r3272, %r3271, %r3270; // inline asm bfe.u32 %r1705, %r1710, %r2512, %r2512; // inline asm mul.wide.u32 %rd817, %r1705, 4; add.s64 %rd818, %rd48, %rd817; ld.shared.u32 %r3273, [%rd818]; xor.b32 %r3274, %r3273, %r3272; // inline asm bfe.u32 %r1709, %r1710, %r208, %r2512; // inline asm mul.wide.u32 %rd819, %r1709, 4; add.s64 %rd820, %rd51, %rd819; ld.shared.u32 %r3275, [%rd820]; add.s32 %r3276, %r3275, %r3274; xor.b32 %r3277, %r2637, %r1694; xor.b32 %r1726, %r3277, %r3276; // inline asm bfe.u32 %r1713, %r1726, %r2499, %r2512; // inline asm mul.wide.u32 %rd821, %r1713, 4; add.s64 %rd822, %rd42, %rd821; ld.shared.u32 %r3278, [%rd822]; // inline asm bfe.u32 %r1717, %r1726, %r2503, %r2512; // inline asm mul.wide.u32 %rd823, %r1717, 4; add.s64 %rd824, %rd45, %rd823; ld.shared.u32 %r3279, [%rd824]; add.s32 %r3280, %r3279, %r3278; // inline asm bfe.u32 %r1721, %r1726, %r2512, %r2512; // inline asm mul.wide.u32 %rd825, %r1721, 4; add.s64 %rd826, %rd48, %rd825; 
ld.shared.u32 %r3281, [%rd826]; xor.b32 %r3282, %r3281, %r3280; // inline asm bfe.u32 %r1725, %r1726, %r208, %r2512; // inline asm mul.wide.u32 %rd827, %r1725, 4; add.s64 %rd828, %rd51, %rd827; ld.shared.u32 %r3283, [%rd828]; add.s32 %r3284, %r3283, %r3282; xor.b32 %r3285, %r2646, %r1710; xor.b32 %r1742, %r3285, %r3284; // inline asm bfe.u32 %r1729, %r1742, %r2499, %r2512; // inline asm mul.wide.u32 %rd829, %r1729, 4; add.s64 %rd830, %rd42, %rd829; ld.shared.u32 %r3286, [%rd830]; // inline asm bfe.u32 %r1733, %r1742, %r2503, %r2512; // inline asm mul.wide.u32 %rd831, %r1733, 4; add.s64 %rd832, %rd45, %rd831; ld.shared.u32 %r3287, [%rd832]; add.s32 %r3288, %r3287, %r3286; // inline asm bfe.u32 %r1737, %r1742, %r2512, %r2512; // inline asm mul.wide.u32 %rd833, %r1737, 4; add.s64 %rd834, %rd48, %rd833; ld.shared.u32 %r3289, [%rd834]; xor.b32 %r3290, %r3289, %r3288; // inline asm bfe.u32 %r1741, %r1742, %r208, %r2512; // inline asm mul.wide.u32 %rd835, %r1741, 4; add.s64 %rd836, %rd51, %rd835; ld.shared.u32 %r3291, [%rd836]; add.s32 %r3292, %r3291, %r3290; xor.b32 %r3293, %r2655, %r1726; xor.b32 %r74, %r3293, %r3292; xor.b32 %r75, %r2657, %r1742; xor.b32 %r1758, %r462, %r1742; // inline asm bfe.u32 %r1745, %r1758, %r2499, %r2512; // inline asm mul.wide.u32 %rd837, %r1745, 4; add.s64 %rd838, %rd42, %rd837; ld.shared.u32 %r3294, [%rd838]; // inline asm bfe.u32 %r1749, %r1758, %r2503, %r2512; // inline asm mul.wide.u32 %rd839, %r1749, 4; add.s64 %rd840, %rd45, %rd839; ld.shared.u32 %r3295, [%rd840]; add.s32 %r3296, %r3295, %r3294; // inline asm bfe.u32 %r1753, %r1758, %r2512, %r2512; // inline asm mul.wide.u32 %rd841, %r1753, 4; add.s64 %rd842, %rd48, %rd841; ld.shared.u32 %r3297, [%rd842]; xor.b32 %r3298, %r3297, %r3296; // inline asm bfe.u32 %r1757, %r1758, %r208, %r2512; // inline asm mul.wide.u32 %rd843, %r1757, 4; add.s64 %rd844, %rd51, %rd843; ld.shared.u32 %r3299, [%rd844]; add.s32 %r3300, %r3299, %r3298; xor.b32 %r3301, %r64, %r74; xor.b32 %r1774, %r3301, %r3300; 
// inline asm bfe.u32 %r1761, %r1774, %r2499, %r2512; // inline asm mul.wide.u32 %rd845, %r1761, 4; add.s64 %rd846, %rd42, %rd845; ld.shared.u32 %r3302, [%rd846]; // inline asm bfe.u32 %r1765, %r1774, %r2503, %r2512; // inline asm mul.wide.u32 %rd847, %r1765, 4; add.s64 %rd848, %rd45, %rd847; ld.shared.u32 %r3303, [%rd848]; add.s32 %r3304, %r3303, %r3302; // inline asm bfe.u32 %r1769, %r1774, %r2512, %r2512; // inline asm mul.wide.u32 %rd849, %r1769, 4; add.s64 %rd850, %rd48, %rd849; ld.shared.u32 %r3305, [%rd850]; xor.b32 %r3306, %r3305, %r3304; // inline asm bfe.u32 %r1773, %r1774, %r208, %r2512; // inline asm mul.wide.u32 %rd851, %r1773, 4; add.s64 %rd852, %rd51, %rd851; ld.shared.u32 %r3307, [%rd852]; add.s32 %r3308, %r3307, %r3306; xor.b32 %r3309, %r67, %r1758; xor.b32 %r1790, %r3309, %r3308; // inline asm bfe.u32 %r1777, %r1790, %r2499, %r2512; // inline asm mul.wide.u32 %rd853, %r1777, 4; add.s64 %rd854, %rd42, %rd853; ld.shared.u32 %r3310, [%rd854]; // inline asm bfe.u32 %r1781, %r1790, %r2503, %r2512; // inline asm mul.wide.u32 %rd855, %r1781, 4; add.s64 %rd856, %rd45, %rd855; ld.shared.u32 %r3311, [%rd856]; add.s32 %r3312, %r3311, %r3310; // inline asm bfe.u32 %r1785, %r1790, %r2512, %r2512; // inline asm mul.wide.u32 %rd857, %r1785, 4; add.s64 %rd858, %rd48, %rd857; ld.shared.u32 %r3313, [%rd858]; xor.b32 %r3314, %r3313, %r3312; // inline asm bfe.u32 %r1789, %r1790, %r208, %r2512; // inline asm mul.wide.u32 %rd859, %r1789, 4; add.s64 %rd860, %rd51, %rd859; ld.shared.u32 %r3315, [%rd860]; add.s32 %r3316, %r3315, %r3314; xor.b32 %r3317, %r66, %r1774; xor.b32 %r1806, %r3317, %r3316; // inline asm bfe.u32 %r1793, %r1806, %r2499, %r2512; // inline asm mul.wide.u32 %rd861, %r1793, 4; add.s64 %rd862, %rd42, %rd861; ld.shared.u32 %r3318, [%rd862]; // inline asm bfe.u32 %r1797, %r1806, %r2503, %r2512; // inline asm mul.wide.u32 %rd863, %r1797, 4; add.s64 %rd864, %rd45, %rd863; ld.shared.u32 %r3319, [%rd864]; add.s32 %r3320, %r3319, %r3318; // inline asm bfe.u32 
%r1801, %r1806, %r2512, %r2512; // inline asm mul.wide.u32 %rd865, %r1801, 4; add.s64 %rd866, %rd48, %rd865; ld.shared.u32 %r3321, [%rd866]; xor.b32 %r3322, %r3321, %r3320; // inline asm bfe.u32 %r1805, %r1806, %r208, %r2512; // inline asm mul.wide.u32 %rd867, %r1805, 4; add.s64 %rd868, %rd51, %rd867; ld.shared.u32 %r3323, [%rd868]; add.s32 %r3324, %r3323, %r3322; xor.b32 %r3325, %r69, %r1790; xor.b32 %r1822, %r3325, %r3324; // inline asm bfe.u32 %r1809, %r1822, %r2499, %r2512; // inline asm mul.wide.u32 %rd869, %r1809, 4; add.s64 %rd870, %rd42, %rd869; ld.shared.u32 %r3326, [%rd870]; // inline asm bfe.u32 %r1813, %r1822, %r2503, %r2512; // inline asm mul.wide.u32 %rd871, %r1813, 4; add.s64 %rd872, %rd45, %rd871; ld.shared.u32 %r3327, [%rd872]; add.s32 %r3328, %r3327, %r3326; // inline asm bfe.u32 %r1817, %r1822, %r2512, %r2512; // inline asm mul.wide.u32 %rd873, %r1817, 4; add.s64 %rd874, %rd48, %rd873; ld.shared.u32 %r3329, [%rd874]; xor.b32 %r3330, %r3329, %r3328; // inline asm bfe.u32 %r1821, %r1822, %r208, %r2512; // inline asm mul.wide.u32 %rd875, %r1821, 4; add.s64 %rd876, %rd51, %rd875; ld.shared.u32 %r3331, [%rd876]; add.s32 %r3332, %r3331, %r3330; xor.b32 %r3333, %r68, %r1806; xor.b32 %r1838, %r3333, %r3332; // inline asm bfe.u32 %r1825, %r1838, %r2499, %r2512; // inline asm mul.wide.u32 %rd877, %r1825, 4; add.s64 %rd878, %rd42, %rd877; ld.shared.u32 %r3334, [%rd878]; // inline asm bfe.u32 %r1829, %r1838, %r2503, %r2512; // inline asm mul.wide.u32 %rd879, %r1829, 4; add.s64 %rd880, %rd45, %rd879; ld.shared.u32 %r3335, [%rd880]; add.s32 %r3336, %r3335, %r3334; // inline asm bfe.u32 %r1833, %r1838, %r2512, %r2512; // inline asm mul.wide.u32 %rd881, %r1833, 4; add.s64 %rd882, %rd48, %rd881; ld.shared.u32 %r3337, [%rd882]; xor.b32 %r3338, %r3337, %r3336; // inline asm bfe.u32 %r1837, %r1838, %r208, %r2512; // inline asm mul.wide.u32 %rd883, %r1837, 4; add.s64 %rd884, %rd51, %rd883; ld.shared.u32 %r3339, [%rd884]; add.s32 %r3340, %r3339, %r3338; xor.b32 
%r3341, %r71, %r1822; xor.b32 %r1854, %r3341, %r3340; // inline asm bfe.u32 %r1841, %r1854, %r2499, %r2512; // inline asm mul.wide.u32 %rd885, %r1841, 4; add.s64 %rd886, %rd42, %rd885; ld.shared.u32 %r3342, [%rd886]; // inline asm bfe.u32 %r1845, %r1854, %r2503, %r2512; // inline asm mul.wide.u32 %rd887, %r1845, 4; add.s64 %rd888, %rd45, %rd887; ld.shared.u32 %r3343, [%rd888]; add.s32 %r3344, %r3343, %r3342; // inline asm bfe.u32 %r1849, %r1854, %r2512, %r2512; // inline asm mul.wide.u32 %rd889, %r1849, 4; add.s64 %rd890, %rd48, %rd889; ld.shared.u32 %r3345, [%rd890]; xor.b32 %r3346, %r3345, %r3344; // inline asm bfe.u32 %r1853, %r1854, %r208, %r2512; // inline asm mul.wide.u32 %rd891, %r1853, 4; add.s64 %rd892, %rd51, %rd891; ld.shared.u32 %r3347, [%rd892]; add.s32 %r3348, %r3347, %r3346; xor.b32 %r3349, %r70, %r1838; xor.b32 %r1870, %r3349, %r3348; // inline asm bfe.u32 %r1857, %r1870, %r2499, %r2512; // inline asm mul.wide.u32 %rd893, %r1857, 4; add.s64 %rd894, %rd42, %rd893; ld.shared.u32 %r3350, [%rd894]; // inline asm bfe.u32 %r1861, %r1870, %r2503, %r2512; // inline asm mul.wide.u32 %rd895, %r1861, 4; add.s64 %rd896, %rd45, %rd895; ld.shared.u32 %r3351, [%rd896]; add.s32 %r3352, %r3351, %r3350; // inline asm bfe.u32 %r1865, %r1870, %r2512, %r2512; // inline asm mul.wide.u32 %rd897, %r1865, 4; add.s64 %rd898, %rd48, %rd897; ld.shared.u32 %r3353, [%rd898]; xor.b32 %r3354, %r3353, %r3352; // inline asm bfe.u32 %r1869, %r1870, %r208, %r2512; // inline asm mul.wide.u32 %rd899, %r1869, 4; add.s64 %rd900, %rd51, %rd899; ld.shared.u32 %r3355, [%rd900]; add.s32 %r3356, %r3355, %r3354; xor.b32 %r3357, %r73, %r1854; xor.b32 %r1886, %r3357, %r3356; // inline asm bfe.u32 %r1873, %r1886, %r2499, %r2512; // inline asm mul.wide.u32 %rd901, %r1873, 4; add.s64 %rd902, %rd42, %rd901; ld.shared.u32 %r3358, [%rd902]; // inline asm bfe.u32 %r1877, %r1886, %r2503, %r2512; // inline asm mul.wide.u32 %rd903, %r1877, 4; add.s64 %rd904, %rd45, %rd903; ld.shared.u32 %r3359, [%rd904]; 
add.s32 %r3360, %r3359, %r3358; // inline asm bfe.u32 %r1881, %r1886, %r2512, %r2512; // inline asm mul.wide.u32 %rd905, %r1881, 4; add.s64 %rd906, %rd48, %rd905; ld.shared.u32 %r3361, [%rd906]; xor.b32 %r3362, %r3361, %r3360; // inline asm bfe.u32 %r1885, %r1886, %r208, %r2512; // inline asm mul.wide.u32 %rd907, %r1885, 4; add.s64 %rd908, %rd51, %rd907; ld.shared.u32 %r3363, [%rd908]; add.s32 %r3364, %r3363, %r3362; xor.b32 %r3365, %r72, %r1870; xor.b32 %r1902, %r3365, %r3364; // inline asm bfe.u32 %r1889, %r1902, %r2499, %r2512; // inline asm mul.wide.u32 %rd909, %r1889, 4; add.s64 %rd910, %rd42, %rd909; ld.shared.u32 %r3366, [%rd910]; // inline asm bfe.u32 %r1893, %r1902, %r2503, %r2512; // inline asm mul.wide.u32 %rd911, %r1893, 4; add.s64 %rd912, %rd45, %rd911; ld.shared.u32 %r3367, [%rd912]; add.s32 %r3368, %r3367, %r3366; // inline asm bfe.u32 %r1897, %r1902, %r2512, %r2512; // inline asm mul.wide.u32 %rd913, %r1897, 4; add.s64 %rd914, %rd48, %rd913; ld.shared.u32 %r3369, [%rd914]; xor.b32 %r3370, %r3369, %r3368; // inline asm bfe.u32 %r1901, %r1902, %r208, %r2512; // inline asm mul.wide.u32 %rd915, %r1901, 4; add.s64 %rd916, %rd51, %rd915; ld.shared.u32 %r3371, [%rd916]; add.s32 %r3372, %r3371, %r3370; xor.b32 %r3373, %r75, %r1886; xor.b32 %r1918, %r3373, %r3372; // inline asm bfe.u32 %r1905, %r1918, %r2499, %r2512; // inline asm mul.wide.u32 %rd917, %r1905, 4; add.s64 %rd918, %rd42, %rd917; ld.shared.u32 %r3374, [%rd918]; // inline asm bfe.u32 %r1909, %r1918, %r2503, %r2512; // inline asm mul.wide.u32 %rd919, %r1909, 4; add.s64 %rd920, %rd45, %rd919; ld.shared.u32 %r3375, [%rd920]; add.s32 %r3376, %r3375, %r3374; // inline asm bfe.u32 %r1913, %r1918, %r2512, %r2512; // inline asm mul.wide.u32 %rd921, %r1913, 4; add.s64 %rd922, %rd48, %rd921; ld.shared.u32 %r3377, [%rd922]; xor.b32 %r3378, %r3377, %r3376; // inline asm bfe.u32 %r1917, %r1918, %r208, %r2512; // inline asm mul.wide.u32 %rd923, %r1917, 4; add.s64 %rd924, %rd51, %rd923; ld.shared.u32 %r3379, 
[%rd924]; add.s32 %r3380, %r3379, %r3378; xor.b32 %r3381, %r74, %r1902; xor.b32 %r1934, %r3381, %r3380; // inline asm bfe.u32 %r1921, %r1934, %r2499, %r2512; // inline asm mul.wide.u32 %rd925, %r1921, 4; add.s64 %rd926, %rd42, %rd925; ld.shared.u32 %r3382, [%rd926]; // inline asm bfe.u32 %r1925, %r1934, %r2503, %r2512; // inline asm mul.wide.u32 %rd927, %r1925, 4; add.s64 %rd928, %rd45, %rd927; ld.shared.u32 %r3383, [%rd928]; add.s32 %r3384, %r3383, %r3382; // inline asm bfe.u32 %r1929, %r1934, %r2512, %r2512; // inline asm mul.wide.u32 %rd929, %r1929, 4; add.s64 %rd930, %rd48, %rd929; ld.shared.u32 %r3385, [%rd930]; xor.b32 %r3386, %r3385, %r3384; // inline asm bfe.u32 %r1933, %r1934, %r208, %r2512; // inline asm mul.wide.u32 %rd931, %r1933, 4; add.s64 %rd932, %rd51, %rd931; ld.shared.u32 %r3387, [%rd932]; add.s32 %r3388, %r3387, %r3386; xor.b32 %r3389, %r2619, %r1918; xor.b32 %r1950, %r3389, %r3388; // inline asm bfe.u32 %r1937, %r1950, %r2499, %r2512; // inline asm mul.wide.u32 %rd933, %r1937, 4; add.s64 %rd934, %rd42, %rd933; ld.shared.u32 %r3390, [%rd934]; // inline asm bfe.u32 %r1941, %r1950, %r2503, %r2512; // inline asm mul.wide.u32 %rd935, %r1941, 4; add.s64 %rd936, %rd45, %rd935; ld.shared.u32 %r3391, [%rd936]; add.s32 %r3392, %r3391, %r3390; // inline asm bfe.u32 %r1945, %r1950, %r2512, %r2512; // inline asm mul.wide.u32 %rd937, %r1945, 4; add.s64 %rd938, %rd48, %rd937; ld.shared.u32 %r3393, [%rd938]; xor.b32 %r3394, %r3393, %r3392; // inline asm bfe.u32 %r1949, %r1950, %r208, %r2512; // inline asm mul.wide.u32 %rd939, %r1949, 4; add.s64 %rd940, %rd51, %rd939; ld.shared.u32 %r3395, [%rd940]; add.s32 %r3396, %r3395, %r3394; xor.b32 %r3397, %r2628, %r1934; xor.b32 %r1966, %r3397, %r3396; // inline asm bfe.u32 %r1953, %r1966, %r2499, %r2512; // inline asm mul.wide.u32 %rd941, %r1953, 4; add.s64 %rd942, %rd42, %rd941; ld.shared.u32 %r3398, [%rd942]; // inline asm bfe.u32 %r1957, %r1966, %r2503, %r2512; // inline asm mul.wide.u32 %rd943, %r1957, 4; add.s64 
%rd944, %rd45, %rd943; ld.shared.u32 %r3399, [%rd944]; add.s32 %r3400, %r3399, %r3398; // inline asm bfe.u32 %r1961, %r1966, %r2512, %r2512; // inline asm mul.wide.u32 %rd945, %r1961, 4; add.s64 %rd946, %rd48, %rd945; ld.shared.u32 %r3401, [%rd946]; xor.b32 %r3402, %r3401, %r3400; // inline asm bfe.u32 %r1965, %r1966, %r208, %r2512; // inline asm mul.wide.u32 %rd947, %r1965, 4; add.s64 %rd948, %rd51, %rd947; ld.shared.u32 %r3403, [%rd948]; add.s32 %r3404, %r3403, %r3402; xor.b32 %r3405, %r2637, %r1950; xor.b32 %r1982, %r3405, %r3404; // inline asm bfe.u32 %r1969, %r1982, %r2499, %r2512; // inline asm mul.wide.u32 %rd949, %r1969, 4; add.s64 %rd950, %rd42, %rd949; ld.shared.u32 %r3406, [%rd950]; // inline asm bfe.u32 %r1973, %r1982, %r2503, %r2512; // inline asm mul.wide.u32 %rd951, %r1973, 4; add.s64 %rd952, %rd45, %rd951; ld.shared.u32 %r3407, [%rd952]; add.s32 %r3408, %r3407, %r3406; // inline asm bfe.u32 %r1977, %r1982, %r2512, %r2512; // inline asm mul.wide.u32 %rd953, %r1977, 4; add.s64 %rd954, %rd48, %rd953; ld.shared.u32 %r3409, [%rd954]; xor.b32 %r3410, %r3409, %r3408; // inline asm bfe.u32 %r1981, %r1982, %r208, %r2512; // inline asm mul.wide.u32 %rd955, %r1981, 4; add.s64 %rd956, %rd51, %rd955; ld.shared.u32 %r3411, [%rd956]; add.s32 %r3412, %r3411, %r3410; xor.b32 %r3413, %r2646, %r1966; xor.b32 %r1998, %r3413, %r3412; // inline asm bfe.u32 %r1985, %r1998, %r2499, %r2512; // inline asm mul.wide.u32 %rd957, %r1985, 4; add.s64 %rd958, %rd42, %rd957; ld.shared.u32 %r3414, [%rd958]; // inline asm bfe.u32 %r1989, %r1998, %r2503, %r2512; // inline asm mul.wide.u32 %rd959, %r1989, 4; add.s64 %rd960, %rd45, %rd959; ld.shared.u32 %r3415, [%rd960]; add.s32 %r3416, %r3415, %r3414; // inline asm bfe.u32 %r1993, %r1998, %r2512, %r2512; // inline asm mul.wide.u32 %rd961, %r1993, 4; add.s64 %rd962, %rd48, %rd961; ld.shared.u32 %r3417, [%rd962]; xor.b32 %r3418, %r3417, %r3416; // inline asm bfe.u32 %r1997, %r1998, %r208, %r2512; // inline asm mul.wide.u32 %rd963, %r1997, 
4; add.s64 %rd964, %rd51, %rd963; ld.shared.u32 %r3419, [%rd964]; add.s32 %r3420, %r3419, %r3418; xor.b32 %r3421, %r2655, %r1982; xor.b32 %r76, %r3421, %r3420; xor.b32 %r77, %r2657, %r1998; xor.b32 %r2014, %r462, %r1998; // inline asm bfe.u32 %r2001, %r2014, %r2499, %r2512; // inline asm mul.wide.u32 %rd965, %r2001, 4; add.s64 %rd966, %rd42, %rd965; ld.shared.u32 %r3422, [%rd966]; // inline asm bfe.u32 %r2005, %r2014, %r2503, %r2512; // inline asm mul.wide.u32 %rd967, %r2005, 4; add.s64 %rd968, %rd45, %rd967; ld.shared.u32 %r3423, [%rd968]; add.s32 %r3424, %r3423, %r3422; // inline asm bfe.u32 %r2009, %r2014, %r2512, %r2512; // inline asm mul.wide.u32 %rd969, %r2009, 4; add.s64 %rd970, %rd48, %rd969; ld.shared.u32 %r3425, [%rd970]; xor.b32 %r3426, %r3425, %r3424; // inline asm bfe.u32 %r2013, %r2014, %r208, %r2512; // inline asm mul.wide.u32 %rd971, %r2013, 4; add.s64 %rd972, %rd51, %rd971; ld.shared.u32 %r3427, [%rd972]; add.s32 %r3428, %r3427, %r3426; xor.b32 %r3429, %r64, %r76; xor.b32 %r2030, %r3429, %r3428; // inline asm bfe.u32 %r2017, %r2030, %r2499, %r2512; // inline asm mul.wide.u32 %rd973, %r2017, 4; add.s64 %rd974, %rd42, %rd973; ld.shared.u32 %r3430, [%rd974]; // inline asm bfe.u32 %r2021, %r2030, %r2503, %r2512; // inline asm mul.wide.u32 %rd975, %r2021, 4; add.s64 %rd976, %rd45, %rd975; ld.shared.u32 %r3431, [%rd976]; add.s32 %r3432, %r3431, %r3430; // inline asm bfe.u32 %r2025, %r2030, %r2512, %r2512; // inline asm mul.wide.u32 %rd977, %r2025, 4; add.s64 %rd978, %rd48, %rd977; ld.shared.u32 %r3433, [%rd978]; xor.b32 %r3434, %r3433, %r3432; // inline asm bfe.u32 %r2029, %r2030, %r208, %r2512; // inline asm mul.wide.u32 %rd979, %r2029, 4; add.s64 %rd980, %rd51, %rd979; ld.shared.u32 %r3435, [%rd980]; add.s32 %r3436, %r3435, %r3434; xor.b32 %r3437, %r67, %r2014; xor.b32 %r2046, %r3437, %r3436; // inline asm bfe.u32 %r2033, %r2046, %r2499, %r2512; // inline asm mul.wide.u32 %rd981, %r2033, 4; add.s64 %rd982, %rd42, %rd981; ld.shared.u32 %r3438, [%rd982]; 
// inline asm bfe.u32 %r2037, %r2046, %r2503, %r2512; // inline asm mul.wide.u32 %rd983, %r2037, 4; add.s64 %rd984, %rd45, %rd983; ld.shared.u32 %r3439, [%rd984]; add.s32 %r3440, %r3439, %r3438; // inline asm bfe.u32 %r2041, %r2046, %r2512, %r2512; // inline asm mul.wide.u32 %rd985, %r2041, 4; add.s64 %rd986, %rd48, %rd985; ld.shared.u32 %r3441, [%rd986]; xor.b32 %r3442, %r3441, %r3440; // inline asm bfe.u32 %r2045, %r2046, %r208, %r2512; // inline asm mul.wide.u32 %rd987, %r2045, 4; add.s64 %rd988, %rd51, %rd987; ld.shared.u32 %r3443, [%rd988]; add.s32 %r3444, %r3443, %r3442; xor.b32 %r3445, %r66, %r2030; xor.b32 %r2062, %r3445, %r3444; // inline asm bfe.u32 %r2049, %r2062, %r2499, %r2512; // inline asm mul.wide.u32 %rd989, %r2049, 4; add.s64 %rd990, %rd42, %rd989; ld.shared.u32 %r3446, [%rd990]; // inline asm bfe.u32 %r2053, %r2062, %r2503, %r2512; // inline asm mul.wide.u32 %rd991, %r2053, 4; add.s64 %rd992, %rd45, %rd991; ld.shared.u32 %r3447, [%rd992]; add.s32 %r3448, %r3447, %r3446; // inline asm bfe.u32 %r2057, %r2062, %r2512, %r2512; // inline asm mul.wide.u32 %rd993, %r2057, 4; add.s64 %rd994, %rd48, %rd993; ld.shared.u32 %r3449, [%rd994]; xor.b32 %r3450, %r3449, %r3448; // inline asm bfe.u32 %r2061, %r2062, %r208, %r2512; // inline asm mul.wide.u32 %rd995, %r2061, 4; add.s64 %rd996, %rd51, %rd995; ld.shared.u32 %r3451, [%rd996]; add.s32 %r3452, %r3451, %r3450; xor.b32 %r3453, %r69, %r2046; xor.b32 %r2078, %r3453, %r3452; // inline asm bfe.u32 %r2065, %r2078, %r2499, %r2512; // inline asm mul.wide.u32 %rd997, %r2065, 4; add.s64 %rd998, %rd42, %rd997; ld.shared.u32 %r3454, [%rd998]; // inline asm bfe.u32 %r2069, %r2078, %r2503, %r2512; // inline asm mul.wide.u32 %rd999, %r2069, 4; add.s64 %rd1000, %rd45, %rd999; ld.shared.u32 %r3455, [%rd1000]; add.s32 %r3456, %r3455, %r3454; // inline asm bfe.u32 %r2073, %r2078, %r2512, %r2512; // inline asm mul.wide.u32 %rd1001, %r2073, 4; add.s64 %rd1002, %rd48, %rd1001; ld.shared.u32 %r3457, [%rd1002]; xor.b32 %r3458, 
%r3457, %r3456; // inline asm bfe.u32 %r2077, %r2078, %r208, %r2512; // inline asm mul.wide.u32 %rd1003, %r2077, 4; add.s64 %rd1004, %rd51, %rd1003; ld.shared.u32 %r3459, [%rd1004]; add.s32 %r3460, %r3459, %r3458; xor.b32 %r3461, %r68, %r2062; xor.b32 %r2094, %r3461, %r3460; // inline asm bfe.u32 %r2081, %r2094, %r2499, %r2512; // inline asm mul.wide.u32 %rd1005, %r2081, 4; add.s64 %rd1006, %rd42, %rd1005; ld.shared.u32 %r3462, [%rd1006]; // inline asm bfe.u32 %r2085, %r2094, %r2503, %r2512; // inline asm mul.wide.u32 %rd1007, %r2085, 4; add.s64 %rd1008, %rd45, %rd1007; ld.shared.u32 %r3463, [%rd1008]; add.s32 %r3464, %r3463, %r3462; // inline asm bfe.u32 %r2089, %r2094, %r2512, %r2512; // inline asm mul.wide.u32 %rd1009, %r2089, 4; add.s64 %rd1010, %rd48, %rd1009; ld.shared.u32 %r3465, [%rd1010]; xor.b32 %r3466, %r3465, %r3464; // inline asm bfe.u32 %r2093, %r2094, %r208, %r2512; // inline asm mul.wide.u32 %rd1011, %r2093, 4; add.s64 %rd1012, %rd51, %rd1011; ld.shared.u32 %r3467, [%rd1012]; add.s32 %r3468, %r3467, %r3466; xor.b32 %r3469, %r71, %r2078; xor.b32 %r2110, %r3469, %r3468; // inline asm bfe.u32 %r2097, %r2110, %r2499, %r2512; // inline asm mul.wide.u32 %rd1013, %r2097, 4; add.s64 %rd1014, %rd42, %rd1013; ld.shared.u32 %r3470, [%rd1014]; // inline asm bfe.u32 %r2101, %r2110, %r2503, %r2512; // inline asm mul.wide.u32 %rd1015, %r2101, 4; add.s64 %rd1016, %rd45, %rd1015; ld.shared.u32 %r3471, [%rd1016]; add.s32 %r3472, %r3471, %r3470; // inline asm bfe.u32 %r2105, %r2110, %r2512, %r2512; // inline asm mul.wide.u32 %rd1017, %r2105, 4; add.s64 %rd1018, %rd48, %rd1017; ld.shared.u32 %r3473, [%rd1018]; xor.b32 %r3474, %r3473, %r3472; // inline asm bfe.u32 %r2109, %r2110, %r208, %r2512; // inline asm mul.wide.u32 %rd1019, %r2109, 4; add.s64 %rd1020, %rd51, %rd1019; ld.shared.u32 %r3475, [%rd1020]; add.s32 %r3476, %r3475, %r3474; xor.b32 %r3477, %r70, %r2094; xor.b32 %r2126, %r3477, %r3476; // inline asm bfe.u32 %r2113, %r2126, %r2499, %r2512; // inline asm 
mul.wide.u32 %rd1021, %r2113, 4; add.s64 %rd1022, %rd42, %rd1021; ld.shared.u32 %r3478, [%rd1022]; // inline asm bfe.u32 %r2117, %r2126, %r2503, %r2512; // inline asm mul.wide.u32 %rd1023, %r2117, 4; add.s64 %rd1024, %rd45, %rd1023; ld.shared.u32 %r3479, [%rd1024]; add.s32 %r3480, %r3479, %r3478; // inline asm bfe.u32 %r2121, %r2126, %r2512, %r2512; // inline asm mul.wide.u32 %rd1025, %r2121, 4; add.s64 %rd1026, %rd48, %rd1025; ld.shared.u32 %r3481, [%rd1026]; xor.b32 %r3482, %r3481, %r3480; // inline asm bfe.u32 %r2125, %r2126, %r208, %r2512; // inline asm mul.wide.u32 %rd1027, %r2125, 4; add.s64 %rd1028, %rd51, %rd1027; ld.shared.u32 %r3483, [%rd1028]; add.s32 %r3484, %r3483, %r3482; xor.b32 %r3485, %r73, %r2110; xor.b32 %r2142, %r3485, %r3484; // inline asm bfe.u32 %r2129, %r2142, %r2499, %r2512; // inline asm mul.wide.u32 %rd1029, %r2129, 4; add.s64 %rd1030, %rd42, %rd1029; ld.shared.u32 %r3486, [%rd1030]; // inline asm bfe.u32 %r2133, %r2142, %r2503, %r2512; // inline asm mul.wide.u32 %rd1031, %r2133, 4; add.s64 %rd1032, %rd45, %rd1031; ld.shared.u32 %r3487, [%rd1032]; add.s32 %r3488, %r3487, %r3486; // inline asm bfe.u32 %r2137, %r2142, %r2512, %r2512; // inline asm mul.wide.u32 %rd1033, %r2137, 4; add.s64 %rd1034, %rd48, %rd1033; ld.shared.u32 %r3489, [%rd1034]; xor.b32 %r3490, %r3489, %r3488; // inline asm bfe.u32 %r2141, %r2142, %r208, %r2512; // inline asm mul.wide.u32 %rd1035, %r2141, 4; add.s64 %rd1036, %rd51, %rd1035; ld.shared.u32 %r3491, [%rd1036]; add.s32 %r3492, %r3491, %r3490; xor.b32 %r3493, %r72, %r2126; xor.b32 %r2158, %r3493, %r3492; // inline asm bfe.u32 %r2145, %r2158, %r2499, %r2512; // inline asm mul.wide.u32 %rd1037, %r2145, 4; add.s64 %rd1038, %rd42, %rd1037; ld.shared.u32 %r3494, [%rd1038]; // inline asm bfe.u32 %r2149, %r2158, %r2503, %r2512; // inline asm mul.wide.u32 %rd1039, %r2149, 4; add.s64 %rd1040, %rd45, %rd1039; ld.shared.u32 %r3495, [%rd1040]; add.s32 %r3496, %r3495, %r3494; // inline asm bfe.u32 %r2153, %r2158, %r2512, 
%r2512; // inline asm mul.wide.u32 %rd1041, %r2153, 4; add.s64 %rd1042, %rd48, %rd1041; ld.shared.u32 %r3497, [%rd1042]; xor.b32 %r3498, %r3497, %r3496; // inline asm bfe.u32 %r2157, %r2158, %r208, %r2512; // inline asm mul.wide.u32 %rd1043, %r2157, 4; add.s64 %rd1044, %rd51, %rd1043; ld.shared.u32 %r3499, [%rd1044]; add.s32 %r3500, %r3499, %r3498; xor.b32 %r3501, %r75, %r2142; xor.b32 %r2174, %r3501, %r3500; // inline asm bfe.u32 %r2161, %r2174, %r2499, %r2512; // inline asm mul.wide.u32 %rd1045, %r2161, 4; add.s64 %rd1046, %rd42, %rd1045; ld.shared.u32 %r3502, [%rd1046]; // inline asm bfe.u32 %r2165, %r2174, %r2503, %r2512; // inline asm mul.wide.u32 %rd1047, %r2165, 4; add.s64 %rd1048, %rd45, %rd1047; ld.shared.u32 %r3503, [%rd1048]; add.s32 %r3504, %r3503, %r3502; // inline asm bfe.u32 %r2169, %r2174, %r2512, %r2512; // inline asm mul.wide.u32 %rd1049, %r2169, 4; add.s64 %rd1050, %rd48, %rd1049; ld.shared.u32 %r3505, [%rd1050]; xor.b32 %r3506, %r3505, %r3504; // inline asm bfe.u32 %r2173, %r2174, %r208, %r2512; // inline asm mul.wide.u32 %rd1051, %r2173, 4; add.s64 %rd1052, %rd51, %rd1051; ld.shared.u32 %r3507, [%rd1052]; add.s32 %r3508, %r3507, %r3506; xor.b32 %r3509, %r74, %r2158; xor.b32 %r2190, %r3509, %r3508; // inline asm bfe.u32 %r2177, %r2190, %r2499, %r2512; // inline asm mul.wide.u32 %rd1053, %r2177, 4; add.s64 %rd1054, %rd42, %rd1053; ld.shared.u32 %r3510, [%rd1054]; // inline asm bfe.u32 %r2181, %r2190, %r2503, %r2512; // inline asm mul.wide.u32 %rd1055, %r2181, 4; add.s64 %rd1056, %rd45, %rd1055; ld.shared.u32 %r3511, [%rd1056]; add.s32 %r3512, %r3511, %r3510; // inline asm bfe.u32 %r2185, %r2190, %r2512, %r2512; // inline asm mul.wide.u32 %rd1057, %r2185, 4; add.s64 %rd1058, %rd48, %rd1057; ld.shared.u32 %r3513, [%rd1058]; xor.b32 %r3514, %r3513, %r3512; // inline asm bfe.u32 %r2189, %r2190, %r208, %r2512; // inline asm mul.wide.u32 %rd1059, %r2189, 4; add.s64 %rd1060, %rd51, %rd1059; ld.shared.u32 %r3515, [%rd1060]; add.s32 %r3516, %r3515, 
%r3514; xor.b32 %r3517, %r77, %r2174; xor.b32 %r2206, %r3517, %r3516; // inline asm bfe.u32 %r2193, %r2206, %r2499, %r2512; // inline asm mul.wide.u32 %rd1061, %r2193, 4; add.s64 %rd1062, %rd42, %rd1061; ld.shared.u32 %r3518, [%rd1062]; // inline asm bfe.u32 %r2197, %r2206, %r2503, %r2512; // inline asm mul.wide.u32 %rd1063, %r2197, 4; add.s64 %rd1064, %rd45, %rd1063; ld.shared.u32 %r3519, [%rd1064]; add.s32 %r3520, %r3519, %r3518; // inline asm bfe.u32 %r2201, %r2206, %r2512, %r2512; // inline asm mul.wide.u32 %rd1065, %r2201, 4; add.s64 %rd1066, %rd48, %rd1065; ld.shared.u32 %r3521, [%rd1066]; xor.b32 %r3522, %r3521, %r3520; // inline asm bfe.u32 %r2205, %r2206, %r208, %r2512; // inline asm mul.wide.u32 %rd1067, %r2205, 4; add.s64 %rd1068, %rd51, %rd1067; ld.shared.u32 %r3523, [%rd1068]; add.s32 %r3524, %r3523, %r3522; xor.b32 %r3525, %r76, %r2190; xor.b32 %r2222, %r3525, %r3524; // inline asm bfe.u32 %r2209, %r2222, %r2499, %r2512; // inline asm mul.wide.u32 %rd1069, %r2209, 4; add.s64 %rd1070, %rd42, %rd1069; ld.shared.u32 %r3526, [%rd1070]; // inline asm bfe.u32 %r2213, %r2222, %r2503, %r2512; // inline asm mul.wide.u32 %rd1071, %r2213, 4; add.s64 %rd1072, %rd45, %rd1071; ld.shared.u32 %r3527, [%rd1072]; add.s32 %r3528, %r3527, %r3526; // inline asm bfe.u32 %r2217, %r2222, %r2512, %r2512; // inline asm mul.wide.u32 %rd1073, %r2217, 4; add.s64 %rd1074, %rd48, %rd1073; ld.shared.u32 %r3529, [%rd1074]; xor.b32 %r3530, %r3529, %r3528; // inline asm bfe.u32 %r2221, %r2222, %r208, %r2512; // inline asm mul.wide.u32 %rd1075, %r2221, 4; add.s64 %rd1076, %rd51, %rd1075; ld.shared.u32 %r3531, [%rd1076]; add.s32 %r3532, %r3531, %r3530; xor.b32 %r3533, %r2637, %r2206; xor.b32 %r2238, %r3533, %r3532; // inline asm bfe.u32 %r2225, %r2238, %r2499, %r2512; // inline asm mul.wide.u32 %rd1077, %r2225, 4; add.s64 %rd1078, %rd42, %rd1077; ld.shared.u32 %r3534, [%rd1078]; // inline asm bfe.u32 %r2229, %r2238, %r2503, %r2512; // inline asm mul.wide.u32 %rd1079, %r2229, 4; add.s64 
%rd1080, %rd45, %rd1079; ld.shared.u32 %r3535, [%rd1080]; add.s32 %r3536, %r3535, %r3534; // inline asm bfe.u32 %r2233, %r2238, %r2512, %r2512; // inline asm mul.wide.u32 %rd1081, %r2233, 4; add.s64 %rd1082, %rd48, %rd1081; ld.shared.u32 %r3537, [%rd1082]; xor.b32 %r3538, %r3537, %r3536; // inline asm bfe.u32 %r2237, %r2238, %r208, %r2512; // inline asm mul.wide.u32 %rd1083, %r2237, 4; add.s64 %rd1084, %rd51, %rd1083; ld.shared.u32 %r3539, [%rd1084]; add.s32 %r3540, %r3539, %r3538; xor.b32 %r3541, %r2646, %r2222; xor.b32 %r2254, %r3541, %r3540; // inline asm bfe.u32 %r2241, %r2254, %r2499, %r2512; // inline asm mul.wide.u32 %rd1085, %r2241, 4; add.s64 %rd1086, %rd42, %rd1085; ld.shared.u32 %r3542, [%rd1086]; // inline asm bfe.u32 %r2245, %r2254, %r2503, %r2512; // inline asm mul.wide.u32 %rd1087, %r2245, 4; add.s64 %rd1088, %rd45, %rd1087; ld.shared.u32 %r3543, [%rd1088]; add.s32 %r3544, %r3543, %r3542; // inline asm bfe.u32 %r2249, %r2254, %r2512, %r2512; // inline asm mul.wide.u32 %rd1089, %r2249, 4; add.s64 %rd1090, %rd48, %rd1089; ld.shared.u32 %r3545, [%rd1090]; xor.b32 %r3546, %r3545, %r3544; // inline asm bfe.u32 %r2253, %r2254, %r208, %r2512; // inline asm mul.wide.u32 %rd1091, %r2253, 4; add.s64 %rd1092, %rd51, %rd1091; ld.shared.u32 %r3547, [%rd1092]; add.s32 %r3548, %r3547, %r3546; xor.b32 %r3549, %r2655, %r2238; xor.b32 %r78, %r3549, %r3548; xor.b32 %r79, %r2657, %r2254; xor.b32 %r2270, %r462, %r2254; // inline asm bfe.u32 %r2257, %r2270, %r2499, %r2512; // inline asm mul.wide.u32 %rd1093, %r2257, 4; add.s64 %rd1094, %rd42, %rd1093; ld.shared.u32 %r3550, [%rd1094]; // inline asm bfe.u32 %r2261, %r2270, %r2503, %r2512; // inline asm mul.wide.u32 %rd1095, %r2261, 4; add.s64 %rd1096, %rd45, %rd1095; ld.shared.u32 %r3551, [%rd1096]; add.s32 %r3552, %r3551, %r3550; // inline asm bfe.u32 %r2265, %r2270, %r2512, %r2512; // inline asm mul.wide.u32 %rd1097, %r2265, 4; add.s64 %rd1098, %rd48, %rd1097; ld.shared.u32 %r3553, [%rd1098]; xor.b32 %r3554, %r3553, 
%r3552; // inline asm bfe.u32 %r2269, %r2270, %r208, %r2512; // inline asm mul.wide.u32 %rd1099, %r2269, 4; add.s64 %rd1100, %rd51, %rd1099; ld.shared.u32 %r3555, [%rd1100]; add.s32 %r3556, %r3555, %r3554; xor.b32 %r3557, %r64, %r78; xor.b32 %r2286, %r3557, %r3556; // inline asm bfe.u32 %r2273, %r2286, %r2499, %r2512; // inline asm mul.wide.u32 %rd1101, %r2273, 4; add.s64 %rd1102, %rd42, %rd1101; ld.shared.u32 %r3558, [%rd1102]; // inline asm bfe.u32 %r2277, %r2286, %r2503, %r2512; // inline asm mul.wide.u32 %rd1103, %r2277, 4; add.s64 %rd1104, %rd45, %rd1103; ld.shared.u32 %r3559, [%rd1104]; add.s32 %r3560, %r3559, %r3558; // inline asm bfe.u32 %r2281, %r2286, %r2512, %r2512; // inline asm mul.wide.u32 %rd1105, %r2281, 4; add.s64 %rd1106, %rd48, %rd1105; ld.shared.u32 %r3561, [%rd1106]; xor.b32 %r3562, %r3561, %r3560; // inline asm bfe.u32 %r2285, %r2286, %r208, %r2512; // inline asm mul.wide.u32 %rd1107, %r2285, 4; add.s64 %rd1108, %rd51, %rd1107; ld.shared.u32 %r3563, [%rd1108]; add.s32 %r3564, %r3563, %r3562; xor.b32 %r3565, %r67, %r2270; xor.b32 %r2302, %r3565, %r3564; // inline asm bfe.u32 %r2289, %r2302, %r2499, %r2512; // inline asm mul.wide.u32 %rd1109, %r2289, 4; add.s64 %rd1110, %rd42, %rd1109; ld.shared.u32 %r3566, [%rd1110]; // inline asm bfe.u32 %r2293, %r2302, %r2503, %r2512; // inline asm mul.wide.u32 %rd1111, %r2293, 4; add.s64 %rd1112, %rd45, %rd1111; ld.shared.u32 %r3567, [%rd1112]; add.s32 %r3568, %r3567, %r3566; // inline asm bfe.u32 %r2297, %r2302, %r2512, %r2512; // inline asm mul.wide.u32 %rd1113, %r2297, 4; add.s64 %rd1114, %rd48, %rd1113; ld.shared.u32 %r3569, [%rd1114]; xor.b32 %r3570, %r3569, %r3568; // inline asm bfe.u32 %r2301, %r2302, %r208, %r2512; // inline asm mul.wide.u32 %rd1115, %r2301, 4; add.s64 %rd1116, %rd51, %rd1115; ld.shared.u32 %r3571, [%rd1116]; add.s32 %r3572, %r3571, %r3570; xor.b32 %r3573, %r66, %r2286; xor.b32 %r2318, %r3573, %r3572; // inline asm bfe.u32 %r2305, %r2318, %r2499, %r2512; // inline asm mul.wide.u32 
%rd1117, %r2305, 4; add.s64 %rd1118, %rd42, %rd1117; ld.shared.u32 %r3574, [%rd1118]; // inline asm bfe.u32 %r2309, %r2318, %r2503, %r2512; // inline asm mul.wide.u32 %rd1119, %r2309, 4; add.s64 %rd1120, %rd45, %rd1119; ld.shared.u32 %r3575, [%rd1120]; add.s32 %r3576, %r3575, %r3574; // inline asm bfe.u32 %r2313, %r2318, %r2512, %r2512; // inline asm mul.wide.u32 %rd1121, %r2313, 4; add.s64 %rd1122, %rd48, %rd1121; ld.shared.u32 %r3577, [%rd1122]; xor.b32 %r3578, %r3577, %r3576; // inline asm bfe.u32 %r2317, %r2318, %r208, %r2512; // inline asm mul.wide.u32 %rd1123, %r2317, 4; add.s64 %rd1124, %rd51, %rd1123; ld.shared.u32 %r3579, [%rd1124]; add.s32 %r3580, %r3579, %r3578; xor.b32 %r3581, %r69, %r2302; xor.b32 %r2334, %r3581, %r3580; // inline asm bfe.u32 %r2321, %r2334, %r2499, %r2512; // inline asm mul.wide.u32 %rd1125, %r2321, 4; add.s64 %rd1126, %rd42, %rd1125; ld.shared.u32 %r3582, [%rd1126]; // inline asm bfe.u32 %r2325, %r2334, %r2503, %r2512; // inline asm mul.wide.u32 %rd1127, %r2325, 4; add.s64 %rd1128, %rd45, %rd1127; ld.shared.u32 %r3583, [%rd1128]; add.s32 %r3584, %r3583, %r3582; // inline asm bfe.u32 %r2329, %r2334, %r2512, %r2512; // inline asm mul.wide.u32 %rd1129, %r2329, 4; add.s64 %rd1130, %rd48, %rd1129; ld.shared.u32 %r3585, [%rd1130]; xor.b32 %r3586, %r3585, %r3584; // inline asm bfe.u32 %r2333, %r2334, %r208, %r2512; // inline asm mul.wide.u32 %rd1131, %r2333, 4; add.s64 %rd1132, %rd51, %rd1131; ld.shared.u32 %r3587, [%rd1132]; add.s32 %r3588, %r3587, %r3586; xor.b32 %r3589, %r68, %r2318; xor.b32 %r2350, %r3589, %r3588; // inline asm bfe.u32 %r2337, %r2350, %r2499, %r2512; // inline asm mul.wide.u32 %rd1133, %r2337, 4; add.s64 %rd1134, %rd42, %rd1133; ld.shared.u32 %r3590, [%rd1134]; // inline asm bfe.u32 %r2341, %r2350, %r2503, %r2512; // inline asm mul.wide.u32 %rd1135, %r2341, 4; add.s64 %rd1136, %rd45, %rd1135; ld.shared.u32 %r3591, [%rd1136]; add.s32 %r3592, %r3591, %r3590; // inline asm bfe.u32 %r2345, %r2350, %r2512, %r2512; // inline 
asm mul.wide.u32 %rd1137, %r2345, 4; add.s64 %rd1138, %rd48, %rd1137; ld.shared.u32 %r3593, [%rd1138]; xor.b32 %r3594, %r3593, %r3592; // inline asm bfe.u32 %r2349, %r2350, %r208, %r2512; // inline asm mul.wide.u32 %rd1139, %r2349, 4; add.s64 %rd1140, %rd51, %rd1139; ld.shared.u32 %r3595, [%rd1140]; add.s32 %r3596, %r3595, %r3594; xor.b32 %r3597, %r71, %r2334; xor.b32 %r2366, %r3597, %r3596; // inline asm bfe.u32 %r2353, %r2366, %r2499, %r2512; // inline asm mul.wide.u32 %rd1141, %r2353, 4; add.s64 %rd1142, %rd42, %rd1141; ld.shared.u32 %r3598, [%rd1142]; // inline asm bfe.u32 %r2357, %r2366, %r2503, %r2512; // inline asm mul.wide.u32 %rd1143, %r2357, 4; add.s64 %rd1144, %rd45, %rd1143; ld.shared.u32 %r3599, [%rd1144]; add.s32 %r3600, %r3599, %r3598; // inline asm bfe.u32 %r2361, %r2366, %r2512, %r2512; // inline asm mul.wide.u32 %rd1145, %r2361, 4; add.s64 %rd1146, %rd48, %rd1145; ld.shared.u32 %r3601, [%rd1146]; xor.b32 %r3602, %r3601, %r3600; // inline asm bfe.u32 %r2365, %r2366, %r208, %r2512; // inline asm mul.wide.u32 %rd1147, %r2365, 4; add.s64 %rd1148, %rd51, %rd1147; ld.shared.u32 %r3603, [%rd1148]; add.s32 %r3604, %r3603, %r3602; xor.b32 %r3605, %r70, %r2350; xor.b32 %r2382, %r3605, %r3604; // inline asm bfe.u32 %r2369, %r2382, %r2499, %r2512; // inline asm mul.wide.u32 %rd1149, %r2369, 4; add.s64 %rd1150, %rd42, %rd1149; ld.shared.u32 %r3606, [%rd1150]; // inline asm bfe.u32 %r2373, %r2382, %r2503, %r2512; // inline asm mul.wide.u32 %rd1151, %r2373, 4; add.s64 %rd1152, %rd45, %rd1151; ld.shared.u32 %r3607, [%rd1152]; add.s32 %r3608, %r3607, %r3606; // inline asm bfe.u32 %r2377, %r2382, %r2512, %r2512; // inline asm mul.wide.u32 %rd1153, %r2377, 4; add.s64 %rd1154, %rd48, %rd1153; ld.shared.u32 %r3609, [%rd1154]; xor.b32 %r3610, %r3609, %r3608; // inline asm bfe.u32 %r2381, %r2382, %r208, %r2512; // inline asm mul.wide.u32 %rd1155, %r2381, 4; add.s64 %rd1156, %rd51, %rd1155; ld.shared.u32 %r3611, [%rd1156]; add.s32 %r3612, %r3611, %r3610; xor.b32 %r3613, 
%r73, %r2366; xor.b32 %r2398, %r3613, %r3612; // inline asm bfe.u32 %r2385, %r2398, %r2499, %r2512; // inline asm mul.wide.u32 %rd1157, %r2385, 4; add.s64 %rd1158, %rd42, %rd1157; ld.shared.u32 %r3614, [%rd1158]; // inline asm bfe.u32 %r2389, %r2398, %r2503, %r2512; // inline asm mul.wide.u32 %rd1159, %r2389, 4; add.s64 %rd1160, %rd45, %rd1159; ld.shared.u32 %r3615, [%rd1160]; add.s32 %r3616, %r3615, %r3614; // inline asm bfe.u32 %r2393, %r2398, %r2512, %r2512; // inline asm mul.wide.u32 %rd1161, %r2393, 4; add.s64 %rd1162, %rd48, %rd1161; ld.shared.u32 %r3617, [%rd1162]; xor.b32 %r3618, %r3617, %r3616; // inline asm bfe.u32 %r2397, %r2398, %r208, %r2512; // inline asm mul.wide.u32 %rd1163, %r2397, 4; add.s64 %rd1164, %rd51, %rd1163; ld.shared.u32 %r3619, [%rd1164]; add.s32 %r3620, %r3619, %r3618; xor.b32 %r3621, %r72, %r2382; xor.b32 %r2414, %r3621, %r3620; // inline asm bfe.u32 %r2401, %r2414, %r2499, %r2512; // inline asm mul.wide.u32 %rd1165, %r2401, 4; add.s64 %rd1166, %rd42, %rd1165; ld.shared.u32 %r3622, [%rd1166]; // inline asm bfe.u32 %r2405, %r2414, %r2503, %r2512; // inline asm mul.wide.u32 %rd1167, %r2405, 4; add.s64 %rd1168, %rd45, %rd1167; ld.shared.u32 %r3623, [%rd1168]; add.s32 %r3624, %r3623, %r3622; // inline asm bfe.u32 %r2409, %r2414, %r2512, %r2512; // inline asm mul.wide.u32 %rd1169, %r2409, 4; add.s64 %rd1170, %rd48, %rd1169; ld.shared.u32 %r3625, [%rd1170]; xor.b32 %r3626, %r3625, %r3624; // inline asm bfe.u32 %r2413, %r2414, %r208, %r2512; // inline asm mul.wide.u32 %rd1171, %r2413, 4; add.s64 %rd1172, %rd51, %rd1171; ld.shared.u32 %r3627, [%rd1172]; add.s32 %r3628, %r3627, %r3626; xor.b32 %r3629, %r75, %r2398; xor.b32 %r2430, %r3629, %r3628; // inline asm bfe.u32 %r2417, %r2430, %r2499, %r2512; // inline asm mul.wide.u32 %rd1173, %r2417, 4; add.s64 %rd1174, %rd42, %rd1173; ld.shared.u32 %r3630, [%rd1174]; // inline asm bfe.u32 %r2421, %r2430, %r2503, %r2512; // inline asm mul.wide.u32 %rd1175, %r2421, 4; add.s64 %rd1176, %rd45, %rd1175; 
ld.shared.u32 %r3631, [%rd1176]; add.s32 %r3632, %r3631, %r3630; // inline asm bfe.u32 %r2425, %r2430, %r2512, %r2512; // inline asm mul.wide.u32 %rd1177, %r2425, 4; add.s64 %rd1178, %rd48, %rd1177; ld.shared.u32 %r3633, [%rd1178]; xor.b32 %r3634, %r3633, %r3632; // inline asm bfe.u32 %r2429, %r2430, %r208, %r2512; // inline asm mul.wide.u32 %rd1179, %r2429, 4; add.s64 %rd1180, %rd51, %rd1179; ld.shared.u32 %r3635, [%rd1180]; add.s32 %r3636, %r3635, %r3634; xor.b32 %r3637, %r74, %r2414; xor.b32 %r2446, %r3637, %r3636; // inline asm bfe.u32 %r2433, %r2446, %r2499, %r2512; // inline asm mul.wide.u32 %rd1181, %r2433, 4; add.s64 %rd1182, %rd42, %rd1181; ld.shared.u32 %r3638, [%rd1182]; // inline asm bfe.u32 %r2437, %r2446, %r2503, %r2512; // inline asm mul.wide.u32 %rd1183, %r2437, 4; add.s64 %rd1184, %rd45, %rd1183; ld.shared.u32 %r3639, [%rd1184]; add.s32 %r3640, %r3639, %r3638; // inline asm bfe.u32 %r2441, %r2446, %r2512, %r2512; // inline asm mul.wide.u32 %rd1185, %r2441, 4; add.s64 %rd1186, %rd48, %rd1185; ld.shared.u32 %r3641, [%rd1186]; xor.b32 %r3642, %r3641, %r3640; // inline asm bfe.u32 %r2445, %r2446, %r208, %r2512; // inline asm mul.wide.u32 %rd1187, %r2445, 4; add.s64 %rd1188, %rd51, %rd1187; ld.shared.u32 %r3643, [%rd1188]; add.s32 %r3644, %r3643, %r3642; xor.b32 %r3645, %r77, %r2430; xor.b32 %r2462, %r3645, %r3644; // inline asm bfe.u32 %r2449, %r2462, %r2499, %r2512; // inline asm mul.wide.u32 %rd1189, %r2449, 4; add.s64 %rd1190, %rd42, %rd1189; ld.shared.u32 %r3646, [%rd1190]; // inline asm bfe.u32 %r2453, %r2462, %r2503, %r2512; // inline asm mul.wide.u32 %rd1191, %r2453, 4; add.s64 %rd1192, %rd45, %rd1191; ld.shared.u32 %r3647, [%rd1192]; add.s32 %r3648, %r3647, %r3646; // inline asm bfe.u32 %r2457, %r2462, %r2512, %r2512; // inline asm mul.wide.u32 %rd1193, %r2457, 4; add.s64 %rd1194, %rd48, %rd1193; ld.shared.u32 %r3649, [%rd1194]; xor.b32 %r3650, %r3649, %r3648; // inline asm bfe.u32 %r2461, %r2462, %r208, %r2512; // inline asm mul.wide.u32 
%rd1195, %r2461, 4; add.s64 %rd1196, %rd51, %rd1195; ld.shared.u32 %r3651, [%rd1196]; add.s32 %r3652, %r3651, %r3650; xor.b32 %r3653, %r76, %r2446; xor.b32 %r2478, %r3653, %r3652; // inline asm bfe.u32 %r2465, %r2478, %r2499, %r2512; // inline asm mul.wide.u32 %rd1197, %r2465, 4; add.s64 %rd1198, %rd42, %rd1197; ld.shared.u32 %r3654, [%rd1198]; // inline asm bfe.u32 %r2469, %r2478, %r2503, %r2512; // inline asm mul.wide.u32 %rd1199, %r2469, 4; add.s64 %rd1200, %rd45, %rd1199; ld.shared.u32 %r3655, [%rd1200]; add.s32 %r3656, %r3655, %r3654; // inline asm bfe.u32 %r2473, %r2478, %r2512, %r2512; // inline asm mul.wide.u32 %rd1201, %r2473, 4; add.s64 %rd1202, %rd48, %rd1201; ld.shared.u32 %r3657, [%rd1202]; xor.b32 %r3658, %r3657, %r3656; // inline asm bfe.u32 %r2477, %r2478, %r208, %r2512; // inline asm mul.wide.u32 %rd1203, %r2477, 4; add.s64 %rd1204, %rd51, %rd1203; ld.shared.u32 %r3659, [%rd1204]; add.s32 %r3660, %r3659, %r3658; xor.b32 %r3661, %r79, %r2462; xor.b32 %r2494, %r3661, %r3660; // inline asm bfe.u32 %r2481, %r2494, %r2499, %r2512; // inline asm mul.wide.u32 %rd1205, %r2481, 4; add.s64 %rd1206, %rd42, %rd1205; ld.shared.u32 %r3662, [%rd1206]; // inline asm bfe.u32 %r2485, %r2494, %r2503, %r2512; // inline asm mul.wide.u32 %rd1207, %r2485, 4; add.s64 %rd1208, %rd45, %rd1207; ld.shared.u32 %r3663, [%rd1208]; add.s32 %r3664, %r3663, %r3662; // inline asm bfe.u32 %r2489, %r2494, %r2512, %r2512; // inline asm mul.wide.u32 %rd1209, %r2489, 4; add.s64 %rd1210, %rd48, %rd1209; ld.shared.u32 %r3665, [%rd1210]; xor.b32 %r3666, %r3665, %r3664; // inline asm bfe.u32 %r2493, %r2494, %r208, %r2512; // inline asm mul.wide.u32 %rd1211, %r2493, 4; add.s64 %rd1212, %rd51, %rd1211; ld.shared.u32 %r3667, [%rd1212]; add.s32 %r3668, %r3667, %r3666; xor.b32 %r3669, %r78, %r2478; xor.b32 %r2510, %r3669, %r3668; // inline asm bfe.u32 %r2497, %r2510, %r2499, %r2512; // inline asm mul.wide.u32 %rd1213, %r2497, 4; add.s64 %rd1214, %rd42, %rd1213; ld.shared.u32 %r3670, [%rd1214]; // 
inline asm bfe.u32 %r2501, %r2510, %r2503, %r2512; // inline asm mul.wide.u32 %rd1215, %r2501, 4; add.s64 %rd1216, %rd45, %rd1215; ld.shared.u32 %r3671, [%rd1216]; add.s32 %r3672, %r3671, %r3670; // inline asm bfe.u32 %r2505, %r2510, %r2512, %r2512; // inline asm mul.wide.u32 %rd1217, %r2505, 4; add.s64 %rd1218, %rd48, %rd1217; ld.shared.u32 %r3673, [%rd1218]; xor.b32 %r3674, %r3673, %r3672; // inline asm bfe.u32 %r2509, %r2510, %r208, %r2512; // inline asm mul.wide.u32 %rd1219, %r2509, 4; add.s64 %rd1220, %rd51, %rd1219; ld.shared.u32 %r3675, [%rd1220]; add.s32 %r3676, %r3675, %r3674; xor.b32 %r3677, %r2655, %r2494; xor.b32 %r80, %r3677, %r3676; xor.b32 %r81, %r2657, %r2510; mov.u64 %rd3495, %rd3; mov.u32 %r10310, %r208; mov.u32 %r10314, %r80; mov.u32 %r10315, %r81; BB4_6: mov.u32 %r10277, 16; mov.u32 %r10276, 8; mov.u32 %r10275, 24; xor.b32 %r3691, %r65, %r10315; // inline asm bfe.u32 %r3678, %r3691, %r10275, %r10276; // inline asm mul.wide.u32 %rd1224, %r3678, 4; add.s64 %rd1225, %rd42, %rd1224; ld.shared.u32 %r3935, [%rd1225]; // inline asm bfe.u32 %r3682, %r3691, %r10277, %r10276; // inline asm mul.wide.u32 %rd1228, %r3682, 4; add.s64 %rd1229, %rd45, %rd1228; ld.shared.u32 %r3936, [%rd1229]; add.s32 %r3937, %r3936, %r3935; // inline asm bfe.u32 %r3686, %r3691, %r10276, %r10276; // inline asm mul.wide.u32 %rd1232, %r3686, 4; add.s64 %rd1233, %rd48, %rd1232; ld.shared.u32 %r3938, [%rd1233]; xor.b32 %r3939, %r3938, %r3937; // inline asm bfe.u32 %r3690, %r3691, %r208, %r10276; // inline asm mul.wide.u32 %rd1236, %r3690, 4; add.s64 %rd1237, %rd51, %rd1236; ld.shared.u32 %r3940, [%rd1237]; add.s32 %r3941, %r3940, %r3939; xor.b32 %r3942, %r64, %r10314; xor.b32 %r3707, %r3942, %r3941; // inline asm bfe.u32 %r3694, %r3707, %r10275, %r10276; // inline asm mul.wide.u32 %rd1238, %r3694, 4; add.s64 %rd1239, %rd42, %rd1238; ld.shared.u32 %r3943, [%rd1239]; // inline asm bfe.u32 %r3698, %r3707, %r10277, %r10276; // inline asm mul.wide.u32 %rd1240, %r3698, 4; add.s64 %rd1241, 
%rd45, %rd1240; ld.shared.u32 %r3944, [%rd1241]; add.s32 %r3945, %r3944, %r3943; // inline asm bfe.u32 %r3702, %r3707, %r10276, %r10276; // inline asm mul.wide.u32 %rd1242, %r3702, 4; add.s64 %rd1243, %rd48, %rd1242; ld.shared.u32 %r3946, [%rd1243]; xor.b32 %r3947, %r3946, %r3945; // inline asm bfe.u32 %r3706, %r3707, %r208, %r10276; // inline asm mul.wide.u32 %rd1244, %r3706, 4; add.s64 %rd1245, %rd51, %rd1244; ld.shared.u32 %r3948, [%rd1245]; add.s32 %r3949, %r3948, %r3947; xor.b32 %r3950, %r67, %r3691; xor.b32 %r3723, %r3950, %r3949; // inline asm bfe.u32 %r3710, %r3723, %r10275, %r10276; // inline asm mul.wide.u32 %rd1246, %r3710, 4; add.s64 %rd1247, %rd42, %rd1246; ld.shared.u32 %r3951, [%rd1247]; // inline asm bfe.u32 %r3714, %r3723, %r10277, %r10276; // inline asm mul.wide.u32 %rd1248, %r3714, 4; add.s64 %rd1249, %rd45, %rd1248; ld.shared.u32 %r3952, [%rd1249]; add.s32 %r3953, %r3952, %r3951; // inline asm bfe.u32 %r3718, %r3723, %r10276, %r10276; // inline asm mul.wide.u32 %rd1250, %r3718, 4; add.s64 %rd1251, %rd48, %rd1250; ld.shared.u32 %r3954, [%rd1251]; xor.b32 %r3955, %r3954, %r3953; // inline asm bfe.u32 %r3722, %r3723, %r208, %r10276; // inline asm mul.wide.u32 %rd1252, %r3722, 4; add.s64 %rd1253, %rd51, %rd1252; ld.shared.u32 %r3956, [%rd1253]; add.s32 %r3957, %r3956, %r3955; xor.b32 %r3958, %r66, %r3707; xor.b32 %r3739, %r3958, %r3957; // inline asm bfe.u32 %r3726, %r3739, %r10275, %r10276; // inline asm mul.wide.u32 %rd1254, %r3726, 4; add.s64 %rd1255, %rd42, %rd1254; ld.shared.u32 %r3959, [%rd1255]; // inline asm bfe.u32 %r3730, %r3739, %r10277, %r10276; // inline asm mul.wide.u32 %rd1256, %r3730, 4; add.s64 %rd1257, %rd45, %rd1256; ld.shared.u32 %r3960, [%rd1257]; add.s32 %r3961, %r3960, %r3959; // inline asm bfe.u32 %r3734, %r3739, %r10276, %r10276; // inline asm mul.wide.u32 %rd1258, %r3734, 4; add.s64 %rd1259, %rd48, %rd1258; ld.shared.u32 %r3962, [%rd1259]; xor.b32 %r3963, %r3962, %r3961; // inline asm bfe.u32 %r3738, %r3739, %r208, %r10276; 
// inline asm mul.wide.u32 %rd1260, %r3738, 4; add.s64 %rd1261, %rd51, %rd1260; ld.shared.u32 %r3964, [%rd1261]; add.s32 %r3965, %r3964, %r3963; xor.b32 %r3966, %r69, %r3723; xor.b32 %r3755, %r3966, %r3965; // inline asm bfe.u32 %r3742, %r3755, %r10275, %r10276; // inline asm mul.wide.u32 %rd1262, %r3742, 4; add.s64 %rd1263, %rd42, %rd1262; ld.shared.u32 %r3967, [%rd1263]; // inline asm bfe.u32 %r3746, %r3755, %r10277, %r10276; // inline asm mul.wide.u32 %rd1264, %r3746, 4; add.s64 %rd1265, %rd45, %rd1264; ld.shared.u32 %r3968, [%rd1265]; add.s32 %r3969, %r3968, %r3967; // inline asm bfe.u32 %r3750, %r3755, %r10276, %r10276; // inline asm mul.wide.u32 %rd1266, %r3750, 4; add.s64 %rd1267, %rd48, %rd1266; ld.shared.u32 %r3970, [%rd1267]; xor.b32 %r3971, %r3970, %r3969; // inline asm bfe.u32 %r3754, %r3755, %r208, %r10276; // inline asm mul.wide.u32 %rd1268, %r3754, 4; add.s64 %rd1269, %rd51, %rd1268; ld.shared.u32 %r3972, [%rd1269]; add.s32 %r3973, %r3972, %r3971; xor.b32 %r3974, %r68, %r3739; xor.b32 %r3771, %r3974, %r3973; // inline asm bfe.u32 %r3758, %r3771, %r10275, %r10276; // inline asm mul.wide.u32 %rd1270, %r3758, 4; add.s64 %rd1271, %rd42, %rd1270; ld.shared.u32 %r3975, [%rd1271]; // inline asm bfe.u32 %r3762, %r3771, %r10277, %r10276; // inline asm mul.wide.u32 %rd1272, %r3762, 4; add.s64 %rd1273, %rd45, %rd1272; ld.shared.u32 %r3976, [%rd1273]; add.s32 %r3977, %r3976, %r3975; // inline asm bfe.u32 %r3766, %r3771, %r10276, %r10276; // inline asm mul.wide.u32 %rd1274, %r3766, 4; add.s64 %rd1275, %rd48, %rd1274; ld.shared.u32 %r3978, [%rd1275]; xor.b32 %r3979, %r3978, %r3977; // inline asm bfe.u32 %r3770, %r3771, %r208, %r10276; // inline asm mul.wide.u32 %rd1276, %r3770, 4; add.s64 %rd1277, %rd51, %rd1276; ld.shared.u32 %r3980, [%rd1277]; add.s32 %r3981, %r3980, %r3979; xor.b32 %r3982, %r71, %r3755; xor.b32 %r3787, %r3982, %r3981; // inline asm bfe.u32 %r3774, %r3787, %r10275, %r10276; // inline asm mul.wide.u32 %rd1278, %r3774, 4; add.s64 %rd1279, %rd42, 
%rd1278; ld.shared.u32 %r3983, [%rd1279]; // inline asm bfe.u32 %r3778, %r3787, %r10277, %r10276; // inline asm mul.wide.u32 %rd1280, %r3778, 4; add.s64 %rd1281, %rd45, %rd1280; ld.shared.u32 %r3984, [%rd1281]; add.s32 %r3985, %r3984, %r3983; // inline asm bfe.u32 %r3782, %r3787, %r10276, %r10276; // inline asm mul.wide.u32 %rd1282, %r3782, 4; add.s64 %rd1283, %rd48, %rd1282; ld.shared.u32 %r3986, [%rd1283]; xor.b32 %r3987, %r3986, %r3985; // inline asm bfe.u32 %r3786, %r3787, %r208, %r10276; // inline asm mul.wide.u32 %rd1284, %r3786, 4; add.s64 %rd1285, %rd51, %rd1284; ld.shared.u32 %r3988, [%rd1285]; add.s32 %r3989, %r3988, %r3987; xor.b32 %r3990, %r70, %r3771; xor.b32 %r3803, %r3990, %r3989; // inline asm bfe.u32 %r3790, %r3803, %r10275, %r10276; // inline asm mul.wide.u32 %rd1286, %r3790, 4; add.s64 %rd1287, %rd42, %rd1286; ld.shared.u32 %r3991, [%rd1287]; // inline asm bfe.u32 %r3794, %r3803, %r10277, %r10276; // inline asm mul.wide.u32 %rd1288, %r3794, 4; add.s64 %rd1289, %rd45, %rd1288; ld.shared.u32 %r3992, [%rd1289]; add.s32 %r3993, %r3992, %r3991; // inline asm bfe.u32 %r3798, %r3803, %r10276, %r10276; // inline asm mul.wide.u32 %rd1290, %r3798, 4; add.s64 %rd1291, %rd48, %rd1290; ld.shared.u32 %r3994, [%rd1291]; xor.b32 %r3995, %r3994, %r3993; // inline asm bfe.u32 %r3802, %r3803, %r208, %r10276; // inline asm mul.wide.u32 %rd1292, %r3802, 4; add.s64 %rd1293, %rd51, %rd1292; ld.shared.u32 %r3996, [%rd1293]; add.s32 %r3997, %r3996, %r3995; xor.b32 %r3998, %r73, %r3787; xor.b32 %r3819, %r3998, %r3997; // inline asm bfe.u32 %r3806, %r3819, %r10275, %r10276; // inline asm mul.wide.u32 %rd1294, %r3806, 4; add.s64 %rd1295, %rd42, %rd1294; ld.shared.u32 %r3999, [%rd1295]; // inline asm bfe.u32 %r3810, %r3819, %r10277, %r10276; // inline asm mul.wide.u32 %rd1296, %r3810, 4; add.s64 %rd1297, %rd45, %rd1296; ld.shared.u32 %r4000, [%rd1297]; add.s32 %r4001, %r4000, %r3999; // inline asm bfe.u32 %r3814, %r3819, %r10276, %r10276; // inline asm mul.wide.u32 %rd1298, 
%r3814, 4; add.s64 %rd1299, %rd48, %rd1298; ld.shared.u32 %r4002, [%rd1299]; xor.b32 %r4003, %r4002, %r4001; // inline asm bfe.u32 %r3818, %r3819, %r208, %r10276; // inline asm mul.wide.u32 %rd1300, %r3818, 4; add.s64 %rd1301, %rd51, %rd1300; ld.shared.u32 %r4004, [%rd1301]; add.s32 %r4005, %r4004, %r4003; xor.b32 %r4006, %r72, %r3803; xor.b32 %r3835, %r4006, %r4005; // inline asm bfe.u32 %r3822, %r3835, %r10275, %r10276; // inline asm mul.wide.u32 %rd1302, %r3822, 4; add.s64 %rd1303, %rd42, %rd1302; ld.shared.u32 %r4007, [%rd1303]; // inline asm bfe.u32 %r3826, %r3835, %r10277, %r10276; // inline asm mul.wide.u32 %rd1304, %r3826, 4; add.s64 %rd1305, %rd45, %rd1304; ld.shared.u32 %r4008, [%rd1305]; add.s32 %r4009, %r4008, %r4007; // inline asm bfe.u32 %r3830, %r3835, %r10276, %r10276; // inline asm mul.wide.u32 %rd1306, %r3830, 4; add.s64 %rd1307, %rd48, %rd1306; ld.shared.u32 %r4010, [%rd1307]; xor.b32 %r4011, %r4010, %r4009; // inline asm bfe.u32 %r3834, %r3835, %r208, %r10276; // inline asm mul.wide.u32 %rd1308, %r3834, 4; add.s64 %rd1309, %rd51, %rd1308; ld.shared.u32 %r4012, [%rd1309]; add.s32 %r4013, %r4012, %r4011; xor.b32 %r4014, %r75, %r3819; xor.b32 %r3851, %r4014, %r4013; // inline asm bfe.u32 %r3838, %r3851, %r10275, %r10276; // inline asm mul.wide.u32 %rd1310, %r3838, 4; add.s64 %rd1311, %rd42, %rd1310; ld.shared.u32 %r4015, [%rd1311]; // inline asm bfe.u32 %r3842, %r3851, %r10277, %r10276; // inline asm mul.wide.u32 %rd1312, %r3842, 4; add.s64 %rd1313, %rd45, %rd1312; ld.shared.u32 %r4016, [%rd1313]; add.s32 %r4017, %r4016, %r4015; // inline asm bfe.u32 %r3846, %r3851, %r10276, %r10276; // inline asm mul.wide.u32 %rd1314, %r3846, 4; add.s64 %rd1315, %rd48, %rd1314; ld.shared.u32 %r4018, [%rd1315]; xor.b32 %r4019, %r4018, %r4017; // inline asm bfe.u32 %r3850, %r3851, %r208, %r10276; // inline asm mul.wide.u32 %rd1316, %r3850, 4; add.s64 %rd1317, %rd51, %rd1316; ld.shared.u32 %r4020, [%rd1317]; add.s32 %r4021, %r4020, %r4019; xor.b32 %r4022, %r74, 
%r3835; xor.b32 %r3867, %r4022, %r4021; // inline asm bfe.u32 %r3854, %r3867, %r10275, %r10276; // inline asm mul.wide.u32 %rd1318, %r3854, 4; add.s64 %rd1319, %rd42, %rd1318; ld.shared.u32 %r4023, [%rd1319]; // inline asm bfe.u32 %r3858, %r3867, %r10277, %r10276; // inline asm mul.wide.u32 %rd1320, %r3858, 4; add.s64 %rd1321, %rd45, %rd1320; ld.shared.u32 %r4024, [%rd1321]; add.s32 %r4025, %r4024, %r4023; // inline asm bfe.u32 %r3862, %r3867, %r10276, %r10276; // inline asm mul.wide.u32 %rd1322, %r3862, 4; add.s64 %rd1323, %rd48, %rd1322; ld.shared.u32 %r4026, [%rd1323]; xor.b32 %r4027, %r4026, %r4025; // inline asm bfe.u32 %r3866, %r3867, %r208, %r10276; // inline asm mul.wide.u32 %rd1324, %r3866, 4; add.s64 %rd1325, %rd51, %rd1324; ld.shared.u32 %r4028, [%rd1325]; add.s32 %r4029, %r4028, %r4027; xor.b32 %r4030, %r77, %r3851; xor.b32 %r3883, %r4030, %r4029; // inline asm bfe.u32 %r3870, %r3883, %r10275, %r10276; // inline asm mul.wide.u32 %rd1326, %r3870, 4; add.s64 %rd1327, %rd42, %rd1326; ld.shared.u32 %r4031, [%rd1327]; // inline asm bfe.u32 %r3874, %r3883, %r10277, %r10276; // inline asm mul.wide.u32 %rd1328, %r3874, 4; add.s64 %rd1329, %rd45, %rd1328; ld.shared.u32 %r4032, [%rd1329]; add.s32 %r4033, %r4032, %r4031; // inline asm bfe.u32 %r3878, %r3883, %r10276, %r10276; // inline asm mul.wide.u32 %rd1330, %r3878, 4; add.s64 %rd1331, %rd48, %rd1330; ld.shared.u32 %r4034, [%rd1331]; xor.b32 %r4035, %r4034, %r4033; // inline asm bfe.u32 %r3882, %r3883, %r208, %r10276; // inline asm mul.wide.u32 %rd1332, %r3882, 4; add.s64 %rd1333, %rd51, %rd1332; ld.shared.u32 %r4036, [%rd1333]; add.s32 %r4037, %r4036, %r4035; xor.b32 %r4038, %r76, %r3867; xor.b32 %r3899, %r4038, %r4037; // inline asm bfe.u32 %r3886, %r3899, %r10275, %r10276; // inline asm mul.wide.u32 %rd1334, %r3886, 4; add.s64 %rd1335, %rd42, %rd1334; ld.shared.u32 %r4039, [%rd1335]; // inline asm bfe.u32 %r3890, %r3899, %r10277, %r10276; // inline asm mul.wide.u32 %rd1336, %r3890, 4; add.s64 %rd1337, %rd45, 
%rd1336; ld.shared.u32 %r4040, [%rd1337]; add.s32 %r4041, %r4040, %r4039; // inline asm bfe.u32 %r3894, %r3899, %r10276, %r10276; // inline asm mul.wide.u32 %rd1338, %r3894, 4; add.s64 %rd1339, %rd48, %rd1338; ld.shared.u32 %r4042, [%rd1339]; xor.b32 %r4043, %r4042, %r4041; // inline asm bfe.u32 %r3898, %r3899, %r208, %r10276; // inline asm mul.wide.u32 %rd1340, %r3898, 4; add.s64 %rd1341, %rd51, %rd1340; ld.shared.u32 %r4044, [%rd1341]; add.s32 %r4045, %r4044, %r4043; xor.b32 %r4046, %r79, %r3883; xor.b32 %r3915, %r4046, %r4045; // inline asm bfe.u32 %r3902, %r3915, %r10275, %r10276; // inline asm mul.wide.u32 %rd1342, %r3902, 4; add.s64 %rd1343, %rd42, %rd1342; ld.shared.u32 %r4047, [%rd1343]; // inline asm bfe.u32 %r3906, %r3915, %r10277, %r10276; // inline asm mul.wide.u32 %rd1344, %r3906, 4; add.s64 %rd1345, %rd45, %rd1344; ld.shared.u32 %r4048, [%rd1345]; add.s32 %r4049, %r4048, %r4047; // inline asm bfe.u32 %r3910, %r3915, %r10276, %r10276; // inline asm mul.wide.u32 %rd1346, %r3910, 4; add.s64 %rd1347, %rd48, %rd1346; ld.shared.u32 %r4050, [%rd1347]; xor.b32 %r4051, %r4050, %r4049; // inline asm bfe.u32 %r3914, %r3915, %r208, %r10276; // inline asm mul.wide.u32 %rd1348, %r3914, 4; add.s64 %rd1349, %rd51, %rd1348; ld.shared.u32 %r4052, [%rd1349]; add.s32 %r4053, %r4052, %r4051; xor.b32 %r4054, %r78, %r3899; xor.b32 %r3931, %r4054, %r4053; // inline asm bfe.u32 %r3918, %r3931, %r10275, %r10276; // inline asm mul.wide.u32 %rd1350, %r3918, 4; add.s64 %rd1351, %rd42, %rd1350; ld.shared.u32 %r4055, [%rd1351]; // inline asm bfe.u32 %r3922, %r3931, %r10277, %r10276; // inline asm mul.wide.u32 %rd1352, %r3922, 4; add.s64 %rd1353, %rd45, %rd1352; ld.shared.u32 %r4056, [%rd1353]; add.s32 %r4057, %r4056, %r4055; // inline asm bfe.u32 %r3926, %r3931, %r10276, %r10276; // inline asm mul.wide.u32 %rd1354, %r3926, 4; add.s64 %rd1355, %rd48, %rd1354; ld.shared.u32 %r4058, [%rd1355]; xor.b32 %r4059, %r4058, %r4057; // inline asm bfe.u32 %r3930, %r3931, %r208, %r10276; // 
inline asm mul.wide.u32 %rd1356, %r3930, 4; add.s64 %rd1357, %rd51, %rd1356; ld.shared.u32 %r4060, [%rd1357]; add.s32 %r4061, %r4060, %r4059; xor.b32 %r4062, %r81, %r3915; xor.b32 %r10314, %r4062, %r4061; xor.b32 %r10315, %r80, %r3931; st.shared.u32 [%rd3495], %r10315; st.shared.u32 [%rd3495+4], %r10314; add.s64 %rd3495, %rd3495, 8; add.s32 %r10310, %r10310, 2; setp.lt.u32 %p4, %r10310, 256; @%p4 bra BB4_6; mov.u64 %rd3496, %rd4; mov.u32 %r10313, %r208; BB4_8: mov.u32 %r10280, 16; mov.u32 %r10279, 8; mov.u32 %r10278, 24; xor.b32 %r4076, %r65, %r10315; // inline asm bfe.u32 %r4063, %r4076, %r10278, %r10279; // inline asm mul.wide.u32 %rd1361, %r4063, 4; add.s64 %rd1362, %rd42, %rd1361; ld.shared.u32 %r4320, [%rd1362]; // inline asm bfe.u32 %r4067, %r4076, %r10280, %r10279; // inline asm mul.wide.u32 %rd1365, %r4067, 4; add.s64 %rd1366, %rd45, %rd1365; ld.shared.u32 %r4321, [%rd1366]; add.s32 %r4322, %r4321, %r4320; // inline asm bfe.u32 %r4071, %r4076, %r10279, %r10279; // inline asm mul.wide.u32 %rd1369, %r4071, 4; add.s64 %rd1370, %rd48, %rd1369; ld.shared.u32 %r4323, [%rd1370]; xor.b32 %r4324, %r4323, %r4322; // inline asm bfe.u32 %r4075, %r4076, %r208, %r10279; // inline asm mul.wide.u32 %rd1373, %r4075, 4; add.s64 %rd1374, %rd51, %rd1373; ld.shared.u32 %r4325, [%rd1374]; add.s32 %r4326, %r4325, %r4324; xor.b32 %r4327, %r64, %r10314; xor.b32 %r4092, %r4327, %r4326; // inline asm bfe.u32 %r4079, %r4092, %r10278, %r10279; // inline asm mul.wide.u32 %rd1375, %r4079, 4; add.s64 %rd1376, %rd42, %rd1375; ld.shared.u32 %r4328, [%rd1376]; // inline asm bfe.u32 %r4083, %r4092, %r10280, %r10279; // inline asm mul.wide.u32 %rd1377, %r4083, 4; add.s64 %rd1378, %rd45, %rd1377; ld.shared.u32 %r4329, [%rd1378]; add.s32 %r4330, %r4329, %r4328; // inline asm bfe.u32 %r4087, %r4092, %r10279, %r10279; // inline asm mul.wide.u32 %rd1379, %r4087, 4; add.s64 %rd1380, %rd48, %rd1379; ld.shared.u32 %r4331, [%rd1380]; xor.b32 %r4332, %r4331, %r4330; // inline asm bfe.u32 %r4091, %r4092, 
%r208, %r10279; // inline asm mul.wide.u32 %rd1381, %r4091, 4; add.s64 %rd1382, %rd51, %rd1381; ld.shared.u32 %r4333, [%rd1382]; add.s32 %r4334, %r4333, %r4332; xor.b32 %r4335, %r67, %r4076; xor.b32 %r4108, %r4335, %r4334; // inline asm bfe.u32 %r4095, %r4108, %r10278, %r10279; // inline asm mul.wide.u32 %rd1383, %r4095, 4; add.s64 %rd1384, %rd42, %rd1383; ld.shared.u32 %r4336, [%rd1384]; // inline asm bfe.u32 %r4099, %r4108, %r10280, %r10279; // inline asm mul.wide.u32 %rd1385, %r4099, 4; add.s64 %rd1386, %rd45, %rd1385; ld.shared.u32 %r4337, [%rd1386]; add.s32 %r4338, %r4337, %r4336; // inline asm bfe.u32 %r4103, %r4108, %r10279, %r10279; // inline asm mul.wide.u32 %rd1387, %r4103, 4; add.s64 %rd1388, %rd48, %rd1387; ld.shared.u32 %r4339, [%rd1388]; xor.b32 %r4340, %r4339, %r4338; // inline asm bfe.u32 %r4107, %r4108, %r208, %r10279; // inline asm mul.wide.u32 %rd1389, %r4107, 4; add.s64 %rd1390, %rd51, %rd1389; ld.shared.u32 %r4341, [%rd1390]; add.s32 %r4342, %r4341, %r4340; xor.b32 %r4343, %r66, %r4092; xor.b32 %r4124, %r4343, %r4342; // inline asm bfe.u32 %r4111, %r4124, %r10278, %r10279; // inline asm mul.wide.u32 %rd1391, %r4111, 4; add.s64 %rd1392, %rd42, %rd1391; ld.shared.u32 %r4344, [%rd1392]; // inline asm bfe.u32 %r4115, %r4124, %r10280, %r10279; // inline asm mul.wide.u32 %rd1393, %r4115, 4; add.s64 %rd1394, %rd45, %rd1393; ld.shared.u32 %r4345, [%rd1394]; add.s32 %r4346, %r4345, %r4344; // inline asm bfe.u32 %r4119, %r4124, %r10279, %r10279; // inline asm mul.wide.u32 %rd1395, %r4119, 4; add.s64 %rd1396, %rd48, %rd1395; ld.shared.u32 %r4347, [%rd1396]; xor.b32 %r4348, %r4347, %r4346; // inline asm bfe.u32 %r4123, %r4124, %r208, %r10279; // inline asm mul.wide.u32 %rd1397, %r4123, 4; add.s64 %rd1398, %rd51, %rd1397; ld.shared.u32 %r4349, [%rd1398]; add.s32 %r4350, %r4349, %r4348; xor.b32 %r4351, %r69, %r4108; xor.b32 %r4140, %r4351, %r4350; // inline asm bfe.u32 %r4127, %r4140, %r10278, %r10279; // inline asm mul.wide.u32 %rd1399, %r4127, 4; add.s64 
%rd1400, %rd42, %rd1399; ld.shared.u32 %r4352, [%rd1400]; // inline asm bfe.u32 %r4131, %r4140, %r10280, %r10279; // inline asm mul.wide.u32 %rd1401, %r4131, 4; add.s64 %rd1402, %rd45, %rd1401; ld.shared.u32 %r4353, [%rd1402]; add.s32 %r4354, %r4353, %r4352; // inline asm bfe.u32 %r4135, %r4140, %r10279, %r10279; // inline asm mul.wide.u32 %rd1403, %r4135, 4; add.s64 %rd1404, %rd48, %rd1403; ld.shared.u32 %r4355, [%rd1404]; xor.b32 %r4356, %r4355, %r4354; // inline asm bfe.u32 %r4139, %r4140, %r208, %r10279; // inline asm mul.wide.u32 %rd1405, %r4139, 4; add.s64 %rd1406, %rd51, %rd1405; ld.shared.u32 %r4357, [%rd1406]; add.s32 %r4358, %r4357, %r4356; xor.b32 %r4359, %r68, %r4124; xor.b32 %r4156, %r4359, %r4358; // inline asm bfe.u32 %r4143, %r4156, %r10278, %r10279; // inline asm mul.wide.u32 %rd1407, %r4143, 4; add.s64 %rd1408, %rd42, %rd1407; ld.shared.u32 %r4360, [%rd1408]; // inline asm bfe.u32 %r4147, %r4156, %r10280, %r10279; // inline asm mul.wide.u32 %rd1409, %r4147, 4; add.s64 %rd1410, %rd45, %rd1409; ld.shared.u32 %r4361, [%rd1410]; add.s32 %r4362, %r4361, %r4360; // inline asm bfe.u32 %r4151, %r4156, %r10279, %r10279; // inline asm mul.wide.u32 %rd1411, %r4151, 4; add.s64 %rd1412, %rd48, %rd1411; ld.shared.u32 %r4363, [%rd1412]; xor.b32 %r4364, %r4363, %r4362; // inline asm bfe.u32 %r4155, %r4156, %r208, %r10279; // inline asm mul.wide.u32 %rd1413, %r4155, 4; add.s64 %rd1414, %rd51, %rd1413; ld.shared.u32 %r4365, [%rd1414]; add.s32 %r4366, %r4365, %r4364; xor.b32 %r4367, %r71, %r4140; xor.b32 %r4172, %r4367, %r4366; // inline asm bfe.u32 %r4159, %r4172, %r10278, %r10279; // inline asm mul.wide.u32 %rd1415, %r4159, 4; add.s64 %rd1416, %rd42, %rd1415; ld.shared.u32 %r4368, [%rd1416]; // inline asm bfe.u32 %r4163, %r4172, %r10280, %r10279; // inline asm mul.wide.u32 %rd1417, %r4163, 4; add.s64 %rd1418, %rd45, %rd1417; ld.shared.u32 %r4369, [%rd1418]; add.s32 %r4370, %r4369, %r4368; // inline asm bfe.u32 %r4167, %r4172, %r10279, %r10279; // inline asm 
mul.wide.u32 %rd1419, %r4167, 4; add.s64 %rd1420, %rd48, %rd1419; ld.shared.u32 %r4371, [%rd1420]; xor.b32 %r4372, %r4371, %r4370; // inline asm bfe.u32 %r4171, %r4172, %r208, %r10279; // inline asm mul.wide.u32 %rd1421, %r4171, 4; add.s64 %rd1422, %rd51, %rd1421; ld.shared.u32 %r4373, [%rd1422]; add.s32 %r4374, %r4373, %r4372; xor.b32 %r4375, %r70, %r4156; xor.b32 %r4188, %r4375, %r4374; // inline asm bfe.u32 %r4175, %r4188, %r10278, %r10279; // inline asm mul.wide.u32 %rd1423, %r4175, 4; add.s64 %rd1424, %rd42, %rd1423; ld.shared.u32 %r4376, [%rd1424]; // inline asm bfe.u32 %r4179, %r4188, %r10280, %r10279; // inline asm mul.wide.u32 %rd1425, %r4179, 4; add.s64 %rd1426, %rd45, %rd1425; ld.shared.u32 %r4377, [%rd1426]; add.s32 %r4378, %r4377, %r4376; // inline asm bfe.u32 %r4183, %r4188, %r10279, %r10279; // inline asm mul.wide.u32 %rd1427, %r4183, 4; add.s64 %rd1428, %rd48, %rd1427; ld.shared.u32 %r4379, [%rd1428]; xor.b32 %r4380, %r4379, %r4378; // inline asm bfe.u32 %r4187, %r4188, %r208, %r10279; // inline asm mul.wide.u32 %rd1429, %r4187, 4; add.s64 %rd1430, %rd51, %rd1429; ld.shared.u32 %r4381, [%rd1430]; add.s32 %r4382, %r4381, %r4380; xor.b32 %r4383, %r73, %r4172; xor.b32 %r4204, %r4383, %r4382; // inline asm bfe.u32 %r4191, %r4204, %r10278, %r10279; // inline asm mul.wide.u32 %rd1431, %r4191, 4; add.s64 %rd1432, %rd42, %rd1431; ld.shared.u32 %r4384, [%rd1432]; // inline asm bfe.u32 %r4195, %r4204, %r10280, %r10279; // inline asm mul.wide.u32 %rd1433, %r4195, 4; add.s64 %rd1434, %rd45, %rd1433; ld.shared.u32 %r4385, [%rd1434]; add.s32 %r4386, %r4385, %r4384; // inline asm bfe.u32 %r4199, %r4204, %r10279, %r10279; // inline asm mul.wide.u32 %rd1435, %r4199, 4; add.s64 %rd1436, %rd48, %rd1435; ld.shared.u32 %r4387, [%rd1436]; xor.b32 %r4388, %r4387, %r4386; // inline asm bfe.u32 %r4203, %r4204, %r208, %r10279; // inline asm mul.wide.u32 %rd1437, %r4203, 4; add.s64 %rd1438, %rd51, %rd1437; ld.shared.u32 %r4389, [%rd1438]; add.s32 %r4390, %r4389, %r4388; 
xor.b32 %r4391, %r72, %r4188; xor.b32 %r4220, %r4391, %r4390; // inline asm bfe.u32 %r4207, %r4220, %r10278, %r10279; // inline asm mul.wide.u32 %rd1439, %r4207, 4; add.s64 %rd1440, %rd42, %rd1439; ld.shared.u32 %r4392, [%rd1440]; // inline asm bfe.u32 %r4211, %r4220, %r10280, %r10279; // inline asm mul.wide.u32 %rd1441, %r4211, 4; add.s64 %rd1442, %rd45, %rd1441; ld.shared.u32 %r4393, [%rd1442]; add.s32 %r4394, %r4393, %r4392; // inline asm bfe.u32 %r4215, %r4220, %r10279, %r10279; // inline asm mul.wide.u32 %rd1443, %r4215, 4; add.s64 %rd1444, %rd48, %rd1443; ld.shared.u32 %r4395, [%rd1444]; xor.b32 %r4396, %r4395, %r4394; // inline asm bfe.u32 %r4219, %r4220, %r208, %r10279; // inline asm mul.wide.u32 %rd1445, %r4219, 4; add.s64 %rd1446, %rd51, %rd1445; ld.shared.u32 %r4397, [%rd1446]; add.s32 %r4398, %r4397, %r4396; xor.b32 %r4399, %r75, %r4204; xor.b32 %r4236, %r4399, %r4398; // inline asm bfe.u32 %r4223, %r4236, %r10278, %r10279; // inline asm mul.wide.u32 %rd1447, %r4223, 4; add.s64 %rd1448, %rd42, %rd1447; ld.shared.u32 %r4400, [%rd1448]; // inline asm bfe.u32 %r4227, %r4236, %r10280, %r10279; // inline asm mul.wide.u32 %rd1449, %r4227, 4; add.s64 %rd1450, %rd45, %rd1449; ld.shared.u32 %r4401, [%rd1450]; add.s32 %r4402, %r4401, %r4400; // inline asm bfe.u32 %r4231, %r4236, %r10279, %r10279; // inline asm mul.wide.u32 %rd1451, %r4231, 4; add.s64 %rd1452, %rd48, %rd1451; ld.shared.u32 %r4403, [%rd1452]; xor.b32 %r4404, %r4403, %r4402; // inline asm bfe.u32 %r4235, %r4236, %r208, %r10279; // inline asm mul.wide.u32 %rd1453, %r4235, 4; add.s64 %rd1454, %rd51, %rd1453; ld.shared.u32 %r4405, [%rd1454]; add.s32 %r4406, %r4405, %r4404; xor.b32 %r4407, %r74, %r4220; xor.b32 %r4252, %r4407, %r4406; // inline asm bfe.u32 %r4239, %r4252, %r10278, %r10279; // inline asm mul.wide.u32 %rd1455, %r4239, 4; add.s64 %rd1456, %rd42, %rd1455; ld.shared.u32 %r4408, [%rd1456]; // inline asm bfe.u32 %r4243, %r4252, %r10280, %r10279; // inline asm mul.wide.u32 %rd1457, %r4243, 4; 
add.s64 %rd1458, %rd45, %rd1457; ld.shared.u32 %r4409, [%rd1458]; add.s32 %r4410, %r4409, %r4408; // inline asm bfe.u32 %r4247, %r4252, %r10279, %r10279; // inline asm mul.wide.u32 %rd1459, %r4247, 4; add.s64 %rd1460, %rd48, %rd1459; ld.shared.u32 %r4411, [%rd1460]; xor.b32 %r4412, %r4411, %r4410; // inline asm bfe.u32 %r4251, %r4252, %r208, %r10279; // inline asm mul.wide.u32 %rd1461, %r4251, 4; add.s64 %rd1462, %rd51, %rd1461; ld.shared.u32 %r4413, [%rd1462]; add.s32 %r4414, %r4413, %r4412; xor.b32 %r4415, %r77, %r4236; xor.b32 %r4268, %r4415, %r4414; // inline asm bfe.u32 %r4255, %r4268, %r10278, %r10279; // inline asm mul.wide.u32 %rd1463, %r4255, 4; add.s64 %rd1464, %rd42, %rd1463; ld.shared.u32 %r4416, [%rd1464]; // inline asm bfe.u32 %r4259, %r4268, %r10280, %r10279; // inline asm mul.wide.u32 %rd1465, %r4259, 4; add.s64 %rd1466, %rd45, %rd1465; ld.shared.u32 %r4417, [%rd1466]; add.s32 %r4418, %r4417, %r4416; // inline asm bfe.u32 %r4263, %r4268, %r10279, %r10279; // inline asm mul.wide.u32 %rd1467, %r4263, 4; add.s64 %rd1468, %rd48, %rd1467; ld.shared.u32 %r4419, [%rd1468]; xor.b32 %r4420, %r4419, %r4418; // inline asm bfe.u32 %r4267, %r4268, %r208, %r10279; // inline asm mul.wide.u32 %rd1469, %r4267, 4; add.s64 %rd1470, %rd51, %rd1469; ld.shared.u32 %r4421, [%rd1470]; add.s32 %r4422, %r4421, %r4420; xor.b32 %r4423, %r76, %r4252; xor.b32 %r4284, %r4423, %r4422; // inline asm bfe.u32 %r4271, %r4284, %r10278, %r10279; // inline asm mul.wide.u32 %rd1471, %r4271, 4; add.s64 %rd1472, %rd42, %rd1471; ld.shared.u32 %r4424, [%rd1472]; // inline asm bfe.u32 %r4275, %r4284, %r10280, %r10279; // inline asm mul.wide.u32 %rd1473, %r4275, 4; add.s64 %rd1474, %rd45, %rd1473; ld.shared.u32 %r4425, [%rd1474]; add.s32 %r4426, %r4425, %r4424; // inline asm bfe.u32 %r4279, %r4284, %r10279, %r10279; // inline asm mul.wide.u32 %rd1475, %r4279, 4; add.s64 %rd1476, %rd48, %rd1475; ld.shared.u32 %r4427, [%rd1476]; xor.b32 %r4428, %r4427, %r4426; // inline asm bfe.u32 %r4283, 
%r4284, %r208, %r10279; // inline asm mul.wide.u32 %rd1477, %r4283, 4; add.s64 %rd1478, %rd51, %rd1477; ld.shared.u32 %r4429, [%rd1478]; add.s32 %r4430, %r4429, %r4428; xor.b32 %r4431, %r79, %r4268; xor.b32 %r4300, %r4431, %r4430; // inline asm bfe.u32 %r4287, %r4300, %r10278, %r10279; // inline asm mul.wide.u32 %rd1479, %r4287, 4; add.s64 %rd1480, %rd42, %rd1479; ld.shared.u32 %r4432, [%rd1480]; // inline asm bfe.u32 %r4291, %r4300, %r10280, %r10279; // inline asm mul.wide.u32 %rd1481, %r4291, 4; add.s64 %rd1482, %rd45, %rd1481; ld.shared.u32 %r4433, [%rd1482]; add.s32 %r4434, %r4433, %r4432; // inline asm bfe.u32 %r4295, %r4300, %r10279, %r10279; // inline asm mul.wide.u32 %rd1483, %r4295, 4; add.s64 %rd1484, %rd48, %rd1483; ld.shared.u32 %r4435, [%rd1484]; xor.b32 %r4436, %r4435, %r4434; // inline asm bfe.u32 %r4299, %r4300, %r208, %r10279; // inline asm mul.wide.u32 %rd1485, %r4299, 4; add.s64 %rd1486, %rd51, %rd1485; ld.shared.u32 %r4437, [%rd1486]; add.s32 %r4438, %r4437, %r4436; xor.b32 %r4439, %r78, %r4284; xor.b32 %r4316, %r4439, %r4438; // inline asm bfe.u32 %r4303, %r4316, %r10278, %r10279; // inline asm mul.wide.u32 %rd1487, %r4303, 4; add.s64 %rd1488, %rd42, %rd1487; ld.shared.u32 %r4440, [%rd1488]; // inline asm bfe.u32 %r4307, %r4316, %r10280, %r10279; // inline asm mul.wide.u32 %rd1489, %r4307, 4; add.s64 %rd1490, %rd45, %rd1489; ld.shared.u32 %r4441, [%rd1490]; add.s32 %r4442, %r4441, %r4440; // inline asm bfe.u32 %r4311, %r4316, %r10279, %r10279; // inline asm mul.wide.u32 %rd1491, %r4311, 4; add.s64 %rd1492, %rd48, %rd1491; ld.shared.u32 %r4443, [%rd1492]; xor.b32 %r4444, %r4443, %r4442; // inline asm bfe.u32 %r4315, %r4316, %r208, %r10279; // inline asm mul.wide.u32 %rd1493, %r4315, 4; add.s64 %rd1494, %rd51, %rd1493; ld.shared.u32 %r4445, [%rd1494]; add.s32 %r4446, %r4445, %r4444; xor.b32 %r4447, %r81, %r4300; xor.b32 %r10314, %r4447, %r4446; xor.b32 %r10315, %r80, %r4316; st.shared.u32 [%rd3496], %r10315; st.shared.u32 [%rd3496+4], %r10314; 
add.s64 %rd3496, %rd3496, 8; add.s32 %r10313, %r10313, 2; setp.lt.u32 %p5, %r10313, 256; @%p5 bra BB4_8; mov.u64 %rd3497, %rd5; mov.u32 %r10316, %r208; BB4_10: mov.u32 %r10283, 16; mov.u32 %r10282, 8; mov.u32 %r10281, 24; xor.b32 %r4461, %r65, %r10315; // inline asm bfe.u32 %r4448, %r4461, %r10281, %r10282; // inline asm mul.wide.u32 %rd1498, %r4448, 4; add.s64 %rd1499, %rd42, %rd1498; ld.shared.u32 %r4705, [%rd1499]; // inline asm bfe.u32 %r4452, %r4461, %r10283, %r10282; // inline asm mul.wide.u32 %rd1502, %r4452, 4; add.s64 %rd1503, %rd45, %rd1502; ld.shared.u32 %r4706, [%rd1503]; add.s32 %r4707, %r4706, %r4705; // inline asm bfe.u32 %r4456, %r4461, %r10282, %r10282; // inline asm mul.wide.u32 %rd1506, %r4456, 4; add.s64 %rd1507, %rd48, %rd1506; ld.shared.u32 %r4708, [%rd1507]; xor.b32 %r4709, %r4708, %r4707; // inline asm bfe.u32 %r4460, %r4461, %r208, %r10282; // inline asm mul.wide.u32 %rd1510, %r4460, 4; add.s64 %rd1511, %rd51, %rd1510; ld.shared.u32 %r4710, [%rd1511]; add.s32 %r4711, %r4710, %r4709; xor.b32 %r4712, %r64, %r10314; xor.b32 %r4477, %r4712, %r4711; // inline asm bfe.u32 %r4464, %r4477, %r10281, %r10282; // inline asm mul.wide.u32 %rd1512, %r4464, 4; add.s64 %rd1513, %rd42, %rd1512; ld.shared.u32 %r4713, [%rd1513]; // inline asm bfe.u32 %r4468, %r4477, %r10283, %r10282; // inline asm mul.wide.u32 %rd1514, %r4468, 4; add.s64 %rd1515, %rd45, %rd1514; ld.shared.u32 %r4714, [%rd1515]; add.s32 %r4715, %r4714, %r4713; // inline asm bfe.u32 %r4472, %r4477, %r10282, %r10282; // inline asm mul.wide.u32 %rd1516, %r4472, 4; add.s64 %rd1517, %rd48, %rd1516; ld.shared.u32 %r4716, [%rd1517]; xor.b32 %r4717, %r4716, %r4715; // inline asm bfe.u32 %r4476, %r4477, %r208, %r10282; // inline asm mul.wide.u32 %rd1518, %r4476, 4; add.s64 %rd1519, %rd51, %rd1518; ld.shared.u32 %r4718, [%rd1519]; add.s32 %r4719, %r4718, %r4717; xor.b32 %r4720, %r67, %r4461; xor.b32 %r4493, %r4720, %r4719; // inline asm bfe.u32 %r4480, %r4493, %r10281, %r10282; // inline asm 
mul.wide.u32 %rd1520, %r4480, 4; add.s64 %rd1521, %rd42, %rd1520; ld.shared.u32 %r4721, [%rd1521]; // inline asm bfe.u32 %r4484, %r4493, %r10283, %r10282; // inline asm mul.wide.u32 %rd1522, %r4484, 4; add.s64 %rd1523, %rd45, %rd1522; ld.shared.u32 %r4722, [%rd1523]; add.s32 %r4723, %r4722, %r4721; // inline asm bfe.u32 %r4488, %r4493, %r10282, %r10282; // inline asm mul.wide.u32 %rd1524, %r4488, 4; add.s64 %rd1525, %rd48, %rd1524; ld.shared.u32 %r4724, [%rd1525]; xor.b32 %r4725, %r4724, %r4723; // inline asm bfe.u32 %r4492, %r4493, %r208, %r10282; // inline asm mul.wide.u32 %rd1526, %r4492, 4; add.s64 %rd1527, %rd51, %rd1526; ld.shared.u32 %r4726, [%rd1527]; add.s32 %r4727, %r4726, %r4725; xor.b32 %r4728, %r66, %r4477; xor.b32 %r4509, %r4728, %r4727; // inline asm bfe.u32 %r4496, %r4509, %r10281, %r10282; // inline asm mul.wide.u32 %rd1528, %r4496, 4; add.s64 %rd1529, %rd42, %rd1528; ld.shared.u32 %r4729, [%rd1529]; // inline asm bfe.u32 %r4500, %r4509, %r10283, %r10282; // inline asm mul.wide.u32 %rd1530, %r4500, 4; add.s64 %rd1531, %rd45, %rd1530; ld.shared.u32 %r4730, [%rd1531]; add.s32 %r4731, %r4730, %r4729; // inline asm bfe.u32 %r4504, %r4509, %r10282, %r10282; // inline asm mul.wide.u32 %rd1532, %r4504, 4; add.s64 %rd1533, %rd48, %rd1532; ld.shared.u32 %r4732, [%rd1533]; xor.b32 %r4733, %r4732, %r4731; // inline asm bfe.u32 %r4508, %r4509, %r208, %r10282; // inline asm mul.wide.u32 %rd1534, %r4508, 4; add.s64 %rd1535, %rd51, %rd1534; ld.shared.u32 %r4734, [%rd1535]; add.s32 %r4735, %r4734, %r4733; xor.b32 %r4736, %r69, %r4493; xor.b32 %r4525, %r4736, %r4735; // inline asm bfe.u32 %r4512, %r4525, %r10281, %r10282; // inline asm mul.wide.u32 %rd1536, %r4512, 4; add.s64 %rd1537, %rd42, %rd1536; ld.shared.u32 %r4737, [%rd1537]; // inline asm bfe.u32 %r4516, %r4525, %r10283, %r10282; // inline asm mul.wide.u32 %rd1538, %r4516, 4; add.s64 %rd1539, %rd45, %rd1538; ld.shared.u32 %r4738, [%rd1539]; add.s32 %r4739, %r4738, %r4737; // inline asm bfe.u32 %r4520, 
%r4525, %r10282, %r10282; // inline asm mul.wide.u32 %rd1540, %r4520, 4; add.s64 %rd1541, %rd48, %rd1540; ld.shared.u32 %r4740, [%rd1541]; xor.b32 %r4741, %r4740, %r4739; // inline asm bfe.u32 %r4524, %r4525, %r208, %r10282; // inline asm mul.wide.u32 %rd1542, %r4524, 4; add.s64 %rd1543, %rd51, %rd1542; ld.shared.u32 %r4742, [%rd1543]; add.s32 %r4743, %r4742, %r4741; xor.b32 %r4744, %r68, %r4509; xor.b32 %r4541, %r4744, %r4743; // inline asm bfe.u32 %r4528, %r4541, %r10281, %r10282; // inline asm mul.wide.u32 %rd1544, %r4528, 4; add.s64 %rd1545, %rd42, %rd1544; ld.shared.u32 %r4745, [%rd1545]; // inline asm bfe.u32 %r4532, %r4541, %r10283, %r10282; // inline asm mul.wide.u32 %rd1546, %r4532, 4; add.s64 %rd1547, %rd45, %rd1546; ld.shared.u32 %r4746, [%rd1547]; add.s32 %r4747, %r4746, %r4745; // inline asm bfe.u32 %r4536, %r4541, %r10282, %r10282; // inline asm mul.wide.u32 %rd1548, %r4536, 4; add.s64 %rd1549, %rd48, %rd1548; ld.shared.u32 %r4748, [%rd1549]; xor.b32 %r4749, %r4748, %r4747; // inline asm bfe.u32 %r4540, %r4541, %r208, %r10282; // inline asm mul.wide.u32 %rd1550, %r4540, 4; add.s64 %rd1551, %rd51, %rd1550; ld.shared.u32 %r4750, [%rd1551]; add.s32 %r4751, %r4750, %r4749; xor.b32 %r4752, %r71, %r4525; xor.b32 %r4557, %r4752, %r4751; // inline asm bfe.u32 %r4544, %r4557, %r10281, %r10282; // inline asm mul.wide.u32 %rd1552, %r4544, 4; add.s64 %rd1553, %rd42, %rd1552; ld.shared.u32 %r4753, [%rd1553]; // inline asm bfe.u32 %r4548, %r4557, %r10283, %r10282; // inline asm mul.wide.u32 %rd1554, %r4548, 4; add.s64 %rd1555, %rd45, %rd1554; ld.shared.u32 %r4754, [%rd1555]; add.s32 %r4755, %r4754, %r4753; // inline asm bfe.u32 %r4552, %r4557, %r10282, %r10282; // inline asm mul.wide.u32 %rd1556, %r4552, 4; add.s64 %rd1557, %rd48, %rd1556; ld.shared.u32 %r4756, [%rd1557]; xor.b32 %r4757, %r4756, %r4755; // inline asm bfe.u32 %r4556, %r4557, %r208, %r10282; // inline asm mul.wide.u32 %rd1558, %r4556, 4; add.s64 %rd1559, %rd51, %rd1558; ld.shared.u32 %r4758, 
[%rd1559]; add.s32 %r4759, %r4758, %r4757; xor.b32 %r4760, %r70, %r4541; xor.b32 %r4573, %r4760, %r4759; // inline asm bfe.u32 %r4560, %r4573, %r10281, %r10282; // inline asm mul.wide.u32 %rd1560, %r4560, 4; add.s64 %rd1561, %rd42, %rd1560; ld.shared.u32 %r4761, [%rd1561]; // inline asm bfe.u32 %r4564, %r4573, %r10283, %r10282; // inline asm mul.wide.u32 %rd1562, %r4564, 4; add.s64 %rd1563, %rd45, %rd1562; ld.shared.u32 %r4762, [%rd1563]; add.s32 %r4763, %r4762, %r4761; // inline asm bfe.u32 %r4568, %r4573, %r10282, %r10282; // inline asm mul.wide.u32 %rd1564, %r4568, 4; add.s64 %rd1565, %rd48, %rd1564; ld.shared.u32 %r4764, [%rd1565]; xor.b32 %r4765, %r4764, %r4763; // inline asm bfe.u32 %r4572, %r4573, %r208, %r10282; // inline asm mul.wide.u32 %rd1566, %r4572, 4; add.s64 %rd1567, %rd51, %rd1566; ld.shared.u32 %r4766, [%rd1567]; add.s32 %r4767, %r4766, %r4765; xor.b32 %r4768, %r73, %r4557; xor.b32 %r4589, %r4768, %r4767; // inline asm bfe.u32 %r4576, %r4589, %r10281, %r10282; // inline asm mul.wide.u32 %rd1568, %r4576, 4; add.s64 %rd1569, %rd42, %rd1568; ld.shared.u32 %r4769, [%rd1569]; // inline asm bfe.u32 %r4580, %r4589, %r10283, %r10282; // inline asm mul.wide.u32 %rd1570, %r4580, 4; add.s64 %rd1571, %rd45, %rd1570; ld.shared.u32 %r4770, [%rd1571]; add.s32 %r4771, %r4770, %r4769; // inline asm bfe.u32 %r4584, %r4589, %r10282, %r10282; // inline asm mul.wide.u32 %rd1572, %r4584, 4; add.s64 %rd1573, %rd48, %rd1572; ld.shared.u32 %r4772, [%rd1573]; xor.b32 %r4773, %r4772, %r4771; // inline asm bfe.u32 %r4588, %r4589, %r208, %r10282; // inline asm mul.wide.u32 %rd1574, %r4588, 4; add.s64 %rd1575, %rd51, %rd1574; ld.shared.u32 %r4774, [%rd1575]; add.s32 %r4775, %r4774, %r4773; xor.b32 %r4776, %r72, %r4573; xor.b32 %r4605, %r4776, %r4775; // inline asm bfe.u32 %r4592, %r4605, %r10281, %r10282; // inline asm mul.wide.u32 %rd1576, %r4592, 4; add.s64 %rd1577, %rd42, %rd1576; ld.shared.u32 %r4777, [%rd1577]; // inline asm bfe.u32 %r4596, %r4605, %r10283, %r10282; // 
inline asm mul.wide.u32 %rd1578, %r4596, 4; add.s64 %rd1579, %rd45, %rd1578; ld.shared.u32 %r4778, [%rd1579]; add.s32 %r4779, %r4778, %r4777; // inline asm bfe.u32 %r4600, %r4605, %r10282, %r10282; // inline asm mul.wide.u32 %rd1580, %r4600, 4; add.s64 %rd1581, %rd48, %rd1580; ld.shared.u32 %r4780, [%rd1581]; xor.b32 %r4781, %r4780, %r4779; // inline asm bfe.u32 %r4604, %r4605, %r208, %r10282; // inline asm mul.wide.u32 %rd1582, %r4604, 4; add.s64 %rd1583, %rd51, %rd1582; ld.shared.u32 %r4782, [%rd1583]; add.s32 %r4783, %r4782, %r4781; xor.b32 %r4784, %r75, %r4589; xor.b32 %r4621, %r4784, %r4783; // inline asm bfe.u32 %r4608, %r4621, %r10281, %r10282; // inline asm mul.wide.u32 %rd1584, %r4608, 4; add.s64 %rd1585, %rd42, %rd1584; ld.shared.u32 %r4785, [%rd1585]; // inline asm bfe.u32 %r4612, %r4621, %r10283, %r10282; // inline asm mul.wide.u32 %rd1586, %r4612, 4; add.s64 %rd1587, %rd45, %rd1586; ld.shared.u32 %r4786, [%rd1587]; add.s32 %r4787, %r4786, %r4785; // inline asm bfe.u32 %r4616, %r4621, %r10282, %r10282; // inline asm mul.wide.u32 %rd1588, %r4616, 4; add.s64 %rd1589, %rd48, %rd1588; ld.shared.u32 %r4788, [%rd1589]; xor.b32 %r4789, %r4788, %r4787; // inline asm bfe.u32 %r4620, %r4621, %r208, %r10282; // inline asm mul.wide.u32 %rd1590, %r4620, 4; add.s64 %rd1591, %rd51, %rd1590; ld.shared.u32 %r4790, [%rd1591]; add.s32 %r4791, %r4790, %r4789; xor.b32 %r4792, %r74, %r4605; xor.b32 %r4637, %r4792, %r4791; // inline asm bfe.u32 %r4624, %r4637, %r10281, %r10282; // inline asm mul.wide.u32 %rd1592, %r4624, 4; add.s64 %rd1593, %rd42, %rd1592; ld.shared.u32 %r4793, [%rd1593]; // inline asm bfe.u32 %r4628, %r4637, %r10283, %r10282; // inline asm mul.wide.u32 %rd1594, %r4628, 4; add.s64 %rd1595, %rd45, %rd1594; ld.shared.u32 %r4794, [%rd1595]; add.s32 %r4795, %r4794, %r4793; // inline asm bfe.u32 %r4632, %r4637, %r10282, %r10282; // inline asm mul.wide.u32 %rd1596, %r4632, 4; add.s64 %rd1597, %rd48, %rd1596; ld.shared.u32 %r4796, [%rd1597]; xor.b32 %r4797, %r4796, 
%r4795; // inline asm bfe.u32 %r4636, %r4637, %r208, %r10282; // inline asm mul.wide.u32 %rd1598, %r4636, 4; add.s64 %rd1599, %rd51, %rd1598; ld.shared.u32 %r4798, [%rd1599]; add.s32 %r4799, %r4798, %r4797; xor.b32 %r4800, %r77, %r4621; xor.b32 %r4653, %r4800, %r4799; // inline asm bfe.u32 %r4640, %r4653, %r10281, %r10282; // inline asm mul.wide.u32 %rd1600, %r4640, 4; add.s64 %rd1601, %rd42, %rd1600; ld.shared.u32 %r4801, [%rd1601]; // inline asm bfe.u32 %r4644, %r4653, %r10283, %r10282; // inline asm mul.wide.u32 %rd1602, %r4644, 4; add.s64 %rd1603, %rd45, %rd1602; ld.shared.u32 %r4802, [%rd1603]; add.s32 %r4803, %r4802, %r4801; // inline asm bfe.u32 %r4648, %r4653, %r10282, %r10282; // inline asm mul.wide.u32 %rd1604, %r4648, 4; add.s64 %rd1605, %rd48, %rd1604; ld.shared.u32 %r4804, [%rd1605]; xor.b32 %r4805, %r4804, %r4803; // inline asm bfe.u32 %r4652, %r4653, %r208, %r10282; // inline asm mul.wide.u32 %rd1606, %r4652, 4; add.s64 %rd1607, %rd51, %rd1606; ld.shared.u32 %r4806, [%rd1607]; add.s32 %r4807, %r4806, %r4805; xor.b32 %r4808, %r76, %r4637; xor.b32 %r4669, %r4808, %r4807; // inline asm bfe.u32 %r4656, %r4669, %r10281, %r10282; // inline asm mul.wide.u32 %rd1608, %r4656, 4; add.s64 %rd1609, %rd42, %rd1608; ld.shared.u32 %r4809, [%rd1609]; // inline asm bfe.u32 %r4660, %r4669, %r10283, %r10282; // inline asm mul.wide.u32 %rd1610, %r4660, 4; add.s64 %rd1611, %rd45, %rd1610; ld.shared.u32 %r4810, [%rd1611]; add.s32 %r4811, %r4810, %r4809; // inline asm bfe.u32 %r4664, %r4669, %r10282, %r10282; // inline asm mul.wide.u32 %rd1612, %r4664, 4; add.s64 %rd1613, %rd48, %rd1612; ld.shared.u32 %r4812, [%rd1613]; xor.b32 %r4813, %r4812, %r4811; // inline asm bfe.u32 %r4668, %r4669, %r208, %r10282; // inline asm mul.wide.u32 %rd1614, %r4668, 4; add.s64 %rd1615, %rd51, %rd1614; ld.shared.u32 %r4814, [%rd1615]; add.s32 %r4815, %r4814, %r4813; xor.b32 %r4816, %r79, %r4653; xor.b32 %r4685, %r4816, %r4815; // inline asm bfe.u32 %r4672, %r4685, %r10281, %r10282; // inline 
asm mul.wide.u32 %rd1616, %r4672, 4; add.s64 %rd1617, %rd42, %rd1616; ld.shared.u32 %r4817, [%rd1617]; // inline asm bfe.u32 %r4676, %r4685, %r10283, %r10282; // inline asm mul.wide.u32 %rd1618, %r4676, 4; add.s64 %rd1619, %rd45, %rd1618; ld.shared.u32 %r4818, [%rd1619]; add.s32 %r4819, %r4818, %r4817; // inline asm bfe.u32 %r4680, %r4685, %r10282, %r10282; // inline asm mul.wide.u32 %rd1620, %r4680, 4; add.s64 %rd1621, %rd48, %rd1620; ld.shared.u32 %r4820, [%rd1621]; xor.b32 %r4821, %r4820, %r4819; // inline asm bfe.u32 %r4684, %r4685, %r208, %r10282; // inline asm mul.wide.u32 %rd1622, %r4684, 4; add.s64 %rd1623, %rd51, %rd1622; ld.shared.u32 %r4822, [%rd1623]; add.s32 %r4823, %r4822, %r4821; xor.b32 %r4824, %r78, %r4669; xor.b32 %r4701, %r4824, %r4823; // inline asm bfe.u32 %r4688, %r4701, %r10281, %r10282; // inline asm mul.wide.u32 %rd1624, %r4688, 4; add.s64 %rd1625, %rd42, %rd1624; ld.shared.u32 %r4825, [%rd1625]; // inline asm bfe.u32 %r4692, %r4701, %r10283, %r10282; // inline asm mul.wide.u32 %rd1626, %r4692, 4; add.s64 %rd1627, %rd45, %rd1626; ld.shared.u32 %r4826, [%rd1627]; add.s32 %r4827, %r4826, %r4825; // inline asm bfe.u32 %r4696, %r4701, %r10282, %r10282; // inline asm mul.wide.u32 %rd1628, %r4696, 4; add.s64 %rd1629, %rd48, %rd1628; ld.shared.u32 %r4828, [%rd1629]; xor.b32 %r4829, %r4828, %r4827; // inline asm bfe.u32 %r4700, %r4701, %r208, %r10282; // inline asm mul.wide.u32 %rd1630, %r4700, 4; add.s64 %rd1631, %rd51, %rd1630; ld.shared.u32 %r4830, [%rd1631]; add.s32 %r4831, %r4830, %r4829; xor.b32 %r4832, %r81, %r4685; xor.b32 %r10314, %r4832, %r4831; xor.b32 %r10315, %r80, %r4701; st.shared.u32 [%rd3497], %r10315; st.shared.u32 [%rd3497+4], %r10314; add.s64 %rd3497, %rd3497, 8; add.s32 %r10316, %r10316, 2; setp.lt.u32 %p6, %r10316, 256; @%p6 bra BB4_10; mov.u64 %rd3498, %rd6; mov.u32 %r10319, %r208; BB4_12: mov.u32 %r10286, 16; mov.u32 %r10285, 8; mov.u32 %r10284, 24; xor.b32 %r4846, %r65, %r10315; // inline asm bfe.u32 %r4833, %r4846, 
%r10284, %r10285; // inline asm mul.wide.u32 %rd1635, %r4833, 4; add.s64 %rd1636, %rd42, %rd1635; ld.shared.u32 %r5089, [%rd1636]; // inline asm bfe.u32 %r4837, %r4846, %r10286, %r10285; // inline asm mul.wide.u32 %rd1639, %r4837, 4; add.s64 %rd1640, %rd45, %rd1639; ld.shared.u32 %r5090, [%rd1640]; add.s32 %r5091, %r5090, %r5089; // inline asm bfe.u32 %r4841, %r4846, %r10285, %r10285; // inline asm mul.wide.u32 %rd1643, %r4841, 4; add.s64 %rd1644, %rd48, %rd1643; ld.shared.u32 %r5092, [%rd1644]; xor.b32 %r5093, %r5092, %r5091; // inline asm bfe.u32 %r4845, %r4846, %r208, %r10285; // inline asm mul.wide.u32 %rd1647, %r4845, 4; add.s64 %rd1648, %rd51, %rd1647; ld.shared.u32 %r5094, [%rd1648]; add.s32 %r5095, %r5094, %r5093; xor.b32 %r5096, %r64, %r10314; xor.b32 %r4862, %r5096, %r5095; // inline asm bfe.u32 %r4849, %r4862, %r10284, %r10285; // inline asm mul.wide.u32 %rd1649, %r4849, 4; add.s64 %rd1650, %rd42, %rd1649; ld.shared.u32 %r5097, [%rd1650]; // inline asm bfe.u32 %r4853, %r4862, %r10286, %r10285; // inline asm mul.wide.u32 %rd1651, %r4853, 4; add.s64 %rd1652, %rd45, %rd1651; ld.shared.u32 %r5098, [%rd1652]; add.s32 %r5099, %r5098, %r5097; // inline asm bfe.u32 %r4857, %r4862, %r10285, %r10285; // inline asm mul.wide.u32 %rd1653, %r4857, 4; add.s64 %rd1654, %rd48, %rd1653; ld.shared.u32 %r5100, [%rd1654]; xor.b32 %r5101, %r5100, %r5099; // inline asm bfe.u32 %r4861, %r4862, %r208, %r10285; // inline asm mul.wide.u32 %rd1655, %r4861, 4; add.s64 %rd1656, %rd51, %rd1655; ld.shared.u32 %r5102, [%rd1656]; add.s32 %r5103, %r5102, %r5101; xor.b32 %r5104, %r67, %r4846; xor.b32 %r4878, %r5104, %r5103; // inline asm bfe.u32 %r4865, %r4878, %r10284, %r10285; // inline asm mul.wide.u32 %rd1657, %r4865, 4; add.s64 %rd1658, %rd42, %rd1657; ld.shared.u32 %r5105, [%rd1658]; // inline asm bfe.u32 %r4869, %r4878, %r10286, %r10285; // inline asm mul.wide.u32 %rd1659, %r4869, 4; add.s64 %rd1660, %rd45, %rd1659; ld.shared.u32 %r5106, [%rd1660]; add.s32 %r5107, %r5106, %r5105; // 
inline asm bfe.u32 %r4873, %r4878, %r10285, %r10285; // inline asm mul.wide.u32 %rd1661, %r4873, 4; add.s64 %rd1662, %rd48, %rd1661; ld.shared.u32 %r5108, [%rd1662]; xor.b32 %r5109, %r5108, %r5107; // inline asm bfe.u32 %r4877, %r4878, %r208, %r10285; // inline asm mul.wide.u32 %rd1663, %r4877, 4; add.s64 %rd1664, %rd51, %rd1663; ld.shared.u32 %r5110, [%rd1664]; add.s32 %r5111, %r5110, %r5109; xor.b32 %r5112, %r66, %r4862; xor.b32 %r4894, %r5112, %r5111; // inline asm bfe.u32 %r4881, %r4894, %r10284, %r10285; // inline asm mul.wide.u32 %rd1665, %r4881, 4; add.s64 %rd1666, %rd42, %rd1665; ld.shared.u32 %r5113, [%rd1666]; // inline asm bfe.u32 %r4885, %r4894, %r10286, %r10285; // inline asm mul.wide.u32 %rd1667, %r4885, 4; add.s64 %rd1668, %rd45, %rd1667; ld.shared.u32 %r5114, [%rd1668]; add.s32 %r5115, %r5114, %r5113; // inline asm bfe.u32 %r4889, %r4894, %r10285, %r10285; // inline asm mul.wide.u32 %rd1669, %r4889, 4; add.s64 %rd1670, %rd48, %rd1669; ld.shared.u32 %r5116, [%rd1670]; xor.b32 %r5117, %r5116, %r5115; // inline asm bfe.u32 %r4893, %r4894, %r208, %r10285; // inline asm mul.wide.u32 %rd1671, %r4893, 4; add.s64 %rd1672, %rd51, %rd1671; ld.shared.u32 %r5118, [%rd1672]; add.s32 %r5119, %r5118, %r5117; xor.b32 %r5120, %r69, %r4878; xor.b32 %r4910, %r5120, %r5119; // inline asm bfe.u32 %r4897, %r4910, %r10284, %r10285; // inline asm mul.wide.u32 %rd1673, %r4897, 4; add.s64 %rd1674, %rd42, %rd1673; ld.shared.u32 %r5121, [%rd1674]; // inline asm bfe.u32 %r4901, %r4910, %r10286, %r10285; // inline asm mul.wide.u32 %rd1675, %r4901, 4; add.s64 %rd1676, %rd45, %rd1675; ld.shared.u32 %r5122, [%rd1676]; add.s32 %r5123, %r5122, %r5121; // inline asm bfe.u32 %r4905, %r4910, %r10285, %r10285; // inline asm mul.wide.u32 %rd1677, %r4905, 4; add.s64 %rd1678, %rd48, %rd1677; ld.shared.u32 %r5124, [%rd1678]; xor.b32 %r5125, %r5124, %r5123; // inline asm bfe.u32 %r4909, %r4910, %r208, %r10285; // inline asm mul.wide.u32 %rd1679, %r4909, 4; add.s64 %rd1680, %rd51, %rd1679; 
ld.shared.u32 %r5126, [%rd1680]; add.s32 %r5127, %r5126, %r5125; xor.b32 %r5128, %r68, %r4894; xor.b32 %r4926, %r5128, %r5127; // inline asm bfe.u32 %r4913, %r4926, %r10284, %r10285; // inline asm mul.wide.u32 %rd1681, %r4913, 4; add.s64 %rd1682, %rd42, %rd1681; ld.shared.u32 %r5129, [%rd1682]; // inline asm bfe.u32 %r4917, %r4926, %r10286, %r10285; // inline asm mul.wide.u32 %rd1683, %r4917, 4; add.s64 %rd1684, %rd45, %rd1683; ld.shared.u32 %r5130, [%rd1684]; add.s32 %r5131, %r5130, %r5129; // inline asm bfe.u32 %r4921, %r4926, %r10285, %r10285; // inline asm mul.wide.u32 %rd1685, %r4921, 4; add.s64 %rd1686, %rd48, %rd1685; ld.shared.u32 %r5132, [%rd1686]; xor.b32 %r5133, %r5132, %r5131; // inline asm bfe.u32 %r4925, %r4926, %r208, %r10285; // inline asm mul.wide.u32 %rd1687, %r4925, 4; add.s64 %rd1688, %rd51, %rd1687; ld.shared.u32 %r5134, [%rd1688]; add.s32 %r5135, %r5134, %r5133; xor.b32 %r5136, %r71, %r4910; xor.b32 %r4942, %r5136, %r5135; // inline asm bfe.u32 %r4929, %r4942, %r10284, %r10285; // inline asm mul.wide.u32 %rd1689, %r4929, 4; add.s64 %rd1690, %rd42, %rd1689; ld.shared.u32 %r5137, [%rd1690]; // inline asm bfe.u32 %r4933, %r4942, %r10286, %r10285; // inline asm mul.wide.u32 %rd1691, %r4933, 4; add.s64 %rd1692, %rd45, %rd1691; ld.shared.u32 %r5138, [%rd1692]; add.s32 %r5139, %r5138, %r5137; // inline asm bfe.u32 %r4937, %r4942, %r10285, %r10285; // inline asm mul.wide.u32 %rd1693, %r4937, 4; add.s64 %rd1694, %rd48, %rd1693; ld.shared.u32 %r5140, [%rd1694]; xor.b32 %r5141, %r5140, %r5139; // inline asm bfe.u32 %r4941, %r4942, %r208, %r10285; // inline asm mul.wide.u32 %rd1695, %r4941, 4; add.s64 %rd1696, %rd51, %rd1695; ld.shared.u32 %r5142, [%rd1696]; add.s32 %r5143, %r5142, %r5141; xor.b32 %r5144, %r70, %r4926; xor.b32 %r4958, %r5144, %r5143; // inline asm bfe.u32 %r4945, %r4958, %r10284, %r10285; // inline asm mul.wide.u32 %rd1697, %r4945, 4; add.s64 %rd1698, %rd42, %rd1697; ld.shared.u32 %r5145, [%rd1698]; // inline asm bfe.u32 %r4949, %r4958, 
%r10286, %r10285; // inline asm mul.wide.u32 %rd1699, %r4949, 4; add.s64 %rd1700, %rd45, %rd1699; ld.shared.u32 %r5146, [%rd1700]; add.s32 %r5147, %r5146, %r5145; // inline asm bfe.u32 %r4953, %r4958, %r10285, %r10285; // inline asm mul.wide.u32 %rd1701, %r4953, 4; add.s64 %rd1702, %rd48, %rd1701; ld.shared.u32 %r5148, [%rd1702]; xor.b32 %r5149, %r5148, %r5147; // inline asm bfe.u32 %r4957, %r4958, %r208, %r10285; // inline asm mul.wide.u32 %rd1703, %r4957, 4; add.s64 %rd1704, %rd51, %rd1703; ld.shared.u32 %r5150, [%rd1704]; add.s32 %r5151, %r5150, %r5149; xor.b32 %r5152, %r73, %r4942; xor.b32 %r4974, %r5152, %r5151; // inline asm bfe.u32 %r4961, %r4974, %r10284, %r10285; // inline asm mul.wide.u32 %rd1705, %r4961, 4; add.s64 %rd1706, %rd42, %rd1705; ld.shared.u32 %r5153, [%rd1706]; // inline asm bfe.u32 %r4965, %r4974, %r10286, %r10285; // inline asm mul.wide.u32 %rd1707, %r4965, 4; add.s64 %rd1708, %rd45, %rd1707; ld.shared.u32 %r5154, [%rd1708]; add.s32 %r5155, %r5154, %r5153; // inline asm bfe.u32 %r4969, %r4974, %r10285, %r10285; // inline asm mul.wide.u32 %rd1709, %r4969, 4; add.s64 %rd1710, %rd48, %rd1709; ld.shared.u32 %r5156, [%rd1710]; xor.b32 %r5157, %r5156, %r5155; // inline asm bfe.u32 %r4973, %r4974, %r208, %r10285; // inline asm mul.wide.u32 %rd1711, %r4973, 4; add.s64 %rd1712, %rd51, %rd1711; ld.shared.u32 %r5158, [%rd1712]; add.s32 %r5159, %r5158, %r5157; xor.b32 %r5160, %r72, %r4958; xor.b32 %r4990, %r5160, %r5159; // inline asm bfe.u32 %r4977, %r4990, %r10284, %r10285; // inline asm mul.wide.u32 %rd1713, %r4977, 4; add.s64 %rd1714, %rd42, %rd1713; ld.shared.u32 %r5161, [%rd1714]; // inline asm bfe.u32 %r4981, %r4990, %r10286, %r10285; // inline asm mul.wide.u32 %rd1715, %r4981, 4; add.s64 %rd1716, %rd45, %rd1715; ld.shared.u32 %r5162, [%rd1716]; add.s32 %r5163, %r5162, %r5161; // inline asm bfe.u32 %r4985, %r4990, %r10285, %r10285; // inline asm mul.wide.u32 %rd1717, %r4985, 4; add.s64 %rd1718, %rd48, %rd1717; ld.shared.u32 %r5164, [%rd1718]; 
xor.b32 %r5165, %r5164, %r5163; // inline asm bfe.u32 %r4989, %r4990, %r208, %r10285; // inline asm mul.wide.u32 %rd1719, %r4989, 4; add.s64 %rd1720, %rd51, %rd1719; ld.shared.u32 %r5166, [%rd1720]; add.s32 %r5167, %r5166, %r5165; xor.b32 %r5168, %r75, %r4974; xor.b32 %r5006, %r5168, %r5167; // inline asm bfe.u32 %r4993, %r5006, %r10284, %r10285; // inline asm mul.wide.u32 %rd1721, %r4993, 4; add.s64 %rd1722, %rd42, %rd1721; ld.shared.u32 %r5169, [%rd1722]; // inline asm bfe.u32 %r4997, %r5006, %r10286, %r10285; // inline asm mul.wide.u32 %rd1723, %r4997, 4; add.s64 %rd1724, %rd45, %rd1723; ld.shared.u32 %r5170, [%rd1724]; add.s32 %r5171, %r5170, %r5169; // inline asm bfe.u32 %r5001, %r5006, %r10285, %r10285; // inline asm mul.wide.u32 %rd1725, %r5001, 4; add.s64 %rd1726, %rd48, %rd1725; ld.shared.u32 %r5172, [%rd1726]; xor.b32 %r5173, %r5172, %r5171; // inline asm bfe.u32 %r5005, %r5006, %r208, %r10285; // inline asm mul.wide.u32 %rd1727, %r5005, 4; add.s64 %rd1728, %rd51, %rd1727; ld.shared.u32 %r5174, [%rd1728]; add.s32 %r5175, %r5174, %r5173; xor.b32 %r5176, %r74, %r4990; xor.b32 %r5022, %r5176, %r5175; // inline asm bfe.u32 %r5009, %r5022, %r10284, %r10285; // inline asm mul.wide.u32 %rd1729, %r5009, 4; add.s64 %rd1730, %rd42, %rd1729; ld.shared.u32 %r5177, [%rd1730]; // inline asm bfe.u32 %r5013, %r5022, %r10286, %r10285; // inline asm mul.wide.u32 %rd1731, %r5013, 4; add.s64 %rd1732, %rd45, %rd1731; ld.shared.u32 %r5178, [%rd1732]; add.s32 %r5179, %r5178, %r5177; // inline asm bfe.u32 %r5017, %r5022, %r10285, %r10285; // inline asm mul.wide.u32 %rd1733, %r5017, 4; add.s64 %rd1734, %rd48, %rd1733; ld.shared.u32 %r5180, [%rd1734]; xor.b32 %r5181, %r5180, %r5179; // inline asm bfe.u32 %r5021, %r5022, %r208, %r10285; // inline asm mul.wide.u32 %rd1735, %r5021, 4; add.s64 %rd1736, %rd51, %rd1735; ld.shared.u32 %r5182, [%rd1736]; add.s32 %r5183, %r5182, %r5181; xor.b32 %r5184, %r77, %r5006; xor.b32 %r5038, %r5184, %r5183; // inline asm bfe.u32 %r5025, %r5038, 
%r10284, %r10285; // inline asm mul.wide.u32 %rd1737, %r5025, 4; add.s64 %rd1738, %rd42, %rd1737; ld.shared.u32 %r5185, [%rd1738]; // inline asm bfe.u32 %r5029, %r5038, %r10286, %r10285; // inline asm mul.wide.u32 %rd1739, %r5029, 4; add.s64 %rd1740, %rd45, %rd1739; ld.shared.u32 %r5186, [%rd1740]; add.s32 %r5187, %r5186, %r5185; // inline asm bfe.u32 %r5033, %r5038, %r10285, %r10285; // inline asm mul.wide.u32 %rd1741, %r5033, 4; add.s64 %rd1742, %rd48, %rd1741; ld.shared.u32 %r5188, [%rd1742]; xor.b32 %r5189, %r5188, %r5187; // inline asm bfe.u32 %r5037, %r5038, %r208, %r10285; // inline asm mul.wide.u32 %rd1743, %r5037, 4; add.s64 %rd1744, %rd51, %rd1743; ld.shared.u32 %r5190, [%rd1744]; add.s32 %r5191, %r5190, %r5189; xor.b32 %r5192, %r76, %r5022; xor.b32 %r5054, %r5192, %r5191; // inline asm bfe.u32 %r5041, %r5054, %r10284, %r10285; // inline asm mul.wide.u32 %rd1745, %r5041, 4; add.s64 %rd1746, %rd42, %rd1745; ld.shared.u32 %r5193, [%rd1746]; // inline asm bfe.u32 %r5045, %r5054, %r10286, %r10285; // inline asm mul.wide.u32 %rd1747, %r5045, 4; add.s64 %rd1748, %rd45, %rd1747; ld.shared.u32 %r5194, [%rd1748]; add.s32 %r5195, %r5194, %r5193; // inline asm bfe.u32 %r5049, %r5054, %r10285, %r10285; // inline asm mul.wide.u32 %rd1749, %r5049, 4; add.s64 %rd1750, %rd48, %rd1749; ld.shared.u32 %r5196, [%rd1750]; xor.b32 %r5197, %r5196, %r5195; // inline asm bfe.u32 %r5053, %r5054, %r208, %r10285; // inline asm mul.wide.u32 %rd1751, %r5053, 4; add.s64 %rd1752, %rd51, %rd1751; ld.shared.u32 %r5198, [%rd1752]; add.s32 %r5199, %r5198, %r5197; xor.b32 %r5200, %r79, %r5038; xor.b32 %r5070, %r5200, %r5199; // inline asm bfe.u32 %r5057, %r5070, %r10284, %r10285; // inline asm mul.wide.u32 %rd1753, %r5057, 4; add.s64 %rd1754, %rd42, %rd1753; ld.shared.u32 %r5201, [%rd1754]; // inline asm bfe.u32 %r5061, %r5070, %r10286, %r10285; // inline asm mul.wide.u32 %rd1755, %r5061, 4; add.s64 %rd1756, %rd45, %rd1755; ld.shared.u32 %r5202, [%rd1756]; add.s32 %r5203, %r5202, %r5201; // 
inline asm bfe.u32 %r5065, %r5070, %r10285, %r10285; // inline asm mul.wide.u32 %rd1757, %r5065, 4; add.s64 %rd1758, %rd48, %rd1757; ld.shared.u32 %r5204, [%rd1758]; xor.b32 %r5205, %r5204, %r5203; // inline asm bfe.u32 %r5069, %r5070, %r208, %r10285; // inline asm mul.wide.u32 %rd1759, %r5069, 4; add.s64 %rd1760, %rd51, %rd1759; ld.shared.u32 %r5206, [%rd1760]; add.s32 %r5207, %r5206, %r5205; xor.b32 %r5208, %r78, %r5054; xor.b32 %r5086, %r5208, %r5207; // inline asm bfe.u32 %r5073, %r5086, %r10284, %r10285; // inline asm mul.wide.u32 %rd1761, %r5073, 4; add.s64 %rd1762, %rd42, %rd1761; ld.shared.u32 %r5209, [%rd1762]; // inline asm bfe.u32 %r5077, %r5086, %r10286, %r10285; // inline asm mul.wide.u32 %rd1763, %r5077, 4; add.s64 %rd1764, %rd45, %rd1763; ld.shared.u32 %r5210, [%rd1764]; add.s32 %r5211, %r5210, %r5209; // inline asm bfe.u32 %r5081, %r5086, %r10285, %r10285; // inline asm mul.wide.u32 %rd1765, %r5081, 4; add.s64 %rd1766, %rd48, %rd1765; ld.shared.u32 %r5212, [%rd1766]; xor.b32 %r5213, %r5212, %r5211; // inline asm bfe.u32 %r5085, %r5086, %r208, %r10285; // inline asm mul.wide.u32 %rd1767, %r5085, 4; add.s64 %rd1768, %rd51, %rd1767; ld.shared.u32 %r5214, [%rd1768]; add.s32 %r5215, %r5214, %r5213; xor.b32 %r5216, %r81, %r5070; xor.b32 %r10314, %r5216, %r5215; xor.b32 %r10315, %r80, %r5086; st.shared.u32 [%rd3498], %r10315; st.shared.u32 [%rd3498+4], %r10314; add.s64 %rd3498, %rd3498, 8; add.s32 %r10319, %r10319, 2; setp.lt.u32 %p7, %r10319, 256; @%p7 bra BB4_12; mov.u32 %r10289, 16; mov.u32 %r10288, 8; mov.u32 %r10287, 24; xor.b32 %r5230, %r65, %r41; // inline asm bfe.u32 %r5217, %r5230, %r10287, %r10288; // inline asm mul.wide.u32 %rd1772, %r5217, 4; add.s64 %rd1773, %rd42, %rd1772; ld.shared.u32 %r7522, [%rd1773]; // inline asm bfe.u32 %r5221, %r5230, %r10289, %r10288; // inline asm mul.wide.u32 %rd1776, %r5221, 4; add.s64 %rd1777, %rd45, %rd1776; ld.shared.u32 %r7523, [%rd1777]; add.s32 %r7524, %r7523, %r7522; // inline asm bfe.u32 %r5225, %r5230, 
%r10288, %r10288; // inline asm mul.wide.u32 %rd1780, %r5225, 4; add.s64 %rd1781, %rd48, %rd1780; ld.shared.u32 %r7525, [%rd1781]; xor.b32 %r7526, %r7525, %r7524; mov.u32 %r10322, 0; // inline asm bfe.u32 %r5229, %r5230, %r10322, %r10288; // inline asm mul.wide.u32 %rd1784, %r5229, 4; add.s64 %rd1785, %rd51, %rd1784; ld.shared.u32 %r7527, [%rd1785]; add.s32 %r7528, %r7527, %r7526; xor.b32 %r7529, %r64, %r42; xor.b32 %r5246, %r7529, %r7528; // inline asm bfe.u32 %r5233, %r5246, %r10287, %r10288; // inline asm mul.wide.u32 %rd1786, %r5233, 4; add.s64 %rd1787, %rd42, %rd1786; ld.shared.u32 %r7530, [%rd1787]; // inline asm bfe.u32 %r5237, %r5246, %r10289, %r10288; // inline asm mul.wide.u32 %rd1788, %r5237, 4; add.s64 %rd1789, %rd45, %rd1788; ld.shared.u32 %r7531, [%rd1789]; add.s32 %r7532, %r7531, %r7530; // inline asm bfe.u32 %r5241, %r5246, %r10288, %r10288; // inline asm mul.wide.u32 %rd1790, %r5241, 4; add.s64 %rd1791, %rd48, %rd1790; ld.shared.u32 %r7533, [%rd1791]; xor.b32 %r7534, %r7533, %r7532; // inline asm bfe.u32 %r5245, %r5246, %r10322, %r10288; // inline asm mul.wide.u32 %rd1792, %r5245, 4; add.s64 %rd1793, %rd51, %rd1792; ld.shared.u32 %r7535, [%rd1793]; add.s32 %r7536, %r7535, %r7534; xor.b32 %r7537, %r67, %r43; xor.b32 %r7538, %r7537, %r5230; xor.b32 %r5262, %r7538, %r7536; // inline asm bfe.u32 %r5249, %r5262, %r10287, %r10288; // inline asm mul.wide.u32 %rd1794, %r5249, 4; add.s64 %rd1795, %rd42, %rd1794; ld.shared.u32 %r7539, [%rd1795]; // inline asm bfe.u32 %r5253, %r5262, %r10289, %r10288; // inline asm mul.wide.u32 %rd1796, %r5253, 4; add.s64 %rd1797, %rd45, %rd1796; ld.shared.u32 %r7540, [%rd1797]; add.s32 %r7541, %r7540, %r7539; // inline asm bfe.u32 %r5257, %r5262, %r10288, %r10288; // inline asm mul.wide.u32 %rd1798, %r5257, 4; add.s64 %rd1799, %rd48, %rd1798; ld.shared.u32 %r7542, [%rd1799]; xor.b32 %r7543, %r7542, %r7541; // inline asm bfe.u32 %r5261, %r5262, %r10322, %r10288; // inline asm mul.wide.u32 %rd1800, %r5261, 4; add.s64 %rd1801, 
%rd51, %rd1800; ld.shared.u32 %r7544, [%rd1801]; add.s32 %r7545, %r7544, %r7543; xor.b32 %r7546, %r66, %r44; xor.b32 %r7547, %r7546, %r5246; xor.b32 %r5278, %r7547, %r7545; // inline asm bfe.u32 %r5265, %r5278, %r10287, %r10288; // inline asm mul.wide.u32 %rd1802, %r5265, 4; add.s64 %rd1803, %rd42, %rd1802; ld.shared.u32 %r7548, [%rd1803]; // inline asm bfe.u32 %r5269, %r5278, %r10289, %r10288; // inline asm mul.wide.u32 %rd1804, %r5269, 4; add.s64 %rd1805, %rd45, %rd1804; ld.shared.u32 %r7549, [%rd1805]; add.s32 %r7550, %r7549, %r7548; // inline asm bfe.u32 %r5273, %r5278, %r10288, %r10288; // inline asm mul.wide.u32 %rd1806, %r5273, 4; add.s64 %rd1807, %rd48, %rd1806; ld.shared.u32 %r7551, [%rd1807]; xor.b32 %r7552, %r7551, %r7550; // inline asm bfe.u32 %r5277, %r5278, %r10322, %r10288; // inline asm mul.wide.u32 %rd1808, %r5277, 4; add.s64 %rd1809, %rd51, %rd1808; ld.shared.u32 %r7553, [%rd1809]; add.s32 %r7554, %r7553, %r7552; xor.b32 %r7555, %r69, %r41; xor.b32 %r7556, %r7555, %r5262; xor.b32 %r5294, %r7556, %r7554; // inline asm bfe.u32 %r5281, %r5294, %r10287, %r10288; // inline asm mul.wide.u32 %rd1810, %r5281, 4; add.s64 %rd1811, %rd42, %rd1810; ld.shared.u32 %r7557, [%rd1811]; // inline asm bfe.u32 %r5285, %r5294, %r10289, %r10288; // inline asm mul.wide.u32 %rd1812, %r5285, 4; add.s64 %rd1813, %rd45, %rd1812; ld.shared.u32 %r7558, [%rd1813]; add.s32 %r7559, %r7558, %r7557; // inline asm bfe.u32 %r5289, %r5294, %r10288, %r10288; // inline asm mul.wide.u32 %rd1814, %r5289, 4; add.s64 %rd1815, %rd48, %rd1814; ld.shared.u32 %r7560, [%rd1815]; xor.b32 %r7561, %r7560, %r7559; // inline asm bfe.u32 %r5293, %r5294, %r10322, %r10288; // inline asm mul.wide.u32 %rd1816, %r5293, 4; add.s64 %rd1817, %rd51, %rd1816; ld.shared.u32 %r7562, [%rd1817]; add.s32 %r7563, %r7562, %r7561; xor.b32 %r7564, %r68, %r42; xor.b32 %r7565, %r7564, %r5278; xor.b32 %r5310, %r7565, %r7563; // inline asm bfe.u32 %r5297, %r5310, %r10287, %r10288; // inline asm mul.wide.u32 %rd1818, 
%r5297, 4; add.s64 %rd1819, %rd42, %rd1818; ld.shared.u32 %r7566, [%rd1819]; // inline asm bfe.u32 %r5301, %r5310, %r10289, %r10288; // inline asm mul.wide.u32 %rd1820, %r5301, 4; add.s64 %rd1821, %rd45, %rd1820; ld.shared.u32 %r7567, [%rd1821]; add.s32 %r7568, %r7567, %r7566; // inline asm bfe.u32 %r5305, %r5310, %r10288, %r10288; // inline asm mul.wide.u32 %rd1822, %r5305, 4; add.s64 %rd1823, %rd48, %rd1822; ld.shared.u32 %r7569, [%rd1823]; xor.b32 %r7570, %r7569, %r7568; // inline asm bfe.u32 %r5309, %r5310, %r10322, %r10288; // inline asm mul.wide.u32 %rd1824, %r5309, 4; add.s64 %rd1825, %rd51, %rd1824; ld.shared.u32 %r7571, [%rd1825]; add.s32 %r7572, %r7571, %r7570; xor.b32 %r7573, %r71, %r43; xor.b32 %r7574, %r7573, %r5294; xor.b32 %r5326, %r7574, %r7572; // inline asm bfe.u32 %r5313, %r5326, %r10287, %r10288; // inline asm mul.wide.u32 %rd1826, %r5313, 4; add.s64 %rd1827, %rd42, %rd1826; ld.shared.u32 %r7575, [%rd1827]; // inline asm bfe.u32 %r5317, %r5326, %r10289, %r10288; // inline asm mul.wide.u32 %rd1828, %r5317, 4; add.s64 %rd1829, %rd45, %rd1828; ld.shared.u32 %r7576, [%rd1829]; add.s32 %r7577, %r7576, %r7575; // inline asm bfe.u32 %r5321, %r5326, %r10288, %r10288; // inline asm mul.wide.u32 %rd1830, %r5321, 4; add.s64 %rd1831, %rd48, %rd1830; ld.shared.u32 %r7578, [%rd1831]; xor.b32 %r7579, %r7578, %r7577; // inline asm bfe.u32 %r5325, %r5326, %r10322, %r10288; // inline asm mul.wide.u32 %rd1832, %r5325, 4; add.s64 %rd1833, %rd51, %rd1832; ld.shared.u32 %r7580, [%rd1833]; add.s32 %r7581, %r7580, %r7579; xor.b32 %r7582, %r70, %r44; xor.b32 %r7583, %r7582, %r5310; xor.b32 %r5342, %r7583, %r7581; // inline asm bfe.u32 %r5329, %r5342, %r10287, %r10288; // inline asm mul.wide.u32 %rd1834, %r5329, 4; add.s64 %rd1835, %rd42, %rd1834; ld.shared.u32 %r7584, [%rd1835]; // inline asm bfe.u32 %r5333, %r5342, %r10289, %r10288; // inline asm mul.wide.u32 %rd1836, %r5333, 4; add.s64 %rd1837, %rd45, %rd1836; ld.shared.u32 %r7585, [%rd1837]; add.s32 %r7586, %r7585, 
%r7584; // inline asm bfe.u32 %r5337, %r5342, %r10288, %r10288; // inline asm mul.wide.u32 %rd1838, %r5337, 4; add.s64 %rd1839, %rd48, %rd1838; ld.shared.u32 %r7587, [%rd1839]; xor.b32 %r7588, %r7587, %r7586; // inline asm bfe.u32 %r5341, %r5342, %r10322, %r10288; // inline asm mul.wide.u32 %rd1840, %r5341, 4; add.s64 %rd1841, %rd51, %rd1840; ld.shared.u32 %r7589, [%rd1841]; add.s32 %r7590, %r7589, %r7588; xor.b32 %r7591, %r73, %r41; xor.b32 %r7592, %r7591, %r5326; xor.b32 %r5358, %r7592, %r7590; // inline asm bfe.u32 %r5345, %r5358, %r10287, %r10288; // inline asm mul.wide.u32 %rd1842, %r5345, 4; add.s64 %rd1843, %rd42, %rd1842; ld.shared.u32 %r7593, [%rd1843]; // inline asm bfe.u32 %r5349, %r5358, %r10289, %r10288; // inline asm mul.wide.u32 %rd1844, %r5349, 4; add.s64 %rd1845, %rd45, %rd1844; ld.shared.u32 %r7594, [%rd1845]; add.s32 %r7595, %r7594, %r7593; // inline asm bfe.u32 %r5353, %r5358, %r10288, %r10288; // inline asm mul.wide.u32 %rd1846, %r5353, 4; add.s64 %rd1847, %rd48, %rd1846; ld.shared.u32 %r7596, [%rd1847]; xor.b32 %r7597, %r7596, %r7595; // inline asm bfe.u32 %r5357, %r5358, %r10322, %r10288; // inline asm mul.wide.u32 %rd1848, %r5357, 4; add.s64 %rd1849, %rd51, %rd1848; ld.shared.u32 %r7598, [%rd1849]; add.s32 %r7599, %r7598, %r7597; xor.b32 %r7600, %r72, %r42; xor.b32 %r7601, %r7600, %r5342; xor.b32 %r5374, %r7601, %r7599; // inline asm bfe.u32 %r5361, %r5374, %r10287, %r10288; // inline asm mul.wide.u32 %rd1850, %r5361, 4; add.s64 %rd1851, %rd42, %rd1850; ld.shared.u32 %r7602, [%rd1851]; // inline asm bfe.u32 %r5365, %r5374, %r10289, %r10288; // inline asm mul.wide.u32 %rd1852, %r5365, 4; add.s64 %rd1853, %rd45, %rd1852; ld.shared.u32 %r7603, [%rd1853]; add.s32 %r7604, %r7603, %r7602; // inline asm bfe.u32 %r5369, %r5374, %r10288, %r10288; // inline asm mul.wide.u32 %rd1854, %r5369, 4; add.s64 %rd1855, %rd48, %rd1854; ld.shared.u32 %r7605, [%rd1855]; xor.b32 %r7606, %r7605, %r7604; // inline asm bfe.u32 %r5373, %r5374, %r10322, %r10288; // 
inline asm mul.wide.u32 %rd1856, %r5373, 4; add.s64 %rd1857, %rd51, %rd1856; ld.shared.u32 %r7607, [%rd1857]; add.s32 %r7608, %r7607, %r7606; xor.b32 %r7609, %r75, %r43; xor.b32 %r7610, %r7609, %r5358; xor.b32 %r5390, %r7610, %r7608; // inline asm bfe.u32 %r5377, %r5390, %r10287, %r10288; // inline asm mul.wide.u32 %rd1858, %r5377, 4; add.s64 %rd1859, %rd42, %rd1858; ld.shared.u32 %r7611, [%rd1859]; // inline asm bfe.u32 %r5381, %r5390, %r10289, %r10288; // inline asm mul.wide.u32 %rd1860, %r5381, 4; add.s64 %rd1861, %rd45, %rd1860; ld.shared.u32 %r7612, [%rd1861]; add.s32 %r7613, %r7612, %r7611; // inline asm bfe.u32 %r5385, %r5390, %r10288, %r10288; // inline asm mul.wide.u32 %rd1862, %r5385, 4; add.s64 %rd1863, %rd48, %rd1862; ld.shared.u32 %r7614, [%rd1863]; xor.b32 %r7615, %r7614, %r7613; // inline asm bfe.u32 %r5389, %r5390, %r10322, %r10288; // inline asm mul.wide.u32 %rd1864, %r5389, 4; add.s64 %rd1865, %rd51, %rd1864; ld.shared.u32 %r7616, [%rd1865]; add.s32 %r7617, %r7616, %r7615; xor.b32 %r7618, %r74, %r44; xor.b32 %r7619, %r7618, %r5374; xor.b32 %r5406, %r7619, %r7617; // inline asm bfe.u32 %r5393, %r5406, %r10287, %r10288; // inline asm mul.wide.u32 %rd1866, %r5393, 4; add.s64 %rd1867, %rd42, %rd1866; ld.shared.u32 %r7620, [%rd1867]; // inline asm bfe.u32 %r5397, %r5406, %r10289, %r10288; // inline asm mul.wide.u32 %rd1868, %r5397, 4; add.s64 %rd1869, %rd45, %rd1868; ld.shared.u32 %r7621, [%rd1869]; add.s32 %r7622, %r7621, %r7620; // inline asm bfe.u32 %r5401, %r5406, %r10288, %r10288; // inline asm mul.wide.u32 %rd1870, %r5401, 4; add.s64 %rd1871, %rd48, %rd1870; ld.shared.u32 %r7623, [%rd1871]; xor.b32 %r7624, %r7623, %r7622; // inline asm bfe.u32 %r5405, %r5406, %r10322, %r10288; // inline asm mul.wide.u32 %rd1872, %r5405, 4; add.s64 %rd1873, %rd51, %rd1872; ld.shared.u32 %r7625, [%rd1873]; add.s32 %r7626, %r7625, %r7624; xor.b32 %r7627, %r77, %r41; xor.b32 %r7628, %r7627, %r5390; xor.b32 %r5422, %r7628, %r7626; // inline asm bfe.u32 %r5409, %r5422, 
%r10287, %r10288; // inline asm mul.wide.u32 %rd1874, %r5409, 4; add.s64 %rd1875, %rd42, %rd1874; ld.shared.u32 %r7629, [%rd1875]; // inline asm bfe.u32 %r5413, %r5422, %r10289, %r10288; // inline asm mul.wide.u32 %rd1876, %r5413, 4; add.s64 %rd1877, %rd45, %rd1876; ld.shared.u32 %r7630, [%rd1877]; add.s32 %r7631, %r7630, %r7629; // inline asm bfe.u32 %r5417, %r5422, %r10288, %r10288; // inline asm mul.wide.u32 %rd1878, %r5417, 4; add.s64 %rd1879, %rd48, %rd1878; ld.shared.u32 %r7632, [%rd1879]; xor.b32 %r7633, %r7632, %r7631; // inline asm bfe.u32 %r5421, %r5422, %r10322, %r10288; // inline asm mul.wide.u32 %rd1880, %r5421, 4; add.s64 %rd1881, %rd51, %rd1880; ld.shared.u32 %r7634, [%rd1881]; add.s32 %r7635, %r7634, %r7633; xor.b32 %r7636, %r76, %r42; xor.b32 %r7637, %r7636, %r5406; xor.b32 %r5438, %r7637, %r7635; // inline asm bfe.u32 %r5425, %r5438, %r10287, %r10288; // inline asm mul.wide.u32 %rd1882, %r5425, 4; add.s64 %rd1883, %rd42, %rd1882; ld.shared.u32 %r7638, [%rd1883]; // inline asm bfe.u32 %r5429, %r5438, %r10289, %r10288; // inline asm mul.wide.u32 %rd1884, %r5429, 4; add.s64 %rd1885, %rd45, %rd1884; ld.shared.u32 %r7639, [%rd1885]; add.s32 %r7640, %r7639, %r7638; // inline asm bfe.u32 %r5433, %r5438, %r10288, %r10288; // inline asm mul.wide.u32 %rd1886, %r5433, 4; add.s64 %rd1887, %rd48, %rd1886; ld.shared.u32 %r7641, [%rd1887]; xor.b32 %r7642, %r7641, %r7640; // inline asm bfe.u32 %r5437, %r5438, %r10322, %r10288; // inline asm mul.wide.u32 %rd1888, %r5437, 4; add.s64 %rd1889, %rd51, %rd1888; ld.shared.u32 %r7643, [%rd1889]; add.s32 %r7644, %r7643, %r7642; xor.b32 %r7645, %r79, %r43; xor.b32 %r7646, %r7645, %r5422; xor.b32 %r5454, %r7646, %r7644; // inline asm bfe.u32 %r5441, %r5454, %r10287, %r10288; // inline asm mul.wide.u32 %rd1890, %r5441, 4; add.s64 %rd1891, %rd42, %rd1890; ld.shared.u32 %r7647, [%rd1891]; // inline asm bfe.u32 %r5445, %r5454, %r10289, %r10288; // inline asm mul.wide.u32 %rd1892, %r5445, 4; add.s64 %rd1893, %rd45, %rd1892; 
ld.shared.u32 %r7648, [%rd1893]; add.s32 %r7649, %r7648, %r7647; // inline asm bfe.u32 %r5449, %r5454, %r10288, %r10288; // inline asm mul.wide.u32 %rd1894, %r5449, 4; add.s64 %rd1895, %rd48, %rd1894; ld.shared.u32 %r7650, [%rd1895]; xor.b32 %r7651, %r7650, %r7649; // inline asm bfe.u32 %r5453, %r5454, %r10322, %r10288; // inline asm mul.wide.u32 %rd1896, %r5453, 4; add.s64 %rd1897, %rd51, %rd1896; ld.shared.u32 %r7652, [%rd1897]; add.s32 %r7653, %r7652, %r7651; xor.b32 %r7654, %r78, %r44; xor.b32 %r7655, %r7654, %r5438; xor.b32 %r5470, %r7655, %r7653; // inline asm bfe.u32 %r5457, %r5470, %r10287, %r10288; // inline asm mul.wide.u32 %rd1898, %r5457, 4; add.s64 %rd1899, %rd42, %rd1898; ld.shared.u32 %r7656, [%rd1899]; // inline asm bfe.u32 %r5461, %r5470, %r10289, %r10288; // inline asm mul.wide.u32 %rd1900, %r5461, 4; add.s64 %rd1901, %rd45, %rd1900; ld.shared.u32 %r7657, [%rd1901]; add.s32 %r7658, %r7657, %r7656; // inline asm bfe.u32 %r5465, %r5470, %r10288, %r10288; // inline asm mul.wide.u32 %rd1902, %r5465, 4; add.s64 %rd1903, %rd48, %rd1902; ld.shared.u32 %r7659, [%rd1903]; xor.b32 %r7660, %r7659, %r7658; // inline asm bfe.u32 %r5469, %r5470, %r10322, %r10288; // inline asm mul.wide.u32 %rd1904, %r5469, 4; add.s64 %rd1905, %rd51, %rd1904; ld.shared.u32 %r7661, [%rd1905]; add.s32 %r7662, %r7661, %r7660; xor.b32 %r7663, %r81, %r41; xor.b32 %r7664, %r7663, %r5454; xor.b32 %r10350, %r7664, %r7662; xor.b32 %r7665, %r80, %r42; xor.b32 %r10351, %r7665, %r5470; // inline asm bfe.u32 %r5473, %r10322, %r10287, %r10288; // inline asm mul.wide.u32 %rd1906, %r5473, 4; add.s64 %rd1907, %rd42, %rd1906; ld.shared.u32 %r7666, [%rd1907]; // inline asm bfe.u32 %r5477, %r10322, %r10289, %r10288; // inline asm mul.wide.u32 %rd1908, %r5477, 4; add.s64 %rd1909, %rd45, %rd1908; ld.shared.u32 %r7667, [%rd1909]; add.s32 %r7668, %r7667, %r7666; // inline asm bfe.u32 %r5481, %r10322, %r10288, %r10288; // inline asm mul.wide.u32 %rd1910, %r5481, 4; add.s64 %rd1911, %rd48, %rd1910; 
ld.shared.u32 %r7669, [%rd1911]; xor.b32 %r7670, %r7669, %r7668; // inline asm bfe.u32 %r5485, %r10322, %r10322, %r10288; // inline asm mul.wide.u32 %rd1912, %r5485, 4; add.s64 %rd1913, %rd51, %rd1912; ld.shared.u32 %r7671, [%rd1913]; add.s32 %r5502, %r7671, %r7670; // inline asm bfe.u32 %r5489, %r5502, %r10287, %r10288; // inline asm mul.wide.u32 %rd1914, %r5489, 4; add.s64 %rd1915, %rd42, %rd1914; ld.shared.u32 %r7672, [%rd1915]; // inline asm bfe.u32 %r5493, %r5502, %r10289, %r10288; // inline asm mul.wide.u32 %rd1916, %r5493, 4; add.s64 %rd1917, %rd45, %rd1916; ld.shared.u32 %r7673, [%rd1917]; add.s32 %r7674, %r7673, %r7672; // inline asm bfe.u32 %r5497, %r5502, %r10288, %r10288; // inline asm mul.wide.u32 %rd1918, %r5497, 4; add.s64 %rd1919, %rd48, %rd1918; ld.shared.u32 %r7675, [%rd1919]; xor.b32 %r7676, %r7675, %r7674; // inline asm bfe.u32 %r5501, %r5502, %r10322, %r10288; // inline asm mul.wide.u32 %rd1920, %r5501, 4; add.s64 %rd1921, %rd51, %rd1920; ld.shared.u32 %r7677, [%rd1921]; add.s32 %r7678, %r7677, %r7676; xor.b32 %r5518, %r7537, %r7678; // inline asm bfe.u32 %r5505, %r5518, %r10287, %r10288; // inline asm mul.wide.u32 %rd1922, %r5505, 4; add.s64 %rd1923, %rd42, %rd1922; ld.shared.u32 %r7679, [%rd1923]; // inline asm bfe.u32 %r5509, %r5518, %r10289, %r10288; // inline asm mul.wide.u32 %rd1924, %r5509, 4; add.s64 %rd1925, %rd45, %rd1924; ld.shared.u32 %r7680, [%rd1925]; add.s32 %r7681, %r7680, %r7679; // inline asm bfe.u32 %r5513, %r5518, %r10288, %r10288; // inline asm mul.wide.u32 %rd1926, %r5513, 4; add.s64 %rd1927, %rd48, %rd1926; ld.shared.u32 %r7682, [%rd1927]; xor.b32 %r7683, %r7682, %r7681; // inline asm bfe.u32 %r5517, %r5518, %r10322, %r10288; // inline asm mul.wide.u32 %rd1928, %r5517, 4; add.s64 %rd1929, %rd51, %rd1928; ld.shared.u32 %r7684, [%rd1929]; add.s32 %r7685, %r7684, %r7683; xor.b32 %r7686, %r7546, %r5502; xor.b32 %r5534, %r7686, %r7685; // inline asm bfe.u32 %r5521, %r5534, %r10287, %r10288; // inline asm mul.wide.u32 %rd1930, 
%r5521, 4; add.s64 %rd1931, %rd42, %rd1930; ld.shared.u32 %r7687, [%rd1931]; // inline asm bfe.u32 %r5525, %r5534, %r10289, %r10288; // inline asm mul.wide.u32 %rd1932, %r5525, 4; add.s64 %rd1933, %rd45, %rd1932; ld.shared.u32 %r7688, [%rd1933]; add.s32 %r7689, %r7688, %r7687; // inline asm bfe.u32 %r5529, %r5534, %r10288, %r10288; // inline asm mul.wide.u32 %rd1934, %r5529, 4; add.s64 %rd1935, %rd48, %rd1934; ld.shared.u32 %r7690, [%rd1935]; xor.b32 %r7691, %r7690, %r7689; // inline asm bfe.u32 %r5533, %r5534, %r10322, %r10288; // inline asm mul.wide.u32 %rd1936, %r5533, 4; add.s64 %rd1937, %rd51, %rd1936; ld.shared.u32 %r7692, [%rd1937]; add.s32 %r7693, %r7692, %r7691; xor.b32 %r7694, %r7555, %r5518; xor.b32 %r5550, %r7694, %r7693; // inline asm bfe.u32 %r5537, %r5550, %r10287, %r10288; // inline asm mul.wide.u32 %rd1938, %r5537, 4; add.s64 %rd1939, %rd42, %rd1938; ld.shared.u32 %r7695, [%rd1939]; // inline asm bfe.u32 %r5541, %r5550, %r10289, %r10288; // inline asm mul.wide.u32 %rd1940, %r5541, 4; add.s64 %rd1941, %rd45, %rd1940; ld.shared.u32 %r7696, [%rd1941]; add.s32 %r7697, %r7696, %r7695; // inline asm bfe.u32 %r5545, %r5550, %r10288, %r10288; // inline asm mul.wide.u32 %rd1942, %r5545, 4; add.s64 %rd1943, %rd48, %rd1942; ld.shared.u32 %r7698, [%rd1943]; xor.b32 %r7699, %r7698, %r7697; // inline asm bfe.u32 %r5549, %r5550, %r10322, %r10288; // inline asm mul.wide.u32 %rd1944, %r5549, 4; add.s64 %rd1945, %rd51, %rd1944; ld.shared.u32 %r7700, [%rd1945]; add.s32 %r7701, %r7700, %r7699; xor.b32 %r7702, %r7564, %r5534; xor.b32 %r5566, %r7702, %r7701; // inline asm bfe.u32 %r5553, %r5566, %r10287, %r10288; // inline asm mul.wide.u32 %rd1946, %r5553, 4; add.s64 %rd1947, %rd42, %rd1946; ld.shared.u32 %r7703, [%rd1947]; // inline asm bfe.u32 %r5557, %r5566, %r10289, %r10288; // inline asm mul.wide.u32 %rd1948, %r5557, 4; add.s64 %rd1949, %rd45, %rd1948; ld.shared.u32 %r7704, [%rd1949]; add.s32 %r7705, %r7704, %r7703; // inline asm bfe.u32 %r5561, %r5566, %r10288, 
%r10288; // inline asm mul.wide.u32 %rd1950, %r5561, 4; add.s64 %rd1951, %rd48, %rd1950; ld.shared.u32 %r7706, [%rd1951]; xor.b32 %r7707, %r7706, %r7705; // inline asm bfe.u32 %r5565, %r5566, %r10322, %r10288; // inline asm mul.wide.u32 %rd1952, %r5565, 4; add.s64 %rd1953, %rd51, %rd1952; ld.shared.u32 %r7708, [%rd1953]; add.s32 %r7709, %r7708, %r7707; xor.b32 %r7710, %r7573, %r5550; xor.b32 %r5582, %r7710, %r7709; // inline asm bfe.u32 %r5569, %r5582, %r10287, %r10288; // inline asm mul.wide.u32 %rd1954, %r5569, 4; add.s64 %rd1955, %rd42, %rd1954; ld.shared.u32 %r7711, [%rd1955]; // inline asm bfe.u32 %r5573, %r5582, %r10289, %r10288; // inline asm mul.wide.u32 %rd1956, %r5573, 4; add.s64 %rd1957, %rd45, %rd1956; ld.shared.u32 %r7712, [%rd1957]; add.s32 %r7713, %r7712, %r7711; // inline asm bfe.u32 %r5577, %r5582, %r10288, %r10288; // inline asm mul.wide.u32 %rd1958, %r5577, 4; add.s64 %rd1959, %rd48, %rd1958; ld.shared.u32 %r7714, [%rd1959]; xor.b32 %r7715, %r7714, %r7713; // inline asm bfe.u32 %r5581, %r5582, %r10322, %r10288; // inline asm mul.wide.u32 %rd1960, %r5581, 4; add.s64 %rd1961, %rd51, %rd1960; ld.shared.u32 %r7716, [%rd1961]; add.s32 %r7717, %r7716, %r7715; xor.b32 %r7718, %r7582, %r5566; xor.b32 %r5598, %r7718, %r7717; // inline asm bfe.u32 %r5585, %r5598, %r10287, %r10288; // inline asm mul.wide.u32 %rd1962, %r5585, 4; add.s64 %rd1963, %rd42, %rd1962; ld.shared.u32 %r7719, [%rd1963]; // inline asm bfe.u32 %r5589, %r5598, %r10289, %r10288; // inline asm mul.wide.u32 %rd1964, %r5589, 4; add.s64 %rd1965, %rd45, %rd1964; ld.shared.u32 %r7720, [%rd1965]; add.s32 %r7721, %r7720, %r7719; // inline asm bfe.u32 %r5593, %r5598, %r10288, %r10288; // inline asm mul.wide.u32 %rd1966, %r5593, 4; add.s64 %rd1967, %rd48, %rd1966; ld.shared.u32 %r7722, [%rd1967]; xor.b32 %r7723, %r7722, %r7721; // inline asm bfe.u32 %r5597, %r5598, %r10322, %r10288; // inline asm mul.wide.u32 %rd1968, %r5597, 4; add.s64 %rd1969, %rd51, %rd1968; ld.shared.u32 %r7724, [%rd1969]; 
add.s32 %r7725, %r7724, %r7723; xor.b32 %r7726, %r7591, %r5582; xor.b32 %r5614, %r7726, %r7725; // inline asm bfe.u32 %r5601, %r5614, %r10287, %r10288; // inline asm mul.wide.u32 %rd1970, %r5601, 4; add.s64 %rd1971, %rd42, %rd1970; ld.shared.u32 %r7727, [%rd1971]; // inline asm bfe.u32 %r5605, %r5614, %r10289, %r10288; // inline asm mul.wide.u32 %rd1972, %r5605, 4; add.s64 %rd1973, %rd45, %rd1972; ld.shared.u32 %r7728, [%rd1973]; add.s32 %r7729, %r7728, %r7727; // inline asm bfe.u32 %r5609, %r5614, %r10288, %r10288; // inline asm mul.wide.u32 %rd1974, %r5609, 4; add.s64 %rd1975, %rd48, %rd1974; ld.shared.u32 %r7730, [%rd1975]; xor.b32 %r7731, %r7730, %r7729; // inline asm bfe.u32 %r5613, %r5614, %r10322, %r10288; // inline asm mul.wide.u32 %rd1976, %r5613, 4; add.s64 %rd1977, %rd51, %rd1976; ld.shared.u32 %r7732, [%rd1977]; add.s32 %r7733, %r7732, %r7731; xor.b32 %r7734, %r7600, %r5598; xor.b32 %r5630, %r7734, %r7733; // inline asm bfe.u32 %r5617, %r5630, %r10287, %r10288; // inline asm mul.wide.u32 %rd1978, %r5617, 4; add.s64 %rd1979, %rd42, %rd1978; ld.shared.u32 %r7735, [%rd1979]; // inline asm bfe.u32 %r5621, %r5630, %r10289, %r10288; // inline asm mul.wide.u32 %rd1980, %r5621, 4; add.s64 %rd1981, %rd45, %rd1980; ld.shared.u32 %r7736, [%rd1981]; add.s32 %r7737, %r7736, %r7735; // inline asm bfe.u32 %r5625, %r5630, %r10288, %r10288; // inline asm mul.wide.u32 %rd1982, %r5625, 4; add.s64 %rd1983, %rd48, %rd1982; ld.shared.u32 %r7738, [%rd1983]; xor.b32 %r7739, %r7738, %r7737; // inline asm bfe.u32 %r5629, %r5630, %r10322, %r10288; // inline asm mul.wide.u32 %rd1984, %r5629, 4; add.s64 %rd1985, %rd51, %rd1984; ld.shared.u32 %r7740, [%rd1985]; add.s32 %r7741, %r7740, %r7739; xor.b32 %r7742, %r7609, %r5614; xor.b32 %r5646, %r7742, %r7741; // inline asm bfe.u32 %r5633, %r5646, %r10287, %r10288; // inline asm mul.wide.u32 %rd1986, %r5633, 4; add.s64 %rd1987, %rd42, %rd1986; ld.shared.u32 %r7743, [%rd1987]; // inline asm bfe.u32 %r5637, %r5646, %r10289, %r10288; // 
inline asm mul.wide.u32 %rd1988, %r5637, 4; add.s64 %rd1989, %rd45, %rd1988; ld.shared.u32 %r7744, [%rd1989]; add.s32 %r7745, %r7744, %r7743; // inline asm bfe.u32 %r5641, %r5646, %r10288, %r10288; // inline asm mul.wide.u32 %rd1990, %r5641, 4; add.s64 %rd1991, %rd48, %rd1990; ld.shared.u32 %r7746, [%rd1991]; xor.b32 %r7747, %r7746, %r7745; // inline asm bfe.u32 %r5645, %r5646, %r10322, %r10288; // inline asm mul.wide.u32 %rd1992, %r5645, 4; add.s64 %rd1993, %rd51, %rd1992; ld.shared.u32 %r7748, [%rd1993]; add.s32 %r7749, %r7748, %r7747; xor.b32 %r7750, %r7618, %r5630; xor.b32 %r5662, %r7750, %r7749; // inline asm bfe.u32 %r5649, %r5662, %r10287, %r10288; // inline asm mul.wide.u32 %rd1994, %r5649, 4; add.s64 %rd1995, %rd42, %rd1994; ld.shared.u32 %r7751, [%rd1995]; // inline asm bfe.u32 %r5653, %r5662, %r10289, %r10288; // inline asm mul.wide.u32 %rd1996, %r5653, 4; add.s64 %rd1997, %rd45, %rd1996; ld.shared.u32 %r7752, [%rd1997]; add.s32 %r7753, %r7752, %r7751; // inline asm bfe.u32 %r5657, %r5662, %r10288, %r10288; // inline asm mul.wide.u32 %rd1998, %r5657, 4; add.s64 %rd1999, %rd48, %rd1998; ld.shared.u32 %r7754, [%rd1999]; xor.b32 %r7755, %r7754, %r7753; // inline asm bfe.u32 %r5661, %r5662, %r10322, %r10288; // inline asm mul.wide.u32 %rd2000, %r5661, 4; add.s64 %rd2001, %rd51, %rd2000; ld.shared.u32 %r7756, [%rd2001]; add.s32 %r7757, %r7756, %r7755; xor.b32 %r7758, %r7627, %r5646; xor.b32 %r5678, %r7758, %r7757; // inline asm bfe.u32 %r5665, %r5678, %r10287, %r10288; // inline asm mul.wide.u32 %rd2002, %r5665, 4; add.s64 %rd2003, %rd42, %rd2002; ld.shared.u32 %r7759, [%rd2003]; // inline asm bfe.u32 %r5669, %r5678, %r10289, %r10288; // inline asm mul.wide.u32 %rd2004, %r5669, 4; add.s64 %rd2005, %rd45, %rd2004; ld.shared.u32 %r7760, [%rd2005]; add.s32 %r7761, %r7760, %r7759; // inline asm bfe.u32 %r5673, %r5678, %r10288, %r10288; // inline asm mul.wide.u32 %rd2006, %r5673, 4; add.s64 %rd2007, %rd48, %rd2006; ld.shared.u32 %r7762, [%rd2007]; xor.b32 %r7763, 
%r7762, %r7761; // inline asm bfe.u32 %r5677, %r5678, %r10322, %r10288; // inline asm mul.wide.u32 %rd2008, %r5677, 4; add.s64 %rd2009, %rd51, %rd2008; ld.shared.u32 %r7764, [%rd2009]; add.s32 %r7765, %r7764, %r7763; xor.b32 %r7766, %r7636, %r5662; xor.b32 %r5694, %r7766, %r7765; // inline asm bfe.u32 %r5681, %r5694, %r10287, %r10288; // inline asm mul.wide.u32 %rd2010, %r5681, 4; add.s64 %rd2011, %rd42, %rd2010; ld.shared.u32 %r7767, [%rd2011]; // inline asm bfe.u32 %r5685, %r5694, %r10289, %r10288; // inline asm mul.wide.u32 %rd2012, %r5685, 4; add.s64 %rd2013, %rd45, %rd2012; ld.shared.u32 %r7768, [%rd2013]; add.s32 %r7769, %r7768, %r7767; // inline asm bfe.u32 %r5689, %r5694, %r10288, %r10288; // inline asm mul.wide.u32 %rd2014, %r5689, 4; add.s64 %rd2015, %rd48, %rd2014; ld.shared.u32 %r7770, [%rd2015]; xor.b32 %r7771, %r7770, %r7769; // inline asm bfe.u32 %r5693, %r5694, %r10322, %r10288; // inline asm mul.wide.u32 %rd2016, %r5693, 4; add.s64 %rd2017, %rd51, %rd2016; ld.shared.u32 %r7772, [%rd2017]; add.s32 %r7773, %r7772, %r7771; xor.b32 %r7774, %r7645, %r5678; xor.b32 %r5710, %r7774, %r7773; // inline asm bfe.u32 %r5697, %r5710, %r10287, %r10288; // inline asm mul.wide.u32 %rd2018, %r5697, 4; add.s64 %rd2019, %rd42, %rd2018; ld.shared.u32 %r7775, [%rd2019]; // inline asm bfe.u32 %r5701, %r5710, %r10289, %r10288; // inline asm mul.wide.u32 %rd2020, %r5701, 4; add.s64 %rd2021, %rd45, %rd2020; ld.shared.u32 %r7776, [%rd2021]; add.s32 %r7777, %r7776, %r7775; // inline asm bfe.u32 %r5705, %r5710, %r10288, %r10288; // inline asm mul.wide.u32 %rd2022, %r5705, 4; add.s64 %rd2023, %rd48, %rd2022; ld.shared.u32 %r7778, [%rd2023]; xor.b32 %r7779, %r7778, %r7777; // inline asm bfe.u32 %r5709, %r5710, %r10322, %r10288; // inline asm mul.wide.u32 %rd2024, %r5709, 4; add.s64 %rd2025, %rd51, %rd2024; ld.shared.u32 %r7780, [%rd2025]; add.s32 %r7781, %r7780, %r7779; xor.b32 %r7782, %r7654, %r5694; xor.b32 %r5726, %r7782, %r7781; // inline asm bfe.u32 %r5713, %r5726, %r10287, 
%r10288; // inline asm mul.wide.u32 %rd2026, %r5713, 4; add.s64 %rd2027, %rd42, %rd2026; ld.shared.u32 %r7783, [%rd2027]; // inline asm bfe.u32 %r5717, %r5726, %r10289, %r10288; // inline asm mul.wide.u32 %rd2028, %r5717, 4; add.s64 %rd2029, %rd45, %rd2028; ld.shared.u32 %r7784, [%rd2029]; add.s32 %r7785, %r7784, %r7783; // inline asm bfe.u32 %r5721, %r5726, %r10288, %r10288; // inline asm mul.wide.u32 %rd2030, %r5721, 4; add.s64 %rd2031, %rd48, %rd2030; ld.shared.u32 %r7786, [%rd2031]; xor.b32 %r7787, %r7786, %r7785; // inline asm bfe.u32 %r5725, %r5726, %r10322, %r10288; // inline asm mul.wide.u32 %rd2032, %r5725, 4; add.s64 %rd2033, %rd51, %rd2032; ld.shared.u32 %r7788, [%rd2033]; add.s32 %r7789, %r7788, %r7787; xor.b32 %r7790, %r7663, %r5710; xor.b32 %r10348, %r7790, %r7789; xor.b32 %r10349, %r7665, %r5726; xor.b32 %r5742, %r5470, %r5726; // inline asm bfe.u32 %r5729, %r5742, %r10287, %r10288; // inline asm mul.wide.u32 %rd2034, %r5729, 4; add.s64 %rd2035, %rd42, %rd2034; ld.shared.u32 %r7791, [%rd2035]; // inline asm bfe.u32 %r5733, %r5742, %r10289, %r10288; // inline asm mul.wide.u32 %rd2036, %r5733, 4; add.s64 %rd2037, %rd45, %rd2036; ld.shared.u32 %r7792, [%rd2037]; add.s32 %r7793, %r7792, %r7791; // inline asm bfe.u32 %r5737, %r5742, %r10288, %r10288; // inline asm mul.wide.u32 %rd2038, %r5737, 4; add.s64 %rd2039, %rd48, %rd2038; ld.shared.u32 %r7794, [%rd2039]; xor.b32 %r7795, %r7794, %r7793; // inline asm bfe.u32 %r5741, %r5742, %r10322, %r10288; // inline asm mul.wide.u32 %rd2040, %r5741, 4; add.s64 %rd2041, %rd51, %rd2040; ld.shared.u32 %r7796, [%rd2041]; add.s32 %r7797, %r7796, %r7795; xor.b32 %r7798, %r10350, %r10348; xor.b32 %r5758, %r7798, %r7797; // inline asm bfe.u32 %r5745, %r5758, %r10287, %r10288; // inline asm mul.wide.u32 %rd2042, %r5745, 4; add.s64 %rd2043, %rd42, %rd2042; ld.shared.u32 %r7799, [%rd2043]; // inline asm bfe.u32 %r5749, %r5758, %r10289, %r10288; // inline asm mul.wide.u32 %rd2044, %r5749, 4; add.s64 %rd2045, %rd45, %rd2044; 
ld.shared.u32 %r7800, [%rd2045]; add.s32 %r7801, %r7800, %r7799; // inline asm bfe.u32 %r5753, %r5758, %r10288, %r10288; // inline asm mul.wide.u32 %rd2046, %r5753, 4; add.s64 %rd2047, %rd48, %rd2046; ld.shared.u32 %r7802, [%rd2047]; xor.b32 %r7803, %r7802, %r7801; // inline asm bfe.u32 %r5757, %r5758, %r10322, %r10288; // inline asm mul.wide.u32 %rd2048, %r5757, 4; add.s64 %rd2049, %rd51, %rd2048; ld.shared.u32 %r7804, [%rd2049]; add.s32 %r7805, %r7804, %r7803; xor.b32 %r5774, %r10351, %r7805; // inline asm bfe.u32 %r5761, %r5774, %r10287, %r10288; // inline asm mul.wide.u32 %rd2050, %r5761, 4; add.s64 %rd2051, %rd42, %rd2050; ld.shared.u32 %r7806, [%rd2051]; // inline asm bfe.u32 %r5765, %r5774, %r10289, %r10288; // inline asm mul.wide.u32 %rd2052, %r5765, 4; add.s64 %rd2053, %rd45, %rd2052; ld.shared.u32 %r7807, [%rd2053]; add.s32 %r7808, %r7807, %r7806; // inline asm bfe.u32 %r5769, %r5774, %r10288, %r10288; // inline asm mul.wide.u32 %rd2054, %r5769, 4; add.s64 %rd2055, %rd48, %rd2054; ld.shared.u32 %r7809, [%rd2055]; xor.b32 %r7810, %r7809, %r7808; // inline asm bfe.u32 %r5773, %r5774, %r10322, %r10288; // inline asm mul.wide.u32 %rd2056, %r5773, 4; add.s64 %rd2057, %rd51, %rd2056; ld.shared.u32 %r7811, [%rd2057]; add.s32 %r7812, %r7811, %r7810; xor.b32 %r7813, %r10350, %r7797; xor.b32 %r5790, %r7813, %r7812; // inline asm bfe.u32 %r5777, %r5790, %r10287, %r10288; // inline asm mul.wide.u32 %rd2058, %r5777, 4; add.s64 %rd2059, %rd42, %rd2058; ld.shared.u32 %r7814, [%rd2059]; // inline asm bfe.u32 %r5781, %r5790, %r10289, %r10288; // inline asm mul.wide.u32 %rd2060, %r5781, 4; add.s64 %rd2061, %rd45, %rd2060; ld.shared.u32 %r7815, [%rd2061]; add.s32 %r7816, %r7815, %r7814; // inline asm bfe.u32 %r5785, %r5790, %r10288, %r10288; // inline asm mul.wide.u32 %rd2062, %r5785, 4; add.s64 %rd2063, %rd48, %rd2062; ld.shared.u32 %r7817, [%rd2063]; xor.b32 %r7818, %r7817, %r7816; // inline asm bfe.u32 %r5789, %r5790, %r10322, %r10288; // inline asm mul.wide.u32 %rd2064, 
%r5789, 4; add.s64 %rd2065, %rd51, %rd2064; ld.shared.u32 %r7819, [%rd2065]; add.s32 %r7820, %r7819, %r7818; xor.b32 %r7821, %r7555, %r5774; xor.b32 %r5806, %r7821, %r7820; // inline asm bfe.u32 %r5793, %r5806, %r10287, %r10288; // inline asm mul.wide.u32 %rd2066, %r5793, 4; add.s64 %rd2067, %rd42, %rd2066; ld.shared.u32 %r7822, [%rd2067]; // inline asm bfe.u32 %r5797, %r5806, %r10289, %r10288; // inline asm mul.wide.u32 %rd2068, %r5797, 4; add.s64 %rd2069, %rd45, %rd2068; ld.shared.u32 %r7823, [%rd2069]; add.s32 %r7824, %r7823, %r7822; // inline asm bfe.u32 %r5801, %r5806, %r10288, %r10288; // inline asm mul.wide.u32 %rd2070, %r5801, 4; add.s64 %rd2071, %rd48, %rd2070; ld.shared.u32 %r7825, [%rd2071]; xor.b32 %r7826, %r7825, %r7824; // inline asm bfe.u32 %r5805, %r5806, %r10322, %r10288; // inline asm mul.wide.u32 %rd2072, %r5805, 4; add.s64 %rd2073, %rd51, %rd2072; ld.shared.u32 %r7827, [%rd2073]; add.s32 %r7828, %r7827, %r7826; xor.b32 %r7829, %r7564, %r5790; xor.b32 %r5822, %r7829, %r7828; // inline asm bfe.u32 %r5809, %r5822, %r10287, %r10288; // inline asm mul.wide.u32 %rd2074, %r5809, 4; add.s64 %rd2075, %rd42, %rd2074; ld.shared.u32 %r7830, [%rd2075]; // inline asm bfe.u32 %r5813, %r5822, %r10289, %r10288; // inline asm mul.wide.u32 %rd2076, %r5813, 4; add.s64 %rd2077, %rd45, %rd2076; ld.shared.u32 %r7831, [%rd2077]; add.s32 %r7832, %r7831, %r7830; // inline asm bfe.u32 %r5817, %r5822, %r10288, %r10288; // inline asm mul.wide.u32 %rd2078, %r5817, 4; add.s64 %rd2079, %rd48, %rd2078; ld.shared.u32 %r7833, [%rd2079]; xor.b32 %r7834, %r7833, %r7832; // inline asm bfe.u32 %r5821, %r5822, %r10322, %r10288; // inline asm mul.wide.u32 %rd2080, %r5821, 4; add.s64 %rd2081, %rd51, %rd2080; ld.shared.u32 %r7835, [%rd2081]; add.s32 %r7836, %r7835, %r7834; xor.b32 %r7837, %r7573, %r5806; xor.b32 %r5838, %r7837, %r7836; // inline asm bfe.u32 %r5825, %r5838, %r10287, %r10288; // inline asm mul.wide.u32 %rd2082, %r5825, 4; add.s64 %rd2083, %rd42, %rd2082; ld.shared.u32 
%r7838, [%rd2083]; // inline asm bfe.u32 %r5829, %r5838, %r10289, %r10288; // inline asm mul.wide.u32 %rd2084, %r5829, 4; add.s64 %rd2085, %rd45, %rd2084; ld.shared.u32 %r7839, [%rd2085]; add.s32 %r7840, %r7839, %r7838; // inline asm bfe.u32 %r5833, %r5838, %r10288, %r10288; // inline asm mul.wide.u32 %rd2086, %r5833, 4; add.s64 %rd2087, %rd48, %rd2086; ld.shared.u32 %r7841, [%rd2087]; xor.b32 %r7842, %r7841, %r7840; // inline asm bfe.u32 %r5837, %r5838, %r10322, %r10288; // inline asm mul.wide.u32 %rd2088, %r5837, 4; add.s64 %rd2089, %rd51, %rd2088; ld.shared.u32 %r7843, [%rd2089]; add.s32 %r7844, %r7843, %r7842; xor.b32 %r7845, %r7582, %r5822; xor.b32 %r5854, %r7845, %r7844; // inline asm bfe.u32 %r5841, %r5854, %r10287, %r10288; // inline asm mul.wide.u32 %rd2090, %r5841, 4; add.s64 %rd2091, %rd42, %rd2090; ld.shared.u32 %r7846, [%rd2091]; // inline asm bfe.u32 %r5845, %r5854, %r10289, %r10288; // inline asm mul.wide.u32 %rd2092, %r5845, 4; add.s64 %rd2093, %rd45, %rd2092; ld.shared.u32 %r7847, [%rd2093]; add.s32 %r7848, %r7847, %r7846; // inline asm bfe.u32 %r5849, %r5854, %r10288, %r10288; // inline asm mul.wide.u32 %rd2094, %r5849, 4; add.s64 %rd2095, %rd48, %rd2094; ld.shared.u32 %r7849, [%rd2095]; xor.b32 %r7850, %r7849, %r7848; // inline asm bfe.u32 %r5853, %r5854, %r10322, %r10288; // inline asm mul.wide.u32 %rd2096, %r5853, 4; add.s64 %rd2097, %rd51, %rd2096; ld.shared.u32 %r7851, [%rd2097]; add.s32 %r7852, %r7851, %r7850; xor.b32 %r7853, %r7591, %r5838; xor.b32 %r5870, %r7853, %r7852; // inline asm bfe.u32 %r5857, %r5870, %r10287, %r10288; // inline asm mul.wide.u32 %rd2098, %r5857, 4; add.s64 %rd2099, %rd42, %rd2098; ld.shared.u32 %r7854, [%rd2099]; // inline asm bfe.u32 %r5861, %r5870, %r10289, %r10288; // inline asm mul.wide.u32 %rd2100, %r5861, 4; add.s64 %rd2101, %rd45, %rd2100; ld.shared.u32 %r7855, [%rd2101]; add.s32 %r7856, %r7855, %r7854; // inline asm bfe.u32 %r5865, %r5870, %r10288, %r10288; // inline asm mul.wide.u32 %rd2102, %r5865, 4; 
add.s64 %rd2103, %rd48, %rd2102; ld.shared.u32 %r7857, [%rd2103]; xor.b32 %r7858, %r7857, %r7856; // inline asm bfe.u32 %r5869, %r5870, %r10322, %r10288; // inline asm mul.wide.u32 %rd2104, %r5869, 4; add.s64 %rd2105, %rd51, %rd2104; ld.shared.u32 %r7859, [%rd2105]; add.s32 %r7860, %r7859, %r7858; xor.b32 %r7861, %r7600, %r5854; xor.b32 %r5886, %r7861, %r7860; // inline asm bfe.u32 %r5873, %r5886, %r10287, %r10288; // inline asm mul.wide.u32 %rd2106, %r5873, 4; add.s64 %rd2107, %rd42, %rd2106; ld.shared.u32 %r7862, [%rd2107]; // inline asm bfe.u32 %r5877, %r5886, %r10289, %r10288; // inline asm mul.wide.u32 %rd2108, %r5877, 4; add.s64 %rd2109, %rd45, %rd2108; ld.shared.u32 %r7863, [%rd2109]; add.s32 %r7864, %r7863, %r7862; // inline asm bfe.u32 %r5881, %r5886, %r10288, %r10288; // inline asm mul.wide.u32 %rd2110, %r5881, 4; add.s64 %rd2111, %rd48, %rd2110; ld.shared.u32 %r7865, [%rd2111]; xor.b32 %r7866, %r7865, %r7864; // inline asm bfe.u32 %r5885, %r5886, %r10322, %r10288; // inline asm mul.wide.u32 %rd2112, %r5885, 4; add.s64 %rd2113, %rd51, %rd2112; ld.shared.u32 %r7867, [%rd2113]; add.s32 %r7868, %r7867, %r7866; xor.b32 %r7869, %r7609, %r5870; xor.b32 %r5902, %r7869, %r7868; // inline asm bfe.u32 %r5889, %r5902, %r10287, %r10288; // inline asm mul.wide.u32 %rd2114, %r5889, 4; add.s64 %rd2115, %rd42, %rd2114; ld.shared.u32 %r7870, [%rd2115]; // inline asm bfe.u32 %r5893, %r5902, %r10289, %r10288; // inline asm mul.wide.u32 %rd2116, %r5893, 4; add.s64 %rd2117, %rd45, %rd2116; ld.shared.u32 %r7871, [%rd2117]; add.s32 %r7872, %r7871, %r7870; // inline asm bfe.u32 %r5897, %r5902, %r10288, %r10288; // inline asm mul.wide.u32 %rd2118, %r5897, 4; add.s64 %rd2119, %rd48, %rd2118; ld.shared.u32 %r7873, [%rd2119]; xor.b32 %r7874, %r7873, %r7872; // inline asm bfe.u32 %r5901, %r5902, %r10322, %r10288; // inline asm mul.wide.u32 %rd2120, %r5901, 4; add.s64 %rd2121, %rd51, %rd2120; ld.shared.u32 %r7875, [%rd2121]; add.s32 %r7876, %r7875, %r7874; xor.b32 %r7877, %r7618, 
%r5886; xor.b32 %r5918, %r7877, %r7876; // inline asm bfe.u32 %r5905, %r5918, %r10287, %r10288; // inline asm mul.wide.u32 %rd2122, %r5905, 4; add.s64 %rd2123, %rd42, %rd2122; ld.shared.u32 %r7878, [%rd2123]; // inline asm bfe.u32 %r5909, %r5918, %r10289, %r10288; // inline asm mul.wide.u32 %rd2124, %r5909, 4; add.s64 %rd2125, %rd45, %rd2124; ld.shared.u32 %r7879, [%rd2125]; add.s32 %r7880, %r7879, %r7878; // inline asm bfe.u32 %r5913, %r5918, %r10288, %r10288; // inline asm mul.wide.u32 %rd2126, %r5913, 4; add.s64 %rd2127, %rd48, %rd2126; ld.shared.u32 %r7881, [%rd2127]; xor.b32 %r7882, %r7881, %r7880; // inline asm bfe.u32 %r5917, %r5918, %r10322, %r10288; // inline asm mul.wide.u32 %rd2128, %r5917, 4; add.s64 %rd2129, %rd51, %rd2128; ld.shared.u32 %r7883, [%rd2129]; add.s32 %r7884, %r7883, %r7882; xor.b32 %r7885, %r7627, %r5902; xor.b32 %r5934, %r7885, %r7884; // inline asm bfe.u32 %r5921, %r5934, %r10287, %r10288; // inline asm mul.wide.u32 %rd2130, %r5921, 4; add.s64 %rd2131, %rd42, %rd2130; ld.shared.u32 %r7886, [%rd2131]; // inline asm bfe.u32 %r5925, %r5934, %r10289, %r10288; // inline asm mul.wide.u32 %rd2132, %r5925, 4; add.s64 %rd2133, %rd45, %rd2132; ld.shared.u32 %r7887, [%rd2133]; add.s32 %r7888, %r7887, %r7886; // inline asm bfe.u32 %r5929, %r5934, %r10288, %r10288; // inline asm mul.wide.u32 %rd2134, %r5929, 4; add.s64 %rd2135, %rd48, %rd2134; ld.shared.u32 %r7889, [%rd2135]; xor.b32 %r7890, %r7889, %r7888; // inline asm bfe.u32 %r5933, %r5934, %r10322, %r10288; // inline asm mul.wide.u32 %rd2136, %r5933, 4; add.s64 %rd2137, %rd51, %rd2136; ld.shared.u32 %r7891, [%rd2137]; add.s32 %r7892, %r7891, %r7890; xor.b32 %r7893, %r7636, %r5918; xor.b32 %r5950, %r7893, %r7892; // inline asm bfe.u32 %r5937, %r5950, %r10287, %r10288; // inline asm mul.wide.u32 %rd2138, %r5937, 4; add.s64 %rd2139, %rd42, %rd2138; ld.shared.u32 %r7894, [%rd2139]; // inline asm bfe.u32 %r5941, %r5950, %r10289, %r10288; // inline asm mul.wide.u32 %rd2140, %r5941, 4; add.s64 
%rd2141, %rd45, %rd2140; ld.shared.u32 %r7895, [%rd2141]; add.s32 %r7896, %r7895, %r7894; // inline asm bfe.u32 %r5945, %r5950, %r10288, %r10288; // inline asm mul.wide.u32 %rd2142, %r5945, 4; add.s64 %rd2143, %rd48, %rd2142; ld.shared.u32 %r7897, [%rd2143]; xor.b32 %r7898, %r7897, %r7896; // inline asm bfe.u32 %r5949, %r5950, %r10322, %r10288; // inline asm mul.wide.u32 %rd2144, %r5949, 4; add.s64 %rd2145, %rd51, %rd2144; ld.shared.u32 %r7899, [%rd2145]; add.s32 %r7900, %r7899, %r7898; xor.b32 %r7901, %r7645, %r5934; xor.b32 %r5966, %r7901, %r7900; // inline asm bfe.u32 %r5953, %r5966, %r10287, %r10288; // inline asm mul.wide.u32 %rd2146, %r5953, 4; add.s64 %rd2147, %rd42, %rd2146; ld.shared.u32 %r7902, [%rd2147]; // inline asm bfe.u32 %r5957, %r5966, %r10289, %r10288; // inline asm mul.wide.u32 %rd2148, %r5957, 4; add.s64 %rd2149, %rd45, %rd2148; ld.shared.u32 %r7903, [%rd2149]; add.s32 %r7904, %r7903, %r7902; // inline asm bfe.u32 %r5961, %r5966, %r10288, %r10288; // inline asm mul.wide.u32 %rd2150, %r5961, 4; add.s64 %rd2151, %rd48, %rd2150; ld.shared.u32 %r7905, [%rd2151]; xor.b32 %r7906, %r7905, %r7904; // inline asm bfe.u32 %r5965, %r5966, %r10322, %r10288; // inline asm mul.wide.u32 %rd2152, %r5965, 4; add.s64 %rd2153, %rd51, %rd2152; ld.shared.u32 %r7907, [%rd2153]; add.s32 %r7908, %r7907, %r7906; xor.b32 %r7909, %r7654, %r5950; xor.b32 %r5982, %r7909, %r7908; // inline asm bfe.u32 %r5969, %r5982, %r10287, %r10288; // inline asm mul.wide.u32 %rd2154, %r5969, 4; add.s64 %rd2155, %rd42, %rd2154; ld.shared.u32 %r7910, [%rd2155]; // inline asm bfe.u32 %r5973, %r5982, %r10289, %r10288; // inline asm mul.wide.u32 %rd2156, %r5973, 4; add.s64 %rd2157, %rd45, %rd2156; ld.shared.u32 %r7911, [%rd2157]; add.s32 %r7912, %r7911, %r7910; // inline asm bfe.u32 %r5977, %r5982, %r10288, %r10288; // inline asm mul.wide.u32 %rd2158, %r5977, 4; add.s64 %rd2159, %rd48, %rd2158; ld.shared.u32 %r7913, [%rd2159]; xor.b32 %r7914, %r7913, %r7912; // inline asm bfe.u32 %r5981, 
%r5982, %r10322, %r10288; // inline asm mul.wide.u32 %rd2160, %r5981, 4; add.s64 %rd2161, %rd51, %rd2160; ld.shared.u32 %r7915, [%rd2161]; add.s32 %r7916, %r7915, %r7914; xor.b32 %r7917, %r7663, %r5966; xor.b32 %r10346, %r7917, %r7916; xor.b32 %r10347, %r7665, %r5982; xor.b32 %r5998, %r5470, %r5982; // inline asm bfe.u32 %r5985, %r5998, %r10287, %r10288; // inline asm mul.wide.u32 %rd2162, %r5985, 4; add.s64 %rd2163, %rd42, %rd2162; ld.shared.u32 %r7918, [%rd2163]; // inline asm bfe.u32 %r5989, %r5998, %r10289, %r10288; // inline asm mul.wide.u32 %rd2164, %r5989, 4; add.s64 %rd2165, %rd45, %rd2164; ld.shared.u32 %r7919, [%rd2165]; add.s32 %r7920, %r7919, %r7918; // inline asm bfe.u32 %r5993, %r5998, %r10288, %r10288; // inline asm mul.wide.u32 %rd2166, %r5993, 4; add.s64 %rd2167, %rd48, %rd2166; ld.shared.u32 %r7921, [%rd2167]; xor.b32 %r7922, %r7921, %r7920; // inline asm bfe.u32 %r5997, %r5998, %r10322, %r10288; // inline asm mul.wide.u32 %rd2168, %r5997, 4; add.s64 %rd2169, %rd51, %rd2168; ld.shared.u32 %r7923, [%rd2169]; add.s32 %r7924, %r7923, %r7922; xor.b32 %r7925, %r10350, %r10346; xor.b32 %r6014, %r7925, %r7924; // inline asm bfe.u32 %r6001, %r6014, %r10287, %r10288; // inline asm mul.wide.u32 %rd2170, %r6001, 4; add.s64 %rd2171, %rd42, %rd2170; ld.shared.u32 %r7926, [%rd2171]; // inline asm bfe.u32 %r6005, %r6014, %r10289, %r10288; // inline asm mul.wide.u32 %rd2172, %r6005, 4; add.s64 %rd2173, %rd45, %rd2172; ld.shared.u32 %r7927, [%rd2173]; add.s32 %r7928, %r7927, %r7926; // inline asm bfe.u32 %r6009, %r6014, %r10288, %r10288; // inline asm mul.wide.u32 %rd2174, %r6009, 4; add.s64 %rd2175, %rd48, %rd2174; ld.shared.u32 %r7929, [%rd2175]; xor.b32 %r7930, %r7929, %r7928; // inline asm bfe.u32 %r6013, %r6014, %r10322, %r10288; // inline asm mul.wide.u32 %rd2176, %r6013, 4; add.s64 %rd2177, %rd51, %rd2176; ld.shared.u32 %r7931, [%rd2177]; add.s32 %r7932, %r7931, %r7930; xor.b32 %r7933, %r10349, %r5998; xor.b32 %r6030, %r7933, %r7932; // inline asm bfe.u32 
%r6017, %r6030, %r10287, %r10288; // inline asm mul.wide.u32 %rd2178, %r6017, 4; add.s64 %rd2179, %rd42, %rd2178; ld.shared.u32 %r7934, [%rd2179]; // inline asm bfe.u32 %r6021, %r6030, %r10289, %r10288; // inline asm mul.wide.u32 %rd2180, %r6021, 4; add.s64 %rd2181, %rd45, %rd2180; ld.shared.u32 %r7935, [%rd2181]; add.s32 %r7936, %r7935, %r7934; // inline asm bfe.u32 %r6025, %r6030, %r10288, %r10288; // inline asm mul.wide.u32 %rd2182, %r6025, 4; add.s64 %rd2183, %rd48, %rd2182; ld.shared.u32 %r7937, [%rd2183]; xor.b32 %r7938, %r7937, %r7936; // inline asm bfe.u32 %r6029, %r6030, %r10322, %r10288; // inline asm mul.wide.u32 %rd2184, %r6029, 4; add.s64 %rd2185, %rd51, %rd2184; ld.shared.u32 %r7939, [%rd2185]; add.s32 %r7940, %r7939, %r7938; xor.b32 %r7941, %r10348, %r6014; xor.b32 %r6046, %r7941, %r7940; // inline asm bfe.u32 %r6033, %r6046, %r10287, %r10288; // inline asm mul.wide.u32 %rd2186, %r6033, 4; add.s64 %rd2187, %rd42, %rd2186; ld.shared.u32 %r7942, [%rd2187]; // inline asm bfe.u32 %r6037, %r6046, %r10289, %r10288; // inline asm mul.wide.u32 %rd2188, %r6037, 4; add.s64 %rd2189, %rd45, %rd2188; ld.shared.u32 %r7943, [%rd2189]; add.s32 %r7944, %r7943, %r7942; // inline asm bfe.u32 %r6041, %r6046, %r10288, %r10288; // inline asm mul.wide.u32 %rd2190, %r6041, 4; add.s64 %rd2191, %rd48, %rd2190; ld.shared.u32 %r7945, [%rd2191]; xor.b32 %r7946, %r7945, %r7944; // inline asm bfe.u32 %r6045, %r6046, %r10322, %r10288; // inline asm mul.wide.u32 %rd2192, %r6045, 4; add.s64 %rd2193, %rd51, %rd2192; ld.shared.u32 %r7947, [%rd2193]; add.s32 %r7948, %r7947, %r7946; xor.b32 %r7949, %r10347, %r6030; xor.b32 %r6062, %r7949, %r7948; // inline asm bfe.u32 %r6049, %r6062, %r10287, %r10288; // inline asm mul.wide.u32 %rd2194, %r6049, 4; add.s64 %rd2195, %rd42, %rd2194; ld.shared.u32 %r7950, [%rd2195]; // inline asm bfe.u32 %r6053, %r6062, %r10289, %r10288; // inline asm mul.wide.u32 %rd2196, %r6053, 4; add.s64 %rd2197, %rd45, %rd2196; ld.shared.u32 %r7951, [%rd2197]; add.s32 
%r7952, %r7951, %r7950; // inline asm bfe.u32 %r6057, %r6062, %r10288, %r10288; // inline asm mul.wide.u32 %rd2198, %r6057, 4; add.s64 %rd2199, %rd48, %rd2198; ld.shared.u32 %r7953, [%rd2199]; xor.b32 %r7954, %r7953, %r7952; // inline asm bfe.u32 %r6061, %r6062, %r10322, %r10288; // inline asm mul.wide.u32 %rd2200, %r6061, 4; add.s64 %rd2201, %rd51, %rd2200; ld.shared.u32 %r7955, [%rd2201]; add.s32 %r7956, %r7955, %r7954; xor.b32 %r7957, %r10346, %r6046; xor.b32 %r6078, %r7957, %r7956; // inline asm bfe.u32 %r6065, %r6078, %r10287, %r10288; // inline asm mul.wide.u32 %rd2202, %r6065, 4; add.s64 %rd2203, %rd42, %rd2202; ld.shared.u32 %r7958, [%rd2203]; // inline asm bfe.u32 %r6069, %r6078, %r10289, %r10288; // inline asm mul.wide.u32 %rd2204, %r6069, 4; add.s64 %rd2205, %rd45, %rd2204; ld.shared.u32 %r7959, [%rd2205]; add.s32 %r7960, %r7959, %r7958; // inline asm bfe.u32 %r6073, %r6078, %r10288, %r10288; // inline asm mul.wide.u32 %rd2206, %r6073, 4; add.s64 %rd2207, %rd48, %rd2206; ld.shared.u32 %r7961, [%rd2207]; xor.b32 %r7962, %r7961, %r7960; // inline asm bfe.u32 %r6077, %r6078, %r10322, %r10288; // inline asm mul.wide.u32 %rd2208, %r6077, 4; add.s64 %rd2209, %rd51, %rd2208; ld.shared.u32 %r7963, [%rd2209]; add.s32 %r7964, %r7963, %r7962; xor.b32 %r7965, %r7573, %r6062; xor.b32 %r6094, %r7965, %r7964; // inline asm bfe.u32 %r6081, %r6094, %r10287, %r10288; // inline asm mul.wide.u32 %rd2210, %r6081, 4; add.s64 %rd2211, %rd42, %rd2210; ld.shared.u32 %r7966, [%rd2211]; // inline asm bfe.u32 %r6085, %r6094, %r10289, %r10288; // inline asm mul.wide.u32 %rd2212, %r6085, 4; add.s64 %rd2213, %rd45, %rd2212; ld.shared.u32 %r7967, [%rd2213]; add.s32 %r7968, %r7967, %r7966; // inline asm bfe.u32 %r6089, %r6094, %r10288, %r10288; // inline asm mul.wide.u32 %rd2214, %r6089, 4; add.s64 %rd2215, %rd48, %rd2214; ld.shared.u32 %r7969, [%rd2215]; xor.b32 %r7970, %r7969, %r7968; // inline asm bfe.u32 %r6093, %r6094, %r10322, %r10288; // inline asm mul.wide.u32 %rd2216, %r6093, 
4; add.s64 %rd2217, %rd51, %rd2216; ld.shared.u32 %r7971, [%rd2217]; add.s32 %r7972, %r7971, %r7970; xor.b32 %r7973, %r7582, %r6078; xor.b32 %r6110, %r7973, %r7972; // inline asm bfe.u32 %r6097, %r6110, %r10287, %r10288; // inline asm mul.wide.u32 %rd2218, %r6097, 4; add.s64 %rd2219, %rd42, %rd2218; ld.shared.u32 %r7974, [%rd2219]; // inline asm bfe.u32 %r6101, %r6110, %r10289, %r10288; // inline asm mul.wide.u32 %rd2220, %r6101, 4; add.s64 %rd2221, %rd45, %rd2220; ld.shared.u32 %r7975, [%rd2221]; add.s32 %r7976, %r7975, %r7974; // inline asm bfe.u32 %r6105, %r6110, %r10288, %r10288; // inline asm mul.wide.u32 %rd2222, %r6105, 4; add.s64 %rd2223, %rd48, %rd2222; ld.shared.u32 %r7977, [%rd2223]; xor.b32 %r7978, %r7977, %r7976; // inline asm bfe.u32 %r6109, %r6110, %r10322, %r10288; // inline asm mul.wide.u32 %rd2224, %r6109, 4; add.s64 %rd2225, %rd51, %rd2224; ld.shared.u32 %r7979, [%rd2225]; add.s32 %r7980, %r7979, %r7978; xor.b32 %r7981, %r7591, %r6094; xor.b32 %r6126, %r7981, %r7980; // inline asm bfe.u32 %r6113, %r6126, %r10287, %r10288; // inline asm mul.wide.u32 %rd2226, %r6113, 4; add.s64 %rd2227, %rd42, %rd2226; ld.shared.u32 %r7982, [%rd2227]; // inline asm bfe.u32 %r6117, %r6126, %r10289, %r10288; // inline asm mul.wide.u32 %rd2228, %r6117, 4; add.s64 %rd2229, %rd45, %rd2228; ld.shared.u32 %r7983, [%rd2229]; add.s32 %r7984, %r7983, %r7982; // inline asm bfe.u32 %r6121, %r6126, %r10288, %r10288; // inline asm mul.wide.u32 %rd2230, %r6121, 4; add.s64 %rd2231, %rd48, %rd2230; ld.shared.u32 %r7985, [%rd2231]; xor.b32 %r7986, %r7985, %r7984; // inline asm bfe.u32 %r6125, %r6126, %r10322, %r10288; // inline asm mul.wide.u32 %rd2232, %r6125, 4; add.s64 %rd2233, %rd51, %rd2232; ld.shared.u32 %r7987, [%rd2233]; add.s32 %r7988, %r7987, %r7986; xor.b32 %r7989, %r7600, %r6110; xor.b32 %r6142, %r7989, %r7988; // inline asm bfe.u32 %r6129, %r6142, %r10287, %r10288; // inline asm mul.wide.u32 %rd2234, %r6129, 4; add.s64 %rd2235, %rd42, %rd2234; ld.shared.u32 %r7990, 
[%rd2235]; // inline asm bfe.u32 %r6133, %r6142, %r10289, %r10288; // inline asm mul.wide.u32 %rd2236, %r6133, 4; add.s64 %rd2237, %rd45, %rd2236; ld.shared.u32 %r7991, [%rd2237]; add.s32 %r7992, %r7991, %r7990; // inline asm bfe.u32 %r6137, %r6142, %r10288, %r10288; // inline asm mul.wide.u32 %rd2238, %r6137, 4; add.s64 %rd2239, %rd48, %rd2238; ld.shared.u32 %r7993, [%rd2239]; xor.b32 %r7994, %r7993, %r7992; // inline asm bfe.u32 %r6141, %r6142, %r10322, %r10288; // inline asm mul.wide.u32 %rd2240, %r6141, 4; add.s64 %rd2241, %rd51, %rd2240; ld.shared.u32 %r7995, [%rd2241]; add.s32 %r7996, %r7995, %r7994; xor.b32 %r7997, %r7609, %r6126; xor.b32 %r6158, %r7997, %r7996; // inline asm bfe.u32 %r6145, %r6158, %r10287, %r10288; // inline asm mul.wide.u32 %rd2242, %r6145, 4; add.s64 %rd2243, %rd42, %rd2242; ld.shared.u32 %r7998, [%rd2243]; // inline asm bfe.u32 %r6149, %r6158, %r10289, %r10288; // inline asm mul.wide.u32 %rd2244, %r6149, 4; add.s64 %rd2245, %rd45, %rd2244; ld.shared.u32 %r7999, [%rd2245]; add.s32 %r8000, %r7999, %r7998; // inline asm bfe.u32 %r6153, %r6158, %r10288, %r10288; // inline asm mul.wide.u32 %rd2246, %r6153, 4; add.s64 %rd2247, %rd48, %rd2246; ld.shared.u32 %r8001, [%rd2247]; xor.b32 %r8002, %r8001, %r8000; // inline asm bfe.u32 %r6157, %r6158, %r10322, %r10288; // inline asm mul.wide.u32 %rd2248, %r6157, 4; add.s64 %rd2249, %rd51, %rd2248; ld.shared.u32 %r8003, [%rd2249]; add.s32 %r8004, %r8003, %r8002; xor.b32 %r8005, %r7618, %r6142; xor.b32 %r6174, %r8005, %r8004; // inline asm bfe.u32 %r6161, %r6174, %r10287, %r10288; // inline asm mul.wide.u32 %rd2250, %r6161, 4; add.s64 %rd2251, %rd42, %rd2250; ld.shared.u32 %r8006, [%rd2251]; // inline asm bfe.u32 %r6165, %r6174, %r10289, %r10288; // inline asm mul.wide.u32 %rd2252, %r6165, 4; add.s64 %rd2253, %rd45, %rd2252; ld.shared.u32 %r8007, [%rd2253]; add.s32 %r8008, %r8007, %r8006; // inline asm bfe.u32 %r6169, %r6174, %r10288, %r10288; // inline asm mul.wide.u32 %rd2254, %r6169, 4; add.s64 
%rd2255, %rd48, %rd2254; ld.shared.u32 %r8009, [%rd2255]; xor.b32 %r8010, %r8009, %r8008; // inline asm bfe.u32 %r6173, %r6174, %r10322, %r10288; // inline asm mul.wide.u32 %rd2256, %r6173, 4; add.s64 %rd2257, %rd51, %rd2256; ld.shared.u32 %r8011, [%rd2257]; add.s32 %r8012, %r8011, %r8010; xor.b32 %r8013, %r7627, %r6158; xor.b32 %r6190, %r8013, %r8012; // inline asm bfe.u32 %r6177, %r6190, %r10287, %r10288; // inline asm mul.wide.u32 %rd2258, %r6177, 4; add.s64 %rd2259, %rd42, %rd2258; ld.shared.u32 %r8014, [%rd2259]; // inline asm bfe.u32 %r6181, %r6190, %r10289, %r10288; // inline asm mul.wide.u32 %rd2260, %r6181, 4; add.s64 %rd2261, %rd45, %rd2260; ld.shared.u32 %r8015, [%rd2261]; add.s32 %r8016, %r8015, %r8014; // inline asm bfe.u32 %r6185, %r6190, %r10288, %r10288; // inline asm mul.wide.u32 %rd2262, %r6185, 4; add.s64 %rd2263, %rd48, %rd2262; ld.shared.u32 %r8017, [%rd2263]; xor.b32 %r8018, %r8017, %r8016; // inline asm bfe.u32 %r6189, %r6190, %r10322, %r10288; // inline asm mul.wide.u32 %rd2264, %r6189, 4; add.s64 %rd2265, %rd51, %rd2264; ld.shared.u32 %r8019, [%rd2265]; add.s32 %r8020, %r8019, %r8018; xor.b32 %r8021, %r7636, %r6174; xor.b32 %r6206, %r8021, %r8020; // inline asm bfe.u32 %r6193, %r6206, %r10287, %r10288; // inline asm mul.wide.u32 %rd2266, %r6193, 4; add.s64 %rd2267, %rd42, %rd2266; ld.shared.u32 %r8022, [%rd2267]; // inline asm bfe.u32 %r6197, %r6206, %r10289, %r10288; // inline asm mul.wide.u32 %rd2268, %r6197, 4; add.s64 %rd2269, %rd45, %rd2268; ld.shared.u32 %r8023, [%rd2269]; add.s32 %r8024, %r8023, %r8022; // inline asm bfe.u32 %r6201, %r6206, %r10288, %r10288; // inline asm mul.wide.u32 %rd2270, %r6201, 4; add.s64 %rd2271, %rd48, %rd2270; ld.shared.u32 %r8025, [%rd2271]; xor.b32 %r8026, %r8025, %r8024; // inline asm bfe.u32 %r6205, %r6206, %r10322, %r10288; // inline asm mul.wide.u32 %rd2272, %r6205, 4; add.s64 %rd2273, %rd51, %rd2272; ld.shared.u32 %r8027, [%rd2273]; add.s32 %r8028, %r8027, %r8026; xor.b32 %r8029, %r7645, %r6190; 
xor.b32 %r6222, %r8029, %r8028; // inline asm bfe.u32 %r6209, %r6222, %r10287, %r10288; // inline asm mul.wide.u32 %rd2274, %r6209, 4; add.s64 %rd2275, %rd42, %rd2274; ld.shared.u32 %r8030, [%rd2275]; // inline asm bfe.u32 %r6213, %r6222, %r10289, %r10288; // inline asm mul.wide.u32 %rd2276, %r6213, 4; add.s64 %rd2277, %rd45, %rd2276; ld.shared.u32 %r8031, [%rd2277]; add.s32 %r8032, %r8031, %r8030; // inline asm bfe.u32 %r6217, %r6222, %r10288, %r10288; // inline asm mul.wide.u32 %rd2278, %r6217, 4; add.s64 %rd2279, %rd48, %rd2278; ld.shared.u32 %r8033, [%rd2279]; xor.b32 %r8034, %r8033, %r8032; // inline asm bfe.u32 %r6221, %r6222, %r10322, %r10288; // inline asm mul.wide.u32 %rd2280, %r6221, 4; add.s64 %rd2281, %rd51, %rd2280; ld.shared.u32 %r8035, [%rd2281]; add.s32 %r8036, %r8035, %r8034; xor.b32 %r8037, %r7654, %r6206; xor.b32 %r6238, %r8037, %r8036; // inline asm bfe.u32 %r6225, %r6238, %r10287, %r10288; // inline asm mul.wide.u32 %rd2282, %r6225, 4; add.s64 %rd2283, %rd42, %rd2282; ld.shared.u32 %r8038, [%rd2283]; // inline asm bfe.u32 %r6229, %r6238, %r10289, %r10288; // inline asm mul.wide.u32 %rd2284, %r6229, 4; add.s64 %rd2285, %rd45, %rd2284; ld.shared.u32 %r8039, [%rd2285]; add.s32 %r8040, %r8039, %r8038; // inline asm bfe.u32 %r6233, %r6238, %r10288, %r10288; // inline asm mul.wide.u32 %rd2286, %r6233, 4; add.s64 %rd2287, %rd48, %rd2286; ld.shared.u32 %r8041, [%rd2287]; xor.b32 %r8042, %r8041, %r8040; // inline asm bfe.u32 %r6237, %r6238, %r10322, %r10288; // inline asm mul.wide.u32 %rd2288, %r6237, 4; add.s64 %rd2289, %rd51, %rd2288; ld.shared.u32 %r8043, [%rd2289]; add.s32 %r8044, %r8043, %r8042; xor.b32 %r8045, %r7663, %r6222; xor.b32 %r10344, %r8045, %r8044; xor.b32 %r10345, %r7665, %r6238; xor.b32 %r6254, %r5470, %r6238; // inline asm bfe.u32 %r6241, %r6254, %r10287, %r10288; // inline asm mul.wide.u32 %rd2290, %r6241, 4; add.s64 %rd2291, %rd42, %rd2290; ld.shared.u32 %r8046, [%rd2291]; // inline asm bfe.u32 %r6245, %r6254, %r10289, %r10288; // 
inline asm mul.wide.u32 %rd2292, %r6245, 4; add.s64 %rd2293, %rd45, %rd2292; ld.shared.u32 %r8047, [%rd2293]; add.s32 %r8048, %r8047, %r8046; // inline asm bfe.u32 %r6249, %r6254, %r10288, %r10288; // inline asm mul.wide.u32 %rd2294, %r6249, 4; add.s64 %rd2295, %rd48, %rd2294; ld.shared.u32 %r8049, [%rd2295]; xor.b32 %r8050, %r8049, %r8048; // inline asm bfe.u32 %r6253, %r6254, %r10322, %r10288; // inline asm mul.wide.u32 %rd2296, %r6253, 4; add.s64 %rd2297, %rd51, %rd2296; ld.shared.u32 %r8051, [%rd2297]; add.s32 %r8052, %r8051, %r8050; xor.b32 %r8053, %r10350, %r10344; xor.b32 %r6270, %r8053, %r8052; // inline asm bfe.u32 %r6257, %r6270, %r10287, %r10288; // inline asm mul.wide.u32 %rd2298, %r6257, 4; add.s64 %rd2299, %rd42, %rd2298; ld.shared.u32 %r8054, [%rd2299]; // inline asm bfe.u32 %r6261, %r6270, %r10289, %r10288; // inline asm mul.wide.u32 %rd2300, %r6261, 4; add.s64 %rd2301, %rd45, %rd2300; ld.shared.u32 %r8055, [%rd2301]; add.s32 %r8056, %r8055, %r8054; // inline asm bfe.u32 %r6265, %r6270, %r10288, %r10288; // inline asm mul.wide.u32 %rd2302, %r6265, 4; add.s64 %rd2303, %rd48, %rd2302; ld.shared.u32 %r8057, [%rd2303]; xor.b32 %r8058, %r8057, %r8056; // inline asm bfe.u32 %r6269, %r6270, %r10322, %r10288; // inline asm mul.wide.u32 %rd2304, %r6269, 4; add.s64 %rd2305, %rd51, %rd2304; ld.shared.u32 %r8059, [%rd2305]; add.s32 %r8060, %r8059, %r8058; xor.b32 %r8061, %r10349, %r6254; xor.b32 %r6286, %r8061, %r8060; // inline asm bfe.u32 %r6273, %r6286, %r10287, %r10288; // inline asm mul.wide.u32 %rd2306, %r6273, 4; add.s64 %rd2307, %rd42, %rd2306; ld.shared.u32 %r8062, [%rd2307]; // inline asm bfe.u32 %r6277, %r6286, %r10289, %r10288; // inline asm mul.wide.u32 %rd2308, %r6277, 4; add.s64 %rd2309, %rd45, %rd2308; ld.shared.u32 %r8063, [%rd2309]; add.s32 %r8064, %r8063, %r8062; // inline asm bfe.u32 %r6281, %r6286, %r10288, %r10288; // inline asm mul.wide.u32 %rd2310, %r6281, 4; add.s64 %rd2311, %rd48, %rd2310; ld.shared.u32 %r8065, [%rd2311]; xor.b32 
%r8066, %r8065, %r8064; // inline asm bfe.u32 %r6285, %r6286, %r10322, %r10288; // inline asm mul.wide.u32 %rd2312, %r6285, 4; add.s64 %rd2313, %rd51, %rd2312; ld.shared.u32 %r8067, [%rd2313]; add.s32 %r8068, %r8067, %r8066; xor.b32 %r8069, %r10348, %r6270; xor.b32 %r6302, %r8069, %r8068; // inline asm bfe.u32 %r6289, %r6302, %r10287, %r10288; // inline asm mul.wide.u32 %rd2314, %r6289, 4; add.s64 %rd2315, %rd42, %rd2314; ld.shared.u32 %r8070, [%rd2315]; // inline asm bfe.u32 %r6293, %r6302, %r10289, %r10288; // inline asm mul.wide.u32 %rd2316, %r6293, 4; add.s64 %rd2317, %rd45, %rd2316; ld.shared.u32 %r8071, [%rd2317]; add.s32 %r8072, %r8071, %r8070; // inline asm bfe.u32 %r6297, %r6302, %r10288, %r10288; // inline asm mul.wide.u32 %rd2318, %r6297, 4; add.s64 %rd2319, %rd48, %rd2318; ld.shared.u32 %r8073, [%rd2319]; xor.b32 %r8074, %r8073, %r8072; // inline asm bfe.u32 %r6301, %r6302, %r10322, %r10288; // inline asm mul.wide.u32 %rd2320, %r6301, 4; add.s64 %rd2321, %rd51, %rd2320; ld.shared.u32 %r8075, [%rd2321]; add.s32 %r8076, %r8075, %r8074; xor.b32 %r8077, %r10347, %r6286; xor.b32 %r6318, %r8077, %r8076; // inline asm bfe.u32 %r6305, %r6318, %r10287, %r10288; // inline asm mul.wide.u32 %rd2322, %r6305, 4; add.s64 %rd2323, %rd42, %rd2322; ld.shared.u32 %r8078, [%rd2323]; // inline asm bfe.u32 %r6309, %r6318, %r10289, %r10288; // inline asm mul.wide.u32 %rd2324, %r6309, 4; add.s64 %rd2325, %rd45, %rd2324; ld.shared.u32 %r8079, [%rd2325]; add.s32 %r8080, %r8079, %r8078; // inline asm bfe.u32 %r6313, %r6318, %r10288, %r10288; // inline asm mul.wide.u32 %rd2326, %r6313, 4; add.s64 %rd2327, %rd48, %rd2326; ld.shared.u32 %r8081, [%rd2327]; xor.b32 %r8082, %r8081, %r8080; // inline asm bfe.u32 %r6317, %r6318, %r10322, %r10288; // inline asm mul.wide.u32 %rd2328, %r6317, 4; add.s64 %rd2329, %rd51, %rd2328; ld.shared.u32 %r8083, [%rd2329]; add.s32 %r8084, %r8083, %r8082; xor.b32 %r8085, %r10346, %r6302; xor.b32 %r6334, %r8085, %r8084; // inline asm bfe.u32 %r6321, 
%r6334, %r10287, %r10288; // inline asm mul.wide.u32 %rd2330, %r6321, 4; add.s64 %rd2331, %rd42, %rd2330; ld.shared.u32 %r8086, [%rd2331]; // inline asm bfe.u32 %r6325, %r6334, %r10289, %r10288; // inline asm mul.wide.u32 %rd2332, %r6325, 4; add.s64 %rd2333, %rd45, %rd2332; ld.shared.u32 %r8087, [%rd2333]; add.s32 %r8088, %r8087, %r8086; // inline asm bfe.u32 %r6329, %r6334, %r10288, %r10288; // inline asm mul.wide.u32 %rd2334, %r6329, 4; add.s64 %rd2335, %rd48, %rd2334; ld.shared.u32 %r8089, [%rd2335]; xor.b32 %r8090, %r8089, %r8088; // inline asm bfe.u32 %r6333, %r6334, %r10322, %r10288; // inline asm mul.wide.u32 %rd2336, %r6333, 4; add.s64 %rd2337, %rd51, %rd2336; ld.shared.u32 %r8091, [%rd2337]; add.s32 %r8092, %r8091, %r8090; xor.b32 %r8093, %r10345, %r6318; xor.b32 %r6350, %r8093, %r8092; // inline asm bfe.u32 %r6337, %r6350, %r10287, %r10288; // inline asm mul.wide.u32 %rd2338, %r6337, 4; add.s64 %rd2339, %rd42, %rd2338; ld.shared.u32 %r8094, [%rd2339]; // inline asm bfe.u32 %r6341, %r6350, %r10289, %r10288; // inline asm mul.wide.u32 %rd2340, %r6341, 4; add.s64 %rd2341, %rd45, %rd2340; ld.shared.u32 %r8095, [%rd2341]; add.s32 %r8096, %r8095, %r8094; // inline asm bfe.u32 %r6345, %r6350, %r10288, %r10288; // inline asm mul.wide.u32 %rd2342, %r6345, 4; add.s64 %rd2343, %rd48, %rd2342; ld.shared.u32 %r8097, [%rd2343]; xor.b32 %r8098, %r8097, %r8096; // inline asm bfe.u32 %r6349, %r6350, %r10322, %r10288; // inline asm mul.wide.u32 %rd2344, %r6349, 4; add.s64 %rd2345, %rd51, %rd2344; ld.shared.u32 %r8099, [%rd2345]; add.s32 %r8100, %r8099, %r8098; xor.b32 %r8101, %r10344, %r6334; xor.b32 %r6366, %r8101, %r8100; // inline asm bfe.u32 %r6353, %r6366, %r10287, %r10288; // inline asm mul.wide.u32 %rd2346, %r6353, 4; add.s64 %rd2347, %rd42, %rd2346; ld.shared.u32 %r8102, [%rd2347]; // inline asm bfe.u32 %r6357, %r6366, %r10289, %r10288; // inline asm mul.wide.u32 %rd2348, %r6357, 4; add.s64 %rd2349, %rd45, %rd2348; ld.shared.u32 %r8103, [%rd2349]; add.s32 %r8104, 
%r8103, %r8102; // inline asm bfe.u32 %r6361, %r6366, %r10288, %r10288; // inline asm mul.wide.u32 %rd2350, %r6361, 4; add.s64 %rd2351, %rd48, %rd2350; ld.shared.u32 %r8105, [%rd2351]; xor.b32 %r8106, %r8105, %r8104; // inline asm bfe.u32 %r6365, %r6366, %r10322, %r10288; // inline asm mul.wide.u32 %rd2352, %r6365, 4; add.s64 %rd2353, %rd51, %rd2352; ld.shared.u32 %r8107, [%rd2353]; add.s32 %r8108, %r8107, %r8106; xor.b32 %r8109, %r7591, %r6350; xor.b32 %r6382, %r8109, %r8108; // inline asm bfe.u32 %r6369, %r6382, %r10287, %r10288; // inline asm mul.wide.u32 %rd2354, %r6369, 4; add.s64 %rd2355, %rd42, %rd2354; ld.shared.u32 %r8110, [%rd2355]; // inline asm bfe.u32 %r6373, %r6382, %r10289, %r10288; // inline asm mul.wide.u32 %rd2356, %r6373, 4; add.s64 %rd2357, %rd45, %rd2356; ld.shared.u32 %r8111, [%rd2357]; add.s32 %r8112, %r8111, %r8110; // inline asm bfe.u32 %r6377, %r6382, %r10288, %r10288; // inline asm mul.wide.u32 %rd2358, %r6377, 4; add.s64 %rd2359, %rd48, %rd2358; ld.shared.u32 %r8113, [%rd2359]; xor.b32 %r8114, %r8113, %r8112; // inline asm bfe.u32 %r6381, %r6382, %r10322, %r10288; // inline asm mul.wide.u32 %rd2360, %r6381, 4; add.s64 %rd2361, %rd51, %rd2360; ld.shared.u32 %r8115, [%rd2361]; add.s32 %r8116, %r8115, %r8114; xor.b32 %r8117, %r7600, %r6366; xor.b32 %r6398, %r8117, %r8116; // inline asm bfe.u32 %r6385, %r6398, %r10287, %r10288; // inline asm mul.wide.u32 %rd2362, %r6385, 4; add.s64 %rd2363, %rd42, %rd2362; ld.shared.u32 %r8118, [%rd2363]; // inline asm bfe.u32 %r6389, %r6398, %r10289, %r10288; // inline asm mul.wide.u32 %rd2364, %r6389, 4; add.s64 %rd2365, %rd45, %rd2364; ld.shared.u32 %r8119, [%rd2365]; add.s32 %r8120, %r8119, %r8118; // inline asm bfe.u32 %r6393, %r6398, %r10288, %r10288; // inline asm mul.wide.u32 %rd2366, %r6393, 4; add.s64 %rd2367, %rd48, %rd2366; ld.shared.u32 %r8121, [%rd2367]; xor.b32 %r8122, %r8121, %r8120; // inline asm bfe.u32 %r6397, %r6398, %r10322, %r10288; // inline asm mul.wide.u32 %rd2368, %r6397, 4; add.s64 
%rd2369, %rd51, %rd2368; ld.shared.u32 %r8123, [%rd2369]; add.s32 %r8124, %r8123, %r8122; xor.b32 %r8125, %r7609, %r6382; xor.b32 %r6414, %r8125, %r8124; // inline asm bfe.u32 %r6401, %r6414, %r10287, %r10288; // inline asm mul.wide.u32 %rd2370, %r6401, 4; add.s64 %rd2371, %rd42, %rd2370; ld.shared.u32 %r8126, [%rd2371]; // inline asm bfe.u32 %r6405, %r6414, %r10289, %r10288; // inline asm mul.wide.u32 %rd2372, %r6405, 4; add.s64 %rd2373, %rd45, %rd2372; ld.shared.u32 %r8127, [%rd2373]; add.s32 %r8128, %r8127, %r8126; // inline asm bfe.u32 %r6409, %r6414, %r10288, %r10288; // inline asm mul.wide.u32 %rd2374, %r6409, 4; add.s64 %rd2375, %rd48, %rd2374; ld.shared.u32 %r8129, [%rd2375]; xor.b32 %r8130, %r8129, %r8128; // inline asm bfe.u32 %r6413, %r6414, %r10322, %r10288; // inline asm mul.wide.u32 %rd2376, %r6413, 4; add.s64 %rd2377, %rd51, %rd2376; ld.shared.u32 %r8131, [%rd2377]; add.s32 %r8132, %r8131, %r8130; xor.b32 %r8133, %r7618, %r6398; xor.b32 %r6430, %r8133, %r8132; // inline asm bfe.u32 %r6417, %r6430, %r10287, %r10288; // inline asm mul.wide.u32 %rd2378, %r6417, 4; add.s64 %rd2379, %rd42, %rd2378; ld.shared.u32 %r8134, [%rd2379]; // inline asm bfe.u32 %r6421, %r6430, %r10289, %r10288; // inline asm mul.wide.u32 %rd2380, %r6421, 4; add.s64 %rd2381, %rd45, %rd2380; ld.shared.u32 %r8135, [%rd2381]; add.s32 %r8136, %r8135, %r8134; // inline asm bfe.u32 %r6425, %r6430, %r10288, %r10288; // inline asm mul.wide.u32 %rd2382, %r6425, 4; add.s64 %rd2383, %rd48, %rd2382; ld.shared.u32 %r8137, [%rd2383]; xor.b32 %r8138, %r8137, %r8136; // inline asm bfe.u32 %r6429, %r6430, %r10322, %r10288; // inline asm mul.wide.u32 %rd2384, %r6429, 4; add.s64 %rd2385, %rd51, %rd2384; ld.shared.u32 %r8139, [%rd2385]; add.s32 %r8140, %r8139, %r8138; xor.b32 %r8141, %r7627, %r6414; xor.b32 %r6446, %r8141, %r8140; // inline asm bfe.u32 %r6433, %r6446, %r10287, %r10288; // inline asm mul.wide.u32 %rd2386, %r6433, 4; add.s64 %rd2387, %rd42, %rd2386; ld.shared.u32 %r8142, [%rd2387]; // 
inline asm bfe.u32 %r6437, %r6446, %r10289, %r10288; // inline asm mul.wide.u32 %rd2388, %r6437, 4; add.s64 %rd2389, %rd45, %rd2388; ld.shared.u32 %r8143, [%rd2389]; add.s32 %r8144, %r8143, %r8142; // inline asm bfe.u32 %r6441, %r6446, %r10288, %r10288; // inline asm mul.wide.u32 %rd2390, %r6441, 4; add.s64 %rd2391, %rd48, %rd2390; ld.shared.u32 %r8145, [%rd2391]; xor.b32 %r8146, %r8145, %r8144; // inline asm bfe.u32 %r6445, %r6446, %r10322, %r10288; // inline asm mul.wide.u32 %rd2392, %r6445, 4; add.s64 %rd2393, %rd51, %rd2392; ld.shared.u32 %r8147, [%rd2393]; add.s32 %r8148, %r8147, %r8146; xor.b32 %r8149, %r7636, %r6430; xor.b32 %r6462, %r8149, %r8148; // inline asm bfe.u32 %r6449, %r6462, %r10287, %r10288; // inline asm mul.wide.u32 %rd2394, %r6449, 4; add.s64 %rd2395, %rd42, %rd2394; ld.shared.u32 %r8150, [%rd2395]; // inline asm bfe.u32 %r6453, %r6462, %r10289, %r10288; // inline asm mul.wide.u32 %rd2396, %r6453, 4; add.s64 %rd2397, %rd45, %rd2396; ld.shared.u32 %r8151, [%rd2397]; add.s32 %r8152, %r8151, %r8150; // inline asm bfe.u32 %r6457, %r6462, %r10288, %r10288; // inline asm mul.wide.u32 %rd2398, %r6457, 4; add.s64 %rd2399, %rd48, %rd2398; ld.shared.u32 %r8153, [%rd2399]; xor.b32 %r8154, %r8153, %r8152; // inline asm bfe.u32 %r6461, %r6462, %r10322, %r10288; // inline asm mul.wide.u32 %rd2400, %r6461, 4; add.s64 %rd2401, %rd51, %rd2400; ld.shared.u32 %r8155, [%rd2401]; add.s32 %r8156, %r8155, %r8154; xor.b32 %r8157, %r7645, %r6446; xor.b32 %r6478, %r8157, %r8156; // inline asm bfe.u32 %r6465, %r6478, %r10287, %r10288; // inline asm mul.wide.u32 %rd2402, %r6465, 4; add.s64 %rd2403, %rd42, %rd2402; ld.shared.u32 %r8158, [%rd2403]; // inline asm bfe.u32 %r6469, %r6478, %r10289, %r10288; // inline asm mul.wide.u32 %rd2404, %r6469, 4; add.s64 %rd2405, %rd45, %rd2404; ld.shared.u32 %r8159, [%rd2405]; add.s32 %r8160, %r8159, %r8158; // inline asm bfe.u32 %r6473, %r6478, %r10288, %r10288; // inline asm mul.wide.u32 %rd2406, %r6473, 4; add.s64 %rd2407, %rd48, 
%rd2406; ld.shared.u32 %r8161, [%rd2407]; xor.b32 %r8162, %r8161, %r8160; // inline asm bfe.u32 %r6477, %r6478, %r10322, %r10288; // inline asm mul.wide.u32 %rd2408, %r6477, 4; add.s64 %rd2409, %rd51, %rd2408; ld.shared.u32 %r8163, [%rd2409]; add.s32 %r8164, %r8163, %r8162; xor.b32 %r8165, %r7654, %r6462; xor.b32 %r6494, %r8165, %r8164; // inline asm bfe.u32 %r6481, %r6494, %r10287, %r10288; // inline asm mul.wide.u32 %rd2410, %r6481, 4; add.s64 %rd2411, %rd42, %rd2410; ld.shared.u32 %r8166, [%rd2411]; // inline asm bfe.u32 %r6485, %r6494, %r10289, %r10288; // inline asm mul.wide.u32 %rd2412, %r6485, 4; add.s64 %rd2413, %rd45, %rd2412; ld.shared.u32 %r8167, [%rd2413]; add.s32 %r8168, %r8167, %r8166; // inline asm bfe.u32 %r6489, %r6494, %r10288, %r10288; // inline asm mul.wide.u32 %rd2414, %r6489, 4; add.s64 %rd2415, %rd48, %rd2414; ld.shared.u32 %r8169, [%rd2415]; xor.b32 %r8170, %r8169, %r8168; // inline asm bfe.u32 %r6493, %r6494, %r10322, %r10288; // inline asm mul.wide.u32 %rd2416, %r6493, 4; add.s64 %rd2417, %rd51, %rd2416; ld.shared.u32 %r8171, [%rd2417]; add.s32 %r8172, %r8171, %r8170; xor.b32 %r8173, %r7663, %r6478; xor.b32 %r10342, %r8173, %r8172; xor.b32 %r10343, %r7665, %r6494; xor.b32 %r6510, %r5470, %r6494; // inline asm bfe.u32 %r6497, %r6510, %r10287, %r10288; // inline asm mul.wide.u32 %rd2418, %r6497, 4; add.s64 %rd2419, %rd42, %rd2418; ld.shared.u32 %r8174, [%rd2419]; // inline asm bfe.u32 %r6501, %r6510, %r10289, %r10288; // inline asm mul.wide.u32 %rd2420, %r6501, 4; add.s64 %rd2421, %rd45, %rd2420; ld.shared.u32 %r8175, [%rd2421]; add.s32 %r8176, %r8175, %r8174; // inline asm bfe.u32 %r6505, %r6510, %r10288, %r10288; // inline asm mul.wide.u32 %rd2422, %r6505, 4; add.s64 %rd2423, %rd48, %rd2422; ld.shared.u32 %r8177, [%rd2423]; xor.b32 %r8178, %r8177, %r8176; // inline asm bfe.u32 %r6509, %r6510, %r10322, %r10288; // inline asm mul.wide.u32 %rd2424, %r6509, 4; add.s64 %rd2425, %rd51, %rd2424; ld.shared.u32 %r8179, [%rd2425]; add.s32 %r8180, 
%r8179, %r8178; xor.b32 %r8181, %r10350, %r10342; xor.b32 %r6526, %r8181, %r8180; // inline asm bfe.u32 %r6513, %r6526, %r10287, %r10288; // inline asm mul.wide.u32 %rd2426, %r6513, 4; add.s64 %rd2427, %rd42, %rd2426; ld.shared.u32 %r8182, [%rd2427]; // inline asm bfe.u32 %r6517, %r6526, %r10289, %r10288; // inline asm mul.wide.u32 %rd2428, %r6517, 4; add.s64 %rd2429, %rd45, %rd2428; ld.shared.u32 %r8183, [%rd2429]; add.s32 %r8184, %r8183, %r8182; // inline asm bfe.u32 %r6521, %r6526, %r10288, %r10288; // inline asm mul.wide.u32 %rd2430, %r6521, 4; add.s64 %rd2431, %rd48, %rd2430; ld.shared.u32 %r8185, [%rd2431]; xor.b32 %r8186, %r8185, %r8184; // inline asm bfe.u32 %r6525, %r6526, %r10322, %r10288; // inline asm mul.wide.u32 %rd2432, %r6525, 4; add.s64 %rd2433, %rd51, %rd2432; ld.shared.u32 %r8187, [%rd2433]; add.s32 %r8188, %r8187, %r8186; xor.b32 %r8189, %r10349, %r6510; xor.b32 %r6542, %r8189, %r8188; // inline asm bfe.u32 %r6529, %r6542, %r10287, %r10288; // inline asm mul.wide.u32 %rd2434, %r6529, 4; add.s64 %rd2435, %rd42, %rd2434; ld.shared.u32 %r8190, [%rd2435]; // inline asm bfe.u32 %r6533, %r6542, %r10289, %r10288; // inline asm mul.wide.u32 %rd2436, %r6533, 4; add.s64 %rd2437, %rd45, %rd2436; ld.shared.u32 %r8191, [%rd2437]; add.s32 %r8192, %r8191, %r8190; // inline asm bfe.u32 %r6537, %r6542, %r10288, %r10288; // inline asm mul.wide.u32 %rd2438, %r6537, 4; add.s64 %rd2439, %rd48, %rd2438; ld.shared.u32 %r8193, [%rd2439]; xor.b32 %r8194, %r8193, %r8192; // inline asm bfe.u32 %r6541, %r6542, %r10322, %r10288; // inline asm mul.wide.u32 %rd2440, %r6541, 4; add.s64 %rd2441, %rd51, %rd2440; ld.shared.u32 %r8195, [%rd2441]; add.s32 %r8196, %r8195, %r8194; xor.b32 %r8197, %r10348, %r6526; xor.b32 %r6558, %r8197, %r8196; // inline asm bfe.u32 %r6545, %r6558, %r10287, %r10288; // inline asm mul.wide.u32 %rd2442, %r6545, 4; add.s64 %rd2443, %rd42, %rd2442; ld.shared.u32 %r8198, [%rd2443]; // inline asm bfe.u32 %r6549, %r6558, %r10289, %r10288; // inline asm 
mul.wide.u32 %rd2444, %r6549, 4; add.s64 %rd2445, %rd45, %rd2444; ld.shared.u32 %r8199, [%rd2445]; add.s32 %r8200, %r8199, %r8198; // inline asm bfe.u32 %r6553, %r6558, %r10288, %r10288; // inline asm mul.wide.u32 %rd2446, %r6553, 4; add.s64 %rd2447, %rd48, %rd2446; ld.shared.u32 %r8201, [%rd2447]; xor.b32 %r8202, %r8201, %r8200; // inline asm bfe.u32 %r6557, %r6558, %r10322, %r10288; // inline asm mul.wide.u32 %rd2448, %r6557, 4; add.s64 %rd2449, %rd51, %rd2448; ld.shared.u32 %r8203, [%rd2449]; add.s32 %r8204, %r8203, %r8202; xor.b32 %r8205, %r10347, %r6542; xor.b32 %r6574, %r8205, %r8204; // inline asm bfe.u32 %r6561, %r6574, %r10287, %r10288; // inline asm mul.wide.u32 %rd2450, %r6561, 4; add.s64 %rd2451, %rd42, %rd2450; ld.shared.u32 %r8206, [%rd2451]; // inline asm bfe.u32 %r6565, %r6574, %r10289, %r10288; // inline asm mul.wide.u32 %rd2452, %r6565, 4; add.s64 %rd2453, %rd45, %rd2452; ld.shared.u32 %r8207, [%rd2453]; add.s32 %r8208, %r8207, %r8206; // inline asm bfe.u32 %r6569, %r6574, %r10288, %r10288; // inline asm mul.wide.u32 %rd2454, %r6569, 4; add.s64 %rd2455, %rd48, %rd2454; ld.shared.u32 %r8209, [%rd2455]; xor.b32 %r8210, %r8209, %r8208; // inline asm bfe.u32 %r6573, %r6574, %r10322, %r10288; // inline asm mul.wide.u32 %rd2456, %r6573, 4; add.s64 %rd2457, %rd51, %rd2456; ld.shared.u32 %r8211, [%rd2457]; add.s32 %r8212, %r8211, %r8210; xor.b32 %r8213, %r10346, %r6558; xor.b32 %r6590, %r8213, %r8212; // inline asm bfe.u32 %r6577, %r6590, %r10287, %r10288; // inline asm mul.wide.u32 %rd2458, %r6577, 4; add.s64 %rd2459, %rd42, %rd2458; ld.shared.u32 %r8214, [%rd2459]; // inline asm bfe.u32 %r6581, %r6590, %r10289, %r10288; // inline asm mul.wide.u32 %rd2460, %r6581, 4; add.s64 %rd2461, %rd45, %rd2460; ld.shared.u32 %r8215, [%rd2461]; add.s32 %r8216, %r8215, %r8214; // inline asm bfe.u32 %r6585, %r6590, %r10288, %r10288; // inline asm mul.wide.u32 %rd2462, %r6585, 4; add.s64 %rd2463, %rd48, %rd2462; ld.shared.u32 %r8217, [%rd2463]; xor.b32 %r8218, %r8217, 
%r8216; // inline asm bfe.u32 %r6589, %r6590, %r10322, %r10288; // inline asm mul.wide.u32 %rd2464, %r6589, 4; add.s64 %rd2465, %rd51, %rd2464; ld.shared.u32 %r8219, [%rd2465]; add.s32 %r8220, %r8219, %r8218; xor.b32 %r8221, %r10345, %r6574; xor.b32 %r6606, %r8221, %r8220; // inline asm bfe.u32 %r6593, %r6606, %r10287, %r10288; // inline asm mul.wide.u32 %rd2466, %r6593, 4; add.s64 %rd2467, %rd42, %rd2466; ld.shared.u32 %r8222, [%rd2467]; // inline asm bfe.u32 %r6597, %r6606, %r10289, %r10288; // inline asm mul.wide.u32 %rd2468, %r6597, 4; add.s64 %rd2469, %rd45, %rd2468; ld.shared.u32 %r8223, [%rd2469]; add.s32 %r8224, %r8223, %r8222; // inline asm bfe.u32 %r6601, %r6606, %r10288, %r10288; // inline asm mul.wide.u32 %rd2470, %r6601, 4; add.s64 %rd2471, %rd48, %rd2470; ld.shared.u32 %r8225, [%rd2471]; xor.b32 %r8226, %r8225, %r8224; // inline asm bfe.u32 %r6605, %r6606, %r10322, %r10288; // inline asm mul.wide.u32 %rd2472, %r6605, 4; add.s64 %rd2473, %rd51, %rd2472; ld.shared.u32 %r8227, [%rd2473]; add.s32 %r8228, %r8227, %r8226; xor.b32 %r8229, %r10344, %r6590; xor.b32 %r6622, %r8229, %r8228; // inline asm bfe.u32 %r6609, %r6622, %r10287, %r10288; // inline asm mul.wide.u32 %rd2474, %r6609, 4; add.s64 %rd2475, %rd42, %rd2474; ld.shared.u32 %r8230, [%rd2475]; // inline asm bfe.u32 %r6613, %r6622, %r10289, %r10288; // inline asm mul.wide.u32 %rd2476, %r6613, 4; add.s64 %rd2477, %rd45, %rd2476; ld.shared.u32 %r8231, [%rd2477]; add.s32 %r8232, %r8231, %r8230; // inline asm bfe.u32 %r6617, %r6622, %r10288, %r10288; // inline asm mul.wide.u32 %rd2478, %r6617, 4; add.s64 %rd2479, %rd48, %rd2478; ld.shared.u32 %r8233, [%rd2479]; xor.b32 %r8234, %r8233, %r8232; // inline asm bfe.u32 %r6621, %r6622, %r10322, %r10288; // inline asm mul.wide.u32 %rd2480, %r6621, 4; add.s64 %rd2481, %rd51, %rd2480; ld.shared.u32 %r8235, [%rd2481]; add.s32 %r8236, %r8235, %r8234; xor.b32 %r8237, %r10343, %r6606; xor.b32 %r6638, %r8237, %r8236; // inline asm bfe.u32 %r6625, %r6638, %r10287, 
%r10288; // inline asm mul.wide.u32 %rd2482, %r6625, 4; add.s64 %rd2483, %rd42, %rd2482; ld.shared.u32 %r8238, [%rd2483]; // inline asm bfe.u32 %r6629, %r6638, %r10289, %r10288; // inline asm mul.wide.u32 %rd2484, %r6629, 4; add.s64 %rd2485, %rd45, %rd2484; ld.shared.u32 %r8239, [%rd2485]; add.s32 %r8240, %r8239, %r8238; // inline asm bfe.u32 %r6633, %r6638, %r10288, %r10288; // inline asm mul.wide.u32 %rd2486, %r6633, 4; add.s64 %rd2487, %rd48, %rd2486; ld.shared.u32 %r8241, [%rd2487]; xor.b32 %r8242, %r8241, %r8240; // inline asm bfe.u32 %r6637, %r6638, %r10322, %r10288; // inline asm mul.wide.u32 %rd2488, %r6637, 4; add.s64 %rd2489, %rd51, %rd2488; ld.shared.u32 %r8243, [%rd2489]; add.s32 %r8244, %r8243, %r8242; xor.b32 %r8245, %r10342, %r6622; xor.b32 %r6654, %r8245, %r8244; // inline asm bfe.u32 %r6641, %r6654, %r10287, %r10288; // inline asm mul.wide.u32 %rd2490, %r6641, 4; add.s64 %rd2491, %rd42, %rd2490; ld.shared.u32 %r8246, [%rd2491]; // inline asm bfe.u32 %r6645, %r6654, %r10289, %r10288; // inline asm mul.wide.u32 %rd2492, %r6645, 4; add.s64 %rd2493, %rd45, %rd2492; ld.shared.u32 %r8247, [%rd2493]; add.s32 %r8248, %r8247, %r8246; // inline asm bfe.u32 %r6649, %r6654, %r10288, %r10288; // inline asm mul.wide.u32 %rd2494, %r6649, 4; add.s64 %rd2495, %rd48, %rd2494; ld.shared.u32 %r8249, [%rd2495]; xor.b32 %r8250, %r8249, %r8248; // inline asm bfe.u32 %r6653, %r6654, %r10322, %r10288; // inline asm mul.wide.u32 %rd2496, %r6653, 4; add.s64 %rd2497, %rd51, %rd2496; ld.shared.u32 %r8251, [%rd2497]; add.s32 %r8252, %r8251, %r8250; xor.b32 %r8253, %r7609, %r6638; xor.b32 %r6670, %r8253, %r8252; // inline asm bfe.u32 %r6657, %r6670, %r10287, %r10288; // inline asm mul.wide.u32 %rd2498, %r6657, 4; add.s64 %rd2499, %rd42, %rd2498; ld.shared.u32 %r8254, [%rd2499]; // inline asm bfe.u32 %r6661, %r6670, %r10289, %r10288; // inline asm mul.wide.u32 %rd2500, %r6661, 4; add.s64 %rd2501, %rd45, %rd2500; ld.shared.u32 %r8255, [%rd2501]; add.s32 %r8256, %r8255, %r8254; // 
inline asm bfe.u32 %r6665, %r6670, %r10288, %r10288; // inline asm mul.wide.u32 %rd2502, %r6665, 4; add.s64 %rd2503, %rd48, %rd2502; ld.shared.u32 %r8257, [%rd2503]; xor.b32 %r8258, %r8257, %r8256; // inline asm bfe.u32 %r6669, %r6670, %r10322, %r10288; // inline asm mul.wide.u32 %rd2504, %r6669, 4; add.s64 %rd2505, %rd51, %rd2504; ld.shared.u32 %r8259, [%rd2505]; add.s32 %r8260, %r8259, %r8258; xor.b32 %r8261, %r7618, %r6654; xor.b32 %r6686, %r8261, %r8260; // inline asm bfe.u32 %r6673, %r6686, %r10287, %r10288; // inline asm mul.wide.u32 %rd2506, %r6673, 4; add.s64 %rd2507, %rd42, %rd2506; ld.shared.u32 %r8262, [%rd2507]; // inline asm bfe.u32 %r6677, %r6686, %r10289, %r10288; // inline asm mul.wide.u32 %rd2508, %r6677, 4; add.s64 %rd2509, %rd45, %rd2508; ld.shared.u32 %r8263, [%rd2509]; add.s32 %r8264, %r8263, %r8262; // inline asm bfe.u32 %r6681, %r6686, %r10288, %r10288; // inline asm mul.wide.u32 %rd2510, %r6681, 4; add.s64 %rd2511, %rd48, %rd2510; ld.shared.u32 %r8265, [%rd2511]; xor.b32 %r8266, %r8265, %r8264; // inline asm bfe.u32 %r6685, %r6686, %r10322, %r10288; // inline asm mul.wide.u32 %rd2512, %r6685, 4; add.s64 %rd2513, %rd51, %rd2512; ld.shared.u32 %r8267, [%rd2513]; add.s32 %r8268, %r8267, %r8266; xor.b32 %r8269, %r7627, %r6670; xor.b32 %r6702, %r8269, %r8268; // inline asm bfe.u32 %r6689, %r6702, %r10287, %r10288; // inline asm mul.wide.u32 %rd2514, %r6689, 4; add.s64 %rd2515, %rd42, %rd2514; ld.shared.u32 %r8270, [%rd2515]; // inline asm bfe.u32 %r6693, %r6702, %r10289, %r10288; // inline asm mul.wide.u32 %rd2516, %r6693, 4; add.s64 %rd2517, %rd45, %rd2516; ld.shared.u32 %r8271, [%rd2517]; add.s32 %r8272, %r8271, %r8270; // inline asm bfe.u32 %r6697, %r6702, %r10288, %r10288; // inline asm mul.wide.u32 %rd2518, %r6697, 4; add.s64 %rd2519, %rd48, %rd2518; ld.shared.u32 %r8273, [%rd2519]; xor.b32 %r8274, %r8273, %r8272; // inline asm bfe.u32 %r6701, %r6702, %r10322, %r10288; // inline asm mul.wide.u32 %rd2520, %r6701, 4; add.s64 %rd2521, %rd51, 
%rd2520; ld.shared.u32 %r8275, [%rd2521]; add.s32 %r8276, %r8275, %r8274; xor.b32 %r8277, %r7636, %r6686; xor.b32 %r6718, %r8277, %r8276; // inline asm bfe.u32 %r6705, %r6718, %r10287, %r10288; // inline asm mul.wide.u32 %rd2522, %r6705, 4; add.s64 %rd2523, %rd42, %rd2522; ld.shared.u32 %r8278, [%rd2523]; // inline asm bfe.u32 %r6709, %r6718, %r10289, %r10288; // inline asm mul.wide.u32 %rd2524, %r6709, 4; add.s64 %rd2525, %rd45, %rd2524; ld.shared.u32 %r8279, [%rd2525]; add.s32 %r8280, %r8279, %r8278; // inline asm bfe.u32 %r6713, %r6718, %r10288, %r10288; // inline asm mul.wide.u32 %rd2526, %r6713, 4; add.s64 %rd2527, %rd48, %rd2526; ld.shared.u32 %r8281, [%rd2527]; xor.b32 %r8282, %r8281, %r8280; // inline asm bfe.u32 %r6717, %r6718, %r10322, %r10288; // inline asm mul.wide.u32 %rd2528, %r6717, 4; add.s64 %rd2529, %rd51, %rd2528; ld.shared.u32 %r8283, [%rd2529]; add.s32 %r8284, %r8283, %r8282; xor.b32 %r8285, %r7645, %r6702; xor.b32 %r6734, %r8285, %r8284; // inline asm bfe.u32 %r6721, %r6734, %r10287, %r10288; // inline asm mul.wide.u32 %rd2530, %r6721, 4; add.s64 %rd2531, %rd42, %rd2530; ld.shared.u32 %r8286, [%rd2531]; // inline asm bfe.u32 %r6725, %r6734, %r10289, %r10288; // inline asm mul.wide.u32 %rd2532, %r6725, 4; add.s64 %rd2533, %rd45, %rd2532; ld.shared.u32 %r8287, [%rd2533]; add.s32 %r8288, %r8287, %r8286; // inline asm bfe.u32 %r6729, %r6734, %r10288, %r10288; // inline asm mul.wide.u32 %rd2534, %r6729, 4; add.s64 %rd2535, %rd48, %rd2534; ld.shared.u32 %r8289, [%rd2535]; xor.b32 %r8290, %r8289, %r8288; // inline asm bfe.u32 %r6733, %r6734, %r10322, %r10288; // inline asm mul.wide.u32 %rd2536, %r6733, 4; add.s64 %rd2537, %rd51, %rd2536; ld.shared.u32 %r8291, [%rd2537]; add.s32 %r8292, %r8291, %r8290; xor.b32 %r8293, %r7654, %r6718; xor.b32 %r6750, %r8293, %r8292; // inline asm bfe.u32 %r6737, %r6750, %r10287, %r10288; // inline asm mul.wide.u32 %rd2538, %r6737, 4; add.s64 %rd2539, %rd42, %rd2538; ld.shared.u32 %r8294, [%rd2539]; // inline asm 
bfe.u32 %r6741, %r6750, %r10289, %r10288; // inline asm mul.wide.u32 %rd2540, %r6741, 4; add.s64 %rd2541, %rd45, %rd2540; ld.shared.u32 %r8295, [%rd2541]; add.s32 %r8296, %r8295, %r8294; // inline asm bfe.u32 %r6745, %r6750, %r10288, %r10288; // inline asm mul.wide.u32 %rd2542, %r6745, 4; add.s64 %rd2543, %rd48, %rd2542; ld.shared.u32 %r8297, [%rd2543]; xor.b32 %r8298, %r8297, %r8296; // inline asm bfe.u32 %r6749, %r6750, %r10322, %r10288; // inline asm mul.wide.u32 %rd2544, %r6749, 4; add.s64 %rd2545, %rd51, %rd2544; ld.shared.u32 %r8299, [%rd2545]; add.s32 %r8300, %r8299, %r8298; xor.b32 %r8301, %r7663, %r6734; xor.b32 %r10340, %r8301, %r8300; xor.b32 %r10341, %r7665, %r6750; xor.b32 %r6766, %r5470, %r6750; // inline asm bfe.u32 %r6753, %r6766, %r10287, %r10288; // inline asm mul.wide.u32 %rd2546, %r6753, 4; add.s64 %rd2547, %rd42, %rd2546; ld.shared.u32 %r8302, [%rd2547]; // inline asm bfe.u32 %r6757, %r6766, %r10289, %r10288; // inline asm mul.wide.u32 %rd2548, %r6757, 4; add.s64 %rd2549, %rd45, %rd2548; ld.shared.u32 %r8303, [%rd2549]; add.s32 %r8304, %r8303, %r8302; // inline asm bfe.u32 %r6761, %r6766, %r10288, %r10288; // inline asm mul.wide.u32 %rd2550, %r6761, 4; add.s64 %rd2551, %rd48, %rd2550; ld.shared.u32 %r8305, [%rd2551]; xor.b32 %r8306, %r8305, %r8304; // inline asm bfe.u32 %r6765, %r6766, %r10322, %r10288; // inline asm mul.wide.u32 %rd2552, %r6765, 4; add.s64 %rd2553, %rd51, %rd2552; ld.shared.u32 %r8307, [%rd2553]; add.s32 %r8308, %r8307, %r8306; xor.b32 %r8309, %r10350, %r10340; xor.b32 %r6782, %r8309, %r8308; // inline asm bfe.u32 %r6769, %r6782, %r10287, %r10288; // inline asm mul.wide.u32 %rd2554, %r6769, 4; add.s64 %rd2555, %rd42, %rd2554; ld.shared.u32 %r8310, [%rd2555]; // inline asm bfe.u32 %r6773, %r6782, %r10289, %r10288; // inline asm mul.wide.u32 %rd2556, %r6773, 4; add.s64 %rd2557, %rd45, %rd2556; ld.shared.u32 %r8311, [%rd2557]; add.s32 %r8312, %r8311, %r8310; // inline asm bfe.u32 %r6777, %r6782, %r10288, %r10288; // inline asm 
mul.wide.u32 %rd2558, %r6777, 4; add.s64 %rd2559, %rd48, %rd2558; ld.shared.u32 %r8313, [%rd2559]; xor.b32 %r8314, %r8313, %r8312; // inline asm bfe.u32 %r6781, %r6782, %r10322, %r10288; // inline asm mul.wide.u32 %rd2560, %r6781, 4; add.s64 %rd2561, %rd51, %rd2560; ld.shared.u32 %r8315, [%rd2561]; add.s32 %r8316, %r8315, %r8314; xor.b32 %r8317, %r10349, %r6766; xor.b32 %r6798, %r8317, %r8316; // inline asm bfe.u32 %r6785, %r6798, %r10287, %r10288; // inline asm mul.wide.u32 %rd2562, %r6785, 4; add.s64 %rd2563, %rd42, %rd2562; ld.shared.u32 %r8318, [%rd2563]; // inline asm bfe.u32 %r6789, %r6798, %r10289, %r10288; // inline asm mul.wide.u32 %rd2564, %r6789, 4; add.s64 %rd2565, %rd45, %rd2564; ld.shared.u32 %r8319, [%rd2565]; add.s32 %r8320, %r8319, %r8318; // inline asm bfe.u32 %r6793, %r6798, %r10288, %r10288; // inline asm mul.wide.u32 %rd2566, %r6793, 4; add.s64 %rd2567, %rd48, %rd2566; ld.shared.u32 %r8321, [%rd2567]; xor.b32 %r8322, %r8321, %r8320; // inline asm bfe.u32 %r6797, %r6798, %r10322, %r10288; // inline asm mul.wide.u32 %rd2568, %r6797, 4; add.s64 %rd2569, %rd51, %rd2568; ld.shared.u32 %r8323, [%rd2569]; add.s32 %r8324, %r8323, %r8322; xor.b32 %r8325, %r10348, %r6782; xor.b32 %r6814, %r8325, %r8324; // inline asm bfe.u32 %r6801, %r6814, %r10287, %r10288; // inline asm mul.wide.u32 %rd2570, %r6801, 4; add.s64 %rd2571, %rd42, %rd2570; ld.shared.u32 %r8326, [%rd2571]; // inline asm bfe.u32 %r6805, %r6814, %r10289, %r10288; // inline asm mul.wide.u32 %rd2572, %r6805, 4; add.s64 %rd2573, %rd45, %rd2572; ld.shared.u32 %r8327, [%rd2573]; add.s32 %r8328, %r8327, %r8326; // inline asm bfe.u32 %r6809, %r6814, %r10288, %r10288; // inline asm mul.wide.u32 %rd2574, %r6809, 4; add.s64 %rd2575, %rd48, %rd2574; ld.shared.u32 %r8329, [%rd2575]; xor.b32 %r8330, %r8329, %r8328; // inline asm bfe.u32 %r6813, %r6814, %r10322, %r10288; // inline asm mul.wide.u32 %rd2576, %r6813, 4; add.s64 %rd2577, %rd51, %rd2576; ld.shared.u32 %r8331, [%rd2577]; add.s32 %r8332, %r8331, 
%r8330; xor.b32 %r8333, %r10347, %r6798; xor.b32 %r6830, %r8333, %r8332; // inline asm bfe.u32 %r6817, %r6830, %r10287, %r10288; // inline asm mul.wide.u32 %rd2578, %r6817, 4; add.s64 %rd2579, %rd42, %rd2578; ld.shared.u32 %r8334, [%rd2579]; // inline asm bfe.u32 %r6821, %r6830, %r10289, %r10288; // inline asm mul.wide.u32 %rd2580, %r6821, 4; add.s64 %rd2581, %rd45, %rd2580; ld.shared.u32 %r8335, [%rd2581]; add.s32 %r8336, %r8335, %r8334; // inline asm bfe.u32 %r6825, %r6830, %r10288, %r10288; // inline asm mul.wide.u32 %rd2582, %r6825, 4; add.s64 %rd2583, %rd48, %rd2582; ld.shared.u32 %r8337, [%rd2583]; xor.b32 %r8338, %r8337, %r8336; // inline asm bfe.u32 %r6829, %r6830, %r10322, %r10288; // inline asm mul.wide.u32 %rd2584, %r6829, 4; add.s64 %rd2585, %rd51, %rd2584; ld.shared.u32 %r8339, [%rd2585]; add.s32 %r8340, %r8339, %r8338; xor.b32 %r8341, %r10346, %r6814; xor.b32 %r6846, %r8341, %r8340; // inline asm bfe.u32 %r6833, %r6846, %r10287, %r10288; // inline asm mul.wide.u32 %rd2586, %r6833, 4; add.s64 %rd2587, %rd42, %rd2586; ld.shared.u32 %r8342, [%rd2587]; // inline asm bfe.u32 %r6837, %r6846, %r10289, %r10288; // inline asm mul.wide.u32 %rd2588, %r6837, 4; add.s64 %rd2589, %rd45, %rd2588; ld.shared.u32 %r8343, [%rd2589]; add.s32 %r8344, %r8343, %r8342; // inline asm bfe.u32 %r6841, %r6846, %r10288, %r10288; // inline asm mul.wide.u32 %rd2590, %r6841, 4; add.s64 %rd2591, %rd48, %rd2590; ld.shared.u32 %r8345, [%rd2591]; xor.b32 %r8346, %r8345, %r8344; // inline asm bfe.u32 %r6845, %r6846, %r10322, %r10288; // inline asm mul.wide.u32 %rd2592, %r6845, 4; add.s64 %rd2593, %rd51, %rd2592; ld.shared.u32 %r8347, [%rd2593]; add.s32 %r8348, %r8347, %r8346; xor.b32 %r8349, %r10345, %r6830; xor.b32 %r6862, %r8349, %r8348; // inline asm bfe.u32 %r6849, %r6862, %r10287, %r10288; // inline asm mul.wide.u32 %rd2594, %r6849, 4; add.s64 %rd2595, %rd42, %rd2594; ld.shared.u32 %r8350, [%rd2595]; // inline asm bfe.u32 %r6853, %r6862, %r10289, %r10288; // inline asm mul.wide.u32 
%rd2596, %r6853, 4; add.s64 %rd2597, %rd45, %rd2596; ld.shared.u32 %r8351, [%rd2597]; add.s32 %r8352, %r8351, %r8350; // inline asm bfe.u32 %r6857, %r6862, %r10288, %r10288; // inline asm mul.wide.u32 %rd2598, %r6857, 4; add.s64 %rd2599, %rd48, %rd2598; ld.shared.u32 %r8353, [%rd2599]; xor.b32 %r8354, %r8353, %r8352; // inline asm bfe.u32 %r6861, %r6862, %r10322, %r10288; // inline asm mul.wide.u32 %rd2600, %r6861, 4; add.s64 %rd2601, %rd51, %rd2600; ld.shared.u32 %r8355, [%rd2601]; add.s32 %r8356, %r8355, %r8354; xor.b32 %r8357, %r10344, %r6846; xor.b32 %r6878, %r8357, %r8356; // inline asm bfe.u32 %r6865, %r6878, %r10287, %r10288; // inline asm mul.wide.u32 %rd2602, %r6865, 4; add.s64 %rd2603, %rd42, %rd2602; ld.shared.u32 %r8358, [%rd2603]; // inline asm bfe.u32 %r6869, %r6878, %r10289, %r10288; // inline asm mul.wide.u32 %rd2604, %r6869, 4; add.s64 %rd2605, %rd45, %rd2604; ld.shared.u32 %r8359, [%rd2605]; add.s32 %r8360, %r8359, %r8358; // inline asm bfe.u32 %r6873, %r6878, %r10288, %r10288; // inline asm mul.wide.u32 %rd2606, %r6873, 4; add.s64 %rd2607, %rd48, %rd2606; ld.shared.u32 %r8361, [%rd2607]; xor.b32 %r8362, %r8361, %r8360; // inline asm bfe.u32 %r6877, %r6878, %r10322, %r10288; // inline asm mul.wide.u32 %rd2608, %r6877, 4; add.s64 %rd2609, %rd51, %rd2608; ld.shared.u32 %r8363, [%rd2609]; add.s32 %r8364, %r8363, %r8362; xor.b32 %r8365, %r10343, %r6862; xor.b32 %r6894, %r8365, %r8364; // inline asm bfe.u32 %r6881, %r6894, %r10287, %r10288; // inline asm mul.wide.u32 %rd2610, %r6881, 4; add.s64 %rd2611, %rd42, %rd2610; ld.shared.u32 %r8366, [%rd2611]; // inline asm bfe.u32 %r6885, %r6894, %r10289, %r10288; // inline asm mul.wide.u32 %rd2612, %r6885, 4; add.s64 %rd2613, %rd45, %rd2612; ld.shared.u32 %r8367, [%rd2613]; add.s32 %r8368, %r8367, %r8366; // inline asm bfe.u32 %r6889, %r6894, %r10288, %r10288; // inline asm mul.wide.u32 %rd2614, %r6889, 4; add.s64 %rd2615, %rd48, %rd2614; ld.shared.u32 %r8369, [%rd2615]; xor.b32 %r8370, %r8369, %r8368; // 
inline asm bfe.u32 %r6893, %r6894, %r10322, %r10288; // inline asm mul.wide.u32 %rd2616, %r6893, 4; add.s64 %rd2617, %rd51, %rd2616; ld.shared.u32 %r8371, [%rd2617]; add.s32 %r8372, %r8371, %r8370; xor.b32 %r8373, %r10342, %r6878; xor.b32 %r6910, %r8373, %r8372; // inline asm bfe.u32 %r6897, %r6910, %r10287, %r10288; // inline asm mul.wide.u32 %rd2618, %r6897, 4; add.s64 %rd2619, %rd42, %rd2618; ld.shared.u32 %r8374, [%rd2619]; // inline asm bfe.u32 %r6901, %r6910, %r10289, %r10288; // inline asm mul.wide.u32 %rd2620, %r6901, 4; add.s64 %rd2621, %rd45, %rd2620; ld.shared.u32 %r8375, [%rd2621]; add.s32 %r8376, %r8375, %r8374; // inline asm bfe.u32 %r6905, %r6910, %r10288, %r10288; // inline asm mul.wide.u32 %rd2622, %r6905, 4; add.s64 %rd2623, %rd48, %rd2622; ld.shared.u32 %r8377, [%rd2623]; xor.b32 %r8378, %r8377, %r8376; // inline asm bfe.u32 %r6909, %r6910, %r10322, %r10288; // inline asm mul.wide.u32 %rd2624, %r6909, 4; add.s64 %rd2625, %rd51, %rd2624; ld.shared.u32 %r8379, [%rd2625]; add.s32 %r8380, %r8379, %r8378; xor.b32 %r8381, %r10341, %r6894; xor.b32 %r6926, %r8381, %r8380; // inline asm bfe.u32 %r6913, %r6926, %r10287, %r10288; // inline asm mul.wide.u32 %rd2626, %r6913, 4; add.s64 %rd2627, %rd42, %rd2626; ld.shared.u32 %r8382, [%rd2627]; // inline asm bfe.u32 %r6917, %r6926, %r10289, %r10288; // inline asm mul.wide.u32 %rd2628, %r6917, 4; add.s64 %rd2629, %rd45, %rd2628; ld.shared.u32 %r8383, [%rd2629]; add.s32 %r8384, %r8383, %r8382; // inline asm bfe.u32 %r6921, %r6926, %r10288, %r10288; // inline asm mul.wide.u32 %rd2630, %r6921, 4; add.s64 %rd2631, %rd48, %rd2630; ld.shared.u32 %r8385, [%rd2631]; xor.b32 %r8386, %r8385, %r8384; // inline asm bfe.u32 %r6925, %r6926, %r10322, %r10288; // inline asm mul.wide.u32 %rd2632, %r6925, 4; add.s64 %rd2633, %rd51, %rd2632; ld.shared.u32 %r8387, [%rd2633]; add.s32 %r8388, %r8387, %r8386; xor.b32 %r8389, %r10340, %r6910; xor.b32 %r6942, %r8389, %r8388; // inline asm bfe.u32 %r6929, %r6942, %r10287, %r10288; // 
inline asm mul.wide.u32 %rd2634, %r6929, 4; add.s64 %rd2635, %rd42, %rd2634; ld.shared.u32 %r8390, [%rd2635]; // inline asm bfe.u32 %r6933, %r6942, %r10289, %r10288; // inline asm mul.wide.u32 %rd2636, %r6933, 4; add.s64 %rd2637, %rd45, %rd2636; ld.shared.u32 %r8391, [%rd2637]; add.s32 %r8392, %r8391, %r8390; // inline asm bfe.u32 %r6937, %r6942, %r10288, %r10288; // inline asm mul.wide.u32 %rd2638, %r6937, 4; add.s64 %rd2639, %rd48, %rd2638; ld.shared.u32 %r8393, [%rd2639]; xor.b32 %r8394, %r8393, %r8392; // inline asm bfe.u32 %r6941, %r6942, %r10322, %r10288; // inline asm mul.wide.u32 %rd2640, %r6941, 4; add.s64 %rd2641, %rd51, %rd2640; ld.shared.u32 %r8395, [%rd2641]; add.s32 %r8396, %r8395, %r8394; xor.b32 %r8397, %r7627, %r6926; xor.b32 %r6958, %r8397, %r8396; // inline asm bfe.u32 %r6945, %r6958, %r10287, %r10288; // inline asm mul.wide.u32 %rd2642, %r6945, 4; add.s64 %rd2643, %rd42, %rd2642; ld.shared.u32 %r8398, [%rd2643]; // inline asm bfe.u32 %r6949, %r6958, %r10289, %r10288; // inline asm mul.wide.u32 %rd2644, %r6949, 4; add.s64 %rd2645, %rd45, %rd2644; ld.shared.u32 %r8399, [%rd2645]; add.s32 %r8400, %r8399, %r8398; // inline asm bfe.u32 %r6953, %r6958, %r10288, %r10288; // inline asm mul.wide.u32 %rd2646, %r6953, 4; add.s64 %rd2647, %rd48, %rd2646; ld.shared.u32 %r8401, [%rd2647]; xor.b32 %r8402, %r8401, %r8400; // inline asm bfe.u32 %r6957, %r6958, %r10322, %r10288; // inline asm mul.wide.u32 %rd2648, %r6957, 4; add.s64 %rd2649, %rd51, %rd2648; ld.shared.u32 %r8403, [%rd2649]; add.s32 %r8404, %r8403, %r8402; xor.b32 %r8405, %r7636, %r6942; xor.b32 %r6974, %r8405, %r8404; // inline asm bfe.u32 %r6961, %r6974, %r10287, %r10288; // inline asm mul.wide.u32 %rd2650, %r6961, 4; add.s64 %rd2651, %rd42, %rd2650; ld.shared.u32 %r8406, [%rd2651]; // inline asm bfe.u32 %r6965, %r6974, %r10289, %r10288; // inline asm mul.wide.u32 %rd2652, %r6965, 4; add.s64 %rd2653, %rd45, %rd2652; ld.shared.u32 %r8407, [%rd2653]; add.s32 %r8408, %r8407, %r8406; // inline asm 
bfe.u32 %r6969, %r6974, %r10288, %r10288; // inline asm mul.wide.u32 %rd2654, %r6969, 4; add.s64 %rd2655, %rd48, %rd2654; ld.shared.u32 %r8409, [%rd2655]; xor.b32 %r8410, %r8409, %r8408; // inline asm bfe.u32 %r6973, %r6974, %r10322, %r10288; // inline asm mul.wide.u32 %rd2656, %r6973, 4; add.s64 %rd2657, %rd51, %rd2656; ld.shared.u32 %r8411, [%rd2657]; add.s32 %r8412, %r8411, %r8410; xor.b32 %r8413, %r7645, %r6958; xor.b32 %r6990, %r8413, %r8412; // inline asm bfe.u32 %r6977, %r6990, %r10287, %r10288; // inline asm mul.wide.u32 %rd2658, %r6977, 4; add.s64 %rd2659, %rd42, %rd2658; ld.shared.u32 %r8414, [%rd2659]; // inline asm bfe.u32 %r6981, %r6990, %r10289, %r10288; // inline asm mul.wide.u32 %rd2660, %r6981, 4; add.s64 %rd2661, %rd45, %rd2660; ld.shared.u32 %r8415, [%rd2661]; add.s32 %r8416, %r8415, %r8414; // inline asm bfe.u32 %r6985, %r6990, %r10288, %r10288; // inline asm mul.wide.u32 %rd2662, %r6985, 4; add.s64 %rd2663, %rd48, %rd2662; ld.shared.u32 %r8417, [%rd2663]; xor.b32 %r8418, %r8417, %r8416; // inline asm bfe.u32 %r6989, %r6990, %r10322, %r10288; // inline asm mul.wide.u32 %rd2664, %r6989, 4; add.s64 %rd2665, %rd51, %rd2664; ld.shared.u32 %r8419, [%rd2665]; add.s32 %r8420, %r8419, %r8418; xor.b32 %r8421, %r7654, %r6974; xor.b32 %r7006, %r8421, %r8420; // inline asm bfe.u32 %r6993, %r7006, %r10287, %r10288; // inline asm mul.wide.u32 %rd2666, %r6993, 4; add.s64 %rd2667, %rd42, %rd2666; ld.shared.u32 %r8422, [%rd2667]; // inline asm bfe.u32 %r6997, %r7006, %r10289, %r10288; // inline asm mul.wide.u32 %rd2668, %r6997, 4; add.s64 %rd2669, %rd45, %rd2668; ld.shared.u32 %r8423, [%rd2669]; add.s32 %r8424, %r8423, %r8422; // inline asm bfe.u32 %r7001, %r7006, %r10288, %r10288; // inline asm mul.wide.u32 %rd2670, %r7001, 4; add.s64 %rd2671, %rd48, %rd2670; ld.shared.u32 %r8425, [%rd2671]; xor.b32 %r8426, %r8425, %r8424; // inline asm bfe.u32 %r7005, %r7006, %r10322, %r10288; // inline asm mul.wide.u32 %rd2672, %r7005, 4; add.s64 %rd2673, %rd51, %rd2672; 
ld.shared.u32 %r8427, [%rd2673]; add.s32 %r8428, %r8427, %r8426; xor.b32 %r8429, %r7663, %r6990; xor.b32 %r10338, %r8429, %r8428; xor.b32 %r10339, %r7665, %r7006; xor.b32 %r7022, %r5470, %r7006; // inline asm bfe.u32 %r7009, %r7022, %r10287, %r10288; // inline asm mul.wide.u32 %rd2674, %r7009, 4; add.s64 %rd2675, %rd42, %rd2674; ld.shared.u32 %r8430, [%rd2675]; // inline asm bfe.u32 %r7013, %r7022, %r10289, %r10288; // inline asm mul.wide.u32 %rd2676, %r7013, 4; add.s64 %rd2677, %rd45, %rd2676; ld.shared.u32 %r8431, [%rd2677]; add.s32 %r8432, %r8431, %r8430; // inline asm bfe.u32 %r7017, %r7022, %r10288, %r10288; // inline asm mul.wide.u32 %rd2678, %r7017, 4; add.s64 %rd2679, %rd48, %rd2678; ld.shared.u32 %r8433, [%rd2679]; xor.b32 %r8434, %r8433, %r8432; // inline asm bfe.u32 %r7021, %r7022, %r10322, %r10288; // inline asm mul.wide.u32 %rd2680, %r7021, 4; add.s64 %rd2681, %rd51, %rd2680; ld.shared.u32 %r8435, [%rd2681]; add.s32 %r8436, %r8435, %r8434; xor.b32 %r8437, %r10350, %r10338; xor.b32 %r7038, %r8437, %r8436; // inline asm bfe.u32 %r7025, %r7038, %r10287, %r10288; // inline asm mul.wide.u32 %rd2682, %r7025, 4; add.s64 %rd2683, %rd42, %rd2682; ld.shared.u32 %r8438, [%rd2683]; // inline asm bfe.u32 %r7029, %r7038, %r10289, %r10288; // inline asm mul.wide.u32 %rd2684, %r7029, 4; add.s64 %rd2685, %rd45, %rd2684; ld.shared.u32 %r8439, [%rd2685]; add.s32 %r8440, %r8439, %r8438; // inline asm bfe.u32 %r7033, %r7038, %r10288, %r10288; // inline asm mul.wide.u32 %rd2686, %r7033, 4; add.s64 %rd2687, %rd48, %rd2686; ld.shared.u32 %r8441, [%rd2687]; xor.b32 %r8442, %r8441, %r8440; // inline asm bfe.u32 %r7037, %r7038, %r10322, %r10288; // inline asm mul.wide.u32 %rd2688, %r7037, 4; add.s64 %rd2689, %rd51, %rd2688; ld.shared.u32 %r8443, [%rd2689]; add.s32 %r8444, %r8443, %r8442; xor.b32 %r8445, %r10349, %r7022; xor.b32 %r7054, %r8445, %r8444; // inline asm bfe.u32 %r7041, %r7054, %r10287, %r10288; // inline asm mul.wide.u32 %rd2690, %r7041, 4; add.s64 %rd2691, %rd42, 
%rd2690; ld.shared.u32 %r8446, [%rd2691]; // inline asm bfe.u32 %r7045, %r7054, %r10289, %r10288; // inline asm mul.wide.u32 %rd2692, %r7045, 4; add.s64 %rd2693, %rd45, %rd2692; ld.shared.u32 %r8447, [%rd2693]; add.s32 %r8448, %r8447, %r8446; // inline asm bfe.u32 %r7049, %r7054, %r10288, %r10288; // inline asm mul.wide.u32 %rd2694, %r7049, 4; add.s64 %rd2695, %rd48, %rd2694; ld.shared.u32 %r8449, [%rd2695]; xor.b32 %r8450, %r8449, %r8448; // inline asm bfe.u32 %r7053, %r7054, %r10322, %r10288; // inline asm mul.wide.u32 %rd2696, %r7053, 4; add.s64 %rd2697, %rd51, %rd2696; ld.shared.u32 %r8451, [%rd2697]; add.s32 %r8452, %r8451, %r8450; xor.b32 %r8453, %r10348, %r7038; xor.b32 %r7070, %r8453, %r8452; // inline asm bfe.u32 %r7057, %r7070, %r10287, %r10288; // inline asm mul.wide.u32 %rd2698, %r7057, 4; add.s64 %rd2699, %rd42, %rd2698; ld.shared.u32 %r8454, [%rd2699]; // inline asm bfe.u32 %r7061, %r7070, %r10289, %r10288; // inline asm mul.wide.u32 %rd2700, %r7061, 4; add.s64 %rd2701, %rd45, %rd2700; ld.shared.u32 %r8455, [%rd2701]; add.s32 %r8456, %r8455, %r8454; // inline asm bfe.u32 %r7065, %r7070, %r10288, %r10288; // inline asm mul.wide.u32 %rd2702, %r7065, 4; add.s64 %rd2703, %rd48, %rd2702; ld.shared.u32 %r8457, [%rd2703]; xor.b32 %r8458, %r8457, %r8456; // inline asm bfe.u32 %r7069, %r7070, %r10322, %r10288; // inline asm mul.wide.u32 %rd2704, %r7069, 4; add.s64 %rd2705, %rd51, %rd2704; ld.shared.u32 %r8459, [%rd2705]; add.s32 %r8460, %r8459, %r8458; xor.b32 %r8461, %r10347, %r7054; xor.b32 %r7086, %r8461, %r8460; // inline asm bfe.u32 %r7073, %r7086, %r10287, %r10288; // inline asm mul.wide.u32 %rd2706, %r7073, 4; add.s64 %rd2707, %rd42, %rd2706; ld.shared.u32 %r8462, [%rd2707]; // inline asm bfe.u32 %r7077, %r7086, %r10289, %r10288; // inline asm mul.wide.u32 %rd2708, %r7077, 4; add.s64 %rd2709, %rd45, %rd2708; ld.shared.u32 %r8463, [%rd2709]; add.s32 %r8464, %r8463, %r8462; // inline asm bfe.u32 %r7081, %r7086, %r10288, %r10288; // inline asm mul.wide.u32 
%rd2710, %r7081, 4; add.s64 %rd2711, %rd48, %rd2710; ld.shared.u32 %r8465, [%rd2711]; xor.b32 %r8466, %r8465, %r8464; // inline asm bfe.u32 %r7085, %r7086, %r10322, %r10288; // inline asm mul.wide.u32 %rd2712, %r7085, 4; add.s64 %rd2713, %rd51, %rd2712; ld.shared.u32 %r8467, [%rd2713]; add.s32 %r8468, %r8467, %r8466; xor.b32 %r8469, %r10346, %r7070; xor.b32 %r7102, %r8469, %r8468; // inline asm bfe.u32 %r7089, %r7102, %r10287, %r10288; // inline asm mul.wide.u32 %rd2714, %r7089, 4; add.s64 %rd2715, %rd42, %rd2714; ld.shared.u32 %r8470, [%rd2715]; // inline asm bfe.u32 %r7093, %r7102, %r10289, %r10288; // inline asm mul.wide.u32 %rd2716, %r7093, 4; add.s64 %rd2717, %rd45, %rd2716; ld.shared.u32 %r8471, [%rd2717]; add.s32 %r8472, %r8471, %r8470; // inline asm bfe.u32 %r7097, %r7102, %r10288, %r10288; // inline asm mul.wide.u32 %rd2718, %r7097, 4; add.s64 %rd2719, %rd48, %rd2718; ld.shared.u32 %r8473, [%rd2719]; xor.b32 %r8474, %r8473, %r8472; // inline asm bfe.u32 %r7101, %r7102, %r10322, %r10288; // inline asm mul.wide.u32 %rd2720, %r7101, 4; add.s64 %rd2721, %rd51, %rd2720; ld.shared.u32 %r8475, [%rd2721]; add.s32 %r8476, %r8475, %r8474; xor.b32 %r8477, %r10345, %r7086; xor.b32 %r7118, %r8477, %r8476; // inline asm bfe.u32 %r7105, %r7118, %r10287, %r10288; // inline asm mul.wide.u32 %rd2722, %r7105, 4; add.s64 %rd2723, %rd42, %rd2722; ld.shared.u32 %r8478, [%rd2723]; // inline asm bfe.u32 %r7109, %r7118, %r10289, %r10288; // inline asm mul.wide.u32 %rd2724, %r7109, 4; add.s64 %rd2725, %rd45, %rd2724; ld.shared.u32 %r8479, [%rd2725]; add.s32 %r8480, %r8479, %r8478; // inline asm bfe.u32 %r7113, %r7118, %r10288, %r10288; // inline asm mul.wide.u32 %rd2726, %r7113, 4; add.s64 %rd2727, %rd48, %rd2726; ld.shared.u32 %r8481, [%rd2727]; xor.b32 %r8482, %r8481, %r8480; // inline asm bfe.u32 %r7117, %r7118, %r10322, %r10288; // inline asm mul.wide.u32 %rd2728, %r7117, 4; add.s64 %rd2729, %rd51, %rd2728; ld.shared.u32 %r8483, [%rd2729]; add.s32 %r8484, %r8483, %r8482; 
xor.b32 %r8485, %r10344, %r7102; xor.b32 %r7134, %r8485, %r8484; // inline asm bfe.u32 %r7121, %r7134, %r10287, %r10288; // inline asm mul.wide.u32 %rd2730, %r7121, 4; add.s64 %rd2731, %rd42, %rd2730; ld.shared.u32 %r8486, [%rd2731]; // inline asm bfe.u32 %r7125, %r7134, %r10289, %r10288; // inline asm mul.wide.u32 %rd2732, %r7125, 4; add.s64 %rd2733, %rd45, %rd2732; ld.shared.u32 %r8487, [%rd2733]; add.s32 %r8488, %r8487, %r8486; // inline asm bfe.u32 %r7129, %r7134, %r10288, %r10288; // inline asm mul.wide.u32 %rd2734, %r7129, 4; add.s64 %rd2735, %rd48, %rd2734; ld.shared.u32 %r8489, [%rd2735]; xor.b32 %r8490, %r8489, %r8488; // inline asm bfe.u32 %r7133, %r7134, %r10322, %r10288; // inline asm mul.wide.u32 %rd2736, %r7133, 4; add.s64 %rd2737, %rd51, %rd2736; ld.shared.u32 %r8491, [%rd2737]; add.s32 %r8492, %r8491, %r8490; xor.b32 %r8493, %r10343, %r7118; xor.b32 %r7150, %r8493, %r8492; // inline asm bfe.u32 %r7137, %r7150, %r10287, %r10288; // inline asm mul.wide.u32 %rd2738, %r7137, 4; add.s64 %rd2739, %rd42, %rd2738; ld.shared.u32 %r8494, [%rd2739]; // inline asm bfe.u32 %r7141, %r7150, %r10289, %r10288; // inline asm mul.wide.u32 %rd2740, %r7141, 4; add.s64 %rd2741, %rd45, %rd2740; ld.shared.u32 %r8495, [%rd2741]; add.s32 %r8496, %r8495, %r8494; // inline asm bfe.u32 %r7145, %r7150, %r10288, %r10288; // inline asm mul.wide.u32 %rd2742, %r7145, 4; add.s64 %rd2743, %rd48, %rd2742; ld.shared.u32 %r8497, [%rd2743]; xor.b32 %r8498, %r8497, %r8496; // inline asm bfe.u32 %r7149, %r7150, %r10322, %r10288; // inline asm mul.wide.u32 %rd2744, %r7149, 4; add.s64 %rd2745, %rd51, %rd2744; ld.shared.u32 %r8499, [%rd2745]; add.s32 %r8500, %r8499, %r8498; xor.b32 %r8501, %r10342, %r7134; xor.b32 %r7166, %r8501, %r8500; // inline asm bfe.u32 %r7153, %r7166, %r10287, %r10288; // inline asm mul.wide.u32 %rd2746, %r7153, 4; add.s64 %rd2747, %rd42, %rd2746; ld.shared.u32 %r8502, [%rd2747]; // inline asm bfe.u32 %r7157, %r7166, %r10289, %r10288; // inline asm mul.wide.u32 %rd2748, 
%r7157, 4; add.s64 %rd2749, %rd45, %rd2748; ld.shared.u32 %r8503, [%rd2749]; add.s32 %r8504, %r8503, %r8502; // inline asm bfe.u32 %r7161, %r7166, %r10288, %r10288; // inline asm mul.wide.u32 %rd2750, %r7161, 4; add.s64 %rd2751, %rd48, %rd2750; ld.shared.u32 %r8505, [%rd2751]; xor.b32 %r8506, %r8505, %r8504; // inline asm bfe.u32 %r7165, %r7166, %r10322, %r10288; // inline asm mul.wide.u32 %rd2752, %r7165, 4; add.s64 %rd2753, %rd51, %rd2752; ld.shared.u32 %r8507, [%rd2753]; add.s32 %r8508, %r8507, %r8506; xor.b32 %r8509, %r10341, %r7150; xor.b32 %r7182, %r8509, %r8508; // inline asm bfe.u32 %r7169, %r7182, %r10287, %r10288; // inline asm mul.wide.u32 %rd2754, %r7169, 4; add.s64 %rd2755, %rd42, %rd2754; ld.shared.u32 %r8510, [%rd2755]; // inline asm bfe.u32 %r7173, %r7182, %r10289, %r10288; // inline asm mul.wide.u32 %rd2756, %r7173, 4; add.s64 %rd2757, %rd45, %rd2756; ld.shared.u32 %r8511, [%rd2757]; add.s32 %r8512, %r8511, %r8510; // inline asm bfe.u32 %r7177, %r7182, %r10288, %r10288; // inline asm mul.wide.u32 %rd2758, %r7177, 4; add.s64 %rd2759, %rd48, %rd2758; ld.shared.u32 %r8513, [%rd2759]; xor.b32 %r8514, %r8513, %r8512; // inline asm bfe.u32 %r7181, %r7182, %r10322, %r10288; // inline asm mul.wide.u32 %rd2760, %r7181, 4; add.s64 %rd2761, %rd51, %rd2760; ld.shared.u32 %r8515, [%rd2761]; add.s32 %r8516, %r8515, %r8514; xor.b32 %r8517, %r10340, %r7166; xor.b32 %r7198, %r8517, %r8516; // inline asm bfe.u32 %r7185, %r7198, %r10287, %r10288; // inline asm mul.wide.u32 %rd2762, %r7185, 4; add.s64 %rd2763, %rd42, %rd2762; ld.shared.u32 %r8518, [%rd2763]; // inline asm bfe.u32 %r7189, %r7198, %r10289, %r10288; // inline asm mul.wide.u32 %rd2764, %r7189, 4; add.s64 %rd2765, %rd45, %rd2764; ld.shared.u32 %r8519, [%rd2765]; add.s32 %r8520, %r8519, %r8518; // inline asm bfe.u32 %r7193, %r7198, %r10288, %r10288; // inline asm mul.wide.u32 %rd2766, %r7193, 4; add.s64 %rd2767, %rd48, %rd2766; ld.shared.u32 %r8521, [%rd2767]; xor.b32 %r8522, %r8521, %r8520; // inline asm 
bfe.u32 %r7197, %r7198, %r10322, %r10288; // inline asm mul.wide.u32 %rd2768, %r7197, 4; add.s64 %rd2769, %rd51, %rd2768; ld.shared.u32 %r8523, [%rd2769]; add.s32 %r8524, %r8523, %r8522; xor.b32 %r8525, %r10339, %r7182; xor.b32 %r7214, %r8525, %r8524; // inline asm bfe.u32 %r7201, %r7214, %r10287, %r10288; // inline asm mul.wide.u32 %rd2770, %r7201, 4; add.s64 %rd2771, %rd42, %rd2770; ld.shared.u32 %r8526, [%rd2771]; // inline asm bfe.u32 %r7205, %r7214, %r10289, %r10288; // inline asm mul.wide.u32 %rd2772, %r7205, 4; add.s64 %rd2773, %rd45, %rd2772; ld.shared.u32 %r8527, [%rd2773]; add.s32 %r8528, %r8527, %r8526; // inline asm bfe.u32 %r7209, %r7214, %r10288, %r10288; // inline asm mul.wide.u32 %rd2774, %r7209, 4; add.s64 %rd2775, %rd48, %rd2774; ld.shared.u32 %r8529, [%rd2775]; xor.b32 %r8530, %r8529, %r8528; // inline asm bfe.u32 %r7213, %r7214, %r10322, %r10288; // inline asm mul.wide.u32 %rd2776, %r7213, 4; add.s64 %rd2777, %rd51, %rd2776; ld.shared.u32 %r8531, [%rd2777]; add.s32 %r8532, %r8531, %r8530; xor.b32 %r8533, %r10338, %r7198; xor.b32 %r7230, %r8533, %r8532; // inline asm bfe.u32 %r7217, %r7230, %r10287, %r10288; // inline asm mul.wide.u32 %rd2778, %r7217, 4; add.s64 %rd2779, %rd42, %rd2778; ld.shared.u32 %r8534, [%rd2779]; // inline asm bfe.u32 %r7221, %r7230, %r10289, %r10288; // inline asm mul.wide.u32 %rd2780, %r7221, 4; add.s64 %rd2781, %rd45, %rd2780; ld.shared.u32 %r8535, [%rd2781]; add.s32 %r8536, %r8535, %r8534; // inline asm bfe.u32 %r7225, %r7230, %r10288, %r10288; // inline asm mul.wide.u32 %rd2782, %r7225, 4; add.s64 %rd2783, %rd48, %rd2782; ld.shared.u32 %r8537, [%rd2783]; xor.b32 %r8538, %r8537, %r8536; // inline asm bfe.u32 %r7229, %r7230, %r10322, %r10288; // inline asm mul.wide.u32 %rd2784, %r7229, 4; add.s64 %rd2785, %rd51, %rd2784; ld.shared.u32 %r8539, [%rd2785]; add.s32 %r8540, %r8539, %r8538; xor.b32 %r8541, %r7645, %r7214; xor.b32 %r7246, %r8541, %r8540; // inline asm bfe.u32 %r7233, %r7246, %r10287, %r10288; // inline asm 
mul.wide.u32 %rd2786, %r7233, 4; add.s64 %rd2787, %rd42, %rd2786; ld.shared.u32 %r8542, [%rd2787]; // inline asm bfe.u32 %r7237, %r7246, %r10289, %r10288; // inline asm mul.wide.u32 %rd2788, %r7237, 4; add.s64 %rd2789, %rd45, %rd2788; ld.shared.u32 %r8543, [%rd2789]; add.s32 %r8544, %r8543, %r8542; // inline asm bfe.u32 %r7241, %r7246, %r10288, %r10288; // inline asm mul.wide.u32 %rd2790, %r7241, 4; add.s64 %rd2791, %rd48, %rd2790; ld.shared.u32 %r8545, [%rd2791]; xor.b32 %r8546, %r8545, %r8544; // inline asm bfe.u32 %r7245, %r7246, %r10322, %r10288; // inline asm mul.wide.u32 %rd2792, %r7245, 4; add.s64 %rd2793, %rd51, %rd2792; ld.shared.u32 %r8547, [%rd2793]; add.s32 %r8548, %r8547, %r8546; xor.b32 %r8549, %r7654, %r7230; xor.b32 %r7262, %r8549, %r8548; // inline asm bfe.u32 %r7249, %r7262, %r10287, %r10288; // inline asm mul.wide.u32 %rd2794, %r7249, 4; add.s64 %rd2795, %rd42, %rd2794; ld.shared.u32 %r8550, [%rd2795]; // inline asm bfe.u32 %r7253, %r7262, %r10289, %r10288; // inline asm mul.wide.u32 %rd2796, %r7253, 4; add.s64 %rd2797, %rd45, %rd2796; ld.shared.u32 %r8551, [%rd2797]; add.s32 %r8552, %r8551, %r8550; // inline asm bfe.u32 %r7257, %r7262, %r10288, %r10288; // inline asm mul.wide.u32 %rd2798, %r7257, 4; add.s64 %rd2799, %rd48, %rd2798; ld.shared.u32 %r8553, [%rd2799]; xor.b32 %r8554, %r8553, %r8552; // inline asm bfe.u32 %r7261, %r7262, %r10322, %r10288; // inline asm mul.wide.u32 %rd2800, %r7261, 4; add.s64 %rd2801, %rd51, %rd2800; ld.shared.u32 %r8555, [%rd2801]; add.s32 %r8556, %r8555, %r8554; xor.b32 %r8557, %r7663, %r7246; xor.b32 %r10336, %r8557, %r8556; xor.b32 %r10337, %r7665, %r7262; xor.b32 %r7278, %r5470, %r7262; // inline asm bfe.u32 %r7265, %r7278, %r10287, %r10288; // inline asm mul.wide.u32 %rd2802, %r7265, 4; add.s64 %rd2803, %rd42, %rd2802; ld.shared.u32 %r8558, [%rd2803]; // inline asm bfe.u32 %r7269, %r7278, %r10289, %r10288; // inline asm mul.wide.u32 %rd2804, %r7269, 4; add.s64 %rd2805, %rd45, %rd2804; ld.shared.u32 %r8559, 
[%rd2805]; add.s32 %r8560, %r8559, %r8558; // inline asm bfe.u32 %r7273, %r7278, %r10288, %r10288; // inline asm mul.wide.u32 %rd2806, %r7273, 4; add.s64 %rd2807, %rd48, %rd2806; ld.shared.u32 %r8561, [%rd2807]; xor.b32 %r8562, %r8561, %r8560; // inline asm bfe.u32 %r7277, %r7278, %r10322, %r10288; // inline asm mul.wide.u32 %rd2808, %r7277, 4; add.s64 %rd2809, %rd51, %rd2808; ld.shared.u32 %r8563, [%rd2809]; add.s32 %r8564, %r8563, %r8562; xor.b32 %r8565, %r10350, %r10336; xor.b32 %r7294, %r8565, %r8564; // inline asm bfe.u32 %r7281, %r7294, %r10287, %r10288; // inline asm mul.wide.u32 %rd2810, %r7281, 4; add.s64 %rd2811, %rd42, %rd2810; ld.shared.u32 %r8566, [%rd2811]; // inline asm bfe.u32 %r7285, %r7294, %r10289, %r10288; // inline asm mul.wide.u32 %rd2812, %r7285, 4; add.s64 %rd2813, %rd45, %rd2812; ld.shared.u32 %r8567, [%rd2813]; add.s32 %r8568, %r8567, %r8566; // inline asm bfe.u32 %r7289, %r7294, %r10288, %r10288; // inline asm mul.wide.u32 %rd2814, %r7289, 4; add.s64 %rd2815, %rd48, %rd2814; ld.shared.u32 %r8569, [%rd2815]; xor.b32 %r8570, %r8569, %r8568; // inline asm bfe.u32 %r7293, %r7294, %r10322, %r10288; // inline asm mul.wide.u32 %rd2816, %r7293, 4; add.s64 %rd2817, %rd51, %rd2816; ld.shared.u32 %r8571, [%rd2817]; add.s32 %r8572, %r8571, %r8570; xor.b32 %r8573, %r10349, %r7278; xor.b32 %r7310, %r8573, %r8572; // inline asm bfe.u32 %r7297, %r7310, %r10287, %r10288; // inline asm mul.wide.u32 %rd2818, %r7297, 4; add.s64 %rd2819, %rd42, %rd2818; ld.shared.u32 %r8574, [%rd2819]; // inline asm bfe.u32 %r7301, %r7310, %r10289, %r10288; // inline asm mul.wide.u32 %rd2820, %r7301, 4; add.s64 %rd2821, %rd45, %rd2820; ld.shared.u32 %r8575, [%rd2821]; add.s32 %r8576, %r8575, %r8574; // inline asm bfe.u32 %r7305, %r7310, %r10288, %r10288; // inline asm mul.wide.u32 %rd2822, %r7305, 4; add.s64 %rd2823, %rd48, %rd2822; ld.shared.u32 %r8577, [%rd2823]; xor.b32 %r8578, %r8577, %r8576; // inline asm bfe.u32 %r7309, %r7310, %r10322, %r10288; // inline asm 
mul.wide.u32 %rd2824, %r7309, 4; add.s64 %rd2825, %rd51, %rd2824; ld.shared.u32 %r8579, [%rd2825]; add.s32 %r8580, %r8579, %r8578; xor.b32 %r8581, %r10348, %r7294; xor.b32 %r7326, %r8581, %r8580; // inline asm bfe.u32 %r7313, %r7326, %r10287, %r10288; // inline asm mul.wide.u32 %rd2826, %r7313, 4; add.s64 %rd2827, %rd42, %rd2826; ld.shared.u32 %r8582, [%rd2827]; // inline asm bfe.u32 %r7317, %r7326, %r10289, %r10288; // inline asm mul.wide.u32 %rd2828, %r7317, 4; add.s64 %rd2829, %rd45, %rd2828; ld.shared.u32 %r8583, [%rd2829]; add.s32 %r8584, %r8583, %r8582; // inline asm bfe.u32 %r7321, %r7326, %r10288, %r10288; // inline asm mul.wide.u32 %rd2830, %r7321, 4; add.s64 %rd2831, %rd48, %rd2830; ld.shared.u32 %r8585, [%rd2831]; xor.b32 %r8586, %r8585, %r8584; // inline asm bfe.u32 %r7325, %r7326, %r10322, %r10288; // inline asm mul.wide.u32 %rd2832, %r7325, 4; add.s64 %rd2833, %rd51, %rd2832; ld.shared.u32 %r8587, [%rd2833]; add.s32 %r8588, %r8587, %r8586; xor.b32 %r8589, %r10347, %r7310; xor.b32 %r7342, %r8589, %r8588; // inline asm bfe.u32 %r7329, %r7342, %r10287, %r10288; // inline asm mul.wide.u32 %rd2834, %r7329, 4; add.s64 %rd2835, %rd42, %rd2834; ld.shared.u32 %r8590, [%rd2835]; // inline asm bfe.u32 %r7333, %r7342, %r10289, %r10288; // inline asm mul.wide.u32 %rd2836, %r7333, 4; add.s64 %rd2837, %rd45, %rd2836; ld.shared.u32 %r8591, [%rd2837]; add.s32 %r8592, %r8591, %r8590; // inline asm bfe.u32 %r7337, %r7342, %r10288, %r10288; // inline asm mul.wide.u32 %rd2838, %r7337, 4; add.s64 %rd2839, %rd48, %rd2838; ld.shared.u32 %r8593, [%rd2839]; xor.b32 %r8594, %r8593, %r8592; // inline asm bfe.u32 %r7341, %r7342, %r10322, %r10288; // inline asm mul.wide.u32 %rd2840, %r7341, 4; add.s64 %rd2841, %rd51, %rd2840; ld.shared.u32 %r8595, [%rd2841]; add.s32 %r8596, %r8595, %r8594; xor.b32 %r8597, %r10346, %r7326; xor.b32 %r7358, %r8597, %r8596; // inline asm bfe.u32 %r7345, %r7358, %r10287, %r10288; // inline asm mul.wide.u32 %rd2842, %r7345, 4; add.s64 %rd2843, %rd42, 
%rd2842; ld.shared.u32 %r8598, [%rd2843]; // inline asm bfe.u32 %r7349, %r7358, %r10289, %r10288; // inline asm mul.wide.u32 %rd2844, %r7349, 4; add.s64 %rd2845, %rd45, %rd2844; ld.shared.u32 %r8599, [%rd2845]; add.s32 %r8600, %r8599, %r8598; // inline asm bfe.u32 %r7353, %r7358, %r10288, %r10288; // inline asm mul.wide.u32 %rd2846, %r7353, 4; add.s64 %rd2847, %rd48, %rd2846; ld.shared.u32 %r8601, [%rd2847]; xor.b32 %r8602, %r8601, %r8600; // inline asm bfe.u32 %r7357, %r7358, %r10322, %r10288; // inline asm mul.wide.u32 %rd2848, %r7357, 4; add.s64 %rd2849, %rd51, %rd2848; ld.shared.u32 %r8603, [%rd2849]; add.s32 %r8604, %r8603, %r8602; xor.b32 %r8605, %r10345, %r7342; xor.b32 %r7374, %r8605, %r8604; // inline asm bfe.u32 %r7361, %r7374, %r10287, %r10288; // inline asm mul.wide.u32 %rd2850, %r7361, 4; add.s64 %rd2851, %rd42, %rd2850; ld.shared.u32 %r8606, [%rd2851]; // inline asm bfe.u32 %r7365, %r7374, %r10289, %r10288; // inline asm mul.wide.u32 %rd2852, %r7365, 4; add.s64 %rd2853, %rd45, %rd2852; ld.shared.u32 %r8607, [%rd2853]; add.s32 %r8608, %r8607, %r8606; // inline asm bfe.u32 %r7369, %r7374, %r10288, %r10288; // inline asm mul.wide.u32 %rd2854, %r7369, 4; add.s64 %rd2855, %rd48, %rd2854; ld.shared.u32 %r8609, [%rd2855]; xor.b32 %r8610, %r8609, %r8608; // inline asm bfe.u32 %r7373, %r7374, %r10322, %r10288; // inline asm mul.wide.u32 %rd2856, %r7373, 4; add.s64 %rd2857, %rd51, %rd2856; ld.shared.u32 %r8611, [%rd2857]; add.s32 %r8612, %r8611, %r8610; xor.b32 %r8613, %r10344, %r7358; xor.b32 %r7390, %r8613, %r8612; // inline asm bfe.u32 %r7377, %r7390, %r10287, %r10288; // inline asm mul.wide.u32 %rd2858, %r7377, 4; add.s64 %rd2859, %rd42, %rd2858; ld.shared.u32 %r8614, [%rd2859]; // inline asm bfe.u32 %r7381, %r7390, %r10289, %r10288; // inline asm mul.wide.u32 %rd2860, %r7381, 4; add.s64 %rd2861, %rd45, %rd2860; ld.shared.u32 %r8615, [%rd2861]; add.s32 %r8616, %r8615, %r8614; // inline asm bfe.u32 %r7385, %r7390, %r10288, %r10288; // inline asm mul.wide.u32 
%rd2862, %r7385, 4; add.s64 %rd2863, %rd48, %rd2862; ld.shared.u32 %r8617, [%rd2863]; xor.b32 %r8618, %r8617, %r8616; // inline asm bfe.u32 %r7389, %r7390, %r10322, %r10288; // inline asm mul.wide.u32 %rd2864, %r7389, 4; add.s64 %rd2865, %rd51, %rd2864; ld.shared.u32 %r8619, [%rd2865]; add.s32 %r8620, %r8619, %r8618; xor.b32 %r8621, %r10343, %r7374; xor.b32 %r7406, %r8621, %r8620; // inline asm bfe.u32 %r7393, %r7406, %r10287, %r10288; // inline asm mul.wide.u32 %rd2866, %r7393, 4; add.s64 %rd2867, %rd42, %rd2866; ld.shared.u32 %r8622, [%rd2867]; // inline asm bfe.u32 %r7397, %r7406, %r10289, %r10288; // inline asm mul.wide.u32 %rd2868, %r7397, 4; add.s64 %rd2869, %rd45, %rd2868; ld.shared.u32 %r8623, [%rd2869]; add.s32 %r8624, %r8623, %r8622; // inline asm bfe.u32 %r7401, %r7406, %r10288, %r10288; // inline asm mul.wide.u32 %rd2870, %r7401, 4; add.s64 %rd2871, %rd48, %rd2870; ld.shared.u32 %r8625, [%rd2871]; xor.b32 %r8626, %r8625, %r8624; // inline asm bfe.u32 %r7405, %r7406, %r10322, %r10288; // inline asm mul.wide.u32 %rd2872, %r7405, 4; add.s64 %rd2873, %rd51, %rd2872; ld.shared.u32 %r8627, [%rd2873]; add.s32 %r8628, %r8627, %r8626; xor.b32 %r8629, %r10342, %r7390; xor.b32 %r7422, %r8629, %r8628; // inline asm bfe.u32 %r7409, %r7422, %r10287, %r10288; // inline asm mul.wide.u32 %rd2874, %r7409, 4; add.s64 %rd2875, %rd42, %rd2874; ld.shared.u32 %r8630, [%rd2875]; // inline asm bfe.u32 %r7413, %r7422, %r10289, %r10288; // inline asm mul.wide.u32 %rd2876, %r7413, 4; add.s64 %rd2877, %rd45, %rd2876; ld.shared.u32 %r8631, [%rd2877]; add.s32 %r8632, %r8631, %r8630; // inline asm bfe.u32 %r7417, %r7422, %r10288, %r10288; // inline asm mul.wide.u32 %rd2878, %r7417, 4; add.s64 %rd2879, %rd48, %rd2878; ld.shared.u32 %r8633, [%rd2879]; xor.b32 %r8634, %r8633, %r8632; // inline asm bfe.u32 %r7421, %r7422, %r10322, %r10288; // inline asm mul.wide.u32 %rd2880, %r7421, 4; add.s64 %rd2881, %rd51, %rd2880; ld.shared.u32 %r8635, [%rd2881]; add.s32 %r8636, %r8635, %r8634; 
xor.b32 %r8637, %r10341, %r7406; xor.b32 %r7438, %r8637, %r8636; // inline asm bfe.u32 %r7425, %r7438, %r10287, %r10288; // inline asm mul.wide.u32 %rd2882, %r7425, 4; add.s64 %rd2883, %rd42, %rd2882; ld.shared.u32 %r8638, [%rd2883]; // inline asm bfe.u32 %r7429, %r7438, %r10289, %r10288; // inline asm mul.wide.u32 %rd2884, %r7429, 4; add.s64 %rd2885, %rd45, %rd2884; ld.shared.u32 %r8639, [%rd2885]; add.s32 %r8640, %r8639, %r8638; // inline asm bfe.u32 %r7433, %r7438, %r10288, %r10288; // inline asm mul.wide.u32 %rd2886, %r7433, 4; add.s64 %rd2887, %rd48, %rd2886; ld.shared.u32 %r8641, [%rd2887]; xor.b32 %r8642, %r8641, %r8640; // inline asm bfe.u32 %r7437, %r7438, %r10322, %r10288; // inline asm mul.wide.u32 %rd2888, %r7437, 4; add.s64 %rd2889, %rd51, %rd2888; ld.shared.u32 %r8643, [%rd2889]; add.s32 %r8644, %r8643, %r8642; xor.b32 %r8645, %r10340, %r7422; xor.b32 %r7454, %r8645, %r8644; // inline asm bfe.u32 %r7441, %r7454, %r10287, %r10288; // inline asm mul.wide.u32 %rd2890, %r7441, 4; add.s64 %rd2891, %rd42, %rd2890; ld.shared.u32 %r8646, [%rd2891]; // inline asm bfe.u32 %r7445, %r7454, %r10289, %r10288; // inline asm mul.wide.u32 %rd2892, %r7445, 4; add.s64 %rd2893, %rd45, %rd2892; ld.shared.u32 %r8647, [%rd2893]; add.s32 %r8648, %r8647, %r8646; // inline asm bfe.u32 %r7449, %r7454, %r10288, %r10288; // inline asm mul.wide.u32 %rd2894, %r7449, 4; add.s64 %rd2895, %rd48, %rd2894; ld.shared.u32 %r8649, [%rd2895]; xor.b32 %r8650, %r8649, %r8648; // inline asm bfe.u32 %r7453, %r7454, %r10322, %r10288; // inline asm mul.wide.u32 %rd2896, %r7453, 4; add.s64 %rd2897, %rd51, %rd2896; ld.shared.u32 %r8651, [%rd2897]; add.s32 %r8652, %r8651, %r8650; xor.b32 %r8653, %r10339, %r7438; xor.b32 %r7470, %r8653, %r8652; // inline asm bfe.u32 %r7457, %r7470, %r10287, %r10288; // inline asm mul.wide.u32 %rd2898, %r7457, 4; add.s64 %rd2899, %rd42, %rd2898; ld.shared.u32 %r8654, [%rd2899]; // inline asm bfe.u32 %r7461, %r7470, %r10289, %r10288; // inline asm mul.wide.u32 %rd2900, 
%r7461, 4; add.s64 %rd2901, %rd45, %rd2900; ld.shared.u32 %r8655, [%rd2901]; add.s32 %r8656, %r8655, %r8654; // inline asm bfe.u32 %r7465, %r7470, %r10288, %r10288; // inline asm mul.wide.u32 %rd2902, %r7465, 4; add.s64 %rd2903, %rd48, %rd2902; ld.shared.u32 %r8657, [%rd2903]; xor.b32 %r8658, %r8657, %r8656; // inline asm bfe.u32 %r7469, %r7470, %r10322, %r10288; // inline asm mul.wide.u32 %rd2904, %r7469, 4; add.s64 %rd2905, %rd51, %rd2904; ld.shared.u32 %r8659, [%rd2905]; add.s32 %r8660, %r8659, %r8658; xor.b32 %r8661, %r10338, %r7454; xor.b32 %r7486, %r8661, %r8660; // inline asm bfe.u32 %r7473, %r7486, %r10287, %r10288; // inline asm mul.wide.u32 %rd2906, %r7473, 4; add.s64 %rd2907, %rd42, %rd2906; ld.shared.u32 %r8662, [%rd2907]; // inline asm bfe.u32 %r7477, %r7486, %r10289, %r10288; // inline asm mul.wide.u32 %rd2908, %r7477, 4; add.s64 %rd2909, %rd45, %rd2908; ld.shared.u32 %r8663, [%rd2909]; add.s32 %r8664, %r8663, %r8662; // inline asm bfe.u32 %r7481, %r7486, %r10288, %r10288; // inline asm mul.wide.u32 %rd2910, %r7481, 4; add.s64 %rd2911, %rd48, %rd2910; ld.shared.u32 %r8665, [%rd2911]; xor.b32 %r8666, %r8665, %r8664; // inline asm bfe.u32 %r7485, %r7486, %r10322, %r10288; // inline asm mul.wide.u32 %rd2912, %r7485, 4; add.s64 %rd2913, %rd51, %rd2912; ld.shared.u32 %r8667, [%rd2913]; add.s32 %r8668, %r8667, %r8666; xor.b32 %r8669, %r10337, %r7470; xor.b32 %r7502, %r8669, %r8668; // inline asm bfe.u32 %r7489, %r7502, %r10287, %r10288; // inline asm mul.wide.u32 %rd2914, %r7489, 4; add.s64 %rd2915, %rd42, %rd2914; ld.shared.u32 %r8670, [%rd2915]; // inline asm bfe.u32 %r7493, %r7502, %r10289, %r10288; // inline asm mul.wide.u32 %rd2916, %r7493, 4; add.s64 %rd2917, %rd45, %rd2916; ld.shared.u32 %r8671, [%rd2917]; add.s32 %r8672, %r8671, %r8670; // inline asm bfe.u32 %r7497, %r7502, %r10288, %r10288; // inline asm mul.wide.u32 %rd2918, %r7497, 4; add.s64 %rd2919, %rd48, %rd2918; ld.shared.u32 %r8673, [%rd2919]; xor.b32 %r8674, %r8673, %r8672; // inline asm 
bfe.u32 %r7501, %r7502, %r10322, %r10288; // inline asm mul.wide.u32 %rd2920, %r7501, 4; add.s64 %rd2921, %rd51, %rd2920; ld.shared.u32 %r8675, [%rd2921]; add.s32 %r8676, %r8675, %r8674; xor.b32 %r8677, %r10336, %r7486; xor.b32 %r7518, %r8677, %r8676; // inline asm bfe.u32 %r7505, %r7518, %r10287, %r10288; // inline asm mul.wide.u32 %rd2922, %r7505, 4; add.s64 %rd2923, %rd42, %rd2922; ld.shared.u32 %r8678, [%rd2923]; // inline asm bfe.u32 %r7509, %r7518, %r10289, %r10288; // inline asm mul.wide.u32 %rd2924, %r7509, 4; add.s64 %rd2925, %rd45, %rd2924; ld.shared.u32 %r8679, [%rd2925]; add.s32 %r8680, %r8679, %r8678; // inline asm bfe.u32 %r7513, %r7518, %r10288, %r10288; // inline asm mul.wide.u32 %rd2926, %r7513, 4; add.s64 %rd2927, %rd48, %rd2926; ld.shared.u32 %r8681, [%rd2927]; xor.b32 %r8682, %r8681, %r8680; // inline asm bfe.u32 %r7517, %r7518, %r10322, %r10288; // inline asm mul.wide.u32 %rd2928, %r7517, 4; add.s64 %rd2929, %rd51, %rd2928; ld.shared.u32 %r8683, [%rd2929]; add.s32 %r8684, %r8683, %r8682; xor.b32 %r8685, %r7663, %r7502; xor.b32 %r10334, %r8685, %r8684; xor.b32 %r10335, %r7665, %r7518; mov.u64 %rd3499, %rd3; mov.u32 %r10326, %r10334; mov.u32 %r10327, %r10335; BB4_14: mov.u32 %r10325, 0; mov.u32 %r10260, 16; mov.u32 %r10259, 8; mov.u32 %r10258, 24; xor.b32 %r8699, %r10351, %r10327; // inline asm bfe.u32 %r8686, %r8699, %r10258, %r10259; // inline asm mul.wide.u32 %rd2933, %r8686, 4; add.s64 %rd2934, %rd42, %rd2933; ld.shared.u32 %r8943, [%rd2934]; // inline asm bfe.u32 %r8690, %r8699, %r10260, %r10259; // inline asm mul.wide.u32 %rd2937, %r8690, 4; add.s64 %rd2938, %rd45, %rd2937; ld.shared.u32 %r8944, [%rd2938]; add.s32 %r8945, %r8944, %r8943; // inline asm bfe.u32 %r8694, %r8699, %r10259, %r10259; // inline asm mul.wide.u32 %rd2941, %r8694, 4; add.s64 %rd2942, %rd48, %rd2941; ld.shared.u32 %r8946, [%rd2942]; xor.b32 %r8947, %r8946, %r8945; // inline asm bfe.u32 %r8698, %r8699, %r10325, %r10259; // inline asm mul.wide.u32 %rd2945, %r8698, 4; 
add.s64 %rd2946, %rd51, %rd2945; ld.shared.u32 %r8948, [%rd2946]; add.s32 %r8949, %r8948, %r8947; xor.b32 %r8950, %r10350, %r10326; xor.b32 %r8715, %r8950, %r8949; // inline asm bfe.u32 %r8702, %r8715, %r10258, %r10259; // inline asm mul.wide.u32 %rd2947, %r8702, 4; add.s64 %rd2948, %rd42, %rd2947; ld.shared.u32 %r8951, [%rd2948]; // inline asm bfe.u32 %r8706, %r8715, %r10260, %r10259; // inline asm mul.wide.u32 %rd2949, %r8706, 4; add.s64 %rd2950, %rd45, %rd2949; ld.shared.u32 %r8952, [%rd2950]; add.s32 %r8953, %r8952, %r8951; // inline asm bfe.u32 %r8710, %r8715, %r10259, %r10259; // inline asm mul.wide.u32 %rd2951, %r8710, 4; add.s64 %rd2952, %rd48, %rd2951; ld.shared.u32 %r8954, [%rd2952]; xor.b32 %r8955, %r8954, %r8953; // inline asm bfe.u32 %r8714, %r8715, %r10325, %r10259; // inline asm mul.wide.u32 %rd2953, %r8714, 4; add.s64 %rd2954, %rd51, %rd2953; ld.shared.u32 %r8956, [%rd2954]; add.s32 %r8957, %r8956, %r8955; xor.b32 %r8958, %r10349, %r8699; xor.b32 %r8731, %r8958, %r8957; // inline asm bfe.u32 %r8718, %r8731, %r10258, %r10259; // inline asm mul.wide.u32 %rd2955, %r8718, 4; add.s64 %rd2956, %rd42, %rd2955; ld.shared.u32 %r8959, [%rd2956]; // inline asm bfe.u32 %r8722, %r8731, %r10260, %r10259; // inline asm mul.wide.u32 %rd2957, %r8722, 4; add.s64 %rd2958, %rd45, %rd2957; ld.shared.u32 %r8960, [%rd2958]; add.s32 %r8961, %r8960, %r8959; // inline asm bfe.u32 %r8726, %r8731, %r10259, %r10259; // inline asm mul.wide.u32 %rd2959, %r8726, 4; add.s64 %rd2960, %rd48, %rd2959; ld.shared.u32 %r8962, [%rd2960]; xor.b32 %r8963, %r8962, %r8961; // inline asm bfe.u32 %r8730, %r8731, %r10325, %r10259; // inline asm mul.wide.u32 %rd2961, %r8730, 4; add.s64 %rd2962, %rd51, %rd2961; ld.shared.u32 %r8964, [%rd2962]; add.s32 %r8965, %r8964, %r8963; xor.b32 %r8966, %r10348, %r8715; xor.b32 %r8747, %r8966, %r8965; // inline asm bfe.u32 %r8734, %r8747, %r10258, %r10259; // inline asm mul.wide.u32 %rd2963, %r8734, 4; add.s64 %rd2964, %rd42, %rd2963; ld.shared.u32 %r8967, 
[%rd2964]; // inline asm bfe.u32 %r8738, %r8747, %r10260, %r10259; // inline asm mul.wide.u32 %rd2965, %r8738, 4; add.s64 %rd2966, %rd45, %rd2965; ld.shared.u32 %r8968, [%rd2966]; add.s32 %r8969, %r8968, %r8967; // inline asm bfe.u32 %r8742, %r8747, %r10259, %r10259; // inline asm mul.wide.u32 %rd2967, %r8742, 4; add.s64 %rd2968, %rd48, %rd2967; ld.shared.u32 %r8970, [%rd2968]; xor.b32 %r8971, %r8970, %r8969; // inline asm bfe.u32 %r8746, %r8747, %r10325, %r10259; // inline asm mul.wide.u32 %rd2969, %r8746, 4; add.s64 %rd2970, %rd51, %rd2969; ld.shared.u32 %r8972, [%rd2970]; add.s32 %r8973, %r8972, %r8971; xor.b32 %r8974, %r10347, %r8731; xor.b32 %r8763, %r8974, %r8973; // inline asm bfe.u32 %r8750, %r8763, %r10258, %r10259; // inline asm mul.wide.u32 %rd2971, %r8750, 4; add.s64 %rd2972, %rd42, %rd2971; ld.shared.u32 %r8975, [%rd2972]; // inline asm bfe.u32 %r8754, %r8763, %r10260, %r10259; // inline asm mul.wide.u32 %rd2973, %r8754, 4; add.s64 %rd2974, %rd45, %rd2973; ld.shared.u32 %r8976, [%rd2974]; add.s32 %r8977, %r8976, %r8975; // inline asm bfe.u32 %r8758, %r8763, %r10259, %r10259; // inline asm mul.wide.u32 %rd2975, %r8758, 4; add.s64 %rd2976, %rd48, %rd2975; ld.shared.u32 %r8978, [%rd2976]; xor.b32 %r8979, %r8978, %r8977; // inline asm bfe.u32 %r8762, %r8763, %r10325, %r10259; // inline asm mul.wide.u32 %rd2977, %r8762, 4; add.s64 %rd2978, %rd51, %rd2977; ld.shared.u32 %r8980, [%rd2978]; add.s32 %r8981, %r8980, %r8979; xor.b32 %r8982, %r10346, %r8747; xor.b32 %r8779, %r8982, %r8981; // inline asm bfe.u32 %r8766, %r8779, %r10258, %r10259; // inline asm mul.wide.u32 %rd2979, %r8766, 4; add.s64 %rd2980, %rd42, %rd2979; ld.shared.u32 %r8983, [%rd2980]; // inline asm bfe.u32 %r8770, %r8779, %r10260, %r10259; // inline asm mul.wide.u32 %rd2981, %r8770, 4; add.s64 %rd2982, %rd45, %rd2981; ld.shared.u32 %r8984, [%rd2982]; add.s32 %r8985, %r8984, %r8983; // inline asm bfe.u32 %r8774, %r8779, %r10259, %r10259; // inline asm mul.wide.u32 %rd2983, %r8774, 4; add.s64 
%rd2984, %rd48, %rd2983; ld.shared.u32 %r8986, [%rd2984]; xor.b32 %r8987, %r8986, %r8985; // inline asm bfe.u32 %r8778, %r8779, %r10325, %r10259; // inline asm mul.wide.u32 %rd2985, %r8778, 4; add.s64 %rd2986, %rd51, %rd2985; ld.shared.u32 %r8988, [%rd2986]; add.s32 %r8989, %r8988, %r8987; xor.b32 %r8990, %r10345, %r8763; xor.b32 %r8795, %r8990, %r8989; // inline asm bfe.u32 %r8782, %r8795, %r10258, %r10259; // inline asm mul.wide.u32 %rd2987, %r8782, 4; add.s64 %rd2988, %rd42, %rd2987; ld.shared.u32 %r8991, [%rd2988]; // inline asm bfe.u32 %r8786, %r8795, %r10260, %r10259; // inline asm mul.wide.u32 %rd2989, %r8786, 4; add.s64 %rd2990, %rd45, %rd2989; ld.shared.u32 %r8992, [%rd2990]; add.s32 %r8993, %r8992, %r8991; // inline asm bfe.u32 %r8790, %r8795, %r10259, %r10259; // inline asm mul.wide.u32 %rd2991, %r8790, 4; add.s64 %rd2992, %rd48, %rd2991; ld.shared.u32 %r8994, [%rd2992]; xor.b32 %r8995, %r8994, %r8993; // inline asm bfe.u32 %r8794, %r8795, %r10325, %r10259; // inline asm mul.wide.u32 %rd2993, %r8794, 4; add.s64 %rd2994, %rd51, %rd2993; ld.shared.u32 %r8996, [%rd2994]; add.s32 %r8997, %r8996, %r8995; xor.b32 %r8998, %r10344, %r8779; xor.b32 %r8811, %r8998, %r8997; // inline asm bfe.u32 %r8798, %r8811, %r10258, %r10259; // inline asm mul.wide.u32 %rd2995, %r8798, 4; add.s64 %rd2996, %rd42, %rd2995; ld.shared.u32 %r8999, [%rd2996]; // inline asm bfe.u32 %r8802, %r8811, %r10260, %r10259; // inline asm mul.wide.u32 %rd2997, %r8802, 4; add.s64 %rd2998, %rd45, %rd2997; ld.shared.u32 %r9000, [%rd2998]; add.s32 %r9001, %r9000, %r8999; // inline asm bfe.u32 %r8806, %r8811, %r10259, %r10259; // inline asm mul.wide.u32 %rd2999, %r8806, 4; add.s64 %rd3000, %rd48, %rd2999; ld.shared.u32 %r9002, [%rd3000]; xor.b32 %r9003, %r9002, %r9001; // inline asm bfe.u32 %r8810, %r8811, %r10325, %r10259; // inline asm mul.wide.u32 %rd3001, %r8810, 4; add.s64 %rd3002, %rd51, %rd3001; ld.shared.u32 %r9004, [%rd3002]; add.s32 %r9005, %r9004, %r9003; xor.b32 %r9006, %r10343, %r8795; 
xor.b32 %r8827, %r9006, %r9005; // inline asm bfe.u32 %r8814, %r8827, %r10258, %r10259; // inline asm mul.wide.u32 %rd3003, %r8814, 4; add.s64 %rd3004, %rd42, %rd3003; ld.shared.u32 %r9007, [%rd3004]; // inline asm bfe.u32 %r8818, %r8827, %r10260, %r10259; // inline asm mul.wide.u32 %rd3005, %r8818, 4; add.s64 %rd3006, %rd45, %rd3005; ld.shared.u32 %r9008, [%rd3006]; add.s32 %r9009, %r9008, %r9007; // inline asm bfe.u32 %r8822, %r8827, %r10259, %r10259; // inline asm mul.wide.u32 %rd3007, %r8822, 4; add.s64 %rd3008, %rd48, %rd3007; ld.shared.u32 %r9010, [%rd3008]; xor.b32 %r9011, %r9010, %r9009; // inline asm bfe.u32 %r8826, %r8827, %r10325, %r10259; // inline asm mul.wide.u32 %rd3009, %r8826, 4; add.s64 %rd3010, %rd51, %rd3009; ld.shared.u32 %r9012, [%rd3010]; add.s32 %r9013, %r9012, %r9011; xor.b32 %r9014, %r10342, %r8811; xor.b32 %r8843, %r9014, %r9013; // inline asm bfe.u32 %r8830, %r8843, %r10258, %r10259; // inline asm mul.wide.u32 %rd3011, %r8830, 4; add.s64 %rd3012, %rd42, %rd3011; ld.shared.u32 %r9015, [%rd3012]; // inline asm bfe.u32 %r8834, %r8843, %r10260, %r10259; // inline asm mul.wide.u32 %rd3013, %r8834, 4; add.s64 %rd3014, %rd45, %rd3013; ld.shared.u32 %r9016, [%rd3014]; add.s32 %r9017, %r9016, %r9015; // inline asm bfe.u32 %r8838, %r8843, %r10259, %r10259; // inline asm mul.wide.u32 %rd3015, %r8838, 4; add.s64 %rd3016, %rd48, %rd3015; ld.shared.u32 %r9018, [%rd3016]; xor.b32 %r9019, %r9018, %r9017; // inline asm bfe.u32 %r8842, %r8843, %r10325, %r10259; // inline asm mul.wide.u32 %rd3017, %r8842, 4; add.s64 %rd3018, %rd51, %rd3017; ld.shared.u32 %r9020, [%rd3018]; add.s32 %r9021, %r9020, %r9019; xor.b32 %r9022, %r10341, %r8827; xor.b32 %r8859, %r9022, %r9021; // inline asm bfe.u32 %r8846, %r8859, %r10258, %r10259; // inline asm mul.wide.u32 %rd3019, %r8846, 4; add.s64 %rd3020, %rd42, %rd3019; ld.shared.u32 %r9023, [%rd3020]; // inline asm bfe.u32 %r8850, %r8859, %r10260, %r10259; // inline asm mul.wide.u32 %rd3021, %r8850, 4; add.s64 %rd3022, 
%rd45, %rd3021; ld.shared.u32 %r9024, [%rd3022]; add.s32 %r9025, %r9024, %r9023; // inline asm bfe.u32 %r8854, %r8859, %r10259, %r10259; // inline asm mul.wide.u32 %rd3023, %r8854, 4; add.s64 %rd3024, %rd48, %rd3023; ld.shared.u32 %r9026, [%rd3024]; xor.b32 %r9027, %r9026, %r9025; // inline asm bfe.u32 %r8858, %r8859, %r10325, %r10259; // inline asm mul.wide.u32 %rd3025, %r8858, 4; add.s64 %rd3026, %rd51, %rd3025; ld.shared.u32 %r9028, [%rd3026]; add.s32 %r9029, %r9028, %r9027; xor.b32 %r9030, %r10340, %r8843; xor.b32 %r8875, %r9030, %r9029; // inline asm bfe.u32 %r8862, %r8875, %r10258, %r10259; // inline asm mul.wide.u32 %rd3027, %r8862, 4; add.s64 %rd3028, %rd42, %rd3027; ld.shared.u32 %r9031, [%rd3028]; // inline asm bfe.u32 %r8866, %r8875, %r10260, %r10259; // inline asm mul.wide.u32 %rd3029, %r8866, 4; add.s64 %rd3030, %rd45, %rd3029; ld.shared.u32 %r9032, [%rd3030]; add.s32 %r9033, %r9032, %r9031; // inline asm bfe.u32 %r8870, %r8875, %r10259, %r10259; // inline asm mul.wide.u32 %rd3031, %r8870, 4; add.s64 %rd3032, %rd48, %rd3031; ld.shared.u32 %r9034, [%rd3032]; xor.b32 %r9035, %r9034, %r9033; // inline asm bfe.u32 %r8874, %r8875, %r10325, %r10259; // inline asm mul.wide.u32 %rd3033, %r8874, 4; add.s64 %rd3034, %rd51, %rd3033; ld.shared.u32 %r9036, [%rd3034]; add.s32 %r9037, %r9036, %r9035; xor.b32 %r9038, %r10339, %r8859; xor.b32 %r8891, %r9038, %r9037; // inline asm bfe.u32 %r8878, %r8891, %r10258, %r10259; // inline asm mul.wide.u32 %rd3035, %r8878, 4; add.s64 %rd3036, %rd42, %rd3035; ld.shared.u32 %r9039, [%rd3036]; // inline asm bfe.u32 %r8882, %r8891, %r10260, %r10259; // inline asm mul.wide.u32 %rd3037, %r8882, 4; add.s64 %rd3038, %rd45, %rd3037; ld.shared.u32 %r9040, [%rd3038]; add.s32 %r9041, %r9040, %r9039; // inline asm bfe.u32 %r8886, %r8891, %r10259, %r10259; // inline asm mul.wide.u32 %rd3039, %r8886, 4; add.s64 %rd3040, %rd48, %rd3039; ld.shared.u32 %r9042, [%rd3040]; xor.b32 %r9043, %r9042, %r9041; // inline asm bfe.u32 %r8890, %r8891, 
%r10325, %r10259; // inline asm mul.wide.u32 %rd3041, %r8890, 4; add.s64 %rd3042, %rd51, %rd3041; ld.shared.u32 %r9044, [%rd3042]; add.s32 %r9045, %r9044, %r9043; xor.b32 %r9046, %r10338, %r8875; xor.b32 %r8907, %r9046, %r9045; // inline asm bfe.u32 %r8894, %r8907, %r10258, %r10259; // inline asm mul.wide.u32 %rd3043, %r8894, 4; add.s64 %rd3044, %rd42, %rd3043; ld.shared.u32 %r9047, [%rd3044]; // inline asm bfe.u32 %r8898, %r8907, %r10260, %r10259; // inline asm mul.wide.u32 %rd3045, %r8898, 4; add.s64 %rd3046, %rd45, %rd3045; ld.shared.u32 %r9048, [%rd3046]; add.s32 %r9049, %r9048, %r9047; // inline asm bfe.u32 %r8902, %r8907, %r10259, %r10259; // inline asm mul.wide.u32 %rd3047, %r8902, 4; add.s64 %rd3048, %rd48, %rd3047; ld.shared.u32 %r9050, [%rd3048]; xor.b32 %r9051, %r9050, %r9049; // inline asm bfe.u32 %r8906, %r8907, %r10325, %r10259; // inline asm mul.wide.u32 %rd3049, %r8906, 4; add.s64 %rd3050, %rd51, %rd3049; ld.shared.u32 %r9052, [%rd3050]; add.s32 %r9053, %r9052, %r9051; xor.b32 %r9054, %r10337, %r8891; xor.b32 %r8923, %r9054, %r9053; // inline asm bfe.u32 %r8910, %r8923, %r10258, %r10259; // inline asm mul.wide.u32 %rd3051, %r8910, 4; add.s64 %rd3052, %rd42, %rd3051; ld.shared.u32 %r9055, [%rd3052]; // inline asm bfe.u32 %r8914, %r8923, %r10260, %r10259; // inline asm mul.wide.u32 %rd3053, %r8914, 4; add.s64 %rd3054, %rd45, %rd3053; ld.shared.u32 %r9056, [%rd3054]; add.s32 %r9057, %r9056, %r9055; // inline asm bfe.u32 %r8918, %r8923, %r10259, %r10259; // inline asm mul.wide.u32 %rd3055, %r8918, 4; add.s64 %rd3056, %rd48, %rd3055; ld.shared.u32 %r9058, [%rd3056]; xor.b32 %r9059, %r9058, %r9057; // inline asm bfe.u32 %r8922, %r8923, %r10325, %r10259; // inline asm mul.wide.u32 %rd3057, %r8922, 4; add.s64 %rd3058, %rd51, %rd3057; ld.shared.u32 %r9060, [%rd3058]; add.s32 %r9061, %r9060, %r9059; xor.b32 %r9062, %r10336, %r8907; xor.b32 %r8939, %r9062, %r9061; // inline asm bfe.u32 %r8926, %r8939, %r10258, %r10259; // inline asm mul.wide.u32 %rd3059, 
%r8926, 4; add.s64 %rd3060, %rd42, %rd3059; ld.shared.u32 %r9063, [%rd3060]; // inline asm bfe.u32 %r8930, %r8939, %r10260, %r10259; // inline asm mul.wide.u32 %rd3061, %r8930, 4; add.s64 %rd3062, %rd45, %rd3061; ld.shared.u32 %r9064, [%rd3062]; add.s32 %r9065, %r9064, %r9063; // inline asm bfe.u32 %r8934, %r8939, %r10259, %r10259; // inline asm mul.wide.u32 %rd3063, %r8934, 4; add.s64 %rd3064, %rd48, %rd3063; ld.shared.u32 %r9066, [%rd3064]; xor.b32 %r9067, %r9066, %r9065; // inline asm bfe.u32 %r8938, %r8939, %r10325, %r10259; // inline asm mul.wide.u32 %rd3065, %r8938, 4; add.s64 %rd3066, %rd51, %rd3065; ld.shared.u32 %r9068, [%rd3066]; add.s32 %r9069, %r9068, %r9067; xor.b32 %r9070, %r10335, %r8923; xor.b32 %r10326, %r9070, %r9069; xor.b32 %r10327, %r10334, %r8939; st.shared.u32 [%rd3499], %r10327; st.shared.u32 [%rd3499+4], %r10326; add.s64 %rd3499, %rd3499, 8; add.s32 %r10322, %r10322, 2; setp.lt.u32 %p8, %r10322, 256; @%p8 bra BB4_14; mov.u64 %rd3500, %rd4; BB4_16: mov.u32 %r10328, 0; mov.u32 %r10264, 16; mov.u32 %r10263, 8; mov.u32 %r10262, 24; xor.b32 %r9084, %r10351, %r10327; // inline asm bfe.u32 %r9071, %r9084, %r10262, %r10263; // inline asm mul.wide.u32 %rd3070, %r9071, 4; add.s64 %rd3071, %rd42, %rd3070; ld.shared.u32 %r9328, [%rd3071]; // inline asm bfe.u32 %r9075, %r9084, %r10264, %r10263; // inline asm mul.wide.u32 %rd3074, %r9075, 4; add.s64 %rd3075, %rd45, %rd3074; ld.shared.u32 %r9329, [%rd3075]; add.s32 %r9330, %r9329, %r9328; // inline asm bfe.u32 %r9079, %r9084, %r10263, %r10263; // inline asm mul.wide.u32 %rd3078, %r9079, 4; add.s64 %rd3079, %rd48, %rd3078; ld.shared.u32 %r9331, [%rd3079]; xor.b32 %r9332, %r9331, %r9330; // inline asm bfe.u32 %r9083, %r9084, %r10328, %r10263; // inline asm mul.wide.u32 %rd3082, %r9083, 4; add.s64 %rd3083, %rd51, %rd3082; ld.shared.u32 %r9333, [%rd3083]; add.s32 %r9334, %r9333, %r9332; xor.b32 %r9335, %r10350, %r10326; xor.b32 %r9100, %r9335, %r9334; // inline asm bfe.u32 %r9087, %r9100, %r10262, %r10263; // 
inline asm mul.wide.u32 %rd3084, %r9087, 4; add.s64 %rd3085, %rd42, %rd3084; ld.shared.u32 %r9336, [%rd3085]; // inline asm bfe.u32 %r9091, %r9100, %r10264, %r10263; // inline asm mul.wide.u32 %rd3086, %r9091, 4; add.s64 %rd3087, %rd45, %rd3086; ld.shared.u32 %r9337, [%rd3087]; add.s32 %r9338, %r9337, %r9336; // inline asm bfe.u32 %r9095, %r9100, %r10263, %r10263; // inline asm mul.wide.u32 %rd3088, %r9095, 4; add.s64 %rd3089, %rd48, %rd3088; ld.shared.u32 %r9339, [%rd3089]; xor.b32 %r9340, %r9339, %r9338; // inline asm bfe.u32 %r9099, %r9100, %r10328, %r10263; // inline asm mul.wide.u32 %rd3090, %r9099, 4; add.s64 %rd3091, %rd51, %rd3090; ld.shared.u32 %r9341, [%rd3091]; add.s32 %r9342, %r9341, %r9340; xor.b32 %r9343, %r10349, %r9084; xor.b32 %r9116, %r9343, %r9342; // inline asm bfe.u32 %r9103, %r9116, %r10262, %r10263; // inline asm mul.wide.u32 %rd3092, %r9103, 4; add.s64 %rd3093, %rd42, %rd3092; ld.shared.u32 %r9344, [%rd3093]; // inline asm bfe.u32 %r9107, %r9116, %r10264, %r10263; // inline asm mul.wide.u32 %rd3094, %r9107, 4; add.s64 %rd3095, %rd45, %rd3094; ld.shared.u32 %r9345, [%rd3095]; add.s32 %r9346, %r9345, %r9344; // inline asm bfe.u32 %r9111, %r9116, %r10263, %r10263; // inline asm mul.wide.u32 %rd3096, %r9111, 4; add.s64 %rd3097, %rd48, %rd3096; ld.shared.u32 %r9347, [%rd3097]; xor.b32 %r9348, %r9347, %r9346; // inline asm bfe.u32 %r9115, %r9116, %r10328, %r10263; // inline asm mul.wide.u32 %rd3098, %r9115, 4; add.s64 %rd3099, %rd51, %rd3098; ld.shared.u32 %r9349, [%rd3099]; add.s32 %r9350, %r9349, %r9348; xor.b32 %r9351, %r10348, %r9100; xor.b32 %r9132, %r9351, %r9350; // inline asm bfe.u32 %r9119, %r9132, %r10262, %r10263; // inline asm mul.wide.u32 %rd3100, %r9119, 4; add.s64 %rd3101, %rd42, %rd3100; ld.shared.u32 %r9352, [%rd3101]; // inline asm bfe.u32 %r9123, %r9132, %r10264, %r10263; // inline asm mul.wide.u32 %rd3102, %r9123, 4; add.s64 %rd3103, %rd45, %rd3102; ld.shared.u32 %r9353, [%rd3103]; add.s32 %r9354, %r9353, %r9352; // inline asm 
bfe.u32 %r9127, %r9132, %r10263, %r10263; // inline asm mul.wide.u32 %rd3104, %r9127, 4; add.s64 %rd3105, %rd48, %rd3104; ld.shared.u32 %r9355, [%rd3105]; xor.b32 %r9356, %r9355, %r9354; // inline asm bfe.u32 %r9131, %r9132, %r10328, %r10263; // inline asm mul.wide.u32 %rd3106, %r9131, 4; add.s64 %rd3107, %rd51, %rd3106; ld.shared.u32 %r9357, [%rd3107]; add.s32 %r9358, %r9357, %r9356; xor.b32 %r9359, %r10347, %r9116; xor.b32 %r9148, %r9359, %r9358; // inline asm bfe.u32 %r9135, %r9148, %r10262, %r10263; // inline asm mul.wide.u32 %rd3108, %r9135, 4; add.s64 %rd3109, %rd42, %rd3108; ld.shared.u32 %r9360, [%rd3109]; // inline asm bfe.u32 %r9139, %r9148, %r10264, %r10263; // inline asm mul.wide.u32 %rd3110, %r9139, 4; add.s64 %rd3111, %rd45, %rd3110; ld.shared.u32 %r9361, [%rd3111]; add.s32 %r9362, %r9361, %r9360; // inline asm bfe.u32 %r9143, %r9148, %r10263, %r10263; // inline asm mul.wide.u32 %rd3112, %r9143, 4; add.s64 %rd3113, %rd48, %rd3112; ld.shared.u32 %r9363, [%rd3113]; xor.b32 %r9364, %r9363, %r9362; // inline asm bfe.u32 %r9147, %r9148, %r10328, %r10263; // inline asm mul.wide.u32 %rd3114, %r9147, 4; add.s64 %rd3115, %rd51, %rd3114; ld.shared.u32 %r9365, [%rd3115]; add.s32 %r9366, %r9365, %r9364; xor.b32 %r9367, %r10346, %r9132; xor.b32 %r9164, %r9367, %r9366; // inline asm bfe.u32 %r9151, %r9164, %r10262, %r10263; // inline asm mul.wide.u32 %rd3116, %r9151, 4; add.s64 %rd3117, %rd42, %rd3116; ld.shared.u32 %r9368, [%rd3117]; // inline asm bfe.u32 %r9155, %r9164, %r10264, %r10263; // inline asm mul.wide.u32 %rd3118, %r9155, 4; add.s64 %rd3119, %rd45, %rd3118; ld.shared.u32 %r9369, [%rd3119]; add.s32 %r9370, %r9369, %r9368; // inline asm bfe.u32 %r9159, %r9164, %r10263, %r10263; // inline asm mul.wide.u32 %rd3120, %r9159, 4; add.s64 %rd3121, %rd48, %rd3120; ld.shared.u32 %r9371, [%rd3121]; xor.b32 %r9372, %r9371, %r9370; // inline asm bfe.u32 %r9163, %r9164, %r10328, %r10263; // inline asm mul.wide.u32 %rd3122, %r9163, 4; add.s64 %rd3123, %rd51, %rd3122; 
ld.shared.u32 %r9373, [%rd3123]; add.s32 %r9374, %r9373, %r9372; xor.b32 %r9375, %r10345, %r9148; xor.b32 %r9180, %r9375, %r9374; // inline asm bfe.u32 %r9167, %r9180, %r10262, %r10263; // inline asm mul.wide.u32 %rd3124, %r9167, 4; add.s64 %rd3125, %rd42, %rd3124; ld.shared.u32 %r9376, [%rd3125]; // inline asm bfe.u32 %r9171, %r9180, %r10264, %r10263; // inline asm mul.wide.u32 %rd3126, %r9171, 4; add.s64 %rd3127, %rd45, %rd3126; ld.shared.u32 %r9377, [%rd3127]; add.s32 %r9378, %r9377, %r9376; // inline asm bfe.u32 %r9175, %r9180, %r10263, %r10263; // inline asm mul.wide.u32 %rd3128, %r9175, 4; add.s64 %rd3129, %rd48, %rd3128; ld.shared.u32 %r9379, [%rd3129]; xor.b32 %r9380, %r9379, %r9378; // inline asm bfe.u32 %r9179, %r9180, %r10328, %r10263; // inline asm mul.wide.u32 %rd3130, %r9179, 4; add.s64 %rd3131, %rd51, %rd3130; ld.shared.u32 %r9381, [%rd3131]; add.s32 %r9382, %r9381, %r9380; xor.b32 %r9383, %r10344, %r9164; xor.b32 %r9196, %r9383, %r9382; // inline asm bfe.u32 %r9183, %r9196, %r10262, %r10263; // inline asm mul.wide.u32 %rd3132, %r9183, 4; add.s64 %rd3133, %rd42, %rd3132; ld.shared.u32 %r9384, [%rd3133]; // inline asm bfe.u32 %r9187, %r9196, %r10264, %r10263; // inline asm mul.wide.u32 %rd3134, %r9187, 4; add.s64 %rd3135, %rd45, %rd3134; ld.shared.u32 %r9385, [%rd3135]; add.s32 %r9386, %r9385, %r9384; // inline asm bfe.u32 %r9191, %r9196, %r10263, %r10263; // inline asm mul.wide.u32 %rd3136, %r9191, 4; add.s64 %rd3137, %rd48, %rd3136; ld.shared.u32 %r9387, [%rd3137]; xor.b32 %r9388, %r9387, %r9386; // inline asm bfe.u32 %r9195, %r9196, %r10328, %r10263; // inline asm mul.wide.u32 %rd3138, %r9195, 4; add.s64 %rd3139, %rd51, %rd3138; ld.shared.u32 %r9389, [%rd3139]; add.s32 %r9390, %r9389, %r9388; xor.b32 %r9391, %r10343, %r9180; xor.b32 %r9212, %r9391, %r9390; // inline asm bfe.u32 %r9199, %r9212, %r10262, %r10263; // inline asm mul.wide.u32 %rd3140, %r9199, 4; add.s64 %rd3141, %rd42, %rd3140; ld.shared.u32 %r9392, [%rd3141]; // inline asm bfe.u32 
%r9203, %r9212, %r10264, %r10263; // inline asm mul.wide.u32 %rd3142, %r9203, 4; add.s64 %rd3143, %rd45, %rd3142; ld.shared.u32 %r9393, [%rd3143]; add.s32 %r9394, %r9393, %r9392; // inline asm bfe.u32 %r9207, %r9212, %r10263, %r10263; // inline asm mul.wide.u32 %rd3144, %r9207, 4; add.s64 %rd3145, %rd48, %rd3144; ld.shared.u32 %r9395, [%rd3145]; xor.b32 %r9396, %r9395, %r9394; // inline asm bfe.u32 %r9211, %r9212, %r10328, %r10263; // inline asm mul.wide.u32 %rd3146, %r9211, 4; add.s64 %rd3147, %rd51, %rd3146; ld.shared.u32 %r9397, [%rd3147]; add.s32 %r9398, %r9397, %r9396; xor.b32 %r9399, %r10342, %r9196; xor.b32 %r9228, %r9399, %r9398; // inline asm bfe.u32 %r9215, %r9228, %r10262, %r10263; // inline asm mul.wide.u32 %rd3148, %r9215, 4; add.s64 %rd3149, %rd42, %rd3148; ld.shared.u32 %r9400, [%rd3149]; // inline asm bfe.u32 %r9219, %r9228, %r10264, %r10263; // inline asm mul.wide.u32 %rd3150, %r9219, 4; add.s64 %rd3151, %rd45, %rd3150; ld.shared.u32 %r9401, [%rd3151]; add.s32 %r9402, %r9401, %r9400; // inline asm bfe.u32 %r9223, %r9228, %r10263, %r10263; // inline asm mul.wide.u32 %rd3152, %r9223, 4; add.s64 %rd3153, %rd48, %rd3152; ld.shared.u32 %r9403, [%rd3153]; xor.b32 %r9404, %r9403, %r9402; // inline asm bfe.u32 %r9227, %r9228, %r10328, %r10263; // inline asm mul.wide.u32 %rd3154, %r9227, 4; add.s64 %rd3155, %rd51, %rd3154; ld.shared.u32 %r9405, [%rd3155]; add.s32 %r9406, %r9405, %r9404; xor.b32 %r9407, %r10341, %r9212; xor.b32 %r9244, %r9407, %r9406; // inline asm bfe.u32 %r9231, %r9244, %r10262, %r10263; // inline asm mul.wide.u32 %rd3156, %r9231, 4; add.s64 %rd3157, %rd42, %rd3156; ld.shared.u32 %r9408, [%rd3157]; // inline asm bfe.u32 %r9235, %r9244, %r10264, %r10263; // inline asm mul.wide.u32 %rd3158, %r9235, 4; add.s64 %rd3159, %rd45, %rd3158; ld.shared.u32 %r9409, [%rd3159]; add.s32 %r9410, %r9409, %r9408; // inline asm bfe.u32 %r9239, %r9244, %r10263, %r10263; // inline asm mul.wide.u32 %rd3160, %r9239, 4; add.s64 %rd3161, %rd48, %rd3160; 
ld.shared.u32 %r9411, [%rd3161]; xor.b32 %r9412, %r9411, %r9410; // inline asm bfe.u32 %r9243, %r9244, %r10328, %r10263; // inline asm mul.wide.u32 %rd3162, %r9243, 4; add.s64 %rd3163, %rd51, %rd3162; ld.shared.u32 %r9413, [%rd3163]; add.s32 %r9414, %r9413, %r9412; xor.b32 %r9415, %r10340, %r9228; xor.b32 %r9260, %r9415, %r9414; // inline asm bfe.u32 %r9247, %r9260, %r10262, %r10263; // inline asm mul.wide.u32 %rd3164, %r9247, 4; add.s64 %rd3165, %rd42, %rd3164; ld.shared.u32 %r9416, [%rd3165]; // inline asm bfe.u32 %r9251, %r9260, %r10264, %r10263; // inline asm mul.wide.u32 %rd3166, %r9251, 4; add.s64 %rd3167, %rd45, %rd3166; ld.shared.u32 %r9417, [%rd3167]; add.s32 %r9418, %r9417, %r9416; // inline asm bfe.u32 %r9255, %r9260, %r10263, %r10263; // inline asm mul.wide.u32 %rd3168, %r9255, 4; add.s64 %rd3169, %rd48, %rd3168; ld.shared.u32 %r9419, [%rd3169]; xor.b32 %r9420, %r9419, %r9418; // inline asm bfe.u32 %r9259, %r9260, %r10328, %r10263; // inline asm mul.wide.u32 %rd3170, %r9259, 4; add.s64 %rd3171, %rd51, %rd3170; ld.shared.u32 %r9421, [%rd3171]; add.s32 %r9422, %r9421, %r9420; xor.b32 %r9423, %r10339, %r9244; xor.b32 %r9276, %r9423, %r9422; // inline asm bfe.u32 %r9263, %r9276, %r10262, %r10263; // inline asm mul.wide.u32 %rd3172, %r9263, 4; add.s64 %rd3173, %rd42, %rd3172; ld.shared.u32 %r9424, [%rd3173]; // inline asm bfe.u32 %r9267, %r9276, %r10264, %r10263; // inline asm mul.wide.u32 %rd3174, %r9267, 4; add.s64 %rd3175, %rd45, %rd3174; ld.shared.u32 %r9425, [%rd3175]; add.s32 %r9426, %r9425, %r9424; // inline asm bfe.u32 %r9271, %r9276, %r10263, %r10263; // inline asm mul.wide.u32 %rd3176, %r9271, 4; add.s64 %rd3177, %rd48, %rd3176; ld.shared.u32 %r9427, [%rd3177]; xor.b32 %r9428, %r9427, %r9426; // inline asm bfe.u32 %r9275, %r9276, %r10328, %r10263; // inline asm mul.wide.u32 %rd3178, %r9275, 4; add.s64 %rd3179, %rd51, %rd3178; ld.shared.u32 %r9429, [%rd3179]; add.s32 %r9430, %r9429, %r9428; xor.b32 %r9431, %r10338, %r9260; xor.b32 %r9292, %r9431, 
%r9430; // inline asm bfe.u32 %r9279, %r9292, %r10262, %r10263; // inline asm mul.wide.u32 %rd3180, %r9279, 4; add.s64 %rd3181, %rd42, %rd3180; ld.shared.u32 %r9432, [%rd3181]; // inline asm bfe.u32 %r9283, %r9292, %r10264, %r10263; // inline asm mul.wide.u32 %rd3182, %r9283, 4; add.s64 %rd3183, %rd45, %rd3182; ld.shared.u32 %r9433, [%rd3183]; add.s32 %r9434, %r9433, %r9432; // inline asm bfe.u32 %r9287, %r9292, %r10263, %r10263; // inline asm mul.wide.u32 %rd3184, %r9287, 4; add.s64 %rd3185, %rd48, %rd3184; ld.shared.u32 %r9435, [%rd3185]; xor.b32 %r9436, %r9435, %r9434; // inline asm bfe.u32 %r9291, %r9292, %r10328, %r10263; // inline asm mul.wide.u32 %rd3186, %r9291, 4; add.s64 %rd3187, %rd51, %rd3186; ld.shared.u32 %r9437, [%rd3187]; add.s32 %r9438, %r9437, %r9436; xor.b32 %r9439, %r10337, %r9276; xor.b32 %r9308, %r9439, %r9438; // inline asm bfe.u32 %r9295, %r9308, %r10262, %r10263; // inline asm mul.wide.u32 %rd3188, %r9295, 4; add.s64 %rd3189, %rd42, %rd3188; ld.shared.u32 %r9440, [%rd3189]; // inline asm bfe.u32 %r9299, %r9308, %r10264, %r10263; // inline asm mul.wide.u32 %rd3190, %r9299, 4; add.s64 %rd3191, %rd45, %rd3190; ld.shared.u32 %r9441, [%rd3191]; add.s32 %r9442, %r9441, %r9440; // inline asm bfe.u32 %r9303, %r9308, %r10263, %r10263; // inline asm mul.wide.u32 %rd3192, %r9303, 4; add.s64 %rd3193, %rd48, %rd3192; ld.shared.u32 %r9443, [%rd3193]; xor.b32 %r9444, %r9443, %r9442; // inline asm bfe.u32 %r9307, %r9308, %r10328, %r10263; // inline asm mul.wide.u32 %rd3194, %r9307, 4; add.s64 %rd3195, %rd51, %rd3194; ld.shared.u32 %r9445, [%rd3195]; add.s32 %r9446, %r9445, %r9444; xor.b32 %r9447, %r10336, %r9292; xor.b32 %r9324, %r9447, %r9446; // inline asm bfe.u32 %r9311, %r9324, %r10262, %r10263; // inline asm mul.wide.u32 %rd3196, %r9311, 4; add.s64 %rd3197, %rd42, %rd3196; ld.shared.u32 %r9448, [%rd3197]; // inline asm bfe.u32 %r9315, %r9324, %r10264, %r10263; // inline asm mul.wide.u32 %rd3198, %r9315, 4; add.s64 %rd3199, %rd45, %rd3198; 
ld.shared.u32 %r9449, [%rd3199]; add.s32 %r9450, %r9449, %r9448; // inline asm bfe.u32 %r9319, %r9324, %r10263, %r10263; // inline asm mul.wide.u32 %rd3200, %r9319, 4; add.s64 %rd3201, %rd48, %rd3200; ld.shared.u32 %r9451, [%rd3201]; xor.b32 %r9452, %r9451, %r9450; // inline asm bfe.u32 %r9323, %r9324, %r10328, %r10263; // inline asm mul.wide.u32 %rd3202, %r9323, 4; add.s64 %rd3203, %rd51, %rd3202; ld.shared.u32 %r9453, [%rd3203]; add.s32 %r9454, %r9453, %r9452; xor.b32 %r9455, %r10335, %r9308; xor.b32 %r10326, %r9455, %r9454; xor.b32 %r10327, %r10334, %r9324; st.shared.u32 [%rd3500], %r10327; st.shared.u32 [%rd3500+4], %r10326; add.s64 %rd3500, %rd3500, 8; add.s32 %r10325, %r10325, 2; setp.lt.u32 %p9, %r10325, 256; @%p9 bra BB4_16; mov.u64 %rd3501, %rd5; BB4_18: mov.u32 %r10331, 0; mov.u32 %r10268, 16; mov.u32 %r10267, 8; mov.u32 %r10266, 24; xor.b32 %r9469, %r10351, %r10327; // inline asm bfe.u32 %r9456, %r9469, %r10266, %r10267; // inline asm mul.wide.u32 %rd3207, %r9456, 4; add.s64 %rd3208, %rd42, %rd3207; ld.shared.u32 %r9713, [%rd3208]; // inline asm bfe.u32 %r9460, %r9469, %r10268, %r10267; // inline asm mul.wide.u32 %rd3211, %r9460, 4; add.s64 %rd3212, %rd45, %rd3211; ld.shared.u32 %r9714, [%rd3212]; add.s32 %r9715, %r9714, %r9713; // inline asm bfe.u32 %r9464, %r9469, %r10267, %r10267; // inline asm mul.wide.u32 %rd3215, %r9464, 4; add.s64 %rd3216, %rd48, %rd3215; ld.shared.u32 %r9716, [%rd3216]; xor.b32 %r9717, %r9716, %r9715; // inline asm bfe.u32 %r9468, %r9469, %r10331, %r10267; // inline asm mul.wide.u32 %rd3219, %r9468, 4; add.s64 %rd3220, %rd51, %rd3219; ld.shared.u32 %r9718, [%rd3220]; add.s32 %r9719, %r9718, %r9717; xor.b32 %r9720, %r10350, %r10326; xor.b32 %r9485, %r9720, %r9719; // inline asm bfe.u32 %r9472, %r9485, %r10266, %r10267; // inline asm mul.wide.u32 %rd3221, %r9472, 4; add.s64 %rd3222, %rd42, %rd3221; ld.shared.u32 %r9721, [%rd3222]; // inline asm bfe.u32 %r9476, %r9485, %r10268, %r10267; // inline asm mul.wide.u32 %rd3223, %r9476, 4; 
add.s64 %rd3224, %rd45, %rd3223; ld.shared.u32 %r9722, [%rd3224]; add.s32 %r9723, %r9722, %r9721; // inline asm bfe.u32 %r9480, %r9485, %r10267, %r10267; // inline asm mul.wide.u32 %rd3225, %r9480, 4; add.s64 %rd3226, %rd48, %rd3225; ld.shared.u32 %r9724, [%rd3226]; xor.b32 %r9725, %r9724, %r9723; // inline asm bfe.u32 %r9484, %r9485, %r10331, %r10267; // inline asm mul.wide.u32 %rd3227, %r9484, 4; add.s64 %rd3228, %rd51, %rd3227; ld.shared.u32 %r9726, [%rd3228]; add.s32 %r9727, %r9726, %r9725; xor.b32 %r9728, %r10349, %r9469; xor.b32 %r9501, %r9728, %r9727; // inline asm bfe.u32 %r9488, %r9501, %r10266, %r10267; // inline asm mul.wide.u32 %rd3229, %r9488, 4; add.s64 %rd3230, %rd42, %rd3229; ld.shared.u32 %r9729, [%rd3230]; // inline asm bfe.u32 %r9492, %r9501, %r10268, %r10267; // inline asm mul.wide.u32 %rd3231, %r9492, 4; add.s64 %rd3232, %rd45, %rd3231; ld.shared.u32 %r9730, [%rd3232]; add.s32 %r9731, %r9730, %r9729; // inline asm bfe.u32 %r9496, %r9501, %r10267, %r10267; // inline asm mul.wide.u32 %rd3233, %r9496, 4; add.s64 %rd3234, %rd48, %rd3233; ld.shared.u32 %r9732, [%rd3234]; xor.b32 %r9733, %r9732, %r9731; // inline asm bfe.u32 %r9500, %r9501, %r10331, %r10267; // inline asm mul.wide.u32 %rd3235, %r9500, 4; add.s64 %rd3236, %rd51, %rd3235; ld.shared.u32 %r9734, [%rd3236]; add.s32 %r9735, %r9734, %r9733; xor.b32 %r9736, %r10348, %r9485; xor.b32 %r9517, %r9736, %r9735; // inline asm bfe.u32 %r9504, %r9517, %r10266, %r10267; // inline asm mul.wide.u32 %rd3237, %r9504, 4; add.s64 %rd3238, %rd42, %rd3237; ld.shared.u32 %r9737, [%rd3238]; // inline asm bfe.u32 %r9508, %r9517, %r10268, %r10267; // inline asm mul.wide.u32 %rd3239, %r9508, 4; add.s64 %rd3240, %rd45, %rd3239; ld.shared.u32 %r9738, [%rd3240]; add.s32 %r9739, %r9738, %r9737; // inline asm bfe.u32 %r9512, %r9517, %r10267, %r10267; // inline asm mul.wide.u32 %rd3241, %r9512, 4; add.s64 %rd3242, %rd48, %rd3241; ld.shared.u32 %r9740, [%rd3242]; xor.b32 %r9741, %r9740, %r9739; // inline asm bfe.u32 
%r9516, %r9517, %r10331, %r10267; // inline asm mul.wide.u32 %rd3243, %r9516, 4; add.s64 %rd3244, %rd51, %rd3243; ld.shared.u32 %r9742, [%rd3244]; add.s32 %r9743, %r9742, %r9741; xor.b32 %r9744, %r10347, %r9501; xor.b32 %r9533, %r9744, %r9743; // inline asm bfe.u32 %r9520, %r9533, %r10266, %r10267; // inline asm mul.wide.u32 %rd3245, %r9520, 4; add.s64 %rd3246, %rd42, %rd3245; ld.shared.u32 %r9745, [%rd3246]; // inline asm bfe.u32 %r9524, %r9533, %r10268, %r10267; // inline asm mul.wide.u32 %rd3247, %r9524, 4; add.s64 %rd3248, %rd45, %rd3247; ld.shared.u32 %r9746, [%rd3248]; add.s32 %r9747, %r9746, %r9745; // inline asm bfe.u32 %r9528, %r9533, %r10267, %r10267; // inline asm mul.wide.u32 %rd3249, %r9528, 4; add.s64 %rd3250, %rd48, %rd3249; ld.shared.u32 %r9748, [%rd3250]; xor.b32 %r9749, %r9748, %r9747; // inline asm bfe.u32 %r9532, %r9533, %r10331, %r10267; // inline asm mul.wide.u32 %rd3251, %r9532, 4; add.s64 %rd3252, %rd51, %rd3251; ld.shared.u32 %r9750, [%rd3252]; add.s32 %r9751, %r9750, %r9749; xor.b32 %r9752, %r10346, %r9517; xor.b32 %r9549, %r9752, %r9751; // inline asm bfe.u32 %r9536, %r9549, %r10266, %r10267; // inline asm mul.wide.u32 %rd3253, %r9536, 4; add.s64 %rd3254, %rd42, %rd3253; ld.shared.u32 %r9753, [%rd3254]; // inline asm bfe.u32 %r9540, %r9549, %r10268, %r10267; // inline asm mul.wide.u32 %rd3255, %r9540, 4; add.s64 %rd3256, %rd45, %rd3255; ld.shared.u32 %r9754, [%rd3256]; add.s32 %r9755, %r9754, %r9753; // inline asm bfe.u32 %r9544, %r9549, %r10267, %r10267; // inline asm mul.wide.u32 %rd3257, %r9544, 4; add.s64 %rd3258, %rd48, %rd3257; ld.shared.u32 %r9756, [%rd3258]; xor.b32 %r9757, %r9756, %r9755; // inline asm bfe.u32 %r9548, %r9549, %r10331, %r10267; // inline asm mul.wide.u32 %rd3259, %r9548, 4; add.s64 %rd3260, %rd51, %rd3259; ld.shared.u32 %r9758, [%rd3260]; add.s32 %r9759, %r9758, %r9757; xor.b32 %r9760, %r10345, %r9533; xor.b32 %r9565, %r9760, %r9759; // inline asm bfe.u32 %r9552, %r9565, %r10266, %r10267; // inline asm 
mul.wide.u32 %rd3261, %r9552, 4; add.s64 %rd3262, %rd42, %rd3261; ld.shared.u32 %r9761, [%rd3262]; // inline asm bfe.u32 %r9556, %r9565, %r10268, %r10267; // inline asm mul.wide.u32 %rd3263, %r9556, 4; add.s64 %rd3264, %rd45, %rd3263; ld.shared.u32 %r9762, [%rd3264]; add.s32 %r9763, %r9762, %r9761; // inline asm bfe.u32 %r9560, %r9565, %r10267, %r10267; // inline asm mul.wide.u32 %rd3265, %r9560, 4; add.s64 %rd3266, %rd48, %rd3265; ld.shared.u32 %r9764, [%rd3266]; xor.b32 %r9765, %r9764, %r9763; // inline asm bfe.u32 %r9564, %r9565, %r10331, %r10267; // inline asm mul.wide.u32 %rd3267, %r9564, 4; add.s64 %rd3268, %rd51, %rd3267; ld.shared.u32 %r9766, [%rd3268]; add.s32 %r9767, %r9766, %r9765; xor.b32 %r9768, %r10344, %r9549; xor.b32 %r9581, %r9768, %r9767; // inline asm bfe.u32 %r9568, %r9581, %r10266, %r10267; // inline asm mul.wide.u32 %rd3269, %r9568, 4; add.s64 %rd3270, %rd42, %rd3269; ld.shared.u32 %r9769, [%rd3270]; // inline asm bfe.u32 %r9572, %r9581, %r10268, %r10267; // inline asm mul.wide.u32 %rd3271, %r9572, 4; add.s64 %rd3272, %rd45, %rd3271; ld.shared.u32 %r9770, [%rd3272]; add.s32 %r9771, %r9770, %r9769; // inline asm bfe.u32 %r9576, %r9581, %r10267, %r10267; // inline asm mul.wide.u32 %rd3273, %r9576, 4; add.s64 %rd3274, %rd48, %rd3273; ld.shared.u32 %r9772, [%rd3274]; xor.b32 %r9773, %r9772, %r9771; // inline asm bfe.u32 %r9580, %r9581, %r10331, %r10267; // inline asm mul.wide.u32 %rd3275, %r9580, 4; add.s64 %rd3276, %rd51, %rd3275; ld.shared.u32 %r9774, [%rd3276]; add.s32 %r9775, %r9774, %r9773; xor.b32 %r9776, %r10343, %r9565; xor.b32 %r9597, %r9776, %r9775; // inline asm bfe.u32 %r9584, %r9597, %r10266, %r10267; // inline asm mul.wide.u32 %rd3277, %r9584, 4; add.s64 %rd3278, %rd42, %rd3277; ld.shared.u32 %r9777, [%rd3278]; // inline asm bfe.u32 %r9588, %r9597, %r10268, %r10267; // inline asm mul.wide.u32 %rd3279, %r9588, 4; add.s64 %rd3280, %rd45, %rd3279; ld.shared.u32 %r9778, [%rd3280]; add.s32 %r9779, %r9778, %r9777; // inline asm bfe.u32 
%r9592, %r9597, %r10267, %r10267; // inline asm mul.wide.u32 %rd3281, %r9592, 4; add.s64 %rd3282, %rd48, %rd3281; ld.shared.u32 %r9780, [%rd3282]; xor.b32 %r9781, %r9780, %r9779; // inline asm bfe.u32 %r9596, %r9597, %r10331, %r10267; // inline asm mul.wide.u32 %rd3283, %r9596, 4; add.s64 %rd3284, %rd51, %rd3283; ld.shared.u32 %r9782, [%rd3284]; add.s32 %r9783, %r9782, %r9781; xor.b32 %r9784, %r10342, %r9581; xor.b32 %r9613, %r9784, %r9783; // inline asm bfe.u32 %r9600, %r9613, %r10266, %r10267; // inline asm mul.wide.u32 %rd3285, %r9600, 4; add.s64 %rd3286, %rd42, %rd3285; ld.shared.u32 %r9785, [%rd3286]; // inline asm bfe.u32 %r9604, %r9613, %r10268, %r10267; // inline asm mul.wide.u32 %rd3287, %r9604, 4; add.s64 %rd3288, %rd45, %rd3287; ld.shared.u32 %r9786, [%rd3288]; add.s32 %r9787, %r9786, %r9785; // inline asm bfe.u32 %r9608, %r9613, %r10267, %r10267; // inline asm mul.wide.u32 %rd3289, %r9608, 4; add.s64 %rd3290, %rd48, %rd3289; ld.shared.u32 %r9788, [%rd3290]; xor.b32 %r9789, %r9788, %r9787; // inline asm bfe.u32 %r9612, %r9613, %r10331, %r10267; // inline asm mul.wide.u32 %rd3291, %r9612, 4; add.s64 %rd3292, %rd51, %rd3291; ld.shared.u32 %r9790, [%rd3292]; add.s32 %r9791, %r9790, %r9789; xor.b32 %r9792, %r10341, %r9597; xor.b32 %r9629, %r9792, %r9791; // inline asm bfe.u32 %r9616, %r9629, %r10266, %r10267; // inline asm mul.wide.u32 %rd3293, %r9616, 4; add.s64 %rd3294, %rd42, %rd3293; ld.shared.u32 %r9793, [%rd3294]; // inline asm bfe.u32 %r9620, %r9629, %r10268, %r10267; // inline asm mul.wide.u32 %rd3295, %r9620, 4; add.s64 %rd3296, %rd45, %rd3295; ld.shared.u32 %r9794, [%rd3296]; add.s32 %r9795, %r9794, %r9793; // inline asm bfe.u32 %r9624, %r9629, %r10267, %r10267; // inline asm mul.wide.u32 %rd3297, %r9624, 4; add.s64 %rd3298, %rd48, %rd3297; ld.shared.u32 %r9796, [%rd3298]; xor.b32 %r9797, %r9796, %r9795; // inline asm bfe.u32 %r9628, %r9629, %r10331, %r10267; // inline asm mul.wide.u32 %rd3299, %r9628, 4; add.s64 %rd3300, %rd51, %rd3299; 
ld.shared.u32 %r9798, [%rd3300]; add.s32 %r9799, %r9798, %r9797; xor.b32 %r9800, %r10340, %r9613; xor.b32 %r9645, %r9800, %r9799; // inline asm bfe.u32 %r9632, %r9645, %r10266, %r10267; // inline asm mul.wide.u32 %rd3301, %r9632, 4; add.s64 %rd3302, %rd42, %rd3301; ld.shared.u32 %r9801, [%rd3302]; // inline asm bfe.u32 %r9636, %r9645, %r10268, %r10267; // inline asm mul.wide.u32 %rd3303, %r9636, 4; add.s64 %rd3304, %rd45, %rd3303; ld.shared.u32 %r9802, [%rd3304]; add.s32 %r9803, %r9802, %r9801; // inline asm bfe.u32 %r9640, %r9645, %r10267, %r10267; // inline asm mul.wide.u32 %rd3305, %r9640, 4; add.s64 %rd3306, %rd48, %rd3305; ld.shared.u32 %r9804, [%rd3306]; xor.b32 %r9805, %r9804, %r9803; // inline asm bfe.u32 %r9644, %r9645, %r10331, %r10267; // inline asm mul.wide.u32 %rd3307, %r9644, 4; add.s64 %rd3308, %rd51, %rd3307; ld.shared.u32 %r9806, [%rd3308]; add.s32 %r9807, %r9806, %r9805; xor.b32 %r9808, %r10339, %r9629; xor.b32 %r9661, %r9808, %r9807; // inline asm bfe.u32 %r9648, %r9661, %r10266, %r10267; // inline asm mul.wide.u32 %rd3309, %r9648, 4; add.s64 %rd3310, %rd42, %rd3309; ld.shared.u32 %r9809, [%rd3310]; // inline asm bfe.u32 %r9652, %r9661, %r10268, %r10267; // inline asm mul.wide.u32 %rd3311, %r9652, 4; add.s64 %rd3312, %rd45, %rd3311; ld.shared.u32 %r9810, [%rd3312]; add.s32 %r9811, %r9810, %r9809; // inline asm bfe.u32 %r9656, %r9661, %r10267, %r10267; // inline asm mul.wide.u32 %rd3313, %r9656, 4; add.s64 %rd3314, %rd48, %rd3313; ld.shared.u32 %r9812, [%rd3314]; xor.b32 %r9813, %r9812, %r9811; // inline asm bfe.u32 %r9660, %r9661, %r10331, %r10267; // inline asm mul.wide.u32 %rd3315, %r9660, 4; add.s64 %rd3316, %rd51, %rd3315; ld.shared.u32 %r9814, [%rd3316]; add.s32 %r9815, %r9814, %r9813; xor.b32 %r9816, %r10338, %r9645; xor.b32 %r9677, %r9816, %r9815; // inline asm bfe.u32 %r9664, %r9677, %r10266, %r10267; // inline asm mul.wide.u32 %rd3317, %r9664, 4; add.s64 %rd3318, %rd42, %rd3317; ld.shared.u32 %r9817, [%rd3318]; // inline asm bfe.u32 
%r9668, %r9677, %r10268, %r10267; // inline asm mul.wide.u32 %rd3319, %r9668, 4; add.s64 %rd3320, %rd45, %rd3319; ld.shared.u32 %r9818, [%rd3320]; add.s32 %r9819, %r9818, %r9817; // inline asm bfe.u32 %r9672, %r9677, %r10267, %r10267; // inline asm mul.wide.u32 %rd3321, %r9672, 4; add.s64 %rd3322, %rd48, %rd3321; ld.shared.u32 %r9820, [%rd3322]; xor.b32 %r9821, %r9820, %r9819; // inline asm bfe.u32 %r9676, %r9677, %r10331, %r10267; // inline asm mul.wide.u32 %rd3323, %r9676, 4; add.s64 %rd3324, %rd51, %rd3323; ld.shared.u32 %r9822, [%rd3324]; add.s32 %r9823, %r9822, %r9821; xor.b32 %r9824, %r10337, %r9661; xor.b32 %r9693, %r9824, %r9823; // inline asm bfe.u32 %r9680, %r9693, %r10266, %r10267; // inline asm mul.wide.u32 %rd3325, %r9680, 4; add.s64 %rd3326, %rd42, %rd3325; ld.shared.u32 %r9825, [%rd3326]; // inline asm bfe.u32 %r9684, %r9693, %r10268, %r10267; // inline asm mul.wide.u32 %rd3327, %r9684, 4; add.s64 %rd3328, %rd45, %rd3327; ld.shared.u32 %r9826, [%rd3328]; add.s32 %r9827, %r9826, %r9825; // inline asm bfe.u32 %r9688, %r9693, %r10267, %r10267; // inline asm mul.wide.u32 %rd3329, %r9688, 4; add.s64 %rd3330, %rd48, %rd3329; ld.shared.u32 %r9828, [%rd3330]; xor.b32 %r9829, %r9828, %r9827; // inline asm bfe.u32 %r9692, %r9693, %r10331, %r10267; // inline asm mul.wide.u32 %rd3331, %r9692, 4; add.s64 %rd3332, %rd51, %rd3331; ld.shared.u32 %r9830, [%rd3332]; add.s32 %r9831, %r9830, %r9829; xor.b32 %r9832, %r10336, %r9677; xor.b32 %r9709, %r9832, %r9831; // inline asm bfe.u32 %r9696, %r9709, %r10266, %r10267; // inline asm mul.wide.u32 %rd3333, %r9696, 4; add.s64 %rd3334, %rd42, %rd3333; ld.shared.u32 %r9833, [%rd3334]; // inline asm bfe.u32 %r9700, %r9709, %r10268, %r10267; // inline asm mul.wide.u32 %rd3335, %r9700, 4; add.s64 %rd3336, %rd45, %rd3335; ld.shared.u32 %r9834, [%rd3336]; add.s32 %r9835, %r9834, %r9833; // inline asm bfe.u32 %r9704, %r9709, %r10267, %r10267; // inline asm mul.wide.u32 %rd3337, %r9704, 4; add.s64 %rd3338, %rd48, %rd3337; 
ld.shared.u32 %r9836, [%rd3338]; xor.b32 %r9837, %r9836, %r9835; // inline asm bfe.u32 %r9708, %r9709, %r10331, %r10267; // inline asm mul.wide.u32 %rd3339, %r9708, 4; add.s64 %rd3340, %rd51, %rd3339; ld.shared.u32 %r9838, [%rd3340]; add.s32 %r9839, %r9838, %r9837; xor.b32 %r9840, %r10335, %r9693; xor.b32 %r10326, %r9840, %r9839; xor.b32 %r10327, %r10334, %r9709; st.shared.u32 [%rd3501], %r10327; st.shared.u32 [%rd3501+4], %r10326; add.s64 %rd3501, %rd3501, 8; add.s32 %r10328, %r10328, 2; setp.lt.u32 %p10, %r10328, 256; @%p10 bra BB4_18; mov.u64 %rd3502, %rd6; BB4_20: mov.u32 %r10273, 0; mov.u32 %r10272, 16; mov.u32 %r10271, 8; mov.u32 %r10270, 24; xor.b32 %r9854, %r10351, %r10327; // inline asm bfe.u32 %r9841, %r9854, %r10270, %r10271; // inline asm mul.wide.u32 %rd3344, %r9841, 4; add.s64 %rd3345, %rd42, %rd3344; ld.shared.u32 %r10097, [%rd3345]; // inline asm bfe.u32 %r9845, %r9854, %r10272, %r10271; // inline asm mul.wide.u32 %rd3348, %r9845, 4; add.s64 %rd3349, %rd45, %rd3348; ld.shared.u32 %r10098, [%rd3349]; add.s32 %r10099, %r10098, %r10097; // inline asm bfe.u32 %r9849, %r9854, %r10271, %r10271; // inline asm mul.wide.u32 %rd3352, %r9849, 4; add.s64 %rd3353, %rd48, %rd3352; ld.shared.u32 %r10100, [%rd3353]; xor.b32 %r10101, %r10100, %r10099; // inline asm bfe.u32 %r9853, %r9854, %r10273, %r10271; // inline asm mul.wide.u32 %rd3356, %r9853, 4; add.s64 %rd3357, %rd51, %rd3356; ld.shared.u32 %r10102, [%rd3357]; add.s32 %r10103, %r10102, %r10101; xor.b32 %r10104, %r10350, %r10326; xor.b32 %r9870, %r10104, %r10103; // inline asm bfe.u32 %r9857, %r9870, %r10270, %r10271; // inline asm mul.wide.u32 %rd3358, %r9857, 4; add.s64 %rd3359, %rd42, %rd3358; ld.shared.u32 %r10105, [%rd3359]; // inline asm bfe.u32 %r9861, %r9870, %r10272, %r10271; // inline asm mul.wide.u32 %rd3360, %r9861, 4; add.s64 %rd3361, %rd45, %rd3360; ld.shared.u32 %r10106, [%rd3361]; add.s32 %r10107, %r10106, %r10105; // inline asm bfe.u32 %r9865, %r9870, %r10271, %r10271; // inline asm 
mul.wide.u32 %rd3362, %r9865, 4; add.s64 %rd3363, %rd48, %rd3362; ld.shared.u32 %r10108, [%rd3363]; xor.b32 %r10109, %r10108, %r10107; // inline asm bfe.u32 %r9869, %r9870, %r10273, %r10271; // inline asm mul.wide.u32 %rd3364, %r9869, 4; add.s64 %rd3365, %rd51, %rd3364; ld.shared.u32 %r10110, [%rd3365]; add.s32 %r10111, %r10110, %r10109; xor.b32 %r10112, %r10349, %r9854; xor.b32 %r9886, %r10112, %r10111; // inline asm bfe.u32 %r9873, %r9886, %r10270, %r10271; // inline asm mul.wide.u32 %rd3366, %r9873, 4; add.s64 %rd3367, %rd42, %rd3366; ld.shared.u32 %r10113, [%rd3367]; // inline asm bfe.u32 %r9877, %r9886, %r10272, %r10271; // inline asm mul.wide.u32 %rd3368, %r9877, 4; add.s64 %rd3369, %rd45, %rd3368; ld.shared.u32 %r10114, [%rd3369]; add.s32 %r10115, %r10114, %r10113; // inline asm bfe.u32 %r9881, %r9886, %r10271, %r10271; // inline asm mul.wide.u32 %rd3370, %r9881, 4; add.s64 %rd3371, %rd48, %rd3370; ld.shared.u32 %r10116, [%rd3371]; xor.b32 %r10117, %r10116, %r10115; // inline asm bfe.u32 %r9885, %r9886, %r10273, %r10271; // inline asm mul.wide.u32 %rd3372, %r9885, 4; add.s64 %rd3373, %rd51, %rd3372; ld.shared.u32 %r10118, [%rd3373]; add.s32 %r10119, %r10118, %r10117; xor.b32 %r10120, %r10348, %r9870; xor.b32 %r9902, %r10120, %r10119; // inline asm bfe.u32 %r9889, %r9902, %r10270, %r10271; // inline asm mul.wide.u32 %rd3374, %r9889, 4; add.s64 %rd3375, %rd42, %rd3374; ld.shared.u32 %r10121, [%rd3375]; // inline asm bfe.u32 %r9893, %r9902, %r10272, %r10271; // inline asm mul.wide.u32 %rd3376, %r9893, 4; add.s64 %rd3377, %rd45, %rd3376; ld.shared.u32 %r10122, [%rd3377]; add.s32 %r10123, %r10122, %r10121; // inline asm bfe.u32 %r9897, %r9902, %r10271, %r10271; // inline asm mul.wide.u32 %rd3378, %r9897, 4; add.s64 %rd3379, %rd48, %rd3378; ld.shared.u32 %r10124, [%rd3379]; xor.b32 %r10125, %r10124, %r10123; // inline asm bfe.u32 %r9901, %r9902, %r10273, %r10271; // inline asm mul.wide.u32 %rd3380, %r9901, 4; add.s64 %rd3381, %rd51, %rd3380; ld.shared.u32 %r10126, 
[%rd3381]; add.s32 %r10127, %r10126, %r10125; xor.b32 %r10128, %r10347, %r9886; xor.b32 %r9918, %r10128, %r10127; // inline asm bfe.u32 %r9905, %r9918, %r10270, %r10271; // inline asm mul.wide.u32 %rd3382, %r9905, 4; add.s64 %rd3383, %rd42, %rd3382; ld.shared.u32 %r10129, [%rd3383]; // inline asm bfe.u32 %r9909, %r9918, %r10272, %r10271; // inline asm mul.wide.u32 %rd3384, %r9909, 4; add.s64 %rd3385, %rd45, %rd3384; ld.shared.u32 %r10130, [%rd3385]; add.s32 %r10131, %r10130, %r10129; // inline asm bfe.u32 %r9913, %r9918, %r10271, %r10271; // inline asm mul.wide.u32 %rd3386, %r9913, 4; add.s64 %rd3387, %rd48, %rd3386; ld.shared.u32 %r10132, [%rd3387]; xor.b32 %r10133, %r10132, %r10131; // inline asm bfe.u32 %r9917, %r9918, %r10273, %r10271; // inline asm mul.wide.u32 %rd3388, %r9917, 4; add.s64 %rd3389, %rd51, %rd3388; ld.shared.u32 %r10134, [%rd3389]; add.s32 %r10135, %r10134, %r10133; xor.b32 %r10136, %r10346, %r9902; xor.b32 %r9934, %r10136, %r10135; // inline asm bfe.u32 %r9921, %r9934, %r10270, %r10271; // inline asm mul.wide.u32 %rd3390, %r9921, 4; add.s64 %rd3391, %rd42, %rd3390; ld.shared.u32 %r10137, [%rd3391]; // inline asm bfe.u32 %r9925, %r9934, %r10272, %r10271; // inline asm mul.wide.u32 %rd3392, %r9925, 4; add.s64 %rd3393, %rd45, %rd3392; ld.shared.u32 %r10138, [%rd3393]; add.s32 %r10139, %r10138, %r10137; // inline asm bfe.u32 %r9929, %r9934, %r10271, %r10271; // inline asm mul.wide.u32 %rd3394, %r9929, 4; add.s64 %rd3395, %rd48, %rd3394; ld.shared.u32 %r10140, [%rd3395]; xor.b32 %r10141, %r10140, %r10139; // inline asm bfe.u32 %r9933, %r9934, %r10273, %r10271; // inline asm mul.wide.u32 %rd3396, %r9933, 4; add.s64 %rd3397, %rd51, %rd3396; ld.shared.u32 %r10142, [%rd3397]; add.s32 %r10143, %r10142, %r10141; xor.b32 %r10144, %r10345, %r9918; xor.b32 %r9950, %r10144, %r10143; // inline asm bfe.u32 %r9937, %r9950, %r10270, %r10271; // inline asm mul.wide.u32 %rd3398, %r9937, 4; add.s64 %rd3399, %rd42, %rd3398; ld.shared.u32 %r10145, [%rd3399]; // inline 
asm bfe.u32 %r9941, %r9950, %r10272, %r10271; // inline asm mul.wide.u32 %rd3400, %r9941, 4; add.s64 %rd3401, %rd45, %rd3400; ld.shared.u32 %r10146, [%rd3401]; add.s32 %r10147, %r10146, %r10145; // inline asm bfe.u32 %r9945, %r9950, %r10271, %r10271; // inline asm mul.wide.u32 %rd3402, %r9945, 4; add.s64 %rd3403, %rd48, %rd3402; ld.shared.u32 %r10148, [%rd3403]; xor.b32 %r10149, %r10148, %r10147; // inline asm bfe.u32 %r9949, %r9950, %r10273, %r10271; // inline asm mul.wide.u32 %rd3404, %r9949, 4; add.s64 %rd3405, %rd51, %rd3404; ld.shared.u32 %r10150, [%rd3405]; add.s32 %r10151, %r10150, %r10149; xor.b32 %r10152, %r10344, %r9934; xor.b32 %r9966, %r10152, %r10151; // inline asm bfe.u32 %r9953, %r9966, %r10270, %r10271; // inline asm mul.wide.u32 %rd3406, %r9953, 4; add.s64 %rd3407, %rd42, %rd3406; ld.shared.u32 %r10153, [%rd3407]; // inline asm bfe.u32 %r9957, %r9966, %r10272, %r10271; // inline asm mul.wide.u32 %rd3408, %r9957, 4; add.s64 %rd3409, %rd45, %rd3408; ld.shared.u32 %r10154, [%rd3409]; add.s32 %r10155, %r10154, %r10153; // inline asm bfe.u32 %r9961, %r9966, %r10271, %r10271; // inline asm mul.wide.u32 %rd3410, %r9961, 4; add.s64 %rd3411, %rd48, %rd3410; ld.shared.u32 %r10156, [%rd3411]; xor.b32 %r10157, %r10156, %r10155; // inline asm bfe.u32 %r9965, %r9966, %r10273, %r10271; // inline asm mul.wide.u32 %rd3412, %r9965, 4; add.s64 %rd3413, %rd51, %rd3412; ld.shared.u32 %r10158, [%rd3413]; add.s32 %r10159, %r10158, %r10157; xor.b32 %r10160, %r10343, %r9950; xor.b32 %r9982, %r10160, %r10159; // inline asm bfe.u32 %r9969, %r9982, %r10270, %r10271; // inline asm mul.wide.u32 %rd3414, %r9969, 4; add.s64 %rd3415, %rd42, %rd3414; ld.shared.u32 %r10161, [%rd3415]; // inline asm bfe.u32 %r9973, %r9982, %r10272, %r10271; // inline asm mul.wide.u32 %rd3416, %r9973, 4; add.s64 %rd3417, %rd45, %rd3416; ld.shared.u32 %r10162, [%rd3417]; add.s32 %r10163, %r10162, %r10161; // inline asm bfe.u32 %r9977, %r9982, %r10271, %r10271; // inline asm mul.wide.u32 %rd3418, 
%r9977, 4; add.s64 %rd3419, %rd48, %rd3418; ld.shared.u32 %r10164, [%rd3419]; xor.b32 %r10165, %r10164, %r10163; // inline asm bfe.u32 %r9981, %r9982, %r10273, %r10271; // inline asm mul.wide.u32 %rd3420, %r9981, 4; add.s64 %rd3421, %rd51, %rd3420; ld.shared.u32 %r10166, [%rd3421]; add.s32 %r10167, %r10166, %r10165; xor.b32 %r10168, %r10342, %r9966; xor.b32 %r9998, %r10168, %r10167; // inline asm bfe.u32 %r9985, %r9998, %r10270, %r10271; // inline asm mul.wide.u32 %rd3422, %r9985, 4; add.s64 %rd3423, %rd42, %rd3422; ld.shared.u32 %r10169, [%rd3423]; // inline asm bfe.u32 %r9989, %r9998, %r10272, %r10271; // inline asm mul.wide.u32 %rd3424, %r9989, 4; add.s64 %rd3425, %rd45, %rd3424; ld.shared.u32 %r10170, [%rd3425]; add.s32 %r10171, %r10170, %r10169; // inline asm bfe.u32 %r9993, %r9998, %r10271, %r10271; // inline asm mul.wide.u32 %rd3426, %r9993, 4; add.s64 %rd3427, %rd48, %rd3426; ld.shared.u32 %r10172, [%rd3427]; xor.b32 %r10173, %r10172, %r10171; // inline asm bfe.u32 %r9997, %r9998, %r10273, %r10271; // inline asm mul.wide.u32 %rd3428, %r9997, 4; add.s64 %rd3429, %rd51, %rd3428; ld.shared.u32 %r10174, [%rd3429]; add.s32 %r10175, %r10174, %r10173; xor.b32 %r10176, %r10341, %r9982; xor.b32 %r10014, %r10176, %r10175; // inline asm bfe.u32 %r10001, %r10014, %r10270, %r10271; // inline asm mul.wide.u32 %rd3430, %r10001, 4; add.s64 %rd3431, %rd42, %rd3430; ld.shared.u32 %r10177, [%rd3431]; // inline asm bfe.u32 %r10005, %r10014, %r10272, %r10271; // inline asm mul.wide.u32 %rd3432, %r10005, 4; add.s64 %rd3433, %rd45, %rd3432; ld.shared.u32 %r10178, [%rd3433]; add.s32 %r10179, %r10178, %r10177; // inline asm bfe.u32 %r10009, %r10014, %r10271, %r10271; // inline asm mul.wide.u32 %rd3434, %r10009, 4; add.s64 %rd3435, %rd48, %rd3434; ld.shared.u32 %r10180, [%rd3435]; xor.b32 %r10181, %r10180, %r10179; // inline asm bfe.u32 %r10013, %r10014, %r10273, %r10271; // inline asm mul.wide.u32 %rd3436, %r10013, 4; add.s64 %rd3437, %rd51, %rd3436; ld.shared.u32 %r10182, 
[%rd3437]; add.s32 %r10183, %r10182, %r10181; xor.b32 %r10184, %r10340, %r9998; xor.b32 %r10030, %r10184, %r10183; // inline asm bfe.u32 %r10017, %r10030, %r10270, %r10271; // inline asm mul.wide.u32 %rd3438, %r10017, 4; add.s64 %rd3439, %rd42, %rd3438; ld.shared.u32 %r10185, [%rd3439]; // inline asm bfe.u32 %r10021, %r10030, %r10272, %r10271; // inline asm mul.wide.u32 %rd3440, %r10021, 4; add.s64 %rd3441, %rd45, %rd3440; ld.shared.u32 %r10186, [%rd3441]; add.s32 %r10187, %r10186, %r10185; // inline asm bfe.u32 %r10025, %r10030, %r10271, %r10271; // inline asm mul.wide.u32 %rd3442, %r10025, 4; add.s64 %rd3443, %rd48, %rd3442; ld.shared.u32 %r10188, [%rd3443]; xor.b32 %r10189, %r10188, %r10187; // inline asm bfe.u32 %r10029, %r10030, %r10273, %r10271; // inline asm mul.wide.u32 %rd3444, %r10029, 4; add.s64 %rd3445, %rd51, %rd3444; ld.shared.u32 %r10190, [%rd3445]; add.s32 %r10191, %r10190, %r10189; xor.b32 %r10192, %r10339, %r10014; xor.b32 %r10046, %r10192, %r10191; // inline asm bfe.u32 %r10033, %r10046, %r10270, %r10271; // inline asm mul.wide.u32 %rd3446, %r10033, 4; add.s64 %rd3447, %rd42, %rd3446; ld.shared.u32 %r10193, [%rd3447]; // inline asm bfe.u32 %r10037, %r10046, %r10272, %r10271; // inline asm mul.wide.u32 %rd3448, %r10037, 4; add.s64 %rd3449, %rd45, %rd3448; ld.shared.u32 %r10194, [%rd3449]; add.s32 %r10195, %r10194, %r10193; // inline asm bfe.u32 %r10041, %r10046, %r10271, %r10271; // inline asm mul.wide.u32 %rd3450, %r10041, 4; add.s64 %rd3451, %rd48, %rd3450; ld.shared.u32 %r10196, [%rd3451]; xor.b32 %r10197, %r10196, %r10195; // inline asm bfe.u32 %r10045, %r10046, %r10273, %r10271; // inline asm mul.wide.u32 %rd3452, %r10045, 4; add.s64 %rd3453, %rd51, %rd3452; ld.shared.u32 %r10198, [%rd3453]; add.s32 %r10199, %r10198, %r10197; xor.b32 %r10200, %r10338, %r10030; xor.b32 %r10062, %r10200, %r10199; // inline asm bfe.u32 %r10049, %r10062, %r10270, %r10271; // inline asm mul.wide.u32 %rd3454, %r10049, 4; add.s64 %rd3455, %rd42, %rd3454; 
ld.shared.u32 %r10201, [%rd3455]; // inline asm bfe.u32 %r10053, %r10062, %r10272, %r10271; // inline asm mul.wide.u32 %rd3456, %r10053, 4; add.s64 %rd3457, %rd45, %rd3456; ld.shared.u32 %r10202, [%rd3457]; add.s32 %r10203, %r10202, %r10201; // inline asm bfe.u32 %r10057, %r10062, %r10271, %r10271; // inline asm mul.wide.u32 %rd3458, %r10057, 4; add.s64 %rd3459, %rd48, %rd3458; ld.shared.u32 %r10204, [%rd3459]; xor.b32 %r10205, %r10204, %r10203; // inline asm bfe.u32 %r10061, %r10062, %r10273, %r10271; // inline asm mul.wide.u32 %rd3460, %r10061, 4; add.s64 %rd3461, %rd51, %rd3460; ld.shared.u32 %r10206, [%rd3461]; add.s32 %r10207, %r10206, %r10205; xor.b32 %r10208, %r10337, %r10046; xor.b32 %r10078, %r10208, %r10207; // inline asm bfe.u32 %r10065, %r10078, %r10270, %r10271; // inline asm mul.wide.u32 %rd3462, %r10065, 4; add.s64 %rd3463, %rd42, %rd3462; ld.shared.u32 %r10209, [%rd3463]; // inline asm bfe.u32 %r10069, %r10078, %r10272, %r10271; // inline asm mul.wide.u32 %rd3464, %r10069, 4; add.s64 %rd3465, %rd45, %rd3464; ld.shared.u32 %r10210, [%rd3465]; add.s32 %r10211, %r10210, %r10209; // inline asm bfe.u32 %r10073, %r10078, %r10271, %r10271; // inline asm mul.wide.u32 %rd3466, %r10073, 4; add.s64 %rd3467, %rd48, %rd3466; ld.shared.u32 %r10212, [%rd3467]; xor.b32 %r10213, %r10212, %r10211; // inline asm bfe.u32 %r10077, %r10078, %r10273, %r10271; // inline asm mul.wide.u32 %rd3468, %r10077, 4; add.s64 %rd3469, %rd51, %rd3468; ld.shared.u32 %r10214, [%rd3469]; add.s32 %r10215, %r10214, %r10213; xor.b32 %r10216, %r10336, %r10062; xor.b32 %r10094, %r10216, %r10215; // inline asm bfe.u32 %r10081, %r10094, %r10270, %r10271; // inline asm mul.wide.u32 %rd3470, %r10081, 4; add.s64 %rd3471, %rd42, %rd3470; ld.shared.u32 %r10217, [%rd3471]; // inline asm bfe.u32 %r10085, %r10094, %r10272, %r10271; // inline asm mul.wide.u32 %rd3472, %r10085, 4; add.s64 %rd3473, %rd45, %rd3472; ld.shared.u32 %r10218, [%rd3473]; add.s32 %r10219, %r10218, %r10217; // inline asm bfe.u32 
%r10089, %r10094, %r10271, %r10271; // inline asm mul.wide.u32 %rd3474, %r10089, 4; add.s64 %rd3475, %rd48, %rd3474; ld.shared.u32 %r10220, [%rd3475]; xor.b32 %r10221, %r10220, %r10219; // inline asm bfe.u32 %r10093, %r10094, %r10273, %r10271; // inline asm mul.wide.u32 %rd3476, %r10093, 4; add.s64 %rd3477, %rd51, %rd3476; ld.shared.u32 %r10222, [%rd3477]; add.s32 %r10223, %r10222, %r10221; xor.b32 %r10224, %r10335, %r10078; xor.b32 %r10326, %r10224, %r10223; xor.b32 %r10327, %r10334, %r10094; st.shared.u32 [%rd3502], %r10327; st.shared.u32 [%rd3502+4], %r10326; add.s64 %rd3502, %rd3502, 8; add.s32 %r10331, %r10331, 2; setp.lt.u32 %p11, %r10331, 256; @%p11 bra BB4_20; ld.param.u32 %r10274, [m03200_loop_param_29]; add.s32 %r10309, %r10309, 1; setp.lt.u32 %p12, %r10309, %r10274; @%p12 bra BB4_5; BB4_22: st.global.u32 [%rd2+72], %r10351; st.global.u32 [%rd2+76], %r10350; st.global.u32 [%rd2+80], %r10349; st.global.u32 [%rd2+84], %r10348; st.global.u32 [%rd2+88], %r10347; st.global.u32 [%rd2+92], %r10346; st.global.u32 [%rd2+96], %r10345; st.global.u32 [%rd2+100], %r10344; st.global.u32 [%rd2+104], %r10343; st.global.u32 [%rd2+108], %r10342; st.global.u32 [%rd2+112], %r10341; st.global.u32 [%rd2+116], %r10340; st.global.u32 [%rd2+120], %r10339; st.global.u32 [%rd2+124], %r10338; st.global.u32 [%rd2+128], %r10337; st.global.u32 [%rd2+132], %r10336; st.global.u32 [%rd2+136], %r10335; st.global.u32 [%rd2+140], %r10334; mov.u32 %r10352, 0; BB4_23: mul.wide.u32 %rd3481, %r10352, 4; add.s64 %rd3482, %rd42, %rd3481; ld.shared.u32 %r10226, [%rd3482]; add.s64 %rd3485, %rd2, %rd3481; ld.shared.u32 %r10227, [%rd3482+4]; ld.shared.u32 %r10228, [%rd3482+8]; ld.shared.u32 %r10229, [%rd3482+12]; ld.shared.u32 %r10230, [%rd3482+16]; ld.shared.u32 %r10231, [%rd3482+20]; ld.shared.u32 %r10232, [%rd3482+24]; ld.shared.u32 %r10233, [%rd3482+28]; st.global.u32 [%rd3485+144], %r10226; add.s64 %rd3488, %rd45, %rd3481; st.global.u32 [%rd3485+148], %r10227; st.global.u32 [%rd3485+152], 
%r10228; st.global.u32 [%rd3485+156], %r10229; st.global.u32 [%rd3485+160], %r10230; st.global.u32 [%rd3485+164], %r10231; st.global.u32 [%rd3485+168], %r10232; st.global.u32 [%rd3485+172], %r10233; ld.shared.u32 %r10234, [%rd3488]; ld.shared.u32 %r10235, [%rd3488+4]; ld.shared.u32 %r10236, [%rd3488+8]; ld.shared.u32 %r10237, [%rd3488+12]; ld.shared.u32 %r10238, [%rd3488+16]; ld.shared.u32 %r10239, [%rd3488+20]; ld.shared.u32 %r10240, [%rd3488+24]; ld.shared.u32 %r10241, [%rd3488+28]; st.global.u32 [%rd3485+1168], %r10234; add.s64 %rd3491, %rd48, %rd3481; st.global.u32 [%rd3485+1172], %r10235; st.global.u32 [%rd3485+1176], %r10236; st.global.u32 [%rd3485+1180], %r10237; st.global.u32 [%rd3485+1184], %r10238; st.global.u32 [%rd3485+1188], %r10239; st.global.u32 [%rd3485+1192], %r10240; st.global.u32 [%rd3485+1196], %r10241; ld.shared.u32 %r10242, [%rd3491]; ld.shared.u32 %r10243, [%rd3491+4]; ld.shared.u32 %r10244, [%rd3491+8]; ld.shared.u32 %r10245, [%rd3491+12]; ld.shared.u32 %r10246, [%rd3491+16]; ld.shared.u32 %r10247, [%rd3491+20]; ld.shared.u32 %r10248, [%rd3491+24]; ld.shared.u32 %r10249, [%rd3491+28]; st.global.u32 [%rd3485+2192], %r10242; add.s64 %rd3494, %rd51, %rd3481; st.global.u32 [%rd3485+2196], %r10243; st.global.u32 [%rd3485+2200], %r10244; st.global.u32 [%rd3485+2204], %r10245; st.global.u32 [%rd3485+2208], %r10246; st.global.u32 [%rd3485+2212], %r10247; st.global.u32 [%rd3485+2216], %r10248; st.global.u32 [%rd3485+2220], %r10249; ld.shared.u32 %r10250, [%rd3494]; ld.shared.u32 %r10251, [%rd3494+4]; ld.shared.u32 %r10252, [%rd3494+8]; ld.shared.u32 %r10253, [%rd3494+12]; ld.shared.u32 %r10254, [%rd3494+16]; ld.shared.u32 %r10255, [%rd3494+20]; ld.shared.u32 %r10256, [%rd3494+24]; ld.shared.u32 %r10257, [%rd3494+28]; st.global.u32 [%rd3485+3216], %r10250; st.global.u32 [%rd3485+3220], %r10251; st.global.u32 [%rd3485+3224], %r10252; st.global.u32 [%rd3485+3228], %r10253; st.global.u32 [%rd3485+3232], %r10254; st.global.u32 [%rd3485+3236], %r10255; 
st.global.u32 [%rd3485+3240], %r10256; st.global.u32 [%rd3485+3244], %r10257; add.s32 %r10352, %r10352, 8; setp.ne.s32 %p13, %r10352, 256; @%p13 bra BB4_23; BB4_24: ret; } /* m03200_comp: kernel entry declared with .reqntid 8,1,1 (8 threads per block). Per thread it (1) copies 4 x 256 words from its 4240-byte global record under param_4 into a private 1 KiB slice of each of the four shared tables, (2) runs two 64-iteration loops of 16 table-lookup rounds each, producing four 32-bit result words, (3) tests those words against the bit tables at param_6..param_13, (4) searches the 24-byte records at param_15 for an exact match and (5) on a first-time match appends a 24-byte result record at param_14. NOTE(review): the constants and the round structure look like the Blowfish-based bcrypt final stage; confirm against the kernel source this PTX was generated from. */ // .globl m03200_comp .entry m03200_comp( .param .u64 .ptr .global .align 4 m03200_comp_param_0, .param .u64 .ptr .global .align 4 m03200_comp_param_1, .param .u64 .ptr .global .align 4 m03200_comp_param_2, .param .u64 .ptr .global .align 4 m03200_comp_param_3, .param .u64 .ptr .global .align 4 m03200_comp_param_4, .param .u64 .ptr .global .align 1 m03200_comp_param_5, .param .u64 .ptr .global .align 4 m03200_comp_param_6, .param .u64 .ptr .global .align 4 m03200_comp_param_7, .param .u64 .ptr .global .align 4 m03200_comp_param_8, .param .u64 .ptr .global .align 4 m03200_comp_param_9, .param .u64 .ptr .global .align 4 m03200_comp_param_10, .param .u64 .ptr .global .align 4 m03200_comp_param_11, .param .u64 .ptr .global .align 4 m03200_comp_param_12, .param .u64 .ptr .global .align 4 m03200_comp_param_13, .param .u64 .ptr .global .align 8 m03200_comp_param_14, .param .u64 .ptr .global .align 4 m03200_comp_param_15, .param .u64 .ptr .global .align 4 m03200_comp_param_16, .param .u64 .ptr .global .align 4 m03200_comp_param_17, .param .u64 .ptr .global .align 1 m03200_comp_param_18, .param .u64 .ptr .global .align 4 m03200_comp_param_19, .param .u64 .ptr .global .align 4 m03200_comp_param_20, .param .u64 .ptr .global .align 4 m03200_comp_param_21, .param .u64 .ptr .global .align 4 m03200_comp_param_22, .param .u64 .ptr .global .align 4 m03200_comp_param_23, .param .u32 m03200_comp_param_24, .param .u32 m03200_comp_param_25, .param .u32 m03200_comp_param_26, .param .u32 m03200_comp_param_27, .param .u32 m03200_comp_param_28, .param .u32 m03200_comp_param_29, .param .u32 m03200_comp_param_30, .param .u32 m03200_comp_param_31, .param .u32 m03200_comp_param_32, .param .u32 m03200_comp_param_33, .param .u64 m03200_comp_param_34 ) .reqntid 8, 1, 1 { .reg .pred %p<28>; .reg .b32 %r<971>; .reg .b64 %rd<376>; 
// demoted variable .shared .align 4 .b8 m03200_comp$S0_all[8192]; // demoted variable .shared .align 4 .b8 m03200_comp$S1_all[8192]; // demoted variable .shared .align 4 .b8 m03200_comp$S2_all[8192]; // demoted variable .shared .align 4 .b8 m03200_comp$S3_all[8192]; /* Thread index: %r1 = ctaid.x * ntid.x + envreg3 + tid.x, sign-extended into %rd1; %rd2 = tid.x. Threads whose index is >= param_34 (unsigned 64-bit compare) return immediately. */ ld.param.u64 %rd5, [m03200_comp_param_4]; ld.param.u64 %rd18, [m03200_comp_param_34]; mov.b32 %r61, %envreg3; mov.u32 %r62, %ctaid.x; mov.u32 %r63, %ntid.x; mad.lo.s32 %r64, %r62, %r63, %r61; mov.u32 %r65, %tid.x; add.s32 %r1, %r64, %r65; cvt.s64.s32 %rd1, %r1; cvt.s64.s32 %rd2, %r65; setp.ge.u64 %p1, %rd1, %rd18; @%p1 bra BB5_34; /* %rd20 = param_4 + index * 4240 (this thread's record). %r2..%r19 are the 18 words at byte offsets 72..140 of the record; they are the per-round XOR words of both loops below. */ mul.wide.s32 %rd19, %r1, 4240; add.s64 %rd20, %rd5, %rd19; ld.global.u32 %r2, [%rd20+72]; ld.global.u32 %r3, [%rd20+76]; ld.global.u32 %r4, [%rd20+80]; ld.global.u32 %r5, [%rd20+84]; ld.global.u32 %r6, [%rd20+88]; ld.global.u32 %r7, [%rd20+92]; ld.global.u32 %r8, [%rd20+96]; ld.global.u32 %r9, [%rd20+100]; ld.global.u32 %r10, [%rd20+104]; ld.global.u32 %r11, [%rd20+108]; ld.global.u32 %r12, [%rd20+112]; ld.global.u32 %r13, [%rd20+116]; ld.global.u32 %r14, [%rd20+120]; ld.global.u32 %r15, [%rd20+124]; ld.global.u32 %r16, [%rd20+128]; ld.global.u32 %r17, [%rd20+132]; ld.global.u32 %r18, [%rd20+136]; ld.global.u32 %r19, [%rd20+140]; mov.u32 %r960, 0; /* BB5_2: table copy loop, %r960 = 0, 8, ..., 248 (word index). Each pass moves 8 consecutive words per table from record byte offsets 144 / 1168 / 2192 / 3216 into S0 / S1 / S2 / S3 at shared byte offset tid.x * 1024 + %r960 * 4. Each thread only touches its own 1 KiB slice. */ BB5_2: mul.wide.u32 %rd23, %r960, 4; add.s64 %rd24, %rd20, %rd23; ld.global.u32 %r70, [%rd24+144]; shl.b64 %rd25, %rd2, 10; mov.u64 %rd26, m03200_comp$S0_all; add.s64 %rd27, %rd26, %rd25; add.s64 %rd28, %rd27, %rd23; ld.global.u32 %r71, [%rd24+1168]; ld.global.u32 %r72, [%rd24+2192]; ld.global.u32 %r73, [%rd24+3216]; ld.global.u32 %r74, [%rd24+148]; ld.global.u32 %r75, [%rd24+1172]; ld.global.u32 %r76, [%rd24+2196]; ld.global.u32 %r77, [%rd24+3220]; ld.global.u32 %r78, [%rd24+152]; ld.global.u32 %r79, [%rd24+1176]; ld.global.u32 %r80, [%rd24+2200]; ld.global.u32 %r81, [%rd24+3224]; ld.global.u32 %r82, [%rd24+156]; ld.global.u32 %r83, [%rd24+1180]; ld.global.u32 %r84, [%rd24+2204]; ld.global.u32 %r85, [%rd24+3228]; ld.global.u32 
%r86, [%rd24+160]; ld.global.u32 %r87, [%rd24+1184]; ld.global.u32 %r88, [%rd24+2208]; ld.global.u32 %r89, [%rd24+3232]; ld.global.u32 %r90, [%rd24+164]; ld.global.u32 %r91, [%rd24+1188]; ld.global.u32 %r92, [%rd24+2212]; ld.global.u32 %r93, [%rd24+3236]; ld.global.u32 %r94, [%rd24+168]; ld.global.u32 %r95, [%rd24+1192]; ld.global.u32 %r96, [%rd24+2216]; ld.global.u32 %r97, [%rd24+3240]; ld.global.u32 %r98, [%rd24+172]; ld.global.u32 %r99, [%rd24+1196]; ld.global.u32 %r100, [%rd24+2220]; ld.global.u32 %r101, [%rd24+3244]; st.shared.u32 [%rd28], %r70; mov.u64 %rd29, m03200_comp$S1_all; add.s64 %rd30, %rd29, %rd25; add.s64 %rd31, %rd30, %rd23; st.shared.u32 [%rd28+4], %r74; st.shared.u32 [%rd28+8], %r78; st.shared.u32 [%rd28+12], %r82; st.shared.u32 [%rd28+16], %r86; st.shared.u32 [%rd28+20], %r90; st.shared.u32 [%rd28+24], %r94; st.shared.u32 [%rd28+28], %r98; st.shared.u32 [%rd31], %r71; mov.u64 %rd32, m03200_comp$S2_all; add.s64 %rd33, %rd32, %rd25; add.s64 %rd34, %rd33, %rd23; st.shared.u32 [%rd31+4], %r75; st.shared.u32 [%rd31+8], %r79; st.shared.u32 [%rd31+12], %r83; st.shared.u32 [%rd31+16], %r87; st.shared.u32 [%rd31+20], %r91; st.shared.u32 [%rd31+24], %r95; st.shared.u32 [%rd31+28], %r99; st.shared.u32 [%rd34], %r72; mov.u64 %rd35, m03200_comp$S3_all; add.s64 %rd36, %rd35, %rd25; add.s64 %rd37, %rd36, %rd23; st.shared.u32 [%rd34+4], %r76; st.shared.u32 [%rd34+8], %r80; st.shared.u32 [%rd34+12], %r84; st.shared.u32 [%rd34+16], %r88; st.shared.u32 [%rd34+20], %r92; st.shared.u32 [%rd34+24], %r96; st.shared.u32 [%rd34+28], %r100; st.shared.u32 [%rd37], %r73; st.shared.u32 [%rd37+4], %r77; st.shared.u32 [%rd37+8], %r81; st.shared.u32 [%rd37+12], %r85; st.shared.u32 [%rd37+16], %r89; st.shared.u32 [%rd37+20], %r93; st.shared.u32 [%rd37+24], %r97; st.shared.u32 [%rd37+28], %r101; add.s32 %r960, %r960, 8; /* Initial state for the first loop: %r963 = 1332899944 = 0x4F727068 (ASCII Orph), %r962 = 1700884034 = 0x65616E42 (ASCII eanB), %r961 = -64 is the iteration counter. These moves sit inside the copy loop but are loop-invariant. */ mov.u32 %r963, 1332899944; mov.u32 %r962, 1700884034; mov.u32 %r961, -64; setp.ne.s32 %p2, %r960, 256; @%p2 bra BB5_2; /* BB5_3: first loop, 64 iterations (%r961 runs -64 .. -1), loop-carried pair (%r963, %r962). NOTE(review): the four shared-table base addresses %rd352 (S0), %rd362 (S1), %rd364 (S2), %rd372 (S3) are recomputed from %tid.x on every iteration although they are loop-invariant. */ BB5_3: mov.u32 %r959, %tid.x; cvt.s64.s32 
%rd375, %r959; shl.b64 %rd374, %rd375, 10; mov.u64 %rd373, m03200_comp$S3_all; add.s64 %rd372, %rd373, %rd374; mov.u32 %r957, %tid.x; cvt.s64.s32 %rd367, %r957; shl.b64 %rd366, %rd367, 10; mov.u64 %rd365, m03200_comp$S2_all; add.s64 %rd364, %rd365, %rd366; mov.u64 %rd363, m03200_comp$S1_all; add.s64 %rd362, %rd363, %rd366; mov.u32 %r955, %tid.x; cvt.s64.s32 %rd355, %r955; shl.b64 %rd354, %rd355, 10; mov.u64 %rd353, m03200_comp$S0_all; add.s64 %rd352, %rd353, %rd354; /* Round pattern, repeated 16 times with %r3..%r18: x = previous value; f = ((S0[x bits 31..24] + S1[x bits 23..16]) ^ S2[x bits 15..8]) + S3[x bits 7..0]; next = key word ^ value from two rounds back ^ f. bfe.u32 with length 8 (%r357) and start 24 / 16 / 8 / 0 (%r344 / %r348 / %r357 / %r356) extracts each table index. */ xor.b32 %r115, %r2, %r963; mov.u32 %r344, 24; mov.u32 %r357, 8; // inline asm bfe.u32 %r102, %r115, %r344, %r357; // inline asm mul.wide.u32 %rd41, %r102, 4; add.s64 %rd42, %rd352, %rd41; ld.shared.u32 %r361, [%rd42]; mov.u32 %r348, 16; // inline asm bfe.u32 %r106, %r115, %r348, %r357; // inline asm mul.wide.u32 %rd45, %r106, 4; add.s64 %rd46, %rd362, %rd45; ld.shared.u32 %r362, [%rd46]; add.s32 %r363, %r362, %r361; // inline asm bfe.u32 %r110, %r115, %r357, %r357; // inline asm mul.wide.u32 %rd49, %r110, 4; add.s64 %rd50, %rd364, %rd49; ld.shared.u32 %r364, [%rd50]; xor.b32 %r365, %r364, %r363; mov.u32 %r356, 0; // inline asm bfe.u32 %r114, %r115, %r356, %r357; // inline asm mul.wide.u32 %rd53, %r114, 4; add.s64 %rd54, %rd372, %rd53; ld.shared.u32 %r366, [%rd54]; add.s32 %r367, %r366, %r365; xor.b32 %r368, %r3, %r962; xor.b32 %r131, %r368, %r367; // inline asm bfe.u32 %r118, %r131, %r344, %r357; // inline asm mul.wide.u32 %rd55, %r118, 4; add.s64 %rd56, %rd352, %rd55; ld.shared.u32 %r369, [%rd56]; // inline asm bfe.u32 %r122, %r131, %r348, %r357; // inline asm mul.wide.u32 %rd57, %r122, 4; add.s64 %rd58, %rd362, %rd57; ld.shared.u32 %r370, [%rd58]; add.s32 %r371, %r370, %r369; // inline asm bfe.u32 %r126, %r131, %r357, %r357; // inline asm mul.wide.u32 %rd59, %r126, 4; add.s64 %rd60, %rd364, %rd59; ld.shared.u32 %r372, [%rd60]; xor.b32 %r373, %r372, %r371; // inline asm bfe.u32 %r130, %r131, %r356, %r357; // inline asm mul.wide.u32 %rd61, %r130, 4; add.s64 %rd62, %rd372, %rd61; ld.shared.u32 %r374, 
[%rd62]; add.s32 %r375, %r374, %r373; xor.b32 %r376, %r4, %r115; xor.b32 %r147, %r376, %r375; // inline asm bfe.u32 %r134, %r147, %r344, %r357; // inline asm mul.wide.u32 %rd63, %r134, 4; add.s64 %rd64, %rd352, %rd63; ld.shared.u32 %r377, [%rd64]; // inline asm bfe.u32 %r138, %r147, %r348, %r357; // inline asm mul.wide.u32 %rd65, %r138, 4; add.s64 %rd66, %rd362, %rd65; ld.shared.u32 %r378, [%rd66]; add.s32 %r379, %r378, %r377; // inline asm bfe.u32 %r142, %r147, %r357, %r357; // inline asm mul.wide.u32 %rd67, %r142, 4; add.s64 %rd68, %rd364, %rd67; ld.shared.u32 %r380, [%rd68]; xor.b32 %r381, %r380, %r379; // inline asm bfe.u32 %r146, %r147, %r356, %r357; // inline asm mul.wide.u32 %rd69, %r146, 4; add.s64 %rd70, %rd372, %rd69; ld.shared.u32 %r382, [%rd70]; add.s32 %r383, %r382, %r381; xor.b32 %r384, %r5, %r131; xor.b32 %r163, %r384, %r383; // inline asm bfe.u32 %r150, %r163, %r344, %r357; // inline asm mul.wide.u32 %rd71, %r150, 4; add.s64 %rd72, %rd352, %rd71; ld.shared.u32 %r385, [%rd72]; // inline asm bfe.u32 %r154, %r163, %r348, %r357; // inline asm mul.wide.u32 %rd73, %r154, 4; add.s64 %rd74, %rd362, %rd73; ld.shared.u32 %r386, [%rd74]; add.s32 %r387, %r386, %r385; // inline asm bfe.u32 %r158, %r163, %r357, %r357; // inline asm mul.wide.u32 %rd75, %r158, 4; add.s64 %rd76, %rd364, %rd75; ld.shared.u32 %r388, [%rd76]; xor.b32 %r389, %r388, %r387; // inline asm bfe.u32 %r162, %r163, %r356, %r357; // inline asm mul.wide.u32 %rd77, %r162, 4; add.s64 %rd78, %rd372, %rd77; ld.shared.u32 %r390, [%rd78]; add.s32 %r391, %r390, %r389; xor.b32 %r392, %r6, %r147; xor.b32 %r179, %r392, %r391; // inline asm bfe.u32 %r166, %r179, %r344, %r357; // inline asm mul.wide.u32 %rd79, %r166, 4; add.s64 %rd80, %rd352, %rd79; ld.shared.u32 %r393, [%rd80]; // inline asm bfe.u32 %r170, %r179, %r348, %r357; // inline asm mul.wide.u32 %rd81, %r170, 4; add.s64 %rd82, %rd362, %rd81; ld.shared.u32 %r394, [%rd82]; add.s32 %r395, %r394, %r393; // inline asm bfe.u32 %r174, %r179, %r357, %r357; 
// inline asm mul.wide.u32 %rd83, %r174, 4; add.s64 %rd84, %rd364, %rd83; ld.shared.u32 %r396, [%rd84]; xor.b32 %r397, %r396, %r395; // inline asm bfe.u32 %r178, %r179, %r356, %r357; // inline asm mul.wide.u32 %rd85, %r178, 4; add.s64 %rd86, %rd372, %rd85; ld.shared.u32 %r398, [%rd86]; add.s32 %r399, %r398, %r397; xor.b32 %r400, %r7, %r163; xor.b32 %r195, %r400, %r399; // inline asm bfe.u32 %r182, %r195, %r344, %r357; // inline asm mul.wide.u32 %rd87, %r182, 4; add.s64 %rd88, %rd352, %rd87; ld.shared.u32 %r401, [%rd88]; // inline asm bfe.u32 %r186, %r195, %r348, %r357; // inline asm mul.wide.u32 %rd89, %r186, 4; add.s64 %rd90, %rd362, %rd89; ld.shared.u32 %r402, [%rd90]; add.s32 %r403, %r402, %r401; // inline asm bfe.u32 %r190, %r195, %r357, %r357; // inline asm mul.wide.u32 %rd91, %r190, 4; add.s64 %rd92, %rd364, %rd91; ld.shared.u32 %r404, [%rd92]; xor.b32 %r405, %r404, %r403; // inline asm bfe.u32 %r194, %r195, %r356, %r357; // inline asm mul.wide.u32 %rd93, %r194, 4; add.s64 %rd94, %rd372, %rd93; ld.shared.u32 %r406, [%rd94]; add.s32 %r407, %r406, %r405; xor.b32 %r408, %r8, %r179; xor.b32 %r211, %r408, %r407; // inline asm bfe.u32 %r198, %r211, %r344, %r357; // inline asm mul.wide.u32 %rd95, %r198, 4; add.s64 %rd96, %rd352, %rd95; ld.shared.u32 %r409, [%rd96]; // inline asm bfe.u32 %r202, %r211, %r348, %r357; // inline asm mul.wide.u32 %rd97, %r202, 4; add.s64 %rd98, %rd362, %rd97; ld.shared.u32 %r410, [%rd98]; add.s32 %r411, %r410, %r409; // inline asm bfe.u32 %r206, %r211, %r357, %r357; // inline asm mul.wide.u32 %rd99, %r206, 4; add.s64 %rd100, %rd364, %rd99; ld.shared.u32 %r412, [%rd100]; xor.b32 %r413, %r412, %r411; // inline asm bfe.u32 %r210, %r211, %r356, %r357; // inline asm mul.wide.u32 %rd101, %r210, 4; add.s64 %rd102, %rd372, %rd101; ld.shared.u32 %r414, [%rd102]; add.s32 %r415, %r414, %r413; xor.b32 %r416, %r9, %r195; xor.b32 %r227, %r416, %r415; // inline asm bfe.u32 %r214, %r227, %r344, %r357; // inline asm mul.wide.u32 %rd103, %r214, 4; add.s64 
%rd104, %rd352, %rd103; ld.shared.u32 %r417, [%rd104]; // inline asm bfe.u32 %r218, %r227, %r348, %r357; // inline asm mul.wide.u32 %rd105, %r218, 4; add.s64 %rd106, %rd362, %rd105; ld.shared.u32 %r418, [%rd106]; add.s32 %r419, %r418, %r417; // inline asm bfe.u32 %r222, %r227, %r357, %r357; // inline asm mul.wide.u32 %rd107, %r222, 4; add.s64 %rd108, %rd364, %rd107; ld.shared.u32 %r420, [%rd108]; xor.b32 %r421, %r420, %r419; // inline asm bfe.u32 %r226, %r227, %r356, %r357; // inline asm mul.wide.u32 %rd109, %r226, 4; add.s64 %rd110, %rd372, %rd109; ld.shared.u32 %r422, [%rd110]; add.s32 %r423, %r422, %r421; xor.b32 %r424, %r10, %r211; xor.b32 %r243, %r424, %r423; // inline asm bfe.u32 %r230, %r243, %r344, %r357; // inline asm mul.wide.u32 %rd111, %r230, 4; add.s64 %rd112, %rd352, %rd111; ld.shared.u32 %r425, [%rd112]; // inline asm bfe.u32 %r234, %r243, %r348, %r357; // inline asm mul.wide.u32 %rd113, %r234, 4; add.s64 %rd114, %rd362, %rd113; ld.shared.u32 %r426, [%rd114]; add.s32 %r427, %r426, %r425; // inline asm bfe.u32 %r238, %r243, %r357, %r357; // inline asm mul.wide.u32 %rd115, %r238, 4; add.s64 %rd116, %rd364, %rd115; ld.shared.u32 %r428, [%rd116]; xor.b32 %r429, %r428, %r427; // inline asm bfe.u32 %r242, %r243, %r356, %r357; // inline asm mul.wide.u32 %rd117, %r242, 4; add.s64 %rd118, %rd372, %rd117; ld.shared.u32 %r430, [%rd118]; add.s32 %r431, %r430, %r429; xor.b32 %r432, %r11, %r227; xor.b32 %r259, %r432, %r431; // inline asm bfe.u32 %r246, %r259, %r344, %r357; // inline asm mul.wide.u32 %rd119, %r246, 4; add.s64 %rd120, %rd352, %rd119; ld.shared.u32 %r433, [%rd120]; // inline asm bfe.u32 %r250, %r259, %r348, %r357; // inline asm mul.wide.u32 %rd121, %r250, 4; add.s64 %rd122, %rd362, %rd121; ld.shared.u32 %r434, [%rd122]; add.s32 %r435, %r434, %r433; // inline asm bfe.u32 %r254, %r259, %r357, %r357; // inline asm mul.wide.u32 %rd123, %r254, 4; add.s64 %rd124, %rd364, %rd123; ld.shared.u32 %r436, [%rd124]; xor.b32 %r437, %r436, %r435; // inline asm 
bfe.u32 %r258, %r259, %r356, %r357; // inline asm mul.wide.u32 %rd125, %r258, 4; add.s64 %rd126, %rd372, %rd125; ld.shared.u32 %r438, [%rd126]; add.s32 %r439, %r438, %r437; xor.b32 %r440, %r12, %r243; xor.b32 %r275, %r440, %r439; // inline asm bfe.u32 %r262, %r275, %r344, %r357; // inline asm mul.wide.u32 %rd127, %r262, 4; add.s64 %rd128, %rd352, %rd127; ld.shared.u32 %r441, [%rd128]; // inline asm bfe.u32 %r266, %r275, %r348, %r357; // inline asm mul.wide.u32 %rd129, %r266, 4; add.s64 %rd130, %rd362, %rd129; ld.shared.u32 %r442, [%rd130]; add.s32 %r443, %r442, %r441; // inline asm bfe.u32 %r270, %r275, %r357, %r357; // inline asm mul.wide.u32 %rd131, %r270, 4; add.s64 %rd132, %rd364, %rd131; ld.shared.u32 %r444, [%rd132]; xor.b32 %r445, %r444, %r443; // inline asm bfe.u32 %r274, %r275, %r356, %r357; // inline asm mul.wide.u32 %rd133, %r274, 4; add.s64 %rd134, %rd372, %rd133; ld.shared.u32 %r446, [%rd134]; add.s32 %r447, %r446, %r445; xor.b32 %r448, %r13, %r259; xor.b32 %r291, %r448, %r447; // inline asm bfe.u32 %r278, %r291, %r344, %r357; // inline asm mul.wide.u32 %rd135, %r278, 4; add.s64 %rd136, %rd352, %rd135; ld.shared.u32 %r449, [%rd136]; // inline asm bfe.u32 %r282, %r291, %r348, %r357; // inline asm mul.wide.u32 %rd137, %r282, 4; add.s64 %rd138, %rd362, %rd137; ld.shared.u32 %r450, [%rd138]; add.s32 %r451, %r450, %r449; // inline asm bfe.u32 %r286, %r291, %r357, %r357; // inline asm mul.wide.u32 %rd139, %r286, 4; add.s64 %rd140, %rd364, %rd139; ld.shared.u32 %r452, [%rd140]; xor.b32 %r453, %r452, %r451; // inline asm bfe.u32 %r290, %r291, %r356, %r357; // inline asm mul.wide.u32 %rd141, %r290, 4; add.s64 %rd142, %rd372, %rd141; ld.shared.u32 %r454, [%rd142]; add.s32 %r455, %r454, %r453; xor.b32 %r456, %r14, %r275; xor.b32 %r307, %r456, %r455; // inline asm bfe.u32 %r294, %r307, %r344, %r357; // inline asm mul.wide.u32 %rd143, %r294, 4; add.s64 %rd144, %rd352, %rd143; ld.shared.u32 %r457, [%rd144]; // inline asm bfe.u32 %r298, %r307, %r348, %r357; // inline 
asm mul.wide.u32 %rd145, %r298, 4; add.s64 %rd146, %rd362, %rd145; ld.shared.u32 %r458, [%rd146]; add.s32 %r459, %r458, %r457; // inline asm bfe.u32 %r302, %r307, %r357, %r357; // inline asm mul.wide.u32 %rd147, %r302, 4; add.s64 %rd148, %rd364, %rd147; ld.shared.u32 %r460, [%rd148]; xor.b32 %r461, %r460, %r459; // inline asm bfe.u32 %r306, %r307, %r356, %r357; // inline asm mul.wide.u32 %rd149, %r306, 4; add.s64 %rd150, %rd372, %rd149; ld.shared.u32 %r462, [%rd150]; add.s32 %r463, %r462, %r461; xor.b32 %r464, %r15, %r291; xor.b32 %r323, %r464, %r463; // inline asm bfe.u32 %r310, %r323, %r344, %r357; // inline asm mul.wide.u32 %rd151, %r310, 4; add.s64 %rd152, %rd352, %rd151; ld.shared.u32 %r465, [%rd152]; // inline asm bfe.u32 %r314, %r323, %r348, %r357; // inline asm mul.wide.u32 %rd153, %r314, 4; add.s64 %rd154, %rd362, %rd153; ld.shared.u32 %r466, [%rd154]; add.s32 %r467, %r466, %r465; // inline asm bfe.u32 %r318, %r323, %r357, %r357; // inline asm mul.wide.u32 %rd155, %r318, 4; add.s64 %rd156, %rd364, %rd155; ld.shared.u32 %r468, [%rd156]; xor.b32 %r469, %r468, %r467; // inline asm bfe.u32 %r322, %r323, %r356, %r357; // inline asm mul.wide.u32 %rd157, %r322, 4; add.s64 %rd158, %rd372, %rd157; ld.shared.u32 %r470, [%rd158]; add.s32 %r471, %r470, %r469; xor.b32 %r472, %r16, %r307; xor.b32 %r339, %r472, %r471; // inline asm bfe.u32 %r326, %r339, %r344, %r357; // inline asm mul.wide.u32 %rd159, %r326, 4; add.s64 %rd160, %rd352, %rd159; ld.shared.u32 %r473, [%rd160]; // inline asm bfe.u32 %r330, %r339, %r348, %r357; // inline asm mul.wide.u32 %rd161, %r330, 4; add.s64 %rd162, %rd362, %rd161; ld.shared.u32 %r474, [%rd162]; add.s32 %r475, %r474, %r473; // inline asm bfe.u32 %r334, %r339, %r357, %r357; // inline asm mul.wide.u32 %rd163, %r334, 4; add.s64 %rd164, %rd364, %rd163; ld.shared.u32 %r476, [%rd164]; xor.b32 %r477, %r476, %r475; // inline asm bfe.u32 %r338, %r339, %r356, %r357; // inline asm mul.wide.u32 %rd165, %r338, 4; add.s64 %rd166, %rd372, %rd165; 
ld.shared.u32 %r478, [%rd166]; add.s32 %r479, %r478, %r477; xor.b32 %r480, %r17, %r323; xor.b32 %r355, %r480, %r479; // inline asm bfe.u32 %r342, %r355, %r344, %r357; // inline asm mul.wide.u32 %rd167, %r342, 4; add.s64 %rd168, %rd352, %rd167; ld.shared.u32 %r481, [%rd168]; // inline asm bfe.u32 %r346, %r355, %r348, %r357; // inline asm mul.wide.u32 %rd169, %r346, 4; add.s64 %rd170, %rd362, %rd169; ld.shared.u32 %r482, [%rd170]; add.s32 %r483, %r482, %r481; // inline asm bfe.u32 %r350, %r355, %r357, %r357; // inline asm mul.wide.u32 %rd171, %r350, 4; add.s64 %rd172, %rd364, %rd171; ld.shared.u32 %r484, [%rd172]; xor.b32 %r485, %r484, %r483; // inline asm bfe.u32 %r354, %r355, %r356, %r357; // inline asm mul.wide.u32 %rd173, %r354, 4; add.s64 %rd174, %rd372, %rd173; ld.shared.u32 %r486, [%rd174]; add.s32 %r487, %r486, %r485; /* End of iteration: the 16th round result becomes %r962, and %r963 = %r19 ^ the 15th round result; both feed the next iteration. */ xor.b32 %r488, %r18, %r339; xor.b32 %r962, %r488, %r487; xor.b32 %r963, %r19, %r355; add.s32 %r961, %r961, 1; setp.ne.s32 %p3, %r961, 0; @%p3 bra BB5_3; /* Second word pair: %r966 = 1701343084 = 0x65686F6C (ASCII ehol), %r965 = 1684370003 = 0x64657253 (ASCII derS); %r964 = -64 is the counter. */ mov.u32 %r966, 1701343084; mov.u32 %r965, 1684370003; mov.u32 %r964, -64; /* BB5_5: second loop, same 16 rounds and the same key words %r2..%r19, 64 iterations over (%r966, %r965). Table bases here are %rd344 (S0), %rd356 (S1), %rd358 (S2), %rd368 (S3), again recomputed each iteration. */ BB5_5: mov.u32 %r958, %tid.x; cvt.s64.s32 %rd371, %r958; shl.b64 %rd370, %rd371, 10; mov.u64 %rd369, m03200_comp$S3_all; add.s64 %rd368, %rd369, %rd370; mov.u32 %r956, %tid.x; cvt.s64.s32 %rd361, %r956; shl.b64 %rd360, %rd361, 10; mov.u64 %rd359, m03200_comp$S2_all; add.s64 %rd358, %rd359, %rd360; mov.u64 %rd357, m03200_comp$S1_all; add.s64 %rd356, %rd357, %rd360; mov.u32 %r943, %tid.x; cvt.s64.s32 %rd347, %r943; shl.b64 %rd346, %rd347, 10; mov.u64 %rd345, m03200_comp$S0_all; add.s64 %rd344, %rd345, %rd346; mov.u32 %r936, 0; mov.u32 %r935, 16; mov.u32 %r934, 8; mov.u32 %r933, 24; xor.b32 %r502, %r2, %r966; // inline asm bfe.u32 %r489, %r502, %r933, %r934; // inline asm mul.wide.u32 %rd178, %r489, 4; add.s64 %rd179, %rd344, %rd178; ld.shared.u32 %r745, [%rd179]; // inline asm bfe.u32 %r493, %r502, %r935, %r934; // inline asm mul.wide.u32 %rd182, %r493, 4; add.s64 %rd183, %rd356, %rd182; ld.shared.u32 %r746, [%rd183]; 
add.s32 %r747, %r746, %r745; // inline asm bfe.u32 %r497, %r502, %r934, %r934; // inline asm mul.wide.u32 %rd186, %r497, 4; add.s64 %rd187, %rd358, %rd186; ld.shared.u32 %r748, [%rd187]; xor.b32 %r749, %r748, %r747; // inline asm bfe.u32 %r501, %r502, %r936, %r934; // inline asm mul.wide.u32 %rd190, %r501, 4; add.s64 %rd191, %rd368, %rd190; ld.shared.u32 %r750, [%rd191]; add.s32 %r751, %r750, %r749; xor.b32 %r752, %r3, %r965; xor.b32 %r518, %r752, %r751; // inline asm bfe.u32 %r505, %r518, %r933, %r934; // inline asm mul.wide.u32 %rd192, %r505, 4; add.s64 %rd193, %rd344, %rd192; ld.shared.u32 %r753, [%rd193]; // inline asm bfe.u32 %r509, %r518, %r935, %r934; // inline asm mul.wide.u32 %rd194, %r509, 4; add.s64 %rd195, %rd356, %rd194; ld.shared.u32 %r754, [%rd195]; add.s32 %r755, %r754, %r753; // inline asm bfe.u32 %r513, %r518, %r934, %r934; // inline asm mul.wide.u32 %rd196, %r513, 4; add.s64 %rd197, %rd358, %rd196; ld.shared.u32 %r756, [%rd197]; xor.b32 %r757, %r756, %r755; // inline asm bfe.u32 %r517, %r518, %r936, %r934; // inline asm mul.wide.u32 %rd198, %r517, 4; add.s64 %rd199, %rd368, %rd198; ld.shared.u32 %r758, [%rd199]; add.s32 %r759, %r758, %r757; xor.b32 %r760, %r4, %r502; xor.b32 %r534, %r760, %r759; // inline asm bfe.u32 %r521, %r534, %r933, %r934; // inline asm mul.wide.u32 %rd200, %r521, 4; add.s64 %rd201, %rd344, %rd200; ld.shared.u32 %r761, [%rd201]; // inline asm bfe.u32 %r525, %r534, %r935, %r934; // inline asm mul.wide.u32 %rd202, %r525, 4; add.s64 %rd203, %rd356, %rd202; ld.shared.u32 %r762, [%rd203]; add.s32 %r763, %r762, %r761; // inline asm bfe.u32 %r529, %r534, %r934, %r934; // inline asm mul.wide.u32 %rd204, %r529, 4; add.s64 %rd205, %rd358, %rd204; ld.shared.u32 %r764, [%rd205]; xor.b32 %r765, %r764, %r763; // inline asm bfe.u32 %r533, %r534, %r936, %r934; // inline asm mul.wide.u32 %rd206, %r533, 4; add.s64 %rd207, %rd368, %rd206; ld.shared.u32 %r766, [%rd207]; add.s32 %r767, %r766, %r765; xor.b32 %r768, %r5, %r518; xor.b32 %r550, 
%r768, %r767; // inline asm bfe.u32 %r537, %r550, %r933, %r934; // inline asm mul.wide.u32 %rd208, %r537, 4; add.s64 %rd209, %rd344, %rd208; ld.shared.u32 %r769, [%rd209]; // inline asm bfe.u32 %r541, %r550, %r935, %r934; // inline asm mul.wide.u32 %rd210, %r541, 4; add.s64 %rd211, %rd356, %rd210; ld.shared.u32 %r770, [%rd211]; add.s32 %r771, %r770, %r769; // inline asm bfe.u32 %r545, %r550, %r934, %r934; // inline asm mul.wide.u32 %rd212, %r545, 4; add.s64 %rd213, %rd358, %rd212; ld.shared.u32 %r772, [%rd213]; xor.b32 %r773, %r772, %r771; // inline asm bfe.u32 %r549, %r550, %r936, %r934; // inline asm mul.wide.u32 %rd214, %r549, 4; add.s64 %rd215, %rd368, %rd214; ld.shared.u32 %r774, [%rd215]; add.s32 %r775, %r774, %r773; xor.b32 %r776, %r6, %r534; xor.b32 %r566, %r776, %r775; // inline asm bfe.u32 %r553, %r566, %r933, %r934; // inline asm mul.wide.u32 %rd216, %r553, 4; add.s64 %rd217, %rd344, %rd216; ld.shared.u32 %r777, [%rd217]; // inline asm bfe.u32 %r557, %r566, %r935, %r934; // inline asm mul.wide.u32 %rd218, %r557, 4; add.s64 %rd219, %rd356, %rd218; ld.shared.u32 %r778, [%rd219]; add.s32 %r779, %r778, %r777; // inline asm bfe.u32 %r561, %r566, %r934, %r934; // inline asm mul.wide.u32 %rd220, %r561, 4; add.s64 %rd221, %rd358, %rd220; ld.shared.u32 %r780, [%rd221]; xor.b32 %r781, %r780, %r779; // inline asm bfe.u32 %r565, %r566, %r936, %r934; // inline asm mul.wide.u32 %rd222, %r565, 4; add.s64 %rd223, %rd368, %rd222; ld.shared.u32 %r782, [%rd223]; add.s32 %r783, %r782, %r781; xor.b32 %r784, %r7, %r550; xor.b32 %r582, %r784, %r783; // inline asm bfe.u32 %r569, %r582, %r933, %r934; // inline asm mul.wide.u32 %rd224, %r569, 4; add.s64 %rd225, %rd344, %rd224; ld.shared.u32 %r785, [%rd225]; // inline asm bfe.u32 %r573, %r582, %r935, %r934; // inline asm mul.wide.u32 %rd226, %r573, 4; add.s64 %rd227, %rd356, %rd226; ld.shared.u32 %r786, [%rd227]; add.s32 %r787, %r786, %r785; // inline asm bfe.u32 %r577, %r582, %r934, %r934; // inline asm mul.wide.u32 %rd228, 
%r577, 4; add.s64 %rd229, %rd358, %rd228; ld.shared.u32 %r788, [%rd229]; xor.b32 %r789, %r788, %r787; // inline asm bfe.u32 %r581, %r582, %r936, %r934; // inline asm mul.wide.u32 %rd230, %r581, 4; add.s64 %rd231, %rd368, %rd230; ld.shared.u32 %r790, [%rd231]; add.s32 %r791, %r790, %r789; xor.b32 %r792, %r8, %r566; xor.b32 %r598, %r792, %r791; // inline asm bfe.u32 %r585, %r598, %r933, %r934; // inline asm mul.wide.u32 %rd232, %r585, 4; add.s64 %rd233, %rd344, %rd232; ld.shared.u32 %r793, [%rd233]; // inline asm bfe.u32 %r589, %r598, %r935, %r934; // inline asm mul.wide.u32 %rd234, %r589, 4; add.s64 %rd235, %rd356, %rd234; ld.shared.u32 %r794, [%rd235]; add.s32 %r795, %r794, %r793; // inline asm bfe.u32 %r593, %r598, %r934, %r934; // inline asm mul.wide.u32 %rd236, %r593, 4; add.s64 %rd237, %rd358, %rd236; ld.shared.u32 %r796, [%rd237]; xor.b32 %r797, %r796, %r795; // inline asm bfe.u32 %r597, %r598, %r936, %r934; // inline asm mul.wide.u32 %rd238, %r597, 4; add.s64 %rd239, %rd368, %rd238; ld.shared.u32 %r798, [%rd239]; add.s32 %r799, %r798, %r797; xor.b32 %r800, %r9, %r582; xor.b32 %r614, %r800, %r799; // inline asm bfe.u32 %r601, %r614, %r933, %r934; // inline asm mul.wide.u32 %rd240, %r601, 4; add.s64 %rd241, %rd344, %rd240; ld.shared.u32 %r801, [%rd241]; // inline asm bfe.u32 %r605, %r614, %r935, %r934; // inline asm mul.wide.u32 %rd242, %r605, 4; add.s64 %rd243, %rd356, %rd242; ld.shared.u32 %r802, [%rd243]; add.s32 %r803, %r802, %r801; // inline asm bfe.u32 %r609, %r614, %r934, %r934; // inline asm mul.wide.u32 %rd244, %r609, 4; add.s64 %rd245, %rd358, %rd244; ld.shared.u32 %r804, [%rd245]; xor.b32 %r805, %r804, %r803; // inline asm bfe.u32 %r613, %r614, %r936, %r934; // inline asm mul.wide.u32 %rd246, %r613, 4; add.s64 %rd247, %rd368, %rd246; ld.shared.u32 %r806, [%rd247]; add.s32 %r807, %r806, %r805; xor.b32 %r808, %r10, %r598; xor.b32 %r630, %r808, %r807; // inline asm bfe.u32 %r617, %r630, %r933, %r934; // inline asm mul.wide.u32 %rd248, %r617, 4; add.s64 
%rd249, %rd344, %rd248; ld.shared.u32 %r809, [%rd249]; // inline asm bfe.u32 %r621, %r630, %r935, %r934; // inline asm mul.wide.u32 %rd250, %r621, 4; add.s64 %rd251, %rd356, %rd250; ld.shared.u32 %r810, [%rd251]; add.s32 %r811, %r810, %r809; // inline asm bfe.u32 %r625, %r630, %r934, %r934; // inline asm mul.wide.u32 %rd252, %r625, 4; add.s64 %rd253, %rd358, %rd252; ld.shared.u32 %r812, [%rd253]; xor.b32 %r813, %r812, %r811; // inline asm bfe.u32 %r629, %r630, %r936, %r934; // inline asm mul.wide.u32 %rd254, %r629, 4; add.s64 %rd255, %rd368, %rd254; ld.shared.u32 %r814, [%rd255]; add.s32 %r815, %r814, %r813; xor.b32 %r816, %r11, %r614; xor.b32 %r646, %r816, %r815; // inline asm bfe.u32 %r633, %r646, %r933, %r934; // inline asm mul.wide.u32 %rd256, %r633, 4; add.s64 %rd257, %rd344, %rd256; ld.shared.u32 %r817, [%rd257]; // inline asm bfe.u32 %r637, %r646, %r935, %r934; // inline asm mul.wide.u32 %rd258, %r637, 4; add.s64 %rd259, %rd356, %rd258; ld.shared.u32 %r818, [%rd259]; add.s32 %r819, %r818, %r817; // inline asm bfe.u32 %r641, %r646, %r934, %r934; // inline asm mul.wide.u32 %rd260, %r641, 4; add.s64 %rd261, %rd358, %rd260; ld.shared.u32 %r820, [%rd261]; xor.b32 %r821, %r820, %r819; // inline asm bfe.u32 %r645, %r646, %r936, %r934; // inline asm mul.wide.u32 %rd262, %r645, 4; add.s64 %rd263, %rd368, %rd262; ld.shared.u32 %r822, [%rd263]; add.s32 %r823, %r822, %r821; xor.b32 %r824, %r12, %r630; xor.b32 %r662, %r824, %r823; // inline asm bfe.u32 %r649, %r662, %r933, %r934; // inline asm mul.wide.u32 %rd264, %r649, 4; add.s64 %rd265, %rd344, %rd264; ld.shared.u32 %r825, [%rd265]; // inline asm bfe.u32 %r653, %r662, %r935, %r934; // inline asm mul.wide.u32 %rd266, %r653, 4; add.s64 %rd267, %rd356, %rd266; ld.shared.u32 %r826, [%rd267]; add.s32 %r827, %r826, %r825; // inline asm bfe.u32 %r657, %r662, %r934, %r934; // inline asm mul.wide.u32 %rd268, %r657, 4; add.s64 %rd269, %rd358, %rd268; ld.shared.u32 %r828, [%rd269]; xor.b32 %r829, %r828, %r827; // inline asm 
bfe.u32 %r661, %r662, %r936, %r934; // inline asm mul.wide.u32 %rd270, %r661, 4; add.s64 %rd271, %rd368, %rd270; ld.shared.u32 %r830, [%rd271]; add.s32 %r831, %r830, %r829; xor.b32 %r832, %r13, %r646; xor.b32 %r678, %r832, %r831; // inline asm bfe.u32 %r665, %r678, %r933, %r934; // inline asm mul.wide.u32 %rd272, %r665, 4; add.s64 %rd273, %rd344, %rd272; ld.shared.u32 %r833, [%rd273]; // inline asm bfe.u32 %r669, %r678, %r935, %r934; // inline asm mul.wide.u32 %rd274, %r669, 4; add.s64 %rd275, %rd356, %rd274; ld.shared.u32 %r834, [%rd275]; add.s32 %r835, %r834, %r833; // inline asm bfe.u32 %r673, %r678, %r934, %r934; // inline asm mul.wide.u32 %rd276, %r673, 4; add.s64 %rd277, %rd358, %rd276; ld.shared.u32 %r836, [%rd277]; xor.b32 %r837, %r836, %r835; // inline asm bfe.u32 %r677, %r678, %r936, %r934; // inline asm mul.wide.u32 %rd278, %r677, 4; add.s64 %rd279, %rd368, %rd278; ld.shared.u32 %r838, [%rd279]; add.s32 %r839, %r838, %r837; xor.b32 %r840, %r14, %r662; xor.b32 %r694, %r840, %r839; // inline asm bfe.u32 %r681, %r694, %r933, %r934; // inline asm mul.wide.u32 %rd280, %r681, 4; add.s64 %rd281, %rd344, %rd280; ld.shared.u32 %r841, [%rd281]; // inline asm bfe.u32 %r685, %r694, %r935, %r934; // inline asm mul.wide.u32 %rd282, %r685, 4; add.s64 %rd283, %rd356, %rd282; ld.shared.u32 %r842, [%rd283]; add.s32 %r843, %r842, %r841; // inline asm bfe.u32 %r689, %r694, %r934, %r934; // inline asm mul.wide.u32 %rd284, %r689, 4; add.s64 %rd285, %rd358, %rd284; ld.shared.u32 %r844, [%rd285]; xor.b32 %r845, %r844, %r843; // inline asm bfe.u32 %r693, %r694, %r936, %r934; // inline asm mul.wide.u32 %rd286, %r693, 4; add.s64 %rd287, %rd368, %rd286; ld.shared.u32 %r846, [%rd287]; add.s32 %r847, %r846, %r845; xor.b32 %r848, %r15, %r678; xor.b32 %r710, %r848, %r847; // inline asm bfe.u32 %r697, %r710, %r933, %r934; // inline asm mul.wide.u32 %rd288, %r697, 4; add.s64 %rd289, %rd344, %rd288; ld.shared.u32 %r849, [%rd289]; // inline asm bfe.u32 %r701, %r710, %r935, %r934; // inline 
asm mul.wide.u32 %rd290, %r701, 4; add.s64 %rd291, %rd356, %rd290; ld.shared.u32 %r850, [%rd291]; add.s32 %r851, %r850, %r849; // inline asm bfe.u32 %r705, %r710, %r934, %r934; // inline asm mul.wide.u32 %rd292, %r705, 4; add.s64 %rd293, %rd358, %rd292; ld.shared.u32 %r852, [%rd293]; xor.b32 %r853, %r852, %r851; // inline asm bfe.u32 %r709, %r710, %r936, %r934; // inline asm mul.wide.u32 %rd294, %r709, 4; add.s64 %rd295, %rd368, %rd294; ld.shared.u32 %r854, [%rd295]; add.s32 %r855, %r854, %r853; xor.b32 %r856, %r16, %r694; xor.b32 %r726, %r856, %r855; // inline asm bfe.u32 %r713, %r726, %r933, %r934; // inline asm mul.wide.u32 %rd296, %r713, 4; add.s64 %rd297, %rd344, %rd296; ld.shared.u32 %r857, [%rd297]; // inline asm bfe.u32 %r717, %r726, %r935, %r934; // inline asm mul.wide.u32 %rd298, %r717, 4; add.s64 %rd299, %rd356, %rd298; ld.shared.u32 %r858, [%rd299]; add.s32 %r859, %r858, %r857; // inline asm bfe.u32 %r721, %r726, %r934, %r934; // inline asm mul.wide.u32 %rd300, %r721, 4; add.s64 %rd301, %rd358, %rd300; ld.shared.u32 %r860, [%rd301]; xor.b32 %r861, %r860, %r859; // inline asm bfe.u32 %r725, %r726, %r936, %r934; // inline asm mul.wide.u32 %rd302, %r725, 4; add.s64 %rd303, %rd368, %rd302; ld.shared.u32 %r862, [%rd303]; add.s32 %r863, %r862, %r861; xor.b32 %r864, %r17, %r710; xor.b32 %r742, %r864, %r863; // inline asm bfe.u32 %r729, %r742, %r933, %r934; // inline asm mul.wide.u32 %rd304, %r729, 4; add.s64 %rd305, %rd344, %rd304; ld.shared.u32 %r865, [%rd305]; // inline asm bfe.u32 %r733, %r742, %r935, %r934; // inline asm mul.wide.u32 %rd306, %r733, 4; add.s64 %rd307, %rd356, %rd306; ld.shared.u32 %r866, [%rd307]; add.s32 %r867, %r866, %r865; // inline asm bfe.u32 %r737, %r742, %r934, %r934; // inline asm mul.wide.u32 %rd308, %r737, 4; add.s64 %rd309, %rd358, %rd308; ld.shared.u32 %r868, [%rd309]; xor.b32 %r869, %r868, %r867; // inline asm bfe.u32 %r741, %r742, %r936, %r934; // inline asm mul.wide.u32 %rd310, %r741, 4; add.s64 %rd311, %rd368, %rd310; 
ld.shared.u32 %r870, [%rd311]; add.s32 %r871, %r870, %r869; xor.b32 %r872, %r18, %r726; xor.b32 %r965, %r872, %r871; xor.b32 %r966, %r19, %r742; add.s32 %r964, %r964, 1; setp.ne.s32 %p4, %r964, 0; @%p4 bra BB5_5; /* Bit-table filter, first pass. For each result word w, in the order %r963 / %r962 / %r966 / %r965 against the tables at param_6 / param_7 / param_8 / param_9: word index = (w >> (param_25 & 31)) & param_24, bit mask = 1 << (w & 31), kept in %r35 / %r36 / %r37 / %r38. The thread returns as soon as one tested bit is clear. */ ld.param.u32 %r944, [m03200_comp_param_24]; ld.param.u64 %rd335, [m03200_comp_param_6]; ld.param.u32 %r937, [m03200_comp_param_25]; and.b32 %r34, %r937, 31; shr.u32 %r873, %r963, %r34; and.b32 %r874, %r873, %r944; mul.wide.u32 %rd312, %r874, 4; add.s64 %rd313, %rd335, %rd312; and.b32 %r875, %r963, 31; mov.u32 %r876, 1; shl.b32 %r35, %r876, %r875; ld.global.u32 %r877, [%rd313]; and.b32 %r878, %r877, %r35; setp.eq.s32 %p5, %r878, 0; @%p5 bra BB5_34; ld.param.u64 %rd348, [m03200_comp_param_7]; ld.param.u32 %r945, [m03200_comp_param_24]; shr.u32 %r879, %r962, %r34; and.b32 %r880, %r879, %r945; mul.wide.u32 %rd314, %r880, 4; add.s64 %rd315, %rd348, %rd314; and.b32 %r881, %r962, 31; shl.b32 %r36, %r876, %r881; ld.global.u32 %r883, [%rd315]; and.b32 %r884, %r883, %r36; setp.eq.s32 %p6, %r884, 0; @%p6 bra BB5_34; ld.param.u32 %r946, [m03200_comp_param_24]; ld.param.u64 %rd336, [m03200_comp_param_8]; shr.u32 %r885, %r966, %r34; and.b32 %r886, %r885, %r946; mul.wide.u32 %rd316, %r886, 4; add.s64 %rd317, %rd336, %rd316; and.b32 %r887, %r966, 31; shl.b32 %r37, %r876, %r887; ld.global.u32 %r889, [%rd317]; and.b32 %r890, %r889, %r37; setp.eq.s32 %p7, %r890, 0; @%p7 bra BB5_34; ld.param.u32 %r947, [m03200_comp_param_24]; ld.param.u64 %rd337, [m03200_comp_param_9]; shr.u32 %r891, %r965, %r34; and.b32 %r892, %r891, %r947; mul.wide.u32 %rd318, %r892, 4; add.s64 %rd319, %rd337, %rd318; and.b32 %r893, %r965, 31; shl.b32 %r38, %r876, %r893; ld.global.u32 %r895, [%rd319]; and.b32 %r896, %r895, %r38; setp.eq.s32 %p8, %r896, 0; @%p8 bra BB5_34; /* Bit-table filter, second pass: same four words and the same bit masks, but the shift is param_26 & 31 and the tables are param_10 .. param_13. */ ld.param.u32 %r948, [m03200_comp_param_24]; ld.param.u64 %rd338, [m03200_comp_param_10]; ld.param.u32 %r938, [m03200_comp_param_26]; and.b32 %r39, %r938, 31; shr.u32 %r897, %r963, %r39; and.b32 %r898, %r897, %r948; mul.wide.u32 %rd320, 
%r898, 4; add.s64 %rd321, %rd338, %rd320; ld.global.u32 %r899, [%rd321]; and.b32 %r900, %r899, %r35; setp.eq.s32 %p9, %r900, 0; @%p9 bra BB5_34; ld.param.u32 %r949, [m03200_comp_param_24]; ld.param.u64 %rd339, [m03200_comp_param_11]; shr.u32 %r901, %r962, %r39; and.b32 %r902, %r901, %r949; mul.wide.u32 %rd322, %r902, 4; add.s64 %rd323, %rd339, %rd322; ld.global.u32 %r903, [%rd323]; and.b32 %r904, %r903, %r36; setp.eq.s32 %p10, %r904, 0; @%p10 bra BB5_34; ld.param.u32 %r950, [m03200_comp_param_24]; ld.param.u64 %rd340, [m03200_comp_param_12]; shr.u32 %r905, %r966, %r39; and.b32 %r906, %r905, %r950; mul.wide.u32 %rd324, %r906, 4; add.s64 %rd325, %rd340, %rd324; ld.global.u32 %r907, [%rd325]; and.b32 %r908, %r907, %r37; setp.eq.s32 %p11, %r908, 0; @%p11 bra BB5_34; ld.param.u32 %r951, [m03200_comp_param_24]; ld.param.u64 %rd341, [m03200_comp_param_13]; shr.u32 %r909, %r965, %r39; and.b32 %r910, %r909, %r951; mul.wide.u32 %rd326, %r910, 4; add.s64 %rd327, %rd341, %rd326; ld.global.u32 %r911, [%rd327]; and.b32 %r912, %r911, %r38; setp.eq.s32 %p12, %r912, 0; @%p12 bra BB5_34; /* Record search over param_31 records of 24 bytes at param_15, starting at record index param_32 (%rd3). %r967 = remaining count, %r968 = lower bound, %r970 = probed index (lower bound + count / 2). A count of zero goes straight to BB5_28 (not found). NOTE(review): the halving search presumably relies on the records being sorted in the compare order used below; verify against the producer of param_15. */ ld.param.u32 %r952, [m03200_comp_param_31]; ld.param.u32 %r939, [m03200_comp_param_32]; cvt.u64.u32 %rd3, %r939; setp.eq.s32 %p13, %r952, 0; mov.u32 %r913, -1; @%p13 bra BB5_28; ld.param.u32 %r967, [m03200_comp_param_31]; mov.u32 %r968, 0; /* BB5_16: probe one record. Unsigned compares, most significant key first: %r965 vs word at +12, %r966 vs +8, %r962 vs +4, %r963 vs +0. %r969 = 1 if the computed words are greater, -1 if smaller, 0 if all four are equal. */ BB5_16: ld.param.u64 %rd342, [m03200_comp_param_15]; shr.u32 %r42, %r967, 1; add.s32 %r970, %r42, %r968; cvt.u64.u32 %rd328, %r970; add.s64 %rd329, %rd328, %rd3; mul.lo.s64 %rd330, %rd329, 24; add.s64 %rd4, %rd342, %rd330; ld.global.u32 %r44, [%rd4+12]; setp.gt.u32 %p14, %r965, %r44; mov.u32 %r969, %r876; @%p14 bra BB5_26; setp.lt.u32 %p15, %r965, %r44; mov.u32 %r916, -1; @%p15 bra BB5_18; bra.uni BB5_19; BB5_18: mov.u32 %r969, %r916; bra.uni BB5_26; BB5_19: ld.global.u32 %r45, [%rd4+8]; setp.gt.u32 %p16, %r966, %r45; mov.u32 %r969, %r876; @%p16 bra BB5_26; setp.lt.u32 %p17, %r966, %r45; @%p17 bra BB5_21; bra.uni BB5_22; BB5_21: mov.u32 %r969, %r916; bra.uni 
BB5_26; BB5_22: ld.global.u32 %r46, [%rd4+4]; setp.gt.u32 %p18, %r962, %r46; mov.u32 %r969, %r876; @%p18 bra BB5_26; setp.lt.u32 %p19, %r962, %r46; mov.u32 %r969, %r916; @%p19 bra BB5_26; ld.global.u32 %r47, [%rd4]; setp.gt.u32 %p20, %r963, %r47; mov.u32 %r969, %r876; @%p20 bra BB5_26; setp.lt.u32 %p21, %r963, %r47; selp.b32 %r969, -1, 0, %p21; /* BB5_26: search step. If the result is > 0 the lower bound advances by count / 2 + 1 and the count is decremented; the count is then halved. Result 0 jumps to BB5_29 with %r970 = matching index; otherwise the loop continues while the count is non-zero. */ BB5_26: add.s32 %r922, %r42, 1; setp.gt.s32 %p22, %r969, 0; selp.b32 %r923, %r922, 0, %p22; add.s32 %r968, %r923, %r968; selp.b32 %r924, -1, 0, %p22; add.s32 %r925, %r924, %r967; shr.u32 %r967, %r925, 1; setp.eq.s32 %p23, %r969, 0; @%p23 bra BB5_29; setp.ne.s32 %p24, %r967, 0; @%p24 bra BB5_16; /* BB5_28: no match, %r970 = -1. BB5_29: return on -1; otherwise atomically add 1 to the 32-bit counter at param_16 + (%r970 + param_32) * 4 and continue only if the previous value was 0. */ BB5_28: mov.u32 %r970, %r913; BB5_29: setp.eq.s32 %p25, %r970, -1; @%p25 bra BB5_34; ld.param.u64 %rd343, [m03200_comp_param_16]; ld.param.u32 %r941, [m03200_comp_param_32]; add.s32 %r53, %r970, %r941; mul.wide.u32 %rd331, %r53, 4; add.s64 %rd332, %rd343, %rd331; atom.global.add.u32 %r927, [%rd332], 1; setp.ne.s32 %p26, %r927, 0; @%p26 bra BB5_34; /* Reserve an output slot: %r54 = previous value of the counter at param_19 (atomic add 1). If %r54 < param_31 the record is written (BB5_33); otherwise the increment is undone (BB5_32). */ ld.param.u64 %rd349, [m03200_comp_param_19]; ld.param.u32 %r954, [m03200_comp_param_31]; atom.global.add.u32 %r54, [%rd349], 1; setp.lt.u32 %p27, %r54, %r954; @%p27 bra BB5_33; bra.uni BB5_32; /* BB5_33: 24-byte result record at param_14 + %r54 * 24: +0 u64 thread index (%rd1), +8 zero, +12 param_27, +16 matched index %r970, +20 %r970 + param_32. */ BB5_33: ld.param.u64 %rd351, [m03200_comp_param_14]; ld.param.u32 %r942, [m03200_comp_param_27]; mul.wide.u32 %rd333, %r54, 24; add.s64 %rd334, %rd351, %rd333; st.global.v2.u32 [%rd334+16], {%r970, %r53}; mov.u32 %r929, 0; st.global.v2.u32 [%rd334+8], {%r929, %r942}; st.global.u64 [%rd334], %rd1; bra.uni BB5_34; BB5_32: ld.param.u64 %rd350, [m03200_comp_param_19]; atom.global.add.u32 %r928, [%rd350], -1; BB5_34: ret; } /* Kernel attribute metadata: m03200_init, m03200_loop and m03200_comp each carry reqd_work_group_size(8,1,1). */ .metadata_section { .metadata 0 { "cl_kernel_attributes", "m03200_init", "reqd_work_group_size(8,1,1)" } .metadata 1 { "cl_kernel_attributes", "m03200_loop", "reqd_work_group_size(8,1,1)" } .metadata 2 { "cl_kernel_attributes", "m03200_comp", "reqd_work_group_size(8,1,1)" } } // end of .metadata_section