// Mirror of https://github.com/bvanroll/odiseectf.git
// Synced 2025-08-30 04:12:44 +00:00
// 19219 lines, 582 KiB, plaintext
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: UNKNOWN
// Unknown Toolkit Version
// Based on LLVM 3.4svn
//

// Module-level directives. They must precede every other statement in the
// module; stray separator lines from the web capture were removed here so the
// header parses.
.version 6.3                            // PTX ISA version used by this module
.target sm_52, texmode_independent      // target architecture and texturing mode
.address_size 64                        // pointers are 64 bits wide
|
||
|
||
// .globl gpu_decompress
|
||
.const .align 8 .b8 c_sbox0[1024] = {166, 11, 49, 209, 172, 181, 223, 152, 219, 114, 253, 47, 183, 223, 26, 208, 237, 175, 225, 184, 150, 126, 38, 106, 69, 144, 124, 186, 153, 127, 44, 241, 71, 153, 161, 36, 247, 108, 145, 179, 226, 242, 1, 8, 22, 252, 142, 133, 216, 32, 105, 99, 105, 78, 87, 113, 163, 254, 88, 164, 126, 61, 147, 244, 143, 116, 149, 13, 88, 182, 142, 114, 88, 205, 139, 113, 238, 74, 21, 130, 29, 164, 84, 123, 181, 89, 90, 194, 57, 213, 48, 156, 19, 96, 242, 42, 35, 176, 209, 197, 240, 133, 96, 40, 24, 121, 65, 202, 239, 56, 219, 184, 176, 220, 121, 142, 14, 24, 58, 96, 139, 14, 158, 108, 62, 138, 30, 176, 193, 119, 21, 215, 39, 75, 49, 189, 218, 47, 175, 120, 96, 92, 96, 85, 243, 37, 85, 230, 148, 171, 85, 170, 98, 152, 72, 87, 64, 20, 232, 99, 106, 57, 202, 85, 182, 16, 171, 42, 52, 92, 204, 180, 206, 232, 65, 17, 175, 134, 84, 161, 147, 233, 114, 124, 17, 20, 238, 179, 42, 188, 111, 99, 93, 197, 169, 43, 246, 49, 24, 116, 22, 62, 92, 206, 30, 147, 135, 155, 51, 186, 214, 175, 92, 207, 36, 108, 129, 83, 50, 122, 119, 134, 149, 40, 152, 72, 143, 59, 175, 185, 75, 107, 27, 232, 191, 196, 147, 33, 40, 102, 204, 9, 216, 97, 145, 169, 33, 251, 96, 172, 124, 72, 50, 128, 236, 93, 93, 93, 132, 239, 177, 117, 133, 233, 2, 35, 38, 220, 136, 27, 101, 235, 129, 62, 137, 35, 197, 172, 150, 211, 243, 111, 109, 15, 57, 66, 244, 131, 130, 68, 11, 46, 4, 32, 132, 164, 74, 240, 200, 105, 94, 155, 31, 158, 66, 104, 198, 33, 154, 108, 233, 246, 97, 156, 12, 103, 240, 136, 211, 171, 210, 160, 81, 106, 104, 47, 84, 216, 40, 167, 15, 150, 163, 51, 81, 171, 108, 11, 239, 110, 228, 59, 122, 19, 80, 240, 59, 186, 152, 42, 251, 126, 29, 101, 241, 161, 118, 1, 175, 57, 62, 89, 202, 102, 136, 14, 67, 130, 25, 134, 238, 140, 180, 159, 111, 69, 195, 165, 132, 125, 190, 94, 139, 59, 216, 117, 111, 224, 115, 32, 193, 133, 159, 68, 26, 64, 166, 106, 193, 86, 98, 170, 211, 78, 6, 119, 63, 54, 114, 223, 254, 27, 61, 2, 155, 66, 36, 215, 208, 55, 72, 18, 10, 208, 211, 234, 15, 219, 
155, 192, 241, 73, 201, 114, 83, 7, 123, 27, 153, 128, 216, 121, 212, 37, 247, 222, 232, 246, 26, 80, 254, 227, 59, 76, 121, 182, 189, 224, 108, 151, 186, 6, 192, 4, 182, 79, 169, 193, 196, 96, 159, 64, 194, 158, 92, 94, 99, 36, 106, 25, 175, 111, 251, 104, 181, 83, 108, 62, 235, 178, 57, 19, 111, 236, 82, 59, 31, 81, 252, 109, 44, 149, 48, 155, 68, 69, 129, 204, 9, 189, 94, 175, 4, 208, 227, 190, 253, 74, 51, 222, 7, 40, 15, 102, 179, 75, 46, 25, 87, 168, 203, 192, 15, 116, 200, 69, 57, 95, 11, 210, 219, 251, 211, 185, 189, 192, 121, 85, 10, 50, 96, 26, 198, 0, 161, 214, 121, 114, 44, 64, 254, 37, 159, 103, 204, 163, 31, 251, 248, 233, 165, 142, 248, 34, 50, 219, 223, 22, 117, 60, 21, 107, 97, 253, 200, 30, 80, 47, 171, 82, 5, 173, 250, 181, 61, 50, 96, 135, 35, 253, 72, 123, 49, 83, 130, 223, 0, 62, 187, 87, 92, 158, 160, 140, 111, 202, 46, 86, 135, 26, 219, 105, 23, 223, 246, 168, 66, 213, 195, 255, 126, 40, 198, 50, 103, 172, 115, 85, 79, 140, 176, 39, 91, 105, 200, 88, 202, 187, 93, 163, 255, 225, 160, 17, 240, 184, 152, 61, 250, 16, 184, 131, 33, 253, 108, 181, 252, 74, 91, 211, 209, 45, 121, 228, 83, 154, 101, 69, 248, 182, 188, 73, 142, 210, 144, 151, 251, 75, 218, 242, 221, 225, 51, 126, 203, 164, 65, 19, 251, 98, 232, 198, 228, 206, 218, 202, 32, 239, 1, 76, 119, 54, 254, 158, 126, 208, 180, 31, 241, 43, 77, 218, 219, 149, 152, 145, 144, 174, 113, 142, 173, 234, 160, 213, 147, 107, 208, 209, 142, 208, 224, 37, 199, 175, 47, 91, 60, 142, 183, 148, 117, 142, 251, 226, 246, 143, 100, 43, 18, 242, 18, 184, 136, 136, 28, 240, 13, 144, 160, 94, 173, 79, 28, 195, 143, 104, 145, 241, 207, 209, 173, 193, 168, 179, 24, 34, 47, 47, 119, 23, 14, 190, 254, 45, 117, 234, 161, 31, 2, 139, 15, 204, 160, 229, 232, 116, 111, 181, 214, 243, 172, 24, 153, 226, 137, 206, 224, 79, 168, 180, 183, 224, 19, 253, 129, 59, 196, 124, 217, 168, 173, 210, 102, 162, 95, 22, 5, 119, 149, 128, 20, 115, 204, 147, 119, 20, 26, 33, 101, 32, 173, 230, 134, 250, 181, 119, 245, 66, 84, 199, 
207, 53, 157, 251, 12, 175, 205, 235, 160, 137, 62, 123, 211, 27, 65, 214, 73, 126, 30, 174, 45, 14, 37, 0, 94, 179, 113, 32, 187, 0, 104, 34, 175, 224, 184, 87, 155, 54, 100, 36, 30, 185, 9, 240, 29, 145, 99, 85, 170, 166, 223, 89, 137, 67, 193, 120, 127, 83, 90, 217, 162, 91, 125, 32, 197, 185, 229, 2, 118, 3, 38, 131, 169, 207, 149, 98, 104, 25, 200, 17, 65, 74, 115, 78, 202, 45, 71, 179, 74, 169, 20, 123, 82, 0, 81, 27, 21, 41, 83, 154, 63, 87, 15, 214, 228, 198, 155, 188, 118, 164, 96, 43, 0, 116, 230, 129, 181, 111, 186, 8, 31, 233, 27, 87, 107, 236, 150, 242, 21, 217, 13, 42, 33, 101, 99, 182, 182, 249, 185, 231, 46, 5, 52, 255, 100, 86, 133, 197, 93, 45, 176, 83, 161, 143, 159, 169, 153, 71, 186, 8, 106, 7, 133, 110};
|
||
.const .align 8 .b8 c_sbox1[1024] = {233, 112, 122, 75, 68, 41, 179, 181, 46, 9, 117, 219, 35, 38, 25, 196, 176, 166, 110, 173, 125, 223, 167, 73, 184, 96, 238, 156, 102, 178, 237, 143, 113, 140, 170, 236, 255, 23, 154, 105, 108, 82, 100, 86, 225, 158, 177, 194, 165, 2, 54, 25, 41, 76, 9, 117, 64, 19, 89, 160, 62, 58, 24, 228, 154, 152, 84, 63, 101, 157, 66, 91, 214, 228, 143, 107, 214, 63, 247, 153, 7, 156, 210, 161, 245, 48, 232, 239, 230, 56, 45, 77, 193, 93, 37, 240, 134, 32, 221, 76, 38, 235, 112, 132, 198, 233, 130, 99, 94, 204, 30, 2, 63, 107, 104, 9, 201, 239, 186, 62, 20, 24, 151, 60, 161, 112, 106, 107, 132, 53, 127, 104, 134, 226, 160, 82, 5, 83, 156, 183, 55, 7, 80, 170, 28, 132, 7, 62, 92, 174, 222, 127, 236, 68, 125, 142, 184, 242, 22, 87, 55, 218, 58, 176, 13, 12, 80, 240, 4, 31, 28, 240, 255, 179, 0, 2, 26, 245, 12, 174, 178, 116, 181, 60, 88, 122, 131, 37, 189, 33, 9, 220, 249, 19, 145, 209, 246, 47, 169, 124, 115, 71, 50, 148, 1, 71, 245, 34, 129, 229, 229, 58, 220, 218, 194, 55, 52, 118, 181, 200, 167, 221, 243, 154, 70, 97, 68, 169, 14, 3, 208, 15, 62, 199, 200, 236, 65, 30, 117, 164, 153, 205, 56, 226, 47, 14, 234, 59, 161, 187, 128, 50, 49, 179, 62, 24, 56, 139, 84, 78, 8, 185, 109, 79, 3, 13, 66, 111, 191, 4, 10, 246, 144, 18, 184, 44, 121, 124, 151, 36, 114, 176, 121, 86, 175, 137, 175, 188, 31, 119, 154, 222, 16, 8, 147, 217, 18, 174, 139, 179, 46, 63, 207, 220, 31, 114, 18, 85, 36, 113, 107, 46, 230, 221, 26, 80, 135, 205, 132, 159, 24, 71, 88, 122, 23, 218, 8, 116, 188, 154, 159, 188, 140, 125, 75, 233, 58, 236, 122, 236, 250, 29, 133, 219, 102, 67, 9, 99, 210, 195, 100, 196, 71, 24, 28, 239, 8, 217, 21, 50, 55, 59, 67, 221, 22, 186, 194, 36, 67, 77, 161, 18, 81, 196, 101, 42, 2, 0, 148, 80, 221, 228, 58, 19, 158, 248, 223, 113, 85, 78, 49, 16, 214, 119, 172, 129, 155, 25, 17, 95, 241, 86, 53, 4, 107, 199, 163, 215, 59, 24, 17, 60, 9, 165, 36, 89, 237, 230, 143, 242, 250, 251, 241, 151, 44, 191, 186, 158, 110, 60, 21, 30, 112, 69, 227, 
134, 177, 111, 233, 234, 10, 94, 14, 134, 179, 42, 62, 90, 28, 231, 31, 119, 250, 6, 61, 78, 185, 220, 101, 41, 15, 29, 231, 153, 214, 137, 62, 128, 37, 200, 102, 82, 120, 201, 76, 46, 106, 179, 16, 156, 186, 14, 21, 198, 120, 234, 226, 148, 83, 60, 252, 165, 244, 45, 10, 30, 167, 78, 247, 242, 61, 43, 29, 54, 15, 38, 57, 25, 96, 121, 194, 25, 8, 167, 35, 82, 182, 18, 19, 247, 110, 254, 173, 235, 102, 31, 195, 234, 149, 69, 188, 227, 131, 200, 123, 166, 209, 55, 127, 177, 40, 255, 140, 1, 239, 221, 50, 195, 165, 90, 108, 190, 133, 33, 88, 101, 2, 152, 171, 104, 15, 165, 206, 238, 59, 149, 47, 219, 173, 125, 239, 42, 132, 47, 110, 91, 40, 182, 33, 21, 112, 97, 7, 41, 117, 71, 221, 236, 16, 21, 159, 97, 48, 168, 204, 19, 150, 189, 97, 235, 30, 254, 52, 3, 207, 99, 3, 170, 144, 92, 115, 181, 57, 162, 112, 76, 11, 158, 158, 213, 20, 222, 170, 203, 188, 134, 204, 238, 167, 44, 98, 96, 171, 92, 171, 156, 110, 132, 243, 178, 175, 30, 139, 100, 202, 240, 189, 25, 185, 105, 35, 160, 80, 187, 90, 101, 50, 90, 104, 64, 179, 180, 42, 60, 213, 233, 158, 49, 247, 184, 33, 192, 25, 11, 84, 155, 153, 160, 95, 135, 126, 153, 247, 149, 168, 125, 61, 98, 154, 136, 55, 248, 119, 45, 227, 151, 95, 147, 237, 17, 129, 18, 104, 22, 41, 136, 53, 14, 214, 31, 230, 199, 161, 223, 222, 150, 153, 186, 88, 120, 165, 132, 245, 87, 99, 114, 34, 27, 255, 195, 131, 155, 150, 70, 194, 26, 235, 10, 179, 205, 84, 48, 46, 83, 228, 72, 217, 143, 40, 49, 188, 109, 239, 242, 235, 88, 234, 255, 198, 52, 97, 237, 40, 254, 115, 60, 124, 238, 217, 20, 74, 93, 227, 183, 100, 232, 20, 93, 16, 66, 224, 19, 62, 32, 182, 226, 238, 69, 234, 171, 170, 163, 21, 79, 108, 219, 208, 79, 203, 250, 66, 244, 66, 199, 181, 187, 106, 239, 29, 59, 79, 101, 5, 33, 205, 65, 158, 121, 30, 216, 199, 77, 133, 134, 106, 71, 75, 228, 80, 98, 129, 61, 242, 161, 98, 207, 70, 38, 141, 91, 160, 131, 136, 252, 163, 182, 199, 193, 195, 36, 21, 127, 146, 116, 203, 105, 11, 138, 132, 71, 133, 178, 146, 86, 0, 191, 91, 9, 157, 72, 25, 173, 
116, 177, 98, 20, 0, 14, 130, 35, 42, 141, 66, 88, 234, 245, 85, 12, 62, 244, 173, 29, 97, 112, 63, 35, 146, 240, 114, 51, 65, 126, 147, 141, 241, 236, 95, 214, 219, 59, 34, 108, 89, 55, 222, 124, 96, 116, 238, 203, 167, 242, 133, 64, 110, 50, 119, 206, 132, 128, 7, 166, 158, 80, 248, 25, 85, 216, 239, 232, 53, 151, 217, 97, 170, 167, 105, 169, 194, 6, 12, 197, 252, 171, 4, 90, 220, 202, 11, 128, 46, 122, 68, 158, 132, 52, 69, 195, 5, 103, 213, 253, 201, 158, 30, 14, 211, 219, 115, 219, 205, 136, 85, 16, 121, 218, 95, 103, 64, 67, 103, 227, 101, 52, 196, 197, 216, 56, 62, 113, 158, 248, 40, 61, 32, 255, 109, 241, 231, 33, 62, 21, 74, 61, 176, 143, 43, 159, 227, 230, 247, 173, 131, 219};
|
||
.const .align 8 .b8 c_sbox2[1024] = {104, 90, 61, 233, 247, 64, 129, 148, 28, 38, 76, 246, 52, 41, 105, 148, 247, 32, 21, 65, 247, 212, 2, 118, 46, 107, 244, 188, 104, 0, 162, 212, 113, 36, 8, 212, 106, 244, 32, 51, 183, 212, 183, 67, 175, 97, 0, 80, 46, 246, 57, 30, 70, 69, 36, 151, 116, 79, 33, 20, 64, 136, 139, 191, 29, 252, 149, 77, 175, 145, 181, 150, 211, 221, 244, 112, 69, 47, 160, 102, 236, 9, 188, 191, 133, 151, 189, 3, 208, 109, 172, 127, 4, 133, 203, 49, 179, 39, 235, 150, 65, 57, 253, 85, 230, 71, 37, 218, 154, 10, 202, 171, 37, 120, 80, 40, 244, 41, 4, 83, 218, 134, 44, 10, 251, 109, 182, 233, 98, 20, 220, 104, 0, 105, 72, 215, 164, 192, 14, 104, 238, 141, 161, 39, 162, 254, 63, 79, 140, 173, 135, 232, 6, 224, 140, 181, 182, 214, 244, 122, 124, 30, 206, 170, 236, 95, 55, 211, 153, 163, 120, 206, 66, 42, 107, 64, 53, 158, 254, 32, 185, 133, 243, 217, 171, 215, 57, 238, 139, 78, 18, 59, 247, 250, 201, 29, 86, 24, 109, 75, 49, 102, 163, 38, 178, 151, 227, 234, 116, 250, 110, 58, 50, 67, 91, 221, 247, 231, 65, 104, 251, 32, 120, 202, 78, 245, 10, 251, 151, 179, 254, 216, 172, 86, 64, 69, 39, 149, 72, 186, 58, 58, 83, 85, 135, 141, 131, 32, 183, 169, 107, 254, 75, 149, 150, 208, 188, 103, 168, 85, 88, 154, 21, 161, 99, 41, 169, 204, 51, 219, 225, 153, 86, 74, 42, 166, 249, 37, 49, 63, 28, 126, 244, 94, 124, 49, 41, 144, 2, 232, 248, 253, 112, 47, 39, 4, 92, 21, 187, 128, 227, 44, 40, 5, 72, 21, 193, 149, 34, 109, 198, 228, 63, 19, 193, 72, 220, 134, 15, 199, 238, 201, 249, 7, 15, 31, 4, 65, 164, 121, 71, 64, 23, 110, 136, 93, 235, 81, 95, 50, 209, 192, 155, 213, 143, 193, 188, 242, 100, 53, 17, 65, 52, 120, 123, 37, 96, 156, 42, 96, 163, 232, 248, 223, 27, 108, 99, 31, 194, 180, 18, 14, 158, 50, 225, 2, 209, 79, 102, 175, 21, 129, 209, 202, 224, 149, 35, 107, 225, 146, 62, 51, 98, 11, 36, 59, 34, 185, 190, 238, 14, 162, 178, 133, 153, 13, 186, 230, 140, 12, 114, 222, 40, 247, 162, 45, 69, 120, 18, 208, 253, 148, 183, 149, 98, 8, 125, 100, 240, 245, 204, 
231, 111, 163, 73, 84, 250, 72, 125, 135, 39, 253, 157, 195, 30, 141, 62, 243, 65, 99, 71, 10, 116, 255, 46, 153, 171, 110, 111, 58, 55, 253, 248, 244, 96, 220, 18, 168, 248, 221, 235, 161, 76, 225, 27, 153, 13, 107, 110, 219, 16, 85, 123, 198, 55, 44, 103, 109, 59, 212, 101, 39, 4, 232, 208, 220, 199, 13, 41, 241, 163, 255, 0, 204, 146, 15, 57, 181, 11, 237, 15, 105, 251, 159, 123, 102, 156, 125, 219, 206, 11, 207, 145, 160, 163, 94, 21, 217, 136, 47, 19, 187, 36, 173, 91, 81, 191, 121, 148, 123, 235, 214, 59, 118, 179, 46, 57, 55, 121, 89, 17, 204, 151, 226, 38, 128, 45, 49, 46, 244, 167, 173, 66, 104, 59, 43, 106, 198, 204, 76, 117, 18, 28, 241, 46, 120, 55, 66, 18, 106, 231, 81, 146, 183, 230, 187, 161, 6, 80, 99, 251, 75, 24, 16, 107, 26, 250, 237, 202, 17, 216, 189, 37, 61, 201, 195, 225, 226, 89, 22, 66, 68, 134, 19, 18, 10, 110, 236, 12, 217, 42, 234, 171, 213, 78, 103, 175, 100, 95, 168, 134, 218, 136, 233, 191, 190, 254, 195, 228, 100, 87, 128, 188, 157, 134, 192, 247, 240, 248, 123, 120, 96, 77, 96, 3, 96, 70, 131, 253, 209, 176, 31, 56, 246, 4, 174, 69, 119, 204, 252, 54, 215, 51, 107, 66, 131, 113, 171, 30, 240, 135, 65, 128, 176, 95, 94, 0, 60, 190, 87, 160, 119, 36, 174, 232, 189, 153, 66, 70, 85, 97, 46, 88, 191, 143, 244, 88, 78, 162, 253, 221, 242, 56, 239, 116, 244, 194, 189, 137, 135, 195, 249, 102, 83, 116, 142, 179, 200, 85, 242, 117, 180, 185, 217, 252, 70, 97, 38, 235, 122, 132, 223, 29, 139, 121, 14, 106, 132, 226, 149, 95, 145, 142, 89, 110, 70, 112, 87, 180, 32, 145, 85, 213, 140, 76, 222, 2, 201, 225, 172, 11, 185, 208, 5, 130, 187, 72, 98, 168, 17, 158, 169, 116, 117, 182, 25, 127, 183, 9, 220, 169, 224, 161, 9, 45, 102, 51, 70, 50, 196, 2, 31, 90, 232, 140, 190, 240, 9, 37, 160, 153, 74, 16, 254, 110, 29, 29, 61, 185, 26, 223, 164, 165, 11, 15, 242, 134, 161, 105, 241, 104, 40, 131, 218, 183, 220, 254, 6, 57, 87, 155, 206, 226, 161, 82, 127, 205, 79, 1, 94, 17, 80, 250, 131, 6, 167, 196, 181, 2, 160, 39, 208, 230, 13, 39, 140, 248, 
154, 65, 134, 63, 119, 6, 76, 96, 195, 181, 6, 168, 97, 40, 122, 23, 240, 224, 134, 245, 192, 170, 88, 96, 0, 98, 125, 220, 48, 215, 158, 230, 17, 99, 234, 56, 35, 148, 221, 194, 83, 52, 22, 194, 194, 86, 238, 203, 187, 222, 182, 188, 144, 161, 125, 252, 235, 118, 29, 89, 206, 9, 228, 5, 111, 136, 1, 124, 75, 61, 10, 114, 57, 36, 124, 146, 124, 95, 114, 227, 134, 185, 157, 77, 114, 180, 91, 193, 26, 252, 184, 158, 211, 120, 85, 84, 237, 181, 165, 252, 8, 211, 124, 61, 216, 196, 15, 173, 77, 94, 239, 80, 30, 248, 230, 97, 177, 217, 20, 133, 162, 60, 19, 81, 108, 231, 199, 213, 111, 196, 78, 225, 86, 206, 191, 42, 54, 55, 200, 198, 221, 52, 50, 154, 215, 18, 130, 99, 146, 142, 250, 14, 103, 224, 0, 96, 64};
|
||
.const .align 8 .b8 c_sbox3[1024] = {55, 206, 57, 58, 207, 245, 250, 211, 55, 119, 194, 171, 27, 45, 197, 90, 158, 103, 176, 92, 66, 55, 163, 79, 64, 39, 130, 211, 190, 155, 188, 153, 157, 142, 17, 213, 21, 115, 15, 191, 126, 28, 45, 214, 123, 196, 0, 199, 107, 27, 140, 183, 69, 144, 161, 33, 190, 177, 110, 178, 180, 110, 54, 106, 47, 171, 72, 87, 121, 110, 148, 188, 210, 118, 163, 198, 200, 194, 73, 101, 238, 248, 15, 83, 125, 222, 141, 70, 29, 10, 115, 213, 198, 77, 208, 76, 219, 187, 57, 41, 80, 70, 186, 169, 232, 38, 149, 172, 4, 227, 94, 190, 240, 213, 250, 161, 154, 81, 45, 106, 226, 140, 239, 99, 34, 238, 134, 154, 184, 194, 137, 192, 246, 46, 36, 67, 170, 3, 30, 165, 164, 208, 242, 156, 186, 97, 192, 131, 77, 106, 233, 155, 80, 21, 229, 143, 214, 91, 100, 186, 249, 162, 38, 40, 225, 58, 58, 167, 134, 149, 169, 75, 233, 98, 85, 239, 211, 239, 47, 199, 218, 247, 82, 247, 105, 111, 4, 63, 89, 10, 250, 119, 21, 169, 228, 128, 1, 134, 176, 135, 173, 230, 9, 155, 147, 229, 62, 59, 90, 253, 144, 233, 151, 215, 52, 158, 217, 183, 240, 44, 81, 139, 43, 2, 58, 172, 213, 150, 125, 166, 125, 1, 214, 62, 207, 209, 40, 45, 125, 124, 207, 37, 159, 31, 155, 184, 242, 173, 114, 180, 214, 90, 76, 245, 136, 90, 113, 172, 41, 224, 230, 165, 25, 224, 253, 172, 176, 71, 155, 250, 147, 237, 141, 196, 211, 232, 204, 87, 59, 40, 41, 102, 213, 248, 40, 46, 19, 121, 145, 1, 95, 120, 85, 96, 117, 237, 68, 14, 150, 247, 140, 94, 211, 227, 212, 109, 5, 21, 186, 109, 244, 136, 37, 97, 161, 3, 189, 240, 100, 5, 21, 158, 235, 195, 162, 87, 144, 60, 236, 26, 39, 151, 42, 7, 58, 169, 155, 109, 63, 27, 245, 33, 99, 30, 251, 102, 156, 245, 25, 243, 220, 38, 40, 217, 51, 117, 245, 253, 85, 177, 130, 52, 86, 3, 187, 60, 186, 138, 17, 119, 81, 40, 248, 217, 10, 194, 103, 81, 204, 171, 95, 146, 173, 204, 81, 23, 232, 77, 142, 220, 48, 56, 98, 88, 157, 55, 145, 249, 32, 147, 194, 144, 122, 234, 206, 123, 62, 251, 100, 206, 33, 81, 50, 190, 79, 119, 126, 227, 182, 168, 70, 61, 41, 195, 105, 83, 222, 
72, 128, 230, 19, 100, 16, 8, 174, 162, 36, 178, 109, 221, 253, 45, 133, 105, 102, 33, 7, 9, 10, 70, 154, 179, 221, 192, 69, 100, 207, 222, 108, 88, 174, 200, 32, 28, 221, 247, 190, 91, 64, 141, 88, 27, 127, 1, 210, 204, 187, 227, 180, 107, 126, 106, 162, 221, 69, 255, 89, 58, 68, 10, 53, 62, 213, 205, 180, 188, 168, 206, 234, 114, 187, 132, 100, 250, 174, 18, 102, 141, 71, 111, 60, 191, 99, 228, 155, 210, 158, 93, 47, 84, 27, 119, 194, 174, 112, 99, 78, 246, 141, 13, 14, 116, 87, 19, 91, 231, 113, 22, 114, 248, 93, 125, 83, 175, 8, 203, 64, 64, 204, 226, 180, 78, 106, 70, 210, 52, 132, 175, 21, 1, 40, 4, 176, 225, 29, 58, 152, 149, 180, 159, 184, 6, 72, 160, 110, 206, 130, 59, 63, 111, 130, 171, 32, 53, 75, 29, 26, 1, 248, 39, 114, 39, 177, 96, 21, 97, 220, 63, 147, 231, 43, 121, 58, 187, 189, 37, 69, 52, 225, 57, 136, 160, 75, 121, 206, 81, 183, 201, 50, 47, 201, 186, 31, 160, 126, 200, 28, 224, 246, 209, 199, 188, 195, 17, 1, 207, 199, 170, 232, 161, 73, 135, 144, 26, 154, 189, 79, 212, 203, 222, 218, 208, 56, 218, 10, 213, 42, 195, 57, 3, 103, 54, 145, 198, 124, 49, 249, 141, 79, 43, 177, 224, 183, 89, 158, 247, 58, 187, 245, 67, 255, 25, 213, 242, 156, 69, 217, 39, 44, 34, 151, 191, 42, 252, 230, 21, 113, 252, 145, 15, 37, 21, 148, 155, 97, 147, 229, 250, 235, 156, 182, 206, 89, 100, 168, 194, 209, 168, 186, 18, 94, 7, 193, 182, 12, 106, 5, 227, 101, 80, 210, 16, 66, 164, 3, 203, 14, 110, 236, 224, 59, 219, 152, 22, 190, 160, 152, 76, 100, 233, 120, 50, 50, 149, 31, 159, 223, 146, 211, 224, 43, 52, 160, 211, 30, 242, 113, 137, 65, 116, 10, 27, 140, 52, 163, 75, 32, 113, 190, 197, 216, 50, 118, 195, 141, 159, 53, 223, 46, 47, 153, 155, 71, 111, 11, 230, 29, 241, 227, 15, 84, 218, 76, 229, 145, 216, 218, 30, 207, 121, 98, 206, 111, 126, 62, 205, 102, 177, 24, 22, 5, 29, 44, 253, 197, 210, 143, 132, 153, 34, 251, 246, 87, 243, 35, 245, 35, 118, 50, 166, 49, 53, 168, 147, 2, 205, 204, 86, 98, 129, 240, 172, 181, 235, 117, 90, 151, 54, 22, 110, 204, 115, 210, 136, 
146, 98, 150, 222, 208, 73, 185, 129, 27, 144, 80, 76, 20, 86, 198, 113, 189, 199, 198, 230, 10, 20, 122, 50, 6, 208, 225, 69, 154, 123, 242, 195, 253, 83, 170, 201, 0, 15, 168, 98, 226, 191, 37, 187, 246, 210, 189, 53, 5, 105, 18, 113, 34, 2, 4, 178, 124, 207, 203, 182, 43, 156, 118, 205, 192, 62, 17, 83, 211, 227, 64, 22, 96, 189, 171, 56, 240, 173, 71, 37, 156, 32, 56, 186, 118, 206, 70, 247, 197, 161, 175, 119, 96, 96, 117, 32, 78, 254, 203, 133, 216, 141, 232, 138, 176, 249, 170, 122, 126, 170, 249, 76, 92, 194, 72, 25, 140, 138, 251, 2, 228, 106, 195, 1, 249, 225, 235, 214, 105, 248, 212, 144, 160, 222, 92, 166, 45, 37, 9, 63, 159, 230, 8, 194, 50, 97, 78, 183, 91, 226, 119, 206, 227, 223, 143, 87, 230, 114, 195, 58};
|
||
// m03200_init$S0_all has been demoted
|
||
// m03200_init$S1_all has been demoted
|
||
// m03200_init$S2_all has been demoted
|
||
// m03200_init$S3_all has been demoted
|
||
// 72 constant bytes, i.e. eighteen 32-bit words when read little-endian.
// The leading words decode to 0x243F6A88, 0x85A308D3, 0x13198A2E and the last
// to 0x8979FB1B, which are the published initial values of the Blowfish
// P-array. NOTE(review): how the kernel consumes this table is outside the
// visible part of the file - confirm against the m03200_init body.
.const .align 4 .b8 m03200_init$P[72] = {136, 106, 63, 36, 211, 8, 163, 133, 46, 138, 25, 19, 68, 115, 112, 3, 34, 56, 9, 164, 208, 49, 159, 41, 152, 250, 46, 8, 137, 108, 78, 236, 230, 33, 40, 69, 119, 19, 208, 56, 207, 102, 84, 190, 108, 12, 233, 52, 183, 41, 172, 192, 221, 80, 124, 201, 181, 213, 132, 63, 23, 9, 71, 181, 217, 213, 22, 146, 27, 251, 121, 137};
|
||
// m03200_loop$S0_all has been demoted
|
||
// m03200_loop$S1_all has been demoted
|
||
// m03200_loop$S2_all has been demoted
|
||
// m03200_loop$S3_all has been demoted
|
||
// m03200_comp$S0_all has been demoted
|
||
// m03200_comp$S1_all has been demoted
|
||
// m03200_comp$S2_all has been demoted
|
||
// m03200_comp$S3_all has been demoted
|
||
|
||
//-----------------------------------------------------------------------
// gpu_decompress
//
// Expands one compact descriptor into a fixed 260-byte output record.
//
// Params:
//   param_0  global ptr to 12-byte descriptors, indexed by work-item:
//              +0  %r3  start index (in 32-bit words) into param_1
//              +4  %r4  number of 32-bit words to copy
//              +8  %r5  value stored verbatim at byte 256 of the record
//   param_1  global ptr to the packed 32-bit source words
//   param_2  global ptr to the output records (260 bytes per work-item)
//   param_3  number of work-items to process (u64)
//
// Work-item index = %ctaid.x * %ntid.x + %envreg3 + %tid.x
//   (%envreg3 is driver-defined; presumably the global work offset - confirm).
//
// Local storage: __local_depot0[260] = 64 data words + 1 trailing word.
//
// NOTE(review): %r4 is never checked against 64. A descriptor with a larger
// word count would store past the local buffer; this relies on whoever
// produces param_0.
//
// The interleaved "|" / "||" capture artifacts were removed from this block;
// the instruction stream itself is unchanged.
//-----------------------------------------------------------------------
.entry gpu_decompress(
	.param .u64 .ptr .global .align 4 gpu_decompress_param_0,
	.param .u64 .ptr .global .align 4 gpu_decompress_param_1,
	.param .u64 .ptr .global .align 4 gpu_decompress_param_2,
	.param .u64 gpu_decompress_param_3
)
{
	.local .align 4 .b8 __local_depot0[260];
	.reg .b64 %SP;
	.reg .b64 %SPL;
	.reg .pred %p<9>;
	.reg .b32 %r<58>;
	.reg .b64 %rd<45>;

	// ---- load parameters, compute the work-item index, bounds check ----
	mov.u64 %SPL, __local_depot0;
	ld.param.u64 %rd7, [gpu_decompress_param_0];
	ld.param.u64 %rd8, [gpu_decompress_param_1];
	ld.param.u64 %rd9, [gpu_decompress_param_2];
	ld.param.u64 %rd10, [gpu_decompress_param_3];
	add.u64 %rd43, %SPL, 0;                 // %rd43 = base of the local record
	mov.u32 %r24, %ctaid.x;
	mov.u32 %r25, %ntid.x;
	mov.b32 %r26, %envreg3;
	mad.lo.s32 %r1, %r24, %r25, %r26;
	mov.u32 %r27, %tid.x;
	add.s32 %r2, %r1, %r27;                 // %r2 = work-item index
	cvt.s64.s32 %rd12, %r2;                 // sign-extend, then compare unsigned
	setp.ge.u64 %p1, %rd12, %rd10;
	@%p1 bra BB0_12;                        // index >= count: nothing to do

	// ---- fetch this work-item's 12-byte descriptor ----
	mul.wide.s32 %rd13, %r2, 12;
	add.s64 %rd14, %rd7, %rd13;
	ld.global.u32 %r3, [%rd14];             // source word index
	ld.global.u32 %r4, [%rd14+4];           // word count
	ld.global.u32 %r5, [%rd14+8];           // trailer value

	// ---- zero the 64 data words of the local record ----
	// The source register is 64-bit while the store is .u32; PTX permits a
	// wider source for st and stores only the low 32 bits.
	mov.u64 %rd15, 0;
	st.local.u32 [%rd43+4], %rd15;
	st.local.u32 [%rd43], %rd15;
	st.local.u32 [%rd43+12], %rd15;
	st.local.u32 [%rd43+8], %rd15;
	st.local.u32 [%rd43+20], %rd15;
	st.local.u32 [%rd43+16], %rd15;
	st.local.u32 [%rd43+28], %rd15;
	st.local.u32 [%rd43+24], %rd15;
	st.local.u32 [%rd43+36], %rd15;
	st.local.u32 [%rd43+32], %rd15;
	st.local.u32 [%rd43+44], %rd15;
	st.local.u32 [%rd43+40], %rd15;
	st.local.u32 [%rd43+52], %rd15;
	st.local.u32 [%rd43+48], %rd15;
	st.local.u32 [%rd43+60], %rd15;
	st.local.u32 [%rd43+56], %rd15;
	st.local.u32 [%rd43+68], %rd15;
	st.local.u32 [%rd43+64], %rd15;
	st.local.u32 [%rd43+76], %rd15;
	st.local.u32 [%rd43+72], %rd15;
	st.local.u32 [%rd43+84], %rd15;
	st.local.u32 [%rd43+80], %rd15;
	st.local.u32 [%rd43+92], %rd15;
	st.local.u32 [%rd43+88], %rd15;
	st.local.u32 [%rd43+100], %rd15;
	st.local.u32 [%rd43+96], %rd15;
	st.local.u32 [%rd43+108], %rd15;
	st.local.u32 [%rd43+104], %rd15;
	st.local.u32 [%rd43+116], %rd15;
	st.local.u32 [%rd43+112], %rd15;
	st.local.u32 [%rd43+124], %rd15;
	st.local.u32 [%rd43+120], %rd15;
	st.local.u32 [%rd43+132], %rd15;
	st.local.u32 [%rd43+128], %rd15;
	st.local.u32 [%rd43+140], %rd15;
	st.local.u32 [%rd43+136], %rd15;
	st.local.u32 [%rd43+148], %rd15;
	st.local.u32 [%rd43+144], %rd15;
	st.local.u32 [%rd43+156], %rd15;
	st.local.u32 [%rd43+152], %rd15;
	st.local.u32 [%rd43+164], %rd15;
	st.local.u32 [%rd43+160], %rd15;
	st.local.u32 [%rd43+172], %rd15;
	st.local.u32 [%rd43+168], %rd15;
	st.local.u32 [%rd43+180], %rd15;
	st.local.u32 [%rd43+176], %rd15;
	st.local.u32 [%rd43+188], %rd15;
	st.local.u32 [%rd43+184], %rd15;
	st.local.u32 [%rd43+196], %rd15;
	st.local.u32 [%rd43+192], %rd15;
	st.local.u32 [%rd43+204], %rd15;
	st.local.u32 [%rd43+200], %rd15;
	st.local.u32 [%rd43+212], %rd15;
	st.local.u32 [%rd43+208], %rd15;
	st.local.u32 [%rd43+220], %rd15;
	st.local.u32 [%rd43+216], %rd15;
	st.local.u32 [%rd43+228], %rd15;
	st.local.u32 [%rd43+224], %rd15;
	st.local.u32 [%rd43+236], %rd15;
	st.local.u32 [%rd43+232], %rd15;
	st.local.u32 [%rd43+244], %rd15;
	st.local.u32 [%rd43+240], %rd15;
	st.local.u32 [%rd43+252], %rd15;
	st.local.u32 [%rd43+248], %rd15;

	// ---- copy %r4 words from param_1[%r3 ...] into the local record ----
	setp.eq.s32 %p2, %r4, 0;
	@%p2 bra BB0_10;                        // zero words: skip the copy

	// Remainder first: (%r4 & 3) words are copied by falling through
	// BB0_6 / BB0_7, so the main loop below always moves groups of four.
	and.b32 %r6, %r4, 3;
	setp.eq.s32 %p3, %r6, 0;
	mov.u32 %r56, 0;                        // destination index if no remainder
	@%p3 bra BB0_8;

	setp.eq.s32 %p4, %r6, 1;
	mov.u32 %r52, 0;
	@%p4 bra BB0_7;                         // remainder 1: one single copy

	setp.eq.s32 %p5, %r6, 2;
	mov.u32 %r50, 0;
	@%p5 bra BB0_6;                         // remainder 2: two single copies

	// remainder 3: first of three single copies, destination word 0
	mul.wide.u32 %rd16, %r3, 4;
	add.s64 %rd17, %rd8, %rd16;
	ld.global.u32 %r32, [%rd17];
	st.local.u32 [%rd43], %r32;
	add.s32 %r3, %r3, 1;
	mov.u32 %r50, 1;

BB0_6:
	// single copy to destination word %r50
	mul.wide.u32 %rd18, %r3, 4;
	add.s64 %rd19, %rd8, %rd18;
	ld.global.u32 %r33, [%rd19];
	mul.wide.u32 %rd20, %r50, 4;
	add.s64 %rd21, %rd43, %rd20;
	st.local.u32 [%rd21], %r33;
	add.s32 %r52, %r50, 1;
	add.s32 %r3, %r3, 1;

BB0_7:
	// single copy to destination word %r52
	mul.wide.u32 %rd22, %r3, 4;
	add.s64 %rd23, %rd8, %rd22;
	ld.global.u32 %r34, [%rd23];
	mul.wide.u32 %rd24, %r52, 4;
	add.s64 %rd25, %rd43, %rd24;
	st.local.u32 [%rd25], %r34;
	add.s32 %r56, %r52, 1;
	add.s32 %r3, %r3, 1;

BB0_8:
	setp.lt.u32 %p6, %r4, 4;
	@%p6 bra BB0_10;                        // fewer than 4 words: remainder was all

BB0_9:
	// Main loop, unrolled x4: %r3 = source index, %r56 = destination index.
	mul.wide.u32 %rd26, %r3, 4;
	add.s64 %rd27, %rd8, %rd26;
	ld.global.u32 %r35, [%rd27];
	mul.wide.u32 %rd28, %r56, 4;
	add.s64 %rd29, %rd43, %rd28;
	st.local.u32 [%rd29], %r35;
	add.s32 %r36, %r3, 1;
	mul.wide.u32 %rd30, %r36, 4;
	add.s64 %rd31, %rd8, %rd30;
	ld.global.u32 %r37, [%rd31];
	add.s32 %r38, %r56, 1;
	mul.wide.u32 %rd32, %r38, 4;
	add.s64 %rd33, %rd43, %rd32;
	st.local.u32 [%rd33], %r37;
	add.s32 %r39, %r3, 2;
	mul.wide.u32 %rd34, %r39, 4;
	add.s64 %rd35, %rd8, %rd34;
	ld.global.u32 %r40, [%rd35];
	add.s32 %r41, %r56, 2;
	mul.wide.u32 %rd36, %r41, 4;
	add.s64 %rd37, %rd43, %rd36;
	st.local.u32 [%rd37], %r40;
	add.s32 %r42, %r3, 3;
	mul.wide.u32 %rd38, %r42, 4;
	add.s64 %rd39, %rd8, %rd38;
	ld.global.u32 %r43, [%rd39];
	add.s32 %r44, %r56, 3;
	mul.wide.u32 %rd40, %r44, 4;
	add.s64 %rd41, %rd43, %rd40;
	st.local.u32 [%rd41], %r43;
	add.s32 %r3, %r3, 4;
	add.s32 %r56, %r56, 4;
	setp.lt.u32 %p7, %r56, %r4;
	@%p7 bra BB0_9;

BB0_10:
	// ---- append the trailer word and locate the output record ----
	st.local.u32 [%rd43+256], %r5;
	mul.wide.s32 %rd42, %r2, 260;
	add.s64 %rd44, %rd9, %rd42;             // %rd44 = param_2 + index * 260
	mov.u32 %r57, 0;

BB0_11:
	// ---- write all 65 words (260 bytes) of the local record to global ----
	ld.local.u32 %r48, [%rd43];
	st.global.u32 [%rd44], %r48;
	add.s64 %rd44, %rd44, 4;
	add.s64 %rd43, %rd43, 4;
	add.s32 %r57, %r57, 1;
	setp.lt.u32 %p8, %r57, 65;
	@%p8 bra BB0_11;

BB0_12:
	ret;
}
|
||
|
||
// .globl gpu_memset
|
||
//-----------------------------------------------------------------------
// gpu_memset
//
// Fills a buffer with a repeated 32-bit value, 16 bytes per work-item.
//
// Params:
//   param_0  global ptr to the destination, 16-byte aligned
//   param_1  32-bit fill value
//   param_2  number of 16-byte elements to write (u64)
//
// Work-item index = %ctaid.x * %ntid.x + %envreg3 + %tid.x
//   (%envreg3 is driver-defined; presumably the global work offset - confirm).
// Work-items whose index is >= param_2 return without writing.
//
// The interleaved "|" / "||" capture artifacts were removed from this block;
// the instruction stream itself is unchanged.
//-----------------------------------------------------------------------
.entry gpu_memset(
	.param .u64 .ptr .global .align 16 gpu_memset_param_0,
	.param .u32 gpu_memset_param_1,
	.param .u64 gpu_memset_param_2
)
{
	.reg .pred %p<2>;
	.reg .b32 %r<8>;
	.reg .b64 %rd<6>;

	ld.param.u64 %rd1, [gpu_memset_param_0];
	ld.param.u32 %r2, [gpu_memset_param_1];
	ld.param.u64 %rd2, [gpu_memset_param_2];
	mov.b32 %r3, %envreg3;
	mov.u32 %r4, %ctaid.x;
	mov.u32 %r5, %ntid.x;
	mad.lo.s32 %r6, %r4, %r5, %r3;
	mov.u32 %r7, %tid.x;
	add.s32 %r1, %r6, %r7;                  // %r1 = work-item index
	cvt.s64.s32 %rd3, %r1;                  // sign-extend, then compare unsigned
	setp.ge.u64 %p1, %rd3, %rd2;
	@%p1 bra BB1_2;                         // out of range: no store

	mul.wide.s32 %rd4, %r1, 16;             // byte offset = index * 16
	add.s64 %rd5, %rd1, %rd4;
	st.global.v4.u32 [%rd5], {%r2, %r2, %r2, %r2};  // one 16-byte vector store

BB1_2:
	ret;
}
|
||
|
||
// .globl gpu_atinit
|
||
//-----------------------------------------------------------------------
// gpu_atinit
//
// Initializes one 260-byte record per work-item with an index-derived
// pattern.
//
// Params:
//   param_0  global ptr to the records (260 bytes per work-item)
//   param_1  number of records to initialize (u64)
//
// Record layout written here (32-bit words):
//   word 0      = low  32 bits of the index XOR 1549556828 (0x5C5C5C5C)
//   word 1      = high 32 bits of the sign-extended index XOR 909522486
//                 (0x36363636)
//   words 2..63 = 0
//   word 64     = 7   (byte offset 256)
//
// Work-item index = %ctaid.x * %ntid.x + %envreg3 + %tid.x
//   (%envreg3 is driver-defined; presumably the global work offset - confirm).
// Work-items whose index is >= param_1 return without writing.
//
// The interleaved "|" / "||" capture artifacts were removed from this block;
// the instruction stream itself is unchanged.
//-----------------------------------------------------------------------
.entry gpu_atinit(
	.param .u64 .ptr .global .align 4 gpu_atinit_param_0,
	.param .u64 gpu_atinit_param_1
)
{
	.reg .pred %p<2>;
	.reg .b32 %r<13>;
	.reg .b64 %rd<7>;

	ld.param.u64 %rd2, [gpu_atinit_param_0];
	ld.param.u64 %rd3, [gpu_atinit_param_1];
	mov.b32 %r2, %envreg3;
	mov.u32 %r3, %ctaid.x;
	mov.u32 %r4, %ntid.x;
	mad.lo.s32 %r5, %r3, %r4, %r2;
	mov.u32 %r6, %tid.x;
	add.s32 %r1, %r5, %r6;                  // %r1 = work-item index
	cvt.s64.s32 %rd1, %r1;                  // sign-extend, then compare unsigned
	setp.ge.u64 %p1, %rd1, %rd3;
	@%p1 bra BB2_2;                         // out of range: no stores

	// ---- words 0 and 1: the two halves of the 64-bit index, each XOR-masked ----
	cvt.u32.u64 %r7, %rd1;                  // low 32 bits
	shr.u64 %rd4, %rd1, 32;
	cvt.u32.u64 %r8, %rd4;                  // high 32 bits
	xor.b32 %r9, %r7, 1549556828;           // 0x5C5C5C5C
	xor.b32 %r10, %r8, 909522486;           // 0x36363636
	mul.wide.s32 %rd5, %r1, 260;
	add.s64 %rd6, %rd2, %rd5;               // %rd6 = param_0 + index * 260
	st.global.u32 [%rd6], %r9;
	st.global.u32 [%rd6+4], %r10;

	// ---- words 2..63: zero ----
	mov.u32 %r11, 0;
	st.global.u32 [%rd6+8], %r11;
	st.global.u32 [%rd6+12], %r11;
	st.global.u32 [%rd6+16], %r11;
	st.global.u32 [%rd6+20], %r11;
	st.global.u32 [%rd6+24], %r11;
	st.global.u32 [%rd6+28], %r11;
	st.global.u32 [%rd6+32], %r11;
	st.global.u32 [%rd6+36], %r11;
	st.global.u32 [%rd6+40], %r11;
	st.global.u32 [%rd6+44], %r11;
	st.global.u32 [%rd6+48], %r11;
	st.global.u32 [%rd6+52], %r11;
	st.global.u32 [%rd6+56], %r11;
	st.global.u32 [%rd6+60], %r11;
	st.global.u32 [%rd6+64], %r11;
	st.global.u32 [%rd6+68], %r11;
	st.global.u32 [%rd6+72], %r11;
	st.global.u32 [%rd6+76], %r11;
	st.global.u32 [%rd6+80], %r11;
	st.global.u32 [%rd6+84], %r11;
	st.global.u32 [%rd6+88], %r11;
	st.global.u32 [%rd6+92], %r11;
	st.global.u32 [%rd6+96], %r11;
	st.global.u32 [%rd6+100], %r11;
	st.global.u32 [%rd6+104], %r11;
	st.global.u32 [%rd6+108], %r11;
	st.global.u32 [%rd6+112], %r11;
	st.global.u32 [%rd6+116], %r11;
	st.global.u32 [%rd6+120], %r11;
	st.global.u32 [%rd6+124], %r11;
	st.global.u32 [%rd6+128], %r11;
	st.global.u32 [%rd6+132], %r11;
	st.global.u32 [%rd6+136], %r11;
	st.global.u32 [%rd6+140], %r11;
	st.global.u32 [%rd6+144], %r11;
	st.global.u32 [%rd6+148], %r11;
	st.global.u32 [%rd6+152], %r11;
	st.global.u32 [%rd6+156], %r11;
	st.global.u32 [%rd6+160], %r11;
	st.global.u32 [%rd6+164], %r11;
	st.global.u32 [%rd6+168], %r11;
	st.global.u32 [%rd6+172], %r11;
	st.global.u32 [%rd6+176], %r11;
	st.global.u32 [%rd6+180], %r11;
	st.global.u32 [%rd6+184], %r11;
	st.global.u32 [%rd6+188], %r11;
	st.global.u32 [%rd6+192], %r11;
	st.global.u32 [%rd6+196], %r11;
	st.global.u32 [%rd6+200], %r11;
	st.global.u32 [%rd6+204], %r11;
	st.global.u32 [%rd6+208], %r11;
	st.global.u32 [%rd6+212], %r11;
	st.global.u32 [%rd6+216], %r11;
	st.global.u32 [%rd6+220], %r11;
	st.global.u32 [%rd6+224], %r11;
	st.global.u32 [%rd6+228], %r11;
	st.global.u32 [%rd6+232], %r11;
	st.global.u32 [%rd6+236], %r11;
	st.global.u32 [%rd6+240], %r11;
	st.global.u32 [%rd6+244], %r11;
	st.global.u32 [%rd6+248], %r11;
	st.global.u32 [%rd6+252], %r11;

	// ---- word 64 (byte offset 256): constant 7 ----
	mov.u32 %r12, 7;
	st.global.u32 [%rd6+256], %r12;

BB2_2:
	ret;
}
|
||
|
||
// .globl m03200_init
|
||
.entry m03200_init(
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_0,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_1,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_2,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_3,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_4,
|
||
.param .u64 .ptr .global .align 1 m03200_init_param_5,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_6,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_7,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_8,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_9,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_10,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_11,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_12,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_13,
|
||
.param .u64 .ptr .global .align 8 m03200_init_param_14,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_15,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_16,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_17,
|
||
.param .u64 .ptr .global .align 1 m03200_init_param_18,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_19,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_20,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_21,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_22,
|
||
.param .u64 .ptr .global .align 4 m03200_init_param_23,
|
||
.param .u32 m03200_init_param_24,
|
||
.param .u32 m03200_init_param_25,
|
||
.param .u32 m03200_init_param_26,
|
||
.param .u32 m03200_init_param_27,
|
||
.param .u32 m03200_init_param_28,
|
||
.param .u32 m03200_init_param_29,
|
||
.param .u32 m03200_init_param_30,
|
||
.param .u32 m03200_init_param_31,
|
||
.param .u32 m03200_init_param_32,
|
||
.param .u32 m03200_init_param_33,
|
||
.param .u64 m03200_init_param_34
|
||
)
|
||
.reqntid 8, 1, 1
|
||
{
|
||
.local .align 16 .b8 __local_depot3[240];
|
||
.reg .b64 %SP;
|
||
.reg .b64 %SPL;
|
||
.reg .pred %p<18>;
|
||
.reg .b16 %rs<8>;
|
||
.reg .b32 %r<3909>;
|
||
.reg .b64 %rd<1341>;
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_init$S0_all[8192];
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_init$S1_all[8192];
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_init$S2_all[8192];
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_init$S3_all[8192];
|
||
|
||
mov.u64 %SPL, __local_depot3;
|
||
cvta.local.u64 %SP, %SPL;
|
||
ld.param.u64 %rd48, [m03200_init_param_0];
|
||
ld.param.u64 %rd49, [m03200_init_param_4];
|
||
ld.param.u64 %rd50, [m03200_init_param_17];
|
||
ld.param.u32 %r131, [m03200_init_param_27];
|
||
ld.param.u64 %rd51, [m03200_init_param_34];
|
||
add.u64 %rd52, %SP, 96;
|
||
add.u64 %rd1336, %SPL, 96;
|
||
mov.u32 %r132, %ctaid.x;
|
||
mov.u32 %r133, %ntid.x;
|
||
mov.b32 %r134, %envreg3;
|
||
mad.lo.s32 %r135, %r132, %r133, %r134;
|
||
mov.u32 %r1, %tid.x;
|
||
add.s32 %r2, %r135, %r1;
|
||
cvt.s64.s32 %rd53, %r2;
|
||
cvt.s64.s32 %rd2, %r1;
|
||
setp.ge.u64 %p1, %rd53, %rd51;
|
||
@%p1 bra BB3_31;
|
||
|
||
mul.wide.s32 %rd54, %r2, 260;
|
||
add.s64 %rd55, %rd48, %rd54;
|
||
ld.global.u32 %r3, [%rd55+256];
|
||
mov.u32 %r136, 0;
|
||
ld.global.u32 %r137, [%rd55];
|
||
ld.global.u32 %r138, [%rd55+4];
|
||
ld.global.u32 %r139, [%rd55+8];
|
||
ld.global.u32 %r140, [%rd55+12];
|
||
add.u64 %rd57, %SPL, 0;
|
||
ld.global.u32 %r141, [%rd55+16];
|
||
ld.global.u32 %r142, [%rd55+20];
|
||
ld.global.u32 %r143, [%rd55+24];
|
||
ld.global.u32 %r144, [%rd55+28];
|
||
ld.global.u32 %r145, [%rd55+32];
|
||
ld.global.u32 %r146, [%rd55+36];
|
||
ld.global.u32 %r147, [%rd55+40];
|
||
ld.global.u32 %r148, [%rd55+44];
|
||
ld.global.u32 %r149, [%rd55+48];
|
||
ld.global.u32 %r150, [%rd55+52];
|
||
ld.global.u32 %r151, [%rd55+56];
|
||
ld.global.u32 %r152, [%rd55+60];
|
||
ld.global.u32 %r153, [%rd55+64];
|
||
ld.global.u32 %r154, [%rd55+68];
|
||
st.local.v2.u32 [%rd57], {%r137, %r138};
|
||
st.local.v2.u32 [%rd57+8], {%r139, %r140};
|
||
st.local.v2.u32 [%rd57+16], {%r141, %r142};
|
||
st.local.v2.u32 [%rd57+24], {%r143, %r144};
|
||
st.local.v2.u32 [%rd57+32], {%r145, %r146};
|
||
st.local.v2.u32 [%rd57+40], {%r147, %r148};
|
||
st.local.v2.u32 [%rd57+48], {%r149, %r150};
|
||
st.local.v2.u32 [%rd57+56], {%r151, %r152};
|
||
st.local.v2.u32 [%rd57+64], {%r153, %r154};
|
||
add.u64 %rd6, %SPL, 168;
|
||
add.u64 %rd5, %SPL, 80;
|
||
mov.u64 %rd1326, %rd6;
|
||
mov.u32 %r3867, %r136;
|
||
|
||
BB3_2:
|
||
st.local.u32 [%rd1326], %r136;
|
||
add.s64 %rd1326, %rd1326, 4;
|
||
add.s32 %r3867, %r3867, 1;
|
||
setp.lt.u32 %p2, %r3867, 18;
|
||
@%p2 bra BB3_2;
|
||
|
||
not.b32 %r6, %r3;
|
||
mov.u32 %r3868, 0;
|
||
|
||
BB3_4:
|
||
mov.u32 %r157, 72;
|
||
sub.s32 %r158, %r157, %r3868;
|
||
min.s32 %r8, %r3, %r158;
|
||
setp.lt.s32 %p3, %r8, 1;
|
||
@%p3 bra BB3_14;
|
||
|
||
add.s32 %r160, %r3868, -73;
|
||
max.s32 %r9, %r6, %r160;
|
||
not.b32 %r161, %r9;
|
||
and.b32 %r10, %r161, 3;
|
||
setp.eq.s32 %p4, %r10, 0;
|
||
mov.u32 %r3872, 0;
|
||
@%p4 bra BB3_11;
|
||
|
||
setp.eq.s32 %p5, %r10, 1;
|
||
mov.u32 %r3870, 0;
|
||
@%p5 bra BB3_10;
|
||
|
||
setp.eq.s32 %p6, %r10, 2;
|
||
mov.u32 %r3869, 0;
|
||
@%p6 bra BB3_9;
|
||
|
||
ld.local.u8 %rs1, [%rd57];
|
||
cvt.s64.s32 %rd61, %r3868;
|
||
add.s64 %rd62, %rd6, %rd61;
|
||
st.local.u8 [%rd62], %rs1;
|
||
mov.u32 %r3869, 1;
|
||
|
||
BB3_9:
|
||
cvt.u64.u32 %rd63, %r3869;
|
||
add.s64 %rd64, %rd57, %rd63;
|
||
ld.local.u8 %rs2, [%rd64];
|
||
add.s32 %r165, %r3869, %r3868;
|
||
cvt.s64.s32 %rd65, %r165;
|
||
add.s64 %rd66, %rd6, %rd65;
|
||
st.local.u8 [%rd66], %rs2;
|
||
add.s32 %r3870, %r3869, 1;
|
||
|
||
BB3_10:
|
||
cvt.s64.s32 %rd67, %r3870;
|
||
add.s64 %rd68, %rd57, %rd67;
|
||
ld.local.u8 %rs3, [%rd68];
|
||
add.s32 %r166, %r3870, %r3868;
|
||
cvt.s64.s32 %rd69, %r166;
|
||
add.s64 %rd70, %rd6, %rd69;
|
||
st.local.u8 [%rd70], %rs3;
|
||
add.s32 %r3872, %r3870, 1;
|
||
|
||
BB3_11:
|
||
setp.gt.u32 %p7, %r9, -5;
|
||
@%p7 bra BB3_14;
|
||
|
||
add.s32 %r167, %r3872, %r3868;
|
||
cvt.s64.s32 %rd71, %r167;
|
||
add.s64 %rd1328, %rd6, %rd71;
|
||
cvt.s64.s32 %rd72, %r3872;
|
||
add.s64 %rd1327, %rd57, %rd72;
|
||
|
||
BB3_13:
|
||
ld.local.u8 %rs4, [%rd1327];
|
||
ld.local.u8 %rs5, [%rd1327+1];
|
||
ld.local.u8 %rs6, [%rd1327+2];
|
||
ld.local.u8 %rs7, [%rd1327+3];
|
||
st.local.u8 [%rd1328], %rs4;
|
||
st.local.u8 [%rd1328+1], %rs5;
|
||
st.local.u8 [%rd1328+2], %rs6;
|
||
st.local.u8 [%rd1328+3], %rs7;
|
||
add.s64 %rd1328, %rd1328, 4;
|
||
add.s64 %rd1327, %rd1327, 4;
|
||
add.s32 %r3872, %r3872, 4;
|
||
setp.lt.s32 %p8, %r3872, %r8;
|
||
@%p8 bra BB3_13;
|
||
|
||
BB3_14:
|
||
add.s32 %r168, %r3868, %r8;
|
||
add.s32 %r3868, %r168, 1;
|
||
setp.lt.s32 %p9, %r3868, 72;
|
||
@%p9 bra BB3_4;
|
||
|
||
ld.local.u32 %r170, [%rd6];
|
||
// inline asm
|
||
prmt.b32 %r169, %r170, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6], %r169;
|
||
ld.local.u32 %r172, [%rd6+4];
|
||
// inline asm
|
||
prmt.b32 %r171, %r172, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+4], %r171;
|
||
ld.local.u32 %r174, [%rd6+8];
|
||
// inline asm
|
||
prmt.b32 %r173, %r174, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+8], %r173;
|
||
ld.local.u32 %r176, [%rd6+12];
|
||
// inline asm
|
||
prmt.b32 %r175, %r176, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+12], %r175;
|
||
ld.local.u32 %r178, [%rd6+16];
|
||
// inline asm
|
||
prmt.b32 %r177, %r178, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+16], %r177;
|
||
ld.local.u32 %r180, [%rd6+20];
|
||
// inline asm
|
||
prmt.b32 %r179, %r180, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+20], %r179;
|
||
ld.local.u32 %r182, [%rd6+24];
|
||
// inline asm
|
||
prmt.b32 %r181, %r182, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+24], %r181;
|
||
ld.local.u32 %r184, [%rd6+28];
|
||
// inline asm
|
||
prmt.b32 %r183, %r184, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+28], %r183;
|
||
ld.local.u32 %r186, [%rd6+32];
|
||
// inline asm
|
||
prmt.b32 %r185, %r186, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+32], %r185;
|
||
ld.local.u32 %r188, [%rd6+36];
|
||
// inline asm
|
||
prmt.b32 %r187, %r188, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+36], %r187;
|
||
ld.local.u32 %r190, [%rd6+40];
|
||
// inline asm
|
||
prmt.b32 %r189, %r190, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+40], %r189;
|
||
ld.local.u32 %r192, [%rd6+44];
|
||
// inline asm
|
||
prmt.b32 %r191, %r192, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+44], %r191;
|
||
ld.local.u32 %r194, [%rd6+48];
|
||
// inline asm
|
||
prmt.b32 %r193, %r194, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+48], %r193;
|
||
ld.local.u32 %r196, [%rd6+52];
|
||
// inline asm
|
||
prmt.b32 %r195, %r196, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+52], %r195;
|
||
ld.local.u32 %r198, [%rd6+56];
|
||
// inline asm
|
||
prmt.b32 %r197, %r198, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+56], %r197;
|
||
ld.local.u32 %r200, [%rd6+60];
|
||
// inline asm
|
||
prmt.b32 %r199, %r200, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+60], %r199;
|
||
ld.local.u32 %r202, [%rd6+64];
|
||
// inline asm
|
||
prmt.b32 %r201, %r202, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+64], %r201;
|
||
ld.local.u32 %r204, [%rd6+68];
|
||
// inline asm
|
||
prmt.b32 %r203, %r204, 0, 0x0123;
|
||
// inline asm
|
||
st.local.u32 [%rd6+68], %r203;
|
||
mul.wide.s32 %rd74, %r2, 4240;
|
||
add.s64 %rd16, %rd49, %rd74;
|
||
st.global.u32 [%rd16], %r169;
|
||
st.global.u32 [%rd16+4], %r171;
|
||
st.global.u32 [%rd16+8], %r173;
|
||
st.global.u32 [%rd16+12], %r175;
|
||
st.global.u32 [%rd16+16], %r177;
|
||
st.global.u32 [%rd16+20], %r179;
|
||
st.global.u32 [%rd16+24], %r181;
|
||
st.global.u32 [%rd16+28], %r183;
|
||
st.global.u32 [%rd16+32], %r185;
|
||
st.global.u32 [%rd16+36], %r187;
|
||
st.global.u32 [%rd16+40], %r189;
|
||
st.global.u32 [%rd16+44], %r191;
|
||
st.global.u32 [%rd16+48], %r193;
|
||
st.global.u32 [%rd16+52], %r195;
|
||
st.global.u32 [%rd16+56], %r197;
|
||
st.global.u32 [%rd16+60], %r199;
|
||
st.global.u32 [%rd16+64], %r201;
|
||
st.global.u32 [%rd16+68], %r203;
|
||
mul.wide.u32 %rd75, %r131, 560;
|
||
add.s64 %rd76, %rd50, %rd75;
|
||
ld.global.u32 %r37, [%rd76];
|
||
mov.u32 %r3873, 0;
|
||
ld.global.u32 %r38, [%rd76+4];
|
||
ld.global.u32 %r39, [%rd76+8];
|
||
ld.global.u32 %r40, [%rd76+12];
|
||
st.local.v4.u32 [%rd5], {%r37, %r38, %r39, %r40};
|
||
cvta.to.local.u64 %rd1330, %rd52;
|
||
mov.u64 %rd1329, m03200_init$P;
|
||
|
||
BB3_16:
|
||
ld.const.u32 %r206, [%rd1329];
|
||
st.local.u32 [%rd1330], %r206;
|
||
add.s64 %rd1330, %rd1330, 4;
|
||
add.s64 %rd1329, %rd1329, 4;
|
||
add.s32 %r3873, %r3873, 1;
|
||
setp.lt.u32 %p10, %r3873, 18;
|
||
@%p10 bra BB3_16;
|
||
|
||
cvt.u32.u64 %r208, %rd2;
|
||
mul.wide.s32 %rd1335, %r208, 1024;
|
||
mov.u32 %r3874, -256;
|
||
mov.u64 %rd1334, c_sbox3;
|
||
mov.u64 %rd1333, c_sbox2;
|
||
mov.u64 %rd1332, c_sbox1;
|
||
mov.u64 %rd1331, c_sbox0;
|
||
|
||
BB3_18:
|
||
ld.const.v2.u32 {%r209, %r210}, [%rd1331];
|
||
mov.u64 %rd84, m03200_init$S0_all;
|
||
add.s64 %rd85, %rd84, %rd1335;
|
||
st.shared.u32 [%rd85], %r209;
|
||
st.shared.u32 [%rd85+4], %r210;
|
||
ld.const.v2.u32 {%r213, %r214}, [%rd1332];
|
||
mov.u64 %rd86, m03200_init$S1_all;
|
||
add.s64 %rd87, %rd86, %rd1335;
|
||
st.shared.u32 [%rd87], %r213;
|
||
st.shared.u32 [%rd87+4], %r214;
|
||
ld.const.v2.u32 {%r217, %r218}, [%rd1333];
|
||
mov.u64 %rd88, m03200_init$S2_all;
|
||
add.s64 %rd89, %rd88, %rd1335;
|
||
st.shared.u32 [%rd89], %r217;
|
||
st.shared.u32 [%rd89+4], %r218;
|
||
ld.const.v2.u32 {%r221, %r222}, [%rd1334];
|
||
mov.u64 %rd90, m03200_init$S3_all;
|
||
add.s64 %rd91, %rd90, %rd1335;
|
||
st.shared.u32 [%rd91], %r221;
|
||
st.shared.u32 [%rd91+4], %r222;
|
||
ld.const.v2.u32 {%r225, %r226}, [%rd1331+8];
|
||
st.shared.u32 [%rd85+8], %r225;
|
||
st.shared.u32 [%rd85+12], %r226;
|
||
ld.const.v2.u32 {%r229, %r230}, [%rd1332+8];
|
||
st.shared.u32 [%rd87+8], %r229;
|
||
st.shared.u32 [%rd87+12], %r230;
|
||
ld.const.v2.u32 {%r233, %r234}, [%rd1333+8];
|
||
st.shared.u32 [%rd89+8], %r233;
|
||
st.shared.u32 [%rd89+12], %r234;
|
||
ld.const.v2.u32 {%r237, %r238}, [%rd1334+8];
|
||
st.shared.u32 [%rd91+8], %r237;
|
||
st.shared.u32 [%rd91+12], %r238;
|
||
ld.const.v2.u32 {%r241, %r242}, [%rd1331+16];
|
||
st.shared.u32 [%rd85+16], %r241;
|
||
st.shared.u32 [%rd85+20], %r242;
|
||
ld.const.v2.u32 {%r245, %r246}, [%rd1332+16];
|
||
st.shared.u32 [%rd87+16], %r245;
|
||
st.shared.u32 [%rd87+20], %r246;
|
||
ld.const.v2.u32 {%r249, %r250}, [%rd1333+16];
|
||
st.shared.u32 [%rd89+16], %r249;
|
||
st.shared.u32 [%rd89+20], %r250;
|
||
ld.const.v2.u32 {%r253, %r254}, [%rd1334+16];
|
||
st.shared.u32 [%rd91+16], %r253;
|
||
st.shared.u32 [%rd91+20], %r254;
|
||
ld.const.v2.u32 {%r257, %r258}, [%rd1331+24];
|
||
st.shared.u32 [%rd85+24], %r257;
|
||
st.shared.u32 [%rd85+28], %r258;
|
||
ld.const.v2.u32 {%r261, %r262}, [%rd1332+24];
|
||
st.shared.u32 [%rd87+24], %r261;
|
||
st.shared.u32 [%rd87+28], %r262;
|
||
ld.const.v2.u32 {%r265, %r266}, [%rd1333+24];
|
||
st.shared.u32 [%rd89+24], %r265;
|
||
st.shared.u32 [%rd89+28], %r266;
|
||
ld.const.v2.u32 {%r269, %r270}, [%rd1334+24];
|
||
st.shared.u32 [%rd91+24], %r269;
|
||
st.shared.u32 [%rd91+28], %r270;
|
||
add.s64 %rd1335, %rd1335, 32;
|
||
add.s64 %rd1334, %rd1334, 32;
|
||
add.s64 %rd1333, %rd1333, 32;
|
||
add.s64 %rd1332, %rd1332, 32;
|
||
add.s64 %rd1331, %rd1331, 32;
|
||
add.s32 %r3874, %r3874, 8;
|
||
setp.ne.s32 %p11, %r3874, 0;
|
||
@%p11 bra BB3_18;
|
||
|
||
cvta.to.local.u64 %rd93, %rd52;
|
||
ld.local.v2.u32 {%r276, %r277}, [%rd93];
|
||
mov.u32 %r275, 0;
|
||
xor.b32 %r3892, %r276, %r169;
|
||
xor.b32 %r3891, %r277, %r171;
|
||
st.local.v2.u32 [%rd93], {%r3892, %r3891};
|
||
add.s64 %rd33, %rd1336, 8;
|
||
ld.local.v2.u32 {%r280, %r281}, [%rd1336+8];
|
||
xor.b32 %r3890, %r280, %r173;
|
||
xor.b32 %r3889, %r281, %r175;
|
||
st.local.v2.u32 [%rd1336+8], {%r3890, %r3889};
|
||
ld.local.v2.u32 {%r284, %r285}, [%rd1336+16];
|
||
xor.b32 %r3888, %r284, %r177;
|
||
xor.b32 %r3887, %r285, %r179;
|
||
st.local.v2.u32 [%rd1336+16], {%r3888, %r3887};
|
||
ld.local.v2.u32 {%r288, %r289}, [%rd1336+24];
|
||
xor.b32 %r3886, %r288, %r181;
|
||
xor.b32 %r3885, %r289, %r183;
|
||
st.local.v2.u32 [%rd1336+24], {%r3886, %r3885};
|
||
ld.local.v2.u32 {%r292, %r293}, [%rd1336+32];
|
||
xor.b32 %r3884, %r292, %r185;
|
||
xor.b32 %r3883, %r293, %r187;
|
||
st.local.v2.u32 [%rd1336+32], {%r3884, %r3883};
|
||
ld.local.v2.u32 {%r296, %r297}, [%rd1336+40];
|
||
xor.b32 %r3882, %r296, %r189;
|
||
xor.b32 %r3881, %r297, %r191;
|
||
st.local.v2.u32 [%rd1336+40], {%r3882, %r3881};
|
||
ld.local.v2.u32 {%r300, %r301}, [%rd1336+48];
|
||
xor.b32 %r3880, %r300, %r193;
|
||
xor.b32 %r3879, %r301, %r195;
|
||
st.local.v2.u32 [%rd1336+48], {%r3880, %r3879};
|
||
ld.local.v2.u32 {%r304, %r305}, [%rd1336+56];
|
||
xor.b32 %r3878, %r304, %r197;
|
||
xor.b32 %r3877, %r305, %r199;
|
||
st.local.v2.u32 [%rd1336+56], {%r3878, %r3877};
|
||
ld.local.v2.u32 {%r308, %r309}, [%rd1336+64];
|
||
xor.b32 %r3876, %r308, %r201;
|
||
xor.b32 %r3875, %r309, %r203;
|
||
st.local.v2.u32 [%rd1336+64], {%r3876, %r3875};
|
||
mov.u32 %r3893, %r275;
|
||
mov.u32 %r3897, %r275;
|
||
mov.u32 %r3898, %r275;
|
||
|
||
BB3_20:
|
||
and.b32 %r568, %r3893, 2;
|
||
mul.wide.u32 %rd94, %r568, 4;
|
||
add.s64 %rd95, %rd5, %rd94;
|
||
ld.local.u32 %r569, [%rd95];
|
||
xor.b32 %r570, %r569, %r3898;
|
||
ld.local.u32 %r571, [%rd95+4];
|
||
xor.b32 %r325, %r570, %r3892;
|
||
mov.u32 %r554, 24;
|
||
mov.u32 %r567, 8;
|
||
// inline asm
|
||
bfe.u32 %r312, %r325, %r554, %r567;
|
||
// inline asm
|
||
shl.b64 %rd96, %rd2, 10;
|
||
add.s64 %rd98, %rd84, %rd96;
|
||
mul.wide.u32 %rd99, %r312, 4;
|
||
add.s64 %rd100, %rd98, %rd99;
|
||
ld.shared.u32 %r572, [%rd100];
|
||
mov.u32 %r558, 16;
|
||
// inline asm
|
||
bfe.u32 %r316, %r325, %r558, %r567;
|
||
// inline asm
|
||
add.s64 %rd102, %rd86, %rd96;
|
||
mul.wide.u32 %rd103, %r316, 4;
|
||
add.s64 %rd104, %rd102, %rd103;
|
||
ld.shared.u32 %r573, [%rd104];
|
||
add.s32 %r574, %r573, %r572;
|
||
// inline asm
|
||
bfe.u32 %r320, %r325, %r567, %r567;
|
||
// inline asm
|
||
add.s64 %rd106, %rd88, %rd96;
|
||
mul.wide.u32 %rd107, %r320, 4;
|
||
add.s64 %rd108, %rd106, %rd107;
|
||
ld.shared.u32 %r575, [%rd108];
|
||
xor.b32 %r576, %r575, %r574;
|
||
// inline asm
|
||
bfe.u32 %r324, %r325, %r275, %r567;
|
||
// inline asm
|
||
add.s64 %rd110, %rd90, %rd96;
|
||
mul.wide.u32 %rd111, %r324, 4;
|
||
add.s64 %rd112, %rd110, %rd111;
|
||
ld.shared.u32 %r577, [%rd112];
|
||
add.s32 %r578, %r577, %r576;
|
||
xor.b32 %r579, %r571, %r3897;
|
||
xor.b32 %r580, %r579, %r3891;
|
||
xor.b32 %r341, %r580, %r578;
|
||
// inline asm
|
||
bfe.u32 %r328, %r341, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd113, %r328, 4;
|
||
add.s64 %rd114, %rd98, %rd113;
|
||
ld.shared.u32 %r581, [%rd114];
|
||
// inline asm
|
||
bfe.u32 %r332, %r341, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd115, %r332, 4;
|
||
add.s64 %rd116, %rd102, %rd115;
|
||
ld.shared.u32 %r582, [%rd116];
|
||
add.s32 %r583, %r582, %r581;
|
||
// inline asm
|
||
bfe.u32 %r336, %r341, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd117, %r336, 4;
|
||
add.s64 %rd118, %rd106, %rd117;
|
||
ld.shared.u32 %r584, [%rd118];
|
||
xor.b32 %r585, %r584, %r583;
|
||
// inline asm
|
||
bfe.u32 %r340, %r341, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd119, %r340, 4;
|
||
add.s64 %rd120, %rd110, %rd119;
|
||
ld.shared.u32 %r586, [%rd120];
|
||
add.s32 %r587, %r586, %r585;
|
||
xor.b32 %r588, %r3890, %r325;
|
||
xor.b32 %r357, %r588, %r587;
|
||
// inline asm
|
||
bfe.u32 %r344, %r357, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd121, %r344, 4;
|
||
add.s64 %rd122, %rd98, %rd121;
|
||
ld.shared.u32 %r589, [%rd122];
|
||
// inline asm
|
||
bfe.u32 %r348, %r357, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd123, %r348, 4;
|
||
add.s64 %rd124, %rd102, %rd123;
|
||
ld.shared.u32 %r590, [%rd124];
|
||
add.s32 %r591, %r590, %r589;
|
||
// inline asm
|
||
bfe.u32 %r352, %r357, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd125, %r352, 4;
|
||
add.s64 %rd126, %rd106, %rd125;
|
||
ld.shared.u32 %r592, [%rd126];
|
||
xor.b32 %r593, %r592, %r591;
|
||
// inline asm
|
||
bfe.u32 %r356, %r357, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd127, %r356, 4;
|
||
add.s64 %rd128, %rd110, %rd127;
|
||
ld.shared.u32 %r594, [%rd128];
|
||
add.s32 %r595, %r594, %r593;
|
||
xor.b32 %r596, %r3889, %r341;
|
||
xor.b32 %r373, %r596, %r595;
|
||
// inline asm
|
||
bfe.u32 %r360, %r373, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd129, %r360, 4;
|
||
add.s64 %rd130, %rd98, %rd129;
|
||
ld.shared.u32 %r597, [%rd130];
|
||
// inline asm
|
||
bfe.u32 %r364, %r373, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd131, %r364, 4;
|
||
add.s64 %rd132, %rd102, %rd131;
|
||
ld.shared.u32 %r598, [%rd132];
|
||
add.s32 %r599, %r598, %r597;
|
||
// inline asm
|
||
bfe.u32 %r368, %r373, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd133, %r368, 4;
|
||
add.s64 %rd134, %rd106, %rd133;
|
||
ld.shared.u32 %r600, [%rd134];
|
||
xor.b32 %r601, %r600, %r599;
|
||
// inline asm
|
||
bfe.u32 %r372, %r373, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd135, %r372, 4;
|
||
add.s64 %rd136, %rd110, %rd135;
|
||
ld.shared.u32 %r602, [%rd136];
|
||
add.s32 %r603, %r602, %r601;
|
||
xor.b32 %r604, %r3888, %r357;
|
||
xor.b32 %r389, %r604, %r603;
|
||
// inline asm
|
||
bfe.u32 %r376, %r389, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd137, %r376, 4;
|
||
add.s64 %rd138, %rd98, %rd137;
|
||
ld.shared.u32 %r605, [%rd138];
|
||
// inline asm
|
||
bfe.u32 %r380, %r389, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd139, %r380, 4;
|
||
add.s64 %rd140, %rd102, %rd139;
|
||
ld.shared.u32 %r606, [%rd140];
|
||
add.s32 %r607, %r606, %r605;
|
||
// inline asm
|
||
bfe.u32 %r384, %r389, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd141, %r384, 4;
|
||
add.s64 %rd142, %rd106, %rd141;
|
||
ld.shared.u32 %r608, [%rd142];
|
||
xor.b32 %r609, %r608, %r607;
|
||
// inline asm
|
||
bfe.u32 %r388, %r389, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd143, %r388, 4;
|
||
add.s64 %rd144, %rd110, %rd143;
|
||
ld.shared.u32 %r610, [%rd144];
|
||
add.s32 %r611, %r610, %r609;
|
||
xor.b32 %r612, %r3887, %r373;
|
||
xor.b32 %r405, %r612, %r611;
|
||
// inline asm
|
||
bfe.u32 %r392, %r405, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd145, %r392, 4;
|
||
add.s64 %rd146, %rd98, %rd145;
|
||
ld.shared.u32 %r613, [%rd146];
|
||
// inline asm
|
||
bfe.u32 %r396, %r405, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd147, %r396, 4;
|
||
add.s64 %rd148, %rd102, %rd147;
|
||
ld.shared.u32 %r614, [%rd148];
|
||
add.s32 %r615, %r614, %r613;
|
||
// inline asm
|
||
bfe.u32 %r400, %r405, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd149, %r400, 4;
|
||
add.s64 %rd150, %rd106, %rd149;
|
||
ld.shared.u32 %r616, [%rd150];
|
||
xor.b32 %r617, %r616, %r615;
|
||
// inline asm
|
||
bfe.u32 %r404, %r405, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd151, %r404, 4;
|
||
add.s64 %rd152, %rd110, %rd151;
|
||
ld.shared.u32 %r618, [%rd152];
|
||
add.s32 %r619, %r618, %r617;
|
||
xor.b32 %r620, %r3886, %r389;
|
||
xor.b32 %r421, %r620, %r619;
|
||
// inline asm
|
||
bfe.u32 %r408, %r421, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd153, %r408, 4;
|
||
add.s64 %rd154, %rd98, %rd153;
|
||
ld.shared.u32 %r621, [%rd154];
|
||
// inline asm
|
||
bfe.u32 %r412, %r421, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd155, %r412, 4;
|
||
add.s64 %rd156, %rd102, %rd155;
|
||
ld.shared.u32 %r622, [%rd156];
|
||
add.s32 %r623, %r622, %r621;
|
||
// inline asm
|
||
bfe.u32 %r416, %r421, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd157, %r416, 4;
|
||
add.s64 %rd158, %rd106, %rd157;
|
||
ld.shared.u32 %r624, [%rd158];
|
||
xor.b32 %r625, %r624, %r623;
|
||
// inline asm
|
||
bfe.u32 %r420, %r421, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd159, %r420, 4;
|
||
add.s64 %rd160, %rd110, %rd159;
|
||
ld.shared.u32 %r626, [%rd160];
|
||
add.s32 %r627, %r626, %r625;
|
||
xor.b32 %r628, %r3885, %r405;
|
||
xor.b32 %r437, %r628, %r627;
|
||
// inline asm
|
||
bfe.u32 %r424, %r437, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd161, %r424, 4;
|
||
add.s64 %rd162, %rd98, %rd161;
|
||
ld.shared.u32 %r629, [%rd162];
|
||
// inline asm
|
||
bfe.u32 %r428, %r437, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd163, %r428, 4;
|
||
add.s64 %rd164, %rd102, %rd163;
|
||
ld.shared.u32 %r630, [%rd164];
|
||
add.s32 %r631, %r630, %r629;
|
||
// inline asm
|
||
bfe.u32 %r432, %r437, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd165, %r432, 4;
|
||
add.s64 %rd166, %rd106, %rd165;
|
||
ld.shared.u32 %r632, [%rd166];
|
||
xor.b32 %r633, %r632, %r631;
|
||
// inline asm
|
||
bfe.u32 %r436, %r437, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd167, %r436, 4;
|
||
add.s64 %rd168, %rd110, %rd167;
|
||
ld.shared.u32 %r634, [%rd168];
|
||
add.s32 %r635, %r634, %r633;
|
||
xor.b32 %r636, %r3884, %r421;
|
||
xor.b32 %r453, %r636, %r635;
|
||
// inline asm
|
||
bfe.u32 %r440, %r453, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd169, %r440, 4;
|
||
add.s64 %rd170, %rd98, %rd169;
|
||
ld.shared.u32 %r637, [%rd170];
|
||
// inline asm
|
||
bfe.u32 %r444, %r453, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd171, %r444, 4;
|
||
add.s64 %rd172, %rd102, %rd171;
|
||
ld.shared.u32 %r638, [%rd172];
|
||
add.s32 %r639, %r638, %r637;
|
||
// inline asm
|
||
bfe.u32 %r448, %r453, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd173, %r448, 4;
|
||
add.s64 %rd174, %rd106, %rd173;
|
||
ld.shared.u32 %r640, [%rd174];
|
||
xor.b32 %r641, %r640, %r639;
|
||
// inline asm
|
||
bfe.u32 %r452, %r453, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd175, %r452, 4;
|
||
add.s64 %rd176, %rd110, %rd175;
|
||
ld.shared.u32 %r642, [%rd176];
|
||
add.s32 %r643, %r642, %r641;
|
||
xor.b32 %r644, %r3883, %r437;
|
||
xor.b32 %r469, %r644, %r643;
|
||
// inline asm
|
||
bfe.u32 %r456, %r469, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd177, %r456, 4;
|
||
add.s64 %rd178, %rd98, %rd177;
|
||
ld.shared.u32 %r645, [%rd178];
|
||
// inline asm
|
||
bfe.u32 %r460, %r469, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd179, %r460, 4;
|
||
add.s64 %rd180, %rd102, %rd179;
|
||
ld.shared.u32 %r646, [%rd180];
|
||
add.s32 %r647, %r646, %r645;
|
||
// inline asm
|
||
bfe.u32 %r464, %r469, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd181, %r464, 4;
|
||
add.s64 %rd182, %rd106, %rd181;
|
||
ld.shared.u32 %r648, [%rd182];
|
||
xor.b32 %r649, %r648, %r647;
|
||
// inline asm
|
||
bfe.u32 %r468, %r469, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd183, %r468, 4;
|
||
add.s64 %rd184, %rd110, %rd183;
|
||
ld.shared.u32 %r650, [%rd184];
|
||
add.s32 %r651, %r650, %r649;
|
||
xor.b32 %r652, %r3882, %r453;
|
||
xor.b32 %r485, %r652, %r651;
|
||
// inline asm
|
||
bfe.u32 %r472, %r485, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd185, %r472, 4;
|
||
add.s64 %rd186, %rd98, %rd185;
|
||
ld.shared.u32 %r653, [%rd186];
|
||
// inline asm
|
||
bfe.u32 %r476, %r485, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd187, %r476, 4;
|
||
add.s64 %rd188, %rd102, %rd187;
|
||
ld.shared.u32 %r654, [%rd188];
|
||
add.s32 %r655, %r654, %r653;
|
||
// inline asm
|
||
bfe.u32 %r480, %r485, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd189, %r480, 4;
|
||
add.s64 %rd190, %rd106, %rd189;
|
||
ld.shared.u32 %r656, [%rd190];
|
||
xor.b32 %r657, %r656, %r655;
|
||
// inline asm
|
||
bfe.u32 %r484, %r485, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd191, %r484, 4;
|
||
add.s64 %rd192, %rd110, %rd191;
|
||
ld.shared.u32 %r658, [%rd192];
|
||
add.s32 %r659, %r658, %r657;
|
||
xor.b32 %r660, %r3881, %r469;
|
||
xor.b32 %r501, %r660, %r659;
|
||
// inline asm
|
||
bfe.u32 %r488, %r501, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd193, %r488, 4;
|
||
add.s64 %rd194, %rd98, %rd193;
|
||
ld.shared.u32 %r661, [%rd194];
|
||
// inline asm
|
||
bfe.u32 %r492, %r501, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd195, %r492, 4;
|
||
add.s64 %rd196, %rd102, %rd195;
|
||
ld.shared.u32 %r662, [%rd196];
|
||
add.s32 %r663, %r662, %r661;
|
||
// inline asm
|
||
bfe.u32 %r496, %r501, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd197, %r496, 4;
|
||
add.s64 %rd198, %rd106, %rd197;
|
||
ld.shared.u32 %r664, [%rd198];
|
||
xor.b32 %r665, %r664, %r663;
|
||
// inline asm
|
||
bfe.u32 %r500, %r501, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd199, %r500, 4;
|
||
add.s64 %rd200, %rd110, %rd199;
|
||
ld.shared.u32 %r666, [%rd200];
|
||
add.s32 %r667, %r666, %r665;
|
||
xor.b32 %r668, %r3880, %r485;
|
||
xor.b32 %r517, %r668, %r667;
|
||
// inline asm
|
||
bfe.u32 %r504, %r517, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd201, %r504, 4;
|
||
add.s64 %rd202, %rd98, %rd201;
|
||
ld.shared.u32 %r669, [%rd202];
|
||
// inline asm
|
||
bfe.u32 %r508, %r517, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd203, %r508, 4;
|
||
add.s64 %rd204, %rd102, %rd203;
|
||
ld.shared.u32 %r670, [%rd204];
|
||
add.s32 %r671, %r670, %r669;
|
||
// inline asm
|
||
bfe.u32 %r512, %r517, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd205, %r512, 4;
|
||
add.s64 %rd206, %rd106, %rd205;
|
||
ld.shared.u32 %r672, [%rd206];
|
||
xor.b32 %r673, %r672, %r671;
|
||
// inline asm
|
||
bfe.u32 %r516, %r517, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd207, %r516, 4;
|
||
add.s64 %rd208, %rd110, %rd207;
|
||
ld.shared.u32 %r674, [%rd208];
|
||
add.s32 %r675, %r674, %r673;
|
||
xor.b32 %r676, %r3879, %r501;
|
||
xor.b32 %r533, %r676, %r675;
|
||
// inline asm
|
||
bfe.u32 %r520, %r533, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd209, %r520, 4;
|
||
add.s64 %rd210, %rd98, %rd209;
|
||
ld.shared.u32 %r677, [%rd210];
|
||
// inline asm
|
||
bfe.u32 %r524, %r533, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd211, %r524, 4;
|
||
add.s64 %rd212, %rd102, %rd211;
|
||
ld.shared.u32 %r678, [%rd212];
|
||
add.s32 %r679, %r678, %r677;
|
||
// inline asm
|
||
bfe.u32 %r528, %r533, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd213, %r528, 4;
|
||
add.s64 %rd214, %rd106, %rd213;
|
||
ld.shared.u32 %r680, [%rd214];
|
||
xor.b32 %r681, %r680, %r679;
|
||
// inline asm
|
||
bfe.u32 %r532, %r533, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd215, %r532, 4;
|
||
add.s64 %rd216, %rd110, %rd215;
|
||
ld.shared.u32 %r682, [%rd216];
|
||
add.s32 %r683, %r682, %r681;
|
||
xor.b32 %r684, %r3878, %r517;
|
||
xor.b32 %r549, %r684, %r683;
|
||
// inline asm
|
||
bfe.u32 %r536, %r549, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd217, %r536, 4;
|
||
add.s64 %rd218, %rd98, %rd217;
|
||
ld.shared.u32 %r685, [%rd218];
|
||
// inline asm
|
||
bfe.u32 %r540, %r549, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd219, %r540, 4;
|
||
add.s64 %rd220, %rd102, %rd219;
|
||
ld.shared.u32 %r686, [%rd220];
|
||
add.s32 %r687, %r686, %r685;
|
||
// inline asm
|
||
bfe.u32 %r544, %r549, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd221, %r544, 4;
|
||
add.s64 %rd222, %rd106, %rd221;
|
||
ld.shared.u32 %r688, [%rd222];
|
||
xor.b32 %r689, %r688, %r687;
|
||
// inline asm
|
||
bfe.u32 %r548, %r549, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd223, %r548, 4;
|
||
add.s64 %rd224, %rd110, %rd223;
|
||
ld.shared.u32 %r690, [%rd224];
|
||
add.s32 %r691, %r690, %r689;
|
||
xor.b32 %r692, %r3877, %r533;
|
||
xor.b32 %r565, %r692, %r691;
|
||
// inline asm
|
||
bfe.u32 %r552, %r565, %r554, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd225, %r552, 4;
|
||
add.s64 %rd226, %rd98, %rd225;
|
||
ld.shared.u32 %r693, [%rd226];
|
||
// inline asm
|
||
bfe.u32 %r556, %r565, %r558, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd227, %r556, 4;
|
||
add.s64 %rd228, %rd102, %rd227;
|
||
ld.shared.u32 %r694, [%rd228];
|
||
add.s32 %r695, %r694, %r693;
|
||
// inline asm
|
||
bfe.u32 %r560, %r565, %r567, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd229, %r560, 4;
|
||
add.s64 %rd230, %rd106, %rd229;
|
||
ld.shared.u32 %r696, [%rd230];
|
||
xor.b32 %r697, %r696, %r695;
|
||
// inline asm
|
||
bfe.u32 %r564, %r565, %r275, %r567;
|
||
// inline asm
|
||
mul.wide.u32 %rd231, %r564, 4;
|
||
add.s64 %rd232, %rd110, %rd231;
|
||
ld.shared.u32 %r698, [%rd232];
|
||
add.s32 %r699, %r698, %r697;
|
||
xor.b32 %r700, %r3876, %r549;
|
||
xor.b32 %r3897, %r700, %r699;
|
||
xor.b32 %r3898, %r3875, %r565;
|
||
st.local.v2.u32 [%rd1336], {%r3898, %r3897};
|
||
ld.local.v2.u32 {%r3892, %r3891}, [%rd93];
|
||
ld.local.v2.u32 {%r3890, %r3889}, [%rd33];
|
||
ld.local.v2.u32 {%r3888, %r3887}, [%rd33+8];
|
||
ld.local.v2.u32 {%r3886, %r3885}, [%rd33+16];
|
||
ld.local.v2.u32 {%r3884, %r3883}, [%rd33+24];
|
||
ld.local.v2.u32 {%r3882, %r3881}, [%rd33+32];
|
||
ld.local.v2.u32 {%r3880, %r3879}, [%rd33+40];
|
||
ld.local.v2.u32 {%r3878, %r3877}, [%rd33+48];
|
||
ld.local.v2.u32 {%r3876, %r3875}, [%rd33+56];
|
||
add.s64 %rd1336, %rd1336, 8;
|
||
add.s32 %r3893, %r3893, 2;
|
||
setp.lt.u32 %p12, %r3893, 18;
|
||
@%p12 bra BB3_20;
|
||
|
||
mul.wide.s32 %rd235, %r1, 1024;
|
||
add.s64 %rd1337, %rd84, %rd235;
|
||
mov.u32 %r719, 0;
|
||
mov.u32 %r3896, %r719;
|
||
|
||
BB3_22:
|
||
mov.u32 %r3863, 16;
|
||
mov.u32 %r3862, 8;
|
||
mov.u32 %r3861, 24;
|
||
xor.b32 %r1232, %r39, %r3898;
|
||
xor.b32 %r733, %r1232, %r3892;
|
||
// inline asm
|
||
bfe.u32 %r720, %r733, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd240, %r720, 4;
|
||
add.s64 %rd241, %rd98, %rd240;
|
||
ld.shared.u32 %r1233, [%rd241];
|
||
// inline asm
|
||
bfe.u32 %r724, %r733, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd244, %r724, 4;
|
||
add.s64 %rd245, %rd102, %rd244;
|
||
ld.shared.u32 %r1234, [%rd245];
|
||
add.s32 %r1235, %r1234, %r1233;
|
||
// inline asm
|
||
bfe.u32 %r728, %r733, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd248, %r728, 4;
|
||
add.s64 %rd249, %rd106, %rd248;
|
||
ld.shared.u32 %r1236, [%rd249];
|
||
xor.b32 %r1237, %r1236, %r1235;
|
||
// inline asm
|
||
bfe.u32 %r732, %r733, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd252, %r732, 4;
|
||
add.s64 %rd253, %rd110, %rd252;
|
||
ld.shared.u32 %r1238, [%rd253];
|
||
add.s32 %r1239, %r1238, %r1237;
|
||
xor.b32 %r1240, %r40, %r3897;
|
||
xor.b32 %r1241, %r1240, %r3891;
|
||
xor.b32 %r749, %r1241, %r1239;
|
||
// inline asm
|
||
bfe.u32 %r736, %r749, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd254, %r736, 4;
|
||
add.s64 %rd255, %rd98, %rd254;
|
||
ld.shared.u32 %r1242, [%rd255];
|
||
// inline asm
|
||
bfe.u32 %r740, %r749, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd256, %r740, 4;
|
||
add.s64 %rd257, %rd102, %rd256;
|
||
ld.shared.u32 %r1243, [%rd257];
|
||
add.s32 %r1244, %r1243, %r1242;
|
||
// inline asm
|
||
bfe.u32 %r744, %r749, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd258, %r744, 4;
|
||
add.s64 %rd259, %rd106, %rd258;
|
||
ld.shared.u32 %r1245, [%rd259];
|
||
xor.b32 %r1246, %r1245, %r1244;
|
||
// inline asm
|
||
bfe.u32 %r748, %r749, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd260, %r748, 4;
|
||
add.s64 %rd261, %rd110, %rd260;
|
||
ld.shared.u32 %r1247, [%rd261];
|
||
add.s32 %r1248, %r1247, %r1246;
|
||
xor.b32 %r1249, %r3890, %r733;
|
||
xor.b32 %r765, %r1249, %r1248;
|
||
// inline asm
|
||
bfe.u32 %r752, %r765, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd262, %r752, 4;
|
||
add.s64 %rd263, %rd98, %rd262;
|
||
ld.shared.u32 %r1250, [%rd263];
|
||
// inline asm
|
||
bfe.u32 %r756, %r765, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd264, %r756, 4;
|
||
add.s64 %rd265, %rd102, %rd264;
|
||
ld.shared.u32 %r1251, [%rd265];
|
||
add.s32 %r1252, %r1251, %r1250;
|
||
// inline asm
|
||
bfe.u32 %r760, %r765, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd266, %r760, 4;
|
||
add.s64 %rd267, %rd106, %rd266;
|
||
ld.shared.u32 %r1253, [%rd267];
|
||
xor.b32 %r1254, %r1253, %r1252;
|
||
// inline asm
|
||
bfe.u32 %r764, %r765, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd268, %r764, 4;
|
||
add.s64 %rd269, %rd110, %rd268;
|
||
ld.shared.u32 %r1255, [%rd269];
|
||
add.s32 %r1256, %r1255, %r1254;
|
||
xor.b32 %r1257, %r3889, %r749;
|
||
xor.b32 %r781, %r1257, %r1256;
|
||
// inline asm
|
||
bfe.u32 %r768, %r781, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd270, %r768, 4;
|
||
add.s64 %rd271, %rd98, %rd270;
|
||
ld.shared.u32 %r1258, [%rd271];
|
||
// inline asm
|
||
bfe.u32 %r772, %r781, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd272, %r772, 4;
|
||
add.s64 %rd273, %rd102, %rd272;
|
||
ld.shared.u32 %r1259, [%rd273];
|
||
add.s32 %r1260, %r1259, %r1258;
|
||
// inline asm
|
||
bfe.u32 %r776, %r781, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd274, %r776, 4;
|
||
add.s64 %rd275, %rd106, %rd274;
|
||
ld.shared.u32 %r1261, [%rd275];
|
||
xor.b32 %r1262, %r1261, %r1260;
|
||
// inline asm
|
||
bfe.u32 %r780, %r781, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd276, %r780, 4;
|
||
add.s64 %rd277, %rd110, %rd276;
|
||
ld.shared.u32 %r1263, [%rd277];
|
||
add.s32 %r1264, %r1263, %r1262;
|
||
xor.b32 %r1265, %r3888, %r765;
|
||
xor.b32 %r797, %r1265, %r1264;
|
||
// inline asm
|
||
bfe.u32 %r784, %r797, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd278, %r784, 4;
|
||
add.s64 %rd279, %rd98, %rd278;
|
||
ld.shared.u32 %r1266, [%rd279];
|
||
// inline asm
|
||
bfe.u32 %r788, %r797, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd280, %r788, 4;
|
||
add.s64 %rd281, %rd102, %rd280;
|
||
ld.shared.u32 %r1267, [%rd281];
|
||
add.s32 %r1268, %r1267, %r1266;
|
||
// inline asm
|
||
bfe.u32 %r792, %r797, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd282, %r792, 4;
|
||
add.s64 %rd283, %rd106, %rd282;
|
||
ld.shared.u32 %r1269, [%rd283];
|
||
xor.b32 %r1270, %r1269, %r1268;
|
||
// inline asm
|
||
bfe.u32 %r796, %r797, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd284, %r796, 4;
|
||
add.s64 %rd285, %rd110, %rd284;
|
||
ld.shared.u32 %r1271, [%rd285];
|
||
add.s32 %r1272, %r1271, %r1270;
|
||
xor.b32 %r1273, %r3887, %r781;
|
||
xor.b32 %r813, %r1273, %r1272;
|
||
// inline asm
|
||
bfe.u32 %r800, %r813, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd286, %r800, 4;
|
||
add.s64 %rd287, %rd98, %rd286;
|
||
ld.shared.u32 %r1274, [%rd287];
|
||
// inline asm
|
||
bfe.u32 %r804, %r813, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd288, %r804, 4;
|
||
add.s64 %rd289, %rd102, %rd288;
|
||
ld.shared.u32 %r1275, [%rd289];
|
||
add.s32 %r1276, %r1275, %r1274;
|
||
// inline asm
|
||
bfe.u32 %r808, %r813, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd290, %r808, 4;
|
||
add.s64 %rd291, %rd106, %rd290;
|
||
ld.shared.u32 %r1277, [%rd291];
|
||
xor.b32 %r1278, %r1277, %r1276;
|
||
// inline asm
|
||
bfe.u32 %r812, %r813, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd292, %r812, 4;
|
||
add.s64 %rd293, %rd110, %rd292;
|
||
ld.shared.u32 %r1279, [%rd293];
|
||
add.s32 %r1280, %r1279, %r1278;
|
||
xor.b32 %r1281, %r3886, %r797;
|
||
xor.b32 %r829, %r1281, %r1280;
|
||
// inline asm
|
||
bfe.u32 %r816, %r829, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd294, %r816, 4;
|
||
add.s64 %rd295, %rd98, %rd294;
|
||
ld.shared.u32 %r1282, [%rd295];
|
||
// inline asm
|
||
bfe.u32 %r820, %r829, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd296, %r820, 4;
|
||
add.s64 %rd297, %rd102, %rd296;
|
||
ld.shared.u32 %r1283, [%rd297];
|
||
add.s32 %r1284, %r1283, %r1282;
|
||
// inline asm
|
||
bfe.u32 %r824, %r829, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd298, %r824, 4;
|
||
add.s64 %rd299, %rd106, %rd298;
|
||
ld.shared.u32 %r1285, [%rd299];
|
||
xor.b32 %r1286, %r1285, %r1284;
|
||
// inline asm
|
||
bfe.u32 %r828, %r829, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd300, %r828, 4;
|
||
add.s64 %rd301, %rd110, %rd300;
|
||
ld.shared.u32 %r1287, [%rd301];
|
||
add.s32 %r1288, %r1287, %r1286;
|
||
xor.b32 %r1289, %r3885, %r813;
|
||
xor.b32 %r845, %r1289, %r1288;
|
||
// inline asm
|
||
bfe.u32 %r832, %r845, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd302, %r832, 4;
|
||
add.s64 %rd303, %rd98, %rd302;
|
||
ld.shared.u32 %r1290, [%rd303];
|
||
// inline asm
|
||
bfe.u32 %r836, %r845, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd304, %r836, 4;
|
||
add.s64 %rd305, %rd102, %rd304;
|
||
ld.shared.u32 %r1291, [%rd305];
|
||
add.s32 %r1292, %r1291, %r1290;
|
||
// inline asm
|
||
bfe.u32 %r840, %r845, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd306, %r840, 4;
|
||
add.s64 %rd307, %rd106, %rd306;
|
||
ld.shared.u32 %r1293, [%rd307];
|
||
xor.b32 %r1294, %r1293, %r1292;
|
||
// inline asm
|
||
bfe.u32 %r844, %r845, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd308, %r844, 4;
|
||
add.s64 %rd309, %rd110, %rd308;
|
||
ld.shared.u32 %r1295, [%rd309];
|
||
add.s32 %r1296, %r1295, %r1294;
|
||
xor.b32 %r1297, %r3884, %r829;
|
||
xor.b32 %r861, %r1297, %r1296;
|
||
// inline asm
|
||
bfe.u32 %r848, %r861, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd310, %r848, 4;
|
||
add.s64 %rd311, %rd98, %rd310;
|
||
ld.shared.u32 %r1298, [%rd311];
|
||
// inline asm
|
||
bfe.u32 %r852, %r861, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd312, %r852, 4;
|
||
add.s64 %rd313, %rd102, %rd312;
|
||
ld.shared.u32 %r1299, [%rd313];
|
||
add.s32 %r1300, %r1299, %r1298;
|
||
// inline asm
|
||
bfe.u32 %r856, %r861, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd314, %r856, 4;
|
||
add.s64 %rd315, %rd106, %rd314;
|
||
ld.shared.u32 %r1301, [%rd315];
|
||
xor.b32 %r1302, %r1301, %r1300;
|
||
// inline asm
|
||
bfe.u32 %r860, %r861, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd316, %r860, 4;
|
||
add.s64 %rd317, %rd110, %rd316;
|
||
ld.shared.u32 %r1303, [%rd317];
|
||
add.s32 %r1304, %r1303, %r1302;
|
||
xor.b32 %r1305, %r3883, %r845;
|
||
xor.b32 %r877, %r1305, %r1304;
|
||
// inline asm
|
||
bfe.u32 %r864, %r877, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd318, %r864, 4;
|
||
add.s64 %rd319, %rd98, %rd318;
|
||
ld.shared.u32 %r1306, [%rd319];
|
||
// inline asm
|
||
bfe.u32 %r868, %r877, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd320, %r868, 4;
|
||
add.s64 %rd321, %rd102, %rd320;
|
||
ld.shared.u32 %r1307, [%rd321];
|
||
add.s32 %r1308, %r1307, %r1306;
|
||
// inline asm
|
||
bfe.u32 %r872, %r877, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd322, %r872, 4;
|
||
add.s64 %rd323, %rd106, %rd322;
|
||
ld.shared.u32 %r1309, [%rd323];
|
||
xor.b32 %r1310, %r1309, %r1308;
|
||
// inline asm
|
||
bfe.u32 %r876, %r877, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd324, %r876, 4;
|
||
add.s64 %rd325, %rd110, %rd324;
|
||
ld.shared.u32 %r1311, [%rd325];
|
||
add.s32 %r1312, %r1311, %r1310;
|
||
xor.b32 %r1313, %r3882, %r861;
|
||
xor.b32 %r893, %r1313, %r1312;
|
||
// inline asm
|
||
bfe.u32 %r880, %r893, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd326, %r880, 4;
|
||
add.s64 %rd327, %rd98, %rd326;
|
||
ld.shared.u32 %r1314, [%rd327];
|
||
// inline asm
|
||
bfe.u32 %r884, %r893, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd328, %r884, 4;
|
||
add.s64 %rd329, %rd102, %rd328;
|
||
ld.shared.u32 %r1315, [%rd329];
|
||
add.s32 %r1316, %r1315, %r1314;
|
||
// inline asm
|
||
bfe.u32 %r888, %r893, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd330, %r888, 4;
|
||
add.s64 %rd331, %rd106, %rd330;
|
||
ld.shared.u32 %r1317, [%rd331];
|
||
xor.b32 %r1318, %r1317, %r1316;
|
||
// inline asm
|
||
bfe.u32 %r892, %r893, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd332, %r892, 4;
|
||
add.s64 %rd333, %rd110, %rd332;
|
||
ld.shared.u32 %r1319, [%rd333];
|
||
add.s32 %r1320, %r1319, %r1318;
|
||
xor.b32 %r1321, %r3881, %r877;
|
||
xor.b32 %r909, %r1321, %r1320;
|
||
// inline asm
|
||
bfe.u32 %r896, %r909, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd334, %r896, 4;
|
||
add.s64 %rd335, %rd98, %rd334;
|
||
ld.shared.u32 %r1322, [%rd335];
|
||
// inline asm
|
||
bfe.u32 %r900, %r909, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd336, %r900, 4;
|
||
add.s64 %rd337, %rd102, %rd336;
|
||
ld.shared.u32 %r1323, [%rd337];
|
||
add.s32 %r1324, %r1323, %r1322;
|
||
// inline asm
|
||
bfe.u32 %r904, %r909, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd338, %r904, 4;
|
||
add.s64 %rd339, %rd106, %rd338;
|
||
ld.shared.u32 %r1325, [%rd339];
|
||
xor.b32 %r1326, %r1325, %r1324;
|
||
// inline asm
|
||
bfe.u32 %r908, %r909, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd340, %r908, 4;
|
||
add.s64 %rd341, %rd110, %rd340;
|
||
ld.shared.u32 %r1327, [%rd341];
|
||
add.s32 %r1328, %r1327, %r1326;
|
||
xor.b32 %r1329, %r3880, %r893;
|
||
xor.b32 %r925, %r1329, %r1328;
|
||
// inline asm
|
||
bfe.u32 %r912, %r925, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd342, %r912, 4;
|
||
add.s64 %rd343, %rd98, %rd342;
|
||
ld.shared.u32 %r1330, [%rd343];
|
||
// inline asm
|
||
bfe.u32 %r916, %r925, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd344, %r916, 4;
|
||
add.s64 %rd345, %rd102, %rd344;
|
||
ld.shared.u32 %r1331, [%rd345];
|
||
add.s32 %r1332, %r1331, %r1330;
|
||
// inline asm
|
||
bfe.u32 %r920, %r925, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd346, %r920, 4;
|
||
add.s64 %rd347, %rd106, %rd346;
|
||
ld.shared.u32 %r1333, [%rd347];
|
||
xor.b32 %r1334, %r1333, %r1332;
|
||
// inline asm
|
||
bfe.u32 %r924, %r925, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd348, %r924, 4;
|
||
add.s64 %rd349, %rd110, %rd348;
|
||
ld.shared.u32 %r1335, [%rd349];
|
||
add.s32 %r1336, %r1335, %r1334;
|
||
xor.b32 %r1337, %r3879, %r909;
|
||
xor.b32 %r941, %r1337, %r1336;
|
||
// inline asm
|
||
bfe.u32 %r928, %r941, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd350, %r928, 4;
|
||
add.s64 %rd351, %rd98, %rd350;
|
||
ld.shared.u32 %r1338, [%rd351];
|
||
// inline asm
|
||
bfe.u32 %r932, %r941, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd352, %r932, 4;
|
||
add.s64 %rd353, %rd102, %rd352;
|
||
ld.shared.u32 %r1339, [%rd353];
|
||
add.s32 %r1340, %r1339, %r1338;
|
||
// inline asm
|
||
bfe.u32 %r936, %r941, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd354, %r936, 4;
|
||
add.s64 %rd355, %rd106, %rd354;
|
||
ld.shared.u32 %r1341, [%rd355];
|
||
xor.b32 %r1342, %r1341, %r1340;
|
||
// inline asm
|
||
bfe.u32 %r940, %r941, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd356, %r940, 4;
|
||
add.s64 %rd357, %rd110, %rd356;
|
||
ld.shared.u32 %r1343, [%rd357];
|
||
add.s32 %r1344, %r1343, %r1342;
|
||
xor.b32 %r1345, %r3878, %r925;
|
||
xor.b32 %r957, %r1345, %r1344;
|
||
// inline asm
|
||
bfe.u32 %r944, %r957, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd358, %r944, 4;
|
||
add.s64 %rd359, %rd98, %rd358;
|
||
ld.shared.u32 %r1346, [%rd359];
|
||
// inline asm
|
||
bfe.u32 %r948, %r957, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd360, %r948, 4;
|
||
add.s64 %rd361, %rd102, %rd360;
|
||
ld.shared.u32 %r1347, [%rd361];
|
||
add.s32 %r1348, %r1347, %r1346;
|
||
// inline asm
|
||
bfe.u32 %r952, %r957, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd362, %r952, 4;
|
||
add.s64 %rd363, %rd106, %rd362;
|
||
ld.shared.u32 %r1349, [%rd363];
|
||
xor.b32 %r1350, %r1349, %r1348;
|
||
// inline asm
|
||
bfe.u32 %r956, %r957, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd364, %r956, 4;
|
||
add.s64 %rd365, %rd110, %rd364;
|
||
ld.shared.u32 %r1351, [%rd365];
|
||
add.s32 %r1352, %r1351, %r1350;
|
||
xor.b32 %r1353, %r3877, %r941;
|
||
xor.b32 %r973, %r1353, %r1352;
|
||
// inline asm
|
||
bfe.u32 %r960, %r973, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd366, %r960, 4;
|
||
add.s64 %rd367, %rd98, %rd366;
|
||
ld.shared.u32 %r1354, [%rd367];
|
||
// inline asm
|
||
bfe.u32 %r964, %r973, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd368, %r964, 4;
|
||
add.s64 %rd369, %rd102, %rd368;
|
||
ld.shared.u32 %r1355, [%rd369];
|
||
add.s32 %r1356, %r1355, %r1354;
|
||
// inline asm
|
||
bfe.u32 %r968, %r973, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd370, %r968, 4;
|
||
add.s64 %rd371, %rd106, %rd370;
|
||
ld.shared.u32 %r1357, [%rd371];
|
||
xor.b32 %r1358, %r1357, %r1356;
|
||
// inline asm
|
||
bfe.u32 %r972, %r973, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd372, %r972, 4;
|
||
add.s64 %rd373, %rd110, %rd372;
|
||
ld.shared.u32 %r1359, [%rd373];
|
||
add.s32 %r1360, %r1359, %r1358;
|
||
xor.b32 %r1361, %r3876, %r957;
|
||
xor.b32 %r1362, %r1361, %r1360;
|
||
xor.b32 %r1363, %r3875, %r973;
|
||
st.shared.u32 [%rd1337], %r1363;
|
||
st.shared.u32 [%rd1337+4], %r1362;
|
||
xor.b32 %r1364, %r1363, %r37;
|
||
xor.b32 %r989, %r1364, %r3892;
|
||
// inline asm
|
||
bfe.u32 %r976, %r989, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd374, %r976, 4;
|
||
add.s64 %rd375, %rd98, %rd374;
|
||
ld.shared.u32 %r1365, [%rd375];
|
||
// inline asm
|
||
bfe.u32 %r980, %r989, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd376, %r980, 4;
|
||
add.s64 %rd377, %rd102, %rd376;
|
||
ld.shared.u32 %r1366, [%rd377];
|
||
add.s32 %r1367, %r1366, %r1365;
|
||
// inline asm
|
||
bfe.u32 %r984, %r989, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd378, %r984, 4;
|
||
add.s64 %rd379, %rd106, %rd378;
|
||
ld.shared.u32 %r1368, [%rd379];
|
||
xor.b32 %r1369, %r1368, %r1367;
|
||
// inline asm
|
||
bfe.u32 %r988, %r989, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd380, %r988, 4;
|
||
add.s64 %rd381, %rd110, %rd380;
|
||
ld.shared.u32 %r1370, [%rd381];
|
||
add.s32 %r1371, %r1370, %r1369;
|
||
xor.b32 %r1372, %r38, %r1362;
|
||
xor.b32 %r1373, %r1372, %r3891;
|
||
xor.b32 %r1005, %r1373, %r1371;
|
||
// inline asm
|
||
bfe.u32 %r992, %r1005, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd382, %r992, 4;
|
||
add.s64 %rd383, %rd98, %rd382;
|
||
ld.shared.u32 %r1374, [%rd383];
|
||
// inline asm
|
||
bfe.u32 %r996, %r1005, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd384, %r996, 4;
|
||
add.s64 %rd385, %rd102, %rd384;
|
||
ld.shared.u32 %r1375, [%rd385];
|
||
add.s32 %r1376, %r1375, %r1374;
|
||
// inline asm
|
||
bfe.u32 %r1000, %r1005, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd386, %r1000, 4;
|
||
add.s64 %rd387, %rd106, %rd386;
|
||
ld.shared.u32 %r1377, [%rd387];
|
||
xor.b32 %r1378, %r1377, %r1376;
|
||
// inline asm
|
||
bfe.u32 %r1004, %r1005, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd388, %r1004, 4;
|
||
add.s64 %rd389, %rd110, %rd388;
|
||
ld.shared.u32 %r1379, [%rd389];
|
||
add.s32 %r1380, %r1379, %r1378;
|
||
xor.b32 %r1381, %r3890, %r989;
|
||
xor.b32 %r1021, %r1381, %r1380;
|
||
// inline asm
|
||
bfe.u32 %r1008, %r1021, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd390, %r1008, 4;
|
||
add.s64 %rd391, %rd98, %rd390;
|
||
ld.shared.u32 %r1382, [%rd391];
|
||
// inline asm
|
||
bfe.u32 %r1012, %r1021, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd392, %r1012, 4;
|
||
add.s64 %rd393, %rd102, %rd392;
|
||
ld.shared.u32 %r1383, [%rd393];
|
||
add.s32 %r1384, %r1383, %r1382;
|
||
// inline asm
|
||
bfe.u32 %r1016, %r1021, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd394, %r1016, 4;
|
||
add.s64 %rd395, %rd106, %rd394;
|
||
ld.shared.u32 %r1385, [%rd395];
|
||
xor.b32 %r1386, %r1385, %r1384;
|
||
// inline asm
|
||
bfe.u32 %r1020, %r1021, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd396, %r1020, 4;
|
||
add.s64 %rd397, %rd110, %rd396;
|
||
ld.shared.u32 %r1387, [%rd397];
|
||
add.s32 %r1388, %r1387, %r1386;
|
||
xor.b32 %r1389, %r3889, %r1005;
|
||
xor.b32 %r1037, %r1389, %r1388;
|
||
// inline asm
|
||
bfe.u32 %r1024, %r1037, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd398, %r1024, 4;
|
||
add.s64 %rd399, %rd98, %rd398;
|
||
ld.shared.u32 %r1390, [%rd399];
|
||
// inline asm
|
||
bfe.u32 %r1028, %r1037, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd400, %r1028, 4;
|
||
add.s64 %rd401, %rd102, %rd400;
|
||
ld.shared.u32 %r1391, [%rd401];
|
||
add.s32 %r1392, %r1391, %r1390;
|
||
// inline asm
|
||
bfe.u32 %r1032, %r1037, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd402, %r1032, 4;
|
||
add.s64 %rd403, %rd106, %rd402;
|
||
ld.shared.u32 %r1393, [%rd403];
|
||
xor.b32 %r1394, %r1393, %r1392;
|
||
// inline asm
|
||
bfe.u32 %r1036, %r1037, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd404, %r1036, 4;
|
||
add.s64 %rd405, %rd110, %rd404;
|
||
ld.shared.u32 %r1395, [%rd405];
|
||
add.s32 %r1396, %r1395, %r1394;
|
||
xor.b32 %r1397, %r3888, %r1021;
|
||
xor.b32 %r1053, %r1397, %r1396;
|
||
// inline asm
|
||
bfe.u32 %r1040, %r1053, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd406, %r1040, 4;
|
||
add.s64 %rd407, %rd98, %rd406;
|
||
ld.shared.u32 %r1398, [%rd407];
|
||
// inline asm
|
||
bfe.u32 %r1044, %r1053, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd408, %r1044, 4;
|
||
add.s64 %rd409, %rd102, %rd408;
|
||
ld.shared.u32 %r1399, [%rd409];
|
||
add.s32 %r1400, %r1399, %r1398;
|
||
// inline asm
|
||
bfe.u32 %r1048, %r1053, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd410, %r1048, 4;
|
||
add.s64 %rd411, %rd106, %rd410;
|
||
ld.shared.u32 %r1401, [%rd411];
|
||
xor.b32 %r1402, %r1401, %r1400;
|
||
// inline asm
|
||
bfe.u32 %r1052, %r1053, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd412, %r1052, 4;
|
||
add.s64 %rd413, %rd110, %rd412;
|
||
ld.shared.u32 %r1403, [%rd413];
|
||
add.s32 %r1404, %r1403, %r1402;
|
||
xor.b32 %r1405, %r3887, %r1037;
|
||
xor.b32 %r1069, %r1405, %r1404;
|
||
// inline asm
|
||
bfe.u32 %r1056, %r1069, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd414, %r1056, 4;
|
||
add.s64 %rd415, %rd98, %rd414;
|
||
ld.shared.u32 %r1406, [%rd415];
|
||
// inline asm
|
||
bfe.u32 %r1060, %r1069, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd416, %r1060, 4;
|
||
add.s64 %rd417, %rd102, %rd416;
|
||
ld.shared.u32 %r1407, [%rd417];
|
||
add.s32 %r1408, %r1407, %r1406;
|
||
// inline asm
|
||
bfe.u32 %r1064, %r1069, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd418, %r1064, 4;
|
||
add.s64 %rd419, %rd106, %rd418;
|
||
ld.shared.u32 %r1409, [%rd419];
|
||
xor.b32 %r1410, %r1409, %r1408;
|
||
// inline asm
|
||
bfe.u32 %r1068, %r1069, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd420, %r1068, 4;
|
||
add.s64 %rd421, %rd110, %rd420;
|
||
ld.shared.u32 %r1411, [%rd421];
|
||
add.s32 %r1412, %r1411, %r1410;
|
||
xor.b32 %r1413, %r3886, %r1053;
|
||
xor.b32 %r1085, %r1413, %r1412;
|
||
// inline asm
|
||
bfe.u32 %r1072, %r1085, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd422, %r1072, 4;
|
||
add.s64 %rd423, %rd98, %rd422;
|
||
ld.shared.u32 %r1414, [%rd423];
|
||
// inline asm
|
||
bfe.u32 %r1076, %r1085, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd424, %r1076, 4;
|
||
add.s64 %rd425, %rd102, %rd424;
|
||
ld.shared.u32 %r1415, [%rd425];
|
||
add.s32 %r1416, %r1415, %r1414;
|
||
// inline asm
|
||
bfe.u32 %r1080, %r1085, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd426, %r1080, 4;
|
||
add.s64 %rd427, %rd106, %rd426;
|
||
ld.shared.u32 %r1417, [%rd427];
|
||
xor.b32 %r1418, %r1417, %r1416;
|
||
// inline asm
|
||
bfe.u32 %r1084, %r1085, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd428, %r1084, 4;
|
||
add.s64 %rd429, %rd110, %rd428;
|
||
ld.shared.u32 %r1419, [%rd429];
|
||
add.s32 %r1420, %r1419, %r1418;
|
||
xor.b32 %r1421, %r3885, %r1069;
|
||
xor.b32 %r1101, %r1421, %r1420;
|
||
// inline asm
|
||
bfe.u32 %r1088, %r1101, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd430, %r1088, 4;
|
||
add.s64 %rd431, %rd98, %rd430;
|
||
ld.shared.u32 %r1422, [%rd431];
|
||
// inline asm
|
||
bfe.u32 %r1092, %r1101, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd432, %r1092, 4;
|
||
add.s64 %rd433, %rd102, %rd432;
|
||
ld.shared.u32 %r1423, [%rd433];
|
||
add.s32 %r1424, %r1423, %r1422;
|
||
// inline asm
|
||
bfe.u32 %r1096, %r1101, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd434, %r1096, 4;
|
||
add.s64 %rd435, %rd106, %rd434;
|
||
ld.shared.u32 %r1425, [%rd435];
|
||
xor.b32 %r1426, %r1425, %r1424;
|
||
// inline asm
|
||
bfe.u32 %r1100, %r1101, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd436, %r1100, 4;
|
||
add.s64 %rd437, %rd110, %rd436;
|
||
ld.shared.u32 %r1427, [%rd437];
|
||
add.s32 %r1428, %r1427, %r1426;
|
||
xor.b32 %r1429, %r3884, %r1085;
|
||
xor.b32 %r1117, %r1429, %r1428;
|
||
// inline asm
|
||
bfe.u32 %r1104, %r1117, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd438, %r1104, 4;
|
||
add.s64 %rd439, %rd98, %rd438;
|
||
ld.shared.u32 %r1430, [%rd439];
|
||
// inline asm
|
||
bfe.u32 %r1108, %r1117, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd440, %r1108, 4;
|
||
add.s64 %rd441, %rd102, %rd440;
|
||
ld.shared.u32 %r1431, [%rd441];
|
||
add.s32 %r1432, %r1431, %r1430;
|
||
// inline asm
|
||
bfe.u32 %r1112, %r1117, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd442, %r1112, 4;
|
||
add.s64 %rd443, %rd106, %rd442;
|
||
ld.shared.u32 %r1433, [%rd443];
|
||
xor.b32 %r1434, %r1433, %r1432;
|
||
// inline asm
|
||
bfe.u32 %r1116, %r1117, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd444, %r1116, 4;
|
||
add.s64 %rd445, %rd110, %rd444;
|
||
ld.shared.u32 %r1435, [%rd445];
|
||
add.s32 %r1436, %r1435, %r1434;
|
||
xor.b32 %r1437, %r3883, %r1101;
|
||
xor.b32 %r1133, %r1437, %r1436;
|
||
// inline asm
|
||
bfe.u32 %r1120, %r1133, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd446, %r1120, 4;
|
||
add.s64 %rd447, %rd98, %rd446;
|
||
ld.shared.u32 %r1438, [%rd447];
|
||
// inline asm
|
||
bfe.u32 %r1124, %r1133, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd448, %r1124, 4;
|
||
add.s64 %rd449, %rd102, %rd448;
|
||
ld.shared.u32 %r1439, [%rd449];
|
||
add.s32 %r1440, %r1439, %r1438;
|
||
// inline asm
|
||
bfe.u32 %r1128, %r1133, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd450, %r1128, 4;
|
||
add.s64 %rd451, %rd106, %rd450;
|
||
ld.shared.u32 %r1441, [%rd451];
|
||
xor.b32 %r1442, %r1441, %r1440;
|
||
// inline asm
|
||
bfe.u32 %r1132, %r1133, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd452, %r1132, 4;
|
||
add.s64 %rd453, %rd110, %rd452;
|
||
ld.shared.u32 %r1443, [%rd453];
|
||
add.s32 %r1444, %r1443, %r1442;
|
||
xor.b32 %r1445, %r3882, %r1117;
|
||
xor.b32 %r1149, %r1445, %r1444;
|
||
// inline asm
|
||
bfe.u32 %r1136, %r1149, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd454, %r1136, 4;
|
||
add.s64 %rd455, %rd98, %rd454;
|
||
ld.shared.u32 %r1446, [%rd455];
|
||
// inline asm
|
||
bfe.u32 %r1140, %r1149, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd456, %r1140, 4;
|
||
add.s64 %rd457, %rd102, %rd456;
|
||
ld.shared.u32 %r1447, [%rd457];
|
||
add.s32 %r1448, %r1447, %r1446;
|
||
// inline asm
|
||
bfe.u32 %r1144, %r1149, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd458, %r1144, 4;
|
||
add.s64 %rd459, %rd106, %rd458;
|
||
ld.shared.u32 %r1449, [%rd459];
|
||
xor.b32 %r1450, %r1449, %r1448;
|
||
// inline asm
|
||
bfe.u32 %r1148, %r1149, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd460, %r1148, 4;
|
||
add.s64 %rd461, %rd110, %rd460;
|
||
ld.shared.u32 %r1451, [%rd461];
|
||
add.s32 %r1452, %r1451, %r1450;
|
||
xor.b32 %r1453, %r3881, %r1133;
|
||
xor.b32 %r1165, %r1453, %r1452;
|
||
// inline asm
|
||
bfe.u32 %r1152, %r1165, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd462, %r1152, 4;
|
||
add.s64 %rd463, %rd98, %rd462;
|
||
ld.shared.u32 %r1454, [%rd463];
|
||
// inline asm
|
||
bfe.u32 %r1156, %r1165, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd464, %r1156, 4;
|
||
add.s64 %rd465, %rd102, %rd464;
|
||
ld.shared.u32 %r1455, [%rd465];
|
||
add.s32 %r1456, %r1455, %r1454;
|
||
// inline asm
|
||
bfe.u32 %r1160, %r1165, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd466, %r1160, 4;
|
||
add.s64 %rd467, %rd106, %rd466;
|
||
ld.shared.u32 %r1457, [%rd467];
|
||
xor.b32 %r1458, %r1457, %r1456;
|
||
// inline asm
|
||
bfe.u32 %r1164, %r1165, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd468, %r1164, 4;
|
||
add.s64 %rd469, %rd110, %rd468;
|
||
ld.shared.u32 %r1459, [%rd469];
|
||
add.s32 %r1460, %r1459, %r1458;
|
||
xor.b32 %r1461, %r3880, %r1149;
|
||
xor.b32 %r1181, %r1461, %r1460;
|
||
// inline asm
|
||
bfe.u32 %r1168, %r1181, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd470, %r1168, 4;
|
||
add.s64 %rd471, %rd98, %rd470;
|
||
ld.shared.u32 %r1462, [%rd471];
|
||
// inline asm
|
||
bfe.u32 %r1172, %r1181, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd472, %r1172, 4;
|
||
add.s64 %rd473, %rd102, %rd472;
|
||
ld.shared.u32 %r1463, [%rd473];
|
||
add.s32 %r1464, %r1463, %r1462;
|
||
// inline asm
|
||
bfe.u32 %r1176, %r1181, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd474, %r1176, 4;
|
||
add.s64 %rd475, %rd106, %rd474;
|
||
ld.shared.u32 %r1465, [%rd475];
|
||
xor.b32 %r1466, %r1465, %r1464;
|
||
// inline asm
|
||
bfe.u32 %r1180, %r1181, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd476, %r1180, 4;
|
||
add.s64 %rd477, %rd110, %rd476;
|
||
ld.shared.u32 %r1467, [%rd477];
|
||
add.s32 %r1468, %r1467, %r1466;
|
||
xor.b32 %r1469, %r3879, %r1165;
|
||
xor.b32 %r1197, %r1469, %r1468;
|
||
// inline asm
|
||
bfe.u32 %r1184, %r1197, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd478, %r1184, 4;
|
||
add.s64 %rd479, %rd98, %rd478;
|
||
ld.shared.u32 %r1470, [%rd479];
|
||
// inline asm
|
||
bfe.u32 %r1188, %r1197, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd480, %r1188, 4;
|
||
add.s64 %rd481, %rd102, %rd480;
|
||
ld.shared.u32 %r1471, [%rd481];
|
||
add.s32 %r1472, %r1471, %r1470;
|
||
// inline asm
|
||
bfe.u32 %r1192, %r1197, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd482, %r1192, 4;
|
||
add.s64 %rd483, %rd106, %rd482;
|
||
ld.shared.u32 %r1473, [%rd483];
|
||
xor.b32 %r1474, %r1473, %r1472;
|
||
// inline asm
|
||
bfe.u32 %r1196, %r1197, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd484, %r1196, 4;
|
||
add.s64 %rd485, %rd110, %rd484;
|
||
ld.shared.u32 %r1475, [%rd485];
|
||
add.s32 %r1476, %r1475, %r1474;
|
||
xor.b32 %r1477, %r3878, %r1181;
|
||
xor.b32 %r1213, %r1477, %r1476;
|
||
// inline asm
|
||
bfe.u32 %r1200, %r1213, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd486, %r1200, 4;
|
||
add.s64 %rd487, %rd98, %rd486;
|
||
ld.shared.u32 %r1478, [%rd487];
|
||
// inline asm
|
||
bfe.u32 %r1204, %r1213, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd488, %r1204, 4;
|
||
add.s64 %rd489, %rd102, %rd488;
|
||
ld.shared.u32 %r1479, [%rd489];
|
||
add.s32 %r1480, %r1479, %r1478;
|
||
// inline asm
|
||
bfe.u32 %r1208, %r1213, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd490, %r1208, 4;
|
||
add.s64 %rd491, %rd106, %rd490;
|
||
ld.shared.u32 %r1481, [%rd491];
|
||
xor.b32 %r1482, %r1481, %r1480;
|
||
// inline asm
|
||
bfe.u32 %r1212, %r1213, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd492, %r1212, 4;
|
||
add.s64 %rd493, %rd110, %rd492;
|
||
ld.shared.u32 %r1483, [%rd493];
|
||
add.s32 %r1484, %r1483, %r1482;
|
||
xor.b32 %r1485, %r3877, %r1197;
|
||
xor.b32 %r1229, %r1485, %r1484;
|
||
// inline asm
|
||
bfe.u32 %r1216, %r1229, %r3861, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd494, %r1216, 4;
|
||
add.s64 %rd495, %rd98, %rd494;
|
||
ld.shared.u32 %r1486, [%rd495];
|
||
// inline asm
|
||
bfe.u32 %r1220, %r1229, %r3863, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd496, %r1220, 4;
|
||
add.s64 %rd497, %rd102, %rd496;
|
||
ld.shared.u32 %r1487, [%rd497];
|
||
add.s32 %r1488, %r1487, %r1486;
|
||
// inline asm
|
||
bfe.u32 %r1224, %r1229, %r3862, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd498, %r1224, 4;
|
||
add.s64 %rd499, %rd106, %rd498;
|
||
ld.shared.u32 %r1489, [%rd499];
|
||
xor.b32 %r1490, %r1489, %r1488;
|
||
// inline asm
|
||
bfe.u32 %r1228, %r1229, %r719, %r3862;
|
||
// inline asm
|
||
mul.wide.u32 %rd500, %r1228, 4;
|
||
add.s64 %rd501, %rd110, %rd500;
|
||
ld.shared.u32 %r1491, [%rd501];
|
||
add.s32 %r1492, %r1491, %r1490;
|
||
xor.b32 %r1493, %r3876, %r1213;
|
||
xor.b32 %r3897, %r1493, %r1492;
|
||
xor.b32 %r3898, %r3875, %r1229;
|
||
st.shared.u32 [%rd1337+8], %r3898;
|
||
st.shared.u32 [%rd1337+12], %r3897;
|
||
add.s64 %rd1337, %rd1337, 16;
|
||
add.s32 %r3896, %r3896, 4;
|
||
setp.lt.u32 %p13, %r3896, 256;
|
||
@%p13 bra BB3_22;
|
||
|
||
// Straight-line setup executed once after the loop that branches back to
// BB3_22 falls through, and before the loop headed by BB3_24.
//
// Computes %rd1338 = m03200_init$S1_all + tid.x * 1024, i.e. a byte address
// whose offset from the symbol advances by 1024 bytes per x-thread index.
// NOTE(review): presumably this is the thread's private slice of a .shared
// S-box array; the symbol's declaration is outside this chunk - confirm.
mov.u32 %r3866, %tid.x;
|
||
// Signed 32x32 -> 64-bit widening multiply: byte offset = tid.x * 1024.
mul.wide.s32 %rd1325, %r3866, 1024;
|
||
mov.u64 %rd1320, m03200_init$S1_all;
|
||
add.s64 %rd1338, %rd1320, %rd1325;
|
||
// %r1494 is the constant 0; the bfe.u32 instructions after BB3_24 use it as
// their bit-start operand (extracting the lowest field).
mov.u32 %r1494, 0;
|
||
// %r3899 starts at 0. NOTE(review): presumably the induction counter of the
// BB3_24 loop; its update is not visible in this chunk - confirm.
mov.u32 %r3899, %r1494;
|
||
|
||
BB3_24:
|
||
mov.u32 %r3854, 16;
|
||
mov.u32 %r3853, 8;
|
||
mov.u32 %r3852, 24;
|
||
xor.b32 %r2007, %r39, %r3898;
|
||
xor.b32 %r1508, %r2007, %r3892;
|
||
// inline asm
|
||
bfe.u32 %r1495, %r1508, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd507, %r1495, 4;
|
||
add.s64 %rd508, %rd98, %rd507;
|
||
ld.shared.u32 %r2008, [%rd508];
|
||
// inline asm
|
||
bfe.u32 %r1499, %r1508, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd511, %r1499, 4;
|
||
add.s64 %rd512, %rd102, %rd511;
|
||
ld.shared.u32 %r2009, [%rd512];
|
||
add.s32 %r2010, %r2009, %r2008;
|
||
// inline asm
|
||
bfe.u32 %r1503, %r1508, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd515, %r1503, 4;
|
||
add.s64 %rd516, %rd106, %rd515;
|
||
ld.shared.u32 %r2011, [%rd516];
|
||
xor.b32 %r2012, %r2011, %r2010;
|
||
// inline asm
|
||
bfe.u32 %r1507, %r1508, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd519, %r1507, 4;
|
||
add.s64 %rd520, %rd110, %rd519;
|
||
ld.shared.u32 %r2013, [%rd520];
|
||
add.s32 %r2014, %r2013, %r2012;
|
||
xor.b32 %r2015, %r40, %r3897;
|
||
xor.b32 %r2016, %r2015, %r3891;
|
||
xor.b32 %r1524, %r2016, %r2014;
|
||
// inline asm
|
||
bfe.u32 %r1511, %r1524, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd521, %r1511, 4;
|
||
add.s64 %rd522, %rd98, %rd521;
|
||
ld.shared.u32 %r2017, [%rd522];
|
||
// inline asm
|
||
bfe.u32 %r1515, %r1524, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd523, %r1515, 4;
|
||
add.s64 %rd524, %rd102, %rd523;
|
||
ld.shared.u32 %r2018, [%rd524];
|
||
add.s32 %r2019, %r2018, %r2017;
|
||
// inline asm
|
||
bfe.u32 %r1519, %r1524, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd525, %r1519, 4;
|
||
add.s64 %rd526, %rd106, %rd525;
|
||
ld.shared.u32 %r2020, [%rd526];
|
||
xor.b32 %r2021, %r2020, %r2019;
|
||
// inline asm
|
||
bfe.u32 %r1523, %r1524, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd527, %r1523, 4;
|
||
add.s64 %rd528, %rd110, %rd527;
|
||
ld.shared.u32 %r2022, [%rd528];
|
||
add.s32 %r2023, %r2022, %r2021;
|
||
xor.b32 %r2024, %r3890, %r1508;
|
||
xor.b32 %r1540, %r2024, %r2023;
|
||
// inline asm
|
||
bfe.u32 %r1527, %r1540, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd529, %r1527, 4;
|
||
add.s64 %rd530, %rd98, %rd529;
|
||
ld.shared.u32 %r2025, [%rd530];
|
||
// inline asm
|
||
bfe.u32 %r1531, %r1540, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd531, %r1531, 4;
|
||
add.s64 %rd532, %rd102, %rd531;
|
||
ld.shared.u32 %r2026, [%rd532];
|
||
add.s32 %r2027, %r2026, %r2025;
|
||
// inline asm
|
||
bfe.u32 %r1535, %r1540, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd533, %r1535, 4;
|
||
add.s64 %rd534, %rd106, %rd533;
|
||
ld.shared.u32 %r2028, [%rd534];
|
||
xor.b32 %r2029, %r2028, %r2027;
|
||
// inline asm
|
||
bfe.u32 %r1539, %r1540, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd535, %r1539, 4;
|
||
add.s64 %rd536, %rd110, %rd535;
|
||
ld.shared.u32 %r2030, [%rd536];
|
||
add.s32 %r2031, %r2030, %r2029;
|
||
xor.b32 %r2032, %r3889, %r1524;
|
||
xor.b32 %r1556, %r2032, %r2031;
|
||
// inline asm
|
||
bfe.u32 %r1543, %r1556, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd537, %r1543, 4;
|
||
add.s64 %rd538, %rd98, %rd537;
|
||
ld.shared.u32 %r2033, [%rd538];
|
||
// inline asm
|
||
bfe.u32 %r1547, %r1556, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd539, %r1547, 4;
|
||
add.s64 %rd540, %rd102, %rd539;
|
||
ld.shared.u32 %r2034, [%rd540];
|
||
add.s32 %r2035, %r2034, %r2033;
|
||
// inline asm
|
||
bfe.u32 %r1551, %r1556, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd541, %r1551, 4;
|
||
add.s64 %rd542, %rd106, %rd541;
|
||
ld.shared.u32 %r2036, [%rd542];
|
||
xor.b32 %r2037, %r2036, %r2035;
|
||
// inline asm
|
||
bfe.u32 %r1555, %r1556, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd543, %r1555, 4;
|
||
add.s64 %rd544, %rd110, %rd543;
|
||
ld.shared.u32 %r2038, [%rd544];
|
||
add.s32 %r2039, %r2038, %r2037;
|
||
xor.b32 %r2040, %r3888, %r1540;
|
||
xor.b32 %r1572, %r2040, %r2039;
|
||
// inline asm
|
||
bfe.u32 %r1559, %r1572, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd545, %r1559, 4;
|
||
add.s64 %rd546, %rd98, %rd545;
|
||
ld.shared.u32 %r2041, [%rd546];
|
||
// inline asm
|
||
bfe.u32 %r1563, %r1572, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd547, %r1563, 4;
|
||
add.s64 %rd548, %rd102, %rd547;
|
||
ld.shared.u32 %r2042, [%rd548];
|
||
add.s32 %r2043, %r2042, %r2041;
|
||
// inline asm
|
||
bfe.u32 %r1567, %r1572, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd549, %r1567, 4;
|
||
add.s64 %rd550, %rd106, %rd549;
|
||
ld.shared.u32 %r2044, [%rd550];
|
||
xor.b32 %r2045, %r2044, %r2043;
|
||
// inline asm
|
||
bfe.u32 %r1571, %r1572, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd551, %r1571, 4;
|
||
add.s64 %rd552, %rd110, %rd551;
|
||
ld.shared.u32 %r2046, [%rd552];
|
||
add.s32 %r2047, %r2046, %r2045;
|
||
xor.b32 %r2048, %r3887, %r1556;
|
||
xor.b32 %r1588, %r2048, %r2047;
|
||
// inline asm
|
||
bfe.u32 %r1575, %r1588, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd553, %r1575, 4;
|
||
add.s64 %rd554, %rd98, %rd553;
|
||
ld.shared.u32 %r2049, [%rd554];
|
||
// inline asm
|
||
bfe.u32 %r1579, %r1588, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd555, %r1579, 4;
|
||
add.s64 %rd556, %rd102, %rd555;
|
||
ld.shared.u32 %r2050, [%rd556];
|
||
add.s32 %r2051, %r2050, %r2049;
|
||
// inline asm
|
||
bfe.u32 %r1583, %r1588, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd557, %r1583, 4;
|
||
add.s64 %rd558, %rd106, %rd557;
|
||
ld.shared.u32 %r2052, [%rd558];
|
||
xor.b32 %r2053, %r2052, %r2051;
|
||
// inline asm
|
||
bfe.u32 %r1587, %r1588, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd559, %r1587, 4;
|
||
add.s64 %rd560, %rd110, %rd559;
|
||
ld.shared.u32 %r2054, [%rd560];
|
||
add.s32 %r2055, %r2054, %r2053;
|
||
xor.b32 %r2056, %r3886, %r1572;
|
||
xor.b32 %r1604, %r2056, %r2055;
|
||
// inline asm
|
||
bfe.u32 %r1591, %r1604, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd561, %r1591, 4;
|
||
add.s64 %rd562, %rd98, %rd561;
|
||
ld.shared.u32 %r2057, [%rd562];
|
||
// inline asm
|
||
bfe.u32 %r1595, %r1604, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd563, %r1595, 4;
|
||
add.s64 %rd564, %rd102, %rd563;
|
||
ld.shared.u32 %r2058, [%rd564];
|
||
add.s32 %r2059, %r2058, %r2057;
|
||
// inline asm
|
||
bfe.u32 %r1599, %r1604, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd565, %r1599, 4;
|
||
add.s64 %rd566, %rd106, %rd565;
|
||
ld.shared.u32 %r2060, [%rd566];
|
||
xor.b32 %r2061, %r2060, %r2059;
|
||
// inline asm
|
||
bfe.u32 %r1603, %r1604, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd567, %r1603, 4;
|
||
add.s64 %rd568, %rd110, %rd567;
|
||
ld.shared.u32 %r2062, [%rd568];
|
||
add.s32 %r2063, %r2062, %r2061;
|
||
xor.b32 %r2064, %r3885, %r1588;
|
||
xor.b32 %r1620, %r2064, %r2063;
|
||
// inline asm
|
||
bfe.u32 %r1607, %r1620, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd569, %r1607, 4;
|
||
add.s64 %rd570, %rd98, %rd569;
|
||
ld.shared.u32 %r2065, [%rd570];
|
||
// inline asm
|
||
bfe.u32 %r1611, %r1620, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd571, %r1611, 4;
|
||
add.s64 %rd572, %rd102, %rd571;
|
||
ld.shared.u32 %r2066, [%rd572];
|
||
add.s32 %r2067, %r2066, %r2065;
|
||
// inline asm
|
||
bfe.u32 %r1615, %r1620, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd573, %r1615, 4;
|
||
add.s64 %rd574, %rd106, %rd573;
|
||
ld.shared.u32 %r2068, [%rd574];
|
||
xor.b32 %r2069, %r2068, %r2067;
|
||
// inline asm
|
||
bfe.u32 %r1619, %r1620, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd575, %r1619, 4;
|
||
add.s64 %rd576, %rd110, %rd575;
|
||
ld.shared.u32 %r2070, [%rd576];
|
||
add.s32 %r2071, %r2070, %r2069;
|
||
xor.b32 %r2072, %r3884, %r1604;
|
||
xor.b32 %r1636, %r2072, %r2071;
|
||
// inline asm
|
||
bfe.u32 %r1623, %r1636, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd577, %r1623, 4;
|
||
add.s64 %rd578, %rd98, %rd577;
|
||
ld.shared.u32 %r2073, [%rd578];
|
||
// inline asm
|
||
bfe.u32 %r1627, %r1636, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd579, %r1627, 4;
|
||
add.s64 %rd580, %rd102, %rd579;
|
||
ld.shared.u32 %r2074, [%rd580];
|
||
add.s32 %r2075, %r2074, %r2073;
|
||
// inline asm
|
||
bfe.u32 %r1631, %r1636, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd581, %r1631, 4;
|
||
add.s64 %rd582, %rd106, %rd581;
|
||
ld.shared.u32 %r2076, [%rd582];
|
||
xor.b32 %r2077, %r2076, %r2075;
|
||
// inline asm
|
||
bfe.u32 %r1635, %r1636, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd583, %r1635, 4;
|
||
add.s64 %rd584, %rd110, %rd583;
|
||
ld.shared.u32 %r2078, [%rd584];
|
||
add.s32 %r2079, %r2078, %r2077;
|
||
xor.b32 %r2080, %r3883, %r1620;
|
||
xor.b32 %r1652, %r2080, %r2079;
|
||
// inline asm
|
||
bfe.u32 %r1639, %r1652, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd585, %r1639, 4;
|
||
add.s64 %rd586, %rd98, %rd585;
|
||
ld.shared.u32 %r2081, [%rd586];
|
||
// inline asm
|
||
bfe.u32 %r1643, %r1652, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd587, %r1643, 4;
|
||
add.s64 %rd588, %rd102, %rd587;
|
||
ld.shared.u32 %r2082, [%rd588];
|
||
add.s32 %r2083, %r2082, %r2081;
|
||
// inline asm
|
||
bfe.u32 %r1647, %r1652, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd589, %r1647, 4;
|
||
add.s64 %rd590, %rd106, %rd589;
|
||
ld.shared.u32 %r2084, [%rd590];
|
||
xor.b32 %r2085, %r2084, %r2083;
|
||
// inline asm
|
||
bfe.u32 %r1651, %r1652, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd591, %r1651, 4;
|
||
add.s64 %rd592, %rd110, %rd591;
|
||
ld.shared.u32 %r2086, [%rd592];
|
||
add.s32 %r2087, %r2086, %r2085;
|
||
xor.b32 %r2088, %r3882, %r1636;
|
||
xor.b32 %r1668, %r2088, %r2087;
|
||
// inline asm
|
||
bfe.u32 %r1655, %r1668, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd593, %r1655, 4;
|
||
add.s64 %rd594, %rd98, %rd593;
|
||
ld.shared.u32 %r2089, [%rd594];
|
||
// inline asm
|
||
bfe.u32 %r1659, %r1668, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd595, %r1659, 4;
|
||
add.s64 %rd596, %rd102, %rd595;
|
||
ld.shared.u32 %r2090, [%rd596];
|
||
add.s32 %r2091, %r2090, %r2089;
|
||
// inline asm
|
||
bfe.u32 %r1663, %r1668, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd597, %r1663, 4;
|
||
add.s64 %rd598, %rd106, %rd597;
|
||
ld.shared.u32 %r2092, [%rd598];
|
||
xor.b32 %r2093, %r2092, %r2091;
|
||
// inline asm
|
||
bfe.u32 %r1667, %r1668, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd599, %r1667, 4;
|
||
add.s64 %rd600, %rd110, %rd599;
|
||
ld.shared.u32 %r2094, [%rd600];
|
||
add.s32 %r2095, %r2094, %r2093;
|
||
xor.b32 %r2096, %r3881, %r1652;
|
||
xor.b32 %r1684, %r2096, %r2095;
|
||
// inline asm
|
||
bfe.u32 %r1671, %r1684, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd601, %r1671, 4;
|
||
add.s64 %rd602, %rd98, %rd601;
|
||
ld.shared.u32 %r2097, [%rd602];
|
||
// inline asm
|
||
bfe.u32 %r1675, %r1684, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd603, %r1675, 4;
|
||
add.s64 %rd604, %rd102, %rd603;
|
||
ld.shared.u32 %r2098, [%rd604];
|
||
add.s32 %r2099, %r2098, %r2097;
|
||
// inline asm
|
||
bfe.u32 %r1679, %r1684, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd605, %r1679, 4;
|
||
add.s64 %rd606, %rd106, %rd605;
|
||
ld.shared.u32 %r2100, [%rd606];
|
||
xor.b32 %r2101, %r2100, %r2099;
|
||
// inline asm
|
||
bfe.u32 %r1683, %r1684, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd607, %r1683, 4;
|
||
add.s64 %rd608, %rd110, %rd607;
|
||
ld.shared.u32 %r2102, [%rd608];
|
||
add.s32 %r2103, %r2102, %r2101;
|
||
xor.b32 %r2104, %r3880, %r1668;
|
||
xor.b32 %r1700, %r2104, %r2103;
|
||
// inline asm
|
||
bfe.u32 %r1687, %r1700, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd609, %r1687, 4;
|
||
add.s64 %rd610, %rd98, %rd609;
|
||
ld.shared.u32 %r2105, [%rd610];
|
||
// inline asm
|
||
bfe.u32 %r1691, %r1700, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd611, %r1691, 4;
|
||
add.s64 %rd612, %rd102, %rd611;
|
||
ld.shared.u32 %r2106, [%rd612];
|
||
add.s32 %r2107, %r2106, %r2105;
|
||
// inline asm
|
||
bfe.u32 %r1695, %r1700, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd613, %r1695, 4;
|
||
add.s64 %rd614, %rd106, %rd613;
|
||
ld.shared.u32 %r2108, [%rd614];
|
||
xor.b32 %r2109, %r2108, %r2107;
|
||
// inline asm
|
||
bfe.u32 %r1699, %r1700, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd615, %r1699, 4;
|
||
add.s64 %rd616, %rd110, %rd615;
|
||
ld.shared.u32 %r2110, [%rd616];
|
||
add.s32 %r2111, %r2110, %r2109;
|
||
xor.b32 %r2112, %r3879, %r1684;
|
||
xor.b32 %r1716, %r2112, %r2111;
|
||
// inline asm
|
||
bfe.u32 %r1703, %r1716, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd617, %r1703, 4;
|
||
add.s64 %rd618, %rd98, %rd617;
|
||
ld.shared.u32 %r2113, [%rd618];
|
||
// inline asm
|
||
bfe.u32 %r1707, %r1716, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd619, %r1707, 4;
|
||
add.s64 %rd620, %rd102, %rd619;
|
||
ld.shared.u32 %r2114, [%rd620];
|
||
add.s32 %r2115, %r2114, %r2113;
|
||
// inline asm
|
||
bfe.u32 %r1711, %r1716, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd621, %r1711, 4;
|
||
add.s64 %rd622, %rd106, %rd621;
|
||
ld.shared.u32 %r2116, [%rd622];
|
||
xor.b32 %r2117, %r2116, %r2115;
|
||
// inline asm
|
||
bfe.u32 %r1715, %r1716, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd623, %r1715, 4;
|
||
add.s64 %rd624, %rd110, %rd623;
|
||
ld.shared.u32 %r2118, [%rd624];
|
||
add.s32 %r2119, %r2118, %r2117;
|
||
xor.b32 %r2120, %r3878, %r1700;
|
||
xor.b32 %r1732, %r2120, %r2119;
|
||
// inline asm
|
||
bfe.u32 %r1719, %r1732, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd625, %r1719, 4;
|
||
add.s64 %rd626, %rd98, %rd625;
|
||
ld.shared.u32 %r2121, [%rd626];
|
||
// inline asm
|
||
bfe.u32 %r1723, %r1732, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd627, %r1723, 4;
|
||
add.s64 %rd628, %rd102, %rd627;
|
||
ld.shared.u32 %r2122, [%rd628];
|
||
add.s32 %r2123, %r2122, %r2121;
|
||
// inline asm
|
||
bfe.u32 %r1727, %r1732, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd629, %r1727, 4;
|
||
add.s64 %rd630, %rd106, %rd629;
|
||
ld.shared.u32 %r2124, [%rd630];
|
||
xor.b32 %r2125, %r2124, %r2123;
|
||
// inline asm
|
||
bfe.u32 %r1731, %r1732, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd631, %r1731, 4;
|
||
add.s64 %rd632, %rd110, %rd631;
|
||
ld.shared.u32 %r2126, [%rd632];
|
||
add.s32 %r2127, %r2126, %r2125;
|
||
xor.b32 %r2128, %r3877, %r1716;
|
||
xor.b32 %r1748, %r2128, %r2127;
|
||
// inline asm
|
||
bfe.u32 %r1735, %r1748, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd633, %r1735, 4;
|
||
add.s64 %rd634, %rd98, %rd633;
|
||
ld.shared.u32 %r2129, [%rd634];
|
||
// inline asm
|
||
bfe.u32 %r1739, %r1748, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd635, %r1739, 4;
|
||
add.s64 %rd636, %rd102, %rd635;
|
||
ld.shared.u32 %r2130, [%rd636];
|
||
add.s32 %r2131, %r2130, %r2129;
|
||
// inline asm
|
||
bfe.u32 %r1743, %r1748, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd637, %r1743, 4;
|
||
add.s64 %rd638, %rd106, %rd637;
|
||
ld.shared.u32 %r2132, [%rd638];
|
||
xor.b32 %r2133, %r2132, %r2131;
|
||
// inline asm
|
||
bfe.u32 %r1747, %r1748, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd639, %r1747, 4;
|
||
add.s64 %rd640, %rd110, %rd639;
|
||
ld.shared.u32 %r2134, [%rd640];
|
||
add.s32 %r2135, %r2134, %r2133;
|
||
xor.b32 %r2136, %r3876, %r1732;
|
||
xor.b32 %r2137, %r2136, %r2135;
|
||
xor.b32 %r2138, %r3875, %r1748;
|
||
st.shared.u32 [%rd1338], %r2138;
|
||
st.shared.u32 [%rd1338+4], %r2137;
|
||
xor.b32 %r2139, %r2138, %r37;
|
||
xor.b32 %r1764, %r2139, %r3892;
|
||
// inline asm
|
||
bfe.u32 %r1751, %r1764, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd641, %r1751, 4;
|
||
add.s64 %rd642, %rd98, %rd641;
|
||
ld.shared.u32 %r2140, [%rd642];
|
||
// inline asm
|
||
bfe.u32 %r1755, %r1764, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd643, %r1755, 4;
|
||
add.s64 %rd644, %rd102, %rd643;
|
||
ld.shared.u32 %r2141, [%rd644];
|
||
add.s32 %r2142, %r2141, %r2140;
|
||
// inline asm
|
||
bfe.u32 %r1759, %r1764, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd645, %r1759, 4;
|
||
add.s64 %rd646, %rd106, %rd645;
|
||
ld.shared.u32 %r2143, [%rd646];
|
||
xor.b32 %r2144, %r2143, %r2142;
|
||
// inline asm
|
||
bfe.u32 %r1763, %r1764, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd647, %r1763, 4;
|
||
add.s64 %rd648, %rd110, %rd647;
|
||
ld.shared.u32 %r2145, [%rd648];
|
||
add.s32 %r2146, %r2145, %r2144;
|
||
xor.b32 %r2147, %r38, %r2137;
|
||
xor.b32 %r2148, %r2147, %r3891;
|
||
xor.b32 %r1780, %r2148, %r2146;
|
||
// inline asm
|
||
bfe.u32 %r1767, %r1780, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd649, %r1767, 4;
|
||
add.s64 %rd650, %rd98, %rd649;
|
||
ld.shared.u32 %r2149, [%rd650];
|
||
// inline asm
|
||
bfe.u32 %r1771, %r1780, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd651, %r1771, 4;
|
||
add.s64 %rd652, %rd102, %rd651;
|
||
ld.shared.u32 %r2150, [%rd652];
|
||
add.s32 %r2151, %r2150, %r2149;
|
||
// inline asm
|
||
bfe.u32 %r1775, %r1780, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd653, %r1775, 4;
|
||
add.s64 %rd654, %rd106, %rd653;
|
||
ld.shared.u32 %r2152, [%rd654];
|
||
xor.b32 %r2153, %r2152, %r2151;
|
||
// inline asm
|
||
bfe.u32 %r1779, %r1780, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd655, %r1779, 4;
|
||
add.s64 %rd656, %rd110, %rd655;
|
||
ld.shared.u32 %r2154, [%rd656];
|
||
add.s32 %r2155, %r2154, %r2153;
|
||
xor.b32 %r2156, %r3890, %r1764;
|
||
xor.b32 %r1796, %r2156, %r2155;
|
||
// inline asm
|
||
bfe.u32 %r1783, %r1796, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd657, %r1783, 4;
|
||
add.s64 %rd658, %rd98, %rd657;
|
||
ld.shared.u32 %r2157, [%rd658];
|
||
// inline asm
|
||
bfe.u32 %r1787, %r1796, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd659, %r1787, 4;
|
||
add.s64 %rd660, %rd102, %rd659;
|
||
ld.shared.u32 %r2158, [%rd660];
|
||
add.s32 %r2159, %r2158, %r2157;
|
||
// inline asm
|
||
bfe.u32 %r1791, %r1796, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd661, %r1791, 4;
|
||
add.s64 %rd662, %rd106, %rd661;
|
||
ld.shared.u32 %r2160, [%rd662];
|
||
xor.b32 %r2161, %r2160, %r2159;
|
||
// inline asm
|
||
bfe.u32 %r1795, %r1796, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd663, %r1795, 4;
|
||
add.s64 %rd664, %rd110, %rd663;
|
||
ld.shared.u32 %r2162, [%rd664];
|
||
add.s32 %r2163, %r2162, %r2161;
|
||
xor.b32 %r2164, %r3889, %r1780;
|
||
xor.b32 %r1812, %r2164, %r2163;
|
||
// inline asm
|
||
bfe.u32 %r1799, %r1812, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd665, %r1799, 4;
|
||
add.s64 %rd666, %rd98, %rd665;
|
||
ld.shared.u32 %r2165, [%rd666];
|
||
// inline asm
|
||
bfe.u32 %r1803, %r1812, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd667, %r1803, 4;
|
||
add.s64 %rd668, %rd102, %rd667;
|
||
ld.shared.u32 %r2166, [%rd668];
|
||
add.s32 %r2167, %r2166, %r2165;
|
||
// inline asm
|
||
bfe.u32 %r1807, %r1812, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd669, %r1807, 4;
|
||
add.s64 %rd670, %rd106, %rd669;
|
||
ld.shared.u32 %r2168, [%rd670];
|
||
xor.b32 %r2169, %r2168, %r2167;
|
||
// inline asm
|
||
bfe.u32 %r1811, %r1812, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd671, %r1811, 4;
|
||
add.s64 %rd672, %rd110, %rd671;
|
||
ld.shared.u32 %r2170, [%rd672];
|
||
add.s32 %r2171, %r2170, %r2169;
|
||
xor.b32 %r2172, %r3888, %r1796;
|
||
xor.b32 %r1828, %r2172, %r2171;
|
||
// inline asm
|
||
bfe.u32 %r1815, %r1828, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd673, %r1815, 4;
|
||
add.s64 %rd674, %rd98, %rd673;
|
||
ld.shared.u32 %r2173, [%rd674];
|
||
// inline asm
|
||
bfe.u32 %r1819, %r1828, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd675, %r1819, 4;
|
||
add.s64 %rd676, %rd102, %rd675;
|
||
ld.shared.u32 %r2174, [%rd676];
|
||
add.s32 %r2175, %r2174, %r2173;
|
||
// inline asm
|
||
bfe.u32 %r1823, %r1828, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd677, %r1823, 4;
|
||
add.s64 %rd678, %rd106, %rd677;
|
||
ld.shared.u32 %r2176, [%rd678];
|
||
xor.b32 %r2177, %r2176, %r2175;
|
||
// inline asm
|
||
bfe.u32 %r1827, %r1828, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd679, %r1827, 4;
|
||
add.s64 %rd680, %rd110, %rd679;
|
||
ld.shared.u32 %r2178, [%rd680];
|
||
add.s32 %r2179, %r2178, %r2177;
|
||
xor.b32 %r2180, %r3887, %r1812;
|
||
xor.b32 %r1844, %r2180, %r2179;
|
||
// inline asm
|
||
bfe.u32 %r1831, %r1844, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd681, %r1831, 4;
|
||
add.s64 %rd682, %rd98, %rd681;
|
||
ld.shared.u32 %r2181, [%rd682];
|
||
// inline asm
|
||
bfe.u32 %r1835, %r1844, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd683, %r1835, 4;
|
||
add.s64 %rd684, %rd102, %rd683;
|
||
ld.shared.u32 %r2182, [%rd684];
|
||
add.s32 %r2183, %r2182, %r2181;
|
||
// inline asm
|
||
bfe.u32 %r1839, %r1844, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd685, %r1839, 4;
|
||
add.s64 %rd686, %rd106, %rd685;
|
||
ld.shared.u32 %r2184, [%rd686];
|
||
xor.b32 %r2185, %r2184, %r2183;
|
||
// inline asm
|
||
bfe.u32 %r1843, %r1844, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd687, %r1843, 4;
|
||
add.s64 %rd688, %rd110, %rd687;
|
||
ld.shared.u32 %r2186, [%rd688];
|
||
add.s32 %r2187, %r2186, %r2185;
|
||
xor.b32 %r2188, %r3886, %r1828;
|
||
xor.b32 %r1860, %r2188, %r2187;
|
||
// inline asm
|
||
bfe.u32 %r1847, %r1860, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd689, %r1847, 4;
|
||
add.s64 %rd690, %rd98, %rd689;
|
||
ld.shared.u32 %r2189, [%rd690];
|
||
// inline asm
|
||
bfe.u32 %r1851, %r1860, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd691, %r1851, 4;
|
||
add.s64 %rd692, %rd102, %rd691;
|
||
ld.shared.u32 %r2190, [%rd692];
|
||
add.s32 %r2191, %r2190, %r2189;
|
||
// inline asm
|
||
bfe.u32 %r1855, %r1860, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd693, %r1855, 4;
|
||
add.s64 %rd694, %rd106, %rd693;
|
||
ld.shared.u32 %r2192, [%rd694];
|
||
xor.b32 %r2193, %r2192, %r2191;
|
||
// inline asm
|
||
bfe.u32 %r1859, %r1860, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd695, %r1859, 4;
|
||
add.s64 %rd696, %rd110, %rd695;
|
||
ld.shared.u32 %r2194, [%rd696];
|
||
add.s32 %r2195, %r2194, %r2193;
|
||
xor.b32 %r2196, %r3885, %r1844;
|
||
xor.b32 %r1876, %r2196, %r2195;
|
||
// inline asm
|
||
bfe.u32 %r1863, %r1876, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd697, %r1863, 4;
|
||
add.s64 %rd698, %rd98, %rd697;
|
||
ld.shared.u32 %r2197, [%rd698];
|
||
// inline asm
|
||
bfe.u32 %r1867, %r1876, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd699, %r1867, 4;
|
||
add.s64 %rd700, %rd102, %rd699;
|
||
ld.shared.u32 %r2198, [%rd700];
|
||
add.s32 %r2199, %r2198, %r2197;
|
||
// inline asm
|
||
bfe.u32 %r1871, %r1876, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd701, %r1871, 4;
|
||
add.s64 %rd702, %rd106, %rd701;
|
||
ld.shared.u32 %r2200, [%rd702];
|
||
xor.b32 %r2201, %r2200, %r2199;
|
||
// inline asm
|
||
bfe.u32 %r1875, %r1876, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd703, %r1875, 4;
|
||
add.s64 %rd704, %rd110, %rd703;
|
||
ld.shared.u32 %r2202, [%rd704];
|
||
add.s32 %r2203, %r2202, %r2201;
|
||
xor.b32 %r2204, %r3884, %r1860;
|
||
xor.b32 %r1892, %r2204, %r2203;
|
||
// inline asm
|
||
bfe.u32 %r1879, %r1892, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd705, %r1879, 4;
|
||
add.s64 %rd706, %rd98, %rd705;
|
||
ld.shared.u32 %r2205, [%rd706];
|
||
// inline asm
|
||
bfe.u32 %r1883, %r1892, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd707, %r1883, 4;
|
||
add.s64 %rd708, %rd102, %rd707;
|
||
ld.shared.u32 %r2206, [%rd708];
|
||
add.s32 %r2207, %r2206, %r2205;
|
||
// inline asm
|
||
bfe.u32 %r1887, %r1892, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd709, %r1887, 4;
|
||
add.s64 %rd710, %rd106, %rd709;
|
||
ld.shared.u32 %r2208, [%rd710];
|
||
xor.b32 %r2209, %r2208, %r2207;
|
||
// inline asm
|
||
bfe.u32 %r1891, %r1892, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd711, %r1891, 4;
|
||
add.s64 %rd712, %rd110, %rd711;
|
||
ld.shared.u32 %r2210, [%rd712];
|
||
add.s32 %r2211, %r2210, %r2209;
|
||
xor.b32 %r2212, %r3883, %r1876;
|
||
xor.b32 %r1908, %r2212, %r2211;
|
||
// inline asm
|
||
bfe.u32 %r1895, %r1908, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd713, %r1895, 4;
|
||
add.s64 %rd714, %rd98, %rd713;
|
||
ld.shared.u32 %r2213, [%rd714];
|
||
// inline asm
|
||
bfe.u32 %r1899, %r1908, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd715, %r1899, 4;
|
||
add.s64 %rd716, %rd102, %rd715;
|
||
ld.shared.u32 %r2214, [%rd716];
|
||
add.s32 %r2215, %r2214, %r2213;
|
||
// inline asm
|
||
bfe.u32 %r1903, %r1908, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd717, %r1903, 4;
|
||
add.s64 %rd718, %rd106, %rd717;
|
||
ld.shared.u32 %r2216, [%rd718];
|
||
xor.b32 %r2217, %r2216, %r2215;
|
||
// inline asm
|
||
bfe.u32 %r1907, %r1908, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd719, %r1907, 4;
|
||
add.s64 %rd720, %rd110, %rd719;
|
||
ld.shared.u32 %r2218, [%rd720];
|
||
add.s32 %r2219, %r2218, %r2217;
|
||
xor.b32 %r2220, %r3882, %r1892;
|
||
xor.b32 %r1924, %r2220, %r2219;
|
||
// inline asm
|
||
bfe.u32 %r1911, %r1924, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd721, %r1911, 4;
|
||
add.s64 %rd722, %rd98, %rd721;
|
||
ld.shared.u32 %r2221, [%rd722];
|
||
// inline asm
|
||
bfe.u32 %r1915, %r1924, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd723, %r1915, 4;
|
||
add.s64 %rd724, %rd102, %rd723;
|
||
ld.shared.u32 %r2222, [%rd724];
|
||
add.s32 %r2223, %r2222, %r2221;
|
||
// inline asm
|
||
bfe.u32 %r1919, %r1924, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd725, %r1919, 4;
|
||
add.s64 %rd726, %rd106, %rd725;
|
||
ld.shared.u32 %r2224, [%rd726];
|
||
xor.b32 %r2225, %r2224, %r2223;
|
||
// inline asm
|
||
bfe.u32 %r1923, %r1924, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd727, %r1923, 4;
|
||
add.s64 %rd728, %rd110, %rd727;
|
||
ld.shared.u32 %r2226, [%rd728];
|
||
add.s32 %r2227, %r2226, %r2225;
|
||
xor.b32 %r2228, %r3881, %r1908;
|
||
xor.b32 %r1940, %r2228, %r2227;
|
||
// inline asm
|
||
bfe.u32 %r1927, %r1940, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd729, %r1927, 4;
|
||
add.s64 %rd730, %rd98, %rd729;
|
||
ld.shared.u32 %r2229, [%rd730];
|
||
// inline asm
|
||
bfe.u32 %r1931, %r1940, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd731, %r1931, 4;
|
||
add.s64 %rd732, %rd102, %rd731;
|
||
ld.shared.u32 %r2230, [%rd732];
|
||
add.s32 %r2231, %r2230, %r2229;
|
||
// inline asm
|
||
bfe.u32 %r1935, %r1940, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd733, %r1935, 4;
|
||
add.s64 %rd734, %rd106, %rd733;
|
||
ld.shared.u32 %r2232, [%rd734];
|
||
xor.b32 %r2233, %r2232, %r2231;
|
||
// inline asm
|
||
bfe.u32 %r1939, %r1940, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd735, %r1939, 4;
|
||
add.s64 %rd736, %rd110, %rd735;
|
||
ld.shared.u32 %r2234, [%rd736];
|
||
add.s32 %r2235, %r2234, %r2233;
|
||
xor.b32 %r2236, %r3880, %r1924;
|
||
xor.b32 %r1956, %r2236, %r2235;
|
||
// inline asm
|
||
bfe.u32 %r1943, %r1956, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd737, %r1943, 4;
|
||
add.s64 %rd738, %rd98, %rd737;
|
||
ld.shared.u32 %r2237, [%rd738];
|
||
// inline asm
|
||
bfe.u32 %r1947, %r1956, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd739, %r1947, 4;
|
||
add.s64 %rd740, %rd102, %rd739;
|
||
ld.shared.u32 %r2238, [%rd740];
|
||
add.s32 %r2239, %r2238, %r2237;
|
||
// inline asm
|
||
bfe.u32 %r1951, %r1956, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd741, %r1951, 4;
|
||
add.s64 %rd742, %rd106, %rd741;
|
||
ld.shared.u32 %r2240, [%rd742];
|
||
xor.b32 %r2241, %r2240, %r2239;
|
||
// inline asm
|
||
bfe.u32 %r1955, %r1956, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd743, %r1955, 4;
|
||
add.s64 %rd744, %rd110, %rd743;
|
||
ld.shared.u32 %r2242, [%rd744];
|
||
add.s32 %r2243, %r2242, %r2241;
|
||
xor.b32 %r2244, %r3879, %r1940;
|
||
xor.b32 %r1972, %r2244, %r2243;
|
||
// inline asm
|
||
bfe.u32 %r1959, %r1972, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd745, %r1959, 4;
|
||
add.s64 %rd746, %rd98, %rd745;
|
||
ld.shared.u32 %r2245, [%rd746];
|
||
// inline asm
|
||
bfe.u32 %r1963, %r1972, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd747, %r1963, 4;
|
||
add.s64 %rd748, %rd102, %rd747;
|
||
ld.shared.u32 %r2246, [%rd748];
|
||
add.s32 %r2247, %r2246, %r2245;
|
||
// inline asm
|
||
bfe.u32 %r1967, %r1972, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd749, %r1967, 4;
|
||
add.s64 %rd750, %rd106, %rd749;
|
||
ld.shared.u32 %r2248, [%rd750];
|
||
xor.b32 %r2249, %r2248, %r2247;
|
||
// inline asm
|
||
bfe.u32 %r1971, %r1972, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd751, %r1971, 4;
|
||
add.s64 %rd752, %rd110, %rd751;
|
||
ld.shared.u32 %r2250, [%rd752];
|
||
add.s32 %r2251, %r2250, %r2249;
|
||
xor.b32 %r2252, %r3878, %r1956;
|
||
xor.b32 %r1988, %r2252, %r2251;
|
||
// inline asm
|
||
bfe.u32 %r1975, %r1988, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd753, %r1975, 4;
|
||
add.s64 %rd754, %rd98, %rd753;
|
||
ld.shared.u32 %r2253, [%rd754];
|
||
// inline asm
|
||
bfe.u32 %r1979, %r1988, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd755, %r1979, 4;
|
||
add.s64 %rd756, %rd102, %rd755;
|
||
ld.shared.u32 %r2254, [%rd756];
|
||
add.s32 %r2255, %r2254, %r2253;
|
||
// inline asm
|
||
bfe.u32 %r1983, %r1988, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd757, %r1983, 4;
|
||
add.s64 %rd758, %rd106, %rd757;
|
||
ld.shared.u32 %r2256, [%rd758];
|
||
xor.b32 %r2257, %r2256, %r2255;
|
||
// inline asm
|
||
bfe.u32 %r1987, %r1988, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd759, %r1987, 4;
|
||
add.s64 %rd760, %rd110, %rd759;
|
||
ld.shared.u32 %r2258, [%rd760];
|
||
add.s32 %r2259, %r2258, %r2257;
|
||
xor.b32 %r2260, %r3877, %r1972;
|
||
xor.b32 %r2004, %r2260, %r2259;
|
||
// inline asm
|
||
bfe.u32 %r1991, %r2004, %r3852, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd761, %r1991, 4;
|
||
add.s64 %rd762, %rd98, %rd761;
|
||
ld.shared.u32 %r2261, [%rd762];
|
||
// inline asm
|
||
bfe.u32 %r1995, %r2004, %r3854, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd763, %r1995, 4;
|
||
add.s64 %rd764, %rd102, %rd763;
|
||
ld.shared.u32 %r2262, [%rd764];
|
||
add.s32 %r2263, %r2262, %r2261;
|
||
// inline asm
|
||
bfe.u32 %r1999, %r2004, %r3853, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd765, %r1999, 4;
|
||
add.s64 %rd766, %rd106, %rd765;
|
||
ld.shared.u32 %r2264, [%rd766];
|
||
xor.b32 %r2265, %r2264, %r2263;
|
||
// inline asm
|
||
bfe.u32 %r2003, %r2004, %r1494, %r3853;
|
||
// inline asm
|
||
mul.wide.u32 %rd767, %r2003, 4;
|
||
add.s64 %rd768, %rd110, %rd767;
|
||
ld.shared.u32 %r2266, [%rd768];
|
||
add.s32 %r2267, %r2266, %r2265;
|
||
xor.b32 %r2268, %r3876, %r1988;
|
||
xor.b32 %r3897, %r2268, %r2267;
|
||
xor.b32 %r3898, %r3875, %r2004;
|
||
st.shared.u32 [%rd1338+8], %r3898;
|
||
st.shared.u32 [%rd1338+12], %r3897;
|
||
add.s64 %rd1338, %rd1338, 16;
|
||
add.s32 %r3899, %r3899, 4;
|
||
setp.lt.u32 %p14, %r3899, 256;
|
||
@%p14 bra BB3_24;
|
||
|
||
// Straight-line setup executed once after the BB3_24 loop falls through and
// before the BB3_26 loop is entered.
// %r3864 = x component of this thread's index within its block.
mov.u32 %r3864, %tid.x;
|
||
// %rd1323 = (signed 32 -> 64 bit widening) %r3864 * 1024, i.e. a byte offset
// with a stride of 1024 bytes per thread index.
mul.wide.s32 %rd1323, %r3864, 1024;
|
||
// %rd1321 = address of the symbol m03200_init$S2_all (declared outside this
// excerpt; its state space is not visible here).
mov.u64 %rd1321, m03200_init$S2_all;
|
||
// %rd1339 = symbol base + per-thread offset.
// NOTE(review): presumably the running store pointer for the BB3_26 loop, in
// the same way %rd1338 is advanced by 16 at the end of BB3_24 -- confirm
// against the loop tail, which is past the end of this excerpt.
add.s64 %rd1339, %rd1321, %rd1323;
|
||
// %r2269 = 0. Inside BB3_26 it is passed as the bit-position operand of
// bfe.u32 together with a length of 8, which selects bits 0..7 of the source.
mov.u32 %r2269, 0;
|
||
// %r3902 starts at 0.
// NOTE(review): looks like the BB3_26 iteration counter (BB3_24 uses %r3899
// the same way, stepping by 4 up to 256) -- its update is not visible here.
mov.u32 %r3902, %r2269;
|
||
|
||
BB3_26:
|
||
mov.u32 %r3857, 16;
|
||
mov.u32 %r3856, 8;
|
||
mov.u32 %r3855, 24;
|
||
xor.b32 %r2782, %r39, %r3898;
|
||
xor.b32 %r2283, %r2782, %r3892;
|
||
// inline asm
|
||
bfe.u32 %r2270, %r2283, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd774, %r2270, 4;
|
||
add.s64 %rd775, %rd98, %rd774;
|
||
ld.shared.u32 %r2783, [%rd775];
|
||
// inline asm
|
||
bfe.u32 %r2274, %r2283, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd778, %r2274, 4;
|
||
add.s64 %rd779, %rd102, %rd778;
|
||
ld.shared.u32 %r2784, [%rd779];
|
||
add.s32 %r2785, %r2784, %r2783;
|
||
// inline asm
|
||
bfe.u32 %r2278, %r2283, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd782, %r2278, 4;
|
||
add.s64 %rd783, %rd106, %rd782;
|
||
ld.shared.u32 %r2786, [%rd783];
|
||
xor.b32 %r2787, %r2786, %r2785;
|
||
// inline asm
|
||
bfe.u32 %r2282, %r2283, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd786, %r2282, 4;
|
||
add.s64 %rd787, %rd110, %rd786;
|
||
ld.shared.u32 %r2788, [%rd787];
|
||
add.s32 %r2789, %r2788, %r2787;
|
||
xor.b32 %r2790, %r40, %r3897;
|
||
xor.b32 %r2791, %r2790, %r3891;
|
||
xor.b32 %r2299, %r2791, %r2789;
|
||
// inline asm
|
||
bfe.u32 %r2286, %r2299, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd788, %r2286, 4;
|
||
add.s64 %rd789, %rd98, %rd788;
|
||
ld.shared.u32 %r2792, [%rd789];
|
||
// inline asm
|
||
bfe.u32 %r2290, %r2299, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd790, %r2290, 4;
|
||
add.s64 %rd791, %rd102, %rd790;
|
||
ld.shared.u32 %r2793, [%rd791];
|
||
add.s32 %r2794, %r2793, %r2792;
|
||
// inline asm
|
||
bfe.u32 %r2294, %r2299, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd792, %r2294, 4;
|
||
add.s64 %rd793, %rd106, %rd792;
|
||
ld.shared.u32 %r2795, [%rd793];
|
||
xor.b32 %r2796, %r2795, %r2794;
|
||
// inline asm
|
||
bfe.u32 %r2298, %r2299, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd794, %r2298, 4;
|
||
add.s64 %rd795, %rd110, %rd794;
|
||
ld.shared.u32 %r2797, [%rd795];
|
||
add.s32 %r2798, %r2797, %r2796;
|
||
xor.b32 %r2799, %r3890, %r2283;
|
||
xor.b32 %r2315, %r2799, %r2798;
|
||
// inline asm
|
||
bfe.u32 %r2302, %r2315, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd796, %r2302, 4;
|
||
add.s64 %rd797, %rd98, %rd796;
|
||
ld.shared.u32 %r2800, [%rd797];
|
||
// inline asm
|
||
bfe.u32 %r2306, %r2315, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd798, %r2306, 4;
|
||
add.s64 %rd799, %rd102, %rd798;
|
||
ld.shared.u32 %r2801, [%rd799];
|
||
add.s32 %r2802, %r2801, %r2800;
|
||
// inline asm
|
||
bfe.u32 %r2310, %r2315, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd800, %r2310, 4;
|
||
add.s64 %rd801, %rd106, %rd800;
|
||
ld.shared.u32 %r2803, [%rd801];
|
||
xor.b32 %r2804, %r2803, %r2802;
|
||
// inline asm
|
||
bfe.u32 %r2314, %r2315, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd802, %r2314, 4;
|
||
add.s64 %rd803, %rd110, %rd802;
|
||
ld.shared.u32 %r2805, [%rd803];
|
||
add.s32 %r2806, %r2805, %r2804;
|
||
xor.b32 %r2807, %r3889, %r2299;
|
||
xor.b32 %r2331, %r2807, %r2806;
|
||
// inline asm
|
||
bfe.u32 %r2318, %r2331, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd804, %r2318, 4;
|
||
add.s64 %rd805, %rd98, %rd804;
|
||
ld.shared.u32 %r2808, [%rd805];
|
||
// inline asm
|
||
bfe.u32 %r2322, %r2331, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd806, %r2322, 4;
|
||
add.s64 %rd807, %rd102, %rd806;
|
||
ld.shared.u32 %r2809, [%rd807];
|
||
add.s32 %r2810, %r2809, %r2808;
|
||
// inline asm
|
||
bfe.u32 %r2326, %r2331, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd808, %r2326, 4;
|
||
add.s64 %rd809, %rd106, %rd808;
|
||
ld.shared.u32 %r2811, [%rd809];
|
||
xor.b32 %r2812, %r2811, %r2810;
|
||
// inline asm
|
||
bfe.u32 %r2330, %r2331, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd810, %r2330, 4;
|
||
add.s64 %rd811, %rd110, %rd810;
|
||
ld.shared.u32 %r2813, [%rd811];
|
||
add.s32 %r2814, %r2813, %r2812;
|
||
xor.b32 %r2815, %r3888, %r2315;
|
||
xor.b32 %r2347, %r2815, %r2814;
|
||
// inline asm
|
||
bfe.u32 %r2334, %r2347, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd812, %r2334, 4;
|
||
add.s64 %rd813, %rd98, %rd812;
|
||
ld.shared.u32 %r2816, [%rd813];
|
||
// inline asm
|
||
bfe.u32 %r2338, %r2347, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd814, %r2338, 4;
|
||
add.s64 %rd815, %rd102, %rd814;
|
||
ld.shared.u32 %r2817, [%rd815];
|
||
add.s32 %r2818, %r2817, %r2816;
|
||
// inline asm
|
||
bfe.u32 %r2342, %r2347, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd816, %r2342, 4;
|
||
add.s64 %rd817, %rd106, %rd816;
|
||
ld.shared.u32 %r2819, [%rd817];
|
||
xor.b32 %r2820, %r2819, %r2818;
|
||
// inline asm
|
||
bfe.u32 %r2346, %r2347, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd818, %r2346, 4;
|
||
add.s64 %rd819, %rd110, %rd818;
|
||
ld.shared.u32 %r2821, [%rd819];
|
||
add.s32 %r2822, %r2821, %r2820;
|
||
xor.b32 %r2823, %r3887, %r2331;
|
||
xor.b32 %r2363, %r2823, %r2822;
|
||
// inline asm
|
||
bfe.u32 %r2350, %r2363, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd820, %r2350, 4;
|
||
add.s64 %rd821, %rd98, %rd820;
|
||
ld.shared.u32 %r2824, [%rd821];
|
||
// inline asm
|
||
bfe.u32 %r2354, %r2363, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd822, %r2354, 4;
|
||
add.s64 %rd823, %rd102, %rd822;
|
||
ld.shared.u32 %r2825, [%rd823];
|
||
add.s32 %r2826, %r2825, %r2824;
|
||
// inline asm
|
||
bfe.u32 %r2358, %r2363, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd824, %r2358, 4;
|
||
add.s64 %rd825, %rd106, %rd824;
|
||
ld.shared.u32 %r2827, [%rd825];
|
||
xor.b32 %r2828, %r2827, %r2826;
|
||
// inline asm
|
||
bfe.u32 %r2362, %r2363, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd826, %r2362, 4;
|
||
add.s64 %rd827, %rd110, %rd826;
|
||
ld.shared.u32 %r2829, [%rd827];
|
||
add.s32 %r2830, %r2829, %r2828;
|
||
xor.b32 %r2831, %r3886, %r2347;
|
||
xor.b32 %r2379, %r2831, %r2830;
|
||
// inline asm
|
||
bfe.u32 %r2366, %r2379, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd828, %r2366, 4;
|
||
add.s64 %rd829, %rd98, %rd828;
|
||
ld.shared.u32 %r2832, [%rd829];
|
||
// inline asm
|
||
bfe.u32 %r2370, %r2379, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd830, %r2370, 4;
|
||
add.s64 %rd831, %rd102, %rd830;
|
||
ld.shared.u32 %r2833, [%rd831];
|
||
add.s32 %r2834, %r2833, %r2832;
|
||
// inline asm
|
||
bfe.u32 %r2374, %r2379, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd832, %r2374, 4;
|
||
add.s64 %rd833, %rd106, %rd832;
|
||
ld.shared.u32 %r2835, [%rd833];
|
||
xor.b32 %r2836, %r2835, %r2834;
|
||
// inline asm
|
||
bfe.u32 %r2378, %r2379, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd834, %r2378, 4;
|
||
add.s64 %rd835, %rd110, %rd834;
|
||
ld.shared.u32 %r2837, [%rd835];
|
||
add.s32 %r2838, %r2837, %r2836;
|
||
xor.b32 %r2839, %r3885, %r2363;
|
||
xor.b32 %r2395, %r2839, %r2838;
|
||
// inline asm
|
||
bfe.u32 %r2382, %r2395, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd836, %r2382, 4;
|
||
add.s64 %rd837, %rd98, %rd836;
|
||
ld.shared.u32 %r2840, [%rd837];
|
||
// inline asm
|
||
bfe.u32 %r2386, %r2395, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd838, %r2386, 4;
|
||
add.s64 %rd839, %rd102, %rd838;
|
||
ld.shared.u32 %r2841, [%rd839];
|
||
add.s32 %r2842, %r2841, %r2840;
|
||
// inline asm
|
||
bfe.u32 %r2390, %r2395, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd840, %r2390, 4;
|
||
add.s64 %rd841, %rd106, %rd840;
|
||
ld.shared.u32 %r2843, [%rd841];
|
||
xor.b32 %r2844, %r2843, %r2842;
|
||
// inline asm
|
||
bfe.u32 %r2394, %r2395, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd842, %r2394, 4;
|
||
add.s64 %rd843, %rd110, %rd842;
|
||
ld.shared.u32 %r2845, [%rd843];
|
||
add.s32 %r2846, %r2845, %r2844;
|
||
xor.b32 %r2847, %r3884, %r2379;
|
||
xor.b32 %r2411, %r2847, %r2846;
|
||
// inline asm
|
||
bfe.u32 %r2398, %r2411, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd844, %r2398, 4;
|
||
add.s64 %rd845, %rd98, %rd844;
|
||
ld.shared.u32 %r2848, [%rd845];
|
||
// inline asm
|
||
bfe.u32 %r2402, %r2411, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd846, %r2402, 4;
|
||
add.s64 %rd847, %rd102, %rd846;
|
||
ld.shared.u32 %r2849, [%rd847];
|
||
add.s32 %r2850, %r2849, %r2848;
|
||
// inline asm
|
||
bfe.u32 %r2406, %r2411, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd848, %r2406, 4;
|
||
add.s64 %rd849, %rd106, %rd848;
|
||
ld.shared.u32 %r2851, [%rd849];
|
||
xor.b32 %r2852, %r2851, %r2850;
|
||
// inline asm
|
||
bfe.u32 %r2410, %r2411, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd850, %r2410, 4;
|
||
add.s64 %rd851, %rd110, %rd850;
|
||
ld.shared.u32 %r2853, [%rd851];
|
||
add.s32 %r2854, %r2853, %r2852;
|
||
xor.b32 %r2855, %r3883, %r2395;
|
||
xor.b32 %r2427, %r2855, %r2854;
|
||
// inline asm
|
||
bfe.u32 %r2414, %r2427, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd852, %r2414, 4;
|
||
add.s64 %rd853, %rd98, %rd852;
|
||
ld.shared.u32 %r2856, [%rd853];
|
||
// inline asm
|
||
bfe.u32 %r2418, %r2427, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd854, %r2418, 4;
|
||
add.s64 %rd855, %rd102, %rd854;
|
||
ld.shared.u32 %r2857, [%rd855];
|
||
add.s32 %r2858, %r2857, %r2856;
|
||
// inline asm
|
||
bfe.u32 %r2422, %r2427, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd856, %r2422, 4;
|
||
add.s64 %rd857, %rd106, %rd856;
|
||
ld.shared.u32 %r2859, [%rd857];
|
||
xor.b32 %r2860, %r2859, %r2858;
|
||
// inline asm
|
||
bfe.u32 %r2426, %r2427, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd858, %r2426, 4;
|
||
add.s64 %rd859, %rd110, %rd858;
|
||
ld.shared.u32 %r2861, [%rd859];
|
||
add.s32 %r2862, %r2861, %r2860;
|
||
xor.b32 %r2863, %r3882, %r2411;
|
||
xor.b32 %r2443, %r2863, %r2862;
|
||
// inline asm
|
||
bfe.u32 %r2430, %r2443, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd860, %r2430, 4;
|
||
add.s64 %rd861, %rd98, %rd860;
|
||
ld.shared.u32 %r2864, [%rd861];
|
||
// inline asm
|
||
bfe.u32 %r2434, %r2443, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd862, %r2434, 4;
|
||
add.s64 %rd863, %rd102, %rd862;
|
||
ld.shared.u32 %r2865, [%rd863];
|
||
add.s32 %r2866, %r2865, %r2864;
|
||
// inline asm
|
||
bfe.u32 %r2438, %r2443, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd864, %r2438, 4;
|
||
add.s64 %rd865, %rd106, %rd864;
|
||
ld.shared.u32 %r2867, [%rd865];
|
||
xor.b32 %r2868, %r2867, %r2866;
|
||
// inline asm
|
||
bfe.u32 %r2442, %r2443, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd866, %r2442, 4;
|
||
add.s64 %rd867, %rd110, %rd866;
|
||
ld.shared.u32 %r2869, [%rd867];
|
||
add.s32 %r2870, %r2869, %r2868;
|
||
xor.b32 %r2871, %r3881, %r2427;
|
||
xor.b32 %r2459, %r2871, %r2870;
|
||
// inline asm
|
||
bfe.u32 %r2446, %r2459, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd868, %r2446, 4;
|
||
add.s64 %rd869, %rd98, %rd868;
|
||
ld.shared.u32 %r2872, [%rd869];
|
||
// inline asm
|
||
bfe.u32 %r2450, %r2459, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd870, %r2450, 4;
|
||
add.s64 %rd871, %rd102, %rd870;
|
||
ld.shared.u32 %r2873, [%rd871];
|
||
add.s32 %r2874, %r2873, %r2872;
|
||
// inline asm
|
||
bfe.u32 %r2454, %r2459, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd872, %r2454, 4;
|
||
add.s64 %rd873, %rd106, %rd872;
|
||
ld.shared.u32 %r2875, [%rd873];
|
||
xor.b32 %r2876, %r2875, %r2874;
|
||
// inline asm
|
||
bfe.u32 %r2458, %r2459, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd874, %r2458, 4;
|
||
add.s64 %rd875, %rd110, %rd874;
|
||
ld.shared.u32 %r2877, [%rd875];
|
||
add.s32 %r2878, %r2877, %r2876;
|
||
xor.b32 %r2879, %r3880, %r2443;
|
||
xor.b32 %r2475, %r2879, %r2878;
|
||
// inline asm
|
||
bfe.u32 %r2462, %r2475, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd876, %r2462, 4;
|
||
add.s64 %rd877, %rd98, %rd876;
|
||
ld.shared.u32 %r2880, [%rd877];
|
||
// inline asm
|
||
bfe.u32 %r2466, %r2475, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd878, %r2466, 4;
|
||
add.s64 %rd879, %rd102, %rd878;
|
||
ld.shared.u32 %r2881, [%rd879];
|
||
add.s32 %r2882, %r2881, %r2880;
|
||
// inline asm
|
||
bfe.u32 %r2470, %r2475, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd880, %r2470, 4;
|
||
add.s64 %rd881, %rd106, %rd880;
|
||
ld.shared.u32 %r2883, [%rd881];
|
||
xor.b32 %r2884, %r2883, %r2882;
|
||
// inline asm
|
||
bfe.u32 %r2474, %r2475, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd882, %r2474, 4;
|
||
add.s64 %rd883, %rd110, %rd882;
|
||
ld.shared.u32 %r2885, [%rd883];
|
||
add.s32 %r2886, %r2885, %r2884;
|
||
xor.b32 %r2887, %r3879, %r2459;
|
||
xor.b32 %r2491, %r2887, %r2886;
|
||
// inline asm
|
||
bfe.u32 %r2478, %r2491, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd884, %r2478, 4;
|
||
add.s64 %rd885, %rd98, %rd884;
|
||
ld.shared.u32 %r2888, [%rd885];
|
||
// inline asm
|
||
bfe.u32 %r2482, %r2491, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd886, %r2482, 4;
|
||
add.s64 %rd887, %rd102, %rd886;
|
||
ld.shared.u32 %r2889, [%rd887];
|
||
add.s32 %r2890, %r2889, %r2888;
|
||
// inline asm
|
||
bfe.u32 %r2486, %r2491, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd888, %r2486, 4;
|
||
add.s64 %rd889, %rd106, %rd888;
|
||
ld.shared.u32 %r2891, [%rd889];
|
||
xor.b32 %r2892, %r2891, %r2890;
|
||
// inline asm
|
||
bfe.u32 %r2490, %r2491, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd890, %r2490, 4;
|
||
add.s64 %rd891, %rd110, %rd890;
|
||
ld.shared.u32 %r2893, [%rd891];
|
||
add.s32 %r2894, %r2893, %r2892;
|
||
xor.b32 %r2895, %r3878, %r2475;
|
||
xor.b32 %r2507, %r2895, %r2894;
|
||
// inline asm
|
||
bfe.u32 %r2494, %r2507, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd892, %r2494, 4;
|
||
add.s64 %rd893, %rd98, %rd892;
|
||
ld.shared.u32 %r2896, [%rd893];
|
||
// inline asm
|
||
bfe.u32 %r2498, %r2507, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd894, %r2498, 4;
|
||
add.s64 %rd895, %rd102, %rd894;
|
||
ld.shared.u32 %r2897, [%rd895];
|
||
add.s32 %r2898, %r2897, %r2896;
|
||
// inline asm
|
||
bfe.u32 %r2502, %r2507, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd896, %r2502, 4;
|
||
add.s64 %rd897, %rd106, %rd896;
|
||
ld.shared.u32 %r2899, [%rd897];
|
||
xor.b32 %r2900, %r2899, %r2898;
|
||
// inline asm
|
||
bfe.u32 %r2506, %r2507, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd898, %r2506, 4;
|
||
add.s64 %rd899, %rd110, %rd898;
|
||
ld.shared.u32 %r2901, [%rd899];
|
||
add.s32 %r2902, %r2901, %r2900;
|
||
xor.b32 %r2903, %r3877, %r2491;
|
||
xor.b32 %r2523, %r2903, %r2902;
|
||
// inline asm
|
||
bfe.u32 %r2510, %r2523, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd900, %r2510, 4;
|
||
add.s64 %rd901, %rd98, %rd900;
|
||
ld.shared.u32 %r2904, [%rd901];
|
||
// inline asm
|
||
bfe.u32 %r2514, %r2523, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd902, %r2514, 4;
|
||
add.s64 %rd903, %rd102, %rd902;
|
||
ld.shared.u32 %r2905, [%rd903];
|
||
add.s32 %r2906, %r2905, %r2904;
|
||
// inline asm
|
||
bfe.u32 %r2518, %r2523, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd904, %r2518, 4;
|
||
add.s64 %rd905, %rd106, %rd904;
|
||
ld.shared.u32 %r2907, [%rd905];
|
||
xor.b32 %r2908, %r2907, %r2906;
|
||
// inline asm
|
||
bfe.u32 %r2522, %r2523, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd906, %r2522, 4;
|
||
add.s64 %rd907, %rd110, %rd906;
|
||
ld.shared.u32 %r2909, [%rd907];
|
||
add.s32 %r2910, %r2909, %r2908;
|
||
xor.b32 %r2911, %r3876, %r2507;
|
||
xor.b32 %r2912, %r2911, %r2910;
|
||
xor.b32 %r2913, %r3875, %r2523;
|
||
st.shared.u32 [%rd1339], %r2913;
|
||
st.shared.u32 [%rd1339+4], %r2912;
|
||
xor.b32 %r2914, %r2913, %r37;
|
||
xor.b32 %r2539, %r2914, %r3892;
|
||
// inline asm
|
||
bfe.u32 %r2526, %r2539, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd908, %r2526, 4;
|
||
add.s64 %rd909, %rd98, %rd908;
|
||
ld.shared.u32 %r2915, [%rd909];
|
||
// inline asm
|
||
bfe.u32 %r2530, %r2539, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd910, %r2530, 4;
|
||
add.s64 %rd911, %rd102, %rd910;
|
||
ld.shared.u32 %r2916, [%rd911];
|
||
add.s32 %r2917, %r2916, %r2915;
|
||
// inline asm
|
||
bfe.u32 %r2534, %r2539, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd912, %r2534, 4;
|
||
add.s64 %rd913, %rd106, %rd912;
|
||
ld.shared.u32 %r2918, [%rd913];
|
||
xor.b32 %r2919, %r2918, %r2917;
|
||
// inline asm
|
||
bfe.u32 %r2538, %r2539, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd914, %r2538, 4;
|
||
add.s64 %rd915, %rd110, %rd914;
|
||
ld.shared.u32 %r2920, [%rd915];
|
||
add.s32 %r2921, %r2920, %r2919;
|
||
xor.b32 %r2922, %r38, %r2912;
|
||
xor.b32 %r2923, %r2922, %r3891;
|
||
xor.b32 %r2555, %r2923, %r2921;
|
||
// inline asm
|
||
bfe.u32 %r2542, %r2555, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd916, %r2542, 4;
|
||
add.s64 %rd917, %rd98, %rd916;
|
||
ld.shared.u32 %r2924, [%rd917];
|
||
// inline asm
|
||
bfe.u32 %r2546, %r2555, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd918, %r2546, 4;
|
||
add.s64 %rd919, %rd102, %rd918;
|
||
ld.shared.u32 %r2925, [%rd919];
|
||
add.s32 %r2926, %r2925, %r2924;
|
||
// inline asm
|
||
bfe.u32 %r2550, %r2555, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd920, %r2550, 4;
|
||
add.s64 %rd921, %rd106, %rd920;
|
||
ld.shared.u32 %r2927, [%rd921];
|
||
xor.b32 %r2928, %r2927, %r2926;
|
||
// inline asm
|
||
bfe.u32 %r2554, %r2555, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd922, %r2554, 4;
|
||
add.s64 %rd923, %rd110, %rd922;
|
||
ld.shared.u32 %r2929, [%rd923];
|
||
add.s32 %r2930, %r2929, %r2928;
|
||
xor.b32 %r2931, %r3890, %r2539;
|
||
xor.b32 %r2571, %r2931, %r2930;
|
||
// inline asm
|
||
bfe.u32 %r2558, %r2571, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd924, %r2558, 4;
|
||
add.s64 %rd925, %rd98, %rd924;
|
||
ld.shared.u32 %r2932, [%rd925];
|
||
// inline asm
|
||
bfe.u32 %r2562, %r2571, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd926, %r2562, 4;
|
||
add.s64 %rd927, %rd102, %rd926;
|
||
ld.shared.u32 %r2933, [%rd927];
|
||
add.s32 %r2934, %r2933, %r2932;
|
||
// inline asm
|
||
bfe.u32 %r2566, %r2571, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd928, %r2566, 4;
|
||
add.s64 %rd929, %rd106, %rd928;
|
||
ld.shared.u32 %r2935, [%rd929];
|
||
xor.b32 %r2936, %r2935, %r2934;
|
||
// inline asm
|
||
bfe.u32 %r2570, %r2571, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd930, %r2570, 4;
|
||
add.s64 %rd931, %rd110, %rd930;
|
||
ld.shared.u32 %r2937, [%rd931];
|
||
add.s32 %r2938, %r2937, %r2936;
|
||
xor.b32 %r2939, %r3889, %r2555;
|
||
xor.b32 %r2587, %r2939, %r2938;
|
||
// inline asm
|
||
bfe.u32 %r2574, %r2587, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd932, %r2574, 4;
|
||
add.s64 %rd933, %rd98, %rd932;
|
||
ld.shared.u32 %r2940, [%rd933];
|
||
// inline asm
|
||
bfe.u32 %r2578, %r2587, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd934, %r2578, 4;
|
||
add.s64 %rd935, %rd102, %rd934;
|
||
ld.shared.u32 %r2941, [%rd935];
|
||
add.s32 %r2942, %r2941, %r2940;
|
||
// inline asm
|
||
bfe.u32 %r2582, %r2587, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd936, %r2582, 4;
|
||
add.s64 %rd937, %rd106, %rd936;
|
||
ld.shared.u32 %r2943, [%rd937];
|
||
xor.b32 %r2944, %r2943, %r2942;
|
||
// inline asm
|
||
bfe.u32 %r2586, %r2587, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd938, %r2586, 4;
|
||
add.s64 %rd939, %rd110, %rd938;
|
||
ld.shared.u32 %r2945, [%rd939];
|
||
add.s32 %r2946, %r2945, %r2944;
|
||
xor.b32 %r2947, %r3888, %r2571;
|
||
xor.b32 %r2603, %r2947, %r2946;
|
||
// inline asm
|
||
bfe.u32 %r2590, %r2603, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd940, %r2590, 4;
|
||
add.s64 %rd941, %rd98, %rd940;
|
||
ld.shared.u32 %r2948, [%rd941];
|
||
// inline asm
|
||
bfe.u32 %r2594, %r2603, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd942, %r2594, 4;
|
||
add.s64 %rd943, %rd102, %rd942;
|
||
ld.shared.u32 %r2949, [%rd943];
|
||
add.s32 %r2950, %r2949, %r2948;
|
||
// inline asm
|
||
bfe.u32 %r2598, %r2603, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd944, %r2598, 4;
|
||
add.s64 %rd945, %rd106, %rd944;
|
||
ld.shared.u32 %r2951, [%rd945];
|
||
xor.b32 %r2952, %r2951, %r2950;
|
||
// inline asm
|
||
bfe.u32 %r2602, %r2603, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd946, %r2602, 4;
|
||
add.s64 %rd947, %rd110, %rd946;
|
||
ld.shared.u32 %r2953, [%rd947];
|
||
add.s32 %r2954, %r2953, %r2952;
|
||
xor.b32 %r2955, %r3887, %r2587;
|
||
xor.b32 %r2619, %r2955, %r2954;
|
||
// inline asm
|
||
bfe.u32 %r2606, %r2619, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd948, %r2606, 4;
|
||
add.s64 %rd949, %rd98, %rd948;
|
||
ld.shared.u32 %r2956, [%rd949];
|
||
// inline asm
|
||
bfe.u32 %r2610, %r2619, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd950, %r2610, 4;
|
||
add.s64 %rd951, %rd102, %rd950;
|
||
ld.shared.u32 %r2957, [%rd951];
|
||
add.s32 %r2958, %r2957, %r2956;
|
||
// inline asm
|
||
bfe.u32 %r2614, %r2619, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd952, %r2614, 4;
|
||
add.s64 %rd953, %rd106, %rd952;
|
||
ld.shared.u32 %r2959, [%rd953];
|
||
xor.b32 %r2960, %r2959, %r2958;
|
||
// inline asm
|
||
bfe.u32 %r2618, %r2619, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd954, %r2618, 4;
|
||
add.s64 %rd955, %rd110, %rd954;
|
||
ld.shared.u32 %r2961, [%rd955];
|
||
add.s32 %r2962, %r2961, %r2960;
|
||
xor.b32 %r2963, %r3886, %r2603;
|
||
xor.b32 %r2635, %r2963, %r2962;
|
||
// inline asm
|
||
bfe.u32 %r2622, %r2635, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd956, %r2622, 4;
|
||
add.s64 %rd957, %rd98, %rd956;
|
||
ld.shared.u32 %r2964, [%rd957];
|
||
// inline asm
|
||
bfe.u32 %r2626, %r2635, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd958, %r2626, 4;
|
||
add.s64 %rd959, %rd102, %rd958;
|
||
ld.shared.u32 %r2965, [%rd959];
|
||
add.s32 %r2966, %r2965, %r2964;
|
||
// inline asm
|
||
bfe.u32 %r2630, %r2635, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd960, %r2630, 4;
|
||
add.s64 %rd961, %rd106, %rd960;
|
||
ld.shared.u32 %r2967, [%rd961];
|
||
xor.b32 %r2968, %r2967, %r2966;
|
||
// inline asm
|
||
bfe.u32 %r2634, %r2635, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd962, %r2634, 4;
|
||
add.s64 %rd963, %rd110, %rd962;
|
||
ld.shared.u32 %r2969, [%rd963];
|
||
add.s32 %r2970, %r2969, %r2968;
|
||
xor.b32 %r2971, %r3885, %r2619;
|
||
xor.b32 %r2651, %r2971, %r2970;
|
||
// inline asm
|
||
bfe.u32 %r2638, %r2651, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd964, %r2638, 4;
|
||
add.s64 %rd965, %rd98, %rd964;
|
||
ld.shared.u32 %r2972, [%rd965];
|
||
// inline asm
|
||
bfe.u32 %r2642, %r2651, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd966, %r2642, 4;
|
||
add.s64 %rd967, %rd102, %rd966;
|
||
ld.shared.u32 %r2973, [%rd967];
|
||
add.s32 %r2974, %r2973, %r2972;
|
||
// inline asm
|
||
bfe.u32 %r2646, %r2651, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd968, %r2646, 4;
|
||
add.s64 %rd969, %rd106, %rd968;
|
||
ld.shared.u32 %r2975, [%rd969];
|
||
xor.b32 %r2976, %r2975, %r2974;
|
||
// inline asm
|
||
bfe.u32 %r2650, %r2651, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd970, %r2650, 4;
|
||
add.s64 %rd971, %rd110, %rd970;
|
||
ld.shared.u32 %r2977, [%rd971];
|
||
add.s32 %r2978, %r2977, %r2976;
|
||
xor.b32 %r2979, %r3884, %r2635;
|
||
xor.b32 %r2667, %r2979, %r2978;
|
||
// inline asm
|
||
bfe.u32 %r2654, %r2667, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd972, %r2654, 4;
|
||
add.s64 %rd973, %rd98, %rd972;
|
||
ld.shared.u32 %r2980, [%rd973];
|
||
// inline asm
|
||
bfe.u32 %r2658, %r2667, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd974, %r2658, 4;
|
||
add.s64 %rd975, %rd102, %rd974;
|
||
ld.shared.u32 %r2981, [%rd975];
|
||
add.s32 %r2982, %r2981, %r2980;
|
||
// inline asm
|
||
bfe.u32 %r2662, %r2667, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd976, %r2662, 4;
|
||
add.s64 %rd977, %rd106, %rd976;
|
||
ld.shared.u32 %r2983, [%rd977];
|
||
xor.b32 %r2984, %r2983, %r2982;
|
||
// inline asm
|
||
bfe.u32 %r2666, %r2667, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd978, %r2666, 4;
|
||
add.s64 %rd979, %rd110, %rd978;
|
||
ld.shared.u32 %r2985, [%rd979];
|
||
add.s32 %r2986, %r2985, %r2984;
|
||
xor.b32 %r2987, %r3883, %r2651;
|
||
xor.b32 %r2683, %r2987, %r2986;
|
||
// inline asm
|
||
bfe.u32 %r2670, %r2683, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd980, %r2670, 4;
|
||
add.s64 %rd981, %rd98, %rd980;
|
||
ld.shared.u32 %r2988, [%rd981];
|
||
// inline asm
|
||
bfe.u32 %r2674, %r2683, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd982, %r2674, 4;
|
||
add.s64 %rd983, %rd102, %rd982;
|
||
ld.shared.u32 %r2989, [%rd983];
|
||
add.s32 %r2990, %r2989, %r2988;
|
||
// inline asm
|
||
bfe.u32 %r2678, %r2683, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd984, %r2678, 4;
|
||
add.s64 %rd985, %rd106, %rd984;
|
||
ld.shared.u32 %r2991, [%rd985];
|
||
xor.b32 %r2992, %r2991, %r2990;
|
||
// inline asm
|
||
bfe.u32 %r2682, %r2683, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd986, %r2682, 4;
|
||
add.s64 %rd987, %rd110, %rd986;
|
||
ld.shared.u32 %r2993, [%rd987];
|
||
add.s32 %r2994, %r2993, %r2992;
|
||
xor.b32 %r2995, %r3882, %r2667;
|
||
xor.b32 %r2699, %r2995, %r2994;
|
||
// inline asm
|
||
bfe.u32 %r2686, %r2699, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd988, %r2686, 4;
|
||
add.s64 %rd989, %rd98, %rd988;
|
||
ld.shared.u32 %r2996, [%rd989];
|
||
// inline asm
|
||
bfe.u32 %r2690, %r2699, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd990, %r2690, 4;
|
||
add.s64 %rd991, %rd102, %rd990;
|
||
ld.shared.u32 %r2997, [%rd991];
|
||
add.s32 %r2998, %r2997, %r2996;
|
||
// inline asm
|
||
bfe.u32 %r2694, %r2699, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd992, %r2694, 4;
|
||
add.s64 %rd993, %rd106, %rd992;
|
||
ld.shared.u32 %r2999, [%rd993];
|
||
xor.b32 %r3000, %r2999, %r2998;
|
||
// inline asm
|
||
bfe.u32 %r2698, %r2699, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd994, %r2698, 4;
|
||
add.s64 %rd995, %rd110, %rd994;
|
||
ld.shared.u32 %r3001, [%rd995];
|
||
add.s32 %r3002, %r3001, %r3000;
|
||
xor.b32 %r3003, %r3881, %r2683;
|
||
xor.b32 %r2715, %r3003, %r3002;
|
||
// inline asm
|
||
bfe.u32 %r2702, %r2715, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd996, %r2702, 4;
|
||
add.s64 %rd997, %rd98, %rd996;
|
||
ld.shared.u32 %r3004, [%rd997];
|
||
// inline asm
|
||
bfe.u32 %r2706, %r2715, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd998, %r2706, 4;
|
||
add.s64 %rd999, %rd102, %rd998;
|
||
ld.shared.u32 %r3005, [%rd999];
|
||
add.s32 %r3006, %r3005, %r3004;
|
||
// inline asm
|
||
bfe.u32 %r2710, %r2715, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1000, %r2710, 4;
|
||
add.s64 %rd1001, %rd106, %rd1000;
|
||
ld.shared.u32 %r3007, [%rd1001];
|
||
xor.b32 %r3008, %r3007, %r3006;
|
||
// inline asm
|
||
bfe.u32 %r2714, %r2715, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1002, %r2714, 4;
|
||
add.s64 %rd1003, %rd110, %rd1002;
|
||
ld.shared.u32 %r3009, [%rd1003];
|
||
add.s32 %r3010, %r3009, %r3008;
|
||
xor.b32 %r3011, %r3880, %r2699;
|
||
xor.b32 %r2731, %r3011, %r3010;
|
||
// inline asm
|
||
bfe.u32 %r2718, %r2731, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1004, %r2718, 4;
|
||
add.s64 %rd1005, %rd98, %rd1004;
|
||
ld.shared.u32 %r3012, [%rd1005];
|
||
// inline asm
|
||
bfe.u32 %r2722, %r2731, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1006, %r2722, 4;
|
||
add.s64 %rd1007, %rd102, %rd1006;
|
||
ld.shared.u32 %r3013, [%rd1007];
|
||
add.s32 %r3014, %r3013, %r3012;
|
||
// inline asm
|
||
bfe.u32 %r2726, %r2731, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1008, %r2726, 4;
|
||
add.s64 %rd1009, %rd106, %rd1008;
|
||
ld.shared.u32 %r3015, [%rd1009];
|
||
xor.b32 %r3016, %r3015, %r3014;
|
||
// inline asm
|
||
bfe.u32 %r2730, %r2731, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1010, %r2730, 4;
|
||
add.s64 %rd1011, %rd110, %rd1010;
|
||
ld.shared.u32 %r3017, [%rd1011];
|
||
add.s32 %r3018, %r3017, %r3016;
|
||
xor.b32 %r3019, %r3879, %r2715;
|
||
xor.b32 %r2747, %r3019, %r3018;
|
||
// inline asm
|
||
bfe.u32 %r2734, %r2747, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1012, %r2734, 4;
|
||
add.s64 %rd1013, %rd98, %rd1012;
|
||
ld.shared.u32 %r3020, [%rd1013];
|
||
// inline asm
|
||
bfe.u32 %r2738, %r2747, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1014, %r2738, 4;
|
||
add.s64 %rd1015, %rd102, %rd1014;
|
||
ld.shared.u32 %r3021, [%rd1015];
|
||
add.s32 %r3022, %r3021, %r3020;
|
||
// inline asm
|
||
bfe.u32 %r2742, %r2747, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1016, %r2742, 4;
|
||
add.s64 %rd1017, %rd106, %rd1016;
|
||
ld.shared.u32 %r3023, [%rd1017];
|
||
xor.b32 %r3024, %r3023, %r3022;
|
||
// inline asm
|
||
bfe.u32 %r2746, %r2747, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1018, %r2746, 4;
|
||
add.s64 %rd1019, %rd110, %rd1018;
|
||
ld.shared.u32 %r3025, [%rd1019];
|
||
add.s32 %r3026, %r3025, %r3024;
|
||
xor.b32 %r3027, %r3878, %r2731;
|
||
xor.b32 %r2763, %r3027, %r3026;
|
||
// inline asm
|
||
bfe.u32 %r2750, %r2763, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1020, %r2750, 4;
|
||
add.s64 %rd1021, %rd98, %rd1020;
|
||
ld.shared.u32 %r3028, [%rd1021];
|
||
// inline asm
|
||
bfe.u32 %r2754, %r2763, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1022, %r2754, 4;
|
||
add.s64 %rd1023, %rd102, %rd1022;
|
||
ld.shared.u32 %r3029, [%rd1023];
|
||
add.s32 %r3030, %r3029, %r3028;
|
||
// inline asm
|
||
bfe.u32 %r2758, %r2763, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1024, %r2758, 4;
|
||
add.s64 %rd1025, %rd106, %rd1024;
|
||
ld.shared.u32 %r3031, [%rd1025];
|
||
xor.b32 %r3032, %r3031, %r3030;
|
||
// inline asm
|
||
bfe.u32 %r2762, %r2763, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1026, %r2762, 4;
|
||
add.s64 %rd1027, %rd110, %rd1026;
|
||
ld.shared.u32 %r3033, [%rd1027];
|
||
add.s32 %r3034, %r3033, %r3032;
|
||
xor.b32 %r3035, %r3877, %r2747;
|
||
xor.b32 %r2779, %r3035, %r3034;
|
||
// inline asm
|
||
bfe.u32 %r2766, %r2779, %r3855, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1028, %r2766, 4;
|
||
add.s64 %rd1029, %rd98, %rd1028;
|
||
ld.shared.u32 %r3036, [%rd1029];
|
||
// inline asm
|
||
bfe.u32 %r2770, %r2779, %r3857, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1030, %r2770, 4;
|
||
add.s64 %rd1031, %rd102, %rd1030;
|
||
ld.shared.u32 %r3037, [%rd1031];
|
||
add.s32 %r3038, %r3037, %r3036;
|
||
// inline asm
|
||
bfe.u32 %r2774, %r2779, %r3856, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1032, %r2774, 4;
|
||
add.s64 %rd1033, %rd106, %rd1032;
|
||
ld.shared.u32 %r3039, [%rd1033];
|
||
xor.b32 %r3040, %r3039, %r3038;
|
||
// inline asm
|
||
bfe.u32 %r2778, %r2779, %r2269, %r3856;
|
||
// inline asm
|
||
mul.wide.u32 %rd1034, %r2778, 4;
|
||
add.s64 %rd1035, %rd110, %rd1034;
|
||
ld.shared.u32 %r3041, [%rd1035];
|
||
add.s32 %r3042, %r3041, %r3040;
|
||
xor.b32 %r3043, %r3876, %r2763;
|
||
xor.b32 %r3897, %r3043, %r3042;
|
||
// Latch of the loop whose back-edge targets BB3_26 (the label itself lies
// outside this excerpt).
// %r3898 = %r3875 ^ %r2779. Together with %r3897 (computed immediately
// above) it is stored at byte offsets +8 and +12 from %rd1339; the same
// pass stored two other words at offsets +0 and +4 earlier on.
xor.b32 %r3898, %r3875, %r2779;
|
||
st.shared.u32 [%rd1339+8], %r3898;
|
||
st.shared.u32 [%rd1339+12], %r3897;
|
||
// Move the shared-memory write pointer past the four 32-bit words
// (16 bytes) written during this pass.
add.s64 %rd1339, %rd1339, 16;
|
||
// The counter advances by 4 per pass, matching the four words stored.
add.s32 %r3902, %r3902, 4;
|
||
// Unsigned compare: take the back-edge while the counter is below 256.
setp.lt.u32 %p15, %r3902, 256;
|
||
@%p15 bra BB3_26;
|
||
|
||
// Straight-line set-up executed once before the BB3_28 loop.
// Computes %rd1340 = address of m03200_init$S3_all + %tid.x * 1024, i.e. a
// 1024-byte stride per thread index. mul.wide.s32 produces the 64-bit
// product of the signed 32-bit thread index and 1024.
// NOTE(review): presumably this selects a per-thread 256 x u32 table; the
// declaration of m03200_init$S3_all is outside this excerpt - confirm.
mov.u32 %r3865, %tid.x;
|
||
mul.wide.s32 %rd1324, %r3865, 1024;
|
||
mov.u64 %rd1322, m03200_init$S3_all;
|
||
add.s64 %rd1340, %rd1322, %rd1324;
|
||
// %r3044 holds the constant 0; the loop that follows passes it to bfe.u32
// as the bit-offset operand when extracting the lowest byte.
mov.u32 %r3044, 0;
|
||
// %r3905 starts at 0.
// NOTE(review): looks like the BB3_28 loop counter; its update and compare
// are outside this excerpt - confirm.
mov.u32 %r3905, %r3044;
|
||
|
||
// Loop head. The end of this loop lies outside this excerpt.
BB3_28:
|
||
// Operands for the bfe.u32 instructions that follow: %r3858 (24) and
// %r3860 (16) are bit offsets, and %r3859 (8) is both the field width and
// the bit offset of the second-lowest byte. Because these moves sit after
// the label, they are executed again on every pass.
mov.u32 %r3860, 16;
|
||
mov.u32 %r3859, 8;
|
||
mov.u32 %r3858, 24;
|
||
// %r3058 = %r39 ^ %r3898 ^ %r3892. This word is split into four 8-bit
// fields by the bfe.u32 instructions below, each field indexing a
// shared-memory table of 32-bit entries (index * 4 added to a base address).
// %r39 and %r3892 are defined outside this excerpt.
xor.b32 %r3557, %r39, %r3898;
|
||
xor.b32 %r3058, %r3557, %r3892;
|
||
// inline asm
|
||
bfe.u32 %r3045, %r3058, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1041, %r3045, 4;
|
||
add.s64 %rd1042, %rd98, %rd1041;
|
||
ld.shared.u32 %r3558, [%rd1042];
|
||
// inline asm
|
||
bfe.u32 %r3049, %r3058, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1045, %r3049, 4;
|
||
add.s64 %rd1046, %rd102, %rd1045;
|
||
ld.shared.u32 %r3559, [%rd1046];
|
||
add.s32 %r3560, %r3559, %r3558;
|
||
// inline asm
|
||
bfe.u32 %r3053, %r3058, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1049, %r3053, 4;
|
||
add.s64 %rd1050, %rd106, %rd1049;
|
||
ld.shared.u32 %r3561, [%rd1050];
|
||
xor.b32 %r3562, %r3561, %r3560;
|
||
// inline asm
|
||
bfe.u32 %r3057, %r3058, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1053, %r3057, 4;
|
||
add.s64 %rd1054, %rd110, %rd1053;
|
||
ld.shared.u32 %r3563, [%rd1054];
|
||
add.s32 %r3564, %r3563, %r3562;
|
||
xor.b32 %r3565, %r40, %r3897;
|
||
xor.b32 %r3566, %r3565, %r3891;
|
||
xor.b32 %r3074, %r3566, %r3564;
|
||
// inline asm
|
||
bfe.u32 %r3061, %r3074, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1055, %r3061, 4;
|
||
add.s64 %rd1056, %rd98, %rd1055;
|
||
ld.shared.u32 %r3567, [%rd1056];
|
||
// inline asm
|
||
bfe.u32 %r3065, %r3074, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1057, %r3065, 4;
|
||
add.s64 %rd1058, %rd102, %rd1057;
|
||
ld.shared.u32 %r3568, [%rd1058];
|
||
add.s32 %r3569, %r3568, %r3567;
|
||
// inline asm
|
||
bfe.u32 %r3069, %r3074, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1059, %r3069, 4;
|
||
add.s64 %rd1060, %rd106, %rd1059;
|
||
ld.shared.u32 %r3570, [%rd1060];
|
||
xor.b32 %r3571, %r3570, %r3569;
|
||
// inline asm
|
||
bfe.u32 %r3073, %r3074, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1061, %r3073, 4;
|
||
add.s64 %rd1062, %rd110, %rd1061;
|
||
ld.shared.u32 %r3572, [%rd1062];
|
||
add.s32 %r3573, %r3572, %r3571;
|
||
xor.b32 %r3574, %r3890, %r3058;
|
||
xor.b32 %r3090, %r3574, %r3573;
|
||
// inline asm
|
||
bfe.u32 %r3077, %r3090, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1063, %r3077, 4;
|
||
add.s64 %rd1064, %rd98, %rd1063;
|
||
ld.shared.u32 %r3575, [%rd1064];
|
||
// inline asm
|
||
bfe.u32 %r3081, %r3090, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1065, %r3081, 4;
|
||
add.s64 %rd1066, %rd102, %rd1065;
|
||
ld.shared.u32 %r3576, [%rd1066];
|
||
add.s32 %r3577, %r3576, %r3575;
|
||
// inline asm
|
||
bfe.u32 %r3085, %r3090, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1067, %r3085, 4;
|
||
add.s64 %rd1068, %rd106, %rd1067;
|
||
ld.shared.u32 %r3578, [%rd1068];
|
||
xor.b32 %r3579, %r3578, %r3577;
|
||
// inline asm
|
||
bfe.u32 %r3089, %r3090, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1069, %r3089, 4;
|
||
add.s64 %rd1070, %rd110, %rd1069;
|
||
ld.shared.u32 %r3580, [%rd1070];
|
||
add.s32 %r3581, %r3580, %r3579;
|
||
xor.b32 %r3582, %r3889, %r3074;
|
||
xor.b32 %r3106, %r3582, %r3581;
|
||
// inline asm
|
||
bfe.u32 %r3093, %r3106, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1071, %r3093, 4;
|
||
add.s64 %rd1072, %rd98, %rd1071;
|
||
ld.shared.u32 %r3583, [%rd1072];
|
||
// inline asm
|
||
bfe.u32 %r3097, %r3106, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1073, %r3097, 4;
|
||
add.s64 %rd1074, %rd102, %rd1073;
|
||
ld.shared.u32 %r3584, [%rd1074];
|
||
add.s32 %r3585, %r3584, %r3583;
|
||
// inline asm
|
||
bfe.u32 %r3101, %r3106, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1075, %r3101, 4;
|
||
add.s64 %rd1076, %rd106, %rd1075;
|
||
ld.shared.u32 %r3586, [%rd1076];
|
||
xor.b32 %r3587, %r3586, %r3585;
|
||
// inline asm
|
||
bfe.u32 %r3105, %r3106, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1077, %r3105, 4;
|
||
add.s64 %rd1078, %rd110, %rd1077;
|
||
ld.shared.u32 %r3588, [%rd1078];
|
||
add.s32 %r3589, %r3588, %r3587;
|
||
xor.b32 %r3590, %r3888, %r3090;
|
||
xor.b32 %r3122, %r3590, %r3589;
|
||
// inline asm
|
||
bfe.u32 %r3109, %r3122, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1079, %r3109, 4;
|
||
add.s64 %rd1080, %rd98, %rd1079;
|
||
ld.shared.u32 %r3591, [%rd1080];
|
||
// inline asm
|
||
bfe.u32 %r3113, %r3122, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1081, %r3113, 4;
|
||
add.s64 %rd1082, %rd102, %rd1081;
|
||
ld.shared.u32 %r3592, [%rd1082];
|
||
add.s32 %r3593, %r3592, %r3591;
|
||
// inline asm
|
||
bfe.u32 %r3117, %r3122, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1083, %r3117, 4;
|
||
add.s64 %rd1084, %rd106, %rd1083;
|
||
ld.shared.u32 %r3594, [%rd1084];
|
||
xor.b32 %r3595, %r3594, %r3593;
|
||
// inline asm
|
||
bfe.u32 %r3121, %r3122, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1085, %r3121, 4;
|
||
add.s64 %rd1086, %rd110, %rd1085;
|
||
ld.shared.u32 %r3596, [%rd1086];
|
||
add.s32 %r3597, %r3596, %r3595;
|
||
xor.b32 %r3598, %r3887, %r3106;
|
||
xor.b32 %r3138, %r3598, %r3597;
|
||
// inline asm
|
||
bfe.u32 %r3125, %r3138, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1087, %r3125, 4;
|
||
add.s64 %rd1088, %rd98, %rd1087;
|
||
ld.shared.u32 %r3599, [%rd1088];
|
||
// inline asm
|
||
bfe.u32 %r3129, %r3138, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1089, %r3129, 4;
|
||
add.s64 %rd1090, %rd102, %rd1089;
|
||
ld.shared.u32 %r3600, [%rd1090];
|
||
add.s32 %r3601, %r3600, %r3599;
|
||
// inline asm
|
||
bfe.u32 %r3133, %r3138, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1091, %r3133, 4;
|
||
add.s64 %rd1092, %rd106, %rd1091;
|
||
ld.shared.u32 %r3602, [%rd1092];
|
||
xor.b32 %r3603, %r3602, %r3601;
|
||
// inline asm
|
||
bfe.u32 %r3137, %r3138, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1093, %r3137, 4;
|
||
add.s64 %rd1094, %rd110, %rd1093;
|
||
ld.shared.u32 %r3604, [%rd1094];
|
||
add.s32 %r3605, %r3604, %r3603;
|
||
xor.b32 %r3606, %r3886, %r3122;
|
||
xor.b32 %r3154, %r3606, %r3605;
|
||
// inline asm
|
||
bfe.u32 %r3141, %r3154, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1095, %r3141, 4;
|
||
add.s64 %rd1096, %rd98, %rd1095;
|
||
ld.shared.u32 %r3607, [%rd1096];
|
||
// inline asm
|
||
bfe.u32 %r3145, %r3154, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1097, %r3145, 4;
|
||
add.s64 %rd1098, %rd102, %rd1097;
|
||
ld.shared.u32 %r3608, [%rd1098];
|
||
add.s32 %r3609, %r3608, %r3607;
|
||
// inline asm
|
||
bfe.u32 %r3149, %r3154, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1099, %r3149, 4;
|
||
add.s64 %rd1100, %rd106, %rd1099;
|
||
ld.shared.u32 %r3610, [%rd1100];
|
||
xor.b32 %r3611, %r3610, %r3609;
|
||
// inline asm
|
||
bfe.u32 %r3153, %r3154, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1101, %r3153, 4;
|
||
add.s64 %rd1102, %rd110, %rd1101;
|
||
ld.shared.u32 %r3612, [%rd1102];
|
||
add.s32 %r3613, %r3612, %r3611;
|
||
xor.b32 %r3614, %r3885, %r3138;
|
||
xor.b32 %r3170, %r3614, %r3613;
|
||
// inline asm
|
||
bfe.u32 %r3157, %r3170, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1103, %r3157, 4;
|
||
add.s64 %rd1104, %rd98, %rd1103;
|
||
ld.shared.u32 %r3615, [%rd1104];
|
||
// inline asm
|
||
bfe.u32 %r3161, %r3170, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1105, %r3161, 4;
|
||
add.s64 %rd1106, %rd102, %rd1105;
|
||
ld.shared.u32 %r3616, [%rd1106];
|
||
add.s32 %r3617, %r3616, %r3615;
|
||
// inline asm
|
||
bfe.u32 %r3165, %r3170, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1107, %r3165, 4;
|
||
add.s64 %rd1108, %rd106, %rd1107;
|
||
ld.shared.u32 %r3618, [%rd1108];
|
||
xor.b32 %r3619, %r3618, %r3617;
|
||
// inline asm
|
||
bfe.u32 %r3169, %r3170, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1109, %r3169, 4;
|
||
add.s64 %rd1110, %rd110, %rd1109;
|
||
ld.shared.u32 %r3620, [%rd1110];
|
||
add.s32 %r3621, %r3620, %r3619;
|
||
xor.b32 %r3622, %r3884, %r3154;
|
||
xor.b32 %r3186, %r3622, %r3621;
|
||
// inline asm
|
||
bfe.u32 %r3173, %r3186, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1111, %r3173, 4;
|
||
add.s64 %rd1112, %rd98, %rd1111;
|
||
ld.shared.u32 %r3623, [%rd1112];
|
||
// inline asm
|
||
bfe.u32 %r3177, %r3186, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1113, %r3177, 4;
|
||
add.s64 %rd1114, %rd102, %rd1113;
|
||
ld.shared.u32 %r3624, [%rd1114];
|
||
add.s32 %r3625, %r3624, %r3623;
|
||
// inline asm
|
||
bfe.u32 %r3181, %r3186, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1115, %r3181, 4;
|
||
add.s64 %rd1116, %rd106, %rd1115;
|
||
ld.shared.u32 %r3626, [%rd1116];
|
||
xor.b32 %r3627, %r3626, %r3625;
|
||
// inline asm
|
||
bfe.u32 %r3185, %r3186, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1117, %r3185, 4;
|
||
add.s64 %rd1118, %rd110, %rd1117;
|
||
ld.shared.u32 %r3628, [%rd1118];
|
||
add.s32 %r3629, %r3628, %r3627;
|
||
xor.b32 %r3630, %r3883, %r3170;
|
||
xor.b32 %r3202, %r3630, %r3629;
|
||
// inline asm
|
||
bfe.u32 %r3189, %r3202, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1119, %r3189, 4;
|
||
add.s64 %rd1120, %rd98, %rd1119;
|
||
ld.shared.u32 %r3631, [%rd1120];
|
||
// inline asm
|
||
bfe.u32 %r3193, %r3202, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1121, %r3193, 4;
|
||
add.s64 %rd1122, %rd102, %rd1121;
|
||
ld.shared.u32 %r3632, [%rd1122];
|
||
add.s32 %r3633, %r3632, %r3631;
|
||
// inline asm
|
||
bfe.u32 %r3197, %r3202, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1123, %r3197, 4;
|
||
add.s64 %rd1124, %rd106, %rd1123;
|
||
ld.shared.u32 %r3634, [%rd1124];
|
||
xor.b32 %r3635, %r3634, %r3633;
|
||
// inline asm
|
||
bfe.u32 %r3201, %r3202, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1125, %r3201, 4;
|
||
add.s64 %rd1126, %rd110, %rd1125;
|
||
ld.shared.u32 %r3636, [%rd1126];
|
||
add.s32 %r3637, %r3636, %r3635;
|
||
xor.b32 %r3638, %r3882, %r3186;
|
||
xor.b32 %r3218, %r3638, %r3637;
|
||
// inline asm
|
||
bfe.u32 %r3205, %r3218, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1127, %r3205, 4;
|
||
add.s64 %rd1128, %rd98, %rd1127;
|
||
ld.shared.u32 %r3639, [%rd1128];
|
||
// inline asm
|
||
bfe.u32 %r3209, %r3218, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1129, %r3209, 4;
|
||
add.s64 %rd1130, %rd102, %rd1129;
|
||
ld.shared.u32 %r3640, [%rd1130];
|
||
add.s32 %r3641, %r3640, %r3639;
|
||
// inline asm
|
||
bfe.u32 %r3213, %r3218, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1131, %r3213, 4;
|
||
add.s64 %rd1132, %rd106, %rd1131;
|
||
ld.shared.u32 %r3642, [%rd1132];
|
||
xor.b32 %r3643, %r3642, %r3641;
|
||
// inline asm
|
||
bfe.u32 %r3217, %r3218, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1133, %r3217, 4;
|
||
add.s64 %rd1134, %rd110, %rd1133;
|
||
ld.shared.u32 %r3644, [%rd1134];
|
||
add.s32 %r3645, %r3644, %r3643;
|
||
xor.b32 %r3646, %r3881, %r3202;
|
||
xor.b32 %r3234, %r3646, %r3645;
|
||
// inline asm
|
||
bfe.u32 %r3221, %r3234, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1135, %r3221, 4;
|
||
add.s64 %rd1136, %rd98, %rd1135;
|
||
ld.shared.u32 %r3647, [%rd1136];
|
||
// inline asm
|
||
bfe.u32 %r3225, %r3234, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1137, %r3225, 4;
|
||
add.s64 %rd1138, %rd102, %rd1137;
|
||
ld.shared.u32 %r3648, [%rd1138];
|
||
add.s32 %r3649, %r3648, %r3647;
|
||
// inline asm
|
||
bfe.u32 %r3229, %r3234, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1139, %r3229, 4;
|
||
add.s64 %rd1140, %rd106, %rd1139;
|
||
ld.shared.u32 %r3650, [%rd1140];
|
||
xor.b32 %r3651, %r3650, %r3649;
|
||
// inline asm
|
||
bfe.u32 %r3233, %r3234, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1141, %r3233, 4;
|
||
add.s64 %rd1142, %rd110, %rd1141;
|
||
ld.shared.u32 %r3652, [%rd1142];
|
||
add.s32 %r3653, %r3652, %r3651;
|
||
xor.b32 %r3654, %r3880, %r3218;
|
||
xor.b32 %r3250, %r3654, %r3653;
|
||
// inline asm
|
||
bfe.u32 %r3237, %r3250, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1143, %r3237, 4;
|
||
add.s64 %rd1144, %rd98, %rd1143;
|
||
ld.shared.u32 %r3655, [%rd1144];
|
||
// inline asm
|
||
bfe.u32 %r3241, %r3250, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1145, %r3241, 4;
|
||
add.s64 %rd1146, %rd102, %rd1145;
|
||
ld.shared.u32 %r3656, [%rd1146];
|
||
add.s32 %r3657, %r3656, %r3655;
|
||
// inline asm
|
||
bfe.u32 %r3245, %r3250, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1147, %r3245, 4;
|
||
add.s64 %rd1148, %rd106, %rd1147;
|
||
ld.shared.u32 %r3658, [%rd1148];
|
||
xor.b32 %r3659, %r3658, %r3657;
|
||
// inline asm
|
||
bfe.u32 %r3249, %r3250, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1149, %r3249, 4;
|
||
add.s64 %rd1150, %rd110, %rd1149;
|
||
ld.shared.u32 %r3660, [%rd1150];
|
||
add.s32 %r3661, %r3660, %r3659;
|
||
xor.b32 %r3662, %r3879, %r3234;
|
||
xor.b32 %r3266, %r3662, %r3661;
|
||
// inline asm
|
||
bfe.u32 %r3253, %r3266, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1151, %r3253, 4;
|
||
add.s64 %rd1152, %rd98, %rd1151;
|
||
ld.shared.u32 %r3663, [%rd1152];
|
||
// inline asm
|
||
bfe.u32 %r3257, %r3266, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1153, %r3257, 4;
|
||
add.s64 %rd1154, %rd102, %rd1153;
|
||
ld.shared.u32 %r3664, [%rd1154];
|
||
add.s32 %r3665, %r3664, %r3663;
|
||
// inline asm
|
||
bfe.u32 %r3261, %r3266, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1155, %r3261, 4;
|
||
add.s64 %rd1156, %rd106, %rd1155;
|
||
ld.shared.u32 %r3666, [%rd1156];
|
||
xor.b32 %r3667, %r3666, %r3665;
|
||
// inline asm
|
||
bfe.u32 %r3265, %r3266, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1157, %r3265, 4;
|
||
add.s64 %rd1158, %rd110, %rd1157;
|
||
ld.shared.u32 %r3668, [%rd1158];
|
||
add.s32 %r3669, %r3668, %r3667;
|
||
xor.b32 %r3670, %r3878, %r3250;
|
||
xor.b32 %r3282, %r3670, %r3669;
|
||
// inline asm
|
||
bfe.u32 %r3269, %r3282, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1159, %r3269, 4;
|
||
add.s64 %rd1160, %rd98, %rd1159;
|
||
ld.shared.u32 %r3671, [%rd1160];
|
||
// inline asm
|
||
bfe.u32 %r3273, %r3282, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1161, %r3273, 4;
|
||
add.s64 %rd1162, %rd102, %rd1161;
|
||
ld.shared.u32 %r3672, [%rd1162];
|
||
add.s32 %r3673, %r3672, %r3671;
|
||
// inline asm
|
||
bfe.u32 %r3277, %r3282, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1163, %r3277, 4;
|
||
add.s64 %rd1164, %rd106, %rd1163;
|
||
ld.shared.u32 %r3674, [%rd1164];
|
||
xor.b32 %r3675, %r3674, %r3673;
|
||
// inline asm
|
||
bfe.u32 %r3281, %r3282, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1165, %r3281, 4;
|
||
add.s64 %rd1166, %rd110, %rd1165;
|
||
ld.shared.u32 %r3676, [%rd1166];
|
||
add.s32 %r3677, %r3676, %r3675;
|
||
xor.b32 %r3678, %r3877, %r3266;
|
||
xor.b32 %r3298, %r3678, %r3677;
|
||
// inline asm
|
||
bfe.u32 %r3285, %r3298, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1167, %r3285, 4;
|
||
add.s64 %rd1168, %rd98, %rd1167;
|
||
ld.shared.u32 %r3679, [%rd1168];
|
||
// inline asm
|
||
bfe.u32 %r3289, %r3298, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1169, %r3289, 4;
|
||
add.s64 %rd1170, %rd102, %rd1169;
|
||
ld.shared.u32 %r3680, [%rd1170];
|
||
add.s32 %r3681, %r3680, %r3679;
|
||
// inline asm
|
||
bfe.u32 %r3293, %r3298, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1171, %r3293, 4;
|
||
add.s64 %rd1172, %rd106, %rd1171;
|
||
ld.shared.u32 %r3682, [%rd1172];
|
||
xor.b32 %r3683, %r3682, %r3681;
|
||
// inline asm
|
||
bfe.u32 %r3297, %r3298, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1173, %r3297, 4;
|
||
add.s64 %rd1174, %rd110, %rd1173;
|
||
ld.shared.u32 %r3684, [%rd1174];
|
||
add.s32 %r3685, %r3684, %r3683;
|
||
xor.b32 %r3686, %r3876, %r3282;
|
||
xor.b32 %r3687, %r3686, %r3685;
|
||
xor.b32 %r3688, %r3875, %r3298;
|
||
st.shared.u32 [%rd1340], %r3688;
|
||
st.shared.u32 [%rd1340+4], %r3687;
|
||
xor.b32 %r3689, %r3688, %r37;
|
||
xor.b32 %r3314, %r3689, %r3892;
|
||
// inline asm
|
||
bfe.u32 %r3301, %r3314, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1175, %r3301, 4;
|
||
add.s64 %rd1176, %rd98, %rd1175;
|
||
ld.shared.u32 %r3690, [%rd1176];
|
||
// inline asm
|
||
bfe.u32 %r3305, %r3314, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1177, %r3305, 4;
|
||
add.s64 %rd1178, %rd102, %rd1177;
|
||
ld.shared.u32 %r3691, [%rd1178];
|
||
add.s32 %r3692, %r3691, %r3690;
|
||
// inline asm
|
||
bfe.u32 %r3309, %r3314, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1179, %r3309, 4;
|
||
add.s64 %rd1180, %rd106, %rd1179;
|
||
ld.shared.u32 %r3693, [%rd1180];
|
||
xor.b32 %r3694, %r3693, %r3692;
|
||
// inline asm
|
||
bfe.u32 %r3313, %r3314, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1181, %r3313, 4;
|
||
add.s64 %rd1182, %rd110, %rd1181;
|
||
ld.shared.u32 %r3695, [%rd1182];
|
||
add.s32 %r3696, %r3695, %r3694;
|
||
xor.b32 %r3697, %r38, %r3687;
|
||
xor.b32 %r3698, %r3697, %r3891;
|
||
xor.b32 %r3330, %r3698, %r3696;
|
||
// inline asm
|
||
bfe.u32 %r3317, %r3330, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1183, %r3317, 4;
|
||
add.s64 %rd1184, %rd98, %rd1183;
|
||
ld.shared.u32 %r3699, [%rd1184];
|
||
// inline asm
|
||
bfe.u32 %r3321, %r3330, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1185, %r3321, 4;
|
||
add.s64 %rd1186, %rd102, %rd1185;
|
||
ld.shared.u32 %r3700, [%rd1186];
|
||
add.s32 %r3701, %r3700, %r3699;
|
||
// inline asm
|
||
bfe.u32 %r3325, %r3330, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1187, %r3325, 4;
|
||
add.s64 %rd1188, %rd106, %rd1187;
|
||
ld.shared.u32 %r3702, [%rd1188];
|
||
xor.b32 %r3703, %r3702, %r3701;
|
||
// inline asm
|
||
bfe.u32 %r3329, %r3330, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1189, %r3329, 4;
|
||
add.s64 %rd1190, %rd110, %rd1189;
|
||
ld.shared.u32 %r3704, [%rd1190];
|
||
add.s32 %r3705, %r3704, %r3703;
|
||
xor.b32 %r3706, %r3890, %r3314;
|
||
xor.b32 %r3346, %r3706, %r3705;
|
||
// inline asm
|
||
bfe.u32 %r3333, %r3346, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1191, %r3333, 4;
|
||
add.s64 %rd1192, %rd98, %rd1191;
|
||
ld.shared.u32 %r3707, [%rd1192];
|
||
// inline asm
|
||
bfe.u32 %r3337, %r3346, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1193, %r3337, 4;
|
||
add.s64 %rd1194, %rd102, %rd1193;
|
||
ld.shared.u32 %r3708, [%rd1194];
|
||
add.s32 %r3709, %r3708, %r3707;
|
||
// inline asm
|
||
bfe.u32 %r3341, %r3346, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1195, %r3341, 4;
|
||
add.s64 %rd1196, %rd106, %rd1195;
|
||
ld.shared.u32 %r3710, [%rd1196];
|
||
xor.b32 %r3711, %r3710, %r3709;
|
||
// inline asm
|
||
bfe.u32 %r3345, %r3346, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1197, %r3345, 4;
|
||
add.s64 %rd1198, %rd110, %rd1197;
|
||
ld.shared.u32 %r3712, [%rd1198];
|
||
add.s32 %r3713, %r3712, %r3711;
|
||
xor.b32 %r3714, %r3889, %r3330;
|
||
xor.b32 %r3362, %r3714, %r3713;
|
||
// inline asm
|
||
bfe.u32 %r3349, %r3362, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1199, %r3349, 4;
|
||
add.s64 %rd1200, %rd98, %rd1199;
|
||
ld.shared.u32 %r3715, [%rd1200];
|
||
// inline asm
|
||
bfe.u32 %r3353, %r3362, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1201, %r3353, 4;
|
||
add.s64 %rd1202, %rd102, %rd1201;
|
||
ld.shared.u32 %r3716, [%rd1202];
|
||
add.s32 %r3717, %r3716, %r3715;
|
||
// inline asm
|
||
bfe.u32 %r3357, %r3362, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1203, %r3357, 4;
|
||
add.s64 %rd1204, %rd106, %rd1203;
|
||
ld.shared.u32 %r3718, [%rd1204];
|
||
xor.b32 %r3719, %r3718, %r3717;
|
||
// inline asm
|
||
bfe.u32 %r3361, %r3362, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1205, %r3361, 4;
|
||
add.s64 %rd1206, %rd110, %rd1205;
|
||
ld.shared.u32 %r3720, [%rd1206];
|
||
add.s32 %r3721, %r3720, %r3719;
|
||
xor.b32 %r3722, %r3888, %r3346;
|
||
xor.b32 %r3378, %r3722, %r3721;
|
||
// inline asm
|
||
bfe.u32 %r3365, %r3378, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1207, %r3365, 4;
|
||
add.s64 %rd1208, %rd98, %rd1207;
|
||
ld.shared.u32 %r3723, [%rd1208];
|
||
// inline asm
|
||
bfe.u32 %r3369, %r3378, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1209, %r3369, 4;
|
||
add.s64 %rd1210, %rd102, %rd1209;
|
||
ld.shared.u32 %r3724, [%rd1210];
|
||
add.s32 %r3725, %r3724, %r3723;
|
||
// inline asm
|
||
bfe.u32 %r3373, %r3378, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1211, %r3373, 4;
|
||
add.s64 %rd1212, %rd106, %rd1211;
|
||
ld.shared.u32 %r3726, [%rd1212];
|
||
xor.b32 %r3727, %r3726, %r3725;
|
||
// inline asm
|
||
bfe.u32 %r3377, %r3378, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1213, %r3377, 4;
|
||
add.s64 %rd1214, %rd110, %rd1213;
|
||
ld.shared.u32 %r3728, [%rd1214];
|
||
add.s32 %r3729, %r3728, %r3727;
|
||
xor.b32 %r3730, %r3887, %r3362;
|
||
xor.b32 %r3394, %r3730, %r3729;
|
||
// inline asm
|
||
bfe.u32 %r3381, %r3394, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1215, %r3381, 4;
|
||
add.s64 %rd1216, %rd98, %rd1215;
|
||
ld.shared.u32 %r3731, [%rd1216];
|
||
// inline asm
|
||
bfe.u32 %r3385, %r3394, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1217, %r3385, 4;
|
||
add.s64 %rd1218, %rd102, %rd1217;
|
||
ld.shared.u32 %r3732, [%rd1218];
|
||
add.s32 %r3733, %r3732, %r3731;
|
||
// inline asm
|
||
bfe.u32 %r3389, %r3394, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1219, %r3389, 4;
|
||
add.s64 %rd1220, %rd106, %rd1219;
|
||
ld.shared.u32 %r3734, [%rd1220];
|
||
xor.b32 %r3735, %r3734, %r3733;
|
||
// inline asm
|
||
bfe.u32 %r3393, %r3394, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1221, %r3393, 4;
|
||
add.s64 %rd1222, %rd110, %rd1221;
|
||
ld.shared.u32 %r3736, [%rd1222];
|
||
add.s32 %r3737, %r3736, %r3735;
|
||
xor.b32 %r3738, %r3886, %r3378;
|
||
xor.b32 %r3410, %r3738, %r3737;
|
||
// inline asm
|
||
bfe.u32 %r3397, %r3410, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1223, %r3397, 4;
|
||
add.s64 %rd1224, %rd98, %rd1223;
|
||
ld.shared.u32 %r3739, [%rd1224];
|
||
// inline asm
|
||
bfe.u32 %r3401, %r3410, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1225, %r3401, 4;
|
||
add.s64 %rd1226, %rd102, %rd1225;
|
||
ld.shared.u32 %r3740, [%rd1226];
|
||
add.s32 %r3741, %r3740, %r3739;
|
||
// inline asm
|
||
bfe.u32 %r3405, %r3410, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1227, %r3405, 4;
|
||
add.s64 %rd1228, %rd106, %rd1227;
|
||
ld.shared.u32 %r3742, [%rd1228];
|
||
xor.b32 %r3743, %r3742, %r3741;
|
||
// inline asm
|
||
bfe.u32 %r3409, %r3410, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1229, %r3409, 4;
|
||
add.s64 %rd1230, %rd110, %rd1229;
|
||
ld.shared.u32 %r3744, [%rd1230];
|
||
add.s32 %r3745, %r3744, %r3743;
|
||
xor.b32 %r3746, %r3885, %r3394;
|
||
xor.b32 %r3426, %r3746, %r3745;
|
||
// inline asm
|
||
bfe.u32 %r3413, %r3426, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1231, %r3413, 4;
|
||
add.s64 %rd1232, %rd98, %rd1231;
|
||
ld.shared.u32 %r3747, [%rd1232];
|
||
// inline asm
|
||
bfe.u32 %r3417, %r3426, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1233, %r3417, 4;
|
||
add.s64 %rd1234, %rd102, %rd1233;
|
||
ld.shared.u32 %r3748, [%rd1234];
|
||
add.s32 %r3749, %r3748, %r3747;
|
||
// inline asm
|
||
bfe.u32 %r3421, %r3426, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1235, %r3421, 4;
|
||
add.s64 %rd1236, %rd106, %rd1235;
|
||
ld.shared.u32 %r3750, [%rd1236];
|
||
xor.b32 %r3751, %r3750, %r3749;
|
||
// inline asm
|
||
bfe.u32 %r3425, %r3426, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1237, %r3425, 4;
|
||
add.s64 %rd1238, %rd110, %rd1237;
|
||
ld.shared.u32 %r3752, [%rd1238];
|
||
add.s32 %r3753, %r3752, %r3751;
|
||
xor.b32 %r3754, %r3884, %r3410;
|
||
xor.b32 %r3442, %r3754, %r3753;
|
||
// inline asm
|
||
bfe.u32 %r3429, %r3442, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1239, %r3429, 4;
|
||
add.s64 %rd1240, %rd98, %rd1239;
|
||
ld.shared.u32 %r3755, [%rd1240];
|
||
// inline asm
|
||
bfe.u32 %r3433, %r3442, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1241, %r3433, 4;
|
||
add.s64 %rd1242, %rd102, %rd1241;
|
||
ld.shared.u32 %r3756, [%rd1242];
|
||
add.s32 %r3757, %r3756, %r3755;
|
||
// inline asm
|
||
bfe.u32 %r3437, %r3442, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1243, %r3437, 4;
|
||
add.s64 %rd1244, %rd106, %rd1243;
|
||
ld.shared.u32 %r3758, [%rd1244];
|
||
xor.b32 %r3759, %r3758, %r3757;
|
||
// inline asm
|
||
bfe.u32 %r3441, %r3442, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1245, %r3441, 4;
|
||
add.s64 %rd1246, %rd110, %rd1245;
|
||
ld.shared.u32 %r3760, [%rd1246];
|
||
add.s32 %r3761, %r3760, %r3759;
|
||
xor.b32 %r3762, %r3883, %r3426;
|
||
xor.b32 %r3458, %r3762, %r3761;
|
||
// inline asm
|
||
bfe.u32 %r3445, %r3458, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1247, %r3445, 4;
|
||
add.s64 %rd1248, %rd98, %rd1247;
|
||
ld.shared.u32 %r3763, [%rd1248];
|
||
// inline asm
|
||
bfe.u32 %r3449, %r3458, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1249, %r3449, 4;
|
||
add.s64 %rd1250, %rd102, %rd1249;
|
||
ld.shared.u32 %r3764, [%rd1250];
|
||
add.s32 %r3765, %r3764, %r3763;
|
||
// inline asm
|
||
bfe.u32 %r3453, %r3458, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1251, %r3453, 4;
|
||
add.s64 %rd1252, %rd106, %rd1251;
|
||
ld.shared.u32 %r3766, [%rd1252];
|
||
xor.b32 %r3767, %r3766, %r3765;
|
||
// inline asm
|
||
bfe.u32 %r3457, %r3458, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1253, %r3457, 4;
|
||
add.s64 %rd1254, %rd110, %rd1253;
|
||
ld.shared.u32 %r3768, [%rd1254];
|
||
add.s32 %r3769, %r3768, %r3767;
|
||
xor.b32 %r3770, %r3882, %r3442;
|
||
xor.b32 %r3474, %r3770, %r3769;
|
||
// inline asm
|
||
bfe.u32 %r3461, %r3474, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1255, %r3461, 4;
|
||
add.s64 %rd1256, %rd98, %rd1255;
|
||
ld.shared.u32 %r3771, [%rd1256];
|
||
// inline asm
|
||
bfe.u32 %r3465, %r3474, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1257, %r3465, 4;
|
||
add.s64 %rd1258, %rd102, %rd1257;
|
||
ld.shared.u32 %r3772, [%rd1258];
|
||
add.s32 %r3773, %r3772, %r3771;
|
||
// inline asm
|
||
bfe.u32 %r3469, %r3474, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1259, %r3469, 4;
|
||
add.s64 %rd1260, %rd106, %rd1259;
|
||
ld.shared.u32 %r3774, [%rd1260];
|
||
xor.b32 %r3775, %r3774, %r3773;
|
||
// inline asm
|
||
bfe.u32 %r3473, %r3474, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1261, %r3473, 4;
|
||
add.s64 %rd1262, %rd110, %rd1261;
|
||
ld.shared.u32 %r3776, [%rd1262];
|
||
add.s32 %r3777, %r3776, %r3775;
|
||
xor.b32 %r3778, %r3881, %r3458;
|
||
xor.b32 %r3490, %r3778, %r3777;
|
||
// inline asm
|
||
bfe.u32 %r3477, %r3490, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1263, %r3477, 4;
|
||
add.s64 %rd1264, %rd98, %rd1263;
|
||
ld.shared.u32 %r3779, [%rd1264];
|
||
// inline asm
|
||
bfe.u32 %r3481, %r3490, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1265, %r3481, 4;
|
||
add.s64 %rd1266, %rd102, %rd1265;
|
||
ld.shared.u32 %r3780, [%rd1266];
|
||
add.s32 %r3781, %r3780, %r3779;
|
||
// inline asm
|
||
bfe.u32 %r3485, %r3490, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1267, %r3485, 4;
|
||
add.s64 %rd1268, %rd106, %rd1267;
|
||
ld.shared.u32 %r3782, [%rd1268];
|
||
xor.b32 %r3783, %r3782, %r3781;
|
||
// inline asm
|
||
bfe.u32 %r3489, %r3490, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1269, %r3489, 4;
|
||
add.s64 %rd1270, %rd110, %rd1269;
|
||
ld.shared.u32 %r3784, [%rd1270];
|
||
add.s32 %r3785, %r3784, %r3783;
|
||
xor.b32 %r3786, %r3880, %r3474;
|
||
xor.b32 %r3506, %r3786, %r3785;
|
||
// inline asm
|
||
bfe.u32 %r3493, %r3506, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1271, %r3493, 4;
|
||
add.s64 %rd1272, %rd98, %rd1271;
|
||
ld.shared.u32 %r3787, [%rd1272];
|
||
// inline asm
|
||
bfe.u32 %r3497, %r3506, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1273, %r3497, 4;
|
||
add.s64 %rd1274, %rd102, %rd1273;
|
||
ld.shared.u32 %r3788, [%rd1274];
|
||
add.s32 %r3789, %r3788, %r3787;
|
||
// inline asm
|
||
bfe.u32 %r3501, %r3506, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1275, %r3501, 4;
|
||
add.s64 %rd1276, %rd106, %rd1275;
|
||
ld.shared.u32 %r3790, [%rd1276];
|
||
xor.b32 %r3791, %r3790, %r3789;
|
||
// inline asm
|
||
bfe.u32 %r3505, %r3506, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1277, %r3505, 4;
|
||
add.s64 %rd1278, %rd110, %rd1277;
|
||
ld.shared.u32 %r3792, [%rd1278];
|
||
add.s32 %r3793, %r3792, %r3791;
|
||
xor.b32 %r3794, %r3879, %r3490;
|
||
xor.b32 %r3522, %r3794, %r3793;
|
||
// inline asm
|
||
bfe.u32 %r3509, %r3522, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1279, %r3509, 4;
|
||
add.s64 %rd1280, %rd98, %rd1279;
|
||
ld.shared.u32 %r3795, [%rd1280];
|
||
// inline asm
|
||
bfe.u32 %r3513, %r3522, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1281, %r3513, 4;
|
||
add.s64 %rd1282, %rd102, %rd1281;
|
||
ld.shared.u32 %r3796, [%rd1282];
|
||
add.s32 %r3797, %r3796, %r3795;
|
||
// inline asm
|
||
bfe.u32 %r3517, %r3522, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1283, %r3517, 4;
|
||
add.s64 %rd1284, %rd106, %rd1283;
|
||
ld.shared.u32 %r3798, [%rd1284];
|
||
xor.b32 %r3799, %r3798, %r3797;
|
||
// inline asm
|
||
bfe.u32 %r3521, %r3522, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1285, %r3521, 4;
|
||
add.s64 %rd1286, %rd110, %rd1285;
|
||
ld.shared.u32 %r3800, [%rd1286];
|
||
add.s32 %r3801, %r3800, %r3799;
|
||
xor.b32 %r3802, %r3878, %r3506;
|
||
xor.b32 %r3538, %r3802, %r3801;
|
||
// inline asm
|
||
bfe.u32 %r3525, %r3538, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1287, %r3525, 4;
|
||
add.s64 %rd1288, %rd98, %rd1287;
|
||
ld.shared.u32 %r3803, [%rd1288];
|
||
// inline asm
|
||
bfe.u32 %r3529, %r3538, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1289, %r3529, 4;
|
||
add.s64 %rd1290, %rd102, %rd1289;
|
||
ld.shared.u32 %r3804, [%rd1290];
|
||
add.s32 %r3805, %r3804, %r3803;
|
||
// inline asm
|
||
bfe.u32 %r3533, %r3538, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1291, %r3533, 4;
|
||
add.s64 %rd1292, %rd106, %rd1291;
|
||
ld.shared.u32 %r3806, [%rd1292];
|
||
xor.b32 %r3807, %r3806, %r3805;
|
||
// inline asm
|
||
bfe.u32 %r3537, %r3538, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1293, %r3537, 4;
|
||
add.s64 %rd1294, %rd110, %rd1293;
|
||
ld.shared.u32 %r3808, [%rd1294];
|
||
add.s32 %r3809, %r3808, %r3807;
|
||
xor.b32 %r3810, %r3877, %r3522;
|
||
xor.b32 %r3554, %r3810, %r3809;
|
||
// inline asm
|
||
bfe.u32 %r3541, %r3554, %r3858, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1295, %r3541, 4;
|
||
add.s64 %rd1296, %rd98, %rd1295;
|
||
ld.shared.u32 %r3811, [%rd1296];
|
||
// inline asm
|
||
bfe.u32 %r3545, %r3554, %r3860, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1297, %r3545, 4;
|
||
add.s64 %rd1298, %rd102, %rd1297;
|
||
ld.shared.u32 %r3812, [%rd1298];
|
||
add.s32 %r3813, %r3812, %r3811;
|
||
// inline asm
|
||
bfe.u32 %r3549, %r3554, %r3859, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1299, %r3549, 4;
|
||
add.s64 %rd1300, %rd106, %rd1299;
|
||
ld.shared.u32 %r3814, [%rd1300];
|
||
xor.b32 %r3815, %r3814, %r3813;
|
||
// inline asm
|
||
bfe.u32 %r3553, %r3554, %r3044, %r3859;
|
||
// inline asm
|
||
mul.wide.u32 %rd1301, %r3553, 4;
|
||
add.s64 %rd1302, %rd110, %rd1301;
|
||
ld.shared.u32 %r3816, [%rd1302];
|
||
add.s32 %r3817, %r3816, %r3815;
|
||
xor.b32 %r3818, %r3876, %r3538;
|
||
xor.b32 %r3897, %r3818, %r3817;
|
||
xor.b32 %r3898, %r3875, %r3554;
|
||
st.shared.u32 [%rd1340+8], %r3898;
|
||
st.shared.u32 [%rd1340+12], %r3897;
|
||
add.s64 %rd1340, %rd1340, 16;
|
||
add.s32 %r3905, %r3905, 4;
|
||
setp.lt.u32 %p16, %r3905, 256;
|
||
@%p16 bra BB3_28;
|
||
|
||
st.global.u32 [%rd16+72], %r3892;
|
||
st.global.u32 [%rd16+76], %r3891;
|
||
st.global.u32 [%rd16+80], %r3890;
|
||
st.global.u32 [%rd16+84], %r3889;
|
||
st.global.u32 [%rd16+88], %r3888;
|
||
st.global.u32 [%rd16+92], %r3887;
|
||
st.global.u32 [%rd16+96], %r3886;
|
||
st.global.u32 [%rd16+100], %r3885;
|
||
st.global.u32 [%rd16+104], %r3884;
|
||
st.global.u32 [%rd16+108], %r3883;
|
||
st.global.u32 [%rd16+112], %r3882;
|
||
st.global.u32 [%rd16+116], %r3881;
|
||
st.global.u32 [%rd16+120], %r3880;
|
||
st.global.u32 [%rd16+124], %r3879;
|
||
st.global.u32 [%rd16+128], %r3878;
|
||
st.global.u32 [%rd16+132], %r3877;
|
||
st.global.u32 [%rd16+136], %r3876;
|
||
st.global.u32 [%rd16+140], %r3875;
|
||
mov.u32 %r3908, 0;
|
||
|
||
BB3_30:
|
||
mul.wide.u32 %rd1306, %r3908, 4;
|
||
add.s64 %rd1307, %rd98, %rd1306;
|
||
ld.shared.u32 %r3820, [%rd1307];
|
||
add.s64 %rd1310, %rd16, %rd1306;
|
||
ld.shared.u32 %r3821, [%rd1307+4];
|
||
ld.shared.u32 %r3822, [%rd1307+8];
|
||
ld.shared.u32 %r3823, [%rd1307+12];
|
||
ld.shared.u32 %r3824, [%rd1307+16];
|
||
ld.shared.u32 %r3825, [%rd1307+20];
|
||
ld.shared.u32 %r3826, [%rd1307+24];
|
||
ld.shared.u32 %r3827, [%rd1307+28];
|
||
st.global.u32 [%rd1310+144], %r3820;
|
||
add.s64 %rd1313, %rd102, %rd1306;
|
||
st.global.u32 [%rd1310+148], %r3821;
|
||
st.global.u32 [%rd1310+152], %r3822;
|
||
st.global.u32 [%rd1310+156], %r3823;
|
||
st.global.u32 [%rd1310+160], %r3824;
|
||
st.global.u32 [%rd1310+164], %r3825;
|
||
st.global.u32 [%rd1310+168], %r3826;
|
||
st.global.u32 [%rd1310+172], %r3827;
|
||
ld.shared.u32 %r3828, [%rd1313];
|
||
ld.shared.u32 %r3829, [%rd1313+4];
|
||
ld.shared.u32 %r3830, [%rd1313+8];
|
||
ld.shared.u32 %r3831, [%rd1313+12];
|
||
ld.shared.u32 %r3832, [%rd1313+16];
|
||
ld.shared.u32 %r3833, [%rd1313+20];
|
||
ld.shared.u32 %r3834, [%rd1313+24];
|
||
ld.shared.u32 %r3835, [%rd1313+28];
|
||
st.global.u32 [%rd1310+1168], %r3828;
|
||
add.s64 %rd1316, %rd106, %rd1306;
|
||
st.global.u32 [%rd1310+1172], %r3829;
|
||
st.global.u32 [%rd1310+1176], %r3830;
|
||
st.global.u32 [%rd1310+1180], %r3831;
|
||
st.global.u32 [%rd1310+1184], %r3832;
|
||
st.global.u32 [%rd1310+1188], %r3833;
|
||
st.global.u32 [%rd1310+1192], %r3834;
|
||
st.global.u32 [%rd1310+1196], %r3835;
|
||
ld.shared.u32 %r3836, [%rd1316];
|
||
ld.shared.u32 %r3837, [%rd1316+4];
|
||
ld.shared.u32 %r3838, [%rd1316+8];
|
||
ld.shared.u32 %r3839, [%rd1316+12];
|
||
ld.shared.u32 %r3840, [%rd1316+16];
|
||
ld.shared.u32 %r3841, [%rd1316+20];
|
||
ld.shared.u32 %r3842, [%rd1316+24];
|
||
ld.shared.u32 %r3843, [%rd1316+28];
|
||
st.global.u32 [%rd1310+2192], %r3836;
|
||
add.s64 %rd1319, %rd110, %rd1306;
|
||
st.global.u32 [%rd1310+2196], %r3837;
|
||
st.global.u32 [%rd1310+2200], %r3838;
|
||
st.global.u32 [%rd1310+2204], %r3839;
|
||
st.global.u32 [%rd1310+2208], %r3840;
|
||
st.global.u32 [%rd1310+2212], %r3841;
|
||
st.global.u32 [%rd1310+2216], %r3842;
|
||
st.global.u32 [%rd1310+2220], %r3843;
|
||
ld.shared.u32 %r3844, [%rd1319];
|
||
ld.shared.u32 %r3845, [%rd1319+4];
|
||
ld.shared.u32 %r3846, [%rd1319+8];
|
||
ld.shared.u32 %r3847, [%rd1319+12];
|
||
ld.shared.u32 %r3848, [%rd1319+16];
|
||
ld.shared.u32 %r3849, [%rd1319+20];
|
||
ld.shared.u32 %r3850, [%rd1319+24];
|
||
ld.shared.u32 %r3851, [%rd1319+28];
|
||
st.global.u32 [%rd1310+3216], %r3844;
|
||
st.global.u32 [%rd1310+3220], %r3845;
|
||
st.global.u32 [%rd1310+3224], %r3846;
|
||
st.global.u32 [%rd1310+3228], %r3847;
|
||
st.global.u32 [%rd1310+3232], %r3848;
|
||
st.global.u32 [%rd1310+3236], %r3849;
|
||
st.global.u32 [%rd1310+3240], %r3850;
|
||
st.global.u32 [%rd1310+3244], %r3851;
|
||
add.s32 %r3908, %r3908, 8;
|
||
setp.ne.s32 %p17, %r3908, 256;
|
||
@%p17 bra BB3_30;
|
||
|
||
BB3_31:
|
||
ret;
|
||
}
|
||
|
||
// .globl m03200_loop
|
||
.entry m03200_loop(
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_0,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_1,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_2,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_3,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_4,
|
||
.param .u64 .ptr .global .align 1 m03200_loop_param_5,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_6,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_7,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_8,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_9,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_10,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_11,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_12,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_13,
|
||
.param .u64 .ptr .global .align 8 m03200_loop_param_14,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_15,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_16,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_17,
|
||
.param .u64 .ptr .global .align 1 m03200_loop_param_18,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_19,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_20,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_21,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_22,
|
||
.param .u64 .ptr .global .align 4 m03200_loop_param_23,
|
||
.param .u32 m03200_loop_param_24,
|
||
.param .u32 m03200_loop_param_25,
|
||
.param .u32 m03200_loop_param_26,
|
||
.param .u32 m03200_loop_param_27,
|
||
.param .u32 m03200_loop_param_28,
|
||
.param .u32 m03200_loop_param_29,
|
||
.param .u32 m03200_loop_param_30,
|
||
.param .u32 m03200_loop_param_31,
|
||
.param .u32 m03200_loop_param_32,
|
||
.param .u32 m03200_loop_param_33,
|
||
.param .u64 m03200_loop_param_34
|
||
)
|
||
.reqntid 8, 1, 1
|
||
{
|
||
.reg .pred %p<14>;
|
||
.reg .b32 %r<10353>;
|
||
.reg .b64 %rd<3503>;
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_loop$S0_all[8192];
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_loop$S1_all[8192];
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_loop$S2_all[8192];
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_loop$S3_all[8192];
|
||
|
||
ld.param.u64 %rd31, [m03200_loop_param_4];
|
||
ld.param.u64 %rd32, [m03200_loop_param_17];
|
||
ld.param.u32 %r169, [m03200_loop_param_27];
|
||
ld.param.u32 %r170, [m03200_loop_param_29];
|
||
ld.param.u64 %rd33, [m03200_loop_param_34];
|
||
mov.b32 %r171, %envreg3;
|
||
mov.u32 %r172, %ctaid.x;
|
||
mov.u32 %r173, %ntid.x;
|
||
mad.lo.s32 %r174, %r172, %r173, %r171;
|
||
mov.u32 %r1, %tid.x;
|
||
add.s32 %r2, %r174, %r1;
|
||
cvt.s64.s32 %rd34, %r2;
|
||
cvt.s64.s32 %rd1, %r1;
|
||
setp.ge.u64 %p1, %rd34, %rd33;
|
||
@%p1 bra BB4_24;
|
||
|
||
mul.wide.s32 %rd35, %r2, 4240;
|
||
add.s64 %rd2, %rd31, %rd35;
|
||
ld.global.u32 %r3, [%rd2];
|
||
ld.global.u32 %r4, [%rd2+4];
|
||
ld.global.u32 %r5, [%rd2+8];
|
||
ld.global.u32 %r6, [%rd2+12];
|
||
ld.global.u32 %r7, [%rd2+16];
|
||
ld.global.u32 %r8, [%rd2+20];
|
||
ld.global.u32 %r9, [%rd2+24];
|
||
ld.global.u32 %r10, [%rd2+28];
|
||
ld.global.u32 %r11, [%rd2+32];
|
||
ld.global.u32 %r12, [%rd2+36];
|
||
ld.global.u32 %r13, [%rd2+40];
|
||
ld.global.u32 %r14, [%rd2+44];
|
||
ld.global.u32 %r15, [%rd2+48];
|
||
ld.global.u32 %r16, [%rd2+52];
|
||
ld.global.u32 %r17, [%rd2+56];
|
||
ld.global.u32 %r18, [%rd2+60];
|
||
ld.global.u32 %r19, [%rd2+64];
|
||
ld.global.u32 %r20, [%rd2+68];
|
||
ld.global.u32 %r10351, [%rd2+72];
|
||
ld.global.u32 %r10350, [%rd2+76];
|
||
ld.global.u32 %r10349, [%rd2+80];
|
||
ld.global.u32 %r10348, [%rd2+84];
|
||
ld.global.u32 %r10347, [%rd2+88];
|
||
ld.global.u32 %r10346, [%rd2+92];
|
||
ld.global.u32 %r10345, [%rd2+96];
|
||
ld.global.u32 %r10344, [%rd2+100];
|
||
ld.global.u32 %r10343, [%rd2+104];
|
||
ld.global.u32 %r10342, [%rd2+108];
|
||
ld.global.u32 %r10341, [%rd2+112];
|
||
ld.global.u32 %r10340, [%rd2+116];
|
||
ld.global.u32 %r10339, [%rd2+120];
|
||
ld.global.u32 %r10338, [%rd2+124];
|
||
ld.global.u32 %r10337, [%rd2+128];
|
||
ld.global.u32 %r10336, [%rd2+132];
|
||
ld.global.u32 %r10335, [%rd2+136];
|
||
ld.global.u32 %r10334, [%rd2+140];
|
||
mov.u32 %r10290, 0;
|
||
|
||
BB4_2:
|
||
mul.wide.u32 %rd38, %r10290, 4;
|
||
add.s64 %rd39, %rd2, %rd38;
|
||
ld.global.u32 %r176, [%rd39+144];
|
||
shl.b64 %rd40, %rd1, 10;
|
||
mov.u64 %rd41, m03200_loop$S0_all;
|
||
add.s64 %rd42, %rd41, %rd40;
|
||
add.s64 %rd43, %rd42, %rd38;
|
||
ld.global.u32 %r177, [%rd39+1168];
|
||
ld.global.u32 %r178, [%rd39+2192];
|
||
ld.global.u32 %r179, [%rd39+3216];
|
||
ld.global.u32 %r180, [%rd39+148];
|
||
ld.global.u32 %r181, [%rd39+1172];
|
||
ld.global.u32 %r182, [%rd39+2196];
|
||
ld.global.u32 %r183, [%rd39+3220];
|
||
ld.global.u32 %r184, [%rd39+152];
|
||
ld.global.u32 %r185, [%rd39+1176];
|
||
ld.global.u32 %r186, [%rd39+2200];
|
||
ld.global.u32 %r187, [%rd39+3224];
|
||
ld.global.u32 %r188, [%rd39+156];
|
||
ld.global.u32 %r189, [%rd39+1180];
|
||
ld.global.u32 %r190, [%rd39+2204];
|
||
ld.global.u32 %r191, [%rd39+3228];
|
||
ld.global.u32 %r192, [%rd39+160];
|
||
ld.global.u32 %r193, [%rd39+1184];
|
||
ld.global.u32 %r194, [%rd39+2208];
|
||
ld.global.u32 %r195, [%rd39+3232];
|
||
ld.global.u32 %r196, [%rd39+164];
|
||
ld.global.u32 %r197, [%rd39+1188];
|
||
ld.global.u32 %r198, [%rd39+2212];
|
||
ld.global.u32 %r199, [%rd39+3236];
|
||
ld.global.u32 %r200, [%rd39+168];
|
||
ld.global.u32 %r201, [%rd39+1192];
|
||
ld.global.u32 %r202, [%rd39+2216];
|
||
ld.global.u32 %r203, [%rd39+3240];
|
||
ld.global.u32 %r204, [%rd39+172];
|
||
ld.global.u32 %r205, [%rd39+1196];
|
||
ld.global.u32 %r206, [%rd39+2220];
|
||
ld.global.u32 %r207, [%rd39+3244];
|
||
st.shared.u32 [%rd43], %r176;
|
||
mov.u64 %rd44, m03200_loop$S1_all;
|
||
add.s64 %rd45, %rd44, %rd40;
|
||
add.s64 %rd46, %rd45, %rd38;
|
||
st.shared.u32 [%rd43+4], %r180;
|
||
st.shared.u32 [%rd43+8], %r184;
|
||
st.shared.u32 [%rd43+12], %r188;
|
||
st.shared.u32 [%rd43+16], %r192;
|
||
st.shared.u32 [%rd43+20], %r196;
|
||
st.shared.u32 [%rd43+24], %r200;
|
||
st.shared.u32 [%rd43+28], %r204;
|
||
st.shared.u32 [%rd46], %r177;
|
||
mov.u64 %rd47, m03200_loop$S2_all;
|
||
add.s64 %rd48, %rd47, %rd40;
|
||
add.s64 %rd49, %rd48, %rd38;
|
||
st.shared.u32 [%rd46+4], %r181;
|
||
st.shared.u32 [%rd46+8], %r185;
|
||
st.shared.u32 [%rd46+12], %r189;
|
||
st.shared.u32 [%rd46+16], %r193;
|
||
st.shared.u32 [%rd46+20], %r197;
|
||
st.shared.u32 [%rd46+24], %r201;
|
||
st.shared.u32 [%rd46+28], %r205;
|
||
st.shared.u32 [%rd49], %r178;
|
||
mov.u64 %rd50, m03200_loop$S3_all;
|
||
add.s64 %rd51, %rd50, %rd40;
|
||
add.s64 %rd52, %rd51, %rd38;
|
||
st.shared.u32 [%rd49+4], %r182;
|
||
st.shared.u32 [%rd49+8], %r186;
|
||
st.shared.u32 [%rd49+12], %r190;
|
||
st.shared.u32 [%rd49+16], %r194;
|
||
st.shared.u32 [%rd49+20], %r198;
|
||
st.shared.u32 [%rd49+24], %r202;
|
||
st.shared.u32 [%rd49+28], %r206;
|
||
st.shared.u32 [%rd52], %r179;
|
||
st.shared.u32 [%rd52+4], %r183;
|
||
st.shared.u32 [%rd52+8], %r187;
|
||
st.shared.u32 [%rd52+12], %r191;
|
||
st.shared.u32 [%rd52+16], %r195;
|
||
st.shared.u32 [%rd52+20], %r199;
|
||
st.shared.u32 [%rd52+24], %r203;
|
||
st.shared.u32 [%rd52+28], %r207;
|
||
add.s32 %r10290, %r10290, 8;
|
||
setp.ne.s32 %p2, %r10290, 256;
|
||
@%p2 bra BB4_2;
|
||
|
||
setp.eq.s32 %p3, %r170, 0;
|
||
@%p3 bra BB4_22;
|
||
|
||
mul.wide.u32 %rd53, %r169, 560;
|
||
add.s64 %rd54, %rd32, %rd53;
|
||
ld.global.u32 %r41, [%rd54];
|
||
ld.global.u32 %r42, [%rd54+4];
|
||
ld.global.u32 %r43, [%rd54+8];
|
||
ld.global.u32 %r44, [%rd54+12];
|
||
mul.wide.s32 %rd55, %r1, 1024;
|
||
add.s64 %rd3, %rd41, %rd55;
|
||
add.s64 %rd4, %rd44, %rd55;
|
||
add.s64 %rd5, %rd47, %rd55;
|
||
add.s64 %rd6, %rd50, %rd55;
|
||
mov.u32 %r208, 0;
|
||
mov.u32 %r10309, %r208;
|
||
|
||
BB4_5:
|
||
xor.b32 %r222, %r10351, %r3;
|
||
mov.u32 %r2499, 24;
|
||
mov.u32 %r2512, 8;
|
||
// inline asm
|
||
bfe.u32 %r209, %r222, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd63, %r209, 4;
|
||
add.s64 %rd64, %rd42, %rd63;
|
||
ld.shared.u32 %r2514, [%rd64];
|
||
mov.u32 %r2503, 16;
|
||
// inline asm
|
||
bfe.u32 %r213, %r222, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd67, %r213, 4;
|
||
add.s64 %rd68, %rd45, %rd67;
|
||
ld.shared.u32 %r2515, [%rd68];
|
||
add.s32 %r2516, %r2515, %r2514;
|
||
// inline asm
|
||
bfe.u32 %r217, %r222, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd71, %r217, 4;
|
||
add.s64 %rd72, %rd48, %rd71;
|
||
ld.shared.u32 %r2517, [%rd72];
|
||
xor.b32 %r2518, %r2517, %r2516;
|
||
// inline asm
|
||
bfe.u32 %r221, %r222, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd75, %r221, 4;
|
||
add.s64 %rd76, %rd51, %rd75;
|
||
ld.shared.u32 %r2519, [%rd76];
|
||
add.s32 %r2520, %r2519, %r2518;
|
||
xor.b32 %r2521, %r10350, %r4;
|
||
xor.b32 %r238, %r2521, %r2520;
|
||
// inline asm
|
||
bfe.u32 %r225, %r238, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd77, %r225, 4;
|
||
add.s64 %rd78, %rd42, %rd77;
|
||
ld.shared.u32 %r2522, [%rd78];
|
||
// inline asm
|
||
bfe.u32 %r229, %r238, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd79, %r229, 4;
|
||
add.s64 %rd80, %rd45, %rd79;
|
||
ld.shared.u32 %r2523, [%rd80];
|
||
add.s32 %r2524, %r2523, %r2522;
|
||
// inline asm
|
||
bfe.u32 %r233, %r238, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd81, %r233, 4;
|
||
add.s64 %rd82, %rd48, %rd81;
|
||
ld.shared.u32 %r2525, [%rd82];
|
||
xor.b32 %r2526, %r2525, %r2524;
|
||
// inline asm
|
||
bfe.u32 %r237, %r238, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd83, %r237, 4;
|
||
add.s64 %rd84, %rd51, %rd83;
|
||
ld.shared.u32 %r2527, [%rd84];
|
||
add.s32 %r2528, %r2527, %r2526;
|
||
xor.b32 %r2529, %r10349, %r5;
|
||
xor.b32 %r2530, %r2529, %r222;
|
||
xor.b32 %r254, %r2530, %r2528;
|
||
// inline asm
|
||
bfe.u32 %r241, %r254, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd85, %r241, 4;
|
||
add.s64 %rd86, %rd42, %rd85;
|
||
ld.shared.u32 %r2531, [%rd86];
|
||
// inline asm
|
||
bfe.u32 %r245, %r254, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd87, %r245, 4;
|
||
add.s64 %rd88, %rd45, %rd87;
|
||
ld.shared.u32 %r2532, [%rd88];
|
||
add.s32 %r2533, %r2532, %r2531;
|
||
// inline asm
|
||
bfe.u32 %r249, %r254, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd89, %r249, 4;
|
||
add.s64 %rd90, %rd48, %rd89;
|
||
ld.shared.u32 %r2534, [%rd90];
|
||
xor.b32 %r2535, %r2534, %r2533;
|
||
// inline asm
|
||
bfe.u32 %r253, %r254, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd91, %r253, 4;
|
||
add.s64 %rd92, %rd51, %rd91;
|
||
ld.shared.u32 %r2536, [%rd92];
|
||
add.s32 %r2537, %r2536, %r2535;
|
||
xor.b32 %r2538, %r10348, %r6;
|
||
xor.b32 %r2539, %r2538, %r238;
|
||
xor.b32 %r270, %r2539, %r2537;
|
||
// inline asm
|
||
bfe.u32 %r257, %r270, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd93, %r257, 4;
|
||
add.s64 %rd94, %rd42, %rd93;
|
||
ld.shared.u32 %r2540, [%rd94];
|
||
// inline asm
|
||
bfe.u32 %r261, %r270, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd95, %r261, 4;
|
||
add.s64 %rd96, %rd45, %rd95;
|
||
ld.shared.u32 %r2541, [%rd96];
|
||
add.s32 %r2542, %r2541, %r2540;
|
||
// inline asm
|
||
bfe.u32 %r265, %r270, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd97, %r265, 4;
|
||
add.s64 %rd98, %rd48, %rd97;
|
||
ld.shared.u32 %r2543, [%rd98];
|
||
xor.b32 %r2544, %r2543, %r2542;
|
||
// inline asm
|
||
bfe.u32 %r269, %r270, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd99, %r269, 4;
|
||
add.s64 %rd100, %rd51, %rd99;
|
||
ld.shared.u32 %r2545, [%rd100];
|
||
add.s32 %r2546, %r2545, %r2544;
|
||
xor.b32 %r2547, %r10347, %r7;
|
||
xor.b32 %r2548, %r2547, %r254;
|
||
xor.b32 %r286, %r2548, %r2546;
|
||
// inline asm
|
||
bfe.u32 %r273, %r286, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd101, %r273, 4;
|
||
add.s64 %rd102, %rd42, %rd101;
|
||
ld.shared.u32 %r2549, [%rd102];
|
||
// inline asm
|
||
bfe.u32 %r277, %r286, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd103, %r277, 4;
|
||
add.s64 %rd104, %rd45, %rd103;
|
||
ld.shared.u32 %r2550, [%rd104];
|
||
add.s32 %r2551, %r2550, %r2549;
|
||
// inline asm
|
||
bfe.u32 %r281, %r286, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd105, %r281, 4;
|
||
add.s64 %rd106, %rd48, %rd105;
|
||
ld.shared.u32 %r2552, [%rd106];
|
||
xor.b32 %r2553, %r2552, %r2551;
|
||
// inline asm
|
||
bfe.u32 %r285, %r286, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd107, %r285, 4;
|
||
add.s64 %rd108, %rd51, %rd107;
|
||
ld.shared.u32 %r2554, [%rd108];
|
||
add.s32 %r2555, %r2554, %r2553;
|
||
xor.b32 %r2556, %r10346, %r8;
|
||
xor.b32 %r2557, %r2556, %r270;
|
||
xor.b32 %r302, %r2557, %r2555;
|
||
// inline asm
|
||
bfe.u32 %r289, %r302, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd109, %r289, 4;
|
||
add.s64 %rd110, %rd42, %rd109;
|
||
ld.shared.u32 %r2558, [%rd110];
|
||
// inline asm
|
||
bfe.u32 %r293, %r302, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd111, %r293, 4;
|
||
add.s64 %rd112, %rd45, %rd111;
|
||
ld.shared.u32 %r2559, [%rd112];
|
||
add.s32 %r2560, %r2559, %r2558;
|
||
// inline asm
|
||
bfe.u32 %r297, %r302, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd113, %r297, 4;
|
||
add.s64 %rd114, %rd48, %rd113;
|
||
ld.shared.u32 %r2561, [%rd114];
|
||
xor.b32 %r2562, %r2561, %r2560;
|
||
// inline asm
|
||
bfe.u32 %r301, %r302, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd115, %r301, 4;
|
||
add.s64 %rd116, %rd51, %rd115;
|
||
ld.shared.u32 %r2563, [%rd116];
|
||
add.s32 %r2564, %r2563, %r2562;
|
||
xor.b32 %r2565, %r10345, %r9;
|
||
xor.b32 %r2566, %r2565, %r286;
|
||
xor.b32 %r318, %r2566, %r2564;
|
||
// inline asm
|
||
bfe.u32 %r305, %r318, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd117, %r305, 4;
|
||
add.s64 %rd118, %rd42, %rd117;
|
||
ld.shared.u32 %r2567, [%rd118];
|
||
// inline asm
|
||
bfe.u32 %r309, %r318, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd119, %r309, 4;
|
||
add.s64 %rd120, %rd45, %rd119;
|
||
ld.shared.u32 %r2568, [%rd120];
|
||
add.s32 %r2569, %r2568, %r2567;
|
||
// inline asm
|
||
bfe.u32 %r313, %r318, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd121, %r313, 4;
|
||
add.s64 %rd122, %rd48, %rd121;
|
||
ld.shared.u32 %r2570, [%rd122];
|
||
xor.b32 %r2571, %r2570, %r2569;
|
||
// inline asm
|
||
bfe.u32 %r317, %r318, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd123, %r317, 4;
|
||
add.s64 %rd124, %rd51, %rd123;
|
||
ld.shared.u32 %r2572, [%rd124];
|
||
add.s32 %r2573, %r2572, %r2571;
|
||
xor.b32 %r2574, %r10344, %r10;
|
||
xor.b32 %r2575, %r2574, %r302;
|
||
xor.b32 %r334, %r2575, %r2573;
|
||
// inline asm
|
||
bfe.u32 %r321, %r334, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd125, %r321, 4;
|
||
add.s64 %rd126, %rd42, %rd125;
|
||
ld.shared.u32 %r2576, [%rd126];
|
||
// inline asm
|
||
bfe.u32 %r325, %r334, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd127, %r325, 4;
|
||
add.s64 %rd128, %rd45, %rd127;
|
||
ld.shared.u32 %r2577, [%rd128];
|
||
add.s32 %r2578, %r2577, %r2576;
|
||
// inline asm
|
||
bfe.u32 %r329, %r334, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd129, %r329, 4;
|
||
add.s64 %rd130, %rd48, %rd129;
|
||
ld.shared.u32 %r2579, [%rd130];
|
||
xor.b32 %r2580, %r2579, %r2578;
|
||
// inline asm
|
||
bfe.u32 %r333, %r334, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd131, %r333, 4;
|
||
add.s64 %rd132, %rd51, %rd131;
|
||
ld.shared.u32 %r2581, [%rd132];
|
||
add.s32 %r2582, %r2581, %r2580;
|
||
xor.b32 %r2583, %r10343, %r11;
|
||
xor.b32 %r2584, %r2583, %r318;
|
||
xor.b32 %r350, %r2584, %r2582;
|
||
// inline asm
|
||
bfe.u32 %r337, %r350, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd133, %r337, 4;
|
||
add.s64 %rd134, %rd42, %rd133;
|
||
ld.shared.u32 %r2585, [%rd134];
|
||
// inline asm
|
||
bfe.u32 %r341, %r350, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd135, %r341, 4;
|
||
add.s64 %rd136, %rd45, %rd135;
|
||
ld.shared.u32 %r2586, [%rd136];
|
||
add.s32 %r2587, %r2586, %r2585;
|
||
// inline asm
|
||
bfe.u32 %r345, %r350, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd137, %r345, 4;
|
||
add.s64 %rd138, %rd48, %rd137;
|
||
ld.shared.u32 %r2588, [%rd138];
|
||
xor.b32 %r2589, %r2588, %r2587;
|
||
// inline asm
|
||
bfe.u32 %r349, %r350, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd139, %r349, 4;
|
||
add.s64 %rd140, %rd51, %rd139;
|
||
ld.shared.u32 %r2590, [%rd140];
|
||
add.s32 %r2591, %r2590, %r2589;
|
||
xor.b32 %r2592, %r10342, %r12;
|
||
xor.b32 %r2593, %r2592, %r334;
|
||
xor.b32 %r366, %r2593, %r2591;
|
||
// inline asm
|
||
bfe.u32 %r353, %r366, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd141, %r353, 4;
|
||
add.s64 %rd142, %rd42, %rd141;
|
||
ld.shared.u32 %r2594, [%rd142];
|
||
// inline asm
|
||
bfe.u32 %r357, %r366, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd143, %r357, 4;
|
||
add.s64 %rd144, %rd45, %rd143;
|
||
ld.shared.u32 %r2595, [%rd144];
|
||
add.s32 %r2596, %r2595, %r2594;
|
||
// inline asm
|
||
bfe.u32 %r361, %r366, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd145, %r361, 4;
|
||
add.s64 %rd146, %rd48, %rd145;
|
||
ld.shared.u32 %r2597, [%rd146];
|
||
xor.b32 %r2598, %r2597, %r2596;
|
||
// inline asm
|
||
bfe.u32 %r365, %r366, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd147, %r365, 4;
|
||
add.s64 %rd148, %rd51, %rd147;
|
||
ld.shared.u32 %r2599, [%rd148];
|
||
add.s32 %r2600, %r2599, %r2598;
|
||
xor.b32 %r2601, %r10341, %r13;
|
||
xor.b32 %r2602, %r2601, %r350;
|
||
xor.b32 %r382, %r2602, %r2600;
|
||
// inline asm
|
||
bfe.u32 %r369, %r382, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd149, %r369, 4;
|
||
add.s64 %rd150, %rd42, %rd149;
|
||
ld.shared.u32 %r2603, [%rd150];
|
||
// inline asm
|
||
bfe.u32 %r373, %r382, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd151, %r373, 4;
|
||
add.s64 %rd152, %rd45, %rd151;
|
||
ld.shared.u32 %r2604, [%rd152];
|
||
add.s32 %r2605, %r2604, %r2603;
|
||
// inline asm
|
||
bfe.u32 %r377, %r382, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd153, %r377, 4;
|
||
add.s64 %rd154, %rd48, %rd153;
|
||
ld.shared.u32 %r2606, [%rd154];
|
||
xor.b32 %r2607, %r2606, %r2605;
|
||
// inline asm
|
||
bfe.u32 %r381, %r382, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd155, %r381, 4;
|
||
add.s64 %rd156, %rd51, %rd155;
|
||
ld.shared.u32 %r2608, [%rd156];
|
||
add.s32 %r2609, %r2608, %r2607;
|
||
xor.b32 %r2610, %r10340, %r14;
|
||
xor.b32 %r2611, %r2610, %r366;
|
||
xor.b32 %r398, %r2611, %r2609;
|
||
// inline asm
|
||
bfe.u32 %r385, %r398, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd157, %r385, 4;
|
||
add.s64 %rd158, %rd42, %rd157;
|
||
ld.shared.u32 %r2612, [%rd158];
|
||
// inline asm
|
||
bfe.u32 %r389, %r398, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd159, %r389, 4;
|
||
add.s64 %rd160, %rd45, %rd159;
|
||
ld.shared.u32 %r2613, [%rd160];
|
||
add.s32 %r2614, %r2613, %r2612;
|
||
// inline asm
|
||
bfe.u32 %r393, %r398, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd161, %r393, 4;
|
||
add.s64 %rd162, %rd48, %rd161;
|
||
ld.shared.u32 %r2615, [%rd162];
|
||
xor.b32 %r2616, %r2615, %r2614;
|
||
// inline asm
|
||
bfe.u32 %r397, %r398, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd163, %r397, 4;
|
||
add.s64 %rd164, %rd51, %rd163;
|
||
ld.shared.u32 %r2617, [%rd164];
|
||
add.s32 %r2618, %r2617, %r2616;
|
||
xor.b32 %r2619, %r10339, %r15;
|
||
xor.b32 %r2620, %r2619, %r382;
|
||
xor.b32 %r414, %r2620, %r2618;
|
||
// inline asm
|
||
bfe.u32 %r401, %r414, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd165, %r401, 4;
|
||
add.s64 %rd166, %rd42, %rd165;
|
||
ld.shared.u32 %r2621, [%rd166];
|
||
// inline asm
|
||
bfe.u32 %r405, %r414, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd167, %r405, 4;
|
||
add.s64 %rd168, %rd45, %rd167;
|
||
ld.shared.u32 %r2622, [%rd168];
|
||
add.s32 %r2623, %r2622, %r2621;
|
||
// inline asm
|
||
bfe.u32 %r409, %r414, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd169, %r409, 4;
|
||
add.s64 %rd170, %rd48, %rd169;
|
||
ld.shared.u32 %r2624, [%rd170];
|
||
xor.b32 %r2625, %r2624, %r2623;
|
||
// inline asm
|
||
bfe.u32 %r413, %r414, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd171, %r413, 4;
|
||
add.s64 %rd172, %rd51, %rd171;
|
||
ld.shared.u32 %r2626, [%rd172];
|
||
add.s32 %r2627, %r2626, %r2625;
|
||
xor.b32 %r2628, %r10338, %r16;
|
||
xor.b32 %r2629, %r2628, %r398;
|
||
xor.b32 %r430, %r2629, %r2627;
|
||
// inline asm
|
||
bfe.u32 %r417, %r430, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd173, %r417, 4;
|
||
add.s64 %rd174, %rd42, %rd173;
|
||
ld.shared.u32 %r2630, [%rd174];
|
||
// inline asm
|
||
bfe.u32 %r421, %r430, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd175, %r421, 4;
|
||
add.s64 %rd176, %rd45, %rd175;
|
||
ld.shared.u32 %r2631, [%rd176];
|
||
add.s32 %r2632, %r2631, %r2630;
|
||
// inline asm
|
||
bfe.u32 %r425, %r430, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd177, %r425, 4;
|
||
add.s64 %rd178, %rd48, %rd177;
|
||
ld.shared.u32 %r2633, [%rd178];
|
||
xor.b32 %r2634, %r2633, %r2632;
|
||
// inline asm
|
||
bfe.u32 %r429, %r430, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd179, %r429, 4;
|
||
add.s64 %rd180, %rd51, %rd179;
|
||
ld.shared.u32 %r2635, [%rd180];
|
||
add.s32 %r2636, %r2635, %r2634;
|
||
xor.b32 %r2637, %r10337, %r17;
|
||
xor.b32 %r2638, %r2637, %r414;
|
||
xor.b32 %r446, %r2638, %r2636;
|
||
// inline asm
|
||
bfe.u32 %r433, %r446, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd181, %r433, 4;
|
||
add.s64 %rd182, %rd42, %rd181;
|
||
ld.shared.u32 %r2639, [%rd182];
|
||
// inline asm
|
||
bfe.u32 %r437, %r446, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd183, %r437, 4;
|
||
add.s64 %rd184, %rd45, %rd183;
|
||
ld.shared.u32 %r2640, [%rd184];
|
||
add.s32 %r2641, %r2640, %r2639;
|
||
// inline asm
|
||
bfe.u32 %r441, %r446, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd185, %r441, 4;
|
||
add.s64 %rd186, %rd48, %rd185;
|
||
ld.shared.u32 %r2642, [%rd186];
|
||
xor.b32 %r2643, %r2642, %r2641;
|
||
// inline asm
|
||
bfe.u32 %r445, %r446, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd187, %r445, 4;
|
||
add.s64 %rd188, %rd51, %rd187;
|
||
ld.shared.u32 %r2644, [%rd188];
|
||
add.s32 %r2645, %r2644, %r2643;
|
||
xor.b32 %r2646, %r10336, %r18;
|
||
xor.b32 %r2647, %r2646, %r430;
|
||
xor.b32 %r462, %r2647, %r2645;
|
||
// inline asm
|
||
bfe.u32 %r449, %r462, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd189, %r449, 4;
|
||
add.s64 %rd190, %rd42, %rd189;
|
||
ld.shared.u32 %r2648, [%rd190];
|
||
// inline asm
|
||
bfe.u32 %r453, %r462, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd191, %r453, 4;
|
||
add.s64 %rd192, %rd45, %rd191;
|
||
ld.shared.u32 %r2649, [%rd192];
|
||
add.s32 %r2650, %r2649, %r2648;
|
||
// inline asm
|
||
bfe.u32 %r457, %r462, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd193, %r457, 4;
|
||
add.s64 %rd194, %rd48, %rd193;
|
||
ld.shared.u32 %r2651, [%rd194];
|
||
xor.b32 %r2652, %r2651, %r2650;
|
||
// inline asm
|
||
bfe.u32 %r461, %r462, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd195, %r461, 4;
|
||
add.s64 %rd196, %rd51, %rd195;
|
||
ld.shared.u32 %r2653, [%rd196];
|
||
add.s32 %r2654, %r2653, %r2652;
|
||
xor.b32 %r2655, %r10335, %r19;
|
||
xor.b32 %r2656, %r2655, %r446;
|
||
xor.b32 %r64, %r2656, %r2654;
|
||
xor.b32 %r2657, %r10334, %r20;
|
||
xor.b32 %r65, %r2657, %r462;
|
||
// inline asm
|
||
bfe.u32 %r465, %r208, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd197, %r465, 4;
|
||
add.s64 %rd198, %rd42, %rd197;
|
||
ld.shared.u32 %r2658, [%rd198];
|
||
// inline asm
|
||
bfe.u32 %r469, %r208, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd199, %r469, 4;
|
||
add.s64 %rd200, %rd45, %rd199;
|
||
ld.shared.u32 %r2659, [%rd200];
|
||
add.s32 %r2660, %r2659, %r2658;
|
||
// inline asm
|
||
bfe.u32 %r473, %r208, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd201, %r473, 4;
|
||
add.s64 %rd202, %rd48, %rd201;
|
||
ld.shared.u32 %r2661, [%rd202];
|
||
xor.b32 %r2662, %r2661, %r2660;
|
||
// inline asm
|
||
bfe.u32 %r477, %r208, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd203, %r477, 4;
|
||
add.s64 %rd204, %rd51, %rd203;
|
||
ld.shared.u32 %r2663, [%rd204];
|
||
add.s32 %r494, %r2663, %r2662;
|
||
// inline asm
|
||
bfe.u32 %r481, %r494, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd205, %r481, 4;
|
||
add.s64 %rd206, %rd42, %rd205;
|
||
ld.shared.u32 %r2664, [%rd206];
|
||
// inline asm
|
||
bfe.u32 %r485, %r494, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd207, %r485, 4;
|
||
add.s64 %rd208, %rd45, %rd207;
|
||
ld.shared.u32 %r2665, [%rd208];
|
||
add.s32 %r2666, %r2665, %r2664;
|
||
// inline asm
|
||
bfe.u32 %r489, %r494, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd209, %r489, 4;
|
||
add.s64 %rd210, %rd48, %rd209;
|
||
ld.shared.u32 %r2667, [%rd210];
|
||
xor.b32 %r2668, %r2667, %r2666;
|
||
// inline asm
|
||
bfe.u32 %r493, %r494, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd211, %r493, 4;
|
||
add.s64 %rd212, %rd51, %rd211;
|
||
ld.shared.u32 %r2669, [%rd212];
|
||
add.s32 %r2670, %r2669, %r2668;
|
||
xor.b32 %r510, %r2529, %r2670;
|
||
// inline asm
|
||
bfe.u32 %r497, %r510, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd213, %r497, 4;
|
||
add.s64 %rd214, %rd42, %rd213;
|
||
ld.shared.u32 %r2671, [%rd214];
|
||
// inline asm
|
||
bfe.u32 %r501, %r510, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd215, %r501, 4;
|
||
add.s64 %rd216, %rd45, %rd215;
|
||
ld.shared.u32 %r2672, [%rd216];
|
||
add.s32 %r2673, %r2672, %r2671;
|
||
// inline asm
|
||
bfe.u32 %r505, %r510, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd217, %r505, 4;
|
||
add.s64 %rd218, %rd48, %rd217;
|
||
ld.shared.u32 %r2674, [%rd218];
|
||
xor.b32 %r2675, %r2674, %r2673;
|
||
// inline asm
|
||
bfe.u32 %r509, %r510, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd219, %r509, 4;
|
||
add.s64 %rd220, %rd51, %rd219;
|
||
ld.shared.u32 %r2676, [%rd220];
|
||
add.s32 %r2677, %r2676, %r2675;
|
||
xor.b32 %r2678, %r2538, %r494;
|
||
xor.b32 %r526, %r2678, %r2677;
|
||
// inline asm
|
||
bfe.u32 %r513, %r526, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd221, %r513, 4;
|
||
add.s64 %rd222, %rd42, %rd221;
|
||
ld.shared.u32 %r2679, [%rd222];
|
||
// inline asm
|
||
bfe.u32 %r517, %r526, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd223, %r517, 4;
|
||
add.s64 %rd224, %rd45, %rd223;
|
||
ld.shared.u32 %r2680, [%rd224];
|
||
add.s32 %r2681, %r2680, %r2679;
|
||
// inline asm
|
||
bfe.u32 %r521, %r526, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd225, %r521, 4;
|
||
add.s64 %rd226, %rd48, %rd225;
|
||
ld.shared.u32 %r2682, [%rd226];
|
||
xor.b32 %r2683, %r2682, %r2681;
|
||
// inline asm
|
||
bfe.u32 %r525, %r526, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd227, %r525, 4;
|
||
add.s64 %rd228, %rd51, %rd227;
|
||
ld.shared.u32 %r2684, [%rd228];
|
||
add.s32 %r2685, %r2684, %r2683;
|
||
xor.b32 %r2686, %r2547, %r510;
|
||
xor.b32 %r542, %r2686, %r2685;
|
||
// inline asm
|
||
bfe.u32 %r529, %r542, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd229, %r529, 4;
|
||
add.s64 %rd230, %rd42, %rd229;
|
||
ld.shared.u32 %r2687, [%rd230];
|
||
// inline asm
|
||
bfe.u32 %r533, %r542, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd231, %r533, 4;
|
||
add.s64 %rd232, %rd45, %rd231;
|
||
ld.shared.u32 %r2688, [%rd232];
|
||
add.s32 %r2689, %r2688, %r2687;
|
||
// inline asm
|
||
bfe.u32 %r537, %r542, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd233, %r537, 4;
|
||
add.s64 %rd234, %rd48, %rd233;
|
||
ld.shared.u32 %r2690, [%rd234];
|
||
xor.b32 %r2691, %r2690, %r2689;
|
||
// inline asm
|
||
bfe.u32 %r541, %r542, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd235, %r541, 4;
|
||
add.s64 %rd236, %rd51, %rd235;
|
||
ld.shared.u32 %r2692, [%rd236];
|
||
add.s32 %r2693, %r2692, %r2691;
|
||
xor.b32 %r2694, %r2556, %r526;
|
||
xor.b32 %r558, %r2694, %r2693;
|
||
// inline asm
|
||
bfe.u32 %r545, %r558, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd237, %r545, 4;
|
||
add.s64 %rd238, %rd42, %rd237;
|
||
ld.shared.u32 %r2695, [%rd238];
|
||
// inline asm
|
||
bfe.u32 %r549, %r558, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd239, %r549, 4;
|
||
add.s64 %rd240, %rd45, %rd239;
|
||
ld.shared.u32 %r2696, [%rd240];
|
||
add.s32 %r2697, %r2696, %r2695;
|
||
// inline asm
|
||
bfe.u32 %r553, %r558, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd241, %r553, 4;
|
||
add.s64 %rd242, %rd48, %rd241;
|
||
ld.shared.u32 %r2698, [%rd242];
|
||
xor.b32 %r2699, %r2698, %r2697;
|
||
// inline asm
|
||
bfe.u32 %r557, %r558, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd243, %r557, 4;
|
||
add.s64 %rd244, %rd51, %rd243;
|
||
ld.shared.u32 %r2700, [%rd244];
|
||
add.s32 %r2701, %r2700, %r2699;
|
||
xor.b32 %r2702, %r2565, %r542;
|
||
xor.b32 %r574, %r2702, %r2701;
|
||
// inline asm
|
||
bfe.u32 %r561, %r574, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd245, %r561, 4;
|
||
add.s64 %rd246, %rd42, %rd245;
|
||
ld.shared.u32 %r2703, [%rd246];
|
||
// inline asm
|
||
bfe.u32 %r565, %r574, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd247, %r565, 4;
|
||
add.s64 %rd248, %rd45, %rd247;
|
||
ld.shared.u32 %r2704, [%rd248];
|
||
add.s32 %r2705, %r2704, %r2703;
|
||
// inline asm
|
||
bfe.u32 %r569, %r574, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd249, %r569, 4;
|
||
add.s64 %rd250, %rd48, %rd249;
|
||
ld.shared.u32 %r2706, [%rd250];
|
||
xor.b32 %r2707, %r2706, %r2705;
|
||
// inline asm
|
||
bfe.u32 %r573, %r574, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd251, %r573, 4;
|
||
add.s64 %rd252, %rd51, %rd251;
|
||
ld.shared.u32 %r2708, [%rd252];
|
||
add.s32 %r2709, %r2708, %r2707;
|
||
xor.b32 %r2710, %r2574, %r558;
|
||
xor.b32 %r590, %r2710, %r2709;
|
||
// inline asm
|
||
bfe.u32 %r577, %r590, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd253, %r577, 4;
|
||
add.s64 %rd254, %rd42, %rd253;
|
||
ld.shared.u32 %r2711, [%rd254];
|
||
// inline asm
|
||
bfe.u32 %r581, %r590, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd255, %r581, 4;
|
||
add.s64 %rd256, %rd45, %rd255;
|
||
ld.shared.u32 %r2712, [%rd256];
|
||
add.s32 %r2713, %r2712, %r2711;
|
||
// inline asm
|
||
bfe.u32 %r585, %r590, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd257, %r585, 4;
|
||
add.s64 %rd258, %rd48, %rd257;
|
||
ld.shared.u32 %r2714, [%rd258];
|
||
xor.b32 %r2715, %r2714, %r2713;
|
||
// inline asm
|
||
bfe.u32 %r589, %r590, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd259, %r589, 4;
|
||
add.s64 %rd260, %rd51, %rd259;
|
||
ld.shared.u32 %r2716, [%rd260];
|
||
add.s32 %r2717, %r2716, %r2715;
|
||
xor.b32 %r2718, %r2583, %r574;
|
||
xor.b32 %r606, %r2718, %r2717;
|
||
// inline asm
|
||
bfe.u32 %r593, %r606, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd261, %r593, 4;
|
||
add.s64 %rd262, %rd42, %rd261;
|
||
ld.shared.u32 %r2719, [%rd262];
|
||
// inline asm
|
||
bfe.u32 %r597, %r606, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd263, %r597, 4;
|
||
add.s64 %rd264, %rd45, %rd263;
|
||
ld.shared.u32 %r2720, [%rd264];
|
||
add.s32 %r2721, %r2720, %r2719;
|
||
// inline asm
|
||
bfe.u32 %r601, %r606, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd265, %r601, 4;
|
||
add.s64 %rd266, %rd48, %rd265;
|
||
ld.shared.u32 %r2722, [%rd266];
|
||
xor.b32 %r2723, %r2722, %r2721;
|
||
// inline asm
|
||
bfe.u32 %r605, %r606, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd267, %r605, 4;
|
||
add.s64 %rd268, %rd51, %rd267;
|
||
ld.shared.u32 %r2724, [%rd268];
|
||
add.s32 %r2725, %r2724, %r2723;
|
||
xor.b32 %r2726, %r2592, %r590;
|
||
xor.b32 %r622, %r2726, %r2725;
|
||
// inline asm
|
||
bfe.u32 %r609, %r622, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd269, %r609, 4;
|
||
add.s64 %rd270, %rd42, %rd269;
|
||
ld.shared.u32 %r2727, [%rd270];
|
||
// inline asm
|
||
bfe.u32 %r613, %r622, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd271, %r613, 4;
|
||
add.s64 %rd272, %rd45, %rd271;
|
||
ld.shared.u32 %r2728, [%rd272];
|
||
add.s32 %r2729, %r2728, %r2727;
|
||
// inline asm
|
||
bfe.u32 %r617, %r622, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd273, %r617, 4;
|
||
add.s64 %rd274, %rd48, %rd273;
|
||
ld.shared.u32 %r2730, [%rd274];
|
||
xor.b32 %r2731, %r2730, %r2729;
|
||
// inline asm
|
||
bfe.u32 %r621, %r622, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd275, %r621, 4;
|
||
add.s64 %rd276, %rd51, %rd275;
|
||
ld.shared.u32 %r2732, [%rd276];
|
||
add.s32 %r2733, %r2732, %r2731;
|
||
xor.b32 %r2734, %r2601, %r606;
|
||
xor.b32 %r638, %r2734, %r2733;
|
||
// inline asm
|
||
bfe.u32 %r625, %r638, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd277, %r625, 4;
|
||
add.s64 %rd278, %rd42, %rd277;
|
||
ld.shared.u32 %r2735, [%rd278];
|
||
// inline asm
|
||
bfe.u32 %r629, %r638, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd279, %r629, 4;
|
||
add.s64 %rd280, %rd45, %rd279;
|
||
ld.shared.u32 %r2736, [%rd280];
|
||
add.s32 %r2737, %r2736, %r2735;
|
||
// inline asm
|
||
bfe.u32 %r633, %r638, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd281, %r633, 4;
|
||
add.s64 %rd282, %rd48, %rd281;
|
||
ld.shared.u32 %r2738, [%rd282];
|
||
xor.b32 %r2739, %r2738, %r2737;
|
||
// inline asm
|
||
bfe.u32 %r637, %r638, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd283, %r637, 4;
|
||
add.s64 %rd284, %rd51, %rd283;
|
||
ld.shared.u32 %r2740, [%rd284];
|
||
add.s32 %r2741, %r2740, %r2739;
|
||
xor.b32 %r2742, %r2610, %r622;
|
||
xor.b32 %r654, %r2742, %r2741;
|
||
// inline asm
|
||
bfe.u32 %r641, %r654, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd285, %r641, 4;
|
||
add.s64 %rd286, %rd42, %rd285;
|
||
ld.shared.u32 %r2743, [%rd286];
|
||
// inline asm
|
||
bfe.u32 %r645, %r654, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd287, %r645, 4;
|
||
add.s64 %rd288, %rd45, %rd287;
|
||
ld.shared.u32 %r2744, [%rd288];
|
||
add.s32 %r2745, %r2744, %r2743;
|
||
// inline asm
|
||
bfe.u32 %r649, %r654, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd289, %r649, 4;
|
||
add.s64 %rd290, %rd48, %rd289;
|
||
ld.shared.u32 %r2746, [%rd290];
|
||
xor.b32 %r2747, %r2746, %r2745;
|
||
// inline asm
|
||
bfe.u32 %r653, %r654, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd291, %r653, 4;
|
||
add.s64 %rd292, %rd51, %rd291;
|
||
ld.shared.u32 %r2748, [%rd292];
|
||
add.s32 %r2749, %r2748, %r2747;
|
||
xor.b32 %r2750, %r2619, %r638;
|
||
xor.b32 %r670, %r2750, %r2749;
|
||
// inline asm
|
||
bfe.u32 %r657, %r670, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd293, %r657, 4;
|
||
add.s64 %rd294, %rd42, %rd293;
|
||
ld.shared.u32 %r2751, [%rd294];
|
||
// inline asm
|
||
bfe.u32 %r661, %r670, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd295, %r661, 4;
|
||
add.s64 %rd296, %rd45, %rd295;
|
||
ld.shared.u32 %r2752, [%rd296];
|
||
add.s32 %r2753, %r2752, %r2751;
|
||
// inline asm
|
||
bfe.u32 %r665, %r670, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd297, %r665, 4;
|
||
add.s64 %rd298, %rd48, %rd297;
|
||
ld.shared.u32 %r2754, [%rd298];
|
||
xor.b32 %r2755, %r2754, %r2753;
|
||
// inline asm
|
||
bfe.u32 %r669, %r670, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd299, %r669, 4;
|
||
add.s64 %rd300, %rd51, %rd299;
|
||
ld.shared.u32 %r2756, [%rd300];
|
||
add.s32 %r2757, %r2756, %r2755;
|
||
xor.b32 %r2758, %r2628, %r654;
|
||
xor.b32 %r686, %r2758, %r2757;
|
||
// inline asm
|
||
bfe.u32 %r673, %r686, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd301, %r673, 4;
|
||
add.s64 %rd302, %rd42, %rd301;
|
||
ld.shared.u32 %r2759, [%rd302];
|
||
// inline asm
|
||
bfe.u32 %r677, %r686, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd303, %r677, 4;
|
||
add.s64 %rd304, %rd45, %rd303;
|
||
ld.shared.u32 %r2760, [%rd304];
|
||
add.s32 %r2761, %r2760, %r2759;
|
||
// inline asm
|
||
bfe.u32 %r681, %r686, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd305, %r681, 4;
|
||
add.s64 %rd306, %rd48, %rd305;
|
||
ld.shared.u32 %r2762, [%rd306];
|
||
xor.b32 %r2763, %r2762, %r2761;
|
||
// inline asm
|
||
bfe.u32 %r685, %r686, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd307, %r685, 4;
|
||
add.s64 %rd308, %rd51, %rd307;
|
||
ld.shared.u32 %r2764, [%rd308];
|
||
add.s32 %r2765, %r2764, %r2763;
|
||
xor.b32 %r2766, %r2637, %r670;
|
||
xor.b32 %r702, %r2766, %r2765;
|
||
// inline asm
|
||
bfe.u32 %r689, %r702, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd309, %r689, 4;
|
||
add.s64 %rd310, %rd42, %rd309;
|
||
ld.shared.u32 %r2767, [%rd310];
|
||
// inline asm
|
||
bfe.u32 %r693, %r702, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd311, %r693, 4;
|
||
add.s64 %rd312, %rd45, %rd311;
|
||
ld.shared.u32 %r2768, [%rd312];
|
||
add.s32 %r2769, %r2768, %r2767;
|
||
// inline asm
|
||
bfe.u32 %r697, %r702, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd313, %r697, 4;
|
||
add.s64 %rd314, %rd48, %rd313;
|
||
ld.shared.u32 %r2770, [%rd314];
|
||
xor.b32 %r2771, %r2770, %r2769;
|
||
// inline asm
|
||
bfe.u32 %r701, %r702, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd315, %r701, 4;
|
||
add.s64 %rd316, %rd51, %rd315;
|
||
ld.shared.u32 %r2772, [%rd316];
|
||
add.s32 %r2773, %r2772, %r2771;
|
||
xor.b32 %r2774, %r2646, %r686;
|
||
xor.b32 %r718, %r2774, %r2773;
|
||
// inline asm
|
||
bfe.u32 %r705, %r718, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd317, %r705, 4;
|
||
add.s64 %rd318, %rd42, %rd317;
|
||
ld.shared.u32 %r2775, [%rd318];
|
||
// inline asm
|
||
bfe.u32 %r709, %r718, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd319, %r709, 4;
|
||
add.s64 %rd320, %rd45, %rd319;
|
||
ld.shared.u32 %r2776, [%rd320];
|
||
add.s32 %r2777, %r2776, %r2775;
|
||
// inline asm
|
||
bfe.u32 %r713, %r718, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd321, %r713, 4;
|
||
add.s64 %rd322, %rd48, %rd321;
|
||
ld.shared.u32 %r2778, [%rd322];
|
||
xor.b32 %r2779, %r2778, %r2777;
|
||
// inline asm
|
||
bfe.u32 %r717, %r718, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd323, %r717, 4;
|
||
add.s64 %rd324, %rd51, %rd323;
|
||
ld.shared.u32 %r2780, [%rd324];
|
||
add.s32 %r2781, %r2780, %r2779;
|
||
xor.b32 %r2782, %r2655, %r702;
|
||
xor.b32 %r66, %r2782, %r2781;
|
||
xor.b32 %r67, %r2657, %r718;
|
||
xor.b32 %r734, %r462, %r718;
|
||
// inline asm
|
||
bfe.u32 %r721, %r734, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd325, %r721, 4;
|
||
add.s64 %rd326, %rd42, %rd325;
|
||
ld.shared.u32 %r2783, [%rd326];
|
||
// inline asm
|
||
bfe.u32 %r725, %r734, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd327, %r725, 4;
|
||
add.s64 %rd328, %rd45, %rd327;
|
||
ld.shared.u32 %r2784, [%rd328];
|
||
add.s32 %r2785, %r2784, %r2783;
|
||
// inline asm
|
||
bfe.u32 %r729, %r734, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd329, %r729, 4;
|
||
add.s64 %rd330, %rd48, %rd329;
|
||
ld.shared.u32 %r2786, [%rd330];
|
||
xor.b32 %r2787, %r2786, %r2785;
|
||
// inline asm
|
||
bfe.u32 %r733, %r734, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd331, %r733, 4;
|
||
add.s64 %rd332, %rd51, %rd331;
|
||
ld.shared.u32 %r2788, [%rd332];
|
||
add.s32 %r2789, %r2788, %r2787;
|
||
xor.b32 %r2790, %r64, %r66;
|
||
xor.b32 %r750, %r2790, %r2789;
|
||
// inline asm
|
||
bfe.u32 %r737, %r750, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd333, %r737, 4;
|
||
add.s64 %rd334, %rd42, %rd333;
|
||
ld.shared.u32 %r2791, [%rd334];
|
||
// inline asm
|
||
bfe.u32 %r741, %r750, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd335, %r741, 4;
|
||
add.s64 %rd336, %rd45, %rd335;
|
||
ld.shared.u32 %r2792, [%rd336];
|
||
add.s32 %r2793, %r2792, %r2791;
|
||
// inline asm
|
||
bfe.u32 %r745, %r750, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd337, %r745, 4;
|
||
add.s64 %rd338, %rd48, %rd337;
|
||
ld.shared.u32 %r2794, [%rd338];
|
||
xor.b32 %r2795, %r2794, %r2793;
|
||
// inline asm
|
||
bfe.u32 %r749, %r750, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd339, %r749, 4;
|
||
add.s64 %rd340, %rd51, %rd339;
|
||
ld.shared.u32 %r2796, [%rd340];
|
||
add.s32 %r2797, %r2796, %r2795;
|
||
xor.b32 %r766, %r65, %r2797;
|
||
// inline asm
|
||
bfe.u32 %r753, %r766, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd341, %r753, 4;
|
||
add.s64 %rd342, %rd42, %rd341;
|
||
ld.shared.u32 %r2798, [%rd342];
|
||
// inline asm
|
||
bfe.u32 %r757, %r766, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd343, %r757, 4;
|
||
add.s64 %rd344, %rd45, %rd343;
|
||
ld.shared.u32 %r2799, [%rd344];
|
||
add.s32 %r2800, %r2799, %r2798;
|
||
// inline asm
|
||
bfe.u32 %r761, %r766, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd345, %r761, 4;
|
||
add.s64 %rd346, %rd48, %rd345;
|
||
ld.shared.u32 %r2801, [%rd346];
|
||
xor.b32 %r2802, %r2801, %r2800;
|
||
// inline asm
|
||
bfe.u32 %r765, %r766, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd347, %r765, 4;
|
||
add.s64 %rd348, %rd51, %rd347;
|
||
ld.shared.u32 %r2803, [%rd348];
|
||
add.s32 %r2804, %r2803, %r2802;
|
||
xor.b32 %r2805, %r64, %r2789;
|
||
xor.b32 %r782, %r2805, %r2804;
|
||
// inline asm
|
||
bfe.u32 %r769, %r782, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd349, %r769, 4;
|
||
add.s64 %rd350, %rd42, %rd349;
|
||
ld.shared.u32 %r2806, [%rd350];
|
||
// inline asm
|
||
bfe.u32 %r773, %r782, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd351, %r773, 4;
|
||
add.s64 %rd352, %rd45, %rd351;
|
||
ld.shared.u32 %r2807, [%rd352];
|
||
add.s32 %r2808, %r2807, %r2806;
|
||
// inline asm
|
||
bfe.u32 %r777, %r782, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd353, %r777, 4;
|
||
add.s64 %rd354, %rd48, %rd353;
|
||
ld.shared.u32 %r2809, [%rd354];
|
||
xor.b32 %r2810, %r2809, %r2808;
|
||
// inline asm
|
||
bfe.u32 %r781, %r782, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd355, %r781, 4;
|
||
add.s64 %rd356, %rd51, %rd355;
|
||
ld.shared.u32 %r2811, [%rd356];
|
||
add.s32 %r2812, %r2811, %r2810;
|
||
xor.b32 %r2813, %r2547, %r766;
|
||
xor.b32 %r798, %r2813, %r2812;
|
||
// inline asm
|
||
bfe.u32 %r785, %r798, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd357, %r785, 4;
|
||
add.s64 %rd358, %rd42, %rd357;
|
||
ld.shared.u32 %r2814, [%rd358];
|
||
// inline asm
|
||
bfe.u32 %r789, %r798, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd359, %r789, 4;
|
||
add.s64 %rd360, %rd45, %rd359;
|
||
ld.shared.u32 %r2815, [%rd360];
|
||
add.s32 %r2816, %r2815, %r2814;
|
||
// inline asm
|
||
bfe.u32 %r793, %r798, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd361, %r793, 4;
|
||
add.s64 %rd362, %rd48, %rd361;
|
||
ld.shared.u32 %r2817, [%rd362];
|
||
xor.b32 %r2818, %r2817, %r2816;
|
||
// inline asm
|
||
bfe.u32 %r797, %r798, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd363, %r797, 4;
|
||
add.s64 %rd364, %rd51, %rd363;
|
||
ld.shared.u32 %r2819, [%rd364];
|
||
add.s32 %r2820, %r2819, %r2818;
|
||
xor.b32 %r2821, %r2556, %r782;
|
||
xor.b32 %r814, %r2821, %r2820;
|
||
// inline asm
|
||
bfe.u32 %r801, %r814, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd365, %r801, 4;
|
||
add.s64 %rd366, %rd42, %rd365;
|
||
ld.shared.u32 %r2822, [%rd366];
|
||
// inline asm
|
||
bfe.u32 %r805, %r814, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd367, %r805, 4;
|
||
add.s64 %rd368, %rd45, %rd367;
|
||
ld.shared.u32 %r2823, [%rd368];
|
||
add.s32 %r2824, %r2823, %r2822;
|
||
// inline asm
|
||
bfe.u32 %r809, %r814, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd369, %r809, 4;
|
||
add.s64 %rd370, %rd48, %rd369;
|
||
ld.shared.u32 %r2825, [%rd370];
|
||
xor.b32 %r2826, %r2825, %r2824;
|
||
// inline asm
|
||
bfe.u32 %r813, %r814, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd371, %r813, 4;
|
||
add.s64 %rd372, %rd51, %rd371;
|
||
ld.shared.u32 %r2827, [%rd372];
|
||
add.s32 %r2828, %r2827, %r2826;
|
||
xor.b32 %r2829, %r2565, %r798;
|
||
xor.b32 %r830, %r2829, %r2828;
|
||
// inline asm
|
||
bfe.u32 %r817, %r830, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd373, %r817, 4;
|
||
add.s64 %rd374, %rd42, %rd373;
|
||
ld.shared.u32 %r2830, [%rd374];
|
||
// inline asm
|
||
bfe.u32 %r821, %r830, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd375, %r821, 4;
|
||
add.s64 %rd376, %rd45, %rd375;
|
||
ld.shared.u32 %r2831, [%rd376];
|
||
add.s32 %r2832, %r2831, %r2830;
|
||
// inline asm
|
||
bfe.u32 %r825, %r830, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd377, %r825, 4;
|
||
add.s64 %rd378, %rd48, %rd377;
|
||
ld.shared.u32 %r2833, [%rd378];
|
||
xor.b32 %r2834, %r2833, %r2832;
|
||
// inline asm
|
||
bfe.u32 %r829, %r830, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd379, %r829, 4;
|
||
add.s64 %rd380, %rd51, %rd379;
|
||
ld.shared.u32 %r2835, [%rd380];
|
||
add.s32 %r2836, %r2835, %r2834;
|
||
xor.b32 %r2837, %r2574, %r814;
|
||
xor.b32 %r846, %r2837, %r2836;
|
||
// inline asm
|
||
bfe.u32 %r833, %r846, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd381, %r833, 4;
|
||
add.s64 %rd382, %rd42, %rd381;
|
||
ld.shared.u32 %r2838, [%rd382];
|
||
// inline asm
|
||
bfe.u32 %r837, %r846, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd383, %r837, 4;
|
||
add.s64 %rd384, %rd45, %rd383;
|
||
ld.shared.u32 %r2839, [%rd384];
|
||
add.s32 %r2840, %r2839, %r2838;
|
||
// inline asm
|
||
bfe.u32 %r841, %r846, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd385, %r841, 4;
|
||
add.s64 %rd386, %rd48, %rd385;
|
||
ld.shared.u32 %r2841, [%rd386];
|
||
xor.b32 %r2842, %r2841, %r2840;
|
||
// inline asm
|
||
bfe.u32 %r845, %r846, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd387, %r845, 4;
|
||
add.s64 %rd388, %rd51, %rd387;
|
||
ld.shared.u32 %r2843, [%rd388];
|
||
add.s32 %r2844, %r2843, %r2842;
|
||
xor.b32 %r2845, %r2583, %r830;
|
||
xor.b32 %r862, %r2845, %r2844;
|
||
// inline asm
|
||
bfe.u32 %r849, %r862, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd389, %r849, 4;
|
||
add.s64 %rd390, %rd42, %rd389;
|
||
ld.shared.u32 %r2846, [%rd390];
|
||
// inline asm
|
||
bfe.u32 %r853, %r862, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd391, %r853, 4;
|
||
add.s64 %rd392, %rd45, %rd391;
|
||
ld.shared.u32 %r2847, [%rd392];
|
||
add.s32 %r2848, %r2847, %r2846;
|
||
// inline asm
|
||
bfe.u32 %r857, %r862, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd393, %r857, 4;
|
||
add.s64 %rd394, %rd48, %rd393;
|
||
ld.shared.u32 %r2849, [%rd394];
|
||
xor.b32 %r2850, %r2849, %r2848;
|
||
// inline asm
|
||
bfe.u32 %r861, %r862, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd395, %r861, 4;
|
||
add.s64 %rd396, %rd51, %rd395;
|
||
ld.shared.u32 %r2851, [%rd396];
|
||
add.s32 %r2852, %r2851, %r2850;
|
||
xor.b32 %r2853, %r2592, %r846;
|
||
xor.b32 %r878, %r2853, %r2852;
|
||
// inline asm
|
||
bfe.u32 %r865, %r878, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd397, %r865, 4;
|
||
add.s64 %rd398, %rd42, %rd397;
|
||
ld.shared.u32 %r2854, [%rd398];
|
||
// inline asm
|
||
bfe.u32 %r869, %r878, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd399, %r869, 4;
|
||
add.s64 %rd400, %rd45, %rd399;
|
||
ld.shared.u32 %r2855, [%rd400];
|
||
add.s32 %r2856, %r2855, %r2854;
|
||
// inline asm
|
||
bfe.u32 %r873, %r878, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd401, %r873, 4;
|
||
add.s64 %rd402, %rd48, %rd401;
|
||
ld.shared.u32 %r2857, [%rd402];
|
||
xor.b32 %r2858, %r2857, %r2856;
|
||
// inline asm
|
||
bfe.u32 %r877, %r878, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd403, %r877, 4;
|
||
add.s64 %rd404, %rd51, %rd403;
|
||
ld.shared.u32 %r2859, [%rd404];
|
||
add.s32 %r2860, %r2859, %r2858;
|
||
xor.b32 %r2861, %r2601, %r862;
|
||
xor.b32 %r894, %r2861, %r2860;
|
||
// inline asm
|
||
bfe.u32 %r881, %r894, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd405, %r881, 4;
|
||
add.s64 %rd406, %rd42, %rd405;
|
||
ld.shared.u32 %r2862, [%rd406];
|
||
// inline asm
|
||
bfe.u32 %r885, %r894, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd407, %r885, 4;
|
||
add.s64 %rd408, %rd45, %rd407;
|
||
ld.shared.u32 %r2863, [%rd408];
|
||
add.s32 %r2864, %r2863, %r2862;
|
||
// inline asm
|
||
bfe.u32 %r889, %r894, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd409, %r889, 4;
|
||
add.s64 %rd410, %rd48, %rd409;
|
||
ld.shared.u32 %r2865, [%rd410];
|
||
xor.b32 %r2866, %r2865, %r2864;
|
||
// inline asm
|
||
bfe.u32 %r893, %r894, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd411, %r893, 4;
|
||
add.s64 %rd412, %rd51, %rd411;
|
||
ld.shared.u32 %r2867, [%rd412];
|
||
add.s32 %r2868, %r2867, %r2866;
|
||
xor.b32 %r2869, %r2610, %r878;
|
||
xor.b32 %r910, %r2869, %r2868;
|
||
// inline asm
|
||
bfe.u32 %r897, %r910, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd413, %r897, 4;
|
||
add.s64 %rd414, %rd42, %rd413;
|
||
ld.shared.u32 %r2870, [%rd414];
|
||
// inline asm
|
||
bfe.u32 %r901, %r910, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd415, %r901, 4;
|
||
add.s64 %rd416, %rd45, %rd415;
|
||
ld.shared.u32 %r2871, [%rd416];
|
||
add.s32 %r2872, %r2871, %r2870;
|
||
// inline asm
|
||
bfe.u32 %r905, %r910, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd417, %r905, 4;
|
||
add.s64 %rd418, %rd48, %rd417;
|
||
ld.shared.u32 %r2873, [%rd418];
|
||
xor.b32 %r2874, %r2873, %r2872;
|
||
// inline asm
|
||
bfe.u32 %r909, %r910, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd419, %r909, 4;
|
||
add.s64 %rd420, %rd51, %rd419;
|
||
ld.shared.u32 %r2875, [%rd420];
|
||
add.s32 %r2876, %r2875, %r2874;
|
||
xor.b32 %r2877, %r2619, %r894;
|
||
xor.b32 %r926, %r2877, %r2876;
|
||
// inline asm
|
||
bfe.u32 %r913, %r926, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd421, %r913, 4;
|
||
add.s64 %rd422, %rd42, %rd421;
|
||
ld.shared.u32 %r2878, [%rd422];
|
||
// inline asm
|
||
bfe.u32 %r917, %r926, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd423, %r917, 4;
|
||
add.s64 %rd424, %rd45, %rd423;
|
||
ld.shared.u32 %r2879, [%rd424];
|
||
add.s32 %r2880, %r2879, %r2878;
|
||
// inline asm
|
||
bfe.u32 %r921, %r926, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd425, %r921, 4;
|
||
add.s64 %rd426, %rd48, %rd425;
|
||
ld.shared.u32 %r2881, [%rd426];
|
||
xor.b32 %r2882, %r2881, %r2880;
|
||
// inline asm
|
||
bfe.u32 %r925, %r926, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd427, %r925, 4;
|
||
add.s64 %rd428, %rd51, %rd427;
|
||
ld.shared.u32 %r2883, [%rd428];
|
||
add.s32 %r2884, %r2883, %r2882;
|
||
xor.b32 %r2885, %r2628, %r910;
|
||
xor.b32 %r942, %r2885, %r2884;
|
||
// inline asm
|
||
bfe.u32 %r929, %r942, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd429, %r929, 4;
|
||
add.s64 %rd430, %rd42, %rd429;
|
||
ld.shared.u32 %r2886, [%rd430];
|
||
// inline asm
|
||
bfe.u32 %r933, %r942, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd431, %r933, 4;
|
||
add.s64 %rd432, %rd45, %rd431;
|
||
ld.shared.u32 %r2887, [%rd432];
|
||
add.s32 %r2888, %r2887, %r2886;
|
||
// inline asm
|
||
bfe.u32 %r937, %r942, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd433, %r937, 4;
|
||
add.s64 %rd434, %rd48, %rd433;
|
||
ld.shared.u32 %r2889, [%rd434];
|
||
xor.b32 %r2890, %r2889, %r2888;
|
||
// inline asm
|
||
bfe.u32 %r941, %r942, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd435, %r941, 4;
|
||
add.s64 %rd436, %rd51, %rd435;
|
||
ld.shared.u32 %r2891, [%rd436];
|
||
add.s32 %r2892, %r2891, %r2890;
|
||
xor.b32 %r2893, %r2637, %r926;
|
||
xor.b32 %r958, %r2893, %r2892;
|
||
// inline asm
|
||
bfe.u32 %r945, %r958, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd437, %r945, 4;
|
||
add.s64 %rd438, %rd42, %rd437;
|
||
ld.shared.u32 %r2894, [%rd438];
|
||
// inline asm
|
||
bfe.u32 %r949, %r958, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd439, %r949, 4;
|
||
add.s64 %rd440, %rd45, %rd439;
|
||
ld.shared.u32 %r2895, [%rd440];
|
||
add.s32 %r2896, %r2895, %r2894;
|
||
// inline asm
|
||
bfe.u32 %r953, %r958, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd441, %r953, 4;
|
||
add.s64 %rd442, %rd48, %rd441;
|
||
ld.shared.u32 %r2897, [%rd442];
|
||
xor.b32 %r2898, %r2897, %r2896;
|
||
// inline asm
|
||
bfe.u32 %r957, %r958, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd443, %r957, 4;
|
||
add.s64 %rd444, %rd51, %rd443;
|
||
ld.shared.u32 %r2899, [%rd444];
|
||
add.s32 %r2900, %r2899, %r2898;
|
||
xor.b32 %r2901, %r2646, %r942;
|
||
xor.b32 %r974, %r2901, %r2900;
|
||
// inline asm
|
||
bfe.u32 %r961, %r974, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd445, %r961, 4;
|
||
add.s64 %rd446, %rd42, %rd445;
|
||
ld.shared.u32 %r2902, [%rd446];
|
||
// inline asm
|
||
bfe.u32 %r965, %r974, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd447, %r965, 4;
|
||
add.s64 %rd448, %rd45, %rd447;
|
||
ld.shared.u32 %r2903, [%rd448];
|
||
add.s32 %r2904, %r2903, %r2902;
|
||
// inline asm
|
||
bfe.u32 %r969, %r974, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd449, %r969, 4;
|
||
add.s64 %rd450, %rd48, %rd449;
|
||
ld.shared.u32 %r2905, [%rd450];
|
||
xor.b32 %r2906, %r2905, %r2904;
|
||
// inline asm
|
||
bfe.u32 %r973, %r974, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd451, %r973, 4;
|
||
add.s64 %rd452, %rd51, %rd451;
|
||
ld.shared.u32 %r2907, [%rd452];
|
||
add.s32 %r2908, %r2907, %r2906;
|
||
xor.b32 %r2909, %r2655, %r958;
|
||
xor.b32 %r68, %r2909, %r2908;
|
||
xor.b32 %r69, %r2657, %r974;
|
||
xor.b32 %r990, %r462, %r974;
|
||
// inline asm
|
||
bfe.u32 %r977, %r990, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd453, %r977, 4;
|
||
add.s64 %rd454, %rd42, %rd453;
|
||
ld.shared.u32 %r2910, [%rd454];
|
||
// inline asm
|
||
bfe.u32 %r981, %r990, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd455, %r981, 4;
|
||
add.s64 %rd456, %rd45, %rd455;
|
||
ld.shared.u32 %r2911, [%rd456];
|
||
add.s32 %r2912, %r2911, %r2910;
|
||
// inline asm
|
||
bfe.u32 %r985, %r990, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd457, %r985, 4;
|
||
add.s64 %rd458, %rd48, %rd457;
|
||
ld.shared.u32 %r2913, [%rd458];
|
||
xor.b32 %r2914, %r2913, %r2912;
|
||
// inline asm
|
||
bfe.u32 %r989, %r990, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd459, %r989, 4;
|
||
add.s64 %rd460, %rd51, %rd459;
|
||
ld.shared.u32 %r2915, [%rd460];
|
||
add.s32 %r2916, %r2915, %r2914;
|
||
xor.b32 %r2917, %r64, %r68;
|
||
xor.b32 %r1006, %r2917, %r2916;
|
||
// inline asm
|
||
bfe.u32 %r993, %r1006, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd461, %r993, 4;
|
||
add.s64 %rd462, %rd42, %rd461;
|
||
ld.shared.u32 %r2918, [%rd462];
|
||
// inline asm
|
||
bfe.u32 %r997, %r1006, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd463, %r997, 4;
|
||
add.s64 %rd464, %rd45, %rd463;
|
||
ld.shared.u32 %r2919, [%rd464];
|
||
add.s32 %r2920, %r2919, %r2918;
|
||
// inline asm
|
||
bfe.u32 %r1001, %r1006, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd465, %r1001, 4;
|
||
add.s64 %rd466, %rd48, %rd465;
|
||
ld.shared.u32 %r2921, [%rd466];
|
||
xor.b32 %r2922, %r2921, %r2920;
|
||
// inline asm
|
||
bfe.u32 %r1005, %r1006, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd467, %r1005, 4;
|
||
add.s64 %rd468, %rd51, %rd467;
|
||
ld.shared.u32 %r2923, [%rd468];
|
||
add.s32 %r2924, %r2923, %r2922;
|
||
xor.b32 %r2925, %r67, %r990;
|
||
xor.b32 %r1022, %r2925, %r2924;
|
||
// inline asm
|
||
bfe.u32 %r1009, %r1022, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd469, %r1009, 4;
|
||
add.s64 %rd470, %rd42, %rd469;
|
||
ld.shared.u32 %r2926, [%rd470];
|
||
// inline asm
|
||
bfe.u32 %r1013, %r1022, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd471, %r1013, 4;
|
||
add.s64 %rd472, %rd45, %rd471;
|
||
ld.shared.u32 %r2927, [%rd472];
|
||
add.s32 %r2928, %r2927, %r2926;
|
||
// inline asm
|
||
bfe.u32 %r1017, %r1022, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd473, %r1017, 4;
|
||
add.s64 %rd474, %rd48, %rd473;
|
||
ld.shared.u32 %r2929, [%rd474];
|
||
xor.b32 %r2930, %r2929, %r2928;
|
||
// inline asm
|
||
bfe.u32 %r1021, %r1022, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd475, %r1021, 4;
|
||
add.s64 %rd476, %rd51, %rd475;
|
||
ld.shared.u32 %r2931, [%rd476];
|
||
add.s32 %r2932, %r2931, %r2930;
|
||
xor.b32 %r2933, %r66, %r1006;
|
||
xor.b32 %r1038, %r2933, %r2932;
|
||
// inline asm
|
||
bfe.u32 %r1025, %r1038, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd477, %r1025, 4;
|
||
add.s64 %rd478, %rd42, %rd477;
|
||
ld.shared.u32 %r2934, [%rd478];
|
||
// inline asm
|
||
bfe.u32 %r1029, %r1038, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd479, %r1029, 4;
|
||
add.s64 %rd480, %rd45, %rd479;
|
||
ld.shared.u32 %r2935, [%rd480];
|
||
add.s32 %r2936, %r2935, %r2934;
|
||
// inline asm
|
||
bfe.u32 %r1033, %r1038, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd481, %r1033, 4;
|
||
add.s64 %rd482, %rd48, %rd481;
|
||
ld.shared.u32 %r2937, [%rd482];
|
||
xor.b32 %r2938, %r2937, %r2936;
|
||
// inline asm
|
||
bfe.u32 %r1037, %r1038, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd483, %r1037, 4;
|
||
add.s64 %rd484, %rd51, %rd483;
|
||
ld.shared.u32 %r2939, [%rd484];
|
||
add.s32 %r2940, %r2939, %r2938;
|
||
xor.b32 %r2941, %r69, %r1022;
|
||
xor.b32 %r1054, %r2941, %r2940;
|
||
// inline asm
|
||
bfe.u32 %r1041, %r1054, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd485, %r1041, 4;
|
||
add.s64 %rd486, %rd42, %rd485;
|
||
ld.shared.u32 %r2942, [%rd486];
|
||
// inline asm
|
||
bfe.u32 %r1045, %r1054, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd487, %r1045, 4;
|
||
add.s64 %rd488, %rd45, %rd487;
|
||
ld.shared.u32 %r2943, [%rd488];
|
||
add.s32 %r2944, %r2943, %r2942;
|
||
// inline asm
|
||
bfe.u32 %r1049, %r1054, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd489, %r1049, 4;
|
||
add.s64 %rd490, %rd48, %rd489;
|
||
ld.shared.u32 %r2945, [%rd490];
|
||
xor.b32 %r2946, %r2945, %r2944;
|
||
// inline asm
|
||
bfe.u32 %r1053, %r1054, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd491, %r1053, 4;
|
||
add.s64 %rd492, %rd51, %rd491;
|
||
ld.shared.u32 %r2947, [%rd492];
|
||
add.s32 %r2948, %r2947, %r2946;
|
||
xor.b32 %r2949, %r68, %r1038;
|
||
xor.b32 %r1070, %r2949, %r2948;
|
||
// inline asm
|
||
bfe.u32 %r1057, %r1070, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd493, %r1057, 4;
|
||
add.s64 %rd494, %rd42, %rd493;
|
||
ld.shared.u32 %r2950, [%rd494];
|
||
// inline asm
|
||
bfe.u32 %r1061, %r1070, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd495, %r1061, 4;
|
||
add.s64 %rd496, %rd45, %rd495;
|
||
ld.shared.u32 %r2951, [%rd496];
|
||
add.s32 %r2952, %r2951, %r2950;
|
||
// inline asm
|
||
bfe.u32 %r1065, %r1070, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd497, %r1065, 4;
|
||
add.s64 %rd498, %rd48, %rd497;
|
||
ld.shared.u32 %r2953, [%rd498];
|
||
xor.b32 %r2954, %r2953, %r2952;
|
||
// inline asm
|
||
bfe.u32 %r1069, %r1070, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd499, %r1069, 4;
|
||
add.s64 %rd500, %rd51, %rd499;
|
||
ld.shared.u32 %r2955, [%rd500];
|
||
add.s32 %r2956, %r2955, %r2954;
|
||
xor.b32 %r2957, %r2565, %r1054;
|
||
xor.b32 %r1086, %r2957, %r2956;
|
||
// inline asm
|
||
bfe.u32 %r1073, %r1086, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd501, %r1073, 4;
|
||
add.s64 %rd502, %rd42, %rd501;
|
||
ld.shared.u32 %r2958, [%rd502];
|
||
// inline asm
|
||
bfe.u32 %r1077, %r1086, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd503, %r1077, 4;
|
||
add.s64 %rd504, %rd45, %rd503;
|
||
ld.shared.u32 %r2959, [%rd504];
|
||
add.s32 %r2960, %r2959, %r2958;
|
||
// inline asm
|
||
bfe.u32 %r1081, %r1086, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd505, %r1081, 4;
|
||
add.s64 %rd506, %rd48, %rd505;
|
||
ld.shared.u32 %r2961, [%rd506];
|
||
xor.b32 %r2962, %r2961, %r2960;
|
||
// inline asm
|
||
bfe.u32 %r1085, %r1086, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd507, %r1085, 4;
|
||
add.s64 %rd508, %rd51, %rd507;
|
||
ld.shared.u32 %r2963, [%rd508];
|
||
add.s32 %r2964, %r2963, %r2962;
|
||
xor.b32 %r2965, %r2574, %r1070;
|
||
xor.b32 %r1102, %r2965, %r2964;
|
||
// inline asm
|
||
bfe.u32 %r1089, %r1102, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd509, %r1089, 4;
|
||
add.s64 %rd510, %rd42, %rd509;
|
||
ld.shared.u32 %r2966, [%rd510];
|
||
// inline asm
|
||
bfe.u32 %r1093, %r1102, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd511, %r1093, 4;
|
||
add.s64 %rd512, %rd45, %rd511;
|
||
ld.shared.u32 %r2967, [%rd512];
|
||
add.s32 %r2968, %r2967, %r2966;
|
||
// inline asm
|
||
bfe.u32 %r1097, %r1102, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd513, %r1097, 4;
|
||
add.s64 %rd514, %rd48, %rd513;
|
||
ld.shared.u32 %r2969, [%rd514];
|
||
xor.b32 %r2970, %r2969, %r2968;
|
||
// inline asm
|
||
bfe.u32 %r1101, %r1102, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd515, %r1101, 4;
|
||
add.s64 %rd516, %rd51, %rd515;
|
||
ld.shared.u32 %r2971, [%rd516];
|
||
add.s32 %r2972, %r2971, %r2970;
|
||
xor.b32 %r2973, %r2583, %r1086;
|
||
xor.b32 %r1118, %r2973, %r2972;
|
||
// inline asm
|
||
bfe.u32 %r1105, %r1118, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd517, %r1105, 4;
|
||
add.s64 %rd518, %rd42, %rd517;
|
||
ld.shared.u32 %r2974, [%rd518];
|
||
// inline asm
|
||
bfe.u32 %r1109, %r1118, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd519, %r1109, 4;
|
||
add.s64 %rd520, %rd45, %rd519;
|
||
ld.shared.u32 %r2975, [%rd520];
|
||
add.s32 %r2976, %r2975, %r2974;
|
||
// inline asm
|
||
bfe.u32 %r1113, %r1118, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd521, %r1113, 4;
|
||
add.s64 %rd522, %rd48, %rd521;
|
||
ld.shared.u32 %r2977, [%rd522];
|
||
xor.b32 %r2978, %r2977, %r2976;
|
||
// inline asm
|
||
bfe.u32 %r1117, %r1118, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd523, %r1117, 4;
|
||
add.s64 %rd524, %rd51, %rd523;
|
||
ld.shared.u32 %r2979, [%rd524];
|
||
add.s32 %r2980, %r2979, %r2978;
|
||
xor.b32 %r2981, %r2592, %r1102;
|
||
xor.b32 %r1134, %r2981, %r2980;
|
||
// inline asm
|
||
bfe.u32 %r1121, %r1134, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd525, %r1121, 4;
|
||
add.s64 %rd526, %rd42, %rd525;
|
||
ld.shared.u32 %r2982, [%rd526];
|
||
// inline asm
|
||
bfe.u32 %r1125, %r1134, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd527, %r1125, 4;
|
||
add.s64 %rd528, %rd45, %rd527;
|
||
ld.shared.u32 %r2983, [%rd528];
|
||
add.s32 %r2984, %r2983, %r2982;
|
||
// inline asm
|
||
bfe.u32 %r1129, %r1134, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd529, %r1129, 4;
|
||
add.s64 %rd530, %rd48, %rd529;
|
||
ld.shared.u32 %r2985, [%rd530];
|
||
xor.b32 %r2986, %r2985, %r2984;
|
||
// inline asm
|
||
bfe.u32 %r1133, %r1134, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd531, %r1133, 4;
|
||
add.s64 %rd532, %rd51, %rd531;
|
||
ld.shared.u32 %r2987, [%rd532];
|
||
add.s32 %r2988, %r2987, %r2986;
|
||
xor.b32 %r2989, %r2601, %r1118;
|
||
xor.b32 %r1150, %r2989, %r2988;
|
||
// inline asm
|
||
bfe.u32 %r1137, %r1150, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd533, %r1137, 4;
|
||
add.s64 %rd534, %rd42, %rd533;
|
||
ld.shared.u32 %r2990, [%rd534];
|
||
// inline asm
|
||
bfe.u32 %r1141, %r1150, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd535, %r1141, 4;
|
||
add.s64 %rd536, %rd45, %rd535;
|
||
ld.shared.u32 %r2991, [%rd536];
|
||
add.s32 %r2992, %r2991, %r2990;
|
||
// inline asm
|
||
bfe.u32 %r1145, %r1150, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd537, %r1145, 4;
|
||
add.s64 %rd538, %rd48, %rd537;
|
||
ld.shared.u32 %r2993, [%rd538];
|
||
xor.b32 %r2994, %r2993, %r2992;
|
||
// inline asm
|
||
bfe.u32 %r1149, %r1150, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd539, %r1149, 4;
|
||
add.s64 %rd540, %rd51, %rd539;
|
||
ld.shared.u32 %r2995, [%rd540];
|
||
add.s32 %r2996, %r2995, %r2994;
|
||
xor.b32 %r2997, %r2610, %r1134;
|
||
xor.b32 %r1166, %r2997, %r2996;
|
||
// inline asm
|
||
bfe.u32 %r1153, %r1166, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd541, %r1153, 4;
|
||
add.s64 %rd542, %rd42, %rd541;
|
||
ld.shared.u32 %r2998, [%rd542];
|
||
// inline asm
|
||
bfe.u32 %r1157, %r1166, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd543, %r1157, 4;
|
||
add.s64 %rd544, %rd45, %rd543;
|
||
ld.shared.u32 %r2999, [%rd544];
|
||
add.s32 %r3000, %r2999, %r2998;
|
||
// inline asm
|
||
bfe.u32 %r1161, %r1166, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd545, %r1161, 4;
|
||
add.s64 %rd546, %rd48, %rd545;
|
||
ld.shared.u32 %r3001, [%rd546];
|
||
xor.b32 %r3002, %r3001, %r3000;
|
||
// inline asm
|
||
bfe.u32 %r1165, %r1166, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd547, %r1165, 4;
|
||
add.s64 %rd548, %rd51, %rd547;
|
||
ld.shared.u32 %r3003, [%rd548];
|
||
add.s32 %r3004, %r3003, %r3002;
|
||
xor.b32 %r3005, %r2619, %r1150;
|
||
xor.b32 %r1182, %r3005, %r3004;
|
||
// inline asm
|
||
bfe.u32 %r1169, %r1182, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd549, %r1169, 4;
|
||
add.s64 %rd550, %rd42, %rd549;
|
||
ld.shared.u32 %r3006, [%rd550];
|
||
// inline asm
|
||
bfe.u32 %r1173, %r1182, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd551, %r1173, 4;
|
||
add.s64 %rd552, %rd45, %rd551;
|
||
ld.shared.u32 %r3007, [%rd552];
|
||
add.s32 %r3008, %r3007, %r3006;
|
||
// inline asm
|
||
bfe.u32 %r1177, %r1182, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd553, %r1177, 4;
|
||
add.s64 %rd554, %rd48, %rd553;
|
||
ld.shared.u32 %r3009, [%rd554];
|
||
xor.b32 %r3010, %r3009, %r3008;
|
||
// inline asm
|
||
bfe.u32 %r1181, %r1182, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd555, %r1181, 4;
|
||
add.s64 %rd556, %rd51, %rd555;
|
||
ld.shared.u32 %r3011, [%rd556];
|
||
add.s32 %r3012, %r3011, %r3010;
|
||
xor.b32 %r3013, %r2628, %r1166;
|
||
xor.b32 %r1198, %r3013, %r3012;
|
||
// inline asm
|
||
bfe.u32 %r1185, %r1198, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd557, %r1185, 4;
|
||
add.s64 %rd558, %rd42, %rd557;
|
||
ld.shared.u32 %r3014, [%rd558];
|
||
// inline asm
|
||
bfe.u32 %r1189, %r1198, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd559, %r1189, 4;
|
||
add.s64 %rd560, %rd45, %rd559;
|
||
ld.shared.u32 %r3015, [%rd560];
|
||
add.s32 %r3016, %r3015, %r3014;
|
||
// inline asm
|
||
bfe.u32 %r1193, %r1198, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd561, %r1193, 4;
|
||
add.s64 %rd562, %rd48, %rd561;
|
||
ld.shared.u32 %r3017, [%rd562];
|
||
xor.b32 %r3018, %r3017, %r3016;
|
||
// inline asm
|
||
bfe.u32 %r1197, %r1198, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd563, %r1197, 4;
|
||
add.s64 %rd564, %rd51, %rd563;
|
||
ld.shared.u32 %r3019, [%rd564];
|
||
add.s32 %r3020, %r3019, %r3018;
|
||
xor.b32 %r3021, %r2637, %r1182;
|
||
xor.b32 %r1214, %r3021, %r3020;
|
||
// inline asm
|
||
bfe.u32 %r1201, %r1214, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd565, %r1201, 4;
|
||
add.s64 %rd566, %rd42, %rd565;
|
||
ld.shared.u32 %r3022, [%rd566];
|
||
// inline asm
|
||
bfe.u32 %r1205, %r1214, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd567, %r1205, 4;
|
||
add.s64 %rd568, %rd45, %rd567;
|
||
ld.shared.u32 %r3023, [%rd568];
|
||
add.s32 %r3024, %r3023, %r3022;
|
||
// inline asm
|
||
bfe.u32 %r1209, %r1214, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd569, %r1209, 4;
|
||
add.s64 %rd570, %rd48, %rd569;
|
||
ld.shared.u32 %r3025, [%rd570];
|
||
xor.b32 %r3026, %r3025, %r3024;
|
||
// inline asm
|
||
bfe.u32 %r1213, %r1214, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd571, %r1213, 4;
|
||
add.s64 %rd572, %rd51, %rd571;
|
||
ld.shared.u32 %r3027, [%rd572];
|
||
add.s32 %r3028, %r3027, %r3026;
|
||
xor.b32 %r3029, %r2646, %r1198;
|
||
xor.b32 %r1230, %r3029, %r3028;
|
||
// inline asm
|
||
bfe.u32 %r1217, %r1230, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd573, %r1217, 4;
|
||
add.s64 %rd574, %rd42, %rd573;
|
||
ld.shared.u32 %r3030, [%rd574];
|
||
// inline asm
|
||
bfe.u32 %r1221, %r1230, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd575, %r1221, 4;
|
||
add.s64 %rd576, %rd45, %rd575;
|
||
ld.shared.u32 %r3031, [%rd576];
|
||
add.s32 %r3032, %r3031, %r3030;
|
||
// inline asm
|
||
bfe.u32 %r1225, %r1230, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd577, %r1225, 4;
|
||
add.s64 %rd578, %rd48, %rd577;
|
||
ld.shared.u32 %r3033, [%rd578];
|
||
xor.b32 %r3034, %r3033, %r3032;
|
||
// inline asm
|
||
bfe.u32 %r1229, %r1230, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd579, %r1229, 4;
|
||
add.s64 %rd580, %rd51, %rd579;
|
||
ld.shared.u32 %r3035, [%rd580];
|
||
add.s32 %r3036, %r3035, %r3034;
|
||
xor.b32 %r3037, %r2655, %r1214;
|
||
xor.b32 %r70, %r3037, %r3036;
|
||
xor.b32 %r71, %r2657, %r1230;
|
||
xor.b32 %r1246, %r462, %r1230;
|
||
// inline asm
|
||
bfe.u32 %r1233, %r1246, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd581, %r1233, 4;
|
||
add.s64 %rd582, %rd42, %rd581;
|
||
ld.shared.u32 %r3038, [%rd582];
|
||
// inline asm
|
||
bfe.u32 %r1237, %r1246, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd583, %r1237, 4;
|
||
add.s64 %rd584, %rd45, %rd583;
|
||
ld.shared.u32 %r3039, [%rd584];
|
||
add.s32 %r3040, %r3039, %r3038;
|
||
// inline asm
|
||
bfe.u32 %r1241, %r1246, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd585, %r1241, 4;
|
||
add.s64 %rd586, %rd48, %rd585;
|
||
ld.shared.u32 %r3041, [%rd586];
|
||
xor.b32 %r3042, %r3041, %r3040;
|
||
// inline asm
|
||
bfe.u32 %r1245, %r1246, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd587, %r1245, 4;
|
||
add.s64 %rd588, %rd51, %rd587;
|
||
ld.shared.u32 %r3043, [%rd588];
|
||
add.s32 %r3044, %r3043, %r3042;
|
||
xor.b32 %r3045, %r64, %r70;
|
||
xor.b32 %r1262, %r3045, %r3044;
|
||
// inline asm
|
||
bfe.u32 %r1249, %r1262, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd589, %r1249, 4;
|
||
add.s64 %rd590, %rd42, %rd589;
|
||
ld.shared.u32 %r3046, [%rd590];
|
||
// inline asm
|
||
bfe.u32 %r1253, %r1262, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd591, %r1253, 4;
|
||
add.s64 %rd592, %rd45, %rd591;
|
||
ld.shared.u32 %r3047, [%rd592];
|
||
add.s32 %r3048, %r3047, %r3046;
|
||
// inline asm
|
||
bfe.u32 %r1257, %r1262, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd593, %r1257, 4;
|
||
add.s64 %rd594, %rd48, %rd593;
|
||
ld.shared.u32 %r3049, [%rd594];
|
||
xor.b32 %r3050, %r3049, %r3048;
|
||
// inline asm
|
||
bfe.u32 %r1261, %r1262, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd595, %r1261, 4;
|
||
add.s64 %rd596, %rd51, %rd595;
|
||
ld.shared.u32 %r3051, [%rd596];
|
||
add.s32 %r3052, %r3051, %r3050;
|
||
xor.b32 %r3053, %r67, %r1246;
|
||
xor.b32 %r1278, %r3053, %r3052;
|
||
// inline asm
|
||
bfe.u32 %r1265, %r1278, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd597, %r1265, 4;
|
||
add.s64 %rd598, %rd42, %rd597;
|
||
ld.shared.u32 %r3054, [%rd598];
|
||
// inline asm
|
||
bfe.u32 %r1269, %r1278, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd599, %r1269, 4;
|
||
add.s64 %rd600, %rd45, %rd599;
|
||
ld.shared.u32 %r3055, [%rd600];
|
||
add.s32 %r3056, %r3055, %r3054;
|
||
// inline asm
|
||
bfe.u32 %r1273, %r1278, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd601, %r1273, 4;
|
||
add.s64 %rd602, %rd48, %rd601;
|
||
ld.shared.u32 %r3057, [%rd602];
|
||
xor.b32 %r3058, %r3057, %r3056;
|
||
// inline asm
|
||
bfe.u32 %r1277, %r1278, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd603, %r1277, 4;
|
||
add.s64 %rd604, %rd51, %rd603;
|
||
ld.shared.u32 %r3059, [%rd604];
|
||
add.s32 %r3060, %r3059, %r3058;
|
||
xor.b32 %r3061, %r66, %r1262;
|
||
xor.b32 %r1294, %r3061, %r3060;
|
||
// inline asm
|
||
bfe.u32 %r1281, %r1294, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd605, %r1281, 4;
|
||
add.s64 %rd606, %rd42, %rd605;
|
||
ld.shared.u32 %r3062, [%rd606];
|
||
// inline asm
|
||
bfe.u32 %r1285, %r1294, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd607, %r1285, 4;
|
||
add.s64 %rd608, %rd45, %rd607;
|
||
ld.shared.u32 %r3063, [%rd608];
|
||
add.s32 %r3064, %r3063, %r3062;
|
||
// inline asm
|
||
bfe.u32 %r1289, %r1294, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd609, %r1289, 4;
|
||
add.s64 %rd610, %rd48, %rd609;
|
||
ld.shared.u32 %r3065, [%rd610];
|
||
xor.b32 %r3066, %r3065, %r3064;
|
||
// inline asm
|
||
bfe.u32 %r1293, %r1294, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd611, %r1293, 4;
|
||
add.s64 %rd612, %rd51, %rd611;
|
||
ld.shared.u32 %r3067, [%rd612];
|
||
add.s32 %r3068, %r3067, %r3066;
|
||
xor.b32 %r3069, %r69, %r1278;
|
||
xor.b32 %r1310, %r3069, %r3068;
|
||
// inline asm
|
||
bfe.u32 %r1297, %r1310, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd613, %r1297, 4;
|
||
add.s64 %rd614, %rd42, %rd613;
|
||
ld.shared.u32 %r3070, [%rd614];
|
||
// inline asm
|
||
bfe.u32 %r1301, %r1310, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd615, %r1301, 4;
|
||
add.s64 %rd616, %rd45, %rd615;
|
||
ld.shared.u32 %r3071, [%rd616];
|
||
add.s32 %r3072, %r3071, %r3070;
|
||
// inline asm
|
||
bfe.u32 %r1305, %r1310, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd617, %r1305, 4;
|
||
add.s64 %rd618, %rd48, %rd617;
|
||
ld.shared.u32 %r3073, [%rd618];
|
||
xor.b32 %r3074, %r3073, %r3072;
|
||
// inline asm
|
||
bfe.u32 %r1309, %r1310, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd619, %r1309, 4;
|
||
add.s64 %rd620, %rd51, %rd619;
|
||
ld.shared.u32 %r3075, [%rd620];
|
||
add.s32 %r3076, %r3075, %r3074;
|
||
xor.b32 %r3077, %r68, %r1294;
|
||
xor.b32 %r1326, %r3077, %r3076;
|
||
// inline asm
|
||
bfe.u32 %r1313, %r1326, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd621, %r1313, 4;
|
||
add.s64 %rd622, %rd42, %rd621;
|
||
ld.shared.u32 %r3078, [%rd622];
|
||
// inline asm
|
||
bfe.u32 %r1317, %r1326, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd623, %r1317, 4;
|
||
add.s64 %rd624, %rd45, %rd623;
|
||
ld.shared.u32 %r3079, [%rd624];
|
||
add.s32 %r3080, %r3079, %r3078;
|
||
// inline asm
|
||
bfe.u32 %r1321, %r1326, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd625, %r1321, 4;
|
||
add.s64 %rd626, %rd48, %rd625;
|
||
ld.shared.u32 %r3081, [%rd626];
|
||
xor.b32 %r3082, %r3081, %r3080;
|
||
// inline asm
|
||
bfe.u32 %r1325, %r1326, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd627, %r1325, 4;
|
||
add.s64 %rd628, %rd51, %rd627;
|
||
ld.shared.u32 %r3083, [%rd628];
|
||
add.s32 %r3084, %r3083, %r3082;
|
||
xor.b32 %r3085, %r71, %r1310;
|
||
xor.b32 %r1342, %r3085, %r3084;
|
||
// inline asm
|
||
bfe.u32 %r1329, %r1342, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd629, %r1329, 4;
|
||
add.s64 %rd630, %rd42, %rd629;
|
||
ld.shared.u32 %r3086, [%rd630];
|
||
// inline asm
|
||
bfe.u32 %r1333, %r1342, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd631, %r1333, 4;
|
||
add.s64 %rd632, %rd45, %rd631;
|
||
ld.shared.u32 %r3087, [%rd632];
|
||
add.s32 %r3088, %r3087, %r3086;
|
||
// inline asm
|
||
bfe.u32 %r1337, %r1342, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd633, %r1337, 4;
|
||
add.s64 %rd634, %rd48, %rd633;
|
||
ld.shared.u32 %r3089, [%rd634];
|
||
xor.b32 %r3090, %r3089, %r3088;
|
||
// inline asm
|
||
bfe.u32 %r1341, %r1342, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd635, %r1341, 4;
|
||
add.s64 %rd636, %rd51, %rd635;
|
||
ld.shared.u32 %r3091, [%rd636];
|
||
add.s32 %r3092, %r3091, %r3090;
|
||
xor.b32 %r3093, %r70, %r1326;
|
||
xor.b32 %r1358, %r3093, %r3092;
|
||
// inline asm
|
||
bfe.u32 %r1345, %r1358, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd637, %r1345, 4;
|
||
add.s64 %rd638, %rd42, %rd637;
|
||
ld.shared.u32 %r3094, [%rd638];
|
||
// inline asm
|
||
bfe.u32 %r1349, %r1358, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd639, %r1349, 4;
|
||
add.s64 %rd640, %rd45, %rd639;
|
||
ld.shared.u32 %r3095, [%rd640];
|
||
add.s32 %r3096, %r3095, %r3094;
|
||
// inline asm
|
||
bfe.u32 %r1353, %r1358, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd641, %r1353, 4;
|
||
add.s64 %rd642, %rd48, %rd641;
|
||
ld.shared.u32 %r3097, [%rd642];
|
||
xor.b32 %r3098, %r3097, %r3096;
|
||
// inline asm
|
||
bfe.u32 %r1357, %r1358, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd643, %r1357, 4;
|
||
add.s64 %rd644, %rd51, %rd643;
|
||
ld.shared.u32 %r3099, [%rd644];
|
||
add.s32 %r3100, %r3099, %r3098;
|
||
xor.b32 %r3101, %r2583, %r1342;
|
||
xor.b32 %r1374, %r3101, %r3100;
|
||
// inline asm
|
||
bfe.u32 %r1361, %r1374, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd645, %r1361, 4;
|
||
add.s64 %rd646, %rd42, %rd645;
|
||
ld.shared.u32 %r3102, [%rd646];
|
||
// inline asm
|
||
bfe.u32 %r1365, %r1374, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd647, %r1365, 4;
|
||
add.s64 %rd648, %rd45, %rd647;
|
||
ld.shared.u32 %r3103, [%rd648];
|
||
add.s32 %r3104, %r3103, %r3102;
|
||
// inline asm
|
||
bfe.u32 %r1369, %r1374, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd649, %r1369, 4;
|
||
add.s64 %rd650, %rd48, %rd649;
|
||
ld.shared.u32 %r3105, [%rd650];
|
||
xor.b32 %r3106, %r3105, %r3104;
|
||
// inline asm
|
||
bfe.u32 %r1373, %r1374, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd651, %r1373, 4;
|
||
add.s64 %rd652, %rd51, %rd651;
|
||
ld.shared.u32 %r3107, [%rd652];
|
||
add.s32 %r3108, %r3107, %r3106;
|
||
xor.b32 %r3109, %r2592, %r1358;
|
||
xor.b32 %r1390, %r3109, %r3108;
|
||
// inline asm
|
||
bfe.u32 %r1377, %r1390, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd653, %r1377, 4;
|
||
add.s64 %rd654, %rd42, %rd653;
|
||
ld.shared.u32 %r3110, [%rd654];
|
||
// inline asm
|
||
bfe.u32 %r1381, %r1390, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd655, %r1381, 4;
|
||
add.s64 %rd656, %rd45, %rd655;
|
||
ld.shared.u32 %r3111, [%rd656];
|
||
add.s32 %r3112, %r3111, %r3110;
|
||
// inline asm
|
||
bfe.u32 %r1385, %r1390, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd657, %r1385, 4;
|
||
add.s64 %rd658, %rd48, %rd657;
|
||
ld.shared.u32 %r3113, [%rd658];
|
||
xor.b32 %r3114, %r3113, %r3112;
|
||
// inline asm
|
||
bfe.u32 %r1389, %r1390, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd659, %r1389, 4;
|
||
add.s64 %rd660, %rd51, %rd659;
|
||
ld.shared.u32 %r3115, [%rd660];
|
||
add.s32 %r3116, %r3115, %r3114;
|
||
xor.b32 %r3117, %r2601, %r1374;
|
||
xor.b32 %r1406, %r3117, %r3116;
|
||
// inline asm
|
||
bfe.u32 %r1393, %r1406, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd661, %r1393, 4;
|
||
add.s64 %rd662, %rd42, %rd661;
|
||
ld.shared.u32 %r3118, [%rd662];
|
||
// inline asm
|
||
bfe.u32 %r1397, %r1406, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd663, %r1397, 4;
|
||
add.s64 %rd664, %rd45, %rd663;
|
||
ld.shared.u32 %r3119, [%rd664];
|
||
add.s32 %r3120, %r3119, %r3118;
|
||
// inline asm
|
||
bfe.u32 %r1401, %r1406, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd665, %r1401, 4;
|
||
add.s64 %rd666, %rd48, %rd665;
|
||
ld.shared.u32 %r3121, [%rd666];
|
||
xor.b32 %r3122, %r3121, %r3120;
|
||
// inline asm
|
||
bfe.u32 %r1405, %r1406, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd667, %r1405, 4;
|
||
add.s64 %rd668, %rd51, %rd667;
|
||
ld.shared.u32 %r3123, [%rd668];
|
||
add.s32 %r3124, %r3123, %r3122;
|
||
xor.b32 %r3125, %r2610, %r1390;
|
||
xor.b32 %r1422, %r3125, %r3124;
|
||
// inline asm
|
||
bfe.u32 %r1409, %r1422, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd669, %r1409, 4;
|
||
add.s64 %rd670, %rd42, %rd669;
|
||
ld.shared.u32 %r3126, [%rd670];
|
||
// inline asm
|
||
bfe.u32 %r1413, %r1422, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd671, %r1413, 4;
|
||
add.s64 %rd672, %rd45, %rd671;
|
||
ld.shared.u32 %r3127, [%rd672];
|
||
add.s32 %r3128, %r3127, %r3126;
|
||
// inline asm
|
||
bfe.u32 %r1417, %r1422, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd673, %r1417, 4;
|
||
add.s64 %rd674, %rd48, %rd673;
|
||
ld.shared.u32 %r3129, [%rd674];
|
||
xor.b32 %r3130, %r3129, %r3128;
|
||
// inline asm
|
||
bfe.u32 %r1421, %r1422, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd675, %r1421, 4;
|
||
add.s64 %rd676, %rd51, %rd675;
|
||
ld.shared.u32 %r3131, [%rd676];
|
||
add.s32 %r3132, %r3131, %r3130;
|
||
xor.b32 %r3133, %r2619, %r1406;
|
||
xor.b32 %r1438, %r3133, %r3132;
|
||
// inline asm
|
||
bfe.u32 %r1425, %r1438, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd677, %r1425, 4;
|
||
add.s64 %rd678, %rd42, %rd677;
|
||
ld.shared.u32 %r3134, [%rd678];
|
||
// inline asm
|
||
bfe.u32 %r1429, %r1438, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd679, %r1429, 4;
|
||
add.s64 %rd680, %rd45, %rd679;
|
||
ld.shared.u32 %r3135, [%rd680];
|
||
add.s32 %r3136, %r3135, %r3134;
|
||
// inline asm
|
||
bfe.u32 %r1433, %r1438, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd681, %r1433, 4;
|
||
add.s64 %rd682, %rd48, %rd681;
|
||
ld.shared.u32 %r3137, [%rd682];
|
||
xor.b32 %r3138, %r3137, %r3136;
|
||
// inline asm
|
||
bfe.u32 %r1437, %r1438, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd683, %r1437, 4;
|
||
add.s64 %rd684, %rd51, %rd683;
|
||
ld.shared.u32 %r3139, [%rd684];
|
||
add.s32 %r3140, %r3139, %r3138;
|
||
xor.b32 %r3141, %r2628, %r1422;
|
||
xor.b32 %r1454, %r3141, %r3140;
|
||
// inline asm
|
||
bfe.u32 %r1441, %r1454, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd685, %r1441, 4;
|
||
add.s64 %rd686, %rd42, %rd685;
|
||
ld.shared.u32 %r3142, [%rd686];
|
||
// inline asm
|
||
bfe.u32 %r1445, %r1454, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd687, %r1445, 4;
|
||
add.s64 %rd688, %rd45, %rd687;
|
||
ld.shared.u32 %r3143, [%rd688];
|
||
add.s32 %r3144, %r3143, %r3142;
|
||
// inline asm
|
||
bfe.u32 %r1449, %r1454, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd689, %r1449, 4;
|
||
add.s64 %rd690, %rd48, %rd689;
|
||
ld.shared.u32 %r3145, [%rd690];
|
||
xor.b32 %r3146, %r3145, %r3144;
|
||
// inline asm
|
||
bfe.u32 %r1453, %r1454, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd691, %r1453, 4;
|
||
add.s64 %rd692, %rd51, %rd691;
|
||
ld.shared.u32 %r3147, [%rd692];
|
||
add.s32 %r3148, %r3147, %r3146;
|
||
xor.b32 %r3149, %r2637, %r1438;
|
||
xor.b32 %r1470, %r3149, %r3148;
|
||
// inline asm
|
||
bfe.u32 %r1457, %r1470, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd693, %r1457, 4;
|
||
add.s64 %rd694, %rd42, %rd693;
|
||
ld.shared.u32 %r3150, [%rd694];
|
||
// inline asm
|
||
bfe.u32 %r1461, %r1470, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd695, %r1461, 4;
|
||
add.s64 %rd696, %rd45, %rd695;
|
||
ld.shared.u32 %r3151, [%rd696];
|
||
add.s32 %r3152, %r3151, %r3150;
|
||
// inline asm
|
||
bfe.u32 %r1465, %r1470, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd697, %r1465, 4;
|
||
add.s64 %rd698, %rd48, %rd697;
|
||
ld.shared.u32 %r3153, [%rd698];
|
||
xor.b32 %r3154, %r3153, %r3152;
|
||
// inline asm
|
||
bfe.u32 %r1469, %r1470, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd699, %r1469, 4;
|
||
add.s64 %rd700, %rd51, %rd699;
|
||
ld.shared.u32 %r3155, [%rd700];
|
||
add.s32 %r3156, %r3155, %r3154;
|
||
xor.b32 %r3157, %r2646, %r1454;
|
||
xor.b32 %r1486, %r3157, %r3156;
|
||
// inline asm
|
||
bfe.u32 %r1473, %r1486, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd701, %r1473, 4;
|
||
add.s64 %rd702, %rd42, %rd701;
|
||
ld.shared.u32 %r3158, [%rd702];
|
||
// inline asm
|
||
bfe.u32 %r1477, %r1486, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd703, %r1477, 4;
|
||
add.s64 %rd704, %rd45, %rd703;
|
||
ld.shared.u32 %r3159, [%rd704];
|
||
add.s32 %r3160, %r3159, %r3158;
|
||
// inline asm
|
||
bfe.u32 %r1481, %r1486, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd705, %r1481, 4;
|
||
add.s64 %rd706, %rd48, %rd705;
|
||
ld.shared.u32 %r3161, [%rd706];
|
||
xor.b32 %r3162, %r3161, %r3160;
|
||
// inline asm
|
||
bfe.u32 %r1485, %r1486, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd707, %r1485, 4;
|
||
add.s64 %rd708, %rd51, %rd707;
|
||
ld.shared.u32 %r3163, [%rd708];
|
||
add.s32 %r3164, %r3163, %r3162;
|
||
xor.b32 %r3165, %r2655, %r1470;
|
||
xor.b32 %r72, %r3165, %r3164;
|
||
xor.b32 %r73, %r2657, %r1486;
|
||
xor.b32 %r1502, %r462, %r1486;
|
||
// inline asm
|
||
bfe.u32 %r1489, %r1502, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd709, %r1489, 4;
|
||
add.s64 %rd710, %rd42, %rd709;
|
||
ld.shared.u32 %r3166, [%rd710];
|
||
// inline asm
|
||
bfe.u32 %r1493, %r1502, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd711, %r1493, 4;
|
||
add.s64 %rd712, %rd45, %rd711;
|
||
ld.shared.u32 %r3167, [%rd712];
|
||
add.s32 %r3168, %r3167, %r3166;
|
||
// inline asm
|
||
bfe.u32 %r1497, %r1502, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd713, %r1497, 4;
|
||
add.s64 %rd714, %rd48, %rd713;
|
||
ld.shared.u32 %r3169, [%rd714];
|
||
xor.b32 %r3170, %r3169, %r3168;
|
||
// inline asm
|
||
bfe.u32 %r1501, %r1502, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd715, %r1501, 4;
|
||
add.s64 %rd716, %rd51, %rd715;
|
||
ld.shared.u32 %r3171, [%rd716];
|
||
add.s32 %r3172, %r3171, %r3170;
|
||
xor.b32 %r3173, %r64, %r72;
|
||
xor.b32 %r1518, %r3173, %r3172;
|
||
// inline asm
|
||
bfe.u32 %r1505, %r1518, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd717, %r1505, 4;
|
||
add.s64 %rd718, %rd42, %rd717;
|
||
ld.shared.u32 %r3174, [%rd718];
|
||
// inline asm
|
||
bfe.u32 %r1509, %r1518, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd719, %r1509, 4;
|
||
add.s64 %rd720, %rd45, %rd719;
|
||
ld.shared.u32 %r3175, [%rd720];
|
||
add.s32 %r3176, %r3175, %r3174;
|
||
// inline asm
|
||
bfe.u32 %r1513, %r1518, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd721, %r1513, 4;
|
||
add.s64 %rd722, %rd48, %rd721;
|
||
ld.shared.u32 %r3177, [%rd722];
|
||
xor.b32 %r3178, %r3177, %r3176;
|
||
// inline asm
|
||
bfe.u32 %r1517, %r1518, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd723, %r1517, 4;
|
||
add.s64 %rd724, %rd51, %rd723;
|
||
ld.shared.u32 %r3179, [%rd724];
|
||
add.s32 %r3180, %r3179, %r3178;
|
||
xor.b32 %r3181, %r67, %r1502;
|
||
xor.b32 %r1534, %r3181, %r3180;
|
||
// inline asm
|
||
bfe.u32 %r1521, %r1534, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd725, %r1521, 4;
|
||
add.s64 %rd726, %rd42, %rd725;
|
||
ld.shared.u32 %r3182, [%rd726];
|
||
// inline asm
|
||
bfe.u32 %r1525, %r1534, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd727, %r1525, 4;
|
||
add.s64 %rd728, %rd45, %rd727;
|
||
ld.shared.u32 %r3183, [%rd728];
|
||
add.s32 %r3184, %r3183, %r3182;
|
||
// inline asm
|
||
bfe.u32 %r1529, %r1534, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd729, %r1529, 4;
|
||
add.s64 %rd730, %rd48, %rd729;
|
||
ld.shared.u32 %r3185, [%rd730];
|
||
xor.b32 %r3186, %r3185, %r3184;
|
||
// inline asm
|
||
bfe.u32 %r1533, %r1534, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd731, %r1533, 4;
|
||
add.s64 %rd732, %rd51, %rd731;
|
||
ld.shared.u32 %r3187, [%rd732];
|
||
add.s32 %r3188, %r3187, %r3186;
|
||
xor.b32 %r3189, %r66, %r1518;
|
||
xor.b32 %r1550, %r3189, %r3188;
|
||
// inline asm
|
||
bfe.u32 %r1537, %r1550, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd733, %r1537, 4;
|
||
add.s64 %rd734, %rd42, %rd733;
|
||
ld.shared.u32 %r3190, [%rd734];
|
||
// inline asm
|
||
bfe.u32 %r1541, %r1550, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd735, %r1541, 4;
|
||
add.s64 %rd736, %rd45, %rd735;
|
||
ld.shared.u32 %r3191, [%rd736];
|
||
add.s32 %r3192, %r3191, %r3190;
|
||
// inline asm
|
||
bfe.u32 %r1545, %r1550, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd737, %r1545, 4;
|
||
add.s64 %rd738, %rd48, %rd737;
|
||
ld.shared.u32 %r3193, [%rd738];
|
||
xor.b32 %r3194, %r3193, %r3192;
|
||
// inline asm
|
||
bfe.u32 %r1549, %r1550, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd739, %r1549, 4;
|
||
add.s64 %rd740, %rd51, %rd739;
|
||
ld.shared.u32 %r3195, [%rd740];
|
||
add.s32 %r3196, %r3195, %r3194;
|
||
xor.b32 %r3197, %r69, %r1534;
|
||
xor.b32 %r1566, %r3197, %r3196;
|
||
// inline asm
|
||
bfe.u32 %r1553, %r1566, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd741, %r1553, 4;
|
||
add.s64 %rd742, %rd42, %rd741;
|
||
ld.shared.u32 %r3198, [%rd742];
|
||
// inline asm
|
||
bfe.u32 %r1557, %r1566, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd743, %r1557, 4;
|
||
add.s64 %rd744, %rd45, %rd743;
|
||
ld.shared.u32 %r3199, [%rd744];
|
||
add.s32 %r3200, %r3199, %r3198;
|
||
// inline asm
|
||
bfe.u32 %r1561, %r1566, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd745, %r1561, 4;
|
||
add.s64 %rd746, %rd48, %rd745;
|
||
ld.shared.u32 %r3201, [%rd746];
|
||
xor.b32 %r3202, %r3201, %r3200;
|
||
// inline asm
|
||
bfe.u32 %r1565, %r1566, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd747, %r1565, 4;
|
||
add.s64 %rd748, %rd51, %rd747;
|
||
ld.shared.u32 %r3203, [%rd748];
|
||
add.s32 %r3204, %r3203, %r3202;
|
||
xor.b32 %r3205, %r68, %r1550;
|
||
xor.b32 %r1582, %r3205, %r3204;
|
||
// inline asm
|
||
bfe.u32 %r1569, %r1582, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd749, %r1569, 4;
|
||
add.s64 %rd750, %rd42, %rd749;
|
||
ld.shared.u32 %r3206, [%rd750];
|
||
// inline asm
|
||
bfe.u32 %r1573, %r1582, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd751, %r1573, 4;
|
||
add.s64 %rd752, %rd45, %rd751;
|
||
ld.shared.u32 %r3207, [%rd752];
|
||
add.s32 %r3208, %r3207, %r3206;
|
||
// inline asm
|
||
bfe.u32 %r1577, %r1582, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd753, %r1577, 4;
|
||
add.s64 %rd754, %rd48, %rd753;
|
||
ld.shared.u32 %r3209, [%rd754];
|
||
xor.b32 %r3210, %r3209, %r3208;
|
||
// inline asm
|
||
bfe.u32 %r1581, %r1582, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd755, %r1581, 4;
|
||
add.s64 %rd756, %rd51, %rd755;
|
||
ld.shared.u32 %r3211, [%rd756];
|
||
add.s32 %r3212, %r3211, %r3210;
|
||
xor.b32 %r3213, %r71, %r1566;
|
||
xor.b32 %r1598, %r3213, %r3212;
|
||
// inline asm
|
||
bfe.u32 %r1585, %r1598, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd757, %r1585, 4;
|
||
add.s64 %rd758, %rd42, %rd757;
|
||
ld.shared.u32 %r3214, [%rd758];
|
||
// inline asm
|
||
bfe.u32 %r1589, %r1598, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd759, %r1589, 4;
|
||
add.s64 %rd760, %rd45, %rd759;
|
||
ld.shared.u32 %r3215, [%rd760];
|
||
add.s32 %r3216, %r3215, %r3214;
|
||
// inline asm
|
||
bfe.u32 %r1593, %r1598, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd761, %r1593, 4;
|
||
add.s64 %rd762, %rd48, %rd761;
|
||
ld.shared.u32 %r3217, [%rd762];
|
||
xor.b32 %r3218, %r3217, %r3216;
|
||
// inline asm
|
||
bfe.u32 %r1597, %r1598, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd763, %r1597, 4;
|
||
add.s64 %rd764, %rd51, %rd763;
|
||
ld.shared.u32 %r3219, [%rd764];
|
||
add.s32 %r3220, %r3219, %r3218;
|
||
xor.b32 %r3221, %r70, %r1582;
|
||
xor.b32 %r1614, %r3221, %r3220;
|
||
// inline asm
|
||
bfe.u32 %r1601, %r1614, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd765, %r1601, 4;
|
||
add.s64 %rd766, %rd42, %rd765;
|
||
ld.shared.u32 %r3222, [%rd766];
|
||
// inline asm
|
||
bfe.u32 %r1605, %r1614, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd767, %r1605, 4;
|
||
add.s64 %rd768, %rd45, %rd767;
|
||
ld.shared.u32 %r3223, [%rd768];
|
||
add.s32 %r3224, %r3223, %r3222;
|
||
// inline asm
|
||
bfe.u32 %r1609, %r1614, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd769, %r1609, 4;
|
||
add.s64 %rd770, %rd48, %rd769;
|
||
ld.shared.u32 %r3225, [%rd770];
|
||
xor.b32 %r3226, %r3225, %r3224;
|
||
// inline asm
|
||
bfe.u32 %r1613, %r1614, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd771, %r1613, 4;
|
||
add.s64 %rd772, %rd51, %rd771;
|
||
ld.shared.u32 %r3227, [%rd772];
|
||
add.s32 %r3228, %r3227, %r3226;
|
||
xor.b32 %r3229, %r73, %r1598;
|
||
xor.b32 %r1630, %r3229, %r3228;
|
||
// inline asm
|
||
bfe.u32 %r1617, %r1630, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd773, %r1617, 4;
|
||
add.s64 %rd774, %rd42, %rd773;
|
||
ld.shared.u32 %r3230, [%rd774];
|
||
// inline asm
|
||
bfe.u32 %r1621, %r1630, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd775, %r1621, 4;
|
||
add.s64 %rd776, %rd45, %rd775;
|
||
ld.shared.u32 %r3231, [%rd776];
|
||
add.s32 %r3232, %r3231, %r3230;
|
||
// inline asm
|
||
bfe.u32 %r1625, %r1630, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd777, %r1625, 4;
|
||
add.s64 %rd778, %rd48, %rd777;
|
||
ld.shared.u32 %r3233, [%rd778];
|
||
xor.b32 %r3234, %r3233, %r3232;
|
||
// inline asm
|
||
bfe.u32 %r1629, %r1630, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd779, %r1629, 4;
|
||
add.s64 %rd780, %rd51, %rd779;
|
||
ld.shared.u32 %r3235, [%rd780];
|
||
add.s32 %r3236, %r3235, %r3234;
|
||
xor.b32 %r3237, %r72, %r1614;
|
||
xor.b32 %r1646, %r3237, %r3236;
|
||
// inline asm
|
||
bfe.u32 %r1633, %r1646, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd781, %r1633, 4;
|
||
add.s64 %rd782, %rd42, %rd781;
|
||
ld.shared.u32 %r3238, [%rd782];
|
||
// inline asm
|
||
bfe.u32 %r1637, %r1646, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd783, %r1637, 4;
|
||
add.s64 %rd784, %rd45, %rd783;
|
||
ld.shared.u32 %r3239, [%rd784];
|
||
add.s32 %r3240, %r3239, %r3238;
|
||
// inline asm
|
||
bfe.u32 %r1641, %r1646, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd785, %r1641, 4;
|
||
add.s64 %rd786, %rd48, %rd785;
|
||
ld.shared.u32 %r3241, [%rd786];
|
||
xor.b32 %r3242, %r3241, %r3240;
|
||
// inline asm
|
||
bfe.u32 %r1645, %r1646, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd787, %r1645, 4;
|
||
add.s64 %rd788, %rd51, %rd787;
|
||
ld.shared.u32 %r3243, [%rd788];
|
||
add.s32 %r3244, %r3243, %r3242;
|
||
xor.b32 %r3245, %r2601, %r1630;
|
||
xor.b32 %r1662, %r3245, %r3244;
|
||
// inline asm
|
||
bfe.u32 %r1649, %r1662, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd789, %r1649, 4;
|
||
add.s64 %rd790, %rd42, %rd789;
|
||
ld.shared.u32 %r3246, [%rd790];
|
||
// inline asm
|
||
bfe.u32 %r1653, %r1662, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd791, %r1653, 4;
|
||
add.s64 %rd792, %rd45, %rd791;
|
||
ld.shared.u32 %r3247, [%rd792];
|
||
add.s32 %r3248, %r3247, %r3246;
|
||
// inline asm
|
||
bfe.u32 %r1657, %r1662, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd793, %r1657, 4;
|
||
add.s64 %rd794, %rd48, %rd793;
|
||
ld.shared.u32 %r3249, [%rd794];
|
||
xor.b32 %r3250, %r3249, %r3248;
|
||
// inline asm
|
||
bfe.u32 %r1661, %r1662, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd795, %r1661, 4;
|
||
add.s64 %rd796, %rd51, %rd795;
|
||
ld.shared.u32 %r3251, [%rd796];
|
||
add.s32 %r3252, %r3251, %r3250;
|
||
xor.b32 %r3253, %r2610, %r1646;
|
||
xor.b32 %r1678, %r3253, %r3252;
|
||
// inline asm
|
||
bfe.u32 %r1665, %r1678, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd797, %r1665, 4;
|
||
add.s64 %rd798, %rd42, %rd797;
|
||
ld.shared.u32 %r3254, [%rd798];
|
||
// inline asm
|
||
bfe.u32 %r1669, %r1678, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd799, %r1669, 4;
|
||
add.s64 %rd800, %rd45, %rd799;
|
||
ld.shared.u32 %r3255, [%rd800];
|
||
add.s32 %r3256, %r3255, %r3254;
|
||
// inline asm
|
||
bfe.u32 %r1673, %r1678, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd801, %r1673, 4;
|
||
add.s64 %rd802, %rd48, %rd801;
|
||
ld.shared.u32 %r3257, [%rd802];
|
||
xor.b32 %r3258, %r3257, %r3256;
|
||
// inline asm
|
||
bfe.u32 %r1677, %r1678, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd803, %r1677, 4;
|
||
add.s64 %rd804, %rd51, %rd803;
|
||
ld.shared.u32 %r3259, [%rd804];
|
||
add.s32 %r3260, %r3259, %r3258;
|
||
xor.b32 %r3261, %r2619, %r1662;
|
||
xor.b32 %r1694, %r3261, %r3260;
|
||
// inline asm
|
||
bfe.u32 %r1681, %r1694, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd805, %r1681, 4;
|
||
add.s64 %rd806, %rd42, %rd805;
|
||
ld.shared.u32 %r3262, [%rd806];
|
||
// inline asm
|
||
bfe.u32 %r1685, %r1694, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd807, %r1685, 4;
|
||
add.s64 %rd808, %rd45, %rd807;
|
||
ld.shared.u32 %r3263, [%rd808];
|
||
add.s32 %r3264, %r3263, %r3262;
|
||
// inline asm
|
||
bfe.u32 %r1689, %r1694, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd809, %r1689, 4;
|
||
add.s64 %rd810, %rd48, %rd809;
|
||
ld.shared.u32 %r3265, [%rd810];
|
||
xor.b32 %r3266, %r3265, %r3264;
|
||
// inline asm
|
||
bfe.u32 %r1693, %r1694, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd811, %r1693, 4;
|
||
add.s64 %rd812, %rd51, %rd811;
|
||
ld.shared.u32 %r3267, [%rd812];
|
||
add.s32 %r3268, %r3267, %r3266;
|
||
xor.b32 %r3269, %r2628, %r1678;
|
||
xor.b32 %r1710, %r3269, %r3268;
|
||
// inline asm
|
||
bfe.u32 %r1697, %r1710, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd813, %r1697, 4;
|
||
add.s64 %rd814, %rd42, %rd813;
|
||
ld.shared.u32 %r3270, [%rd814];
|
||
// inline asm
|
||
bfe.u32 %r1701, %r1710, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd815, %r1701, 4;
|
||
add.s64 %rd816, %rd45, %rd815;
|
||
ld.shared.u32 %r3271, [%rd816];
|
||
add.s32 %r3272, %r3271, %r3270;
|
||
// inline asm
|
||
bfe.u32 %r1705, %r1710, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd817, %r1705, 4;
|
||
add.s64 %rd818, %rd48, %rd817;
|
||
ld.shared.u32 %r3273, [%rd818];
|
||
xor.b32 %r3274, %r3273, %r3272;
|
||
// inline asm
|
||
bfe.u32 %r1709, %r1710, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd819, %r1709, 4;
|
||
add.s64 %rd820, %rd51, %rd819;
|
||
ld.shared.u32 %r3275, [%rd820];
|
||
add.s32 %r3276, %r3275, %r3274;
|
||
xor.b32 %r3277, %r2637, %r1694;
|
||
xor.b32 %r1726, %r3277, %r3276;
|
||
// inline asm
|
||
bfe.u32 %r1713, %r1726, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd821, %r1713, 4;
|
||
add.s64 %rd822, %rd42, %rd821;
|
||
ld.shared.u32 %r3278, [%rd822];
|
||
// inline asm
|
||
bfe.u32 %r1717, %r1726, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd823, %r1717, 4;
|
||
add.s64 %rd824, %rd45, %rd823;
|
||
ld.shared.u32 %r3279, [%rd824];
|
||
add.s32 %r3280, %r3279, %r3278;
|
||
// inline asm
|
||
bfe.u32 %r1721, %r1726, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd825, %r1721, 4;
|
||
add.s64 %rd826, %rd48, %rd825;
|
||
ld.shared.u32 %r3281, [%rd826];
|
||
xor.b32 %r3282, %r3281, %r3280;
|
||
// inline asm
|
||
bfe.u32 %r1725, %r1726, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd827, %r1725, 4;
|
||
add.s64 %rd828, %rd51, %rd827;
|
||
ld.shared.u32 %r3283, [%rd828];
|
||
add.s32 %r3284, %r3283, %r3282;
|
||
xor.b32 %r3285, %r2646, %r1710;
|
||
xor.b32 %r1742, %r3285, %r3284;
|
||
// inline asm
|
||
bfe.u32 %r1729, %r1742, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd829, %r1729, 4;
|
||
add.s64 %rd830, %rd42, %rd829;
|
||
ld.shared.u32 %r3286, [%rd830];
|
||
// inline asm
|
||
bfe.u32 %r1733, %r1742, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd831, %r1733, 4;
|
||
add.s64 %rd832, %rd45, %rd831;
|
||
ld.shared.u32 %r3287, [%rd832];
|
||
add.s32 %r3288, %r3287, %r3286;
|
||
// inline asm
|
||
bfe.u32 %r1737, %r1742, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd833, %r1737, 4;
|
||
add.s64 %rd834, %rd48, %rd833;
|
||
ld.shared.u32 %r3289, [%rd834];
|
||
xor.b32 %r3290, %r3289, %r3288;
|
||
// inline asm
|
||
bfe.u32 %r1741, %r1742, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd835, %r1741, 4;
|
||
add.s64 %rd836, %rd51, %rd835;
|
||
ld.shared.u32 %r3291, [%rd836];
|
||
add.s32 %r3292, %r3291, %r3290;
|
||
xor.b32 %r3293, %r2655, %r1726;
|
||
xor.b32 %r74, %r3293, %r3292;
|
||
xor.b32 %r75, %r2657, %r1742;
|
||
xor.b32 %r1758, %r462, %r1742;
|
||
// inline asm
|
||
bfe.u32 %r1745, %r1758, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd837, %r1745, 4;
|
||
add.s64 %rd838, %rd42, %rd837;
|
||
ld.shared.u32 %r3294, [%rd838];
|
||
// inline asm
|
||
bfe.u32 %r1749, %r1758, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd839, %r1749, 4;
|
||
add.s64 %rd840, %rd45, %rd839;
|
||
ld.shared.u32 %r3295, [%rd840];
|
||
add.s32 %r3296, %r3295, %r3294;
|
||
// inline asm
|
||
bfe.u32 %r1753, %r1758, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd841, %r1753, 4;
|
||
add.s64 %rd842, %rd48, %rd841;
|
||
ld.shared.u32 %r3297, [%rd842];
|
||
xor.b32 %r3298, %r3297, %r3296;
|
||
// inline asm
|
||
bfe.u32 %r1757, %r1758, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd843, %r1757, 4;
|
||
add.s64 %rd844, %rd51, %rd843;
|
||
ld.shared.u32 %r3299, [%rd844];
|
||
add.s32 %r3300, %r3299, %r3298;
|
||
xor.b32 %r3301, %r64, %r74;
|
||
xor.b32 %r1774, %r3301, %r3300;
|
||
// inline asm
|
||
bfe.u32 %r1761, %r1774, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd845, %r1761, 4;
|
||
add.s64 %rd846, %rd42, %rd845;
|
||
ld.shared.u32 %r3302, [%rd846];
|
||
// inline asm
|
||
bfe.u32 %r1765, %r1774, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd847, %r1765, 4;
|
||
add.s64 %rd848, %rd45, %rd847;
|
||
ld.shared.u32 %r3303, [%rd848];
|
||
add.s32 %r3304, %r3303, %r3302;
|
||
// inline asm
|
||
bfe.u32 %r1769, %r1774, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd849, %r1769, 4;
|
||
add.s64 %rd850, %rd48, %rd849;
|
||
ld.shared.u32 %r3305, [%rd850];
|
||
xor.b32 %r3306, %r3305, %r3304;
|
||
// inline asm
|
||
bfe.u32 %r1773, %r1774, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd851, %r1773, 4;
|
||
add.s64 %rd852, %rd51, %rd851;
|
||
ld.shared.u32 %r3307, [%rd852];
|
||
add.s32 %r3308, %r3307, %r3306;
|
||
xor.b32 %r3309, %r67, %r1758;
|
||
xor.b32 %r1790, %r3309, %r3308;
|
||
// inline asm
|
||
bfe.u32 %r1777, %r1790, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd853, %r1777, 4;
|
||
add.s64 %rd854, %rd42, %rd853;
|
||
ld.shared.u32 %r3310, [%rd854];
|
||
// inline asm
|
||
bfe.u32 %r1781, %r1790, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd855, %r1781, 4;
|
||
add.s64 %rd856, %rd45, %rd855;
|
||
ld.shared.u32 %r3311, [%rd856];
|
||
add.s32 %r3312, %r3311, %r3310;
|
||
// inline asm
|
||
bfe.u32 %r1785, %r1790, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd857, %r1785, 4;
|
||
add.s64 %rd858, %rd48, %rd857;
|
||
ld.shared.u32 %r3313, [%rd858];
|
||
xor.b32 %r3314, %r3313, %r3312;
|
||
// inline asm
|
||
bfe.u32 %r1789, %r1790, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd859, %r1789, 4;
|
||
add.s64 %rd860, %rd51, %rd859;
|
||
ld.shared.u32 %r3315, [%rd860];
|
||
add.s32 %r3316, %r3315, %r3314;
|
||
xor.b32 %r3317, %r66, %r1774;
|
||
xor.b32 %r1806, %r3317, %r3316;
|
||
// inline asm
|
||
bfe.u32 %r1793, %r1806, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd861, %r1793, 4;
|
||
add.s64 %rd862, %rd42, %rd861;
|
||
ld.shared.u32 %r3318, [%rd862];
|
||
// inline asm
|
||
bfe.u32 %r1797, %r1806, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd863, %r1797, 4;
|
||
add.s64 %rd864, %rd45, %rd863;
|
||
ld.shared.u32 %r3319, [%rd864];
|
||
add.s32 %r3320, %r3319, %r3318;
|
||
// inline asm
|
||
bfe.u32 %r1801, %r1806, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd865, %r1801, 4;
|
||
add.s64 %rd866, %rd48, %rd865;
|
||
ld.shared.u32 %r3321, [%rd866];
|
||
xor.b32 %r3322, %r3321, %r3320;
|
||
// inline asm
|
||
bfe.u32 %r1805, %r1806, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd867, %r1805, 4;
|
||
add.s64 %rd868, %rd51, %rd867;
|
||
ld.shared.u32 %r3323, [%rd868];
|
||
add.s32 %r3324, %r3323, %r3322;
|
||
xor.b32 %r3325, %r69, %r1790;
|
||
xor.b32 %r1822, %r3325, %r3324;
|
||
// inline asm
|
||
bfe.u32 %r1809, %r1822, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd869, %r1809, 4;
|
||
add.s64 %rd870, %rd42, %rd869;
|
||
ld.shared.u32 %r3326, [%rd870];
|
||
// inline asm
|
||
bfe.u32 %r1813, %r1822, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd871, %r1813, 4;
|
||
add.s64 %rd872, %rd45, %rd871;
|
||
ld.shared.u32 %r3327, [%rd872];
|
||
add.s32 %r3328, %r3327, %r3326;
|
||
// inline asm
|
||
bfe.u32 %r1817, %r1822, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd873, %r1817, 4;
|
||
add.s64 %rd874, %rd48, %rd873;
|
||
ld.shared.u32 %r3329, [%rd874];
|
||
xor.b32 %r3330, %r3329, %r3328;
|
||
// inline asm
|
||
bfe.u32 %r1821, %r1822, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd875, %r1821, 4;
|
||
add.s64 %rd876, %rd51, %rd875;
|
||
ld.shared.u32 %r3331, [%rd876];
|
||
add.s32 %r3332, %r3331, %r3330;
|
||
xor.b32 %r3333, %r68, %r1806;
|
||
xor.b32 %r1838, %r3333, %r3332;
|
||
// inline asm
|
||
bfe.u32 %r1825, %r1838, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd877, %r1825, 4;
|
||
add.s64 %rd878, %rd42, %rd877;
|
||
ld.shared.u32 %r3334, [%rd878];
|
||
// inline asm
|
||
bfe.u32 %r1829, %r1838, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd879, %r1829, 4;
|
||
add.s64 %rd880, %rd45, %rd879;
|
||
ld.shared.u32 %r3335, [%rd880];
|
||
add.s32 %r3336, %r3335, %r3334;
|
||
// inline asm
|
||
bfe.u32 %r1833, %r1838, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd881, %r1833, 4;
|
||
add.s64 %rd882, %rd48, %rd881;
|
||
ld.shared.u32 %r3337, [%rd882];
|
||
xor.b32 %r3338, %r3337, %r3336;
|
||
// inline asm
|
||
bfe.u32 %r1837, %r1838, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd883, %r1837, 4;
|
||
add.s64 %rd884, %rd51, %rd883;
|
||
ld.shared.u32 %r3339, [%rd884];
|
||
add.s32 %r3340, %r3339, %r3338;
|
||
xor.b32 %r3341, %r71, %r1822;
|
||
xor.b32 %r1854, %r3341, %r3340;
|
||
// inline asm
|
||
bfe.u32 %r1841, %r1854, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd885, %r1841, 4;
|
||
add.s64 %rd886, %rd42, %rd885;
|
||
ld.shared.u32 %r3342, [%rd886];
|
||
// inline asm
|
||
bfe.u32 %r1845, %r1854, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd887, %r1845, 4;
|
||
add.s64 %rd888, %rd45, %rd887;
|
||
ld.shared.u32 %r3343, [%rd888];
|
||
add.s32 %r3344, %r3343, %r3342;
|
||
// inline asm
|
||
bfe.u32 %r1849, %r1854, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd889, %r1849, 4;
|
||
add.s64 %rd890, %rd48, %rd889;
|
||
ld.shared.u32 %r3345, [%rd890];
|
||
xor.b32 %r3346, %r3345, %r3344;
|
||
// inline asm
|
||
bfe.u32 %r1853, %r1854, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd891, %r1853, 4;
|
||
add.s64 %rd892, %rd51, %rd891;
|
||
ld.shared.u32 %r3347, [%rd892];
|
||
add.s32 %r3348, %r3347, %r3346;
|
||
xor.b32 %r3349, %r70, %r1838;
|
||
xor.b32 %r1870, %r3349, %r3348;
|
||
// inline asm
|
||
bfe.u32 %r1857, %r1870, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd893, %r1857, 4;
|
||
add.s64 %rd894, %rd42, %rd893;
|
||
ld.shared.u32 %r3350, [%rd894];
|
||
// inline asm
|
||
bfe.u32 %r1861, %r1870, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd895, %r1861, 4;
|
||
add.s64 %rd896, %rd45, %rd895;
|
||
ld.shared.u32 %r3351, [%rd896];
|
||
add.s32 %r3352, %r3351, %r3350;
|
||
// inline asm
|
||
bfe.u32 %r1865, %r1870, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd897, %r1865, 4;
|
||
add.s64 %rd898, %rd48, %rd897;
|
||
ld.shared.u32 %r3353, [%rd898];
|
||
xor.b32 %r3354, %r3353, %r3352;
|
||
// inline asm
|
||
bfe.u32 %r1869, %r1870, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd899, %r1869, 4;
|
||
add.s64 %rd900, %rd51, %rd899;
|
||
ld.shared.u32 %r3355, [%rd900];
|
||
add.s32 %r3356, %r3355, %r3354;
|
||
xor.b32 %r3357, %r73, %r1854;
|
||
xor.b32 %r1886, %r3357, %r3356;
|
||
// inline asm
|
||
bfe.u32 %r1873, %r1886, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd901, %r1873, 4;
|
||
add.s64 %rd902, %rd42, %rd901;
|
||
ld.shared.u32 %r3358, [%rd902];
|
||
// inline asm
|
||
bfe.u32 %r1877, %r1886, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd903, %r1877, 4;
|
||
add.s64 %rd904, %rd45, %rd903;
|
||
ld.shared.u32 %r3359, [%rd904];
|
||
add.s32 %r3360, %r3359, %r3358;
|
||
// inline asm
|
||
bfe.u32 %r1881, %r1886, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd905, %r1881, 4;
|
||
add.s64 %rd906, %rd48, %rd905;
|
||
ld.shared.u32 %r3361, [%rd906];
|
||
xor.b32 %r3362, %r3361, %r3360;
|
||
// inline asm
|
||
bfe.u32 %r1885, %r1886, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd907, %r1885, 4;
|
||
add.s64 %rd908, %rd51, %rd907;
|
||
ld.shared.u32 %r3363, [%rd908];
|
||
add.s32 %r3364, %r3363, %r3362;
|
||
xor.b32 %r3365, %r72, %r1870;
|
||
xor.b32 %r1902, %r3365, %r3364;
|
||
// inline asm
|
||
bfe.u32 %r1889, %r1902, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd909, %r1889, 4;
|
||
add.s64 %rd910, %rd42, %rd909;
|
||
ld.shared.u32 %r3366, [%rd910];
|
||
// inline asm
|
||
bfe.u32 %r1893, %r1902, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd911, %r1893, 4;
|
||
add.s64 %rd912, %rd45, %rd911;
|
||
ld.shared.u32 %r3367, [%rd912];
|
||
add.s32 %r3368, %r3367, %r3366;
|
||
// inline asm
|
||
bfe.u32 %r1897, %r1902, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd913, %r1897, 4;
|
||
add.s64 %rd914, %rd48, %rd913;
|
||
ld.shared.u32 %r3369, [%rd914];
|
||
xor.b32 %r3370, %r3369, %r3368;
|
||
// inline asm
|
||
bfe.u32 %r1901, %r1902, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd915, %r1901, 4;
|
||
add.s64 %rd916, %rd51, %rd915;
|
||
ld.shared.u32 %r3371, [%rd916];
|
||
add.s32 %r3372, %r3371, %r3370;
|
||
xor.b32 %r3373, %r75, %r1886;
|
||
xor.b32 %r1918, %r3373, %r3372;
|
||
// inline asm
|
||
bfe.u32 %r1905, %r1918, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd917, %r1905, 4;
|
||
add.s64 %rd918, %rd42, %rd917;
|
||
ld.shared.u32 %r3374, [%rd918];
|
||
// inline asm
|
||
bfe.u32 %r1909, %r1918, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd919, %r1909, 4;
|
||
add.s64 %rd920, %rd45, %rd919;
|
||
ld.shared.u32 %r3375, [%rd920];
|
||
add.s32 %r3376, %r3375, %r3374;
|
||
// inline asm
|
||
bfe.u32 %r1913, %r1918, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd921, %r1913, 4;
|
||
add.s64 %rd922, %rd48, %rd921;
|
||
ld.shared.u32 %r3377, [%rd922];
|
||
xor.b32 %r3378, %r3377, %r3376;
|
||
// inline asm
|
||
bfe.u32 %r1917, %r1918, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd923, %r1917, 4;
|
||
add.s64 %rd924, %rd51, %rd923;
|
||
ld.shared.u32 %r3379, [%rd924];
|
||
add.s32 %r3380, %r3379, %r3378;
|
||
xor.b32 %r3381, %r74, %r1902;
|
||
xor.b32 %r1934, %r3381, %r3380;
|
||
// inline asm
|
||
bfe.u32 %r1921, %r1934, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd925, %r1921, 4;
|
||
add.s64 %rd926, %rd42, %rd925;
|
||
ld.shared.u32 %r3382, [%rd926];
|
||
// inline asm
|
||
bfe.u32 %r1925, %r1934, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd927, %r1925, 4;
|
||
add.s64 %rd928, %rd45, %rd927;
|
||
ld.shared.u32 %r3383, [%rd928];
|
||
add.s32 %r3384, %r3383, %r3382;
|
||
// inline asm
|
||
bfe.u32 %r1929, %r1934, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd929, %r1929, 4;
|
||
add.s64 %rd930, %rd48, %rd929;
|
||
ld.shared.u32 %r3385, [%rd930];
|
||
xor.b32 %r3386, %r3385, %r3384;
|
||
// inline asm
|
||
bfe.u32 %r1933, %r1934, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd931, %r1933, 4;
|
||
add.s64 %rd932, %rd51, %rd931;
|
||
ld.shared.u32 %r3387, [%rd932];
|
||
add.s32 %r3388, %r3387, %r3386;
|
||
xor.b32 %r3389, %r2619, %r1918;
|
||
xor.b32 %r1950, %r3389, %r3388;
|
||
// inline asm
|
||
bfe.u32 %r1937, %r1950, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd933, %r1937, 4;
|
||
add.s64 %rd934, %rd42, %rd933;
|
||
ld.shared.u32 %r3390, [%rd934];
|
||
// inline asm
|
||
bfe.u32 %r1941, %r1950, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd935, %r1941, 4;
|
||
add.s64 %rd936, %rd45, %rd935;
|
||
ld.shared.u32 %r3391, [%rd936];
|
||
add.s32 %r3392, %r3391, %r3390;
|
||
// inline asm
|
||
bfe.u32 %r1945, %r1950, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd937, %r1945, 4;
|
||
add.s64 %rd938, %rd48, %rd937;
|
||
ld.shared.u32 %r3393, [%rd938];
|
||
xor.b32 %r3394, %r3393, %r3392;
|
||
// inline asm
|
||
bfe.u32 %r1949, %r1950, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd939, %r1949, 4;
|
||
add.s64 %rd940, %rd51, %rd939;
|
||
ld.shared.u32 %r3395, [%rd940];
|
||
add.s32 %r3396, %r3395, %r3394;
|
||
xor.b32 %r3397, %r2628, %r1934;
|
||
xor.b32 %r1966, %r3397, %r3396;
|
||
// inline asm
|
||
bfe.u32 %r1953, %r1966, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd941, %r1953, 4;
|
||
add.s64 %rd942, %rd42, %rd941;
|
||
ld.shared.u32 %r3398, [%rd942];
|
||
// inline asm
|
||
bfe.u32 %r1957, %r1966, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd943, %r1957, 4;
|
||
add.s64 %rd944, %rd45, %rd943;
|
||
ld.shared.u32 %r3399, [%rd944];
|
||
add.s32 %r3400, %r3399, %r3398;
|
||
// inline asm
|
||
bfe.u32 %r1961, %r1966, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd945, %r1961, 4;
|
||
add.s64 %rd946, %rd48, %rd945;
|
||
ld.shared.u32 %r3401, [%rd946];
|
||
xor.b32 %r3402, %r3401, %r3400;
|
||
// inline asm
|
||
bfe.u32 %r1965, %r1966, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd947, %r1965, 4;
|
||
add.s64 %rd948, %rd51, %rd947;
|
||
ld.shared.u32 %r3403, [%rd948];
|
||
add.s32 %r3404, %r3403, %r3402;
|
||
xor.b32 %r3405, %r2637, %r1950;
|
||
xor.b32 %r1982, %r3405, %r3404;
|
||
// inline asm
|
||
bfe.u32 %r1969, %r1982, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd949, %r1969, 4;
|
||
add.s64 %rd950, %rd42, %rd949;
|
||
ld.shared.u32 %r3406, [%rd950];
|
||
// inline asm
|
||
bfe.u32 %r1973, %r1982, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd951, %r1973, 4;
|
||
add.s64 %rd952, %rd45, %rd951;
|
||
ld.shared.u32 %r3407, [%rd952];
|
||
add.s32 %r3408, %r3407, %r3406;
|
||
// inline asm
|
||
bfe.u32 %r1977, %r1982, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd953, %r1977, 4;
|
||
add.s64 %rd954, %rd48, %rd953;
|
||
ld.shared.u32 %r3409, [%rd954];
|
||
xor.b32 %r3410, %r3409, %r3408;
|
||
// inline asm
|
||
bfe.u32 %r1981, %r1982, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd955, %r1981, 4;
|
||
add.s64 %rd956, %rd51, %rd955;
|
||
ld.shared.u32 %r3411, [%rd956];
|
||
add.s32 %r3412, %r3411, %r3410;
|
||
xor.b32 %r3413, %r2646, %r1966;
|
||
xor.b32 %r1998, %r3413, %r3412;
|
||
// inline asm
|
||
bfe.u32 %r1985, %r1998, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd957, %r1985, 4;
|
||
add.s64 %rd958, %rd42, %rd957;
|
||
ld.shared.u32 %r3414, [%rd958];
|
||
// inline asm
|
||
bfe.u32 %r1989, %r1998, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd959, %r1989, 4;
|
||
add.s64 %rd960, %rd45, %rd959;
|
||
ld.shared.u32 %r3415, [%rd960];
|
||
add.s32 %r3416, %r3415, %r3414;
|
||
// inline asm
|
||
bfe.u32 %r1993, %r1998, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd961, %r1993, 4;
|
||
add.s64 %rd962, %rd48, %rd961;
|
||
ld.shared.u32 %r3417, [%rd962];
|
||
xor.b32 %r3418, %r3417, %r3416;
|
||
// inline asm
|
||
bfe.u32 %r1997, %r1998, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd963, %r1997, 4;
|
||
add.s64 %rd964, %rd51, %rd963;
|
||
ld.shared.u32 %r3419, [%rd964];
|
||
add.s32 %r3420, %r3419, %r3418;
|
||
xor.b32 %r3421, %r2655, %r1982;
|
||
xor.b32 %r76, %r3421, %r3420;
|
||
xor.b32 %r77, %r2657, %r1998;
|
||
xor.b32 %r2014, %r462, %r1998;
|
||
// inline asm
|
||
bfe.u32 %r2001, %r2014, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd965, %r2001, 4;
|
||
add.s64 %rd966, %rd42, %rd965;
|
||
ld.shared.u32 %r3422, [%rd966];
|
||
// inline asm
|
||
bfe.u32 %r2005, %r2014, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd967, %r2005, 4;
|
||
add.s64 %rd968, %rd45, %rd967;
|
||
ld.shared.u32 %r3423, [%rd968];
|
||
add.s32 %r3424, %r3423, %r3422;
|
||
// inline asm
|
||
bfe.u32 %r2009, %r2014, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd969, %r2009, 4;
|
||
add.s64 %rd970, %rd48, %rd969;
|
||
ld.shared.u32 %r3425, [%rd970];
|
||
xor.b32 %r3426, %r3425, %r3424;
|
||
// inline asm
|
||
bfe.u32 %r2013, %r2014, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd971, %r2013, 4;
|
||
add.s64 %rd972, %rd51, %rd971;
|
||
ld.shared.u32 %r3427, [%rd972];
|
||
add.s32 %r3428, %r3427, %r3426;
|
||
xor.b32 %r3429, %r64, %r76;
|
||
xor.b32 %r2030, %r3429, %r3428;
|
||
// inline asm
|
||
bfe.u32 %r2017, %r2030, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd973, %r2017, 4;
|
||
add.s64 %rd974, %rd42, %rd973;
|
||
ld.shared.u32 %r3430, [%rd974];
|
||
// inline asm
|
||
bfe.u32 %r2021, %r2030, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd975, %r2021, 4;
|
||
add.s64 %rd976, %rd45, %rd975;
|
||
ld.shared.u32 %r3431, [%rd976];
|
||
add.s32 %r3432, %r3431, %r3430;
|
||
// inline asm
|
||
bfe.u32 %r2025, %r2030, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd977, %r2025, 4;
|
||
add.s64 %rd978, %rd48, %rd977;
|
||
ld.shared.u32 %r3433, [%rd978];
|
||
xor.b32 %r3434, %r3433, %r3432;
|
||
// inline asm
|
||
bfe.u32 %r2029, %r2030, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd979, %r2029, 4;
|
||
add.s64 %rd980, %rd51, %rd979;
|
||
ld.shared.u32 %r3435, [%rd980];
|
||
add.s32 %r3436, %r3435, %r3434;
|
||
xor.b32 %r3437, %r67, %r2014;
|
||
xor.b32 %r2046, %r3437, %r3436;
|
||
// inline asm
|
||
bfe.u32 %r2033, %r2046, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd981, %r2033, 4;
|
||
add.s64 %rd982, %rd42, %rd981;
|
||
ld.shared.u32 %r3438, [%rd982];
|
||
// inline asm
|
||
bfe.u32 %r2037, %r2046, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd983, %r2037, 4;
|
||
add.s64 %rd984, %rd45, %rd983;
|
||
ld.shared.u32 %r3439, [%rd984];
|
||
add.s32 %r3440, %r3439, %r3438;
|
||
// inline asm
|
||
bfe.u32 %r2041, %r2046, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd985, %r2041, 4;
|
||
add.s64 %rd986, %rd48, %rd985;
|
||
ld.shared.u32 %r3441, [%rd986];
|
||
xor.b32 %r3442, %r3441, %r3440;
|
||
// inline asm
|
||
bfe.u32 %r2045, %r2046, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd987, %r2045, 4;
|
||
add.s64 %rd988, %rd51, %rd987;
|
||
ld.shared.u32 %r3443, [%rd988];
|
||
add.s32 %r3444, %r3443, %r3442;
|
||
xor.b32 %r3445, %r66, %r2030;
|
||
xor.b32 %r2062, %r3445, %r3444;
|
||
// inline asm
|
||
bfe.u32 %r2049, %r2062, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd989, %r2049, 4;
|
||
add.s64 %rd990, %rd42, %rd989;
|
||
ld.shared.u32 %r3446, [%rd990];
|
||
// inline asm
|
||
bfe.u32 %r2053, %r2062, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd991, %r2053, 4;
|
||
add.s64 %rd992, %rd45, %rd991;
|
||
ld.shared.u32 %r3447, [%rd992];
|
||
add.s32 %r3448, %r3447, %r3446;
|
||
// inline asm
|
||
bfe.u32 %r2057, %r2062, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd993, %r2057, 4;
|
||
add.s64 %rd994, %rd48, %rd993;
|
||
ld.shared.u32 %r3449, [%rd994];
|
||
xor.b32 %r3450, %r3449, %r3448;
|
||
// inline asm
|
||
bfe.u32 %r2061, %r2062, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd995, %r2061, 4;
|
||
add.s64 %rd996, %rd51, %rd995;
|
||
ld.shared.u32 %r3451, [%rd996];
|
||
add.s32 %r3452, %r3451, %r3450;
|
||
xor.b32 %r3453, %r69, %r2046;
|
||
xor.b32 %r2078, %r3453, %r3452;
|
||
// inline asm
|
||
bfe.u32 %r2065, %r2078, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd997, %r2065, 4;
|
||
add.s64 %rd998, %rd42, %rd997;
|
||
ld.shared.u32 %r3454, [%rd998];
|
||
// inline asm
|
||
bfe.u32 %r2069, %r2078, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd999, %r2069, 4;
|
||
add.s64 %rd1000, %rd45, %rd999;
|
||
ld.shared.u32 %r3455, [%rd1000];
|
||
add.s32 %r3456, %r3455, %r3454;
|
||
// inline asm
|
||
bfe.u32 %r2073, %r2078, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1001, %r2073, 4;
|
||
add.s64 %rd1002, %rd48, %rd1001;
|
||
ld.shared.u32 %r3457, [%rd1002];
|
||
xor.b32 %r3458, %r3457, %r3456;
|
||
// inline asm
|
||
bfe.u32 %r2077, %r2078, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1003, %r2077, 4;
|
||
add.s64 %rd1004, %rd51, %rd1003;
|
||
ld.shared.u32 %r3459, [%rd1004];
|
||
add.s32 %r3460, %r3459, %r3458;
|
||
xor.b32 %r3461, %r68, %r2062;
|
||
xor.b32 %r2094, %r3461, %r3460;
|
||
// inline asm
|
||
bfe.u32 %r2081, %r2094, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1005, %r2081, 4;
|
||
add.s64 %rd1006, %rd42, %rd1005;
|
||
ld.shared.u32 %r3462, [%rd1006];
|
||
// inline asm
|
||
bfe.u32 %r2085, %r2094, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1007, %r2085, 4;
|
||
add.s64 %rd1008, %rd45, %rd1007;
|
||
ld.shared.u32 %r3463, [%rd1008];
|
||
add.s32 %r3464, %r3463, %r3462;
|
||
// inline asm
|
||
bfe.u32 %r2089, %r2094, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1009, %r2089, 4;
|
||
add.s64 %rd1010, %rd48, %rd1009;
|
||
ld.shared.u32 %r3465, [%rd1010];
|
||
xor.b32 %r3466, %r3465, %r3464;
|
||
// inline asm
|
||
bfe.u32 %r2093, %r2094, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1011, %r2093, 4;
|
||
add.s64 %rd1012, %rd51, %rd1011;
|
||
ld.shared.u32 %r3467, [%rd1012];
|
||
add.s32 %r3468, %r3467, %r3466;
|
||
xor.b32 %r3469, %r71, %r2078;
|
||
xor.b32 %r2110, %r3469, %r3468;
|
||
// inline asm
|
||
bfe.u32 %r2097, %r2110, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1013, %r2097, 4;
|
||
add.s64 %rd1014, %rd42, %rd1013;
|
||
ld.shared.u32 %r3470, [%rd1014];
|
||
// inline asm
|
||
bfe.u32 %r2101, %r2110, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1015, %r2101, 4;
|
||
add.s64 %rd1016, %rd45, %rd1015;
|
||
ld.shared.u32 %r3471, [%rd1016];
|
||
add.s32 %r3472, %r3471, %r3470;
|
||
// inline asm
|
||
bfe.u32 %r2105, %r2110, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1017, %r2105, 4;
|
||
add.s64 %rd1018, %rd48, %rd1017;
|
||
ld.shared.u32 %r3473, [%rd1018];
|
||
xor.b32 %r3474, %r3473, %r3472;
|
||
// inline asm
|
||
bfe.u32 %r2109, %r2110, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1019, %r2109, 4;
|
||
add.s64 %rd1020, %rd51, %rd1019;
|
||
ld.shared.u32 %r3475, [%rd1020];
|
||
add.s32 %r3476, %r3475, %r3474;
|
||
xor.b32 %r3477, %r70, %r2094;
|
||
xor.b32 %r2126, %r3477, %r3476;
|
||
// inline asm
|
||
bfe.u32 %r2113, %r2126, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1021, %r2113, 4;
|
||
add.s64 %rd1022, %rd42, %rd1021;
|
||
ld.shared.u32 %r3478, [%rd1022];
|
||
// inline asm
|
||
bfe.u32 %r2117, %r2126, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1023, %r2117, 4;
|
||
add.s64 %rd1024, %rd45, %rd1023;
|
||
ld.shared.u32 %r3479, [%rd1024];
|
||
add.s32 %r3480, %r3479, %r3478;
|
||
// inline asm
|
||
bfe.u32 %r2121, %r2126, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1025, %r2121, 4;
|
||
add.s64 %rd1026, %rd48, %rd1025;
|
||
ld.shared.u32 %r3481, [%rd1026];
|
||
xor.b32 %r3482, %r3481, %r3480;
|
||
// inline asm
|
||
bfe.u32 %r2125, %r2126, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1027, %r2125, 4;
|
||
add.s64 %rd1028, %rd51, %rd1027;
|
||
ld.shared.u32 %r3483, [%rd1028];
|
||
add.s32 %r3484, %r3483, %r3482;
|
||
xor.b32 %r3485, %r73, %r2110;
|
||
xor.b32 %r2142, %r3485, %r3484;
|
||
// inline asm
|
||
bfe.u32 %r2129, %r2142, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1029, %r2129, 4;
|
||
add.s64 %rd1030, %rd42, %rd1029;
|
||
ld.shared.u32 %r3486, [%rd1030];
|
||
// inline asm
|
||
bfe.u32 %r2133, %r2142, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1031, %r2133, 4;
|
||
add.s64 %rd1032, %rd45, %rd1031;
|
||
ld.shared.u32 %r3487, [%rd1032];
|
||
add.s32 %r3488, %r3487, %r3486;
|
||
// inline asm
|
||
bfe.u32 %r2137, %r2142, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1033, %r2137, 4;
|
||
add.s64 %rd1034, %rd48, %rd1033;
|
||
ld.shared.u32 %r3489, [%rd1034];
|
||
xor.b32 %r3490, %r3489, %r3488;
|
||
// inline asm
|
||
bfe.u32 %r2141, %r2142, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1035, %r2141, 4;
|
||
add.s64 %rd1036, %rd51, %rd1035;
|
||
ld.shared.u32 %r3491, [%rd1036];
|
||
add.s32 %r3492, %r3491, %r3490;
|
||
xor.b32 %r3493, %r72, %r2126;
|
||
xor.b32 %r2158, %r3493, %r3492;
|
||
// inline asm
|
||
bfe.u32 %r2145, %r2158, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1037, %r2145, 4;
|
||
add.s64 %rd1038, %rd42, %rd1037;
|
||
ld.shared.u32 %r3494, [%rd1038];
|
||
// inline asm
|
||
bfe.u32 %r2149, %r2158, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1039, %r2149, 4;
|
||
add.s64 %rd1040, %rd45, %rd1039;
|
||
ld.shared.u32 %r3495, [%rd1040];
|
||
add.s32 %r3496, %r3495, %r3494;
|
||
// inline asm
|
||
bfe.u32 %r2153, %r2158, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1041, %r2153, 4;
|
||
add.s64 %rd1042, %rd48, %rd1041;
|
||
ld.shared.u32 %r3497, [%rd1042];
|
||
xor.b32 %r3498, %r3497, %r3496;
|
||
// inline asm
|
||
bfe.u32 %r2157, %r2158, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1043, %r2157, 4;
|
||
add.s64 %rd1044, %rd51, %rd1043;
|
||
ld.shared.u32 %r3499, [%rd1044];
|
||
add.s32 %r3500, %r3499, %r3498;
|
||
xor.b32 %r3501, %r75, %r2142;
|
||
xor.b32 %r2174, %r3501, %r3500;
|
||
// inline asm
|
||
bfe.u32 %r2161, %r2174, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1045, %r2161, 4;
|
||
add.s64 %rd1046, %rd42, %rd1045;
|
||
ld.shared.u32 %r3502, [%rd1046];
|
||
// inline asm
|
||
bfe.u32 %r2165, %r2174, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1047, %r2165, 4;
|
||
add.s64 %rd1048, %rd45, %rd1047;
|
||
ld.shared.u32 %r3503, [%rd1048];
|
||
add.s32 %r3504, %r3503, %r3502;
|
||
// inline asm
|
||
bfe.u32 %r2169, %r2174, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1049, %r2169, 4;
|
||
add.s64 %rd1050, %rd48, %rd1049;
|
||
ld.shared.u32 %r3505, [%rd1050];
|
||
xor.b32 %r3506, %r3505, %r3504;
|
||
// inline asm
|
||
bfe.u32 %r2173, %r2174, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1051, %r2173, 4;
|
||
add.s64 %rd1052, %rd51, %rd1051;
|
||
ld.shared.u32 %r3507, [%rd1052];
|
||
add.s32 %r3508, %r3507, %r3506;
|
||
xor.b32 %r3509, %r74, %r2158;
|
||
xor.b32 %r2190, %r3509, %r3508;
|
||
// inline asm
|
||
bfe.u32 %r2177, %r2190, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1053, %r2177, 4;
|
||
add.s64 %rd1054, %rd42, %rd1053;
|
||
ld.shared.u32 %r3510, [%rd1054];
|
||
// inline asm
|
||
bfe.u32 %r2181, %r2190, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1055, %r2181, 4;
|
||
add.s64 %rd1056, %rd45, %rd1055;
|
||
ld.shared.u32 %r3511, [%rd1056];
|
||
add.s32 %r3512, %r3511, %r3510;
|
||
// inline asm
|
||
bfe.u32 %r2185, %r2190, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1057, %r2185, 4;
|
||
add.s64 %rd1058, %rd48, %rd1057;
|
||
ld.shared.u32 %r3513, [%rd1058];
|
||
xor.b32 %r3514, %r3513, %r3512;
|
||
// inline asm
|
||
bfe.u32 %r2189, %r2190, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1059, %r2189, 4;
|
||
add.s64 %rd1060, %rd51, %rd1059;
|
||
ld.shared.u32 %r3515, [%rd1060];
|
||
add.s32 %r3516, %r3515, %r3514;
|
||
xor.b32 %r3517, %r77, %r2174;
|
||
xor.b32 %r2206, %r3517, %r3516;
|
||
// inline asm
|
||
bfe.u32 %r2193, %r2206, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1061, %r2193, 4;
|
||
add.s64 %rd1062, %rd42, %rd1061;
|
||
ld.shared.u32 %r3518, [%rd1062];
|
||
// inline asm
|
||
bfe.u32 %r2197, %r2206, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1063, %r2197, 4;
|
||
add.s64 %rd1064, %rd45, %rd1063;
|
||
ld.shared.u32 %r3519, [%rd1064];
|
||
add.s32 %r3520, %r3519, %r3518;
|
||
// inline asm
|
||
bfe.u32 %r2201, %r2206, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1065, %r2201, 4;
|
||
add.s64 %rd1066, %rd48, %rd1065;
|
||
ld.shared.u32 %r3521, [%rd1066];
|
||
xor.b32 %r3522, %r3521, %r3520;
|
||
// inline asm
|
||
bfe.u32 %r2205, %r2206, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1067, %r2205, 4;
|
||
add.s64 %rd1068, %rd51, %rd1067;
|
||
ld.shared.u32 %r3523, [%rd1068];
|
||
add.s32 %r3524, %r3523, %r3522;
|
||
xor.b32 %r3525, %r76, %r2190;
|
||
xor.b32 %r2222, %r3525, %r3524;
|
||
// inline asm
|
||
bfe.u32 %r2209, %r2222, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1069, %r2209, 4;
|
||
add.s64 %rd1070, %rd42, %rd1069;
|
||
ld.shared.u32 %r3526, [%rd1070];
|
||
// inline asm
|
||
bfe.u32 %r2213, %r2222, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1071, %r2213, 4;
|
||
add.s64 %rd1072, %rd45, %rd1071;
|
||
ld.shared.u32 %r3527, [%rd1072];
|
||
add.s32 %r3528, %r3527, %r3526;
|
||
// inline asm
|
||
bfe.u32 %r2217, %r2222, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1073, %r2217, 4;
|
||
add.s64 %rd1074, %rd48, %rd1073;
|
||
ld.shared.u32 %r3529, [%rd1074];
|
||
xor.b32 %r3530, %r3529, %r3528;
|
||
// inline asm
|
||
bfe.u32 %r2221, %r2222, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1075, %r2221, 4;
|
||
add.s64 %rd1076, %rd51, %rd1075;
|
||
ld.shared.u32 %r3531, [%rd1076];
|
||
add.s32 %r3532, %r3531, %r3530;
|
||
xor.b32 %r3533, %r2637, %r2206;
|
||
xor.b32 %r2238, %r3533, %r3532;
|
||
// inline asm
|
||
bfe.u32 %r2225, %r2238, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1077, %r2225, 4;
|
||
add.s64 %rd1078, %rd42, %rd1077;
|
||
ld.shared.u32 %r3534, [%rd1078];
|
||
// inline asm
|
||
bfe.u32 %r2229, %r2238, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1079, %r2229, 4;
|
||
add.s64 %rd1080, %rd45, %rd1079;
|
||
ld.shared.u32 %r3535, [%rd1080];
|
||
add.s32 %r3536, %r3535, %r3534;
|
||
// inline asm
|
||
bfe.u32 %r2233, %r2238, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1081, %r2233, 4;
|
||
add.s64 %rd1082, %rd48, %rd1081;
|
||
ld.shared.u32 %r3537, [%rd1082];
|
||
xor.b32 %r3538, %r3537, %r3536;
|
||
// inline asm
|
||
bfe.u32 %r2237, %r2238, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1083, %r2237, 4;
|
||
add.s64 %rd1084, %rd51, %rd1083;
|
||
ld.shared.u32 %r3539, [%rd1084];
|
||
add.s32 %r3540, %r3539, %r3538;
|
||
xor.b32 %r3541, %r2646, %r2222;
|
||
xor.b32 %r2254, %r3541, %r3540;
|
||
// inline asm
|
||
bfe.u32 %r2241, %r2254, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1085, %r2241, 4;
|
||
add.s64 %rd1086, %rd42, %rd1085;
|
||
ld.shared.u32 %r3542, [%rd1086];
|
||
// inline asm
|
||
bfe.u32 %r2245, %r2254, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1087, %r2245, 4;
|
||
add.s64 %rd1088, %rd45, %rd1087;
|
||
ld.shared.u32 %r3543, [%rd1088];
|
||
add.s32 %r3544, %r3543, %r3542;
|
||
// inline asm
|
||
bfe.u32 %r2249, %r2254, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1089, %r2249, 4;
|
||
add.s64 %rd1090, %rd48, %rd1089;
|
||
ld.shared.u32 %r3545, [%rd1090];
|
||
xor.b32 %r3546, %r3545, %r3544;
|
||
// inline asm
|
||
bfe.u32 %r2253, %r2254, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1091, %r2253, 4;
|
||
add.s64 %rd1092, %rd51, %rd1091;
|
||
ld.shared.u32 %r3547, [%rd1092];
|
||
add.s32 %r3548, %r3547, %r3546;
|
||
xor.b32 %r3549, %r2655, %r2238;
|
||
xor.b32 %r78, %r3549, %r3548;
|
||
xor.b32 %r79, %r2657, %r2254;
|
||
xor.b32 %r2270, %r462, %r2254;
|
||
// inline asm
|
||
bfe.u32 %r2257, %r2270, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1093, %r2257, 4;
|
||
add.s64 %rd1094, %rd42, %rd1093;
|
||
ld.shared.u32 %r3550, [%rd1094];
|
||
// inline asm
|
||
bfe.u32 %r2261, %r2270, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1095, %r2261, 4;
|
||
add.s64 %rd1096, %rd45, %rd1095;
|
||
ld.shared.u32 %r3551, [%rd1096];
|
||
add.s32 %r3552, %r3551, %r3550;
|
||
// inline asm
|
||
bfe.u32 %r2265, %r2270, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1097, %r2265, 4;
|
||
add.s64 %rd1098, %rd48, %rd1097;
|
||
ld.shared.u32 %r3553, [%rd1098];
|
||
xor.b32 %r3554, %r3553, %r3552;
|
||
// inline asm
|
||
bfe.u32 %r2269, %r2270, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1099, %r2269, 4;
|
||
add.s64 %rd1100, %rd51, %rd1099;
|
||
ld.shared.u32 %r3555, [%rd1100];
|
||
add.s32 %r3556, %r3555, %r3554;
|
||
xor.b32 %r3557, %r64, %r78;
|
||
xor.b32 %r2286, %r3557, %r3556;
|
||
// inline asm
|
||
bfe.u32 %r2273, %r2286, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1101, %r2273, 4;
|
||
add.s64 %rd1102, %rd42, %rd1101;
|
||
ld.shared.u32 %r3558, [%rd1102];
|
||
// inline asm
|
||
bfe.u32 %r2277, %r2286, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1103, %r2277, 4;
|
||
add.s64 %rd1104, %rd45, %rd1103;
|
||
ld.shared.u32 %r3559, [%rd1104];
|
||
add.s32 %r3560, %r3559, %r3558;
|
||
// inline asm
|
||
bfe.u32 %r2281, %r2286, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1105, %r2281, 4;
|
||
add.s64 %rd1106, %rd48, %rd1105;
|
||
ld.shared.u32 %r3561, [%rd1106];
|
||
xor.b32 %r3562, %r3561, %r3560;
|
||
// inline asm
|
||
bfe.u32 %r2285, %r2286, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1107, %r2285, 4;
|
||
add.s64 %rd1108, %rd51, %rd1107;
|
||
ld.shared.u32 %r3563, [%rd1108];
|
||
add.s32 %r3564, %r3563, %r3562;
|
||
xor.b32 %r3565, %r67, %r2270;
|
||
xor.b32 %r2302, %r3565, %r3564;
|
||
// inline asm
|
||
bfe.u32 %r2289, %r2302, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1109, %r2289, 4;
|
||
add.s64 %rd1110, %rd42, %rd1109;
|
||
ld.shared.u32 %r3566, [%rd1110];
|
||
// inline asm
|
||
bfe.u32 %r2293, %r2302, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1111, %r2293, 4;
|
||
add.s64 %rd1112, %rd45, %rd1111;
|
||
ld.shared.u32 %r3567, [%rd1112];
|
||
add.s32 %r3568, %r3567, %r3566;
|
||
// inline asm
|
||
bfe.u32 %r2297, %r2302, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1113, %r2297, 4;
|
||
add.s64 %rd1114, %rd48, %rd1113;
|
||
ld.shared.u32 %r3569, [%rd1114];
|
||
xor.b32 %r3570, %r3569, %r3568;
|
||
// inline asm
|
||
bfe.u32 %r2301, %r2302, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1115, %r2301, 4;
|
||
add.s64 %rd1116, %rd51, %rd1115;
|
||
ld.shared.u32 %r3571, [%rd1116];
|
||
add.s32 %r3572, %r3571, %r3570;
|
||
xor.b32 %r3573, %r66, %r2286;
|
||
xor.b32 %r2318, %r3573, %r3572;
|
||
// inline asm
|
||
bfe.u32 %r2305, %r2318, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1117, %r2305, 4;
|
||
add.s64 %rd1118, %rd42, %rd1117;
|
||
ld.shared.u32 %r3574, [%rd1118];
|
||
// inline asm
|
||
bfe.u32 %r2309, %r2318, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1119, %r2309, 4;
|
||
add.s64 %rd1120, %rd45, %rd1119;
|
||
ld.shared.u32 %r3575, [%rd1120];
|
||
add.s32 %r3576, %r3575, %r3574;
|
||
// inline asm
|
||
bfe.u32 %r2313, %r2318, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1121, %r2313, 4;
|
||
add.s64 %rd1122, %rd48, %rd1121;
|
||
ld.shared.u32 %r3577, [%rd1122];
|
||
xor.b32 %r3578, %r3577, %r3576;
|
||
// inline asm
|
||
bfe.u32 %r2317, %r2318, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1123, %r2317, 4;
|
||
add.s64 %rd1124, %rd51, %rd1123;
|
||
ld.shared.u32 %r3579, [%rd1124];
|
||
add.s32 %r3580, %r3579, %r3578;
|
||
xor.b32 %r3581, %r69, %r2302;
|
||
xor.b32 %r2334, %r3581, %r3580;
|
||
// inline asm
|
||
bfe.u32 %r2321, %r2334, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1125, %r2321, 4;
|
||
add.s64 %rd1126, %rd42, %rd1125;
|
||
ld.shared.u32 %r3582, [%rd1126];
|
||
// inline asm
|
||
bfe.u32 %r2325, %r2334, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1127, %r2325, 4;
|
||
add.s64 %rd1128, %rd45, %rd1127;
|
||
ld.shared.u32 %r3583, [%rd1128];
|
||
add.s32 %r3584, %r3583, %r3582;
|
||
// inline asm
|
||
bfe.u32 %r2329, %r2334, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1129, %r2329, 4;
|
||
add.s64 %rd1130, %rd48, %rd1129;
|
||
ld.shared.u32 %r3585, [%rd1130];
|
||
xor.b32 %r3586, %r3585, %r3584;
|
||
// inline asm
|
||
bfe.u32 %r2333, %r2334, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1131, %r2333, 4;
|
||
add.s64 %rd1132, %rd51, %rd1131;
|
||
ld.shared.u32 %r3587, [%rd1132];
|
||
add.s32 %r3588, %r3587, %r3586;
|
||
xor.b32 %r3589, %r68, %r2318;
|
||
xor.b32 %r2350, %r3589, %r3588;
|
||
// inline asm
|
||
bfe.u32 %r2337, %r2350, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1133, %r2337, 4;
|
||
add.s64 %rd1134, %rd42, %rd1133;
|
||
ld.shared.u32 %r3590, [%rd1134];
|
||
// inline asm
|
||
bfe.u32 %r2341, %r2350, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1135, %r2341, 4;
|
||
add.s64 %rd1136, %rd45, %rd1135;
|
||
ld.shared.u32 %r3591, [%rd1136];
|
||
add.s32 %r3592, %r3591, %r3590;
|
||
// inline asm
|
||
bfe.u32 %r2345, %r2350, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1137, %r2345, 4;
|
||
add.s64 %rd1138, %rd48, %rd1137;
|
||
ld.shared.u32 %r3593, [%rd1138];
|
||
xor.b32 %r3594, %r3593, %r3592;
|
||
// inline asm
|
||
bfe.u32 %r2349, %r2350, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1139, %r2349, 4;
|
||
add.s64 %rd1140, %rd51, %rd1139;
|
||
ld.shared.u32 %r3595, [%rd1140];
|
||
add.s32 %r3596, %r3595, %r3594;
|
||
xor.b32 %r3597, %r71, %r2334;
|
||
xor.b32 %r2366, %r3597, %r3596;
|
||
// inline asm
|
||
bfe.u32 %r2353, %r2366, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1141, %r2353, 4;
|
||
add.s64 %rd1142, %rd42, %rd1141;
|
||
ld.shared.u32 %r3598, [%rd1142];
|
||
// inline asm
|
||
bfe.u32 %r2357, %r2366, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1143, %r2357, 4;
|
||
add.s64 %rd1144, %rd45, %rd1143;
|
||
ld.shared.u32 %r3599, [%rd1144];
|
||
add.s32 %r3600, %r3599, %r3598;
|
||
// inline asm
|
||
bfe.u32 %r2361, %r2366, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1145, %r2361, 4;
|
||
add.s64 %rd1146, %rd48, %rd1145;
|
||
ld.shared.u32 %r3601, [%rd1146];
|
||
xor.b32 %r3602, %r3601, %r3600;
|
||
// inline asm
|
||
bfe.u32 %r2365, %r2366, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1147, %r2365, 4;
|
||
add.s64 %rd1148, %rd51, %rd1147;
|
||
ld.shared.u32 %r3603, [%rd1148];
|
||
add.s32 %r3604, %r3603, %r3602;
|
||
xor.b32 %r3605, %r70, %r2350;
|
||
xor.b32 %r2382, %r3605, %r3604;
|
||
// inline asm
|
||
bfe.u32 %r2369, %r2382, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1149, %r2369, 4;
|
||
add.s64 %rd1150, %rd42, %rd1149;
|
||
ld.shared.u32 %r3606, [%rd1150];
|
||
// inline asm
|
||
bfe.u32 %r2373, %r2382, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1151, %r2373, 4;
|
||
add.s64 %rd1152, %rd45, %rd1151;
|
||
ld.shared.u32 %r3607, [%rd1152];
|
||
add.s32 %r3608, %r3607, %r3606;
|
||
// inline asm
|
||
bfe.u32 %r2377, %r2382, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1153, %r2377, 4;
|
||
add.s64 %rd1154, %rd48, %rd1153;
|
||
ld.shared.u32 %r3609, [%rd1154];
|
||
xor.b32 %r3610, %r3609, %r3608;
|
||
// inline asm
|
||
bfe.u32 %r2381, %r2382, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1155, %r2381, 4;
|
||
add.s64 %rd1156, %rd51, %rd1155;
|
||
ld.shared.u32 %r3611, [%rd1156];
|
||
add.s32 %r3612, %r3611, %r3610;
|
||
xor.b32 %r3613, %r73, %r2366;
|
||
xor.b32 %r2398, %r3613, %r3612;
|
||
// inline asm
|
||
bfe.u32 %r2385, %r2398, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1157, %r2385, 4;
|
||
add.s64 %rd1158, %rd42, %rd1157;
|
||
ld.shared.u32 %r3614, [%rd1158];
|
||
// inline asm
|
||
bfe.u32 %r2389, %r2398, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1159, %r2389, 4;
|
||
add.s64 %rd1160, %rd45, %rd1159;
|
||
ld.shared.u32 %r3615, [%rd1160];
|
||
add.s32 %r3616, %r3615, %r3614;
|
||
// inline asm
|
||
bfe.u32 %r2393, %r2398, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1161, %r2393, 4;
|
||
add.s64 %rd1162, %rd48, %rd1161;
|
||
ld.shared.u32 %r3617, [%rd1162];
|
||
xor.b32 %r3618, %r3617, %r3616;
|
||
// inline asm
|
||
bfe.u32 %r2397, %r2398, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1163, %r2397, 4;
|
||
add.s64 %rd1164, %rd51, %rd1163;
|
||
ld.shared.u32 %r3619, [%rd1164];
|
||
add.s32 %r3620, %r3619, %r3618;
|
||
xor.b32 %r3621, %r72, %r2382;
|
||
xor.b32 %r2414, %r3621, %r3620;
|
||
// inline asm
|
||
bfe.u32 %r2401, %r2414, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1165, %r2401, 4;
|
||
add.s64 %rd1166, %rd42, %rd1165;
|
||
ld.shared.u32 %r3622, [%rd1166];
|
||
// inline asm
|
||
bfe.u32 %r2405, %r2414, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1167, %r2405, 4;
|
||
add.s64 %rd1168, %rd45, %rd1167;
|
||
ld.shared.u32 %r3623, [%rd1168];
|
||
add.s32 %r3624, %r3623, %r3622;
|
||
// inline asm
|
||
bfe.u32 %r2409, %r2414, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1169, %r2409, 4;
|
||
add.s64 %rd1170, %rd48, %rd1169;
|
||
ld.shared.u32 %r3625, [%rd1170];
|
||
xor.b32 %r3626, %r3625, %r3624;
|
||
// inline asm
|
||
bfe.u32 %r2413, %r2414, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1171, %r2413, 4;
|
||
add.s64 %rd1172, %rd51, %rd1171;
|
||
ld.shared.u32 %r3627, [%rd1172];
|
||
add.s32 %r3628, %r3627, %r3626;
|
||
xor.b32 %r3629, %r75, %r2398;
|
||
xor.b32 %r2430, %r3629, %r3628;
|
||
// inline asm
|
||
bfe.u32 %r2417, %r2430, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1173, %r2417, 4;
|
||
add.s64 %rd1174, %rd42, %rd1173;
|
||
ld.shared.u32 %r3630, [%rd1174];
|
||
// inline asm
|
||
bfe.u32 %r2421, %r2430, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1175, %r2421, 4;
|
||
add.s64 %rd1176, %rd45, %rd1175;
|
||
ld.shared.u32 %r3631, [%rd1176];
|
||
add.s32 %r3632, %r3631, %r3630;
|
||
// inline asm
|
||
bfe.u32 %r2425, %r2430, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1177, %r2425, 4;
|
||
add.s64 %rd1178, %rd48, %rd1177;
|
||
ld.shared.u32 %r3633, [%rd1178];
|
||
xor.b32 %r3634, %r3633, %r3632;
|
||
// inline asm
|
||
bfe.u32 %r2429, %r2430, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1179, %r2429, 4;
|
||
add.s64 %rd1180, %rd51, %rd1179;
|
||
ld.shared.u32 %r3635, [%rd1180];
|
||
add.s32 %r3636, %r3635, %r3634;
|
||
xor.b32 %r3637, %r74, %r2414;
|
||
xor.b32 %r2446, %r3637, %r3636;
|
||
// inline asm
|
||
bfe.u32 %r2433, %r2446, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1181, %r2433, 4;
|
||
add.s64 %rd1182, %rd42, %rd1181;
|
||
ld.shared.u32 %r3638, [%rd1182];
|
||
// inline asm
|
||
bfe.u32 %r2437, %r2446, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1183, %r2437, 4;
|
||
add.s64 %rd1184, %rd45, %rd1183;
|
||
ld.shared.u32 %r3639, [%rd1184];
|
||
add.s32 %r3640, %r3639, %r3638;
|
||
// inline asm
|
||
bfe.u32 %r2441, %r2446, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1185, %r2441, 4;
|
||
add.s64 %rd1186, %rd48, %rd1185;
|
||
ld.shared.u32 %r3641, [%rd1186];
|
||
xor.b32 %r3642, %r3641, %r3640;
|
||
// inline asm
|
||
bfe.u32 %r2445, %r2446, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1187, %r2445, 4;
|
||
add.s64 %rd1188, %rd51, %rd1187;
|
||
ld.shared.u32 %r3643, [%rd1188];
|
||
add.s32 %r3644, %r3643, %r3642;
|
||
xor.b32 %r3645, %r77, %r2430;
|
||
xor.b32 %r2462, %r3645, %r3644;
|
||
// inline asm
|
||
bfe.u32 %r2449, %r2462, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1189, %r2449, 4;
|
||
add.s64 %rd1190, %rd42, %rd1189;
|
||
ld.shared.u32 %r3646, [%rd1190];
|
||
// inline asm
|
||
bfe.u32 %r2453, %r2462, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1191, %r2453, 4;
|
||
add.s64 %rd1192, %rd45, %rd1191;
|
||
ld.shared.u32 %r3647, [%rd1192];
|
||
add.s32 %r3648, %r3647, %r3646;
|
||
// inline asm
|
||
bfe.u32 %r2457, %r2462, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1193, %r2457, 4;
|
||
add.s64 %rd1194, %rd48, %rd1193;
|
||
ld.shared.u32 %r3649, [%rd1194];
|
||
xor.b32 %r3650, %r3649, %r3648;
|
||
// inline asm
|
||
bfe.u32 %r2461, %r2462, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1195, %r2461, 4;
|
||
add.s64 %rd1196, %rd51, %rd1195;
|
||
ld.shared.u32 %r3651, [%rd1196];
|
||
add.s32 %r3652, %r3651, %r3650;
|
||
xor.b32 %r3653, %r76, %r2446;
|
||
xor.b32 %r2478, %r3653, %r3652;
|
||
// inline asm
|
||
bfe.u32 %r2465, %r2478, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1197, %r2465, 4;
|
||
add.s64 %rd1198, %rd42, %rd1197;
|
||
ld.shared.u32 %r3654, [%rd1198];
|
||
// inline asm
|
||
bfe.u32 %r2469, %r2478, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1199, %r2469, 4;
|
||
add.s64 %rd1200, %rd45, %rd1199;
|
||
ld.shared.u32 %r3655, [%rd1200];
|
||
add.s32 %r3656, %r3655, %r3654;
|
||
// inline asm
|
||
bfe.u32 %r2473, %r2478, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1201, %r2473, 4;
|
||
add.s64 %rd1202, %rd48, %rd1201;
|
||
ld.shared.u32 %r3657, [%rd1202];
|
||
xor.b32 %r3658, %r3657, %r3656;
|
||
// inline asm
|
||
bfe.u32 %r2477, %r2478, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1203, %r2477, 4;
|
||
add.s64 %rd1204, %rd51, %rd1203;
|
||
ld.shared.u32 %r3659, [%rd1204];
|
||
add.s32 %r3660, %r3659, %r3658;
|
||
xor.b32 %r3661, %r79, %r2462;
|
||
xor.b32 %r2494, %r3661, %r3660;
|
||
// inline asm
|
||
bfe.u32 %r2481, %r2494, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1205, %r2481, 4;
|
||
add.s64 %rd1206, %rd42, %rd1205;
|
||
ld.shared.u32 %r3662, [%rd1206];
|
||
// inline asm
|
||
bfe.u32 %r2485, %r2494, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1207, %r2485, 4;
|
||
add.s64 %rd1208, %rd45, %rd1207;
|
||
ld.shared.u32 %r3663, [%rd1208];
|
||
add.s32 %r3664, %r3663, %r3662;
|
||
// inline asm
|
||
bfe.u32 %r2489, %r2494, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1209, %r2489, 4;
|
||
add.s64 %rd1210, %rd48, %rd1209;
|
||
ld.shared.u32 %r3665, [%rd1210];
|
||
xor.b32 %r3666, %r3665, %r3664;
|
||
// inline asm
|
||
bfe.u32 %r2493, %r2494, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1211, %r2493, 4;
|
||
add.s64 %rd1212, %rd51, %rd1211;
|
||
ld.shared.u32 %r3667, [%rd1212];
|
||
add.s32 %r3668, %r3667, %r3666;
|
||
xor.b32 %r3669, %r78, %r2478;
|
||
xor.b32 %r2510, %r3669, %r3668;
|
||
// inline asm
|
||
bfe.u32 %r2497, %r2510, %r2499, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1213, %r2497, 4;
|
||
add.s64 %rd1214, %rd42, %rd1213;
|
||
ld.shared.u32 %r3670, [%rd1214];
|
||
// inline asm
|
||
bfe.u32 %r2501, %r2510, %r2503, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1215, %r2501, 4;
|
||
add.s64 %rd1216, %rd45, %rd1215;
|
||
ld.shared.u32 %r3671, [%rd1216];
|
||
add.s32 %r3672, %r3671, %r3670;
|
||
// inline asm
|
||
bfe.u32 %r2505, %r2510, %r2512, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1217, %r2505, 4;
|
||
add.s64 %rd1218, %rd48, %rd1217;
|
||
ld.shared.u32 %r3673, [%rd1218];
|
||
xor.b32 %r3674, %r3673, %r3672;
|
||
// inline asm
|
||
bfe.u32 %r2509, %r2510, %r208, %r2512;
|
||
// inline asm
|
||
mul.wide.u32 %rd1219, %r2509, 4;
|
||
add.s64 %rd1220, %rd51, %rd1219;
|
||
ld.shared.u32 %r3675, [%rd1220];
|
||
add.s32 %r3676, %r3675, %r3674;
|
||
xor.b32 %r3677, %r2655, %r2494;
|
||
xor.b32 %r80, %r3677, %r3676;
|
||
xor.b32 %r81, %r2657, %r2510;
|
||
// Register copies made immediately before label BB4_6.
// %r10314 (from %r80) and %r10315 (from %r81) are read by xor instructions
// under BB4_6. %rd3495 and %r10310 are initialised from %rd3 and %r208.
// NOTE(review): these look like loop-carried values (a pointer and a counter
// plus two data words) set up for a loop at BB4_6 — confirm against the
// back-edge, which is not adjacent to this code.
mov.u64 %rd3495, %rd3;
|
||
mov.u32 %r10310, %r208;
|
||
mov.u32 %r10314, %r80;
|
||
mov.u32 %r10315, %r81;
|
||
|
||
// BB4_6: start of the block that consumes %r10314 / %r10315.
// NOTE(review): presumably a loop header — confirm where the branch back to
// this label is.
//
// The instructions from here to the final xor of this group compute
//   %r3691 = %r65 ^ %r10315
//   %r3941 = ((T42[b3] + T45[b2]) ^ T48[b1]) + T51[bx]
//   %r3707 = (%r64 ^ %r10314) ^ %r3941
// where T42/T45/T48/T51 are 32-bit tables in shared memory addressed from
// %rd42/%rd45/%rd48/%rd51, and b3/b2/b1 are the 8-bit fields of %r3691 at bit
// positions 24/16/8. bx is the 8-bit field at position %r208.
// NOTE(review): this has the shape of a Blowfish round (F function followed by
// a subkey XOR) — confirm against the kernel's high-level source.
BB4_6:
|
||
// Bit-field parameters for the bfe.u32 instructions below:
// %r10277 = 16 and %r10275 = 24 are start positions; %r10276 = 8 is used both
// as the field width and as a start position.
mov.u32 %r10277, 16;
|
||
mov.u32 %r10276, 8;
|
||
mov.u32 %r10275, 24;
|
||
// Mix %r10315 with %r65 to form the word whose bytes index the tables.
xor.b32 %r3691, %r65, %r10315;
|
||
// inline asm
|
||
// b3 = bits 24..31 of %r3691 (position 24, width 8).
bfe.u32 %r3678, %r3691, %r10275, %r10276;
|
||
// inline asm
|
||
// Scale the index by 4 (each table entry is loaded as a 32-bit word) and
// widen to 64 bits for address arithmetic.
mul.wide.u32 %rd1224, %r3678, 4;
|
||
add.s64 %rd1225, %rd42, %rd1224;
|
||
// First lookup: entry b3 of the table based at %rd42.
ld.shared.u32 %r3935, [%rd1225];
|
||
// inline asm
|
||
// b2 = bits 16..23 of %r3691.
bfe.u32 %r3682, %r3691, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1228, %r3682, 4;
|
||
add.s64 %rd1229, %rd45, %rd1228;
|
||
// Second lookup: entry b2 of the table based at %rd45.
ld.shared.u32 %r3936, [%rd1229];
|
||
// Combine the first two lookups by 32-bit addition.
add.s32 %r3937, %r3936, %r3935;
|
||
// inline asm
|
||
// b1 = bits 8..15 of %r3691 (%r10276 supplies both position and width).
bfe.u32 %r3686, %r3691, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1232, %r3686, 4;
|
||
add.s64 %rd1233, %rd48, %rd1232;
|
||
// Third lookup: entry b1 of the table based at %rd48.
ld.shared.u32 %r3938, [%rd1233];
|
||
// Fold the third lookup in with XOR.
xor.b32 %r3939, %r3938, %r3937;
|
||
// inline asm
|
||
// 8-bit field starting at bit position %r208.
// NOTE(review): %r208 is defined elsewhere; presumably 0, selecting the low
// byte — confirm.
bfe.u32 %r3690, %r3691, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1236, %r3690, 4;
|
||
add.s64 %rd1237, %rd51, %rd1236;
|
||
// Fourth lookup: table based at %rd51.
ld.shared.u32 %r3940, [%rd1237];
|
||
// Add the fourth lookup to complete the combined table value.
add.s32 %r3941, %r3940, %r3939;
|
||
// Mix %r10314 with %r64, then XOR in the combined table value; %r3707 is the
// word whose bytes drive the next group of lookups.
xor.b32 %r3942, %r64, %r10314;
|
||
xor.b32 %r3707, %r3942, %r3941;
|
||
// inline asm
|
||
bfe.u32 %r3694, %r3707, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1238, %r3694, 4;
|
||
add.s64 %rd1239, %rd42, %rd1238;
|
||
ld.shared.u32 %r3943, [%rd1239];
|
||
// inline asm
|
||
bfe.u32 %r3698, %r3707, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1240, %r3698, 4;
|
||
add.s64 %rd1241, %rd45, %rd1240;
|
||
ld.shared.u32 %r3944, [%rd1241];
|
||
add.s32 %r3945, %r3944, %r3943;
|
||
// inline asm
|
||
bfe.u32 %r3702, %r3707, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1242, %r3702, 4;
|
||
add.s64 %rd1243, %rd48, %rd1242;
|
||
ld.shared.u32 %r3946, [%rd1243];
|
||
xor.b32 %r3947, %r3946, %r3945;
|
||
// inline asm
|
||
bfe.u32 %r3706, %r3707, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1244, %r3706, 4;
|
||
add.s64 %rd1245, %rd51, %rd1244;
|
||
ld.shared.u32 %r3948, [%rd1245];
|
||
add.s32 %r3949, %r3948, %r3947;
|
||
xor.b32 %r3950, %r67, %r3691;
|
||
xor.b32 %r3723, %r3950, %r3949;
|
||
// inline asm
|
||
bfe.u32 %r3710, %r3723, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1246, %r3710, 4;
|
||
add.s64 %rd1247, %rd42, %rd1246;
|
||
ld.shared.u32 %r3951, [%rd1247];
|
||
// inline asm
|
||
bfe.u32 %r3714, %r3723, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1248, %r3714, 4;
|
||
add.s64 %rd1249, %rd45, %rd1248;
|
||
ld.shared.u32 %r3952, [%rd1249];
|
||
add.s32 %r3953, %r3952, %r3951;
|
||
// inline asm
|
||
bfe.u32 %r3718, %r3723, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1250, %r3718, 4;
|
||
add.s64 %rd1251, %rd48, %rd1250;
|
||
ld.shared.u32 %r3954, [%rd1251];
|
||
xor.b32 %r3955, %r3954, %r3953;
|
||
// inline asm
|
||
bfe.u32 %r3722, %r3723, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1252, %r3722, 4;
|
||
add.s64 %rd1253, %rd51, %rd1252;
|
||
ld.shared.u32 %r3956, [%rd1253];
|
||
add.s32 %r3957, %r3956, %r3955;
|
||
xor.b32 %r3958, %r66, %r3707;
|
||
xor.b32 %r3739, %r3958, %r3957;
|
||
// inline asm
|
||
bfe.u32 %r3726, %r3739, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1254, %r3726, 4;
|
||
add.s64 %rd1255, %rd42, %rd1254;
|
||
ld.shared.u32 %r3959, [%rd1255];
|
||
// inline asm
|
||
bfe.u32 %r3730, %r3739, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1256, %r3730, 4;
|
||
add.s64 %rd1257, %rd45, %rd1256;
|
||
ld.shared.u32 %r3960, [%rd1257];
|
||
add.s32 %r3961, %r3960, %r3959;
|
||
// inline asm
|
||
bfe.u32 %r3734, %r3739, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1258, %r3734, 4;
|
||
add.s64 %rd1259, %rd48, %rd1258;
|
||
ld.shared.u32 %r3962, [%rd1259];
|
||
xor.b32 %r3963, %r3962, %r3961;
|
||
// inline asm
|
||
bfe.u32 %r3738, %r3739, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1260, %r3738, 4;
|
||
add.s64 %rd1261, %rd51, %rd1260;
|
||
ld.shared.u32 %r3964, [%rd1261];
|
||
add.s32 %r3965, %r3964, %r3963;
|
||
xor.b32 %r3966, %r69, %r3723;
|
||
xor.b32 %r3755, %r3966, %r3965;
|
||
// inline asm
|
||
bfe.u32 %r3742, %r3755, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1262, %r3742, 4;
|
||
add.s64 %rd1263, %rd42, %rd1262;
|
||
ld.shared.u32 %r3967, [%rd1263];
|
||
// inline asm
|
||
bfe.u32 %r3746, %r3755, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1264, %r3746, 4;
|
||
add.s64 %rd1265, %rd45, %rd1264;
|
||
ld.shared.u32 %r3968, [%rd1265];
|
||
add.s32 %r3969, %r3968, %r3967;
|
||
// inline asm
|
||
bfe.u32 %r3750, %r3755, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1266, %r3750, 4;
|
||
add.s64 %rd1267, %rd48, %rd1266;
|
||
ld.shared.u32 %r3970, [%rd1267];
|
||
xor.b32 %r3971, %r3970, %r3969;
|
||
// inline asm
|
||
bfe.u32 %r3754, %r3755, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1268, %r3754, 4;
|
||
add.s64 %rd1269, %rd51, %rd1268;
|
||
ld.shared.u32 %r3972, [%rd1269];
|
||
add.s32 %r3973, %r3972, %r3971;
|
||
xor.b32 %r3974, %r68, %r3739;
|
||
xor.b32 %r3771, %r3974, %r3973;
|
||
// inline asm
|
||
bfe.u32 %r3758, %r3771, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1270, %r3758, 4;
|
||
add.s64 %rd1271, %rd42, %rd1270;
|
||
ld.shared.u32 %r3975, [%rd1271];
|
||
// inline asm
|
||
bfe.u32 %r3762, %r3771, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1272, %r3762, 4;
|
||
add.s64 %rd1273, %rd45, %rd1272;
|
||
ld.shared.u32 %r3976, [%rd1273];
|
||
add.s32 %r3977, %r3976, %r3975;
|
||
// inline asm
|
||
bfe.u32 %r3766, %r3771, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1274, %r3766, 4;
|
||
add.s64 %rd1275, %rd48, %rd1274;
|
||
ld.shared.u32 %r3978, [%rd1275];
|
||
xor.b32 %r3979, %r3978, %r3977;
|
||
// inline asm
|
||
bfe.u32 %r3770, %r3771, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1276, %r3770, 4;
|
||
add.s64 %rd1277, %rd51, %rd1276;
|
||
ld.shared.u32 %r3980, [%rd1277];
|
||
add.s32 %r3981, %r3980, %r3979;
|
||
xor.b32 %r3982, %r71, %r3755;
|
||
xor.b32 %r3787, %r3982, %r3981;
|
||
// inline asm
|
||
bfe.u32 %r3774, %r3787, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1278, %r3774, 4;
|
||
add.s64 %rd1279, %rd42, %rd1278;
|
||
ld.shared.u32 %r3983, [%rd1279];
|
||
// inline asm
|
||
bfe.u32 %r3778, %r3787, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1280, %r3778, 4;
|
||
add.s64 %rd1281, %rd45, %rd1280;
|
||
ld.shared.u32 %r3984, [%rd1281];
|
||
add.s32 %r3985, %r3984, %r3983;
|
||
// inline asm
|
||
bfe.u32 %r3782, %r3787, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1282, %r3782, 4;
|
||
add.s64 %rd1283, %rd48, %rd1282;
|
||
ld.shared.u32 %r3986, [%rd1283];
|
||
xor.b32 %r3987, %r3986, %r3985;
|
||
// inline asm
|
||
bfe.u32 %r3786, %r3787, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1284, %r3786, 4;
|
||
add.s64 %rd1285, %rd51, %rd1284;
|
||
ld.shared.u32 %r3988, [%rd1285];
|
||
add.s32 %r3989, %r3988, %r3987;
|
||
xor.b32 %r3990, %r70, %r3771;
|
||
xor.b32 %r3803, %r3990, %r3989;
|
||
// inline asm
|
||
bfe.u32 %r3790, %r3803, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1286, %r3790, 4;
|
||
add.s64 %rd1287, %rd42, %rd1286;
|
||
ld.shared.u32 %r3991, [%rd1287];
|
||
// inline asm
|
||
bfe.u32 %r3794, %r3803, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1288, %r3794, 4;
|
||
add.s64 %rd1289, %rd45, %rd1288;
|
||
ld.shared.u32 %r3992, [%rd1289];
|
||
add.s32 %r3993, %r3992, %r3991;
|
||
// inline asm
|
||
bfe.u32 %r3798, %r3803, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1290, %r3798, 4;
|
||
add.s64 %rd1291, %rd48, %rd1290;
|
||
ld.shared.u32 %r3994, [%rd1291];
|
||
xor.b32 %r3995, %r3994, %r3993;
|
||
// inline asm
|
||
bfe.u32 %r3802, %r3803, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1292, %r3802, 4;
|
||
add.s64 %rd1293, %rd51, %rd1292;
|
||
ld.shared.u32 %r3996, [%rd1293];
|
||
add.s32 %r3997, %r3996, %r3995;
|
||
xor.b32 %r3998, %r73, %r3787;
|
||
xor.b32 %r3819, %r3998, %r3997;
|
||
// inline asm
|
||
bfe.u32 %r3806, %r3819, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1294, %r3806, 4;
|
||
add.s64 %rd1295, %rd42, %rd1294;
|
||
ld.shared.u32 %r3999, [%rd1295];
|
||
// inline asm
|
||
bfe.u32 %r3810, %r3819, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1296, %r3810, 4;
|
||
add.s64 %rd1297, %rd45, %rd1296;
|
||
ld.shared.u32 %r4000, [%rd1297];
|
||
add.s32 %r4001, %r4000, %r3999;
|
||
// inline asm
|
||
bfe.u32 %r3814, %r3819, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1298, %r3814, 4;
|
||
add.s64 %rd1299, %rd48, %rd1298;
|
||
ld.shared.u32 %r4002, [%rd1299];
|
||
xor.b32 %r4003, %r4002, %r4001;
|
||
// inline asm
|
||
bfe.u32 %r3818, %r3819, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1300, %r3818, 4;
|
||
add.s64 %rd1301, %rd51, %rd1300;
|
||
ld.shared.u32 %r4004, [%rd1301];
|
||
add.s32 %r4005, %r4004, %r4003;
|
||
xor.b32 %r4006, %r72, %r3803;
|
||
xor.b32 %r3835, %r4006, %r4005;
|
||
// inline asm
|
||
bfe.u32 %r3822, %r3835, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1302, %r3822, 4;
|
||
add.s64 %rd1303, %rd42, %rd1302;
|
||
ld.shared.u32 %r4007, [%rd1303];
|
||
// inline asm
|
||
bfe.u32 %r3826, %r3835, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1304, %r3826, 4;
|
||
add.s64 %rd1305, %rd45, %rd1304;
|
||
ld.shared.u32 %r4008, [%rd1305];
|
||
add.s32 %r4009, %r4008, %r4007;
|
||
// inline asm
|
||
bfe.u32 %r3830, %r3835, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1306, %r3830, 4;
|
||
add.s64 %rd1307, %rd48, %rd1306;
|
||
ld.shared.u32 %r4010, [%rd1307];
|
||
xor.b32 %r4011, %r4010, %r4009;
|
||
// inline asm
|
||
bfe.u32 %r3834, %r3835, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1308, %r3834, 4;
|
||
add.s64 %rd1309, %rd51, %rd1308;
|
||
ld.shared.u32 %r4012, [%rd1309];
|
||
add.s32 %r4013, %r4012, %r4011;
|
||
xor.b32 %r4014, %r75, %r3819;
|
||
xor.b32 %r3851, %r4014, %r4013;
|
||
// inline asm
|
||
bfe.u32 %r3838, %r3851, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1310, %r3838, 4;
|
||
add.s64 %rd1311, %rd42, %rd1310;
|
||
ld.shared.u32 %r4015, [%rd1311];
|
||
// inline asm
|
||
bfe.u32 %r3842, %r3851, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1312, %r3842, 4;
|
||
add.s64 %rd1313, %rd45, %rd1312;
|
||
ld.shared.u32 %r4016, [%rd1313];
|
||
add.s32 %r4017, %r4016, %r4015;
|
||
// inline asm
|
||
bfe.u32 %r3846, %r3851, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1314, %r3846, 4;
|
||
add.s64 %rd1315, %rd48, %rd1314;
|
||
ld.shared.u32 %r4018, [%rd1315];
|
||
xor.b32 %r4019, %r4018, %r4017;
|
||
// inline asm
|
||
bfe.u32 %r3850, %r3851, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1316, %r3850, 4;
|
||
add.s64 %rd1317, %rd51, %rd1316;
|
||
ld.shared.u32 %r4020, [%rd1317];
|
||
add.s32 %r4021, %r4020, %r4019;
|
||
xor.b32 %r4022, %r74, %r3835;
|
||
xor.b32 %r3867, %r4022, %r4021;
|
||
// inline asm
|
||
bfe.u32 %r3854, %r3867, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1318, %r3854, 4;
|
||
add.s64 %rd1319, %rd42, %rd1318;
|
||
ld.shared.u32 %r4023, [%rd1319];
|
||
// inline asm
|
||
bfe.u32 %r3858, %r3867, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1320, %r3858, 4;
|
||
add.s64 %rd1321, %rd45, %rd1320;
|
||
ld.shared.u32 %r4024, [%rd1321];
|
||
add.s32 %r4025, %r4024, %r4023;
|
||
// inline asm
|
||
bfe.u32 %r3862, %r3867, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1322, %r3862, 4;
|
||
add.s64 %rd1323, %rd48, %rd1322;
|
||
ld.shared.u32 %r4026, [%rd1323];
|
||
xor.b32 %r4027, %r4026, %r4025;
|
||
// inline asm
|
||
bfe.u32 %r3866, %r3867, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1324, %r3866, 4;
|
||
add.s64 %rd1325, %rd51, %rd1324;
|
||
ld.shared.u32 %r4028, [%rd1325];
|
||
add.s32 %r4029, %r4028, %r4027;
|
||
xor.b32 %r4030, %r77, %r3851;
|
||
xor.b32 %r3883, %r4030, %r4029;
|
||
// inline asm
|
||
bfe.u32 %r3870, %r3883, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1326, %r3870, 4;
|
||
add.s64 %rd1327, %rd42, %rd1326;
|
||
ld.shared.u32 %r4031, [%rd1327];
|
||
// inline asm
|
||
bfe.u32 %r3874, %r3883, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1328, %r3874, 4;
|
||
add.s64 %rd1329, %rd45, %rd1328;
|
||
ld.shared.u32 %r4032, [%rd1329];
|
||
add.s32 %r4033, %r4032, %r4031;
|
||
// inline asm
|
||
bfe.u32 %r3878, %r3883, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1330, %r3878, 4;
|
||
add.s64 %rd1331, %rd48, %rd1330;
|
||
ld.shared.u32 %r4034, [%rd1331];
|
||
xor.b32 %r4035, %r4034, %r4033;
|
||
// inline asm
|
||
bfe.u32 %r3882, %r3883, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1332, %r3882, 4;
|
||
add.s64 %rd1333, %rd51, %rd1332;
|
||
ld.shared.u32 %r4036, [%rd1333];
|
||
add.s32 %r4037, %r4036, %r4035;
|
||
xor.b32 %r4038, %r76, %r3867;
|
||
xor.b32 %r3899, %r4038, %r4037;
|
||
// inline asm
|
||
bfe.u32 %r3886, %r3899, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1334, %r3886, 4;
|
||
add.s64 %rd1335, %rd42, %rd1334;
|
||
ld.shared.u32 %r4039, [%rd1335];
|
||
// inline asm
|
||
bfe.u32 %r3890, %r3899, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1336, %r3890, 4;
|
||
add.s64 %rd1337, %rd45, %rd1336;
|
||
ld.shared.u32 %r4040, [%rd1337];
|
||
add.s32 %r4041, %r4040, %r4039;
|
||
// inline asm
|
||
bfe.u32 %r3894, %r3899, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1338, %r3894, 4;
|
||
add.s64 %rd1339, %rd48, %rd1338;
|
||
ld.shared.u32 %r4042, [%rd1339];
|
||
xor.b32 %r4043, %r4042, %r4041;
|
||
// inline asm
|
||
bfe.u32 %r3898, %r3899, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1340, %r3898, 4;
|
||
add.s64 %rd1341, %rd51, %rd1340;
|
||
ld.shared.u32 %r4044, [%rd1341];
|
||
add.s32 %r4045, %r4044, %r4043;
|
||
xor.b32 %r4046, %r79, %r3883;
|
||
xor.b32 %r3915, %r4046, %r4045;
|
||
// inline asm
|
||
bfe.u32 %r3902, %r3915, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1342, %r3902, 4;
|
||
add.s64 %rd1343, %rd42, %rd1342;
|
||
ld.shared.u32 %r4047, [%rd1343];
|
||
// inline asm
|
||
bfe.u32 %r3906, %r3915, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1344, %r3906, 4;
|
||
add.s64 %rd1345, %rd45, %rd1344;
|
||
ld.shared.u32 %r4048, [%rd1345];
|
||
add.s32 %r4049, %r4048, %r4047;
|
||
// inline asm
|
||
bfe.u32 %r3910, %r3915, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1346, %r3910, 4;
|
||
add.s64 %rd1347, %rd48, %rd1346;
|
||
ld.shared.u32 %r4050, [%rd1347];
|
||
xor.b32 %r4051, %r4050, %r4049;
|
||
// inline asm
|
||
bfe.u32 %r3914, %r3915, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1348, %r3914, 4;
|
||
add.s64 %rd1349, %rd51, %rd1348;
|
||
ld.shared.u32 %r4052, [%rd1349];
|
||
add.s32 %r4053, %r4052, %r4051;
|
||
xor.b32 %r4054, %r78, %r3899;
|
||
xor.b32 %r3931, %r4054, %r4053;
|
||
// inline asm
|
||
bfe.u32 %r3918, %r3931, %r10275, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1350, %r3918, 4;
|
||
add.s64 %rd1351, %rd42, %rd1350;
|
||
ld.shared.u32 %r4055, [%rd1351];
|
||
// inline asm
|
||
bfe.u32 %r3922, %r3931, %r10277, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1352, %r3922, 4;
|
||
add.s64 %rd1353, %rd45, %rd1352;
|
||
ld.shared.u32 %r4056, [%rd1353];
|
||
add.s32 %r4057, %r4056, %r4055;
|
||
// inline asm
|
||
bfe.u32 %r3926, %r3931, %r10276, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1354, %r3926, 4;
|
||
add.s64 %rd1355, %rd48, %rd1354;
|
||
ld.shared.u32 %r4058, [%rd1355];
|
||
xor.b32 %r4059, %r4058, %r4057;
|
||
// inline asm
|
||
bfe.u32 %r3930, %r3931, %r208, %r10276;
|
||
// inline asm
|
||
mul.wide.u32 %rd1356, %r3930, 4;
|
||
add.s64 %rd1357, %rd51, %rd1356;
|
||
ld.shared.u32 %r4060, [%rd1357];
|
||
add.s32 %r4061, %r4060, %r4059;
|
||
xor.b32 %r4062, %r81, %r3915;
|
||
xor.b32 %r10314, %r4062, %r4061;
|
||
xor.b32 %r10315, %r80, %r3931;
|
||
st.shared.u32 [%rd3495], %r10315;
|
||
st.shared.u32 [%rd3495+4], %r10314;
|
||
add.s64 %rd3495, %rd3495, 8;
|
||
add.s32 %r10310, %r10310, 2;
|
||
setp.lt.u32 %p4, %r10310, 256;
|
||
@%p4 bra BB4_6;
|
||
|
||
// Preheader for loop BB4_8: start the store cursor %rd3496 at %rd4 and the
// loop counter %r10313 at %r208.
// NOTE(review): %r208 is also used below as a bfe.u32 bit position, so it
// presumably holds 0 -- confirm at its definition (outside this chunk).
mov.u64 %rd3496, %rd4;
|
||
mov.u32 %r10313, %r208;
|
||
|
||
// Loop BB4_8: each pass pushes the running word pair (%r10315, %r10314)
// through 16 table-lookup rounds keyed by %r64..%r81, stores the resulting
// pair at [%rd3496] in shared memory, advances the cursor by 8 bytes and the
// counter by 2, and repeats while the counter is below 256.
// NOTE(review): the round shape ((T0[b3] + T1[b2]) ^ T2[b1]) + T3[b0] together
// with the c_sbox* constant tables looks like Blowfish S-box regeneration --
// confirm against the kernel's high-level source.
BB4_8:
|
||
// bfe.u32 operands: bit positions 16 and 24, and field width 8.
// %r10279 (8) doubles as the bit position of the second-lowest byte.
mov.u32 %r10280, 16;
|
||
mov.u32 %r10279, 8;
|
||
mov.u32 %r10278, 24;
|
||
// Mix subkey %r65 into the first word of the pair carried in from the
// previous pass; %r4076 is the input to the first round.
xor.b32 %r4076, %r65, %r10315;
|
||
// inline asm
|
||
// Byte at bits 24..31 of %r4076 indexes the 4-byte-entry table at %rd42.
bfe.u32 %r4063, %r4076, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1361, %r4063, 4;
|
||
add.s64 %rd1362, %rd42, %rd1361;
|
||
ld.shared.u32 %r4320, [%rd1362];
|
||
// inline asm
|
||
// Byte at bits 16..23 indexes the table at %rd45; its entry is added to the
// first lookup (32-bit wrapping add).
bfe.u32 %r4067, %r4076, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1365, %r4067, 4;
|
||
add.s64 %rd1366, %rd45, %rd1365;
|
||
ld.shared.u32 %r4321, [%rd1366];
|
||
add.s32 %r4322, %r4321, %r4320;
|
||
// inline asm
|
||
// Byte at bits 8..15 indexes the table at %rd48; its entry is XORed in.
bfe.u32 %r4071, %r4076, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1369, %r4071, 4;
|
||
add.s64 %rd1370, %rd48, %rd1369;
|
||
ld.shared.u32 %r4323, [%rd1370];
|
||
xor.b32 %r4324, %r4323, %r4322;
|
||
// inline asm
|
||
// Field of width 8 starting at bit position %r208 (presumably the lowest
// byte, see note above) indexes the table at %rd51; its entry is added last.
bfe.u32 %r4075, %r4076, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1373, %r4075, 4;
|
||
add.s64 %rd1374, %rd51, %rd1373;
|
||
ld.shared.u32 %r4325, [%rd1374];
|
||
// %r4326 = ((T[%rd42] + T[%rd45]) ^ T[%rd48]) + T[%rd51] for the four bytes
// of %r4076.
add.s32 %r4326, %r4325, %r4324;
|
||
// Round output: second word of the pair XOR subkey %r64 XOR the lookup
// result. %r4092 feeds the next round, which repeats the same four-lookup
// pattern with the next subkey register.
xor.b32 %r4327, %r64, %r10314;
|
||
xor.b32 %r4092, %r4327, %r4326;
|
||
// inline asm
|
||
bfe.u32 %r4079, %r4092, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1375, %r4079, 4;
|
||
add.s64 %rd1376, %rd42, %rd1375;
|
||
ld.shared.u32 %r4328, [%rd1376];
|
||
// inline asm
|
||
bfe.u32 %r4083, %r4092, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1377, %r4083, 4;
|
||
add.s64 %rd1378, %rd45, %rd1377;
|
||
ld.shared.u32 %r4329, [%rd1378];
|
||
add.s32 %r4330, %r4329, %r4328;
|
||
// inline asm
|
||
bfe.u32 %r4087, %r4092, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1379, %r4087, 4;
|
||
add.s64 %rd1380, %rd48, %rd1379;
|
||
ld.shared.u32 %r4331, [%rd1380];
|
||
xor.b32 %r4332, %r4331, %r4330;
|
||
// inline asm
|
||
bfe.u32 %r4091, %r4092, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1381, %r4091, 4;
|
||
add.s64 %rd1382, %rd51, %rd1381;
|
||
ld.shared.u32 %r4333, [%rd1382];
|
||
add.s32 %r4334, %r4333, %r4332;
|
||
xor.b32 %r4335, %r67, %r4076;
|
||
xor.b32 %r4108, %r4335, %r4334;
|
||
// inline asm
|
||
bfe.u32 %r4095, %r4108, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1383, %r4095, 4;
|
||
add.s64 %rd1384, %rd42, %rd1383;
|
||
ld.shared.u32 %r4336, [%rd1384];
|
||
// inline asm
|
||
bfe.u32 %r4099, %r4108, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1385, %r4099, 4;
|
||
add.s64 %rd1386, %rd45, %rd1385;
|
||
ld.shared.u32 %r4337, [%rd1386];
|
||
add.s32 %r4338, %r4337, %r4336;
|
||
// inline asm
|
||
bfe.u32 %r4103, %r4108, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1387, %r4103, 4;
|
||
add.s64 %rd1388, %rd48, %rd1387;
|
||
ld.shared.u32 %r4339, [%rd1388];
|
||
xor.b32 %r4340, %r4339, %r4338;
|
||
// inline asm
|
||
bfe.u32 %r4107, %r4108, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1389, %r4107, 4;
|
||
add.s64 %rd1390, %rd51, %rd1389;
|
||
ld.shared.u32 %r4341, [%rd1390];
|
||
add.s32 %r4342, %r4341, %r4340;
|
||
xor.b32 %r4343, %r66, %r4092;
|
||
xor.b32 %r4124, %r4343, %r4342;
|
||
// inline asm
|
||
bfe.u32 %r4111, %r4124, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1391, %r4111, 4;
|
||
add.s64 %rd1392, %rd42, %rd1391;
|
||
ld.shared.u32 %r4344, [%rd1392];
|
||
// inline asm
|
||
bfe.u32 %r4115, %r4124, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1393, %r4115, 4;
|
||
add.s64 %rd1394, %rd45, %rd1393;
|
||
ld.shared.u32 %r4345, [%rd1394];
|
||
add.s32 %r4346, %r4345, %r4344;
|
||
// inline asm
|
||
bfe.u32 %r4119, %r4124, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1395, %r4119, 4;
|
||
add.s64 %rd1396, %rd48, %rd1395;
|
||
ld.shared.u32 %r4347, [%rd1396];
|
||
xor.b32 %r4348, %r4347, %r4346;
|
||
// inline asm
|
||
bfe.u32 %r4123, %r4124, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1397, %r4123, 4;
|
||
add.s64 %rd1398, %rd51, %rd1397;
|
||
ld.shared.u32 %r4349, [%rd1398];
|
||
add.s32 %r4350, %r4349, %r4348;
|
||
xor.b32 %r4351, %r69, %r4108;
|
||
xor.b32 %r4140, %r4351, %r4350;
|
||
// inline asm
|
||
bfe.u32 %r4127, %r4140, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1399, %r4127, 4;
|
||
add.s64 %rd1400, %rd42, %rd1399;
|
||
ld.shared.u32 %r4352, [%rd1400];
|
||
// inline asm
|
||
bfe.u32 %r4131, %r4140, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1401, %r4131, 4;
|
||
add.s64 %rd1402, %rd45, %rd1401;
|
||
ld.shared.u32 %r4353, [%rd1402];
|
||
add.s32 %r4354, %r4353, %r4352;
|
||
// inline asm
|
||
bfe.u32 %r4135, %r4140, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1403, %r4135, 4;
|
||
add.s64 %rd1404, %rd48, %rd1403;
|
||
ld.shared.u32 %r4355, [%rd1404];
|
||
xor.b32 %r4356, %r4355, %r4354;
|
||
// inline asm
|
||
bfe.u32 %r4139, %r4140, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1405, %r4139, 4;
|
||
add.s64 %rd1406, %rd51, %rd1405;
|
||
ld.shared.u32 %r4357, [%rd1406];
|
||
add.s32 %r4358, %r4357, %r4356;
|
||
xor.b32 %r4359, %r68, %r4124;
|
||
xor.b32 %r4156, %r4359, %r4358;
|
||
// inline asm
|
||
bfe.u32 %r4143, %r4156, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1407, %r4143, 4;
|
||
add.s64 %rd1408, %rd42, %rd1407;
|
||
ld.shared.u32 %r4360, [%rd1408];
|
||
// inline asm
|
||
bfe.u32 %r4147, %r4156, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1409, %r4147, 4;
|
||
add.s64 %rd1410, %rd45, %rd1409;
|
||
ld.shared.u32 %r4361, [%rd1410];
|
||
add.s32 %r4362, %r4361, %r4360;
|
||
// inline asm
|
||
bfe.u32 %r4151, %r4156, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1411, %r4151, 4;
|
||
add.s64 %rd1412, %rd48, %rd1411;
|
||
ld.shared.u32 %r4363, [%rd1412];
|
||
xor.b32 %r4364, %r4363, %r4362;
|
||
// inline asm
|
||
bfe.u32 %r4155, %r4156, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1413, %r4155, 4;
|
||
add.s64 %rd1414, %rd51, %rd1413;
|
||
ld.shared.u32 %r4365, [%rd1414];
|
||
add.s32 %r4366, %r4365, %r4364;
|
||
xor.b32 %r4367, %r71, %r4140;
|
||
xor.b32 %r4172, %r4367, %r4366;
|
||
// inline asm
|
||
bfe.u32 %r4159, %r4172, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1415, %r4159, 4;
|
||
add.s64 %rd1416, %rd42, %rd1415;
|
||
ld.shared.u32 %r4368, [%rd1416];
|
||
// inline asm
|
||
bfe.u32 %r4163, %r4172, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1417, %r4163, 4;
|
||
add.s64 %rd1418, %rd45, %rd1417;
|
||
ld.shared.u32 %r4369, [%rd1418];
|
||
add.s32 %r4370, %r4369, %r4368;
|
||
// inline asm
|
||
bfe.u32 %r4167, %r4172, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1419, %r4167, 4;
|
||
add.s64 %rd1420, %rd48, %rd1419;
|
||
ld.shared.u32 %r4371, [%rd1420];
|
||
xor.b32 %r4372, %r4371, %r4370;
|
||
// inline asm
|
||
bfe.u32 %r4171, %r4172, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1421, %r4171, 4;
|
||
add.s64 %rd1422, %rd51, %rd1421;
|
||
ld.shared.u32 %r4373, [%rd1422];
|
||
add.s32 %r4374, %r4373, %r4372;
|
||
xor.b32 %r4375, %r70, %r4156;
|
||
xor.b32 %r4188, %r4375, %r4374;
|
||
// inline asm
|
||
bfe.u32 %r4175, %r4188, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1423, %r4175, 4;
|
||
add.s64 %rd1424, %rd42, %rd1423;
|
||
ld.shared.u32 %r4376, [%rd1424];
|
||
// inline asm
|
||
bfe.u32 %r4179, %r4188, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1425, %r4179, 4;
|
||
add.s64 %rd1426, %rd45, %rd1425;
|
||
ld.shared.u32 %r4377, [%rd1426];
|
||
add.s32 %r4378, %r4377, %r4376;
|
||
// inline asm
|
||
bfe.u32 %r4183, %r4188, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1427, %r4183, 4;
|
||
add.s64 %rd1428, %rd48, %rd1427;
|
||
ld.shared.u32 %r4379, [%rd1428];
|
||
xor.b32 %r4380, %r4379, %r4378;
|
||
// inline asm
|
||
bfe.u32 %r4187, %r4188, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1429, %r4187, 4;
|
||
add.s64 %rd1430, %rd51, %rd1429;
|
||
ld.shared.u32 %r4381, [%rd1430];
|
||
add.s32 %r4382, %r4381, %r4380;
|
||
xor.b32 %r4383, %r73, %r4172;
|
||
xor.b32 %r4204, %r4383, %r4382;
|
||
// inline asm
|
||
bfe.u32 %r4191, %r4204, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1431, %r4191, 4;
|
||
add.s64 %rd1432, %rd42, %rd1431;
|
||
ld.shared.u32 %r4384, [%rd1432];
|
||
// inline asm
|
||
bfe.u32 %r4195, %r4204, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1433, %r4195, 4;
|
||
add.s64 %rd1434, %rd45, %rd1433;
|
||
ld.shared.u32 %r4385, [%rd1434];
|
||
add.s32 %r4386, %r4385, %r4384;
|
||
// inline asm
|
||
bfe.u32 %r4199, %r4204, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1435, %r4199, 4;
|
||
add.s64 %rd1436, %rd48, %rd1435;
|
||
ld.shared.u32 %r4387, [%rd1436];
|
||
xor.b32 %r4388, %r4387, %r4386;
|
||
// inline asm
|
||
bfe.u32 %r4203, %r4204, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1437, %r4203, 4;
|
||
add.s64 %rd1438, %rd51, %rd1437;
|
||
ld.shared.u32 %r4389, [%rd1438];
|
||
add.s32 %r4390, %r4389, %r4388;
|
||
xor.b32 %r4391, %r72, %r4188;
|
||
xor.b32 %r4220, %r4391, %r4390;
|
||
// inline asm
|
||
bfe.u32 %r4207, %r4220, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1439, %r4207, 4;
|
||
add.s64 %rd1440, %rd42, %rd1439;
|
||
ld.shared.u32 %r4392, [%rd1440];
|
||
// inline asm
|
||
bfe.u32 %r4211, %r4220, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1441, %r4211, 4;
|
||
add.s64 %rd1442, %rd45, %rd1441;
|
||
ld.shared.u32 %r4393, [%rd1442];
|
||
add.s32 %r4394, %r4393, %r4392;
|
||
// inline asm
|
||
bfe.u32 %r4215, %r4220, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1443, %r4215, 4;
|
||
add.s64 %rd1444, %rd48, %rd1443;
|
||
ld.shared.u32 %r4395, [%rd1444];
|
||
xor.b32 %r4396, %r4395, %r4394;
|
||
// inline asm
|
||
bfe.u32 %r4219, %r4220, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1445, %r4219, 4;
|
||
add.s64 %rd1446, %rd51, %rd1445;
|
||
ld.shared.u32 %r4397, [%rd1446];
|
||
add.s32 %r4398, %r4397, %r4396;
|
||
xor.b32 %r4399, %r75, %r4204;
|
||
xor.b32 %r4236, %r4399, %r4398;
|
||
// inline asm
|
||
bfe.u32 %r4223, %r4236, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1447, %r4223, 4;
|
||
add.s64 %rd1448, %rd42, %rd1447;
|
||
ld.shared.u32 %r4400, [%rd1448];
|
||
// inline asm
|
||
bfe.u32 %r4227, %r4236, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1449, %r4227, 4;
|
||
add.s64 %rd1450, %rd45, %rd1449;
|
||
ld.shared.u32 %r4401, [%rd1450];
|
||
add.s32 %r4402, %r4401, %r4400;
|
||
// inline asm
|
||
bfe.u32 %r4231, %r4236, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1451, %r4231, 4;
|
||
add.s64 %rd1452, %rd48, %rd1451;
|
||
ld.shared.u32 %r4403, [%rd1452];
|
||
xor.b32 %r4404, %r4403, %r4402;
|
||
// inline asm
|
||
bfe.u32 %r4235, %r4236, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1453, %r4235, 4;
|
||
add.s64 %rd1454, %rd51, %rd1453;
|
||
ld.shared.u32 %r4405, [%rd1454];
|
||
add.s32 %r4406, %r4405, %r4404;
|
||
xor.b32 %r4407, %r74, %r4220;
|
||
xor.b32 %r4252, %r4407, %r4406;
|
||
// inline asm
|
||
bfe.u32 %r4239, %r4252, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1455, %r4239, 4;
|
||
add.s64 %rd1456, %rd42, %rd1455;
|
||
ld.shared.u32 %r4408, [%rd1456];
|
||
// inline asm
|
||
bfe.u32 %r4243, %r4252, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1457, %r4243, 4;
|
||
add.s64 %rd1458, %rd45, %rd1457;
|
||
ld.shared.u32 %r4409, [%rd1458];
|
||
add.s32 %r4410, %r4409, %r4408;
|
||
// inline asm
|
||
bfe.u32 %r4247, %r4252, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1459, %r4247, 4;
|
||
add.s64 %rd1460, %rd48, %rd1459;
|
||
ld.shared.u32 %r4411, [%rd1460];
|
||
xor.b32 %r4412, %r4411, %r4410;
|
||
// inline asm
|
||
bfe.u32 %r4251, %r4252, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1461, %r4251, 4;
|
||
add.s64 %rd1462, %rd51, %rd1461;
|
||
ld.shared.u32 %r4413, [%rd1462];
|
||
add.s32 %r4414, %r4413, %r4412;
|
||
xor.b32 %r4415, %r77, %r4236;
|
||
xor.b32 %r4268, %r4415, %r4414;
|
||
// inline asm
|
||
bfe.u32 %r4255, %r4268, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1463, %r4255, 4;
|
||
add.s64 %rd1464, %rd42, %rd1463;
|
||
ld.shared.u32 %r4416, [%rd1464];
|
||
// inline asm
|
||
bfe.u32 %r4259, %r4268, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1465, %r4259, 4;
|
||
add.s64 %rd1466, %rd45, %rd1465;
|
||
ld.shared.u32 %r4417, [%rd1466];
|
||
add.s32 %r4418, %r4417, %r4416;
|
||
// inline asm
|
||
bfe.u32 %r4263, %r4268, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1467, %r4263, 4;
|
||
add.s64 %rd1468, %rd48, %rd1467;
|
||
ld.shared.u32 %r4419, [%rd1468];
|
||
xor.b32 %r4420, %r4419, %r4418;
|
||
// inline asm
|
||
bfe.u32 %r4267, %r4268, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1469, %r4267, 4;
|
||
add.s64 %rd1470, %rd51, %rd1469;
|
||
ld.shared.u32 %r4421, [%rd1470];
|
||
add.s32 %r4422, %r4421, %r4420;
|
||
xor.b32 %r4423, %r76, %r4252;
|
||
xor.b32 %r4284, %r4423, %r4422;
|
||
// inline asm
|
||
bfe.u32 %r4271, %r4284, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1471, %r4271, 4;
|
||
add.s64 %rd1472, %rd42, %rd1471;
|
||
ld.shared.u32 %r4424, [%rd1472];
|
||
// inline asm
|
||
bfe.u32 %r4275, %r4284, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1473, %r4275, 4;
|
||
add.s64 %rd1474, %rd45, %rd1473;
|
||
ld.shared.u32 %r4425, [%rd1474];
|
||
add.s32 %r4426, %r4425, %r4424;
|
||
// inline asm
|
||
bfe.u32 %r4279, %r4284, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1475, %r4279, 4;
|
||
add.s64 %rd1476, %rd48, %rd1475;
|
||
ld.shared.u32 %r4427, [%rd1476];
|
||
xor.b32 %r4428, %r4427, %r4426;
|
||
// inline asm
|
||
bfe.u32 %r4283, %r4284, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1477, %r4283, 4;
|
||
add.s64 %rd1478, %rd51, %rd1477;
|
||
ld.shared.u32 %r4429, [%rd1478];
|
||
add.s32 %r4430, %r4429, %r4428;
|
||
xor.b32 %r4431, %r79, %r4268;
|
||
xor.b32 %r4300, %r4431, %r4430;
|
||
// inline asm
|
||
bfe.u32 %r4287, %r4300, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1479, %r4287, 4;
|
||
add.s64 %rd1480, %rd42, %rd1479;
|
||
ld.shared.u32 %r4432, [%rd1480];
|
||
// inline asm
|
||
bfe.u32 %r4291, %r4300, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1481, %r4291, 4;
|
||
add.s64 %rd1482, %rd45, %rd1481;
|
||
ld.shared.u32 %r4433, [%rd1482];
|
||
add.s32 %r4434, %r4433, %r4432;
|
||
// inline asm
|
||
bfe.u32 %r4295, %r4300, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1483, %r4295, 4;
|
||
add.s64 %rd1484, %rd48, %rd1483;
|
||
ld.shared.u32 %r4435, [%rd1484];
|
||
xor.b32 %r4436, %r4435, %r4434;
|
||
// inline asm
|
||
bfe.u32 %r4299, %r4300, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1485, %r4299, 4;
|
||
add.s64 %rd1486, %rd51, %rd1485;
|
||
ld.shared.u32 %r4437, [%rd1486];
|
||
add.s32 %r4438, %r4437, %r4436;
|
||
xor.b32 %r4439, %r78, %r4284;
|
||
xor.b32 %r4316, %r4439, %r4438;
|
||
// inline asm
|
||
bfe.u32 %r4303, %r4316, %r10278, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1487, %r4303, 4;
|
||
add.s64 %rd1488, %rd42, %rd1487;
|
||
ld.shared.u32 %r4440, [%rd1488];
|
||
// inline asm
|
||
bfe.u32 %r4307, %r4316, %r10280, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1489, %r4307, 4;
|
||
add.s64 %rd1490, %rd45, %rd1489;
|
||
ld.shared.u32 %r4441, [%rd1490];
|
||
add.s32 %r4442, %r4441, %r4440;
|
||
// inline asm
|
||
bfe.u32 %r4311, %r4316, %r10279, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1491, %r4311, 4;
|
||
add.s64 %rd1492, %rd48, %rd1491;
|
||
ld.shared.u32 %r4443, [%rd1492];
|
||
xor.b32 %r4444, %r4443, %r4442;
|
||
// inline asm
|
||
bfe.u32 %r4315, %r4316, %r208, %r10279;
|
||
// inline asm
|
||
mul.wide.u32 %rd1493, %r4315, 4;
|
||
add.s64 %rd1494, %rd51, %rd1493;
|
||
ld.shared.u32 %r4445, [%rd1494];
|
||
add.s32 %r4446, %r4445, %r4444;
|
||
// End of one BB4_8 pass: form the new word pair from the last two round
// values. %r4446 is the four-table lookup result computed just above from
// %r4316.
// New second word = subkey %r81 ^ %r4300 ^ %r4446.
xor.b32 %r4447, %r81, %r4300;
|
||
xor.b32 %r10314, %r4447, %r4446;
|
||
// New first word = subkey %r80 ^ %r4316 (no table lookup on this one).
xor.b32 %r10315, %r80, %r4316;
|
||
// Write the pair to two consecutive 32-bit shared-memory slots at the cursor.
// The same registers are the input pair for the next pass, which reads them
// at the top of BB4_8.
st.shared.u32 [%rd3496], %r10315;
|
||
st.shared.u32 [%rd3496+4], %r10314;
|
||
// Advance the cursor by the 8 bytes just written and the counter by the two
// words they represent.
add.s64 %rd3496, %rd3496, 8;
|
||
add.s32 %r10313, %r10313, 2;
|
||
// Unsigned compare: keep looping while fewer than 256 words have been
// written since the counter was initialised from %r208.
setp.lt.u32 %p5, %r10313, 256;
|
||
@%p5 bra BB4_8;
|
||
|
||
mov.u64 %rd3497, %rd5;
|
||
mov.u32 %r10316, %r208;
|
||
|
||
BB4_10:
|
||
mov.u32 %r10283, 16;
|
||
mov.u32 %r10282, 8;
|
||
mov.u32 %r10281, 24;
|
||
xor.b32 %r4461, %r65, %r10315;
|
||
// inline asm
|
||
bfe.u32 %r4448, %r4461, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1498, %r4448, 4;
|
||
add.s64 %rd1499, %rd42, %rd1498;
|
||
ld.shared.u32 %r4705, [%rd1499];
|
||
// inline asm
|
||
bfe.u32 %r4452, %r4461, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1502, %r4452, 4;
|
||
add.s64 %rd1503, %rd45, %rd1502;
|
||
ld.shared.u32 %r4706, [%rd1503];
|
||
add.s32 %r4707, %r4706, %r4705;
|
||
// inline asm
|
||
bfe.u32 %r4456, %r4461, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1506, %r4456, 4;
|
||
add.s64 %rd1507, %rd48, %rd1506;
|
||
ld.shared.u32 %r4708, [%rd1507];
|
||
xor.b32 %r4709, %r4708, %r4707;
|
||
// inline asm
|
||
bfe.u32 %r4460, %r4461, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1510, %r4460, 4;
|
||
add.s64 %rd1511, %rd51, %rd1510;
|
||
ld.shared.u32 %r4710, [%rd1511];
|
||
add.s32 %r4711, %r4710, %r4709;
|
||
xor.b32 %r4712, %r64, %r10314;
|
||
xor.b32 %r4477, %r4712, %r4711;
|
||
// inline asm
|
||
bfe.u32 %r4464, %r4477, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1512, %r4464, 4;
|
||
add.s64 %rd1513, %rd42, %rd1512;
|
||
ld.shared.u32 %r4713, [%rd1513];
|
||
// inline asm
|
||
bfe.u32 %r4468, %r4477, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1514, %r4468, 4;
|
||
add.s64 %rd1515, %rd45, %rd1514;
|
||
ld.shared.u32 %r4714, [%rd1515];
|
||
add.s32 %r4715, %r4714, %r4713;
|
||
// inline asm
|
||
bfe.u32 %r4472, %r4477, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1516, %r4472, 4;
|
||
add.s64 %rd1517, %rd48, %rd1516;
|
||
ld.shared.u32 %r4716, [%rd1517];
|
||
xor.b32 %r4717, %r4716, %r4715;
|
||
// inline asm
|
||
bfe.u32 %r4476, %r4477, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1518, %r4476, 4;
|
||
add.s64 %rd1519, %rd51, %rd1518;
|
||
ld.shared.u32 %r4718, [%rd1519];
|
||
add.s32 %r4719, %r4718, %r4717;
|
||
xor.b32 %r4720, %r67, %r4461;
|
||
xor.b32 %r4493, %r4720, %r4719;
|
||
// inline asm
|
||
bfe.u32 %r4480, %r4493, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1520, %r4480, 4;
|
||
add.s64 %rd1521, %rd42, %rd1520;
|
||
ld.shared.u32 %r4721, [%rd1521];
|
||
// inline asm
|
||
bfe.u32 %r4484, %r4493, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1522, %r4484, 4;
|
||
add.s64 %rd1523, %rd45, %rd1522;
|
||
ld.shared.u32 %r4722, [%rd1523];
|
||
add.s32 %r4723, %r4722, %r4721;
|
||
// inline asm
|
||
bfe.u32 %r4488, %r4493, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1524, %r4488, 4;
|
||
add.s64 %rd1525, %rd48, %rd1524;
|
||
ld.shared.u32 %r4724, [%rd1525];
|
||
xor.b32 %r4725, %r4724, %r4723;
|
||
// inline asm
|
||
bfe.u32 %r4492, %r4493, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1526, %r4492, 4;
|
||
add.s64 %rd1527, %rd51, %rd1526;
|
||
ld.shared.u32 %r4726, [%rd1527];
|
||
add.s32 %r4727, %r4726, %r4725;
|
||
xor.b32 %r4728, %r66, %r4477;
|
||
xor.b32 %r4509, %r4728, %r4727;
|
||
// inline asm
|
||
bfe.u32 %r4496, %r4509, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1528, %r4496, 4;
|
||
add.s64 %rd1529, %rd42, %rd1528;
|
||
ld.shared.u32 %r4729, [%rd1529];
|
||
// inline asm
|
||
bfe.u32 %r4500, %r4509, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1530, %r4500, 4;
|
||
add.s64 %rd1531, %rd45, %rd1530;
|
||
ld.shared.u32 %r4730, [%rd1531];
|
||
add.s32 %r4731, %r4730, %r4729;
|
||
// inline asm
|
||
bfe.u32 %r4504, %r4509, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1532, %r4504, 4;
|
||
add.s64 %rd1533, %rd48, %rd1532;
|
||
ld.shared.u32 %r4732, [%rd1533];
|
||
xor.b32 %r4733, %r4732, %r4731;
|
||
// inline asm
|
||
bfe.u32 %r4508, %r4509, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1534, %r4508, 4;
|
||
add.s64 %rd1535, %rd51, %rd1534;
|
||
ld.shared.u32 %r4734, [%rd1535];
|
||
add.s32 %r4735, %r4734, %r4733;
|
||
xor.b32 %r4736, %r69, %r4493;
|
||
xor.b32 %r4525, %r4736, %r4735;
|
||
// inline asm
|
||
bfe.u32 %r4512, %r4525, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1536, %r4512, 4;
|
||
add.s64 %rd1537, %rd42, %rd1536;
|
||
ld.shared.u32 %r4737, [%rd1537];
|
||
// inline asm
|
||
bfe.u32 %r4516, %r4525, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1538, %r4516, 4;
|
||
add.s64 %rd1539, %rd45, %rd1538;
|
||
ld.shared.u32 %r4738, [%rd1539];
|
||
add.s32 %r4739, %r4738, %r4737;
|
||
// inline asm
|
||
bfe.u32 %r4520, %r4525, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1540, %r4520, 4;
|
||
add.s64 %rd1541, %rd48, %rd1540;
|
||
ld.shared.u32 %r4740, [%rd1541];
|
||
xor.b32 %r4741, %r4740, %r4739;
|
||
// inline asm
|
||
bfe.u32 %r4524, %r4525, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1542, %r4524, 4;
|
||
add.s64 %rd1543, %rd51, %rd1542;
|
||
ld.shared.u32 %r4742, [%rd1543];
|
||
add.s32 %r4743, %r4742, %r4741;
|
||
xor.b32 %r4744, %r68, %r4509;
|
||
xor.b32 %r4541, %r4744, %r4743;
|
||
// inline asm
|
||
bfe.u32 %r4528, %r4541, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1544, %r4528, 4;
|
||
add.s64 %rd1545, %rd42, %rd1544;
|
||
ld.shared.u32 %r4745, [%rd1545];
|
||
// inline asm
|
||
bfe.u32 %r4532, %r4541, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1546, %r4532, 4;
|
||
add.s64 %rd1547, %rd45, %rd1546;
|
||
ld.shared.u32 %r4746, [%rd1547];
|
||
add.s32 %r4747, %r4746, %r4745;
|
||
// inline asm
|
||
bfe.u32 %r4536, %r4541, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1548, %r4536, 4;
|
||
add.s64 %rd1549, %rd48, %rd1548;
|
||
ld.shared.u32 %r4748, [%rd1549];
|
||
xor.b32 %r4749, %r4748, %r4747;
|
||
// inline asm
|
||
bfe.u32 %r4540, %r4541, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1550, %r4540, 4;
|
||
add.s64 %rd1551, %rd51, %rd1550;
|
||
ld.shared.u32 %r4750, [%rd1551];
|
||
add.s32 %r4751, %r4750, %r4749;
|
||
xor.b32 %r4752, %r71, %r4525;
|
||
xor.b32 %r4557, %r4752, %r4751;
|
||
// inline asm
|
||
bfe.u32 %r4544, %r4557, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1552, %r4544, 4;
|
||
add.s64 %rd1553, %rd42, %rd1552;
|
||
ld.shared.u32 %r4753, [%rd1553];
|
||
// inline asm
|
||
bfe.u32 %r4548, %r4557, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1554, %r4548, 4;
|
||
add.s64 %rd1555, %rd45, %rd1554;
|
||
ld.shared.u32 %r4754, [%rd1555];
|
||
add.s32 %r4755, %r4754, %r4753;
|
||
// inline asm
|
||
bfe.u32 %r4552, %r4557, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1556, %r4552, 4;
|
||
add.s64 %rd1557, %rd48, %rd1556;
|
||
ld.shared.u32 %r4756, [%rd1557];
|
||
xor.b32 %r4757, %r4756, %r4755;
|
||
// inline asm
|
||
bfe.u32 %r4556, %r4557, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1558, %r4556, 4;
|
||
add.s64 %rd1559, %rd51, %rd1558;
|
||
ld.shared.u32 %r4758, [%rd1559];
|
||
add.s32 %r4759, %r4758, %r4757;
|
||
xor.b32 %r4760, %r70, %r4541;
|
||
xor.b32 %r4573, %r4760, %r4759;
|
||
// inline asm
|
||
bfe.u32 %r4560, %r4573, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1560, %r4560, 4;
|
||
add.s64 %rd1561, %rd42, %rd1560;
|
||
ld.shared.u32 %r4761, [%rd1561];
|
||
// inline asm
|
||
bfe.u32 %r4564, %r4573, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1562, %r4564, 4;
|
||
add.s64 %rd1563, %rd45, %rd1562;
|
||
ld.shared.u32 %r4762, [%rd1563];
|
||
add.s32 %r4763, %r4762, %r4761;
|
||
// inline asm
|
||
bfe.u32 %r4568, %r4573, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1564, %r4568, 4;
|
||
add.s64 %rd1565, %rd48, %rd1564;
|
||
ld.shared.u32 %r4764, [%rd1565];
|
||
xor.b32 %r4765, %r4764, %r4763;
|
||
// inline asm
|
||
bfe.u32 %r4572, %r4573, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1566, %r4572, 4;
|
||
add.s64 %rd1567, %rd51, %rd1566;
|
||
ld.shared.u32 %r4766, [%rd1567];
|
||
add.s32 %r4767, %r4766, %r4765;
|
||
xor.b32 %r4768, %r73, %r4557;
|
||
xor.b32 %r4589, %r4768, %r4767;
|
||
// inline asm
|
||
bfe.u32 %r4576, %r4589, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1568, %r4576, 4;
|
||
add.s64 %rd1569, %rd42, %rd1568;
|
||
ld.shared.u32 %r4769, [%rd1569];
|
||
// inline asm
|
||
bfe.u32 %r4580, %r4589, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1570, %r4580, 4;
|
||
add.s64 %rd1571, %rd45, %rd1570;
|
||
ld.shared.u32 %r4770, [%rd1571];
|
||
add.s32 %r4771, %r4770, %r4769;
|
||
// inline asm
|
||
bfe.u32 %r4584, %r4589, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1572, %r4584, 4;
|
||
add.s64 %rd1573, %rd48, %rd1572;
|
||
ld.shared.u32 %r4772, [%rd1573];
|
||
xor.b32 %r4773, %r4772, %r4771;
|
||
// inline asm
|
||
bfe.u32 %r4588, %r4589, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1574, %r4588, 4;
|
||
add.s64 %rd1575, %rd51, %rd1574;
|
||
ld.shared.u32 %r4774, [%rd1575];
|
||
add.s32 %r4775, %r4774, %r4773;
|
||
xor.b32 %r4776, %r72, %r4573;
|
||
xor.b32 %r4605, %r4776, %r4775;
|
||
// inline asm
|
||
bfe.u32 %r4592, %r4605, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1576, %r4592, 4;
|
||
add.s64 %rd1577, %rd42, %rd1576;
|
||
ld.shared.u32 %r4777, [%rd1577];
|
||
// inline asm
|
||
bfe.u32 %r4596, %r4605, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1578, %r4596, 4;
|
||
add.s64 %rd1579, %rd45, %rd1578;
|
||
ld.shared.u32 %r4778, [%rd1579];
|
||
add.s32 %r4779, %r4778, %r4777;
|
||
// inline asm
|
||
bfe.u32 %r4600, %r4605, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1580, %r4600, 4;
|
||
add.s64 %rd1581, %rd48, %rd1580;
|
||
ld.shared.u32 %r4780, [%rd1581];
|
||
xor.b32 %r4781, %r4780, %r4779;
|
||
// inline asm
|
||
bfe.u32 %r4604, %r4605, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1582, %r4604, 4;
|
||
add.s64 %rd1583, %rd51, %rd1582;
|
||
ld.shared.u32 %r4782, [%rd1583];
|
||
add.s32 %r4783, %r4782, %r4781;
|
||
xor.b32 %r4784, %r75, %r4589;
|
||
xor.b32 %r4621, %r4784, %r4783;
|
||
// inline asm
|
||
bfe.u32 %r4608, %r4621, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1584, %r4608, 4;
|
||
add.s64 %rd1585, %rd42, %rd1584;
|
||
ld.shared.u32 %r4785, [%rd1585];
|
||
// inline asm
|
||
bfe.u32 %r4612, %r4621, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1586, %r4612, 4;
|
||
add.s64 %rd1587, %rd45, %rd1586;
|
||
ld.shared.u32 %r4786, [%rd1587];
|
||
add.s32 %r4787, %r4786, %r4785;
|
||
// inline asm
|
||
bfe.u32 %r4616, %r4621, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1588, %r4616, 4;
|
||
add.s64 %rd1589, %rd48, %rd1588;
|
||
ld.shared.u32 %r4788, [%rd1589];
|
||
xor.b32 %r4789, %r4788, %r4787;
|
||
// inline asm
|
||
bfe.u32 %r4620, %r4621, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1590, %r4620, 4;
|
||
add.s64 %rd1591, %rd51, %rd1590;
|
||
ld.shared.u32 %r4790, [%rd1591];
|
||
add.s32 %r4791, %r4790, %r4789;
|
||
xor.b32 %r4792, %r74, %r4605;
|
||
xor.b32 %r4637, %r4792, %r4791;
|
||
// inline asm
|
||
bfe.u32 %r4624, %r4637, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1592, %r4624, 4;
|
||
add.s64 %rd1593, %rd42, %rd1592;
|
||
ld.shared.u32 %r4793, [%rd1593];
|
||
// inline asm
|
||
bfe.u32 %r4628, %r4637, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1594, %r4628, 4;
|
||
add.s64 %rd1595, %rd45, %rd1594;
|
||
ld.shared.u32 %r4794, [%rd1595];
|
||
add.s32 %r4795, %r4794, %r4793;
|
||
// inline asm
|
||
bfe.u32 %r4632, %r4637, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1596, %r4632, 4;
|
||
add.s64 %rd1597, %rd48, %rd1596;
|
||
ld.shared.u32 %r4796, [%rd1597];
|
||
xor.b32 %r4797, %r4796, %r4795;
|
||
// inline asm
|
||
bfe.u32 %r4636, %r4637, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1598, %r4636, 4;
|
||
add.s64 %rd1599, %rd51, %rd1598;
|
||
ld.shared.u32 %r4798, [%rd1599];
|
||
add.s32 %r4799, %r4798, %r4797;
|
||
xor.b32 %r4800, %r77, %r4621;
|
||
xor.b32 %r4653, %r4800, %r4799;
|
||
// inline asm
|
||
bfe.u32 %r4640, %r4653, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1600, %r4640, 4;
|
||
add.s64 %rd1601, %rd42, %rd1600;
|
||
ld.shared.u32 %r4801, [%rd1601];
|
||
// inline asm
|
||
bfe.u32 %r4644, %r4653, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1602, %r4644, 4;
|
||
add.s64 %rd1603, %rd45, %rd1602;
|
||
ld.shared.u32 %r4802, [%rd1603];
|
||
add.s32 %r4803, %r4802, %r4801;
|
||
// inline asm
|
||
bfe.u32 %r4648, %r4653, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1604, %r4648, 4;
|
||
add.s64 %rd1605, %rd48, %rd1604;
|
||
ld.shared.u32 %r4804, [%rd1605];
|
||
xor.b32 %r4805, %r4804, %r4803;
|
||
// inline asm
|
||
bfe.u32 %r4652, %r4653, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1606, %r4652, 4;
|
||
add.s64 %rd1607, %rd51, %rd1606;
|
||
ld.shared.u32 %r4806, [%rd1607];
|
||
add.s32 %r4807, %r4806, %r4805;
|
||
xor.b32 %r4808, %r76, %r4637;
|
||
xor.b32 %r4669, %r4808, %r4807;
|
||
// inline asm
|
||
bfe.u32 %r4656, %r4669, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1608, %r4656, 4;
|
||
add.s64 %rd1609, %rd42, %rd1608;
|
||
ld.shared.u32 %r4809, [%rd1609];
|
||
// inline asm
|
||
bfe.u32 %r4660, %r4669, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1610, %r4660, 4;
|
||
add.s64 %rd1611, %rd45, %rd1610;
|
||
ld.shared.u32 %r4810, [%rd1611];
|
||
add.s32 %r4811, %r4810, %r4809;
|
||
// inline asm
|
||
bfe.u32 %r4664, %r4669, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1612, %r4664, 4;
|
||
add.s64 %rd1613, %rd48, %rd1612;
|
||
ld.shared.u32 %r4812, [%rd1613];
|
||
xor.b32 %r4813, %r4812, %r4811;
|
||
// inline asm
|
||
bfe.u32 %r4668, %r4669, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1614, %r4668, 4;
|
||
add.s64 %rd1615, %rd51, %rd1614;
|
||
ld.shared.u32 %r4814, [%rd1615];
|
||
add.s32 %r4815, %r4814, %r4813;
|
||
xor.b32 %r4816, %r79, %r4653;
|
||
xor.b32 %r4685, %r4816, %r4815;
|
||
// inline asm
|
||
bfe.u32 %r4672, %r4685, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1616, %r4672, 4;
|
||
add.s64 %rd1617, %rd42, %rd1616;
|
||
ld.shared.u32 %r4817, [%rd1617];
|
||
// inline asm
|
||
bfe.u32 %r4676, %r4685, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1618, %r4676, 4;
|
||
add.s64 %rd1619, %rd45, %rd1618;
|
||
ld.shared.u32 %r4818, [%rd1619];
|
||
add.s32 %r4819, %r4818, %r4817;
|
||
// inline asm
|
||
bfe.u32 %r4680, %r4685, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1620, %r4680, 4;
|
||
add.s64 %rd1621, %rd48, %rd1620;
|
||
ld.shared.u32 %r4820, [%rd1621];
|
||
xor.b32 %r4821, %r4820, %r4819;
|
||
// inline asm
|
||
bfe.u32 %r4684, %r4685, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1622, %r4684, 4;
|
||
add.s64 %rd1623, %rd51, %rd1622;
|
||
ld.shared.u32 %r4822, [%rd1623];
|
||
add.s32 %r4823, %r4822, %r4821;
|
||
xor.b32 %r4824, %r78, %r4669;
|
||
xor.b32 %r4701, %r4824, %r4823;
|
||
// inline asm
|
||
bfe.u32 %r4688, %r4701, %r10281, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1624, %r4688, 4;
|
||
add.s64 %rd1625, %rd42, %rd1624;
|
||
ld.shared.u32 %r4825, [%rd1625];
|
||
// inline asm
|
||
bfe.u32 %r4692, %r4701, %r10283, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1626, %r4692, 4;
|
||
add.s64 %rd1627, %rd45, %rd1626;
|
||
ld.shared.u32 %r4826, [%rd1627];
|
||
add.s32 %r4827, %r4826, %r4825;
|
||
// inline asm
|
||
bfe.u32 %r4696, %r4701, %r10282, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1628, %r4696, 4;
|
||
add.s64 %rd1629, %rd48, %rd1628;
|
||
ld.shared.u32 %r4828, [%rd1629];
|
||
xor.b32 %r4829, %r4828, %r4827;
|
||
// inline asm
|
||
bfe.u32 %r4700, %r4701, %r208, %r10282;
|
||
// inline asm
|
||
mul.wide.u32 %rd1630, %r4700, 4;
|
||
add.s64 %rd1631, %rd51, %rd1630;
|
||
ld.shared.u32 %r4830, [%rd1631];
|
||
add.s32 %r4831, %r4830, %r4829;
|
||
xor.b32 %r4832, %r81, %r4685;
|
||
xor.b32 %r10314, %r4832, %r4831;
|
||
xor.b32 %r10315, %r80, %r4701;
|
||
st.shared.u32 [%rd3497], %r10315;
|
||
st.shared.u32 [%rd3497+4], %r10314;
|
||
add.s64 %rd3497, %rd3497, 8;
|
||
add.s32 %r10316, %r10316, 2;
|
||
setp.lt.u32 %p6, %r10316, 256;
|
||
@%p6 bra BB4_10;
|
||
|
||
// ---------------------------------------------------------------------
// Loop BB4_12 (preamble, header and first round).
//
// Each iteration runs 16 table-lookup rounds over the loop-carried pair
// (%r10315, %r10314), then stores the resulting pair as two consecutive
// 32-bit words through %rd3498, advances %rd3498 by 8 bytes and the
// counter %r10319 by 2, and repeats while %r10319 < 256 (unsigned).
// The pair fed into the first iteration is whatever the preceding code
// left in %r10315 / %r10314.
//
// NOTE(review): four 8-bit-indexed lookups combined as
// ((A + B) ^ C) + D, with the per-round words %r64..%r81 xor-ed in,
// matches the Blowfish round / key-schedule structure; the c_sbox0
// constant table at the top of the file points the same way. Confirm
// against the original kernel source before relying on this.
// ---------------------------------------------------------------------
// Store cursor for this loop: starts at the shared-space address in %rd6.
mov.u64 %rd3498, %rd6;
|
||
// Loop counter starts at the value of %r208. %r208 is defined outside this
// view; presumably 0, since it is also used below as the bit offset of the
// lowest byte field -- TODO confirm.
mov.u32 %r10319, %r208;
|
||
|
||
BB4_12:
|
||
// Bit-field parameters for bfe: offsets 16 and 24, and 8 (used both as an
// offset and as the field width). Re-materialised on every iteration.
mov.u32 %r10286, 16;
|
||
mov.u32 %r10285, 8;
|
||
mov.u32 %r10284, 24;
|
||
// Round input: first per-round word (%r65) xor the carried word %r10315.
xor.b32 %r4846, %r65, %r10315;
|
||
// inline asm
|
||
// Field at bit offset 24, width 8.
bfe.u32 %r4833, %r4846, %r10284, %r10285;
|
||
// inline asm
|
||
// Index * 4 = byte offset of a 32-bit entry; look it up in the shared
// table based at %rd42.
mul.wide.u32 %rd1635, %r4833, 4;
|
||
add.s64 %rd1636, %rd42, %rd1635;
|
||
ld.shared.u32 %r5089, [%rd1636];
|
||
// inline asm
|
||
// Field at bit offset 16, width 8 -> table based at %rd45.
bfe.u32 %r4837, %r4846, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1639, %r4837, 4;
|
||
add.s64 %rd1640, %rd45, %rd1639;
|
||
ld.shared.u32 %r5090, [%rd1640];
|
||
// First combine step: 32-bit add of the two entries read so far.
add.s32 %r5091, %r5090, %r5089;
|
||
// inline asm
|
||
// Field at bit offset 8, width 8 -> table based at %rd48.
bfe.u32 %r4841, %r4846, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1643, %r4841, 4;
|
||
add.s64 %rd1644, %rd48, %rd1643;
|
||
ld.shared.u32 %r5092, [%rd1644];
|
||
// Second combine step: xor the third entry into the running value.
xor.b32 %r5093, %r5092, %r5091;
|
||
// inline asm
|
||
// Field at bit offset %r208, width 8 -> table based at %rd51.
bfe.u32 %r4845, %r4846, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1647, %r4845, 4;
|
||
add.s64 %rd1648, %rd51, %rd1647;
|
||
ld.shared.u32 %r5094, [%rd1648];
|
||
// Third combine step: add the fourth entry; %r5095 is the round function
// output for input %r4846.
add.s32 %r5095, %r5094, %r5093;
|
||
// Next round's input: per-round word %r64 xor the other carried word
// %r10314 xor the round function output. The following rounds repeat this
// pattern, alternating which half is updated.
xor.b32 %r5096, %r64, %r10314;
|
||
xor.b32 %r4862, %r5096, %r5095;
|
||
// inline asm
|
||
bfe.u32 %r4849, %r4862, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1649, %r4849, 4;
|
||
add.s64 %rd1650, %rd42, %rd1649;
|
||
ld.shared.u32 %r5097, [%rd1650];
|
||
// inline asm
|
||
bfe.u32 %r4853, %r4862, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1651, %r4853, 4;
|
||
add.s64 %rd1652, %rd45, %rd1651;
|
||
ld.shared.u32 %r5098, [%rd1652];
|
||
add.s32 %r5099, %r5098, %r5097;
|
||
// inline asm
|
||
bfe.u32 %r4857, %r4862, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1653, %r4857, 4;
|
||
add.s64 %rd1654, %rd48, %rd1653;
|
||
ld.shared.u32 %r5100, [%rd1654];
|
||
xor.b32 %r5101, %r5100, %r5099;
|
||
// inline asm
|
||
bfe.u32 %r4861, %r4862, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1655, %r4861, 4;
|
||
add.s64 %rd1656, %rd51, %rd1655;
|
||
ld.shared.u32 %r5102, [%rd1656];
|
||
add.s32 %r5103, %r5102, %r5101;
|
||
xor.b32 %r5104, %r67, %r4846;
|
||
xor.b32 %r4878, %r5104, %r5103;
|
||
// inline asm
|
||
bfe.u32 %r4865, %r4878, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1657, %r4865, 4;
|
||
add.s64 %rd1658, %rd42, %rd1657;
|
||
ld.shared.u32 %r5105, [%rd1658];
|
||
// inline asm
|
||
bfe.u32 %r4869, %r4878, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1659, %r4869, 4;
|
||
add.s64 %rd1660, %rd45, %rd1659;
|
||
ld.shared.u32 %r5106, [%rd1660];
|
||
add.s32 %r5107, %r5106, %r5105;
|
||
// inline asm
|
||
bfe.u32 %r4873, %r4878, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1661, %r4873, 4;
|
||
add.s64 %rd1662, %rd48, %rd1661;
|
||
ld.shared.u32 %r5108, [%rd1662];
|
||
xor.b32 %r5109, %r5108, %r5107;
|
||
// inline asm
|
||
bfe.u32 %r4877, %r4878, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1663, %r4877, 4;
|
||
add.s64 %rd1664, %rd51, %rd1663;
|
||
ld.shared.u32 %r5110, [%rd1664];
|
||
add.s32 %r5111, %r5110, %r5109;
|
||
xor.b32 %r5112, %r66, %r4862;
|
||
xor.b32 %r4894, %r5112, %r5111;
|
||
// inline asm
|
||
bfe.u32 %r4881, %r4894, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1665, %r4881, 4;
|
||
add.s64 %rd1666, %rd42, %rd1665;
|
||
ld.shared.u32 %r5113, [%rd1666];
|
||
// inline asm
|
||
bfe.u32 %r4885, %r4894, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1667, %r4885, 4;
|
||
add.s64 %rd1668, %rd45, %rd1667;
|
||
ld.shared.u32 %r5114, [%rd1668];
|
||
add.s32 %r5115, %r5114, %r5113;
|
||
// inline asm
|
||
bfe.u32 %r4889, %r4894, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1669, %r4889, 4;
|
||
add.s64 %rd1670, %rd48, %rd1669;
|
||
ld.shared.u32 %r5116, [%rd1670];
|
||
xor.b32 %r5117, %r5116, %r5115;
|
||
// inline asm
|
||
bfe.u32 %r4893, %r4894, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1671, %r4893, 4;
|
||
add.s64 %rd1672, %rd51, %rd1671;
|
||
ld.shared.u32 %r5118, [%rd1672];
|
||
add.s32 %r5119, %r5118, %r5117;
|
||
xor.b32 %r5120, %r69, %r4878;
|
||
xor.b32 %r4910, %r5120, %r5119;
|
||
// inline asm
|
||
bfe.u32 %r4897, %r4910, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1673, %r4897, 4;
|
||
add.s64 %rd1674, %rd42, %rd1673;
|
||
ld.shared.u32 %r5121, [%rd1674];
|
||
// inline asm
|
||
bfe.u32 %r4901, %r4910, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1675, %r4901, 4;
|
||
add.s64 %rd1676, %rd45, %rd1675;
|
||
ld.shared.u32 %r5122, [%rd1676];
|
||
add.s32 %r5123, %r5122, %r5121;
|
||
// inline asm
|
||
bfe.u32 %r4905, %r4910, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1677, %r4905, 4;
|
||
add.s64 %rd1678, %rd48, %rd1677;
|
||
ld.shared.u32 %r5124, [%rd1678];
|
||
xor.b32 %r5125, %r5124, %r5123;
|
||
// inline asm
|
||
bfe.u32 %r4909, %r4910, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1679, %r4909, 4;
|
||
add.s64 %rd1680, %rd51, %rd1679;
|
||
ld.shared.u32 %r5126, [%rd1680];
|
||
add.s32 %r5127, %r5126, %r5125;
|
||
xor.b32 %r5128, %r68, %r4894;
|
||
xor.b32 %r4926, %r5128, %r5127;
|
||
// inline asm
|
||
bfe.u32 %r4913, %r4926, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1681, %r4913, 4;
|
||
add.s64 %rd1682, %rd42, %rd1681;
|
||
ld.shared.u32 %r5129, [%rd1682];
|
||
// inline asm
|
||
bfe.u32 %r4917, %r4926, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1683, %r4917, 4;
|
||
add.s64 %rd1684, %rd45, %rd1683;
|
||
ld.shared.u32 %r5130, [%rd1684];
|
||
add.s32 %r5131, %r5130, %r5129;
|
||
// inline asm
|
||
bfe.u32 %r4921, %r4926, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1685, %r4921, 4;
|
||
add.s64 %rd1686, %rd48, %rd1685;
|
||
ld.shared.u32 %r5132, [%rd1686];
|
||
xor.b32 %r5133, %r5132, %r5131;
|
||
// inline asm
|
||
bfe.u32 %r4925, %r4926, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1687, %r4925, 4;
|
||
add.s64 %rd1688, %rd51, %rd1687;
|
||
ld.shared.u32 %r5134, [%rd1688];
|
||
add.s32 %r5135, %r5134, %r5133;
|
||
xor.b32 %r5136, %r71, %r4910;
|
||
xor.b32 %r4942, %r5136, %r5135;
|
||
// inline asm
|
||
bfe.u32 %r4929, %r4942, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1689, %r4929, 4;
|
||
add.s64 %rd1690, %rd42, %rd1689;
|
||
ld.shared.u32 %r5137, [%rd1690];
|
||
// inline asm
|
||
bfe.u32 %r4933, %r4942, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1691, %r4933, 4;
|
||
add.s64 %rd1692, %rd45, %rd1691;
|
||
ld.shared.u32 %r5138, [%rd1692];
|
||
add.s32 %r5139, %r5138, %r5137;
|
||
// inline asm
|
||
bfe.u32 %r4937, %r4942, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1693, %r4937, 4;
|
||
add.s64 %rd1694, %rd48, %rd1693;
|
||
ld.shared.u32 %r5140, [%rd1694];
|
||
xor.b32 %r5141, %r5140, %r5139;
|
||
// inline asm
|
||
bfe.u32 %r4941, %r4942, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1695, %r4941, 4;
|
||
add.s64 %rd1696, %rd51, %rd1695;
|
||
ld.shared.u32 %r5142, [%rd1696];
|
||
add.s32 %r5143, %r5142, %r5141;
|
||
xor.b32 %r5144, %r70, %r4926;
|
||
xor.b32 %r4958, %r5144, %r5143;
|
||
// inline asm
|
||
bfe.u32 %r4945, %r4958, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1697, %r4945, 4;
|
||
add.s64 %rd1698, %rd42, %rd1697;
|
||
ld.shared.u32 %r5145, [%rd1698];
|
||
// inline asm
|
||
bfe.u32 %r4949, %r4958, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1699, %r4949, 4;
|
||
add.s64 %rd1700, %rd45, %rd1699;
|
||
ld.shared.u32 %r5146, [%rd1700];
|
||
add.s32 %r5147, %r5146, %r5145;
|
||
// inline asm
|
||
bfe.u32 %r4953, %r4958, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1701, %r4953, 4;
|
||
add.s64 %rd1702, %rd48, %rd1701;
|
||
ld.shared.u32 %r5148, [%rd1702];
|
||
xor.b32 %r5149, %r5148, %r5147;
|
||
// inline asm
|
||
bfe.u32 %r4957, %r4958, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1703, %r4957, 4;
|
||
add.s64 %rd1704, %rd51, %rd1703;
|
||
ld.shared.u32 %r5150, [%rd1704];
|
||
add.s32 %r5151, %r5150, %r5149;
|
||
xor.b32 %r5152, %r73, %r4942;
|
||
xor.b32 %r4974, %r5152, %r5151;
|
||
// inline asm
|
||
bfe.u32 %r4961, %r4974, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1705, %r4961, 4;
|
||
add.s64 %rd1706, %rd42, %rd1705;
|
||
ld.shared.u32 %r5153, [%rd1706];
|
||
// inline asm
|
||
bfe.u32 %r4965, %r4974, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1707, %r4965, 4;
|
||
add.s64 %rd1708, %rd45, %rd1707;
|
||
ld.shared.u32 %r5154, [%rd1708];
|
||
add.s32 %r5155, %r5154, %r5153;
|
||
// inline asm
|
||
bfe.u32 %r4969, %r4974, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1709, %r4969, 4;
|
||
add.s64 %rd1710, %rd48, %rd1709;
|
||
ld.shared.u32 %r5156, [%rd1710];
|
||
xor.b32 %r5157, %r5156, %r5155;
|
||
// inline asm
|
||
bfe.u32 %r4973, %r4974, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1711, %r4973, 4;
|
||
add.s64 %rd1712, %rd51, %rd1711;
|
||
ld.shared.u32 %r5158, [%rd1712];
|
||
add.s32 %r5159, %r5158, %r5157;
|
||
xor.b32 %r5160, %r72, %r4958;
|
||
xor.b32 %r4990, %r5160, %r5159;
|
||
// inline asm
|
||
bfe.u32 %r4977, %r4990, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1713, %r4977, 4;
|
||
add.s64 %rd1714, %rd42, %rd1713;
|
||
ld.shared.u32 %r5161, [%rd1714];
|
||
// inline asm
|
||
bfe.u32 %r4981, %r4990, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1715, %r4981, 4;
|
||
add.s64 %rd1716, %rd45, %rd1715;
|
||
ld.shared.u32 %r5162, [%rd1716];
|
||
add.s32 %r5163, %r5162, %r5161;
|
||
// inline asm
|
||
bfe.u32 %r4985, %r4990, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1717, %r4985, 4;
|
||
add.s64 %rd1718, %rd48, %rd1717;
|
||
ld.shared.u32 %r5164, [%rd1718];
|
||
xor.b32 %r5165, %r5164, %r5163;
|
||
// inline asm
|
||
bfe.u32 %r4989, %r4990, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1719, %r4989, 4;
|
||
add.s64 %rd1720, %rd51, %rd1719;
|
||
ld.shared.u32 %r5166, [%rd1720];
|
||
add.s32 %r5167, %r5166, %r5165;
|
||
xor.b32 %r5168, %r75, %r4974;
|
||
xor.b32 %r5006, %r5168, %r5167;
|
||
// inline asm
|
||
bfe.u32 %r4993, %r5006, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1721, %r4993, 4;
|
||
add.s64 %rd1722, %rd42, %rd1721;
|
||
ld.shared.u32 %r5169, [%rd1722];
|
||
// inline asm
|
||
bfe.u32 %r4997, %r5006, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1723, %r4997, 4;
|
||
add.s64 %rd1724, %rd45, %rd1723;
|
||
ld.shared.u32 %r5170, [%rd1724];
|
||
add.s32 %r5171, %r5170, %r5169;
|
||
// inline asm
|
||
bfe.u32 %r5001, %r5006, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1725, %r5001, 4;
|
||
add.s64 %rd1726, %rd48, %rd1725;
|
||
ld.shared.u32 %r5172, [%rd1726];
|
||
xor.b32 %r5173, %r5172, %r5171;
|
||
// inline asm
|
||
bfe.u32 %r5005, %r5006, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1727, %r5005, 4;
|
||
add.s64 %rd1728, %rd51, %rd1727;
|
||
ld.shared.u32 %r5174, [%rd1728];
|
||
add.s32 %r5175, %r5174, %r5173;
|
||
xor.b32 %r5176, %r74, %r4990;
|
||
xor.b32 %r5022, %r5176, %r5175;
|
||
// inline asm
|
||
bfe.u32 %r5009, %r5022, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1729, %r5009, 4;
|
||
add.s64 %rd1730, %rd42, %rd1729;
|
||
ld.shared.u32 %r5177, [%rd1730];
|
||
// inline asm
|
||
bfe.u32 %r5013, %r5022, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1731, %r5013, 4;
|
||
add.s64 %rd1732, %rd45, %rd1731;
|
||
ld.shared.u32 %r5178, [%rd1732];
|
||
add.s32 %r5179, %r5178, %r5177;
|
||
// inline asm
|
||
bfe.u32 %r5017, %r5022, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1733, %r5017, 4;
|
||
add.s64 %rd1734, %rd48, %rd1733;
|
||
ld.shared.u32 %r5180, [%rd1734];
|
||
xor.b32 %r5181, %r5180, %r5179;
|
||
// inline asm
|
||
bfe.u32 %r5021, %r5022, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1735, %r5021, 4;
|
||
add.s64 %rd1736, %rd51, %rd1735;
|
||
ld.shared.u32 %r5182, [%rd1736];
|
||
add.s32 %r5183, %r5182, %r5181;
|
||
xor.b32 %r5184, %r77, %r5006;
|
||
xor.b32 %r5038, %r5184, %r5183;
|
||
// inline asm
|
||
bfe.u32 %r5025, %r5038, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1737, %r5025, 4;
|
||
add.s64 %rd1738, %rd42, %rd1737;
|
||
ld.shared.u32 %r5185, [%rd1738];
|
||
// inline asm
|
||
bfe.u32 %r5029, %r5038, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1739, %r5029, 4;
|
||
add.s64 %rd1740, %rd45, %rd1739;
|
||
ld.shared.u32 %r5186, [%rd1740];
|
||
add.s32 %r5187, %r5186, %r5185;
|
||
// inline asm
|
||
bfe.u32 %r5033, %r5038, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1741, %r5033, 4;
|
||
add.s64 %rd1742, %rd48, %rd1741;
|
||
ld.shared.u32 %r5188, [%rd1742];
|
||
xor.b32 %r5189, %r5188, %r5187;
|
||
// inline asm
|
||
bfe.u32 %r5037, %r5038, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1743, %r5037, 4;
|
||
add.s64 %rd1744, %rd51, %rd1743;
|
||
ld.shared.u32 %r5190, [%rd1744];
|
||
add.s32 %r5191, %r5190, %r5189;
|
||
xor.b32 %r5192, %r76, %r5022;
|
||
xor.b32 %r5054, %r5192, %r5191;
|
||
// inline asm
|
||
bfe.u32 %r5041, %r5054, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1745, %r5041, 4;
|
||
add.s64 %rd1746, %rd42, %rd1745;
|
||
ld.shared.u32 %r5193, [%rd1746];
|
||
// inline asm
|
||
bfe.u32 %r5045, %r5054, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1747, %r5045, 4;
|
||
add.s64 %rd1748, %rd45, %rd1747;
|
||
ld.shared.u32 %r5194, [%rd1748];
|
||
add.s32 %r5195, %r5194, %r5193;
|
||
// inline asm
|
||
bfe.u32 %r5049, %r5054, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1749, %r5049, 4;
|
||
add.s64 %rd1750, %rd48, %rd1749;
|
||
ld.shared.u32 %r5196, [%rd1750];
|
||
xor.b32 %r5197, %r5196, %r5195;
|
||
// inline asm
|
||
bfe.u32 %r5053, %r5054, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1751, %r5053, 4;
|
||
add.s64 %rd1752, %rd51, %rd1751;
|
||
ld.shared.u32 %r5198, [%rd1752];
|
||
add.s32 %r5199, %r5198, %r5197;
|
||
xor.b32 %r5200, %r79, %r5038;
|
||
xor.b32 %r5070, %r5200, %r5199;
|
||
// inline asm
|
||
bfe.u32 %r5057, %r5070, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1753, %r5057, 4;
|
||
add.s64 %rd1754, %rd42, %rd1753;
|
||
ld.shared.u32 %r5201, [%rd1754];
|
||
// inline asm
|
||
bfe.u32 %r5061, %r5070, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1755, %r5061, 4;
|
||
add.s64 %rd1756, %rd45, %rd1755;
|
||
ld.shared.u32 %r5202, [%rd1756];
|
||
add.s32 %r5203, %r5202, %r5201;
|
||
// inline asm
|
||
bfe.u32 %r5065, %r5070, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1757, %r5065, 4;
|
||
add.s64 %rd1758, %rd48, %rd1757;
|
||
ld.shared.u32 %r5204, [%rd1758];
|
||
xor.b32 %r5205, %r5204, %r5203;
|
||
// inline asm
|
||
bfe.u32 %r5069, %r5070, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1759, %r5069, 4;
|
||
add.s64 %rd1760, %rd51, %rd1759;
|
||
ld.shared.u32 %r5206, [%rd1760];
|
||
add.s32 %r5207, %r5206, %r5205;
|
||
xor.b32 %r5208, %r78, %r5054;
|
||
xor.b32 %r5086, %r5208, %r5207;
|
||
// inline asm
|
||
bfe.u32 %r5073, %r5086, %r10284, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1761, %r5073, 4;
|
||
add.s64 %rd1762, %rd42, %rd1761;
|
||
ld.shared.u32 %r5209, [%rd1762];
|
||
// inline asm
|
||
bfe.u32 %r5077, %r5086, %r10286, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1763, %r5077, 4;
|
||
add.s64 %rd1764, %rd45, %rd1763;
|
||
ld.shared.u32 %r5210, [%rd1764];
|
||
add.s32 %r5211, %r5210, %r5209;
|
||
// inline asm
|
||
bfe.u32 %r5081, %r5086, %r10285, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1765, %r5081, 4;
|
||
add.s64 %rd1766, %rd48, %rd1765;
|
||
ld.shared.u32 %r5212, [%rd1766];
|
||
xor.b32 %r5213, %r5212, %r5211;
|
||
// inline asm
|
||
bfe.u32 %r5085, %r5086, %r208, %r10285;
|
||
// inline asm
|
||
mul.wide.u32 %rd1767, %r5085, 4;
|
||
add.s64 %rd1768, %rd51, %rd1767;
|
||
ld.shared.u32 %r5214, [%rd1768];
|
||
add.s32 %r5215, %r5214, %r5213;
|
||
// Tail of loop BB4_12: finish the last round, store the result pair and
// decide whether to iterate again.
// Last round: per-round word %r81 xor %r5070 xor the final round-function
// output %r5215 becomes the new carried word %r10314.
xor.b32 %r5216, %r81, %r5070;
|
||
xor.b32 %r10314, %r5216, %r5215;
|
||
// The other carried word only gets the closing word %r80 xor-ed in (no
// table lookups for this half).
xor.b32 %r10315, %r80, %r5086;
|
||
// Write the pair as two consecutive 32-bit words at the shared-memory
// cursor; the same values are also the input of the next iteration.
st.shared.u32 [%rd3498], %r10315;
|
||
st.shared.u32 [%rd3498+4], %r10314;
|
||
// Advance the cursor by the 8 bytes just written ...
add.s64 %rd3498, %rd3498, 8;
|
||
// ... and the word counter by the 2 words just written.
add.s32 %r10319, %r10319, 2;
|
||
// Unsigned compare: keep looping while fewer than 256 words are counted.
setp.lt.u32 %p7, %r10319, 256;
|
||
@%p7 bra BB4_12;
|
||
|
||
mov.u32 %r10289, 16;
|
||
mov.u32 %r10288, 8;
|
||
mov.u32 %r10287, 24;
|
||
xor.b32 %r5230, %r65, %r41;
|
||
// inline asm
|
||
bfe.u32 %r5217, %r5230, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1772, %r5217, 4;
|
||
add.s64 %rd1773, %rd42, %rd1772;
|
||
ld.shared.u32 %r7522, [%rd1773];
|
||
// inline asm
|
||
bfe.u32 %r5221, %r5230, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1776, %r5221, 4;
|
||
add.s64 %rd1777, %rd45, %rd1776;
|
||
ld.shared.u32 %r7523, [%rd1777];
|
||
add.s32 %r7524, %r7523, %r7522;
|
||
// inline asm
|
||
bfe.u32 %r5225, %r5230, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1780, %r5225, 4;
|
||
add.s64 %rd1781, %rd48, %rd1780;
|
||
ld.shared.u32 %r7525, [%rd1781];
|
||
xor.b32 %r7526, %r7525, %r7524;
|
||
mov.u32 %r10322, 0;
|
||
// inline asm
|
||
bfe.u32 %r5229, %r5230, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1784, %r5229, 4;
|
||
add.s64 %rd1785, %rd51, %rd1784;
|
||
ld.shared.u32 %r7527, [%rd1785];
|
||
add.s32 %r7528, %r7527, %r7526;
|
||
xor.b32 %r7529, %r64, %r42;
|
||
xor.b32 %r5246, %r7529, %r7528;
|
||
// inline asm
|
||
bfe.u32 %r5233, %r5246, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1786, %r5233, 4;
|
||
add.s64 %rd1787, %rd42, %rd1786;
|
||
ld.shared.u32 %r7530, [%rd1787];
|
||
// inline asm
|
||
bfe.u32 %r5237, %r5246, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1788, %r5237, 4;
|
||
add.s64 %rd1789, %rd45, %rd1788;
|
||
ld.shared.u32 %r7531, [%rd1789];
|
||
add.s32 %r7532, %r7531, %r7530;
|
||
// inline asm
|
||
bfe.u32 %r5241, %r5246, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1790, %r5241, 4;
|
||
add.s64 %rd1791, %rd48, %rd1790;
|
||
ld.shared.u32 %r7533, [%rd1791];
|
||
xor.b32 %r7534, %r7533, %r7532;
|
||
// inline asm
|
||
bfe.u32 %r5245, %r5246, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1792, %r5245, 4;
|
||
add.s64 %rd1793, %rd51, %rd1792;
|
||
ld.shared.u32 %r7535, [%rd1793];
|
||
add.s32 %r7536, %r7535, %r7534;
|
||
xor.b32 %r7537, %r67, %r43;
|
||
xor.b32 %r7538, %r7537, %r5230;
|
||
xor.b32 %r5262, %r7538, %r7536;
|
||
// inline asm
|
||
bfe.u32 %r5249, %r5262, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1794, %r5249, 4;
|
||
add.s64 %rd1795, %rd42, %rd1794;
|
||
ld.shared.u32 %r7539, [%rd1795];
|
||
// inline asm
|
||
bfe.u32 %r5253, %r5262, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1796, %r5253, 4;
|
||
add.s64 %rd1797, %rd45, %rd1796;
|
||
ld.shared.u32 %r7540, [%rd1797];
|
||
add.s32 %r7541, %r7540, %r7539;
|
||
// inline asm
|
||
bfe.u32 %r5257, %r5262, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1798, %r5257, 4;
|
||
add.s64 %rd1799, %rd48, %rd1798;
|
||
ld.shared.u32 %r7542, [%rd1799];
|
||
xor.b32 %r7543, %r7542, %r7541;
|
||
// inline asm
|
||
bfe.u32 %r5261, %r5262, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1800, %r5261, 4;
|
||
add.s64 %rd1801, %rd51, %rd1800;
|
||
ld.shared.u32 %r7544, [%rd1801];
|
||
add.s32 %r7545, %r7544, %r7543;
|
||
xor.b32 %r7546, %r66, %r44;
|
||
xor.b32 %r7547, %r7546, %r5246;
|
||
xor.b32 %r5278, %r7547, %r7545;
|
||
// inline asm
|
||
bfe.u32 %r5265, %r5278, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1802, %r5265, 4;
|
||
add.s64 %rd1803, %rd42, %rd1802;
|
||
ld.shared.u32 %r7548, [%rd1803];
|
||
// inline asm
|
||
bfe.u32 %r5269, %r5278, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1804, %r5269, 4;
|
||
add.s64 %rd1805, %rd45, %rd1804;
|
||
ld.shared.u32 %r7549, [%rd1805];
|
||
add.s32 %r7550, %r7549, %r7548;
|
||
// inline asm
|
||
bfe.u32 %r5273, %r5278, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1806, %r5273, 4;
|
||
add.s64 %rd1807, %rd48, %rd1806;
|
||
ld.shared.u32 %r7551, [%rd1807];
|
||
xor.b32 %r7552, %r7551, %r7550;
|
||
// inline asm
|
||
bfe.u32 %r5277, %r5278, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1808, %r5277, 4;
|
||
add.s64 %rd1809, %rd51, %rd1808;
|
||
ld.shared.u32 %r7553, [%rd1809];
|
||
add.s32 %r7554, %r7553, %r7552;
|
||
xor.b32 %r7555, %r69, %r41;
|
||
xor.b32 %r7556, %r7555, %r5262;
|
||
xor.b32 %r5294, %r7556, %r7554;
|
||
// inline asm
|
||
bfe.u32 %r5281, %r5294, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1810, %r5281, 4;
|
||
add.s64 %rd1811, %rd42, %rd1810;
|
||
ld.shared.u32 %r7557, [%rd1811];
|
||
// inline asm
|
||
bfe.u32 %r5285, %r5294, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1812, %r5285, 4;
|
||
add.s64 %rd1813, %rd45, %rd1812;
|
||
ld.shared.u32 %r7558, [%rd1813];
|
||
add.s32 %r7559, %r7558, %r7557;
|
||
// inline asm
|
||
bfe.u32 %r5289, %r5294, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1814, %r5289, 4;
|
||
add.s64 %rd1815, %rd48, %rd1814;
|
||
ld.shared.u32 %r7560, [%rd1815];
|
||
xor.b32 %r7561, %r7560, %r7559;
|
||
// inline asm
|
||
bfe.u32 %r5293, %r5294, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1816, %r5293, 4;
|
||
add.s64 %rd1817, %rd51, %rd1816;
|
||
ld.shared.u32 %r7562, [%rd1817];
|
||
add.s32 %r7563, %r7562, %r7561;
|
||
xor.b32 %r7564, %r68, %r42;
|
||
xor.b32 %r7565, %r7564, %r5278;
|
||
xor.b32 %r5310, %r7565, %r7563;
|
||
// inline asm
|
||
bfe.u32 %r5297, %r5310, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1818, %r5297, 4;
|
||
add.s64 %rd1819, %rd42, %rd1818;
|
||
ld.shared.u32 %r7566, [%rd1819];
|
||
// inline asm
|
||
bfe.u32 %r5301, %r5310, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1820, %r5301, 4;
|
||
add.s64 %rd1821, %rd45, %rd1820;
|
||
ld.shared.u32 %r7567, [%rd1821];
|
||
add.s32 %r7568, %r7567, %r7566;
|
||
// inline asm
|
||
bfe.u32 %r5305, %r5310, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1822, %r5305, 4;
|
||
add.s64 %rd1823, %rd48, %rd1822;
|
||
ld.shared.u32 %r7569, [%rd1823];
|
||
xor.b32 %r7570, %r7569, %r7568;
|
||
// inline asm
|
||
bfe.u32 %r5309, %r5310, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1824, %r5309, 4;
|
||
add.s64 %rd1825, %rd51, %rd1824;
|
||
ld.shared.u32 %r7571, [%rd1825];
|
||
add.s32 %r7572, %r7571, %r7570;
|
||
xor.b32 %r7573, %r71, %r43;
|
||
xor.b32 %r7574, %r7573, %r5294;
|
||
xor.b32 %r5326, %r7574, %r7572;
|
||
// inline asm
|
||
bfe.u32 %r5313, %r5326, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1826, %r5313, 4;
|
||
add.s64 %rd1827, %rd42, %rd1826;
|
||
ld.shared.u32 %r7575, [%rd1827];
|
||
// inline asm
|
||
bfe.u32 %r5317, %r5326, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1828, %r5317, 4;
|
||
add.s64 %rd1829, %rd45, %rd1828;
|
||
ld.shared.u32 %r7576, [%rd1829];
|
||
add.s32 %r7577, %r7576, %r7575;
|
||
// inline asm
|
||
bfe.u32 %r5321, %r5326, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1830, %r5321, 4;
|
||
add.s64 %rd1831, %rd48, %rd1830;
|
||
ld.shared.u32 %r7578, [%rd1831];
|
||
xor.b32 %r7579, %r7578, %r7577;
|
||
// inline asm
|
||
bfe.u32 %r5325, %r5326, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1832, %r5325, 4;
|
||
add.s64 %rd1833, %rd51, %rd1832;
|
||
ld.shared.u32 %r7580, [%rd1833];
|
||
add.s32 %r7581, %r7580, %r7579;
|
||
xor.b32 %r7582, %r70, %r44;
|
||
xor.b32 %r7583, %r7582, %r5310;
|
||
xor.b32 %r5342, %r7583, %r7581;
|
||
// inline asm
|
||
bfe.u32 %r5329, %r5342, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1834, %r5329, 4;
|
||
add.s64 %rd1835, %rd42, %rd1834;
|
||
ld.shared.u32 %r7584, [%rd1835];
|
||
// inline asm
|
||
bfe.u32 %r5333, %r5342, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1836, %r5333, 4;
|
||
add.s64 %rd1837, %rd45, %rd1836;
|
||
ld.shared.u32 %r7585, [%rd1837];
|
||
add.s32 %r7586, %r7585, %r7584;
|
||
// inline asm
|
||
bfe.u32 %r5337, %r5342, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1838, %r5337, 4;
|
||
add.s64 %rd1839, %rd48, %rd1838;
|
||
ld.shared.u32 %r7587, [%rd1839];
|
||
xor.b32 %r7588, %r7587, %r7586;
|
||
// inline asm
|
||
bfe.u32 %r5341, %r5342, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1840, %r5341, 4;
|
||
add.s64 %rd1841, %rd51, %rd1840;
|
||
ld.shared.u32 %r7589, [%rd1841];
|
||
add.s32 %r7590, %r7589, %r7588;
|
||
xor.b32 %r7591, %r73, %r41;
|
||
xor.b32 %r7592, %r7591, %r5326;
|
||
xor.b32 %r5358, %r7592, %r7590;
|
||
// inline asm
|
||
bfe.u32 %r5345, %r5358, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1842, %r5345, 4;
|
||
add.s64 %rd1843, %rd42, %rd1842;
|
||
ld.shared.u32 %r7593, [%rd1843];
|
||
// inline asm
|
||
bfe.u32 %r5349, %r5358, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1844, %r5349, 4;
|
||
add.s64 %rd1845, %rd45, %rd1844;
|
||
ld.shared.u32 %r7594, [%rd1845];
|
||
add.s32 %r7595, %r7594, %r7593;
|
||
// inline asm
|
||
bfe.u32 %r5353, %r5358, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1846, %r5353, 4;
|
||
add.s64 %rd1847, %rd48, %rd1846;
|
||
ld.shared.u32 %r7596, [%rd1847];
|
||
xor.b32 %r7597, %r7596, %r7595;
|
||
// inline asm
|
||
bfe.u32 %r5357, %r5358, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1848, %r5357, 4;
|
||
add.s64 %rd1849, %rd51, %rd1848;
|
||
ld.shared.u32 %r7598, [%rd1849];
|
||
add.s32 %r7599, %r7598, %r7597;
|
||
xor.b32 %r7600, %r72, %r42;
|
||
xor.b32 %r7601, %r7600, %r5342;
|
||
xor.b32 %r5374, %r7601, %r7599;
|
||
// inline asm
|
||
bfe.u32 %r5361, %r5374, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1850, %r5361, 4;
|
||
add.s64 %rd1851, %rd42, %rd1850;
|
||
ld.shared.u32 %r7602, [%rd1851];
|
||
// inline asm
|
||
bfe.u32 %r5365, %r5374, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1852, %r5365, 4;
|
||
add.s64 %rd1853, %rd45, %rd1852;
|
||
ld.shared.u32 %r7603, [%rd1853];
|
||
add.s32 %r7604, %r7603, %r7602;
|
||
// inline asm
|
||
bfe.u32 %r5369, %r5374, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1854, %r5369, 4;
|
||
add.s64 %rd1855, %rd48, %rd1854;
|
||
ld.shared.u32 %r7605, [%rd1855];
|
||
xor.b32 %r7606, %r7605, %r7604;
|
||
// inline asm
|
||
bfe.u32 %r5373, %r5374, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1856, %r5373, 4;
|
||
add.s64 %rd1857, %rd51, %rd1856;
|
||
ld.shared.u32 %r7607, [%rd1857];
|
||
add.s32 %r7608, %r7607, %r7606;
|
||
xor.b32 %r7609, %r75, %r43;
|
||
xor.b32 %r7610, %r7609, %r5358;
|
||
xor.b32 %r5390, %r7610, %r7608;
|
||
// inline asm
|
||
bfe.u32 %r5377, %r5390, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1858, %r5377, 4;
|
||
add.s64 %rd1859, %rd42, %rd1858;
|
||
ld.shared.u32 %r7611, [%rd1859];
|
||
// inline asm
|
||
bfe.u32 %r5381, %r5390, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1860, %r5381, 4;
|
||
add.s64 %rd1861, %rd45, %rd1860;
|
||
ld.shared.u32 %r7612, [%rd1861];
|
||
add.s32 %r7613, %r7612, %r7611;
|
||
// inline asm
|
||
bfe.u32 %r5385, %r5390, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1862, %r5385, 4;
|
||
add.s64 %rd1863, %rd48, %rd1862;
|
||
ld.shared.u32 %r7614, [%rd1863];
|
||
xor.b32 %r7615, %r7614, %r7613;
|
||
// inline asm
|
||
bfe.u32 %r5389, %r5390, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1864, %r5389, 4;
|
||
add.s64 %rd1865, %rd51, %rd1864;
|
||
ld.shared.u32 %r7616, [%rd1865];
|
||
add.s32 %r7617, %r7616, %r7615;
|
||
xor.b32 %r7618, %r74, %r44;
|
||
xor.b32 %r7619, %r7618, %r5374;
|
||
xor.b32 %r5406, %r7619, %r7617;
|
||
// inline asm
|
||
bfe.u32 %r5393, %r5406, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1866, %r5393, 4;
|
||
add.s64 %rd1867, %rd42, %rd1866;
|
||
ld.shared.u32 %r7620, [%rd1867];
|
||
// inline asm
|
||
bfe.u32 %r5397, %r5406, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1868, %r5397, 4;
|
||
add.s64 %rd1869, %rd45, %rd1868;
|
||
ld.shared.u32 %r7621, [%rd1869];
|
||
add.s32 %r7622, %r7621, %r7620;
|
||
// inline asm
|
||
bfe.u32 %r5401, %r5406, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1870, %r5401, 4;
|
||
add.s64 %rd1871, %rd48, %rd1870;
|
||
ld.shared.u32 %r7623, [%rd1871];
|
||
xor.b32 %r7624, %r7623, %r7622;
|
||
// inline asm
|
||
bfe.u32 %r5405, %r5406, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1872, %r5405, 4;
|
||
add.s64 %rd1873, %rd51, %rd1872;
|
||
ld.shared.u32 %r7625, [%rd1873];
|
||
add.s32 %r7626, %r7625, %r7624;
|
||
xor.b32 %r7627, %r77, %r41;
|
||
xor.b32 %r7628, %r7627, %r5390;
|
||
xor.b32 %r5422, %r7628, %r7626;
|
||
// inline asm
|
||
bfe.u32 %r5409, %r5422, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1874, %r5409, 4;
|
||
add.s64 %rd1875, %rd42, %rd1874;
|
||
ld.shared.u32 %r7629, [%rd1875];
|
||
// inline asm
|
||
bfe.u32 %r5413, %r5422, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1876, %r5413, 4;
|
||
add.s64 %rd1877, %rd45, %rd1876;
|
||
ld.shared.u32 %r7630, [%rd1877];
|
||
add.s32 %r7631, %r7630, %r7629;
|
||
// inline asm
|
||
bfe.u32 %r5417, %r5422, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1878, %r5417, 4;
|
||
add.s64 %rd1879, %rd48, %rd1878;
|
||
ld.shared.u32 %r7632, [%rd1879];
|
||
xor.b32 %r7633, %r7632, %r7631;
|
||
// inline asm
|
||
bfe.u32 %r5421, %r5422, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1880, %r5421, 4;
|
||
add.s64 %rd1881, %rd51, %rd1880;
|
||
ld.shared.u32 %r7634, [%rd1881];
|
||
add.s32 %r7635, %r7634, %r7633;
|
||
xor.b32 %r7636, %r76, %r42;
|
||
xor.b32 %r7637, %r7636, %r5406;
|
||
xor.b32 %r5438, %r7637, %r7635;
|
||
// inline asm
|
||
bfe.u32 %r5425, %r5438, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1882, %r5425, 4;
|
||
add.s64 %rd1883, %rd42, %rd1882;
|
||
ld.shared.u32 %r7638, [%rd1883];
|
||
// inline asm
|
||
bfe.u32 %r5429, %r5438, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1884, %r5429, 4;
|
||
add.s64 %rd1885, %rd45, %rd1884;
|
||
ld.shared.u32 %r7639, [%rd1885];
|
||
add.s32 %r7640, %r7639, %r7638;
|
||
// inline asm
|
||
bfe.u32 %r5433, %r5438, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1886, %r5433, 4;
|
||
add.s64 %rd1887, %rd48, %rd1886;
|
||
ld.shared.u32 %r7641, [%rd1887];
|
||
xor.b32 %r7642, %r7641, %r7640;
|
||
// inline asm
|
||
bfe.u32 %r5437, %r5438, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1888, %r5437, 4;
|
||
add.s64 %rd1889, %rd51, %rd1888;
|
||
ld.shared.u32 %r7643, [%rd1889];
|
||
add.s32 %r7644, %r7643, %r7642;
|
||
xor.b32 %r7645, %r79, %r43;
|
||
xor.b32 %r7646, %r7645, %r5422;
|
||
xor.b32 %r5454, %r7646, %r7644;
|
||
// inline asm
|
||
bfe.u32 %r5441, %r5454, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1890, %r5441, 4;
|
||
add.s64 %rd1891, %rd42, %rd1890;
|
||
ld.shared.u32 %r7647, [%rd1891];
|
||
// inline asm
|
||
bfe.u32 %r5445, %r5454, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1892, %r5445, 4;
|
||
add.s64 %rd1893, %rd45, %rd1892;
|
||
ld.shared.u32 %r7648, [%rd1893];
|
||
add.s32 %r7649, %r7648, %r7647;
|
||
// inline asm
|
||
bfe.u32 %r5449, %r5454, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1894, %r5449, 4;
|
||
add.s64 %rd1895, %rd48, %rd1894;
|
||
ld.shared.u32 %r7650, [%rd1895];
|
||
xor.b32 %r7651, %r7650, %r7649;
|
||
// inline asm
|
||
bfe.u32 %r5453, %r5454, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1896, %r5453, 4;
|
||
add.s64 %rd1897, %rd51, %rd1896;
|
||
ld.shared.u32 %r7652, [%rd1897];
|
||
add.s32 %r7653, %r7652, %r7651;
|
||
xor.b32 %r7654, %r78, %r44;
|
||
xor.b32 %r7655, %r7654, %r5438;
|
||
xor.b32 %r5470, %r7655, %r7653;
|
||
// inline asm
|
||
bfe.u32 %r5457, %r5470, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1898, %r5457, 4;
|
||
add.s64 %rd1899, %rd42, %rd1898;
|
||
ld.shared.u32 %r7656, [%rd1899];
|
||
// inline asm
|
||
bfe.u32 %r5461, %r5470, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1900, %r5461, 4;
|
||
add.s64 %rd1901, %rd45, %rd1900;
|
||
ld.shared.u32 %r7657, [%rd1901];
|
||
add.s32 %r7658, %r7657, %r7656;
|
||
// inline asm
|
||
bfe.u32 %r5465, %r5470, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1902, %r5465, 4;
|
||
add.s64 %rd1903, %rd48, %rd1902;
|
||
ld.shared.u32 %r7659, [%rd1903];
|
||
xor.b32 %r7660, %r7659, %r7658;
|
||
// inline asm
|
||
bfe.u32 %r5469, %r5470, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1904, %r5469, 4;
|
||
add.s64 %rd1905, %rd51, %rd1904;
|
||
ld.shared.u32 %r7661, [%rd1905];
|
||
add.s32 %r7662, %r7661, %r7660;
|
||
xor.b32 %r7663, %r81, %r41;
|
||
xor.b32 %r7664, %r7663, %r5454;
|
||
xor.b32 %r10350, %r7664, %r7662;
|
||
xor.b32 %r7665, %r80, %r42;
|
||
xor.b32 %r10351, %r7665, %r5470;
|
||
// inline asm
|
||
bfe.u32 %r5473, %r10322, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1906, %r5473, 4;
|
||
add.s64 %rd1907, %rd42, %rd1906;
|
||
ld.shared.u32 %r7666, [%rd1907];
|
||
// inline asm
|
||
bfe.u32 %r5477, %r10322, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1908, %r5477, 4;
|
||
add.s64 %rd1909, %rd45, %rd1908;
|
||
ld.shared.u32 %r7667, [%rd1909];
|
||
add.s32 %r7668, %r7667, %r7666;
|
||
// inline asm
|
||
bfe.u32 %r5481, %r10322, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1910, %r5481, 4;
|
||
add.s64 %rd1911, %rd48, %rd1910;
|
||
ld.shared.u32 %r7669, [%rd1911];
|
||
xor.b32 %r7670, %r7669, %r7668;
|
||
// inline asm
|
||
bfe.u32 %r5485, %r10322, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1912, %r5485, 4;
|
||
add.s64 %rd1913, %rd51, %rd1912;
|
||
ld.shared.u32 %r7671, [%rd1913];
|
||
add.s32 %r5502, %r7671, %r7670;
|
||
// inline asm
|
||
bfe.u32 %r5489, %r5502, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1914, %r5489, 4;
|
||
add.s64 %rd1915, %rd42, %rd1914;
|
||
ld.shared.u32 %r7672, [%rd1915];
|
||
// inline asm
|
||
bfe.u32 %r5493, %r5502, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1916, %r5493, 4;
|
||
add.s64 %rd1917, %rd45, %rd1916;
|
||
ld.shared.u32 %r7673, [%rd1917];
|
||
add.s32 %r7674, %r7673, %r7672;
|
||
// inline asm
|
||
bfe.u32 %r5497, %r5502, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1918, %r5497, 4;
|
||
add.s64 %rd1919, %rd48, %rd1918;
|
||
ld.shared.u32 %r7675, [%rd1919];
|
||
xor.b32 %r7676, %r7675, %r7674;
|
||
// inline asm
|
||
bfe.u32 %r5501, %r5502, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1920, %r5501, 4;
|
||
add.s64 %rd1921, %rd51, %rd1920;
|
||
ld.shared.u32 %r7677, [%rd1921];
|
||
add.s32 %r7678, %r7677, %r7676;
|
||
xor.b32 %r5518, %r7537, %r7678;
|
||
// inline asm
|
||
bfe.u32 %r5505, %r5518, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1922, %r5505, 4;
|
||
add.s64 %rd1923, %rd42, %rd1922;
|
||
ld.shared.u32 %r7679, [%rd1923];
|
||
// inline asm
|
||
bfe.u32 %r5509, %r5518, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1924, %r5509, 4;
|
||
add.s64 %rd1925, %rd45, %rd1924;
|
||
ld.shared.u32 %r7680, [%rd1925];
|
||
add.s32 %r7681, %r7680, %r7679;
|
||
// inline asm
|
||
bfe.u32 %r5513, %r5518, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1926, %r5513, 4;
|
||
add.s64 %rd1927, %rd48, %rd1926;
|
||
ld.shared.u32 %r7682, [%rd1927];
|
||
xor.b32 %r7683, %r7682, %r7681;
|
||
// inline asm
|
||
bfe.u32 %r5517, %r5518, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1928, %r5517, 4;
|
||
add.s64 %rd1929, %rd51, %rd1928;
|
||
ld.shared.u32 %r7684, [%rd1929];
|
||
add.s32 %r7685, %r7684, %r7683;
|
||
xor.b32 %r7686, %r7546, %r5502;
|
||
xor.b32 %r5534, %r7686, %r7685;
|
||
// inline asm
|
||
bfe.u32 %r5521, %r5534, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1930, %r5521, 4;
|
||
add.s64 %rd1931, %rd42, %rd1930;
|
||
ld.shared.u32 %r7687, [%rd1931];
|
||
// inline asm
|
||
bfe.u32 %r5525, %r5534, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1932, %r5525, 4;
|
||
add.s64 %rd1933, %rd45, %rd1932;
|
||
ld.shared.u32 %r7688, [%rd1933];
|
||
add.s32 %r7689, %r7688, %r7687;
|
||
// inline asm
|
||
bfe.u32 %r5529, %r5534, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1934, %r5529, 4;
|
||
add.s64 %rd1935, %rd48, %rd1934;
|
||
ld.shared.u32 %r7690, [%rd1935];
|
||
xor.b32 %r7691, %r7690, %r7689;
|
||
// inline asm
|
||
bfe.u32 %r5533, %r5534, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1936, %r5533, 4;
|
||
add.s64 %rd1937, %rd51, %rd1936;
|
||
ld.shared.u32 %r7692, [%rd1937];
|
||
add.s32 %r7693, %r7692, %r7691;
|
||
xor.b32 %r7694, %r7555, %r5518;
|
||
xor.b32 %r5550, %r7694, %r7693;
|
||
// inline asm
|
||
bfe.u32 %r5537, %r5550, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1938, %r5537, 4;
|
||
add.s64 %rd1939, %rd42, %rd1938;
|
||
ld.shared.u32 %r7695, [%rd1939];
|
||
// inline asm
|
||
bfe.u32 %r5541, %r5550, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1940, %r5541, 4;
|
||
add.s64 %rd1941, %rd45, %rd1940;
|
||
ld.shared.u32 %r7696, [%rd1941];
|
||
add.s32 %r7697, %r7696, %r7695;
|
||
// inline asm
|
||
bfe.u32 %r5545, %r5550, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1942, %r5545, 4;
|
||
add.s64 %rd1943, %rd48, %rd1942;
|
||
ld.shared.u32 %r7698, [%rd1943];
|
||
xor.b32 %r7699, %r7698, %r7697;
|
||
// inline asm
|
||
bfe.u32 %r5549, %r5550, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1944, %r5549, 4;
|
||
add.s64 %rd1945, %rd51, %rd1944;
|
||
ld.shared.u32 %r7700, [%rd1945];
|
||
add.s32 %r7701, %r7700, %r7699;
|
||
xor.b32 %r7702, %r7564, %r5534;
|
||
xor.b32 %r5566, %r7702, %r7701;
|
||
// inline asm
|
||
bfe.u32 %r5553, %r5566, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1946, %r5553, 4;
|
||
add.s64 %rd1947, %rd42, %rd1946;
|
||
ld.shared.u32 %r7703, [%rd1947];
|
||
// inline asm
|
||
bfe.u32 %r5557, %r5566, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1948, %r5557, 4;
|
||
add.s64 %rd1949, %rd45, %rd1948;
|
||
ld.shared.u32 %r7704, [%rd1949];
|
||
add.s32 %r7705, %r7704, %r7703;
|
||
// inline asm
|
||
bfe.u32 %r5561, %r5566, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1950, %r5561, 4;
|
||
add.s64 %rd1951, %rd48, %rd1950;
|
||
ld.shared.u32 %r7706, [%rd1951];
|
||
xor.b32 %r7707, %r7706, %r7705;
|
||
// inline asm
|
||
bfe.u32 %r5565, %r5566, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1952, %r5565, 4;
|
||
add.s64 %rd1953, %rd51, %rd1952;
|
||
ld.shared.u32 %r7708, [%rd1953];
|
||
add.s32 %r7709, %r7708, %r7707;
|
||
xor.b32 %r7710, %r7573, %r5550;
|
||
xor.b32 %r5582, %r7710, %r7709;
|
||
// inline asm
|
||
bfe.u32 %r5569, %r5582, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1954, %r5569, 4;
|
||
add.s64 %rd1955, %rd42, %rd1954;
|
||
ld.shared.u32 %r7711, [%rd1955];
|
||
// inline asm
|
||
bfe.u32 %r5573, %r5582, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1956, %r5573, 4;
|
||
add.s64 %rd1957, %rd45, %rd1956;
|
||
ld.shared.u32 %r7712, [%rd1957];
|
||
add.s32 %r7713, %r7712, %r7711;
|
||
// inline asm
|
||
bfe.u32 %r5577, %r5582, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1958, %r5577, 4;
|
||
add.s64 %rd1959, %rd48, %rd1958;
|
||
ld.shared.u32 %r7714, [%rd1959];
|
||
xor.b32 %r7715, %r7714, %r7713;
|
||
// inline asm
|
||
bfe.u32 %r5581, %r5582, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1960, %r5581, 4;
|
||
add.s64 %rd1961, %rd51, %rd1960;
|
||
ld.shared.u32 %r7716, [%rd1961];
|
||
add.s32 %r7717, %r7716, %r7715;
|
||
xor.b32 %r7718, %r7582, %r5566;
|
||
xor.b32 %r5598, %r7718, %r7717;
|
||
// inline asm
|
||
bfe.u32 %r5585, %r5598, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1962, %r5585, 4;
|
||
add.s64 %rd1963, %rd42, %rd1962;
|
||
ld.shared.u32 %r7719, [%rd1963];
|
||
// inline asm
|
||
bfe.u32 %r5589, %r5598, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1964, %r5589, 4;
|
||
add.s64 %rd1965, %rd45, %rd1964;
|
||
ld.shared.u32 %r7720, [%rd1965];
|
||
add.s32 %r7721, %r7720, %r7719;
|
||
// inline asm
|
||
bfe.u32 %r5593, %r5598, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1966, %r5593, 4;
|
||
add.s64 %rd1967, %rd48, %rd1966;
|
||
ld.shared.u32 %r7722, [%rd1967];
|
||
xor.b32 %r7723, %r7722, %r7721;
|
||
// inline asm
|
||
bfe.u32 %r5597, %r5598, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1968, %r5597, 4;
|
||
add.s64 %rd1969, %rd51, %rd1968;
|
||
ld.shared.u32 %r7724, [%rd1969];
|
||
add.s32 %r7725, %r7724, %r7723;
|
||
xor.b32 %r7726, %r7591, %r5582;
|
||
xor.b32 %r5614, %r7726, %r7725;
|
||
// inline asm
|
||
bfe.u32 %r5601, %r5614, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1970, %r5601, 4;
|
||
add.s64 %rd1971, %rd42, %rd1970;
|
||
ld.shared.u32 %r7727, [%rd1971];
|
||
// inline asm
|
||
bfe.u32 %r5605, %r5614, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1972, %r5605, 4;
|
||
add.s64 %rd1973, %rd45, %rd1972;
|
||
ld.shared.u32 %r7728, [%rd1973];
|
||
add.s32 %r7729, %r7728, %r7727;
|
||
// inline asm
|
||
bfe.u32 %r5609, %r5614, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1974, %r5609, 4;
|
||
add.s64 %rd1975, %rd48, %rd1974;
|
||
ld.shared.u32 %r7730, [%rd1975];
|
||
xor.b32 %r7731, %r7730, %r7729;
|
||
// inline asm
|
||
bfe.u32 %r5613, %r5614, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1976, %r5613, 4;
|
||
add.s64 %rd1977, %rd51, %rd1976;
|
||
ld.shared.u32 %r7732, [%rd1977];
|
||
add.s32 %r7733, %r7732, %r7731;
|
||
xor.b32 %r7734, %r7600, %r5598;
|
||
xor.b32 %r5630, %r7734, %r7733;
|
||
// inline asm
|
||
bfe.u32 %r5617, %r5630, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1978, %r5617, 4;
|
||
add.s64 %rd1979, %rd42, %rd1978;
|
||
ld.shared.u32 %r7735, [%rd1979];
|
||
// inline asm
|
||
bfe.u32 %r5621, %r5630, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1980, %r5621, 4;
|
||
add.s64 %rd1981, %rd45, %rd1980;
|
||
ld.shared.u32 %r7736, [%rd1981];
|
||
add.s32 %r7737, %r7736, %r7735;
|
||
// inline asm
|
||
bfe.u32 %r5625, %r5630, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1982, %r5625, 4;
|
||
add.s64 %rd1983, %rd48, %rd1982;
|
||
ld.shared.u32 %r7738, [%rd1983];
|
||
xor.b32 %r7739, %r7738, %r7737;
|
||
// inline asm
|
||
bfe.u32 %r5629, %r5630, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1984, %r5629, 4;
|
||
add.s64 %rd1985, %rd51, %rd1984;
|
||
ld.shared.u32 %r7740, [%rd1985];
|
||
add.s32 %r7741, %r7740, %r7739;
|
||
xor.b32 %r7742, %r7609, %r5614;
|
||
xor.b32 %r5646, %r7742, %r7741;
|
||
// inline asm
|
||
bfe.u32 %r5633, %r5646, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1986, %r5633, 4;
|
||
add.s64 %rd1987, %rd42, %rd1986;
|
||
ld.shared.u32 %r7743, [%rd1987];
|
||
// inline asm
|
||
bfe.u32 %r5637, %r5646, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1988, %r5637, 4;
|
||
add.s64 %rd1989, %rd45, %rd1988;
|
||
ld.shared.u32 %r7744, [%rd1989];
|
||
add.s32 %r7745, %r7744, %r7743;
|
||
// inline asm
|
||
bfe.u32 %r5641, %r5646, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1990, %r5641, 4;
|
||
add.s64 %rd1991, %rd48, %rd1990;
|
||
ld.shared.u32 %r7746, [%rd1991];
|
||
xor.b32 %r7747, %r7746, %r7745;
|
||
// inline asm
|
||
bfe.u32 %r5645, %r5646, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1992, %r5645, 4;
|
||
add.s64 %rd1993, %rd51, %rd1992;
|
||
ld.shared.u32 %r7748, [%rd1993];
|
||
add.s32 %r7749, %r7748, %r7747;
|
||
xor.b32 %r7750, %r7618, %r5630;
|
||
xor.b32 %r5662, %r7750, %r7749;
|
||
// inline asm
|
||
bfe.u32 %r5649, %r5662, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1994, %r5649, 4;
|
||
add.s64 %rd1995, %rd42, %rd1994;
|
||
ld.shared.u32 %r7751, [%rd1995];
|
||
// inline asm
|
||
bfe.u32 %r5653, %r5662, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1996, %r5653, 4;
|
||
add.s64 %rd1997, %rd45, %rd1996;
|
||
ld.shared.u32 %r7752, [%rd1997];
|
||
add.s32 %r7753, %r7752, %r7751;
|
||
// inline asm
|
||
bfe.u32 %r5657, %r5662, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd1998, %r5657, 4;
|
||
add.s64 %rd1999, %rd48, %rd1998;
|
||
ld.shared.u32 %r7754, [%rd1999];
|
||
xor.b32 %r7755, %r7754, %r7753;
|
||
// inline asm
|
||
bfe.u32 %r5661, %r5662, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2000, %r5661, 4;
|
||
add.s64 %rd2001, %rd51, %rd2000;
|
||
ld.shared.u32 %r7756, [%rd2001];
|
||
add.s32 %r7757, %r7756, %r7755;
|
||
xor.b32 %r7758, %r7627, %r5646;
|
||
xor.b32 %r5678, %r7758, %r7757;
|
||
// inline asm
|
||
bfe.u32 %r5665, %r5678, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2002, %r5665, 4;
|
||
add.s64 %rd2003, %rd42, %rd2002;
|
||
ld.shared.u32 %r7759, [%rd2003];
|
||
// inline asm
|
||
bfe.u32 %r5669, %r5678, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2004, %r5669, 4;
|
||
add.s64 %rd2005, %rd45, %rd2004;
|
||
ld.shared.u32 %r7760, [%rd2005];
|
||
add.s32 %r7761, %r7760, %r7759;
|
||
// inline asm
|
||
bfe.u32 %r5673, %r5678, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2006, %r5673, 4;
|
||
add.s64 %rd2007, %rd48, %rd2006;
|
||
ld.shared.u32 %r7762, [%rd2007];
|
||
xor.b32 %r7763, %r7762, %r7761;
|
||
// inline asm
|
||
bfe.u32 %r5677, %r5678, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2008, %r5677, 4;
|
||
add.s64 %rd2009, %rd51, %rd2008;
|
||
ld.shared.u32 %r7764, [%rd2009];
|
||
add.s32 %r7765, %r7764, %r7763;
|
||
xor.b32 %r7766, %r7636, %r5662;
|
||
xor.b32 %r5694, %r7766, %r7765;
|
||
// inline asm
|
||
bfe.u32 %r5681, %r5694, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2010, %r5681, 4;
|
||
add.s64 %rd2011, %rd42, %rd2010;
|
||
ld.shared.u32 %r7767, [%rd2011];
|
||
// inline asm
|
||
bfe.u32 %r5685, %r5694, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2012, %r5685, 4;
|
||
add.s64 %rd2013, %rd45, %rd2012;
|
||
ld.shared.u32 %r7768, [%rd2013];
|
||
add.s32 %r7769, %r7768, %r7767;
|
||
// inline asm
|
||
bfe.u32 %r5689, %r5694, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2014, %r5689, 4;
|
||
add.s64 %rd2015, %rd48, %rd2014;
|
||
ld.shared.u32 %r7770, [%rd2015];
|
||
xor.b32 %r7771, %r7770, %r7769;
|
||
// inline asm
|
||
bfe.u32 %r5693, %r5694, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2016, %r5693, 4;
|
||
add.s64 %rd2017, %rd51, %rd2016;
|
||
ld.shared.u32 %r7772, [%rd2017];
|
||
add.s32 %r7773, %r7772, %r7771;
|
||
xor.b32 %r7774, %r7645, %r5678;
|
||
xor.b32 %r5710, %r7774, %r7773;
|
||
// inline asm
|
||
bfe.u32 %r5697, %r5710, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2018, %r5697, 4;
|
||
add.s64 %rd2019, %rd42, %rd2018;
|
||
ld.shared.u32 %r7775, [%rd2019];
|
||
// inline asm
|
||
bfe.u32 %r5701, %r5710, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2020, %r5701, 4;
|
||
add.s64 %rd2021, %rd45, %rd2020;
|
||
ld.shared.u32 %r7776, [%rd2021];
|
||
add.s32 %r7777, %r7776, %r7775;
|
||
// inline asm
|
||
bfe.u32 %r5705, %r5710, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2022, %r5705, 4;
|
||
add.s64 %rd2023, %rd48, %rd2022;
|
||
ld.shared.u32 %r7778, [%rd2023];
|
||
xor.b32 %r7779, %r7778, %r7777;
|
||
// inline asm
|
||
bfe.u32 %r5709, %r5710, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2024, %r5709, 4;
|
||
add.s64 %rd2025, %rd51, %rd2024;
|
||
ld.shared.u32 %r7780, [%rd2025];
|
||
add.s32 %r7781, %r7780, %r7779;
|
||
xor.b32 %r7782, %r7654, %r5694;
|
||
xor.b32 %r5726, %r7782, %r7781;
|
||
// inline asm
|
||
bfe.u32 %r5713, %r5726, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2026, %r5713, 4;
|
||
add.s64 %rd2027, %rd42, %rd2026;
|
||
ld.shared.u32 %r7783, [%rd2027];
|
||
// inline asm
|
||
bfe.u32 %r5717, %r5726, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2028, %r5717, 4;
|
||
add.s64 %rd2029, %rd45, %rd2028;
|
||
ld.shared.u32 %r7784, [%rd2029];
|
||
add.s32 %r7785, %r7784, %r7783;
|
||
// inline asm
|
||
bfe.u32 %r5721, %r5726, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2030, %r5721, 4;
|
||
add.s64 %rd2031, %rd48, %rd2030;
|
||
ld.shared.u32 %r7786, [%rd2031];
|
||
xor.b32 %r7787, %r7786, %r7785;
|
||
// inline asm
|
||
bfe.u32 %r5725, %r5726, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2032, %r5725, 4;
|
||
add.s64 %rd2033, %rd51, %rd2032;
|
||
ld.shared.u32 %r7788, [%rd2033];
|
||
add.s32 %r7789, %r7788, %r7787;
|
||
xor.b32 %r7790, %r7663, %r5710;
|
||
xor.b32 %r10348, %r7790, %r7789;
|
||
xor.b32 %r10349, %r7665, %r5726;
|
||
xor.b32 %r5742, %r5470, %r5726;
|
||
// inline asm
|
||
bfe.u32 %r5729, %r5742, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2034, %r5729, 4;
|
||
add.s64 %rd2035, %rd42, %rd2034;
|
||
ld.shared.u32 %r7791, [%rd2035];
|
||
// inline asm
|
||
bfe.u32 %r5733, %r5742, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2036, %r5733, 4;
|
||
add.s64 %rd2037, %rd45, %rd2036;
|
||
ld.shared.u32 %r7792, [%rd2037];
|
||
add.s32 %r7793, %r7792, %r7791;
|
||
// inline asm
|
||
bfe.u32 %r5737, %r5742, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2038, %r5737, 4;
|
||
add.s64 %rd2039, %rd48, %rd2038;
|
||
ld.shared.u32 %r7794, [%rd2039];
|
||
xor.b32 %r7795, %r7794, %r7793;
|
||
// inline asm
|
||
bfe.u32 %r5741, %r5742, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2040, %r5741, 4;
|
||
add.s64 %rd2041, %rd51, %rd2040;
|
||
ld.shared.u32 %r7796, [%rd2041];
|
||
add.s32 %r7797, %r7796, %r7795;
|
||
xor.b32 %r7798, %r10350, %r10348;
|
||
xor.b32 %r5758, %r7798, %r7797;
|
||
// inline asm
|
||
bfe.u32 %r5745, %r5758, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2042, %r5745, 4;
|
||
add.s64 %rd2043, %rd42, %rd2042;
|
||
ld.shared.u32 %r7799, [%rd2043];
|
||
// inline asm
|
||
bfe.u32 %r5749, %r5758, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2044, %r5749, 4;
|
||
add.s64 %rd2045, %rd45, %rd2044;
|
||
ld.shared.u32 %r7800, [%rd2045];
|
||
add.s32 %r7801, %r7800, %r7799;
|
||
// inline asm
|
||
bfe.u32 %r5753, %r5758, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2046, %r5753, 4;
|
||
add.s64 %rd2047, %rd48, %rd2046;
|
||
ld.shared.u32 %r7802, [%rd2047];
|
||
xor.b32 %r7803, %r7802, %r7801;
|
||
// inline asm
|
||
bfe.u32 %r5757, %r5758, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2048, %r5757, 4;
|
||
add.s64 %rd2049, %rd51, %rd2048;
|
||
ld.shared.u32 %r7804, [%rd2049];
|
||
add.s32 %r7805, %r7804, %r7803;
|
||
xor.b32 %r5774, %r10351, %r7805;
|
||
// inline asm
|
||
bfe.u32 %r5761, %r5774, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2050, %r5761, 4;
|
||
add.s64 %rd2051, %rd42, %rd2050;
|
||
ld.shared.u32 %r7806, [%rd2051];
|
||
// inline asm
|
||
bfe.u32 %r5765, %r5774, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2052, %r5765, 4;
|
||
add.s64 %rd2053, %rd45, %rd2052;
|
||
ld.shared.u32 %r7807, [%rd2053];
|
||
add.s32 %r7808, %r7807, %r7806;
|
||
// inline asm
|
||
bfe.u32 %r5769, %r5774, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2054, %r5769, 4;
|
||
add.s64 %rd2055, %rd48, %rd2054;
|
||
ld.shared.u32 %r7809, [%rd2055];
|
||
xor.b32 %r7810, %r7809, %r7808;
|
||
// inline asm
|
||
bfe.u32 %r5773, %r5774, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2056, %r5773, 4;
|
||
add.s64 %rd2057, %rd51, %rd2056;
|
||
ld.shared.u32 %r7811, [%rd2057];
|
||
add.s32 %r7812, %r7811, %r7810;
|
||
xor.b32 %r7813, %r10350, %r7797;
|
||
xor.b32 %r5790, %r7813, %r7812;
|
||
// inline asm
|
||
bfe.u32 %r5777, %r5790, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2058, %r5777, 4;
|
||
add.s64 %rd2059, %rd42, %rd2058;
|
||
ld.shared.u32 %r7814, [%rd2059];
|
||
// inline asm
|
||
bfe.u32 %r5781, %r5790, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2060, %r5781, 4;
|
||
add.s64 %rd2061, %rd45, %rd2060;
|
||
ld.shared.u32 %r7815, [%rd2061];
|
||
add.s32 %r7816, %r7815, %r7814;
|
||
// inline asm
|
||
bfe.u32 %r5785, %r5790, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2062, %r5785, 4;
|
||
add.s64 %rd2063, %rd48, %rd2062;
|
||
ld.shared.u32 %r7817, [%rd2063];
|
||
xor.b32 %r7818, %r7817, %r7816;
|
||
// inline asm
|
||
bfe.u32 %r5789, %r5790, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2064, %r5789, 4;
|
||
add.s64 %rd2065, %rd51, %rd2064;
|
||
ld.shared.u32 %r7819, [%rd2065];
|
||
add.s32 %r7820, %r7819, %r7818;
|
||
xor.b32 %r7821, %r7555, %r5774;
|
||
xor.b32 %r5806, %r7821, %r7820;
|
||
// inline asm
|
||
bfe.u32 %r5793, %r5806, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2066, %r5793, 4;
|
||
add.s64 %rd2067, %rd42, %rd2066;
|
||
ld.shared.u32 %r7822, [%rd2067];
|
||
// inline asm
|
||
bfe.u32 %r5797, %r5806, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2068, %r5797, 4;
|
||
add.s64 %rd2069, %rd45, %rd2068;
|
||
ld.shared.u32 %r7823, [%rd2069];
|
||
add.s32 %r7824, %r7823, %r7822;
|
||
// inline asm
|
||
bfe.u32 %r5801, %r5806, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2070, %r5801, 4;
|
||
add.s64 %rd2071, %rd48, %rd2070;
|
||
ld.shared.u32 %r7825, [%rd2071];
|
||
xor.b32 %r7826, %r7825, %r7824;
|
||
// inline asm
|
||
bfe.u32 %r5805, %r5806, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2072, %r5805, 4;
|
||
add.s64 %rd2073, %rd51, %rd2072;
|
||
ld.shared.u32 %r7827, [%rd2073];
|
||
add.s32 %r7828, %r7827, %r7826;
|
||
xor.b32 %r7829, %r7564, %r5790;
|
||
xor.b32 %r5822, %r7829, %r7828;
|
||
// inline asm
|
||
bfe.u32 %r5809, %r5822, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2074, %r5809, 4;
|
||
add.s64 %rd2075, %rd42, %rd2074;
|
||
ld.shared.u32 %r7830, [%rd2075];
|
||
// inline asm
|
||
bfe.u32 %r5813, %r5822, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2076, %r5813, 4;
|
||
add.s64 %rd2077, %rd45, %rd2076;
|
||
ld.shared.u32 %r7831, [%rd2077];
|
||
add.s32 %r7832, %r7831, %r7830;
|
||
// inline asm
|
||
bfe.u32 %r5817, %r5822, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2078, %r5817, 4;
|
||
add.s64 %rd2079, %rd48, %rd2078;
|
||
ld.shared.u32 %r7833, [%rd2079];
|
||
xor.b32 %r7834, %r7833, %r7832;
|
||
// inline asm
|
||
bfe.u32 %r5821, %r5822, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2080, %r5821, 4;
|
||
add.s64 %rd2081, %rd51, %rd2080;
|
||
ld.shared.u32 %r7835, [%rd2081];
|
||
add.s32 %r7836, %r7835, %r7834;
|
||
xor.b32 %r7837, %r7573, %r5806;
|
||
xor.b32 %r5838, %r7837, %r7836;
|
||
// inline asm
|
||
bfe.u32 %r5825, %r5838, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2082, %r5825, 4;
|
||
add.s64 %rd2083, %rd42, %rd2082;
|
||
ld.shared.u32 %r7838, [%rd2083];
|
||
// inline asm
|
||
bfe.u32 %r5829, %r5838, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2084, %r5829, 4;
|
||
add.s64 %rd2085, %rd45, %rd2084;
|
||
ld.shared.u32 %r7839, [%rd2085];
|
||
add.s32 %r7840, %r7839, %r7838;
|
||
// inline asm
|
||
bfe.u32 %r5833, %r5838, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2086, %r5833, 4;
|
||
add.s64 %rd2087, %rd48, %rd2086;
|
||
ld.shared.u32 %r7841, [%rd2087];
|
||
xor.b32 %r7842, %r7841, %r7840;
|
||
// inline asm
|
||
bfe.u32 %r5837, %r5838, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2088, %r5837, 4;
|
||
add.s64 %rd2089, %rd51, %rd2088;
|
||
ld.shared.u32 %r7843, [%rd2089];
|
||
add.s32 %r7844, %r7843, %r7842;
|
||
xor.b32 %r7845, %r7582, %r5822;
|
||
xor.b32 %r5854, %r7845, %r7844;
|
||
// inline asm
|
||
bfe.u32 %r5841, %r5854, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2090, %r5841, 4;
|
||
add.s64 %rd2091, %rd42, %rd2090;
|
||
ld.shared.u32 %r7846, [%rd2091];
|
||
// inline asm
|
||
bfe.u32 %r5845, %r5854, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2092, %r5845, 4;
|
||
add.s64 %rd2093, %rd45, %rd2092;
|
||
ld.shared.u32 %r7847, [%rd2093];
|
||
add.s32 %r7848, %r7847, %r7846;
|
||
// inline asm
|
||
bfe.u32 %r5849, %r5854, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2094, %r5849, 4;
|
||
add.s64 %rd2095, %rd48, %rd2094;
|
||
ld.shared.u32 %r7849, [%rd2095];
|
||
xor.b32 %r7850, %r7849, %r7848;
|
||
// inline asm
|
||
bfe.u32 %r5853, %r5854, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2096, %r5853, 4;
|
||
add.s64 %rd2097, %rd51, %rd2096;
|
||
ld.shared.u32 %r7851, [%rd2097];
|
||
add.s32 %r7852, %r7851, %r7850;
|
||
xor.b32 %r7853, %r7591, %r5838;
|
||
xor.b32 %r5870, %r7853, %r7852;
|
||
// inline asm
|
||
bfe.u32 %r5857, %r5870, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2098, %r5857, 4;
|
||
add.s64 %rd2099, %rd42, %rd2098;
|
||
ld.shared.u32 %r7854, [%rd2099];
|
||
// inline asm
|
||
bfe.u32 %r5861, %r5870, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2100, %r5861, 4;
|
||
add.s64 %rd2101, %rd45, %rd2100;
|
||
ld.shared.u32 %r7855, [%rd2101];
|
||
add.s32 %r7856, %r7855, %r7854;
|
||
// inline asm
|
||
bfe.u32 %r5865, %r5870, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2102, %r5865, 4;
|
||
add.s64 %rd2103, %rd48, %rd2102;
|
||
ld.shared.u32 %r7857, [%rd2103];
|
||
xor.b32 %r7858, %r7857, %r7856;
|
||
// inline asm
|
||
bfe.u32 %r5869, %r5870, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2104, %r5869, 4;
|
||
add.s64 %rd2105, %rd51, %rd2104;
|
||
ld.shared.u32 %r7859, [%rd2105];
|
||
add.s32 %r7860, %r7859, %r7858;
|
||
xor.b32 %r7861, %r7600, %r5854;
|
||
xor.b32 %r5886, %r7861, %r7860;
|
||
// inline asm
|
||
bfe.u32 %r5873, %r5886, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2106, %r5873, 4;
|
||
add.s64 %rd2107, %rd42, %rd2106;
|
||
ld.shared.u32 %r7862, [%rd2107];
|
||
// inline asm
|
||
bfe.u32 %r5877, %r5886, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2108, %r5877, 4;
|
||
add.s64 %rd2109, %rd45, %rd2108;
|
||
ld.shared.u32 %r7863, [%rd2109];
|
||
add.s32 %r7864, %r7863, %r7862;
|
||
// inline asm
|
||
bfe.u32 %r5881, %r5886, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2110, %r5881, 4;
|
||
add.s64 %rd2111, %rd48, %rd2110;
|
||
ld.shared.u32 %r7865, [%rd2111];
|
||
xor.b32 %r7866, %r7865, %r7864;
|
||
// inline asm
|
||
bfe.u32 %r5885, %r5886, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2112, %r5885, 4;
|
||
add.s64 %rd2113, %rd51, %rd2112;
|
||
ld.shared.u32 %r7867, [%rd2113];
|
||
add.s32 %r7868, %r7867, %r7866;
|
||
xor.b32 %r7869, %r7609, %r5870;
|
||
xor.b32 %r5902, %r7869, %r7868;
|
||
// inline asm
|
||
bfe.u32 %r5889, %r5902, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2114, %r5889, 4;
|
||
add.s64 %rd2115, %rd42, %rd2114;
|
||
ld.shared.u32 %r7870, [%rd2115];
|
||
// inline asm
|
||
bfe.u32 %r5893, %r5902, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2116, %r5893, 4;
|
||
add.s64 %rd2117, %rd45, %rd2116;
|
||
ld.shared.u32 %r7871, [%rd2117];
|
||
add.s32 %r7872, %r7871, %r7870;
|
||
// inline asm
|
||
bfe.u32 %r5897, %r5902, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2118, %r5897, 4;
|
||
add.s64 %rd2119, %rd48, %rd2118;
|
||
ld.shared.u32 %r7873, [%rd2119];
|
||
xor.b32 %r7874, %r7873, %r7872;
|
||
// inline asm
|
||
bfe.u32 %r5901, %r5902, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2120, %r5901, 4;
|
||
add.s64 %rd2121, %rd51, %rd2120;
|
||
ld.shared.u32 %r7875, [%rd2121];
|
||
add.s32 %r7876, %r7875, %r7874;
|
||
xor.b32 %r7877, %r7618, %r5886;
|
||
xor.b32 %r5918, %r7877, %r7876;
|
||
// inline asm
|
||
bfe.u32 %r5905, %r5918, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2122, %r5905, 4;
|
||
add.s64 %rd2123, %rd42, %rd2122;
|
||
ld.shared.u32 %r7878, [%rd2123];
|
||
// inline asm
|
||
bfe.u32 %r5909, %r5918, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2124, %r5909, 4;
|
||
add.s64 %rd2125, %rd45, %rd2124;
|
||
ld.shared.u32 %r7879, [%rd2125];
|
||
add.s32 %r7880, %r7879, %r7878;
|
||
// inline asm
|
||
bfe.u32 %r5913, %r5918, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2126, %r5913, 4;
|
||
add.s64 %rd2127, %rd48, %rd2126;
|
||
ld.shared.u32 %r7881, [%rd2127];
|
||
xor.b32 %r7882, %r7881, %r7880;
|
||
// inline asm
|
||
bfe.u32 %r5917, %r5918, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2128, %r5917, 4;
|
||
add.s64 %rd2129, %rd51, %rd2128;
|
||
ld.shared.u32 %r7883, [%rd2129];
|
||
add.s32 %r7884, %r7883, %r7882;
|
||
xor.b32 %r7885, %r7627, %r5902;
|
||
xor.b32 %r5934, %r7885, %r7884;
|
||
// inline asm
|
||
bfe.u32 %r5921, %r5934, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2130, %r5921, 4;
|
||
add.s64 %rd2131, %rd42, %rd2130;
|
||
ld.shared.u32 %r7886, [%rd2131];
|
||
// inline asm
|
||
bfe.u32 %r5925, %r5934, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2132, %r5925, 4;
|
||
add.s64 %rd2133, %rd45, %rd2132;
|
||
ld.shared.u32 %r7887, [%rd2133];
|
||
add.s32 %r7888, %r7887, %r7886;
|
||
// inline asm
|
||
bfe.u32 %r5929, %r5934, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2134, %r5929, 4;
|
||
add.s64 %rd2135, %rd48, %rd2134;
|
||
ld.shared.u32 %r7889, [%rd2135];
|
||
xor.b32 %r7890, %r7889, %r7888;
|
||
// inline asm
|
||
bfe.u32 %r5933, %r5934, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2136, %r5933, 4;
|
||
add.s64 %rd2137, %rd51, %rd2136;
|
||
ld.shared.u32 %r7891, [%rd2137];
|
||
add.s32 %r7892, %r7891, %r7890;
|
||
xor.b32 %r7893, %r7636, %r5918;
|
||
xor.b32 %r5950, %r7893, %r7892;
|
||
// inline asm
|
||
bfe.u32 %r5937, %r5950, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2138, %r5937, 4;
|
||
add.s64 %rd2139, %rd42, %rd2138;
|
||
ld.shared.u32 %r7894, [%rd2139];
|
||
// inline asm
|
||
bfe.u32 %r5941, %r5950, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2140, %r5941, 4;
|
||
add.s64 %rd2141, %rd45, %rd2140;
|
||
ld.shared.u32 %r7895, [%rd2141];
|
||
add.s32 %r7896, %r7895, %r7894;
|
||
// inline asm
|
||
bfe.u32 %r5945, %r5950, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2142, %r5945, 4;
|
||
add.s64 %rd2143, %rd48, %rd2142;
|
||
ld.shared.u32 %r7897, [%rd2143];
|
||
xor.b32 %r7898, %r7897, %r7896;
|
||
// inline asm
|
||
bfe.u32 %r5949, %r5950, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2144, %r5949, 4;
|
||
add.s64 %rd2145, %rd51, %rd2144;
|
||
ld.shared.u32 %r7899, [%rd2145];
|
||
add.s32 %r7900, %r7899, %r7898;
|
||
xor.b32 %r7901, %r7645, %r5934;
|
||
xor.b32 %r5966, %r7901, %r7900;
|
||
// inline asm
|
||
bfe.u32 %r5953, %r5966, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2146, %r5953, 4;
|
||
add.s64 %rd2147, %rd42, %rd2146;
|
||
ld.shared.u32 %r7902, [%rd2147];
|
||
// inline asm
|
||
bfe.u32 %r5957, %r5966, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2148, %r5957, 4;
|
||
add.s64 %rd2149, %rd45, %rd2148;
|
||
ld.shared.u32 %r7903, [%rd2149];
|
||
add.s32 %r7904, %r7903, %r7902;
|
||
// inline asm
|
||
bfe.u32 %r5961, %r5966, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2150, %r5961, 4;
|
||
add.s64 %rd2151, %rd48, %rd2150;
|
||
ld.shared.u32 %r7905, [%rd2151];
|
||
xor.b32 %r7906, %r7905, %r7904;
|
||
// inline asm
|
||
bfe.u32 %r5965, %r5966, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2152, %r5965, 4;
|
||
add.s64 %rd2153, %rd51, %rd2152;
|
||
ld.shared.u32 %r7907, [%rd2153];
|
||
add.s32 %r7908, %r7907, %r7906;
|
||
xor.b32 %r7909, %r7654, %r5950;
|
||
xor.b32 %r5982, %r7909, %r7908;
|
||
// inline asm
|
||
bfe.u32 %r5969, %r5982, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2154, %r5969, 4;
|
||
add.s64 %rd2155, %rd42, %rd2154;
|
||
ld.shared.u32 %r7910, [%rd2155];
|
||
// inline asm
|
||
bfe.u32 %r5973, %r5982, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2156, %r5973, 4;
|
||
add.s64 %rd2157, %rd45, %rd2156;
|
||
ld.shared.u32 %r7911, [%rd2157];
|
||
add.s32 %r7912, %r7911, %r7910;
|
||
// inline asm
|
||
bfe.u32 %r5977, %r5982, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2158, %r5977, 4;
|
||
add.s64 %rd2159, %rd48, %rd2158;
|
||
ld.shared.u32 %r7913, [%rd2159];
|
||
xor.b32 %r7914, %r7913, %r7912;
|
||
// inline asm
|
||
bfe.u32 %r5981, %r5982, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2160, %r5981, 4;
|
||
add.s64 %rd2161, %rd51, %rd2160;
|
||
ld.shared.u32 %r7915, [%rd2161];
|
||
add.s32 %r7916, %r7915, %r7914;
|
||
xor.b32 %r7917, %r7663, %r5966;
|
||
xor.b32 %r10346, %r7917, %r7916;
|
||
xor.b32 %r10347, %r7665, %r5982;
|
||
xor.b32 %r5998, %r5470, %r5982;
|
||
// inline asm
|
||
bfe.u32 %r5985, %r5998, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2162, %r5985, 4;
|
||
add.s64 %rd2163, %rd42, %rd2162;
|
||
ld.shared.u32 %r7918, [%rd2163];
|
||
// inline asm
|
||
bfe.u32 %r5989, %r5998, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2164, %r5989, 4;
|
||
add.s64 %rd2165, %rd45, %rd2164;
|
||
ld.shared.u32 %r7919, [%rd2165];
|
||
add.s32 %r7920, %r7919, %r7918;
|
||
// inline asm
|
||
bfe.u32 %r5993, %r5998, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2166, %r5993, 4;
|
||
add.s64 %rd2167, %rd48, %rd2166;
|
||
ld.shared.u32 %r7921, [%rd2167];
|
||
xor.b32 %r7922, %r7921, %r7920;
|
||
// inline asm
|
||
bfe.u32 %r5997, %r5998, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2168, %r5997, 4;
|
||
add.s64 %rd2169, %rd51, %rd2168;
|
||
ld.shared.u32 %r7923, [%rd2169];
|
||
add.s32 %r7924, %r7923, %r7922;
|
||
xor.b32 %r7925, %r10350, %r10346;
|
||
xor.b32 %r6014, %r7925, %r7924;
|
||
// inline asm
|
||
bfe.u32 %r6001, %r6014, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2170, %r6001, 4;
|
||
add.s64 %rd2171, %rd42, %rd2170;
|
||
ld.shared.u32 %r7926, [%rd2171];
|
||
// inline asm
|
||
bfe.u32 %r6005, %r6014, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2172, %r6005, 4;
|
||
add.s64 %rd2173, %rd45, %rd2172;
|
||
ld.shared.u32 %r7927, [%rd2173];
|
||
add.s32 %r7928, %r7927, %r7926;
|
||
// inline asm
|
||
bfe.u32 %r6009, %r6014, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2174, %r6009, 4;
|
||
add.s64 %rd2175, %rd48, %rd2174;
|
||
ld.shared.u32 %r7929, [%rd2175];
|
||
xor.b32 %r7930, %r7929, %r7928;
|
||
// inline asm
|
||
bfe.u32 %r6013, %r6014, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2176, %r6013, 4;
|
||
add.s64 %rd2177, %rd51, %rd2176;
|
||
ld.shared.u32 %r7931, [%rd2177];
|
||
add.s32 %r7932, %r7931, %r7930;
|
||
xor.b32 %r7933, %r10349, %r5998;
|
||
xor.b32 %r6030, %r7933, %r7932;
|
||
// inline asm
|
||
bfe.u32 %r6017, %r6030, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2178, %r6017, 4;
|
||
add.s64 %rd2179, %rd42, %rd2178;
|
||
ld.shared.u32 %r7934, [%rd2179];
|
||
// inline asm
|
||
bfe.u32 %r6021, %r6030, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2180, %r6021, 4;
|
||
add.s64 %rd2181, %rd45, %rd2180;
|
||
ld.shared.u32 %r7935, [%rd2181];
|
||
add.s32 %r7936, %r7935, %r7934;
|
||
// inline asm
|
||
bfe.u32 %r6025, %r6030, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2182, %r6025, 4;
|
||
add.s64 %rd2183, %rd48, %rd2182;
|
||
ld.shared.u32 %r7937, [%rd2183];
|
||
xor.b32 %r7938, %r7937, %r7936;
|
||
// inline asm
|
||
bfe.u32 %r6029, %r6030, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2184, %r6029, 4;
|
||
add.s64 %rd2185, %rd51, %rd2184;
|
||
ld.shared.u32 %r7939, [%rd2185];
|
||
add.s32 %r7940, %r7939, %r7938;
|
||
xor.b32 %r7941, %r10348, %r6014;
|
||
xor.b32 %r6046, %r7941, %r7940;
|
||
// inline asm
|
||
bfe.u32 %r6033, %r6046, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2186, %r6033, 4;
|
||
add.s64 %rd2187, %rd42, %rd2186;
|
||
ld.shared.u32 %r7942, [%rd2187];
|
||
// inline asm
|
||
bfe.u32 %r6037, %r6046, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2188, %r6037, 4;
|
||
add.s64 %rd2189, %rd45, %rd2188;
|
||
ld.shared.u32 %r7943, [%rd2189];
|
||
add.s32 %r7944, %r7943, %r7942;
|
||
// inline asm
|
||
bfe.u32 %r6041, %r6046, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2190, %r6041, 4;
|
||
add.s64 %rd2191, %rd48, %rd2190;
|
||
ld.shared.u32 %r7945, [%rd2191];
|
||
xor.b32 %r7946, %r7945, %r7944;
|
||
// inline asm
|
||
bfe.u32 %r6045, %r6046, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2192, %r6045, 4;
|
||
add.s64 %rd2193, %rd51, %rd2192;
|
||
ld.shared.u32 %r7947, [%rd2193];
|
||
add.s32 %r7948, %r7947, %r7946;
|
||
xor.b32 %r7949, %r10347, %r6030;
|
||
xor.b32 %r6062, %r7949, %r7948;
|
||
// inline asm
|
||
bfe.u32 %r6049, %r6062, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2194, %r6049, 4;
|
||
add.s64 %rd2195, %rd42, %rd2194;
|
||
ld.shared.u32 %r7950, [%rd2195];
|
||
// inline asm
|
||
bfe.u32 %r6053, %r6062, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2196, %r6053, 4;
|
||
add.s64 %rd2197, %rd45, %rd2196;
|
||
ld.shared.u32 %r7951, [%rd2197];
|
||
add.s32 %r7952, %r7951, %r7950;
|
||
// inline asm
|
||
bfe.u32 %r6057, %r6062, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2198, %r6057, 4;
|
||
add.s64 %rd2199, %rd48, %rd2198;
|
||
ld.shared.u32 %r7953, [%rd2199];
|
||
xor.b32 %r7954, %r7953, %r7952;
|
||
// inline asm
|
||
bfe.u32 %r6061, %r6062, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2200, %r6061, 4;
|
||
add.s64 %rd2201, %rd51, %rd2200;
|
||
ld.shared.u32 %r7955, [%rd2201];
|
||
add.s32 %r7956, %r7955, %r7954;
|
||
xor.b32 %r7957, %r10346, %r6046;
|
||
xor.b32 %r6078, %r7957, %r7956;
|
||
// inline asm
|
||
bfe.u32 %r6065, %r6078, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2202, %r6065, 4;
|
||
add.s64 %rd2203, %rd42, %rd2202;
|
||
ld.shared.u32 %r7958, [%rd2203];
|
||
// inline asm
|
||
bfe.u32 %r6069, %r6078, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2204, %r6069, 4;
|
||
add.s64 %rd2205, %rd45, %rd2204;
|
||
ld.shared.u32 %r7959, [%rd2205];
|
||
add.s32 %r7960, %r7959, %r7958;
|
||
// inline asm
|
||
bfe.u32 %r6073, %r6078, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2206, %r6073, 4;
|
||
add.s64 %rd2207, %rd48, %rd2206;
|
||
ld.shared.u32 %r7961, [%rd2207];
|
||
xor.b32 %r7962, %r7961, %r7960;
|
||
// inline asm
|
||
bfe.u32 %r6077, %r6078, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2208, %r6077, 4;
|
||
add.s64 %rd2209, %rd51, %rd2208;
|
||
ld.shared.u32 %r7963, [%rd2209];
|
||
add.s32 %r7964, %r7963, %r7962;
|
||
xor.b32 %r7965, %r7573, %r6062;
|
||
xor.b32 %r6094, %r7965, %r7964;
|
||
// inline asm
|
||
bfe.u32 %r6081, %r6094, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2210, %r6081, 4;
|
||
add.s64 %rd2211, %rd42, %rd2210;
|
||
ld.shared.u32 %r7966, [%rd2211];
|
||
// inline asm
|
||
bfe.u32 %r6085, %r6094, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2212, %r6085, 4;
|
||
add.s64 %rd2213, %rd45, %rd2212;
|
||
ld.shared.u32 %r7967, [%rd2213];
|
||
add.s32 %r7968, %r7967, %r7966;
|
||
// inline asm
|
||
bfe.u32 %r6089, %r6094, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2214, %r6089, 4;
|
||
add.s64 %rd2215, %rd48, %rd2214;
|
||
ld.shared.u32 %r7969, [%rd2215];
|
||
xor.b32 %r7970, %r7969, %r7968;
|
||
// inline asm
|
||
bfe.u32 %r6093, %r6094, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2216, %r6093, 4;
|
||
add.s64 %rd2217, %rd51, %rd2216;
|
||
ld.shared.u32 %r7971, [%rd2217];
|
||
add.s32 %r7972, %r7971, %r7970;
|
||
xor.b32 %r7973, %r7582, %r6078;
|
||
xor.b32 %r6110, %r7973, %r7972;
|
||
// inline asm
|
||
bfe.u32 %r6097, %r6110, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2218, %r6097, 4;
|
||
add.s64 %rd2219, %rd42, %rd2218;
|
||
ld.shared.u32 %r7974, [%rd2219];
|
||
// inline asm
|
||
bfe.u32 %r6101, %r6110, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2220, %r6101, 4;
|
||
add.s64 %rd2221, %rd45, %rd2220;
|
||
ld.shared.u32 %r7975, [%rd2221];
|
||
add.s32 %r7976, %r7975, %r7974;
|
||
// inline asm
|
||
bfe.u32 %r6105, %r6110, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2222, %r6105, 4;
|
||
add.s64 %rd2223, %rd48, %rd2222;
|
||
ld.shared.u32 %r7977, [%rd2223];
|
||
xor.b32 %r7978, %r7977, %r7976;
|
||
// inline asm
|
||
bfe.u32 %r6109, %r6110, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2224, %r6109, 4;
|
||
add.s64 %rd2225, %rd51, %rd2224;
|
||
ld.shared.u32 %r7979, [%rd2225];
|
||
add.s32 %r7980, %r7979, %r7978;
|
||
xor.b32 %r7981, %r7591, %r6094;
|
||
xor.b32 %r6126, %r7981, %r7980;
|
||
// inline asm
|
||
bfe.u32 %r6113, %r6126, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2226, %r6113, 4;
|
||
add.s64 %rd2227, %rd42, %rd2226;
|
||
ld.shared.u32 %r7982, [%rd2227];
|
||
// inline asm
|
||
bfe.u32 %r6117, %r6126, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2228, %r6117, 4;
|
||
add.s64 %rd2229, %rd45, %rd2228;
|
||
ld.shared.u32 %r7983, [%rd2229];
|
||
add.s32 %r7984, %r7983, %r7982;
|
||
// inline asm
|
||
bfe.u32 %r6121, %r6126, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2230, %r6121, 4;
|
||
add.s64 %rd2231, %rd48, %rd2230;
|
||
ld.shared.u32 %r7985, [%rd2231];
|
||
xor.b32 %r7986, %r7985, %r7984;
|
||
// inline asm
|
||
bfe.u32 %r6125, %r6126, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2232, %r6125, 4;
|
||
add.s64 %rd2233, %rd51, %rd2232;
|
||
ld.shared.u32 %r7987, [%rd2233];
|
||
add.s32 %r7988, %r7987, %r7986;
|
||
xor.b32 %r7989, %r7600, %r6110;
|
||
xor.b32 %r6142, %r7989, %r7988;
|
||
// inline asm
|
||
bfe.u32 %r6129, %r6142, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2234, %r6129, 4;
|
||
add.s64 %rd2235, %rd42, %rd2234;
|
||
ld.shared.u32 %r7990, [%rd2235];
|
||
// inline asm
|
||
bfe.u32 %r6133, %r6142, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2236, %r6133, 4;
|
||
add.s64 %rd2237, %rd45, %rd2236;
|
||
ld.shared.u32 %r7991, [%rd2237];
|
||
add.s32 %r7992, %r7991, %r7990;
|
||
// inline asm
|
||
bfe.u32 %r6137, %r6142, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2238, %r6137, 4;
|
||
add.s64 %rd2239, %rd48, %rd2238;
|
||
ld.shared.u32 %r7993, [%rd2239];
|
||
xor.b32 %r7994, %r7993, %r7992;
|
||
// inline asm
|
||
bfe.u32 %r6141, %r6142, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2240, %r6141, 4;
|
||
add.s64 %rd2241, %rd51, %rd2240;
|
||
ld.shared.u32 %r7995, [%rd2241];
|
||
add.s32 %r7996, %r7995, %r7994;
|
||
xor.b32 %r7997, %r7609, %r6126;
|
||
xor.b32 %r6158, %r7997, %r7996;
|
||
// inline asm
|
||
bfe.u32 %r6145, %r6158, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2242, %r6145, 4;
|
||
add.s64 %rd2243, %rd42, %rd2242;
|
||
ld.shared.u32 %r7998, [%rd2243];
|
||
// inline asm
|
||
bfe.u32 %r6149, %r6158, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2244, %r6149, 4;
|
||
add.s64 %rd2245, %rd45, %rd2244;
|
||
ld.shared.u32 %r7999, [%rd2245];
|
||
add.s32 %r8000, %r7999, %r7998;
|
||
// inline asm
|
||
bfe.u32 %r6153, %r6158, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2246, %r6153, 4;
|
||
add.s64 %rd2247, %rd48, %rd2246;
|
||
ld.shared.u32 %r8001, [%rd2247];
|
||
xor.b32 %r8002, %r8001, %r8000;
|
||
// inline asm
|
||
bfe.u32 %r6157, %r6158, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2248, %r6157, 4;
|
||
add.s64 %rd2249, %rd51, %rd2248;
|
||
ld.shared.u32 %r8003, [%rd2249];
|
||
add.s32 %r8004, %r8003, %r8002;
|
||
xor.b32 %r8005, %r7618, %r6142;
|
||
xor.b32 %r6174, %r8005, %r8004;
|
||
// inline asm
|
||
bfe.u32 %r6161, %r6174, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2250, %r6161, 4;
|
||
add.s64 %rd2251, %rd42, %rd2250;
|
||
ld.shared.u32 %r8006, [%rd2251];
|
||
// inline asm
|
||
bfe.u32 %r6165, %r6174, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2252, %r6165, 4;
|
||
add.s64 %rd2253, %rd45, %rd2252;
|
||
ld.shared.u32 %r8007, [%rd2253];
|
||
add.s32 %r8008, %r8007, %r8006;
|
||
// inline asm
|
||
bfe.u32 %r6169, %r6174, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2254, %r6169, 4;
|
||
add.s64 %rd2255, %rd48, %rd2254;
|
||
ld.shared.u32 %r8009, [%rd2255];
|
||
xor.b32 %r8010, %r8009, %r8008;
|
||
// inline asm
|
||
bfe.u32 %r6173, %r6174, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2256, %r6173, 4;
|
||
add.s64 %rd2257, %rd51, %rd2256;
|
||
ld.shared.u32 %r8011, [%rd2257];
|
||
add.s32 %r8012, %r8011, %r8010;
|
||
xor.b32 %r8013, %r7627, %r6158;
|
||
xor.b32 %r6190, %r8013, %r8012;
|
||
// inline asm
|
||
bfe.u32 %r6177, %r6190, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2258, %r6177, 4;
|
||
add.s64 %rd2259, %rd42, %rd2258;
|
||
ld.shared.u32 %r8014, [%rd2259];
|
||
// inline asm
|
||
bfe.u32 %r6181, %r6190, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2260, %r6181, 4;
|
||
add.s64 %rd2261, %rd45, %rd2260;
|
||
ld.shared.u32 %r8015, [%rd2261];
|
||
add.s32 %r8016, %r8015, %r8014;
|
||
// inline asm
|
||
bfe.u32 %r6185, %r6190, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2262, %r6185, 4;
|
||
add.s64 %rd2263, %rd48, %rd2262;
|
||
ld.shared.u32 %r8017, [%rd2263];
|
||
xor.b32 %r8018, %r8017, %r8016;
|
||
// inline asm
|
||
bfe.u32 %r6189, %r6190, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2264, %r6189, 4;
|
||
add.s64 %rd2265, %rd51, %rd2264;
|
||
ld.shared.u32 %r8019, [%rd2265];
|
||
add.s32 %r8020, %r8019, %r8018;
|
||
xor.b32 %r8021, %r7636, %r6174;
|
||
xor.b32 %r6206, %r8021, %r8020;
|
||
// inline asm
|
||
bfe.u32 %r6193, %r6206, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2266, %r6193, 4;
|
||
add.s64 %rd2267, %rd42, %rd2266;
|
||
ld.shared.u32 %r8022, [%rd2267];
|
||
// inline asm
|
||
bfe.u32 %r6197, %r6206, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2268, %r6197, 4;
|
||
add.s64 %rd2269, %rd45, %rd2268;
|
||
ld.shared.u32 %r8023, [%rd2269];
|
||
add.s32 %r8024, %r8023, %r8022;
|
||
// inline asm
|
||
bfe.u32 %r6201, %r6206, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2270, %r6201, 4;
|
||
add.s64 %rd2271, %rd48, %rd2270;
|
||
ld.shared.u32 %r8025, [%rd2271];
|
||
xor.b32 %r8026, %r8025, %r8024;
|
||
// inline asm
|
||
bfe.u32 %r6205, %r6206, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2272, %r6205, 4;
|
||
add.s64 %rd2273, %rd51, %rd2272;
|
||
ld.shared.u32 %r8027, [%rd2273];
|
||
add.s32 %r8028, %r8027, %r8026;
|
||
xor.b32 %r8029, %r7645, %r6190;
|
||
xor.b32 %r6222, %r8029, %r8028;
|
||
// inline asm
|
||
bfe.u32 %r6209, %r6222, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2274, %r6209, 4;
|
||
add.s64 %rd2275, %rd42, %rd2274;
|
||
ld.shared.u32 %r8030, [%rd2275];
|
||
// inline asm
|
||
bfe.u32 %r6213, %r6222, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2276, %r6213, 4;
|
||
add.s64 %rd2277, %rd45, %rd2276;
|
||
ld.shared.u32 %r8031, [%rd2277];
|
||
add.s32 %r8032, %r8031, %r8030;
|
||
// inline asm
|
||
bfe.u32 %r6217, %r6222, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2278, %r6217, 4;
|
||
add.s64 %rd2279, %rd48, %rd2278;
|
||
ld.shared.u32 %r8033, [%rd2279];
|
||
xor.b32 %r8034, %r8033, %r8032;
|
||
// inline asm
|
||
bfe.u32 %r6221, %r6222, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2280, %r6221, 4;
|
||
add.s64 %rd2281, %rd51, %rd2280;
|
||
ld.shared.u32 %r8035, [%rd2281];
|
||
add.s32 %r8036, %r8035, %r8034;
|
||
xor.b32 %r8037, %r7654, %r6206;
|
||
xor.b32 %r6238, %r8037, %r8036;
|
||
// inline asm
|
||
bfe.u32 %r6225, %r6238, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2282, %r6225, 4;
|
||
add.s64 %rd2283, %rd42, %rd2282;
|
||
ld.shared.u32 %r8038, [%rd2283];
|
||
// inline asm
|
||
bfe.u32 %r6229, %r6238, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2284, %r6229, 4;
|
||
add.s64 %rd2285, %rd45, %rd2284;
|
||
ld.shared.u32 %r8039, [%rd2285];
|
||
add.s32 %r8040, %r8039, %r8038;
|
||
// inline asm
|
||
bfe.u32 %r6233, %r6238, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2286, %r6233, 4;
|
||
add.s64 %rd2287, %rd48, %rd2286;
|
||
ld.shared.u32 %r8041, [%rd2287];
|
||
xor.b32 %r8042, %r8041, %r8040;
|
||
// inline asm
|
||
bfe.u32 %r6237, %r6238, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2288, %r6237, 4;
|
||
add.s64 %rd2289, %rd51, %rd2288;
|
||
ld.shared.u32 %r8043, [%rd2289];
|
||
add.s32 %r8044, %r8043, %r8042;
|
||
xor.b32 %r8045, %r7663, %r6222;
|
||
xor.b32 %r10344, %r8045, %r8044;
|
||
xor.b32 %r10345, %r7665, %r6238;
|
||
xor.b32 %r6254, %r5470, %r6238;
|
||
// inline asm
|
||
bfe.u32 %r6241, %r6254, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2290, %r6241, 4;
|
||
add.s64 %rd2291, %rd42, %rd2290;
|
||
ld.shared.u32 %r8046, [%rd2291];
|
||
// inline asm
|
||
bfe.u32 %r6245, %r6254, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2292, %r6245, 4;
|
||
add.s64 %rd2293, %rd45, %rd2292;
|
||
ld.shared.u32 %r8047, [%rd2293];
|
||
add.s32 %r8048, %r8047, %r8046;
|
||
// inline asm
|
||
bfe.u32 %r6249, %r6254, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2294, %r6249, 4;
|
||
add.s64 %rd2295, %rd48, %rd2294;
|
||
ld.shared.u32 %r8049, [%rd2295];
|
||
xor.b32 %r8050, %r8049, %r8048;
|
||
// inline asm
|
||
bfe.u32 %r6253, %r6254, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2296, %r6253, 4;
|
||
add.s64 %rd2297, %rd51, %rd2296;
|
||
ld.shared.u32 %r8051, [%rd2297];
|
||
add.s32 %r8052, %r8051, %r8050;
|
||
xor.b32 %r8053, %r10350, %r10344;
|
||
xor.b32 %r6270, %r8053, %r8052;
|
||
// inline asm
|
||
bfe.u32 %r6257, %r6270, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2298, %r6257, 4;
|
||
add.s64 %rd2299, %rd42, %rd2298;
|
||
ld.shared.u32 %r8054, [%rd2299];
|
||
// inline asm
|
||
bfe.u32 %r6261, %r6270, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2300, %r6261, 4;
|
||
add.s64 %rd2301, %rd45, %rd2300;
|
||
ld.shared.u32 %r8055, [%rd2301];
|
||
add.s32 %r8056, %r8055, %r8054;
|
||
// inline asm
|
||
bfe.u32 %r6265, %r6270, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2302, %r6265, 4;
|
||
add.s64 %rd2303, %rd48, %rd2302;
|
||
ld.shared.u32 %r8057, [%rd2303];
|
||
xor.b32 %r8058, %r8057, %r8056;
|
||
// inline asm
|
||
bfe.u32 %r6269, %r6270, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2304, %r6269, 4;
|
||
add.s64 %rd2305, %rd51, %rd2304;
|
||
ld.shared.u32 %r8059, [%rd2305];
|
||
add.s32 %r8060, %r8059, %r8058;
|
||
xor.b32 %r8061, %r10349, %r6254;
|
||
xor.b32 %r6286, %r8061, %r8060;
|
||
// inline asm
|
||
bfe.u32 %r6273, %r6286, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2306, %r6273, 4;
|
||
add.s64 %rd2307, %rd42, %rd2306;
|
||
ld.shared.u32 %r8062, [%rd2307];
|
||
// inline asm
|
||
bfe.u32 %r6277, %r6286, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2308, %r6277, 4;
|
||
add.s64 %rd2309, %rd45, %rd2308;
|
||
ld.shared.u32 %r8063, [%rd2309];
|
||
add.s32 %r8064, %r8063, %r8062;
|
||
// inline asm
|
||
bfe.u32 %r6281, %r6286, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2310, %r6281, 4;
|
||
add.s64 %rd2311, %rd48, %rd2310;
|
||
ld.shared.u32 %r8065, [%rd2311];
|
||
xor.b32 %r8066, %r8065, %r8064;
|
||
// inline asm
|
||
bfe.u32 %r6285, %r6286, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2312, %r6285, 4;
|
||
add.s64 %rd2313, %rd51, %rd2312;
|
||
ld.shared.u32 %r8067, [%rd2313];
|
||
add.s32 %r8068, %r8067, %r8066;
|
||
xor.b32 %r8069, %r10348, %r6270;
|
||
xor.b32 %r6302, %r8069, %r8068;
|
||
// inline asm
|
||
bfe.u32 %r6289, %r6302, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2314, %r6289, 4;
|
||
add.s64 %rd2315, %rd42, %rd2314;
|
||
ld.shared.u32 %r8070, [%rd2315];
|
||
// inline asm
|
||
bfe.u32 %r6293, %r6302, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2316, %r6293, 4;
|
||
add.s64 %rd2317, %rd45, %rd2316;
|
||
ld.shared.u32 %r8071, [%rd2317];
|
||
add.s32 %r8072, %r8071, %r8070;
|
||
// inline asm
|
||
bfe.u32 %r6297, %r6302, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2318, %r6297, 4;
|
||
add.s64 %rd2319, %rd48, %rd2318;
|
||
ld.shared.u32 %r8073, [%rd2319];
|
||
xor.b32 %r8074, %r8073, %r8072;
|
||
// inline asm
|
||
bfe.u32 %r6301, %r6302, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2320, %r6301, 4;
|
||
add.s64 %rd2321, %rd51, %rd2320;
|
||
ld.shared.u32 %r8075, [%rd2321];
|
||
add.s32 %r8076, %r8075, %r8074;
|
||
xor.b32 %r8077, %r10347, %r6286;
|
||
xor.b32 %r6318, %r8077, %r8076;
|
||
// inline asm
|
||
bfe.u32 %r6305, %r6318, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2322, %r6305, 4;
|
||
add.s64 %rd2323, %rd42, %rd2322;
|
||
ld.shared.u32 %r8078, [%rd2323];
|
||
// inline asm
|
||
bfe.u32 %r6309, %r6318, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2324, %r6309, 4;
|
||
add.s64 %rd2325, %rd45, %rd2324;
|
||
ld.shared.u32 %r8079, [%rd2325];
|
||
add.s32 %r8080, %r8079, %r8078;
|
||
// inline asm
|
||
bfe.u32 %r6313, %r6318, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2326, %r6313, 4;
|
||
add.s64 %rd2327, %rd48, %rd2326;
|
||
ld.shared.u32 %r8081, [%rd2327];
|
||
xor.b32 %r8082, %r8081, %r8080;
|
||
// inline asm
|
||
bfe.u32 %r6317, %r6318, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2328, %r6317, 4;
|
||
add.s64 %rd2329, %rd51, %rd2328;
|
||
ld.shared.u32 %r8083, [%rd2329];
|
||
add.s32 %r8084, %r8083, %r8082;
|
||
xor.b32 %r8085, %r10346, %r6302;
|
||
xor.b32 %r6334, %r8085, %r8084;
|
||
// inline asm
|
||
bfe.u32 %r6321, %r6334, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2330, %r6321, 4;
|
||
add.s64 %rd2331, %rd42, %rd2330;
|
||
ld.shared.u32 %r8086, [%rd2331];
|
||
// inline asm
|
||
bfe.u32 %r6325, %r6334, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2332, %r6325, 4;
|
||
add.s64 %rd2333, %rd45, %rd2332;
|
||
ld.shared.u32 %r8087, [%rd2333];
|
||
add.s32 %r8088, %r8087, %r8086;
|
||
// inline asm
|
||
bfe.u32 %r6329, %r6334, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2334, %r6329, 4;
|
||
add.s64 %rd2335, %rd48, %rd2334;
|
||
ld.shared.u32 %r8089, [%rd2335];
|
||
xor.b32 %r8090, %r8089, %r8088;
|
||
// inline asm
|
||
bfe.u32 %r6333, %r6334, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2336, %r6333, 4;
|
||
add.s64 %rd2337, %rd51, %rd2336;
|
||
ld.shared.u32 %r8091, [%rd2337];
|
||
add.s32 %r8092, %r8091, %r8090;
|
||
xor.b32 %r8093, %r10345, %r6318;
|
||
xor.b32 %r6350, %r8093, %r8092;
|
||
// inline asm
|
||
bfe.u32 %r6337, %r6350, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2338, %r6337, 4;
|
||
add.s64 %rd2339, %rd42, %rd2338;
|
||
ld.shared.u32 %r8094, [%rd2339];
|
||
// inline asm
|
||
bfe.u32 %r6341, %r6350, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2340, %r6341, 4;
|
||
add.s64 %rd2341, %rd45, %rd2340;
|
||
ld.shared.u32 %r8095, [%rd2341];
|
||
add.s32 %r8096, %r8095, %r8094;
|
||
// inline asm
|
||
bfe.u32 %r6345, %r6350, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2342, %r6345, 4;
|
||
add.s64 %rd2343, %rd48, %rd2342;
|
||
ld.shared.u32 %r8097, [%rd2343];
|
||
xor.b32 %r8098, %r8097, %r8096;
|
||
// inline asm
|
||
bfe.u32 %r6349, %r6350, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2344, %r6349, 4;
|
||
add.s64 %rd2345, %rd51, %rd2344;
|
||
ld.shared.u32 %r8099, [%rd2345];
|
||
add.s32 %r8100, %r8099, %r8098;
|
||
xor.b32 %r8101, %r10344, %r6334;
|
||
xor.b32 %r6366, %r8101, %r8100;
|
||
// inline asm
|
||
bfe.u32 %r6353, %r6366, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2346, %r6353, 4;
|
||
add.s64 %rd2347, %rd42, %rd2346;
|
||
ld.shared.u32 %r8102, [%rd2347];
|
||
// inline asm
|
||
bfe.u32 %r6357, %r6366, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2348, %r6357, 4;
|
||
add.s64 %rd2349, %rd45, %rd2348;
|
||
ld.shared.u32 %r8103, [%rd2349];
|
||
add.s32 %r8104, %r8103, %r8102;
|
||
// inline asm
|
||
bfe.u32 %r6361, %r6366, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2350, %r6361, 4;
|
||
add.s64 %rd2351, %rd48, %rd2350;
|
||
ld.shared.u32 %r8105, [%rd2351];
|
||
xor.b32 %r8106, %r8105, %r8104;
|
||
// inline asm
|
||
bfe.u32 %r6365, %r6366, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2352, %r6365, 4;
|
||
add.s64 %rd2353, %rd51, %rd2352;
|
||
ld.shared.u32 %r8107, [%rd2353];
|
||
add.s32 %r8108, %r8107, %r8106;
|
||
xor.b32 %r8109, %r7591, %r6350;
|
||
xor.b32 %r6382, %r8109, %r8108;
|
||
// inline asm
|
||
bfe.u32 %r6369, %r6382, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2354, %r6369, 4;
|
||
add.s64 %rd2355, %rd42, %rd2354;
|
||
ld.shared.u32 %r8110, [%rd2355];
|
||
// inline asm
|
||
bfe.u32 %r6373, %r6382, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2356, %r6373, 4;
|
||
add.s64 %rd2357, %rd45, %rd2356;
|
||
ld.shared.u32 %r8111, [%rd2357];
|
||
add.s32 %r8112, %r8111, %r8110;
|
||
// inline asm
|
||
bfe.u32 %r6377, %r6382, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2358, %r6377, 4;
|
||
add.s64 %rd2359, %rd48, %rd2358;
|
||
ld.shared.u32 %r8113, [%rd2359];
|
||
xor.b32 %r8114, %r8113, %r8112;
|
||
// inline asm
|
||
bfe.u32 %r6381, %r6382, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2360, %r6381, 4;
|
||
add.s64 %rd2361, %rd51, %rd2360;
|
||
ld.shared.u32 %r8115, [%rd2361];
|
||
add.s32 %r8116, %r8115, %r8114;
|
||
xor.b32 %r8117, %r7600, %r6366;
|
||
xor.b32 %r6398, %r8117, %r8116;
|
||
// inline asm
|
||
bfe.u32 %r6385, %r6398, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2362, %r6385, 4;
|
||
add.s64 %rd2363, %rd42, %rd2362;
|
||
ld.shared.u32 %r8118, [%rd2363];
|
||
// inline asm
|
||
bfe.u32 %r6389, %r6398, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2364, %r6389, 4;
|
||
add.s64 %rd2365, %rd45, %rd2364;
|
||
ld.shared.u32 %r8119, [%rd2365];
|
||
add.s32 %r8120, %r8119, %r8118;
|
||
// inline asm
|
||
bfe.u32 %r6393, %r6398, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2366, %r6393, 4;
|
||
add.s64 %rd2367, %rd48, %rd2366;
|
||
ld.shared.u32 %r8121, [%rd2367];
|
||
xor.b32 %r8122, %r8121, %r8120;
|
||
// inline asm
|
||
bfe.u32 %r6397, %r6398, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2368, %r6397, 4;
|
||
add.s64 %rd2369, %rd51, %rd2368;
|
||
ld.shared.u32 %r8123, [%rd2369];
|
||
add.s32 %r8124, %r8123, %r8122;
|
||
xor.b32 %r8125, %r7609, %r6382;
|
||
xor.b32 %r6414, %r8125, %r8124;
|
||
// inline asm
|
||
bfe.u32 %r6401, %r6414, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2370, %r6401, 4;
|
||
add.s64 %rd2371, %rd42, %rd2370;
|
||
ld.shared.u32 %r8126, [%rd2371];
|
||
// inline asm
|
||
bfe.u32 %r6405, %r6414, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2372, %r6405, 4;
|
||
add.s64 %rd2373, %rd45, %rd2372;
|
||
ld.shared.u32 %r8127, [%rd2373];
|
||
add.s32 %r8128, %r8127, %r8126;
|
||
// inline asm
|
||
bfe.u32 %r6409, %r6414, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2374, %r6409, 4;
|
||
add.s64 %rd2375, %rd48, %rd2374;
|
||
ld.shared.u32 %r8129, [%rd2375];
|
||
xor.b32 %r8130, %r8129, %r8128;
|
||
// inline asm
|
||
bfe.u32 %r6413, %r6414, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2376, %r6413, 4;
|
||
add.s64 %rd2377, %rd51, %rd2376;
|
||
ld.shared.u32 %r8131, [%rd2377];
|
||
add.s32 %r8132, %r8131, %r8130;
|
||
xor.b32 %r8133, %r7618, %r6398;
|
||
xor.b32 %r6430, %r8133, %r8132;
|
||
// inline asm
|
||
bfe.u32 %r6417, %r6430, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2378, %r6417, 4;
|
||
add.s64 %rd2379, %rd42, %rd2378;
|
||
ld.shared.u32 %r8134, [%rd2379];
|
||
// inline asm
|
||
bfe.u32 %r6421, %r6430, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2380, %r6421, 4;
|
||
add.s64 %rd2381, %rd45, %rd2380;
|
||
ld.shared.u32 %r8135, [%rd2381];
|
||
add.s32 %r8136, %r8135, %r8134;
|
||
// inline asm
|
||
bfe.u32 %r6425, %r6430, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2382, %r6425, 4;
|
||
add.s64 %rd2383, %rd48, %rd2382;
|
||
ld.shared.u32 %r8137, [%rd2383];
|
||
xor.b32 %r8138, %r8137, %r8136;
|
||
// inline asm
|
||
bfe.u32 %r6429, %r6430, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2384, %r6429, 4;
|
||
add.s64 %rd2385, %rd51, %rd2384;
|
||
ld.shared.u32 %r8139, [%rd2385];
|
||
add.s32 %r8140, %r8139, %r8138;
|
||
xor.b32 %r8141, %r7627, %r6414;
|
||
xor.b32 %r6446, %r8141, %r8140;
|
||
// inline asm
|
||
bfe.u32 %r6433, %r6446, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2386, %r6433, 4;
|
||
add.s64 %rd2387, %rd42, %rd2386;
|
||
ld.shared.u32 %r8142, [%rd2387];
|
||
// inline asm
|
||
bfe.u32 %r6437, %r6446, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2388, %r6437, 4;
|
||
add.s64 %rd2389, %rd45, %rd2388;
|
||
ld.shared.u32 %r8143, [%rd2389];
|
||
add.s32 %r8144, %r8143, %r8142;
|
||
// inline asm
|
||
bfe.u32 %r6441, %r6446, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2390, %r6441, 4;
|
||
add.s64 %rd2391, %rd48, %rd2390;
|
||
ld.shared.u32 %r8145, [%rd2391];
|
||
xor.b32 %r8146, %r8145, %r8144;
|
||
// inline asm
|
||
bfe.u32 %r6445, %r6446, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2392, %r6445, 4;
|
||
add.s64 %rd2393, %rd51, %rd2392;
|
||
ld.shared.u32 %r8147, [%rd2393];
|
||
add.s32 %r8148, %r8147, %r8146;
|
||
xor.b32 %r8149, %r7636, %r6430;
|
||
xor.b32 %r6462, %r8149, %r8148;
|
||
// inline asm
|
||
bfe.u32 %r6449, %r6462, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2394, %r6449, 4;
|
||
add.s64 %rd2395, %rd42, %rd2394;
|
||
ld.shared.u32 %r8150, [%rd2395];
|
||
// inline asm
|
||
bfe.u32 %r6453, %r6462, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2396, %r6453, 4;
|
||
add.s64 %rd2397, %rd45, %rd2396;
|
||
ld.shared.u32 %r8151, [%rd2397];
|
||
add.s32 %r8152, %r8151, %r8150;
|
||
// inline asm
|
||
bfe.u32 %r6457, %r6462, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2398, %r6457, 4;
|
||
add.s64 %rd2399, %rd48, %rd2398;
|
||
ld.shared.u32 %r8153, [%rd2399];
|
||
xor.b32 %r8154, %r8153, %r8152;
|
||
// inline asm
|
||
bfe.u32 %r6461, %r6462, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2400, %r6461, 4;
|
||
add.s64 %rd2401, %rd51, %rd2400;
|
||
ld.shared.u32 %r8155, [%rd2401];
|
||
add.s32 %r8156, %r8155, %r8154;
|
||
xor.b32 %r8157, %r7645, %r6446;
|
||
xor.b32 %r6478, %r8157, %r8156;
|
||
// inline asm
|
||
bfe.u32 %r6465, %r6478, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2402, %r6465, 4;
|
||
add.s64 %rd2403, %rd42, %rd2402;
|
||
ld.shared.u32 %r8158, [%rd2403];
|
||
// inline asm
|
||
bfe.u32 %r6469, %r6478, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2404, %r6469, 4;
|
||
add.s64 %rd2405, %rd45, %rd2404;
|
||
ld.shared.u32 %r8159, [%rd2405];
|
||
add.s32 %r8160, %r8159, %r8158;
|
||
// inline asm
|
||
bfe.u32 %r6473, %r6478, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2406, %r6473, 4;
|
||
add.s64 %rd2407, %rd48, %rd2406;
|
||
ld.shared.u32 %r8161, [%rd2407];
|
||
xor.b32 %r8162, %r8161, %r8160;
|
||
// inline asm
|
||
bfe.u32 %r6477, %r6478, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2408, %r6477, 4;
|
||
add.s64 %rd2409, %rd51, %rd2408;
|
||
ld.shared.u32 %r8163, [%rd2409];
|
||
add.s32 %r8164, %r8163, %r8162;
|
||
xor.b32 %r8165, %r7654, %r6462;
|
||
xor.b32 %r6494, %r8165, %r8164;
|
||
// inline asm
|
||
bfe.u32 %r6481, %r6494, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2410, %r6481, 4;
|
||
add.s64 %rd2411, %rd42, %rd2410;
|
||
ld.shared.u32 %r8166, [%rd2411];
|
||
// inline asm
|
||
bfe.u32 %r6485, %r6494, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2412, %r6485, 4;
|
||
add.s64 %rd2413, %rd45, %rd2412;
|
||
ld.shared.u32 %r8167, [%rd2413];
|
||
add.s32 %r8168, %r8167, %r8166;
|
||
// inline asm
|
||
bfe.u32 %r6489, %r6494, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2414, %r6489, 4;
|
||
add.s64 %rd2415, %rd48, %rd2414;
|
||
ld.shared.u32 %r8169, [%rd2415];
|
||
xor.b32 %r8170, %r8169, %r8168;
|
||
// inline asm
|
||
bfe.u32 %r6493, %r6494, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2416, %r6493, 4;
|
||
add.s64 %rd2417, %rd51, %rd2416;
|
||
ld.shared.u32 %r8171, [%rd2417];
|
||
add.s32 %r8172, %r8171, %r8170;
|
||
xor.b32 %r8173, %r7663, %r6478;
|
||
xor.b32 %r10342, %r8173, %r8172;
|
||
xor.b32 %r10343, %r7665, %r6494;
|
||
xor.b32 %r6510, %r5470, %r6494;
|
||
// inline asm
|
||
bfe.u32 %r6497, %r6510, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2418, %r6497, 4;
|
||
add.s64 %rd2419, %rd42, %rd2418;
|
||
ld.shared.u32 %r8174, [%rd2419];
|
||
// inline asm
|
||
bfe.u32 %r6501, %r6510, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2420, %r6501, 4;
|
||
add.s64 %rd2421, %rd45, %rd2420;
|
||
ld.shared.u32 %r8175, [%rd2421];
|
||
add.s32 %r8176, %r8175, %r8174;
|
||
// inline asm
|
||
bfe.u32 %r6505, %r6510, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2422, %r6505, 4;
|
||
add.s64 %rd2423, %rd48, %rd2422;
|
||
ld.shared.u32 %r8177, [%rd2423];
|
||
xor.b32 %r8178, %r8177, %r8176;
|
||
// inline asm
|
||
bfe.u32 %r6509, %r6510, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2424, %r6509, 4;
|
||
add.s64 %rd2425, %rd51, %rd2424;
|
||
ld.shared.u32 %r8179, [%rd2425];
|
||
add.s32 %r8180, %r8179, %r8178;
|
||
xor.b32 %r8181, %r10350, %r10342;
|
||
xor.b32 %r6526, %r8181, %r8180;
|
||
// inline asm
|
||
bfe.u32 %r6513, %r6526, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2426, %r6513, 4;
|
||
add.s64 %rd2427, %rd42, %rd2426;
|
||
ld.shared.u32 %r8182, [%rd2427];
|
||
// inline asm
|
||
bfe.u32 %r6517, %r6526, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2428, %r6517, 4;
|
||
add.s64 %rd2429, %rd45, %rd2428;
|
||
ld.shared.u32 %r8183, [%rd2429];
|
||
add.s32 %r8184, %r8183, %r8182;
|
||
// inline asm
|
||
bfe.u32 %r6521, %r6526, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2430, %r6521, 4;
|
||
add.s64 %rd2431, %rd48, %rd2430;
|
||
ld.shared.u32 %r8185, [%rd2431];
|
||
xor.b32 %r8186, %r8185, %r8184;
|
||
// inline asm
|
||
bfe.u32 %r6525, %r6526, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2432, %r6525, 4;
|
||
add.s64 %rd2433, %rd51, %rd2432;
|
||
ld.shared.u32 %r8187, [%rd2433];
|
||
add.s32 %r8188, %r8187, %r8186;
|
||
xor.b32 %r8189, %r10349, %r6510;
|
||
xor.b32 %r6542, %r8189, %r8188;
|
||
// inline asm
|
||
bfe.u32 %r6529, %r6542, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2434, %r6529, 4;
|
||
add.s64 %rd2435, %rd42, %rd2434;
|
||
ld.shared.u32 %r8190, [%rd2435];
|
||
// inline asm
|
||
bfe.u32 %r6533, %r6542, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2436, %r6533, 4;
|
||
add.s64 %rd2437, %rd45, %rd2436;
|
||
ld.shared.u32 %r8191, [%rd2437];
|
||
add.s32 %r8192, %r8191, %r8190;
|
||
// inline asm
|
||
bfe.u32 %r6537, %r6542, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2438, %r6537, 4;
|
||
add.s64 %rd2439, %rd48, %rd2438;
|
||
ld.shared.u32 %r8193, [%rd2439];
|
||
xor.b32 %r8194, %r8193, %r8192;
|
||
// inline asm
|
||
bfe.u32 %r6541, %r6542, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2440, %r6541, 4;
|
||
add.s64 %rd2441, %rd51, %rd2440;
|
||
ld.shared.u32 %r8195, [%rd2441];
|
||
add.s32 %r8196, %r8195, %r8194;
|
||
xor.b32 %r8197, %r10348, %r6526;
|
||
xor.b32 %r6558, %r8197, %r8196;
|
||
// inline asm
|
||
bfe.u32 %r6545, %r6558, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2442, %r6545, 4;
|
||
add.s64 %rd2443, %rd42, %rd2442;
|
||
ld.shared.u32 %r8198, [%rd2443];
|
||
// inline asm
|
||
bfe.u32 %r6549, %r6558, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2444, %r6549, 4;
|
||
add.s64 %rd2445, %rd45, %rd2444;
|
||
ld.shared.u32 %r8199, [%rd2445];
|
||
add.s32 %r8200, %r8199, %r8198;
|
||
// inline asm
|
||
bfe.u32 %r6553, %r6558, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2446, %r6553, 4;
|
||
add.s64 %rd2447, %rd48, %rd2446;
|
||
ld.shared.u32 %r8201, [%rd2447];
|
||
xor.b32 %r8202, %r8201, %r8200;
|
||
// inline asm
|
||
bfe.u32 %r6557, %r6558, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2448, %r6557, 4;
|
||
add.s64 %rd2449, %rd51, %rd2448;
|
||
ld.shared.u32 %r8203, [%rd2449];
|
||
add.s32 %r8204, %r8203, %r8202;
|
||
xor.b32 %r8205, %r10347, %r6542;
|
||
xor.b32 %r6574, %r8205, %r8204;
|
||
// inline asm
|
||
bfe.u32 %r6561, %r6574, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2450, %r6561, 4;
|
||
add.s64 %rd2451, %rd42, %rd2450;
|
||
ld.shared.u32 %r8206, [%rd2451];
|
||
// inline asm
|
||
bfe.u32 %r6565, %r6574, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2452, %r6565, 4;
|
||
add.s64 %rd2453, %rd45, %rd2452;
|
||
ld.shared.u32 %r8207, [%rd2453];
|
||
add.s32 %r8208, %r8207, %r8206;
|
||
// inline asm
|
||
bfe.u32 %r6569, %r6574, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2454, %r6569, 4;
|
||
add.s64 %rd2455, %rd48, %rd2454;
|
||
ld.shared.u32 %r8209, [%rd2455];
|
||
xor.b32 %r8210, %r8209, %r8208;
|
||
// inline asm
|
||
bfe.u32 %r6573, %r6574, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2456, %r6573, 4;
|
||
add.s64 %rd2457, %rd51, %rd2456;
|
||
ld.shared.u32 %r8211, [%rd2457];
|
||
add.s32 %r8212, %r8211, %r8210;
|
||
xor.b32 %r8213, %r10346, %r6558;
|
||
xor.b32 %r6590, %r8213, %r8212;
|
||
// inline asm
|
||
bfe.u32 %r6577, %r6590, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2458, %r6577, 4;
|
||
add.s64 %rd2459, %rd42, %rd2458;
|
||
ld.shared.u32 %r8214, [%rd2459];
|
||
// inline asm
|
||
bfe.u32 %r6581, %r6590, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2460, %r6581, 4;
|
||
add.s64 %rd2461, %rd45, %rd2460;
|
||
ld.shared.u32 %r8215, [%rd2461];
|
||
add.s32 %r8216, %r8215, %r8214;
|
||
// inline asm
|
||
bfe.u32 %r6585, %r6590, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2462, %r6585, 4;
|
||
add.s64 %rd2463, %rd48, %rd2462;
|
||
ld.shared.u32 %r8217, [%rd2463];
|
||
xor.b32 %r8218, %r8217, %r8216;
|
||
// inline asm
|
||
bfe.u32 %r6589, %r6590, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2464, %r6589, 4;
|
||
add.s64 %rd2465, %rd51, %rd2464;
|
||
ld.shared.u32 %r8219, [%rd2465];
|
||
add.s32 %r8220, %r8219, %r8218;
|
||
xor.b32 %r8221, %r10345, %r6574;
|
||
xor.b32 %r6606, %r8221, %r8220;
|
||
// inline asm
|
||
bfe.u32 %r6593, %r6606, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2466, %r6593, 4;
|
||
add.s64 %rd2467, %rd42, %rd2466;
|
||
ld.shared.u32 %r8222, [%rd2467];
|
||
// inline asm
|
||
bfe.u32 %r6597, %r6606, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2468, %r6597, 4;
|
||
add.s64 %rd2469, %rd45, %rd2468;
|
||
ld.shared.u32 %r8223, [%rd2469];
|
||
add.s32 %r8224, %r8223, %r8222;
|
||
// inline asm
|
||
bfe.u32 %r6601, %r6606, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2470, %r6601, 4;
|
||
add.s64 %rd2471, %rd48, %rd2470;
|
||
ld.shared.u32 %r8225, [%rd2471];
|
||
xor.b32 %r8226, %r8225, %r8224;
|
||
// inline asm
|
||
bfe.u32 %r6605, %r6606, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2472, %r6605, 4;
|
||
add.s64 %rd2473, %rd51, %rd2472;
|
||
ld.shared.u32 %r8227, [%rd2473];
|
||
add.s32 %r8228, %r8227, %r8226;
|
||
xor.b32 %r8229, %r10344, %r6590;
|
||
xor.b32 %r6622, %r8229, %r8228;
|
||
// inline asm
|
||
bfe.u32 %r6609, %r6622, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2474, %r6609, 4;
|
||
add.s64 %rd2475, %rd42, %rd2474;
|
||
ld.shared.u32 %r8230, [%rd2475];
|
||
// inline asm
|
||
bfe.u32 %r6613, %r6622, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2476, %r6613, 4;
|
||
add.s64 %rd2477, %rd45, %rd2476;
|
||
ld.shared.u32 %r8231, [%rd2477];
|
||
add.s32 %r8232, %r8231, %r8230;
|
||
// inline asm
|
||
bfe.u32 %r6617, %r6622, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2478, %r6617, 4;
|
||
add.s64 %rd2479, %rd48, %rd2478;
|
||
ld.shared.u32 %r8233, [%rd2479];
|
||
xor.b32 %r8234, %r8233, %r8232;
|
||
// inline asm
|
||
bfe.u32 %r6621, %r6622, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2480, %r6621, 4;
|
||
add.s64 %rd2481, %rd51, %rd2480;
|
||
ld.shared.u32 %r8235, [%rd2481];
|
||
add.s32 %r8236, %r8235, %r8234;
|
||
xor.b32 %r8237, %r10343, %r6606;
|
||
xor.b32 %r6638, %r8237, %r8236;
|
||
// inline asm
|
||
bfe.u32 %r6625, %r6638, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2482, %r6625, 4;
|
||
add.s64 %rd2483, %rd42, %rd2482;
|
||
ld.shared.u32 %r8238, [%rd2483];
|
||
// inline asm
|
||
bfe.u32 %r6629, %r6638, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2484, %r6629, 4;
|
||
add.s64 %rd2485, %rd45, %rd2484;
|
||
ld.shared.u32 %r8239, [%rd2485];
|
||
add.s32 %r8240, %r8239, %r8238;
|
||
// inline asm
|
||
bfe.u32 %r6633, %r6638, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2486, %r6633, 4;
|
||
add.s64 %rd2487, %rd48, %rd2486;
|
||
ld.shared.u32 %r8241, [%rd2487];
|
||
xor.b32 %r8242, %r8241, %r8240;
|
||
// inline asm
|
||
bfe.u32 %r6637, %r6638, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2488, %r6637, 4;
|
||
add.s64 %rd2489, %rd51, %rd2488;
|
||
ld.shared.u32 %r8243, [%rd2489];
|
||
add.s32 %r8244, %r8243, %r8242;
|
||
xor.b32 %r8245, %r10342, %r6622;
|
||
xor.b32 %r6654, %r8245, %r8244;
|
||
// inline asm
|
||
bfe.u32 %r6641, %r6654, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2490, %r6641, 4;
|
||
add.s64 %rd2491, %rd42, %rd2490;
|
||
ld.shared.u32 %r8246, [%rd2491];
|
||
// inline asm
|
||
bfe.u32 %r6645, %r6654, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2492, %r6645, 4;
|
||
add.s64 %rd2493, %rd45, %rd2492;
|
||
ld.shared.u32 %r8247, [%rd2493];
|
||
add.s32 %r8248, %r8247, %r8246;
|
||
// inline asm
|
||
bfe.u32 %r6649, %r6654, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2494, %r6649, 4;
|
||
add.s64 %rd2495, %rd48, %rd2494;
|
||
ld.shared.u32 %r8249, [%rd2495];
|
||
xor.b32 %r8250, %r8249, %r8248;
|
||
// inline asm
|
||
bfe.u32 %r6653, %r6654, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2496, %r6653, 4;
|
||
add.s64 %rd2497, %rd51, %rd2496;
|
||
ld.shared.u32 %r8251, [%rd2497];
|
||
add.s32 %r8252, %r8251, %r8250;
|
||
xor.b32 %r8253, %r7609, %r6638;
|
||
xor.b32 %r6670, %r8253, %r8252;
|
||
// inline asm
|
||
bfe.u32 %r6657, %r6670, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2498, %r6657, 4;
|
||
add.s64 %rd2499, %rd42, %rd2498;
|
||
ld.shared.u32 %r8254, [%rd2499];
|
||
// inline asm
|
||
bfe.u32 %r6661, %r6670, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2500, %r6661, 4;
|
||
add.s64 %rd2501, %rd45, %rd2500;
|
||
ld.shared.u32 %r8255, [%rd2501];
|
||
add.s32 %r8256, %r8255, %r8254;
|
||
// inline asm
|
||
bfe.u32 %r6665, %r6670, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2502, %r6665, 4;
|
||
add.s64 %rd2503, %rd48, %rd2502;
|
||
ld.shared.u32 %r8257, [%rd2503];
|
||
xor.b32 %r8258, %r8257, %r8256;
|
||
// inline asm
|
||
bfe.u32 %r6669, %r6670, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2504, %r6669, 4;
|
||
add.s64 %rd2505, %rd51, %rd2504;
|
||
ld.shared.u32 %r8259, [%rd2505];
|
||
add.s32 %r8260, %r8259, %r8258;
|
||
xor.b32 %r8261, %r7618, %r6654;
|
||
xor.b32 %r6686, %r8261, %r8260;
|
||
// inline asm
|
||
bfe.u32 %r6673, %r6686, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2506, %r6673, 4;
|
||
add.s64 %rd2507, %rd42, %rd2506;
|
||
ld.shared.u32 %r8262, [%rd2507];
|
||
// inline asm
|
||
bfe.u32 %r6677, %r6686, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2508, %r6677, 4;
|
||
add.s64 %rd2509, %rd45, %rd2508;
|
||
ld.shared.u32 %r8263, [%rd2509];
|
||
add.s32 %r8264, %r8263, %r8262;
|
||
// inline asm
|
||
bfe.u32 %r6681, %r6686, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2510, %r6681, 4;
|
||
add.s64 %rd2511, %rd48, %rd2510;
|
||
ld.shared.u32 %r8265, [%rd2511];
|
||
xor.b32 %r8266, %r8265, %r8264;
|
||
// inline asm
|
||
bfe.u32 %r6685, %r6686, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2512, %r6685, 4;
|
||
add.s64 %rd2513, %rd51, %rd2512;
|
||
ld.shared.u32 %r8267, [%rd2513];
|
||
add.s32 %r8268, %r8267, %r8266;
|
||
xor.b32 %r8269, %r7627, %r6670;
|
||
xor.b32 %r6702, %r8269, %r8268;
|
||
// inline asm
|
||
bfe.u32 %r6689, %r6702, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2514, %r6689, 4;
|
||
add.s64 %rd2515, %rd42, %rd2514;
|
||
ld.shared.u32 %r8270, [%rd2515];
|
||
// inline asm
|
||
bfe.u32 %r6693, %r6702, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2516, %r6693, 4;
|
||
add.s64 %rd2517, %rd45, %rd2516;
|
||
ld.shared.u32 %r8271, [%rd2517];
|
||
add.s32 %r8272, %r8271, %r8270;
|
||
// inline asm
|
||
bfe.u32 %r6697, %r6702, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2518, %r6697, 4;
|
||
add.s64 %rd2519, %rd48, %rd2518;
|
||
ld.shared.u32 %r8273, [%rd2519];
|
||
xor.b32 %r8274, %r8273, %r8272;
|
||
// inline asm
|
||
bfe.u32 %r6701, %r6702, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2520, %r6701, 4;
|
||
add.s64 %rd2521, %rd51, %rd2520;
|
||
ld.shared.u32 %r8275, [%rd2521];
|
||
add.s32 %r8276, %r8275, %r8274;
|
||
xor.b32 %r8277, %r7636, %r6686;
|
||
xor.b32 %r6718, %r8277, %r8276;
|
||
// inline asm
|
||
bfe.u32 %r6705, %r6718, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2522, %r6705, 4;
|
||
add.s64 %rd2523, %rd42, %rd2522;
|
||
ld.shared.u32 %r8278, [%rd2523];
|
||
// inline asm
|
||
bfe.u32 %r6709, %r6718, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2524, %r6709, 4;
|
||
add.s64 %rd2525, %rd45, %rd2524;
|
||
ld.shared.u32 %r8279, [%rd2525];
|
||
add.s32 %r8280, %r8279, %r8278;
|
||
// inline asm
|
||
bfe.u32 %r6713, %r6718, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2526, %r6713, 4;
|
||
add.s64 %rd2527, %rd48, %rd2526;
|
||
ld.shared.u32 %r8281, [%rd2527];
|
||
xor.b32 %r8282, %r8281, %r8280;
|
||
// inline asm
|
||
bfe.u32 %r6717, %r6718, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2528, %r6717, 4;
|
||
add.s64 %rd2529, %rd51, %rd2528;
|
||
ld.shared.u32 %r8283, [%rd2529];
|
||
add.s32 %r8284, %r8283, %r8282;
|
||
xor.b32 %r8285, %r7645, %r6702;
|
||
xor.b32 %r6734, %r8285, %r8284;
|
||
// inline asm
|
||
bfe.u32 %r6721, %r6734, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2530, %r6721, 4;
|
||
add.s64 %rd2531, %rd42, %rd2530;
|
||
ld.shared.u32 %r8286, [%rd2531];
|
||
// inline asm
|
||
bfe.u32 %r6725, %r6734, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2532, %r6725, 4;
|
||
add.s64 %rd2533, %rd45, %rd2532;
|
||
ld.shared.u32 %r8287, [%rd2533];
|
||
add.s32 %r8288, %r8287, %r8286;
|
||
// inline asm
|
||
bfe.u32 %r6729, %r6734, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2534, %r6729, 4;
|
||
add.s64 %rd2535, %rd48, %rd2534;
|
||
ld.shared.u32 %r8289, [%rd2535];
|
||
xor.b32 %r8290, %r8289, %r8288;
|
||
// inline asm
|
||
bfe.u32 %r6733, %r6734, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2536, %r6733, 4;
|
||
add.s64 %rd2537, %rd51, %rd2536;
|
||
ld.shared.u32 %r8291, [%rd2537];
|
||
add.s32 %r8292, %r8291, %r8290;
|
||
xor.b32 %r8293, %r7654, %r6718;
|
||
xor.b32 %r6750, %r8293, %r8292;
|
||
// inline asm
|
||
bfe.u32 %r6737, %r6750, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2538, %r6737, 4;
|
||
add.s64 %rd2539, %rd42, %rd2538;
|
||
ld.shared.u32 %r8294, [%rd2539];
|
||
// inline asm
|
||
bfe.u32 %r6741, %r6750, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2540, %r6741, 4;
|
||
add.s64 %rd2541, %rd45, %rd2540;
|
||
ld.shared.u32 %r8295, [%rd2541];
|
||
add.s32 %r8296, %r8295, %r8294;
|
||
// inline asm
|
||
bfe.u32 %r6745, %r6750, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2542, %r6745, 4;
|
||
add.s64 %rd2543, %rd48, %rd2542;
|
||
ld.shared.u32 %r8297, [%rd2543];
|
||
xor.b32 %r8298, %r8297, %r8296;
|
||
// inline asm
|
||
bfe.u32 %r6749, %r6750, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2544, %r6749, 4;
|
||
add.s64 %rd2545, %rd51, %rd2544;
|
||
ld.shared.u32 %r8299, [%rd2545];
|
||
add.s32 %r8300, %r8299, %r8298;
|
||
xor.b32 %r8301, %r7663, %r6734;
|
||
xor.b32 %r10340, %r8301, %r8300;
|
||
xor.b32 %r10341, %r7665, %r6750;
|
||
xor.b32 %r6766, %r5470, %r6750;
|
||
// inline asm
|
||
bfe.u32 %r6753, %r6766, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2546, %r6753, 4;
|
||
add.s64 %rd2547, %rd42, %rd2546;
|
||
ld.shared.u32 %r8302, [%rd2547];
|
||
// inline asm
|
||
bfe.u32 %r6757, %r6766, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2548, %r6757, 4;
|
||
add.s64 %rd2549, %rd45, %rd2548;
|
||
ld.shared.u32 %r8303, [%rd2549];
|
||
add.s32 %r8304, %r8303, %r8302;
|
||
// inline asm
|
||
bfe.u32 %r6761, %r6766, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2550, %r6761, 4;
|
||
add.s64 %rd2551, %rd48, %rd2550;
|
||
ld.shared.u32 %r8305, [%rd2551];
|
||
xor.b32 %r8306, %r8305, %r8304;
|
||
// inline asm
|
||
bfe.u32 %r6765, %r6766, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2552, %r6765, 4;
|
||
add.s64 %rd2553, %rd51, %rd2552;
|
||
ld.shared.u32 %r8307, [%rd2553];
|
||
add.s32 %r8308, %r8307, %r8306;
|
||
xor.b32 %r8309, %r10350, %r10340;
|
||
xor.b32 %r6782, %r8309, %r8308;
|
||
// inline asm
|
||
bfe.u32 %r6769, %r6782, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2554, %r6769, 4;
|
||
add.s64 %rd2555, %rd42, %rd2554;
|
||
ld.shared.u32 %r8310, [%rd2555];
|
||
// inline asm
|
||
bfe.u32 %r6773, %r6782, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2556, %r6773, 4;
|
||
add.s64 %rd2557, %rd45, %rd2556;
|
||
ld.shared.u32 %r8311, [%rd2557];
|
||
add.s32 %r8312, %r8311, %r8310;
|
||
// inline asm
|
||
bfe.u32 %r6777, %r6782, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2558, %r6777, 4;
|
||
add.s64 %rd2559, %rd48, %rd2558;
|
||
ld.shared.u32 %r8313, [%rd2559];
|
||
xor.b32 %r8314, %r8313, %r8312;
|
||
// inline asm
|
||
bfe.u32 %r6781, %r6782, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2560, %r6781, 4;
|
||
add.s64 %rd2561, %rd51, %rd2560;
|
||
ld.shared.u32 %r8315, [%rd2561];
|
||
add.s32 %r8316, %r8315, %r8314;
|
||
xor.b32 %r8317, %r10349, %r6766;
|
||
xor.b32 %r6798, %r8317, %r8316;
|
||
// inline asm
|
||
bfe.u32 %r6785, %r6798, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2562, %r6785, 4;
|
||
add.s64 %rd2563, %rd42, %rd2562;
|
||
ld.shared.u32 %r8318, [%rd2563];
|
||
// inline asm
|
||
bfe.u32 %r6789, %r6798, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2564, %r6789, 4;
|
||
add.s64 %rd2565, %rd45, %rd2564;
|
||
ld.shared.u32 %r8319, [%rd2565];
|
||
add.s32 %r8320, %r8319, %r8318;
|
||
// inline asm
|
||
bfe.u32 %r6793, %r6798, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2566, %r6793, 4;
|
||
add.s64 %rd2567, %rd48, %rd2566;
|
||
ld.shared.u32 %r8321, [%rd2567];
|
||
xor.b32 %r8322, %r8321, %r8320;
|
||
// inline asm
|
||
bfe.u32 %r6797, %r6798, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2568, %r6797, 4;
|
||
add.s64 %rd2569, %rd51, %rd2568;
|
||
ld.shared.u32 %r8323, [%rd2569];
|
||
add.s32 %r8324, %r8323, %r8322;
|
||
xor.b32 %r8325, %r10348, %r6782;
|
||
xor.b32 %r6814, %r8325, %r8324;
|
||
// inline asm
|
||
bfe.u32 %r6801, %r6814, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2570, %r6801, 4;
|
||
add.s64 %rd2571, %rd42, %rd2570;
|
||
ld.shared.u32 %r8326, [%rd2571];
|
||
// inline asm
|
||
bfe.u32 %r6805, %r6814, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2572, %r6805, 4;
|
||
add.s64 %rd2573, %rd45, %rd2572;
|
||
ld.shared.u32 %r8327, [%rd2573];
|
||
add.s32 %r8328, %r8327, %r8326;
|
||
// inline asm
|
||
bfe.u32 %r6809, %r6814, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2574, %r6809, 4;
|
||
add.s64 %rd2575, %rd48, %rd2574;
|
||
ld.shared.u32 %r8329, [%rd2575];
|
||
xor.b32 %r8330, %r8329, %r8328;
|
||
// inline asm
|
||
bfe.u32 %r6813, %r6814, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2576, %r6813, 4;
|
||
add.s64 %rd2577, %rd51, %rd2576;
|
||
ld.shared.u32 %r8331, [%rd2577];
|
||
add.s32 %r8332, %r8331, %r8330;
|
||
xor.b32 %r8333, %r10347, %r6798;
|
||
xor.b32 %r6830, %r8333, %r8332;
|
||
// inline asm
|
||
bfe.u32 %r6817, %r6830, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2578, %r6817, 4;
|
||
add.s64 %rd2579, %rd42, %rd2578;
|
||
ld.shared.u32 %r8334, [%rd2579];
|
||
// inline asm
|
||
bfe.u32 %r6821, %r6830, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2580, %r6821, 4;
|
||
add.s64 %rd2581, %rd45, %rd2580;
|
||
ld.shared.u32 %r8335, [%rd2581];
|
||
add.s32 %r8336, %r8335, %r8334;
|
||
// inline asm
|
||
bfe.u32 %r6825, %r6830, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2582, %r6825, 4;
|
||
add.s64 %rd2583, %rd48, %rd2582;
|
||
ld.shared.u32 %r8337, [%rd2583];
|
||
xor.b32 %r8338, %r8337, %r8336;
|
||
// inline asm
|
||
bfe.u32 %r6829, %r6830, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2584, %r6829, 4;
|
||
add.s64 %rd2585, %rd51, %rd2584;
|
||
ld.shared.u32 %r8339, [%rd2585];
|
||
add.s32 %r8340, %r8339, %r8338;
|
||
xor.b32 %r8341, %r10346, %r6814;
|
||
xor.b32 %r6846, %r8341, %r8340;
|
||
// inline asm
|
||
bfe.u32 %r6833, %r6846, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2586, %r6833, 4;
|
||
add.s64 %rd2587, %rd42, %rd2586;
|
||
ld.shared.u32 %r8342, [%rd2587];
|
||
// inline asm
|
||
bfe.u32 %r6837, %r6846, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2588, %r6837, 4;
|
||
add.s64 %rd2589, %rd45, %rd2588;
|
||
ld.shared.u32 %r8343, [%rd2589];
|
||
add.s32 %r8344, %r8343, %r8342;
|
||
// inline asm
|
||
bfe.u32 %r6841, %r6846, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2590, %r6841, 4;
|
||
add.s64 %rd2591, %rd48, %rd2590;
|
||
ld.shared.u32 %r8345, [%rd2591];
|
||
xor.b32 %r8346, %r8345, %r8344;
|
||
// inline asm
|
||
bfe.u32 %r6845, %r6846, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2592, %r6845, 4;
|
||
add.s64 %rd2593, %rd51, %rd2592;
|
||
ld.shared.u32 %r8347, [%rd2593];
|
||
add.s32 %r8348, %r8347, %r8346;
|
||
xor.b32 %r8349, %r10345, %r6830;
|
||
xor.b32 %r6862, %r8349, %r8348;
|
||
// inline asm
|
||
bfe.u32 %r6849, %r6862, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2594, %r6849, 4;
|
||
add.s64 %rd2595, %rd42, %rd2594;
|
||
ld.shared.u32 %r8350, [%rd2595];
|
||
// inline asm
|
||
bfe.u32 %r6853, %r6862, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2596, %r6853, 4;
|
||
add.s64 %rd2597, %rd45, %rd2596;
|
||
ld.shared.u32 %r8351, [%rd2597];
|
||
add.s32 %r8352, %r8351, %r8350;
|
||
// inline asm
|
||
bfe.u32 %r6857, %r6862, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2598, %r6857, 4;
|
||
add.s64 %rd2599, %rd48, %rd2598;
|
||
ld.shared.u32 %r8353, [%rd2599];
|
||
xor.b32 %r8354, %r8353, %r8352;
|
||
// inline asm
|
||
bfe.u32 %r6861, %r6862, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2600, %r6861, 4;
|
||
add.s64 %rd2601, %rd51, %rd2600;
|
||
ld.shared.u32 %r8355, [%rd2601];
|
||
add.s32 %r8356, %r8355, %r8354;
|
||
xor.b32 %r8357, %r10344, %r6846;
|
||
xor.b32 %r6878, %r8357, %r8356;
|
||
// inline asm
|
||
bfe.u32 %r6865, %r6878, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2602, %r6865, 4;
|
||
add.s64 %rd2603, %rd42, %rd2602;
|
||
ld.shared.u32 %r8358, [%rd2603];
|
||
// inline asm
|
||
bfe.u32 %r6869, %r6878, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2604, %r6869, 4;
|
||
add.s64 %rd2605, %rd45, %rd2604;
|
||
ld.shared.u32 %r8359, [%rd2605];
|
||
add.s32 %r8360, %r8359, %r8358;
|
||
// inline asm
|
||
bfe.u32 %r6873, %r6878, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2606, %r6873, 4;
|
||
add.s64 %rd2607, %rd48, %rd2606;
|
||
ld.shared.u32 %r8361, [%rd2607];
|
||
xor.b32 %r8362, %r8361, %r8360;
|
||
// inline asm
|
||
bfe.u32 %r6877, %r6878, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2608, %r6877, 4;
|
||
add.s64 %rd2609, %rd51, %rd2608;
|
||
ld.shared.u32 %r8363, [%rd2609];
|
||
add.s32 %r8364, %r8363, %r8362;
|
||
xor.b32 %r8365, %r10343, %r6862;
|
||
xor.b32 %r6894, %r8365, %r8364;
|
||
// inline asm
|
||
bfe.u32 %r6881, %r6894, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2610, %r6881, 4;
|
||
add.s64 %rd2611, %rd42, %rd2610;
|
||
ld.shared.u32 %r8366, [%rd2611];
|
||
// inline asm
|
||
bfe.u32 %r6885, %r6894, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2612, %r6885, 4;
|
||
add.s64 %rd2613, %rd45, %rd2612;
|
||
ld.shared.u32 %r8367, [%rd2613];
|
||
add.s32 %r8368, %r8367, %r8366;
|
||
// inline asm
|
||
bfe.u32 %r6889, %r6894, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2614, %r6889, 4;
|
||
add.s64 %rd2615, %rd48, %rd2614;
|
||
ld.shared.u32 %r8369, [%rd2615];
|
||
xor.b32 %r8370, %r8369, %r8368;
|
||
// inline asm
|
||
bfe.u32 %r6893, %r6894, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2616, %r6893, 4;
|
||
add.s64 %rd2617, %rd51, %rd2616;
|
||
ld.shared.u32 %r8371, [%rd2617];
|
||
add.s32 %r8372, %r8371, %r8370;
|
||
xor.b32 %r8373, %r10342, %r6878;
|
||
xor.b32 %r6910, %r8373, %r8372;
|
||
// inline asm
|
||
bfe.u32 %r6897, %r6910, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2618, %r6897, 4;
|
||
add.s64 %rd2619, %rd42, %rd2618;
|
||
ld.shared.u32 %r8374, [%rd2619];
|
||
// inline asm
|
||
bfe.u32 %r6901, %r6910, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2620, %r6901, 4;
|
||
add.s64 %rd2621, %rd45, %rd2620;
|
||
ld.shared.u32 %r8375, [%rd2621];
|
||
add.s32 %r8376, %r8375, %r8374;
|
||
// inline asm
|
||
bfe.u32 %r6905, %r6910, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2622, %r6905, 4;
|
||
add.s64 %rd2623, %rd48, %rd2622;
|
||
ld.shared.u32 %r8377, [%rd2623];
|
||
xor.b32 %r8378, %r8377, %r8376;
|
||
// inline asm
|
||
bfe.u32 %r6909, %r6910, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2624, %r6909, 4;
|
||
add.s64 %rd2625, %rd51, %rd2624;
|
||
ld.shared.u32 %r8379, [%rd2625];
|
||
add.s32 %r8380, %r8379, %r8378;
|
||
xor.b32 %r8381, %r10341, %r6894;
|
||
xor.b32 %r6926, %r8381, %r8380;
|
||
// inline asm
|
||
bfe.u32 %r6913, %r6926, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2626, %r6913, 4;
|
||
add.s64 %rd2627, %rd42, %rd2626;
|
||
ld.shared.u32 %r8382, [%rd2627];
|
||
// inline asm
|
||
bfe.u32 %r6917, %r6926, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2628, %r6917, 4;
|
||
add.s64 %rd2629, %rd45, %rd2628;
|
||
ld.shared.u32 %r8383, [%rd2629];
|
||
add.s32 %r8384, %r8383, %r8382;
|
||
// inline asm
|
||
bfe.u32 %r6921, %r6926, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2630, %r6921, 4;
|
||
add.s64 %rd2631, %rd48, %rd2630;
|
||
ld.shared.u32 %r8385, [%rd2631];
|
||
xor.b32 %r8386, %r8385, %r8384;
|
||
// inline asm
|
||
bfe.u32 %r6925, %r6926, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2632, %r6925, 4;
|
||
add.s64 %rd2633, %rd51, %rd2632;
|
||
ld.shared.u32 %r8387, [%rd2633];
|
||
add.s32 %r8388, %r8387, %r8386;
|
||
xor.b32 %r8389, %r10340, %r6910;
|
||
xor.b32 %r6942, %r8389, %r8388;
|
||
// inline asm
|
||
bfe.u32 %r6929, %r6942, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2634, %r6929, 4;
|
||
add.s64 %rd2635, %rd42, %rd2634;
|
||
ld.shared.u32 %r8390, [%rd2635];
|
||
// inline asm
|
||
bfe.u32 %r6933, %r6942, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2636, %r6933, 4;
|
||
add.s64 %rd2637, %rd45, %rd2636;
|
||
ld.shared.u32 %r8391, [%rd2637];
|
||
add.s32 %r8392, %r8391, %r8390;
|
||
// inline asm
|
||
bfe.u32 %r6937, %r6942, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2638, %r6937, 4;
|
||
add.s64 %rd2639, %rd48, %rd2638;
|
||
ld.shared.u32 %r8393, [%rd2639];
|
||
xor.b32 %r8394, %r8393, %r8392;
|
||
// inline asm
|
||
bfe.u32 %r6941, %r6942, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2640, %r6941, 4;
|
||
add.s64 %rd2641, %rd51, %rd2640;
|
||
ld.shared.u32 %r8395, [%rd2641];
|
||
add.s32 %r8396, %r8395, %r8394;
|
||
xor.b32 %r8397, %r7627, %r6926;
|
||
xor.b32 %r6958, %r8397, %r8396;
|
||
// inline asm
|
||
bfe.u32 %r6945, %r6958, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2642, %r6945, 4;
|
||
add.s64 %rd2643, %rd42, %rd2642;
|
||
ld.shared.u32 %r8398, [%rd2643];
|
||
// inline asm
|
||
bfe.u32 %r6949, %r6958, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2644, %r6949, 4;
|
||
add.s64 %rd2645, %rd45, %rd2644;
|
||
ld.shared.u32 %r8399, [%rd2645];
|
||
add.s32 %r8400, %r8399, %r8398;
|
||
// inline asm
|
||
bfe.u32 %r6953, %r6958, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2646, %r6953, 4;
|
||
add.s64 %rd2647, %rd48, %rd2646;
|
||
ld.shared.u32 %r8401, [%rd2647];
|
||
xor.b32 %r8402, %r8401, %r8400;
|
||
// inline asm
|
||
bfe.u32 %r6957, %r6958, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2648, %r6957, 4;
|
||
add.s64 %rd2649, %rd51, %rd2648;
|
||
ld.shared.u32 %r8403, [%rd2649];
|
||
add.s32 %r8404, %r8403, %r8402;
|
||
xor.b32 %r8405, %r7636, %r6942;
|
||
xor.b32 %r6974, %r8405, %r8404;
|
||
// inline asm
|
||
bfe.u32 %r6961, %r6974, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2650, %r6961, 4;
|
||
add.s64 %rd2651, %rd42, %rd2650;
|
||
ld.shared.u32 %r8406, [%rd2651];
|
||
// inline asm
|
||
bfe.u32 %r6965, %r6974, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2652, %r6965, 4;
|
||
add.s64 %rd2653, %rd45, %rd2652;
|
||
ld.shared.u32 %r8407, [%rd2653];
|
||
add.s32 %r8408, %r8407, %r8406;
|
||
// inline asm
|
||
bfe.u32 %r6969, %r6974, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2654, %r6969, 4;
|
||
add.s64 %rd2655, %rd48, %rd2654;
|
||
ld.shared.u32 %r8409, [%rd2655];
|
||
xor.b32 %r8410, %r8409, %r8408;
|
||
// inline asm
|
||
bfe.u32 %r6973, %r6974, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2656, %r6973, 4;
|
||
add.s64 %rd2657, %rd51, %rd2656;
|
||
ld.shared.u32 %r8411, [%rd2657];
|
||
add.s32 %r8412, %r8411, %r8410;
|
||
xor.b32 %r8413, %r7645, %r6958;
|
||
xor.b32 %r6990, %r8413, %r8412;
|
||
// inline asm
|
||
bfe.u32 %r6977, %r6990, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2658, %r6977, 4;
|
||
add.s64 %rd2659, %rd42, %rd2658;
|
||
ld.shared.u32 %r8414, [%rd2659];
|
||
// inline asm
|
||
bfe.u32 %r6981, %r6990, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2660, %r6981, 4;
|
||
add.s64 %rd2661, %rd45, %rd2660;
|
||
ld.shared.u32 %r8415, [%rd2661];
|
||
add.s32 %r8416, %r8415, %r8414;
|
||
// inline asm
|
||
bfe.u32 %r6985, %r6990, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2662, %r6985, 4;
|
||
add.s64 %rd2663, %rd48, %rd2662;
|
||
ld.shared.u32 %r8417, [%rd2663];
|
||
xor.b32 %r8418, %r8417, %r8416;
|
||
// inline asm
|
||
bfe.u32 %r6989, %r6990, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2664, %r6989, 4;
|
||
add.s64 %rd2665, %rd51, %rd2664;
|
||
ld.shared.u32 %r8419, [%rd2665];
|
||
add.s32 %r8420, %r8419, %r8418;
|
||
xor.b32 %r8421, %r7654, %r6974;
|
||
xor.b32 %r7006, %r8421, %r8420;
|
||
// inline asm
|
||
bfe.u32 %r6993, %r7006, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2666, %r6993, 4;
|
||
add.s64 %rd2667, %rd42, %rd2666;
|
||
ld.shared.u32 %r8422, [%rd2667];
|
||
// inline asm
|
||
bfe.u32 %r6997, %r7006, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2668, %r6997, 4;
|
||
add.s64 %rd2669, %rd45, %rd2668;
|
||
ld.shared.u32 %r8423, [%rd2669];
|
||
add.s32 %r8424, %r8423, %r8422;
|
||
// inline asm
|
||
bfe.u32 %r7001, %r7006, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2670, %r7001, 4;
|
||
add.s64 %rd2671, %rd48, %rd2670;
|
||
ld.shared.u32 %r8425, [%rd2671];
|
||
xor.b32 %r8426, %r8425, %r8424;
|
||
// inline asm
|
||
bfe.u32 %r7005, %r7006, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2672, %r7005, 4;
|
||
add.s64 %rd2673, %rd51, %rd2672;
|
||
ld.shared.u32 %r8427, [%rd2673];
|
||
add.s32 %r8428, %r8427, %r8426;
|
||
xor.b32 %r8429, %r7663, %r6990;
|
||
xor.b32 %r10338, %r8429, %r8428;
|
||
xor.b32 %r10339, %r7665, %r7006;
|
||
xor.b32 %r7022, %r5470, %r7006;
|
||
// inline asm
|
||
bfe.u32 %r7009, %r7022, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2674, %r7009, 4;
|
||
add.s64 %rd2675, %rd42, %rd2674;
|
||
ld.shared.u32 %r8430, [%rd2675];
|
||
// inline asm
|
||
bfe.u32 %r7013, %r7022, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2676, %r7013, 4;
|
||
add.s64 %rd2677, %rd45, %rd2676;
|
||
ld.shared.u32 %r8431, [%rd2677];
|
||
add.s32 %r8432, %r8431, %r8430;
|
||
// inline asm
|
||
bfe.u32 %r7017, %r7022, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2678, %r7017, 4;
|
||
add.s64 %rd2679, %rd48, %rd2678;
|
||
ld.shared.u32 %r8433, [%rd2679];
|
||
xor.b32 %r8434, %r8433, %r8432;
|
||
// inline asm
|
||
bfe.u32 %r7021, %r7022, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2680, %r7021, 4;
|
||
add.s64 %rd2681, %rd51, %rd2680;
|
||
ld.shared.u32 %r8435, [%rd2681];
|
||
add.s32 %r8436, %r8435, %r8434;
|
||
xor.b32 %r8437, %r10350, %r10338;
|
||
xor.b32 %r7038, %r8437, %r8436;
|
||
// inline asm
|
||
bfe.u32 %r7025, %r7038, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2682, %r7025, 4;
|
||
add.s64 %rd2683, %rd42, %rd2682;
|
||
ld.shared.u32 %r8438, [%rd2683];
|
||
// inline asm
|
||
bfe.u32 %r7029, %r7038, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2684, %r7029, 4;
|
||
add.s64 %rd2685, %rd45, %rd2684;
|
||
ld.shared.u32 %r8439, [%rd2685];
|
||
add.s32 %r8440, %r8439, %r8438;
|
||
// inline asm
|
||
bfe.u32 %r7033, %r7038, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2686, %r7033, 4;
|
||
add.s64 %rd2687, %rd48, %rd2686;
|
||
ld.shared.u32 %r8441, [%rd2687];
|
||
xor.b32 %r8442, %r8441, %r8440;
|
||
// inline asm
|
||
bfe.u32 %r7037, %r7038, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2688, %r7037, 4;
|
||
add.s64 %rd2689, %rd51, %rd2688;
|
||
ld.shared.u32 %r8443, [%rd2689];
|
||
add.s32 %r8444, %r8443, %r8442;
|
||
xor.b32 %r8445, %r10349, %r7022;
|
||
xor.b32 %r7054, %r8445, %r8444;
|
||
// inline asm
|
||
bfe.u32 %r7041, %r7054, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2690, %r7041, 4;
|
||
add.s64 %rd2691, %rd42, %rd2690;
|
||
ld.shared.u32 %r8446, [%rd2691];
|
||
// inline asm
|
||
bfe.u32 %r7045, %r7054, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2692, %r7045, 4;
|
||
add.s64 %rd2693, %rd45, %rd2692;
|
||
ld.shared.u32 %r8447, [%rd2693];
|
||
add.s32 %r8448, %r8447, %r8446;
|
||
// inline asm
|
||
bfe.u32 %r7049, %r7054, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2694, %r7049, 4;
|
||
add.s64 %rd2695, %rd48, %rd2694;
|
||
ld.shared.u32 %r8449, [%rd2695];
|
||
xor.b32 %r8450, %r8449, %r8448;
|
||
// inline asm
|
||
bfe.u32 %r7053, %r7054, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2696, %r7053, 4;
|
||
add.s64 %rd2697, %rd51, %rd2696;
|
||
ld.shared.u32 %r8451, [%rd2697];
|
||
add.s32 %r8452, %r8451, %r8450;
|
||
xor.b32 %r8453, %r10348, %r7038;
|
||
xor.b32 %r7070, %r8453, %r8452;
|
||
// inline asm
|
||
bfe.u32 %r7057, %r7070, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2698, %r7057, 4;
|
||
add.s64 %rd2699, %rd42, %rd2698;
|
||
ld.shared.u32 %r8454, [%rd2699];
|
||
// inline asm
|
||
bfe.u32 %r7061, %r7070, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2700, %r7061, 4;
|
||
add.s64 %rd2701, %rd45, %rd2700;
|
||
ld.shared.u32 %r8455, [%rd2701];
|
||
add.s32 %r8456, %r8455, %r8454;
|
||
// inline asm
|
||
bfe.u32 %r7065, %r7070, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2702, %r7065, 4;
|
||
add.s64 %rd2703, %rd48, %rd2702;
|
||
ld.shared.u32 %r8457, [%rd2703];
|
||
xor.b32 %r8458, %r8457, %r8456;
|
||
// inline asm
|
||
bfe.u32 %r7069, %r7070, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2704, %r7069, 4;
|
||
add.s64 %rd2705, %rd51, %rd2704;
|
||
ld.shared.u32 %r8459, [%rd2705];
|
||
add.s32 %r8460, %r8459, %r8458;
|
||
xor.b32 %r8461, %r10347, %r7054;
|
||
xor.b32 %r7086, %r8461, %r8460;
|
||
// inline asm
|
||
bfe.u32 %r7073, %r7086, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2706, %r7073, 4;
|
||
add.s64 %rd2707, %rd42, %rd2706;
|
||
ld.shared.u32 %r8462, [%rd2707];
|
||
// inline asm
|
||
bfe.u32 %r7077, %r7086, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2708, %r7077, 4;
|
||
add.s64 %rd2709, %rd45, %rd2708;
|
||
ld.shared.u32 %r8463, [%rd2709];
|
||
add.s32 %r8464, %r8463, %r8462;
|
||
// inline asm
|
||
bfe.u32 %r7081, %r7086, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2710, %r7081, 4;
|
||
add.s64 %rd2711, %rd48, %rd2710;
|
||
ld.shared.u32 %r8465, [%rd2711];
|
||
xor.b32 %r8466, %r8465, %r8464;
|
||
// inline asm
|
||
bfe.u32 %r7085, %r7086, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2712, %r7085, 4;
|
||
add.s64 %rd2713, %rd51, %rd2712;
|
||
ld.shared.u32 %r8467, [%rd2713];
|
||
add.s32 %r8468, %r8467, %r8466;
|
||
xor.b32 %r8469, %r10346, %r7070;
|
||
xor.b32 %r7102, %r8469, %r8468;
|
||
// inline asm
|
||
bfe.u32 %r7089, %r7102, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2714, %r7089, 4;
|
||
add.s64 %rd2715, %rd42, %rd2714;
|
||
ld.shared.u32 %r8470, [%rd2715];
|
||
// inline asm
|
||
bfe.u32 %r7093, %r7102, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2716, %r7093, 4;
|
||
add.s64 %rd2717, %rd45, %rd2716;
|
||
ld.shared.u32 %r8471, [%rd2717];
|
||
add.s32 %r8472, %r8471, %r8470;
|
||
// inline asm
|
||
bfe.u32 %r7097, %r7102, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2718, %r7097, 4;
|
||
add.s64 %rd2719, %rd48, %rd2718;
|
||
ld.shared.u32 %r8473, [%rd2719];
|
||
xor.b32 %r8474, %r8473, %r8472;
|
||
// inline asm
|
||
bfe.u32 %r7101, %r7102, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2720, %r7101, 4;
|
||
add.s64 %rd2721, %rd51, %rd2720;
|
||
ld.shared.u32 %r8475, [%rd2721];
|
||
add.s32 %r8476, %r8475, %r8474;
|
||
xor.b32 %r8477, %r10345, %r7086;
|
||
xor.b32 %r7118, %r8477, %r8476;
|
||
// inline asm
|
||
bfe.u32 %r7105, %r7118, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2722, %r7105, 4;
|
||
add.s64 %rd2723, %rd42, %rd2722;
|
||
ld.shared.u32 %r8478, [%rd2723];
|
||
// inline asm
|
||
bfe.u32 %r7109, %r7118, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2724, %r7109, 4;
|
||
add.s64 %rd2725, %rd45, %rd2724;
|
||
ld.shared.u32 %r8479, [%rd2725];
|
||
add.s32 %r8480, %r8479, %r8478;
|
||
// inline asm
|
||
bfe.u32 %r7113, %r7118, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2726, %r7113, 4;
|
||
add.s64 %rd2727, %rd48, %rd2726;
|
||
ld.shared.u32 %r8481, [%rd2727];
|
||
xor.b32 %r8482, %r8481, %r8480;
|
||
// inline asm
|
||
bfe.u32 %r7117, %r7118, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2728, %r7117, 4;
|
||
add.s64 %rd2729, %rd51, %rd2728;
|
||
ld.shared.u32 %r8483, [%rd2729];
|
||
add.s32 %r8484, %r8483, %r8482;
|
||
xor.b32 %r8485, %r10344, %r7102;
|
||
xor.b32 %r7134, %r8485, %r8484;
|
||
// inline asm
|
||
bfe.u32 %r7121, %r7134, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2730, %r7121, 4;
|
||
add.s64 %rd2731, %rd42, %rd2730;
|
||
ld.shared.u32 %r8486, [%rd2731];
|
||
// inline asm
|
||
bfe.u32 %r7125, %r7134, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2732, %r7125, 4;
|
||
add.s64 %rd2733, %rd45, %rd2732;
|
||
ld.shared.u32 %r8487, [%rd2733];
|
||
add.s32 %r8488, %r8487, %r8486;
|
||
// inline asm
|
||
bfe.u32 %r7129, %r7134, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2734, %r7129, 4;
|
||
add.s64 %rd2735, %rd48, %rd2734;
|
||
ld.shared.u32 %r8489, [%rd2735];
|
||
xor.b32 %r8490, %r8489, %r8488;
|
||
// inline asm
|
||
bfe.u32 %r7133, %r7134, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2736, %r7133, 4;
|
||
add.s64 %rd2737, %rd51, %rd2736;
|
||
ld.shared.u32 %r8491, [%rd2737];
|
||
add.s32 %r8492, %r8491, %r8490;
|
||
xor.b32 %r8493, %r10343, %r7118;
|
||
xor.b32 %r7150, %r8493, %r8492;
|
||
// inline asm
|
||
bfe.u32 %r7137, %r7150, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2738, %r7137, 4;
|
||
add.s64 %rd2739, %rd42, %rd2738;
|
||
ld.shared.u32 %r8494, [%rd2739];
|
||
// inline asm
|
||
bfe.u32 %r7141, %r7150, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2740, %r7141, 4;
|
||
add.s64 %rd2741, %rd45, %rd2740;
|
||
ld.shared.u32 %r8495, [%rd2741];
|
||
add.s32 %r8496, %r8495, %r8494;
|
||
// inline asm
|
||
bfe.u32 %r7145, %r7150, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2742, %r7145, 4;
|
||
add.s64 %rd2743, %rd48, %rd2742;
|
||
ld.shared.u32 %r8497, [%rd2743];
|
||
xor.b32 %r8498, %r8497, %r8496;
|
||
// inline asm
|
||
bfe.u32 %r7149, %r7150, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2744, %r7149, 4;
|
||
add.s64 %rd2745, %rd51, %rd2744;
|
||
ld.shared.u32 %r8499, [%rd2745];
|
||
add.s32 %r8500, %r8499, %r8498;
|
||
xor.b32 %r8501, %r10342, %r7134;
|
||
xor.b32 %r7166, %r8501, %r8500;
|
||
// inline asm
|
||
bfe.u32 %r7153, %r7166, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2746, %r7153, 4;
|
||
add.s64 %rd2747, %rd42, %rd2746;
|
||
ld.shared.u32 %r8502, [%rd2747];
|
||
// inline asm
|
||
bfe.u32 %r7157, %r7166, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2748, %r7157, 4;
|
||
add.s64 %rd2749, %rd45, %rd2748;
|
||
ld.shared.u32 %r8503, [%rd2749];
|
||
add.s32 %r8504, %r8503, %r8502;
|
||
// inline asm
|
||
bfe.u32 %r7161, %r7166, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2750, %r7161, 4;
|
||
add.s64 %rd2751, %rd48, %rd2750;
|
||
ld.shared.u32 %r8505, [%rd2751];
|
||
xor.b32 %r8506, %r8505, %r8504;
|
||
// inline asm
|
||
bfe.u32 %r7165, %r7166, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2752, %r7165, 4;
|
||
add.s64 %rd2753, %rd51, %rd2752;
|
||
ld.shared.u32 %r8507, [%rd2753];
|
||
add.s32 %r8508, %r8507, %r8506;
|
||
xor.b32 %r8509, %r10341, %r7150;
|
||
xor.b32 %r7182, %r8509, %r8508;
|
||
// inline asm
|
||
bfe.u32 %r7169, %r7182, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2754, %r7169, 4;
|
||
add.s64 %rd2755, %rd42, %rd2754;
|
||
ld.shared.u32 %r8510, [%rd2755];
|
||
// inline asm
|
||
bfe.u32 %r7173, %r7182, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2756, %r7173, 4;
|
||
add.s64 %rd2757, %rd45, %rd2756;
|
||
ld.shared.u32 %r8511, [%rd2757];
|
||
add.s32 %r8512, %r8511, %r8510;
|
||
// inline asm
|
||
bfe.u32 %r7177, %r7182, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2758, %r7177, 4;
|
||
add.s64 %rd2759, %rd48, %rd2758;
|
||
ld.shared.u32 %r8513, [%rd2759];
|
||
xor.b32 %r8514, %r8513, %r8512;
|
||
// inline asm
|
||
bfe.u32 %r7181, %r7182, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2760, %r7181, 4;
|
||
add.s64 %rd2761, %rd51, %rd2760;
|
||
ld.shared.u32 %r8515, [%rd2761];
|
||
add.s32 %r8516, %r8515, %r8514;
|
||
xor.b32 %r8517, %r10340, %r7166;
|
||
xor.b32 %r7198, %r8517, %r8516;
|
||
// inline asm
|
||
bfe.u32 %r7185, %r7198, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2762, %r7185, 4;
|
||
add.s64 %rd2763, %rd42, %rd2762;
|
||
ld.shared.u32 %r8518, [%rd2763];
|
||
// inline asm
|
||
bfe.u32 %r7189, %r7198, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2764, %r7189, 4;
|
||
add.s64 %rd2765, %rd45, %rd2764;
|
||
ld.shared.u32 %r8519, [%rd2765];
|
||
add.s32 %r8520, %r8519, %r8518;
|
||
// inline asm
|
||
bfe.u32 %r7193, %r7198, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2766, %r7193, 4;
|
||
add.s64 %rd2767, %rd48, %rd2766;
|
||
ld.shared.u32 %r8521, [%rd2767];
|
||
xor.b32 %r8522, %r8521, %r8520;
|
||
// inline asm
|
||
bfe.u32 %r7197, %r7198, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2768, %r7197, 4;
|
||
add.s64 %rd2769, %rd51, %rd2768;
|
||
ld.shared.u32 %r8523, [%rd2769];
|
||
add.s32 %r8524, %r8523, %r8522;
|
||
xor.b32 %r8525, %r10339, %r7182;
|
||
xor.b32 %r7214, %r8525, %r8524;
|
||
// inline asm
|
||
bfe.u32 %r7201, %r7214, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2770, %r7201, 4;
|
||
add.s64 %rd2771, %rd42, %rd2770;
|
||
ld.shared.u32 %r8526, [%rd2771];
|
||
// inline asm
|
||
bfe.u32 %r7205, %r7214, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2772, %r7205, 4;
|
||
add.s64 %rd2773, %rd45, %rd2772;
|
||
ld.shared.u32 %r8527, [%rd2773];
|
||
add.s32 %r8528, %r8527, %r8526;
|
||
// inline asm
|
||
bfe.u32 %r7209, %r7214, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2774, %r7209, 4;
|
||
add.s64 %rd2775, %rd48, %rd2774;
|
||
ld.shared.u32 %r8529, [%rd2775];
|
||
xor.b32 %r8530, %r8529, %r8528;
|
||
// inline asm
|
||
bfe.u32 %r7213, %r7214, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2776, %r7213, 4;
|
||
add.s64 %rd2777, %rd51, %rd2776;
|
||
ld.shared.u32 %r8531, [%rd2777];
|
||
add.s32 %r8532, %r8531, %r8530;
|
||
xor.b32 %r8533, %r10338, %r7198;
|
||
xor.b32 %r7230, %r8533, %r8532;
|
||
// inline asm
|
||
bfe.u32 %r7217, %r7230, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2778, %r7217, 4;
|
||
add.s64 %rd2779, %rd42, %rd2778;
|
||
ld.shared.u32 %r8534, [%rd2779];
|
||
// inline asm
|
||
bfe.u32 %r7221, %r7230, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2780, %r7221, 4;
|
||
add.s64 %rd2781, %rd45, %rd2780;
|
||
ld.shared.u32 %r8535, [%rd2781];
|
||
add.s32 %r8536, %r8535, %r8534;
|
||
// inline asm
|
||
bfe.u32 %r7225, %r7230, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2782, %r7225, 4;
|
||
add.s64 %rd2783, %rd48, %rd2782;
|
||
ld.shared.u32 %r8537, [%rd2783];
|
||
xor.b32 %r8538, %r8537, %r8536;
|
||
// inline asm
|
||
bfe.u32 %r7229, %r7230, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2784, %r7229, 4;
|
||
add.s64 %rd2785, %rd51, %rd2784;
|
||
ld.shared.u32 %r8539, [%rd2785];
|
||
add.s32 %r8540, %r8539, %r8538;
|
||
xor.b32 %r8541, %r7645, %r7214;
|
||
xor.b32 %r7246, %r8541, %r8540;
|
||
// inline asm
|
||
bfe.u32 %r7233, %r7246, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2786, %r7233, 4;
|
||
add.s64 %rd2787, %rd42, %rd2786;
|
||
ld.shared.u32 %r8542, [%rd2787];
|
||
// inline asm
|
||
bfe.u32 %r7237, %r7246, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2788, %r7237, 4;
|
||
add.s64 %rd2789, %rd45, %rd2788;
|
||
ld.shared.u32 %r8543, [%rd2789];
|
||
add.s32 %r8544, %r8543, %r8542;
|
||
// inline asm
|
||
bfe.u32 %r7241, %r7246, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2790, %r7241, 4;
|
||
add.s64 %rd2791, %rd48, %rd2790;
|
||
ld.shared.u32 %r8545, [%rd2791];
|
||
xor.b32 %r8546, %r8545, %r8544;
|
||
// inline asm
|
||
bfe.u32 %r7245, %r7246, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2792, %r7245, 4;
|
||
add.s64 %rd2793, %rd51, %rd2792;
|
||
ld.shared.u32 %r8547, [%rd2793];
|
||
add.s32 %r8548, %r8547, %r8546;
|
||
xor.b32 %r8549, %r7654, %r7230;
|
||
xor.b32 %r7262, %r8549, %r8548;
|
||
// inline asm
|
||
bfe.u32 %r7249, %r7262, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2794, %r7249, 4;
|
||
add.s64 %rd2795, %rd42, %rd2794;
|
||
ld.shared.u32 %r8550, [%rd2795];
|
||
// inline asm
|
||
bfe.u32 %r7253, %r7262, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2796, %r7253, 4;
|
||
add.s64 %rd2797, %rd45, %rd2796;
|
||
ld.shared.u32 %r8551, [%rd2797];
|
||
add.s32 %r8552, %r8551, %r8550;
|
||
// inline asm
|
||
bfe.u32 %r7257, %r7262, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2798, %r7257, 4;
|
||
add.s64 %rd2799, %rd48, %rd2798;
|
||
ld.shared.u32 %r8553, [%rd2799];
|
||
xor.b32 %r8554, %r8553, %r8552;
|
||
// inline asm
|
||
bfe.u32 %r7261, %r7262, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2800, %r7261, 4;
|
||
add.s64 %rd2801, %rd51, %rd2800;
|
||
ld.shared.u32 %r8555, [%rd2801];
|
||
add.s32 %r8556, %r8555, %r8554;
|
||
xor.b32 %r8557, %r7663, %r7246;
|
||
xor.b32 %r10336, %r8557, %r8556;
|
||
xor.b32 %r10337, %r7665, %r7262;
|
||
xor.b32 %r7278, %r5470, %r7262;
|
||
// inline asm
|
||
bfe.u32 %r7265, %r7278, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2802, %r7265, 4;
|
||
add.s64 %rd2803, %rd42, %rd2802;
|
||
ld.shared.u32 %r8558, [%rd2803];
|
||
// inline asm
|
||
bfe.u32 %r7269, %r7278, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2804, %r7269, 4;
|
||
add.s64 %rd2805, %rd45, %rd2804;
|
||
ld.shared.u32 %r8559, [%rd2805];
|
||
add.s32 %r8560, %r8559, %r8558;
|
||
// inline asm
|
||
bfe.u32 %r7273, %r7278, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2806, %r7273, 4;
|
||
add.s64 %rd2807, %rd48, %rd2806;
|
||
ld.shared.u32 %r8561, [%rd2807];
|
||
xor.b32 %r8562, %r8561, %r8560;
|
||
// inline asm
|
||
bfe.u32 %r7277, %r7278, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2808, %r7277, 4;
|
||
add.s64 %rd2809, %rd51, %rd2808;
|
||
ld.shared.u32 %r8563, [%rd2809];
|
||
add.s32 %r8564, %r8563, %r8562;
|
||
xor.b32 %r8565, %r10350, %r10336;
|
||
xor.b32 %r7294, %r8565, %r8564;
|
||
// inline asm
|
||
bfe.u32 %r7281, %r7294, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2810, %r7281, 4;
|
||
add.s64 %rd2811, %rd42, %rd2810;
|
||
ld.shared.u32 %r8566, [%rd2811];
|
||
// inline asm
|
||
bfe.u32 %r7285, %r7294, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2812, %r7285, 4;
|
||
add.s64 %rd2813, %rd45, %rd2812;
|
||
ld.shared.u32 %r8567, [%rd2813];
|
||
add.s32 %r8568, %r8567, %r8566;
|
||
// inline asm
|
||
bfe.u32 %r7289, %r7294, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2814, %r7289, 4;
|
||
add.s64 %rd2815, %rd48, %rd2814;
|
||
ld.shared.u32 %r8569, [%rd2815];
|
||
xor.b32 %r8570, %r8569, %r8568;
|
||
// inline asm
|
||
bfe.u32 %r7293, %r7294, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2816, %r7293, 4;
|
||
add.s64 %rd2817, %rd51, %rd2816;
|
||
ld.shared.u32 %r8571, [%rd2817];
|
||
add.s32 %r8572, %r8571, %r8570;
|
||
xor.b32 %r8573, %r10349, %r7278;
|
||
xor.b32 %r7310, %r8573, %r8572;
|
||
// inline asm
|
||
bfe.u32 %r7297, %r7310, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2818, %r7297, 4;
|
||
add.s64 %rd2819, %rd42, %rd2818;
|
||
ld.shared.u32 %r8574, [%rd2819];
|
||
// inline asm
|
||
bfe.u32 %r7301, %r7310, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2820, %r7301, 4;
|
||
add.s64 %rd2821, %rd45, %rd2820;
|
||
ld.shared.u32 %r8575, [%rd2821];
|
||
add.s32 %r8576, %r8575, %r8574;
|
||
// inline asm
|
||
bfe.u32 %r7305, %r7310, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2822, %r7305, 4;
|
||
add.s64 %rd2823, %rd48, %rd2822;
|
||
ld.shared.u32 %r8577, [%rd2823];
|
||
xor.b32 %r8578, %r8577, %r8576;
|
||
// inline asm
|
||
bfe.u32 %r7309, %r7310, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2824, %r7309, 4;
|
||
add.s64 %rd2825, %rd51, %rd2824;
|
||
ld.shared.u32 %r8579, [%rd2825];
|
||
add.s32 %r8580, %r8579, %r8578;
|
||
xor.b32 %r8581, %r10348, %r7294;
|
||
xor.b32 %r7326, %r8581, %r8580;
|
||
// inline asm
|
||
bfe.u32 %r7313, %r7326, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2826, %r7313, 4;
|
||
add.s64 %rd2827, %rd42, %rd2826;
|
||
ld.shared.u32 %r8582, [%rd2827];
|
||
// inline asm
|
||
bfe.u32 %r7317, %r7326, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2828, %r7317, 4;
|
||
add.s64 %rd2829, %rd45, %rd2828;
|
||
ld.shared.u32 %r8583, [%rd2829];
|
||
add.s32 %r8584, %r8583, %r8582;
|
||
// inline asm
|
||
bfe.u32 %r7321, %r7326, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2830, %r7321, 4;
|
||
add.s64 %rd2831, %rd48, %rd2830;
|
||
ld.shared.u32 %r8585, [%rd2831];
|
||
xor.b32 %r8586, %r8585, %r8584;
|
||
// inline asm
|
||
bfe.u32 %r7325, %r7326, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2832, %r7325, 4;
|
||
add.s64 %rd2833, %rd51, %rd2832;
|
||
ld.shared.u32 %r8587, [%rd2833];
|
||
add.s32 %r8588, %r8587, %r8586;
|
||
xor.b32 %r8589, %r10347, %r7310;
|
||
xor.b32 %r7342, %r8589, %r8588;
|
||
// inline asm
|
||
bfe.u32 %r7329, %r7342, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2834, %r7329, 4;
|
||
add.s64 %rd2835, %rd42, %rd2834;
|
||
ld.shared.u32 %r8590, [%rd2835];
|
||
// inline asm
|
||
bfe.u32 %r7333, %r7342, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2836, %r7333, 4;
|
||
add.s64 %rd2837, %rd45, %rd2836;
|
||
ld.shared.u32 %r8591, [%rd2837];
|
||
add.s32 %r8592, %r8591, %r8590;
|
||
// inline asm
|
||
bfe.u32 %r7337, %r7342, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2838, %r7337, 4;
|
||
add.s64 %rd2839, %rd48, %rd2838;
|
||
ld.shared.u32 %r8593, [%rd2839];
|
||
xor.b32 %r8594, %r8593, %r8592;
|
||
// inline asm
|
||
bfe.u32 %r7341, %r7342, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2840, %r7341, 4;
|
||
add.s64 %rd2841, %rd51, %rd2840;
|
||
ld.shared.u32 %r8595, [%rd2841];
|
||
add.s32 %r8596, %r8595, %r8594;
|
||
xor.b32 %r8597, %r10346, %r7326;
|
||
xor.b32 %r7358, %r8597, %r8596;
|
||
// inline asm
|
||
bfe.u32 %r7345, %r7358, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2842, %r7345, 4;
|
||
add.s64 %rd2843, %rd42, %rd2842;
|
||
ld.shared.u32 %r8598, [%rd2843];
|
||
// inline asm
|
||
bfe.u32 %r7349, %r7358, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2844, %r7349, 4;
|
||
add.s64 %rd2845, %rd45, %rd2844;
|
||
ld.shared.u32 %r8599, [%rd2845];
|
||
add.s32 %r8600, %r8599, %r8598;
|
||
// inline asm
|
||
bfe.u32 %r7353, %r7358, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2846, %r7353, 4;
|
||
add.s64 %rd2847, %rd48, %rd2846;
|
||
ld.shared.u32 %r8601, [%rd2847];
|
||
xor.b32 %r8602, %r8601, %r8600;
|
||
// inline asm
|
||
bfe.u32 %r7357, %r7358, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2848, %r7357, 4;
|
||
add.s64 %rd2849, %rd51, %rd2848;
|
||
ld.shared.u32 %r8603, [%rd2849];
|
||
add.s32 %r8604, %r8603, %r8602;
|
||
xor.b32 %r8605, %r10345, %r7342;
|
||
xor.b32 %r7374, %r8605, %r8604;
|
||
// inline asm
|
||
bfe.u32 %r7361, %r7374, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2850, %r7361, 4;
|
||
add.s64 %rd2851, %rd42, %rd2850;
|
||
ld.shared.u32 %r8606, [%rd2851];
|
||
// inline asm
|
||
bfe.u32 %r7365, %r7374, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2852, %r7365, 4;
|
||
add.s64 %rd2853, %rd45, %rd2852;
|
||
ld.shared.u32 %r8607, [%rd2853];
|
||
add.s32 %r8608, %r8607, %r8606;
|
||
// inline asm
|
||
bfe.u32 %r7369, %r7374, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2854, %r7369, 4;
|
||
add.s64 %rd2855, %rd48, %rd2854;
|
||
ld.shared.u32 %r8609, [%rd2855];
|
||
xor.b32 %r8610, %r8609, %r8608;
|
||
// inline asm
|
||
bfe.u32 %r7373, %r7374, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2856, %r7373, 4;
|
||
add.s64 %rd2857, %rd51, %rd2856;
|
||
ld.shared.u32 %r8611, [%rd2857];
|
||
add.s32 %r8612, %r8611, %r8610;
|
||
xor.b32 %r8613, %r10344, %r7358;
|
||
xor.b32 %r7390, %r8613, %r8612;
|
||
// inline asm
|
||
bfe.u32 %r7377, %r7390, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2858, %r7377, 4;
|
||
add.s64 %rd2859, %rd42, %rd2858;
|
||
ld.shared.u32 %r8614, [%rd2859];
|
||
// inline asm
|
||
bfe.u32 %r7381, %r7390, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2860, %r7381, 4;
|
||
add.s64 %rd2861, %rd45, %rd2860;
|
||
ld.shared.u32 %r8615, [%rd2861];
|
||
add.s32 %r8616, %r8615, %r8614;
|
||
// inline asm
|
||
bfe.u32 %r7385, %r7390, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2862, %r7385, 4;
|
||
add.s64 %rd2863, %rd48, %rd2862;
|
||
ld.shared.u32 %r8617, [%rd2863];
|
||
xor.b32 %r8618, %r8617, %r8616;
|
||
// inline asm
|
||
bfe.u32 %r7389, %r7390, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2864, %r7389, 4;
|
||
add.s64 %rd2865, %rd51, %rd2864;
|
||
ld.shared.u32 %r8619, [%rd2865];
|
||
add.s32 %r8620, %r8619, %r8618;
|
||
xor.b32 %r8621, %r10343, %r7374;
|
||
xor.b32 %r7406, %r8621, %r8620;
|
||
// inline asm
|
||
bfe.u32 %r7393, %r7406, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2866, %r7393, 4;
|
||
add.s64 %rd2867, %rd42, %rd2866;
|
||
ld.shared.u32 %r8622, [%rd2867];
|
||
// inline asm
|
||
bfe.u32 %r7397, %r7406, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2868, %r7397, 4;
|
||
add.s64 %rd2869, %rd45, %rd2868;
|
||
ld.shared.u32 %r8623, [%rd2869];
|
||
add.s32 %r8624, %r8623, %r8622;
|
||
// inline asm
|
||
bfe.u32 %r7401, %r7406, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2870, %r7401, 4;
|
||
add.s64 %rd2871, %rd48, %rd2870;
|
||
ld.shared.u32 %r8625, [%rd2871];
|
||
xor.b32 %r8626, %r8625, %r8624;
|
||
// inline asm
|
||
bfe.u32 %r7405, %r7406, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2872, %r7405, 4;
|
||
add.s64 %rd2873, %rd51, %rd2872;
|
||
ld.shared.u32 %r8627, [%rd2873];
|
||
add.s32 %r8628, %r8627, %r8626;
|
||
xor.b32 %r8629, %r10342, %r7390;
|
||
xor.b32 %r7422, %r8629, %r8628;
|
||
// inline asm
|
||
bfe.u32 %r7409, %r7422, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2874, %r7409, 4;
|
||
add.s64 %rd2875, %rd42, %rd2874;
|
||
ld.shared.u32 %r8630, [%rd2875];
|
||
// inline asm
|
||
bfe.u32 %r7413, %r7422, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2876, %r7413, 4;
|
||
add.s64 %rd2877, %rd45, %rd2876;
|
||
ld.shared.u32 %r8631, [%rd2877];
|
||
add.s32 %r8632, %r8631, %r8630;
|
||
// inline asm
|
||
bfe.u32 %r7417, %r7422, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2878, %r7417, 4;
|
||
add.s64 %rd2879, %rd48, %rd2878;
|
||
ld.shared.u32 %r8633, [%rd2879];
|
||
xor.b32 %r8634, %r8633, %r8632;
|
||
// inline asm
|
||
bfe.u32 %r7421, %r7422, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2880, %r7421, 4;
|
||
add.s64 %rd2881, %rd51, %rd2880;
|
||
ld.shared.u32 %r8635, [%rd2881];
|
||
add.s32 %r8636, %r8635, %r8634;
|
||
xor.b32 %r8637, %r10341, %r7406;
|
||
xor.b32 %r7438, %r8637, %r8636;
|
||
// inline asm
|
||
bfe.u32 %r7425, %r7438, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2882, %r7425, 4;
|
||
add.s64 %rd2883, %rd42, %rd2882;
|
||
ld.shared.u32 %r8638, [%rd2883];
|
||
// inline asm
|
||
bfe.u32 %r7429, %r7438, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2884, %r7429, 4;
|
||
add.s64 %rd2885, %rd45, %rd2884;
|
||
ld.shared.u32 %r8639, [%rd2885];
|
||
add.s32 %r8640, %r8639, %r8638;
|
||
// inline asm
|
||
bfe.u32 %r7433, %r7438, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2886, %r7433, 4;
|
||
add.s64 %rd2887, %rd48, %rd2886;
|
||
ld.shared.u32 %r8641, [%rd2887];
|
||
xor.b32 %r8642, %r8641, %r8640;
|
||
// inline asm
|
||
bfe.u32 %r7437, %r7438, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2888, %r7437, 4;
|
||
add.s64 %rd2889, %rd51, %rd2888;
|
||
ld.shared.u32 %r8643, [%rd2889];
|
||
add.s32 %r8644, %r8643, %r8642;
|
||
xor.b32 %r8645, %r10340, %r7422;
|
||
xor.b32 %r7454, %r8645, %r8644;
|
||
// inline asm
|
||
bfe.u32 %r7441, %r7454, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2890, %r7441, 4;
|
||
add.s64 %rd2891, %rd42, %rd2890;
|
||
ld.shared.u32 %r8646, [%rd2891];
|
||
// inline asm
|
||
bfe.u32 %r7445, %r7454, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2892, %r7445, 4;
|
||
add.s64 %rd2893, %rd45, %rd2892;
|
||
ld.shared.u32 %r8647, [%rd2893];
|
||
add.s32 %r8648, %r8647, %r8646;
|
||
// inline asm
|
||
bfe.u32 %r7449, %r7454, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2894, %r7449, 4;
|
||
add.s64 %rd2895, %rd48, %rd2894;
|
||
ld.shared.u32 %r8649, [%rd2895];
|
||
xor.b32 %r8650, %r8649, %r8648;
|
||
// inline asm
|
||
bfe.u32 %r7453, %r7454, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2896, %r7453, 4;
|
||
add.s64 %rd2897, %rd51, %rd2896;
|
||
ld.shared.u32 %r8651, [%rd2897];
|
||
add.s32 %r8652, %r8651, %r8650;
|
||
xor.b32 %r8653, %r10339, %r7438;
|
||
xor.b32 %r7470, %r8653, %r8652;
|
||
// inline asm
|
||
bfe.u32 %r7457, %r7470, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2898, %r7457, 4;
|
||
add.s64 %rd2899, %rd42, %rd2898;
|
||
ld.shared.u32 %r8654, [%rd2899];
|
||
// inline asm
|
||
bfe.u32 %r7461, %r7470, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2900, %r7461, 4;
|
||
add.s64 %rd2901, %rd45, %rd2900;
|
||
ld.shared.u32 %r8655, [%rd2901];
|
||
add.s32 %r8656, %r8655, %r8654;
|
||
// inline asm
|
||
bfe.u32 %r7465, %r7470, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2902, %r7465, 4;
|
||
add.s64 %rd2903, %rd48, %rd2902;
|
||
ld.shared.u32 %r8657, [%rd2903];
|
||
xor.b32 %r8658, %r8657, %r8656;
|
||
// inline asm
|
||
bfe.u32 %r7469, %r7470, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2904, %r7469, 4;
|
||
add.s64 %rd2905, %rd51, %rd2904;
|
||
ld.shared.u32 %r8659, [%rd2905];
|
||
add.s32 %r8660, %r8659, %r8658;
|
||
xor.b32 %r8661, %r10338, %r7454;
|
||
xor.b32 %r7486, %r8661, %r8660;
|
||
// inline asm
|
||
bfe.u32 %r7473, %r7486, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2906, %r7473, 4;
|
||
add.s64 %rd2907, %rd42, %rd2906;
|
||
ld.shared.u32 %r8662, [%rd2907];
|
||
// inline asm
|
||
bfe.u32 %r7477, %r7486, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2908, %r7477, 4;
|
||
add.s64 %rd2909, %rd45, %rd2908;
|
||
ld.shared.u32 %r8663, [%rd2909];
|
||
add.s32 %r8664, %r8663, %r8662;
|
||
// inline asm
|
||
bfe.u32 %r7481, %r7486, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2910, %r7481, 4;
|
||
add.s64 %rd2911, %rd48, %rd2910;
|
||
ld.shared.u32 %r8665, [%rd2911];
|
||
xor.b32 %r8666, %r8665, %r8664;
|
||
// inline asm
|
||
bfe.u32 %r7485, %r7486, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2912, %r7485, 4;
|
||
add.s64 %rd2913, %rd51, %rd2912;
|
||
ld.shared.u32 %r8667, [%rd2913];
|
||
add.s32 %r8668, %r8667, %r8666;
|
||
xor.b32 %r8669, %r10337, %r7470;
|
||
xor.b32 %r7502, %r8669, %r8668;
|
||
// inline asm
|
||
bfe.u32 %r7489, %r7502, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2914, %r7489, 4;
|
||
add.s64 %rd2915, %rd42, %rd2914;
|
||
ld.shared.u32 %r8670, [%rd2915];
|
||
// inline asm
|
||
bfe.u32 %r7493, %r7502, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2916, %r7493, 4;
|
||
add.s64 %rd2917, %rd45, %rd2916;
|
||
ld.shared.u32 %r8671, [%rd2917];
|
||
add.s32 %r8672, %r8671, %r8670;
|
||
// inline asm
|
||
bfe.u32 %r7497, %r7502, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2918, %r7497, 4;
|
||
add.s64 %rd2919, %rd48, %rd2918;
|
||
ld.shared.u32 %r8673, [%rd2919];
|
||
xor.b32 %r8674, %r8673, %r8672;
|
||
// inline asm
|
||
bfe.u32 %r7501, %r7502, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2920, %r7501, 4;
|
||
add.s64 %rd2921, %rd51, %rd2920;
|
||
ld.shared.u32 %r8675, [%rd2921];
|
||
add.s32 %r8676, %r8675, %r8674;
|
||
xor.b32 %r8677, %r10336, %r7486;
|
||
xor.b32 %r7518, %r8677, %r8676;
|
||
// inline asm
|
||
bfe.u32 %r7505, %r7518, %r10287, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2922, %r7505, 4;
|
||
add.s64 %rd2923, %rd42, %rd2922;
|
||
ld.shared.u32 %r8678, [%rd2923];
|
||
// inline asm
|
||
bfe.u32 %r7509, %r7518, %r10289, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2924, %r7509, 4;
|
||
add.s64 %rd2925, %rd45, %rd2924;
|
||
ld.shared.u32 %r8679, [%rd2925];
|
||
add.s32 %r8680, %r8679, %r8678;
|
||
// inline asm
|
||
bfe.u32 %r7513, %r7518, %r10288, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2926, %r7513, 4;
|
||
add.s64 %rd2927, %rd48, %rd2926;
|
||
ld.shared.u32 %r8681, [%rd2927];
|
||
xor.b32 %r8682, %r8681, %r8680;
|
||
// inline asm
|
||
bfe.u32 %r7517, %r7518, %r10322, %r10288;
|
||
// inline asm
|
||
mul.wide.u32 %rd2928, %r7517, 4;
|
||
add.s64 %rd2929, %rd51, %rd2928;
|
||
ld.shared.u32 %r8683, [%rd2929];
|
||
add.s32 %r8684, %r8683, %r8682;
|
||
xor.b32 %r8685, %r7663, %r7502;
|
||
xor.b32 %r10334, %r8685, %r8684;
|
||
xor.b32 %r10335, %r7665, %r7518;
|
||
// ---- Pre-header of loop BB4_14 ---------------------------------------------
// %rd3499 is the running store pointer used at the bottom of the loop; it
// starts at %rd3.
mov.u64 %rd3499, %rd3;
|
||
// Seed the two 32-bit state words carried from one iteration to the next
// (%r10326 / %r10327) from %r10334 / %r10335. Those two source registers are
// not written inside the loop and are read again as the last two XOR operands
// of every iteration.
mov.u32 %r10326, %r10334;
|
||
mov.u32 %r10327, %r10335;
|
||
|
||
// ---- Loop BB4_14 -----------------------------------------------------------
// Per iteration: XOR %r10327 with %r10351, run 16 table-lookup rounds that
// fold in %r10350 .. %r10335 one per round, XOR the result with %r10334,
// store the two output words through %rd3499, and reuse them as the next
// iteration's input.
// NOTE(review): this has the shape of a Blowfish block encryption (18-word
// P-array in %r10351..%r10334, four S-boxes in shared memory). The register
// and table definitions are outside this view -- confirm before relying on it.
BB4_14:
|
||
// Constant 0: start bit of the lowest byte for the bfe instructions in this
// loop. The same register is the induction variable of the following BB4_16
// loop, which only increments it, so this write also supplies that counter's
// starting value.
mov.u32 %r10325, 0;
|
||
// bfe start bit 16.
mov.u32 %r10260, 16;
|
||
// 8 serves both as the bfe field length and as start bit 8.
mov.u32 %r10259, 8;
|
||
// bfe start bit 24.
mov.u32 %r10258, 24;
|
||
// x = %r10327 ^ %r10351 : first state word combined with the first key word.
xor.b32 %r8699, %r10351, %r10327;
|
||
// ---- Round 1 (the remaining 15 rounds below repeat this pattern) -----------
// f = ((T0[x>>24] + T1[(x>>16)&0xff]) ^ T2[(x>>8)&0xff]) + T3[x&0xff]
// where T0..T3 are tables of 32-bit entries in shared memory based at
// %rd42, %rd45, %rd48 and %rd51 respectively.
// inline asm
|
||
// Byte 3 of x: 8 bits starting at bit 24.
bfe.u32 %r8686, %r8699, %r10258, %r10259;
|
||
// inline asm
|
||
// Index * 4 = byte offset of a 32-bit entry, widened to 64 bits.
mul.wide.u32 %rd2933, %r8686, 4;
|
||
add.s64 %rd2934, %rd42, %rd2933;
|
||
// T0[byte3]
ld.shared.u32 %r8943, [%rd2934];
|
||
// inline asm
|
||
// Byte 2 of x: 8 bits starting at bit 16.
bfe.u32 %r8690, %r8699, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2937, %r8690, 4;
|
||
add.s64 %rd2938, %rd45, %rd2937;
|
||
// T1[byte2]
ld.shared.u32 %r8944, [%rd2938];
|
||
// T0[byte3] + T1[byte2] (32-bit wrap-around add).
add.s32 %r8945, %r8944, %r8943;
|
||
// inline asm
|
||
// Byte 1 of x: 8 bits starting at bit 8.
bfe.u32 %r8694, %r8699, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2941, %r8694, 4;
|
||
add.s64 %rd2942, %rd48, %rd2941;
|
||
// T2[byte1]
ld.shared.u32 %r8946, [%rd2942];
|
||
// (T0 + T1) ^ T2
xor.b32 %r8947, %r8946, %r8945;
|
||
// inline asm
|
||
// Byte 0 of x: 8 bits starting at bit 0 (%r10325 holds 0 here).
bfe.u32 %r8698, %r8699, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2945, %r8698, 4;
|
||
add.s64 %rd2946, %rd51, %rd2945;
|
||
// T3[byte0]
ld.shared.u32 %r8948, [%rd2946];
|
||
// f = ((T0 + T1) ^ T2) + T3
add.s32 %r8949, %r8948, %r8947;
|
||
// Other state word combined with the next key word ...
xor.b32 %r8950, %r10350, %r10326;
|
||
// ... then mixed with f. %r8715 is the value the next round extracts bytes from.
xor.b32 %r8715, %r8950, %r8949;
|
||
// inline asm
|
||
bfe.u32 %r8702, %r8715, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2947, %r8702, 4;
|
||
add.s64 %rd2948, %rd42, %rd2947;
|
||
ld.shared.u32 %r8951, [%rd2948];
|
||
// inline asm
|
||
bfe.u32 %r8706, %r8715, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2949, %r8706, 4;
|
||
add.s64 %rd2950, %rd45, %rd2949;
|
||
ld.shared.u32 %r8952, [%rd2950];
|
||
add.s32 %r8953, %r8952, %r8951;
|
||
// inline asm
|
||
bfe.u32 %r8710, %r8715, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2951, %r8710, 4;
|
||
add.s64 %rd2952, %rd48, %rd2951;
|
||
ld.shared.u32 %r8954, [%rd2952];
|
||
xor.b32 %r8955, %r8954, %r8953;
|
||
// inline asm
|
||
bfe.u32 %r8714, %r8715, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2953, %r8714, 4;
|
||
add.s64 %rd2954, %rd51, %rd2953;
|
||
ld.shared.u32 %r8956, [%rd2954];
|
||
add.s32 %r8957, %r8956, %r8955;
|
||
xor.b32 %r8958, %r10349, %r8699;
|
||
xor.b32 %r8731, %r8958, %r8957;
|
||
// inline asm
|
||
bfe.u32 %r8718, %r8731, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2955, %r8718, 4;
|
||
add.s64 %rd2956, %rd42, %rd2955;
|
||
ld.shared.u32 %r8959, [%rd2956];
|
||
// inline asm
|
||
bfe.u32 %r8722, %r8731, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2957, %r8722, 4;
|
||
add.s64 %rd2958, %rd45, %rd2957;
|
||
ld.shared.u32 %r8960, [%rd2958];
|
||
add.s32 %r8961, %r8960, %r8959;
|
||
// inline asm
|
||
bfe.u32 %r8726, %r8731, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2959, %r8726, 4;
|
||
add.s64 %rd2960, %rd48, %rd2959;
|
||
ld.shared.u32 %r8962, [%rd2960];
|
||
xor.b32 %r8963, %r8962, %r8961;
|
||
// inline asm
|
||
bfe.u32 %r8730, %r8731, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2961, %r8730, 4;
|
||
add.s64 %rd2962, %rd51, %rd2961;
|
||
ld.shared.u32 %r8964, [%rd2962];
|
||
add.s32 %r8965, %r8964, %r8963;
|
||
xor.b32 %r8966, %r10348, %r8715;
|
||
xor.b32 %r8747, %r8966, %r8965;
|
||
// inline asm
|
||
bfe.u32 %r8734, %r8747, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2963, %r8734, 4;
|
||
add.s64 %rd2964, %rd42, %rd2963;
|
||
ld.shared.u32 %r8967, [%rd2964];
|
||
// inline asm
|
||
bfe.u32 %r8738, %r8747, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2965, %r8738, 4;
|
||
add.s64 %rd2966, %rd45, %rd2965;
|
||
ld.shared.u32 %r8968, [%rd2966];
|
||
add.s32 %r8969, %r8968, %r8967;
|
||
// inline asm
|
||
bfe.u32 %r8742, %r8747, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2967, %r8742, 4;
|
||
add.s64 %rd2968, %rd48, %rd2967;
|
||
ld.shared.u32 %r8970, [%rd2968];
|
||
xor.b32 %r8971, %r8970, %r8969;
|
||
// inline asm
|
||
bfe.u32 %r8746, %r8747, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2969, %r8746, 4;
|
||
add.s64 %rd2970, %rd51, %rd2969;
|
||
ld.shared.u32 %r8972, [%rd2970];
|
||
add.s32 %r8973, %r8972, %r8971;
|
||
xor.b32 %r8974, %r10347, %r8731;
|
||
xor.b32 %r8763, %r8974, %r8973;
|
||
// inline asm
|
||
bfe.u32 %r8750, %r8763, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2971, %r8750, 4;
|
||
add.s64 %rd2972, %rd42, %rd2971;
|
||
ld.shared.u32 %r8975, [%rd2972];
|
||
// inline asm
|
||
bfe.u32 %r8754, %r8763, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2973, %r8754, 4;
|
||
add.s64 %rd2974, %rd45, %rd2973;
|
||
ld.shared.u32 %r8976, [%rd2974];
|
||
add.s32 %r8977, %r8976, %r8975;
|
||
// inline asm
|
||
bfe.u32 %r8758, %r8763, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2975, %r8758, 4;
|
||
add.s64 %rd2976, %rd48, %rd2975;
|
||
ld.shared.u32 %r8978, [%rd2976];
|
||
xor.b32 %r8979, %r8978, %r8977;
|
||
// inline asm
|
||
bfe.u32 %r8762, %r8763, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2977, %r8762, 4;
|
||
add.s64 %rd2978, %rd51, %rd2977;
|
||
ld.shared.u32 %r8980, [%rd2978];
|
||
add.s32 %r8981, %r8980, %r8979;
|
||
xor.b32 %r8982, %r10346, %r8747;
|
||
xor.b32 %r8779, %r8982, %r8981;
|
||
// inline asm
|
||
bfe.u32 %r8766, %r8779, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2979, %r8766, 4;
|
||
add.s64 %rd2980, %rd42, %rd2979;
|
||
ld.shared.u32 %r8983, [%rd2980];
|
||
// inline asm
|
||
bfe.u32 %r8770, %r8779, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2981, %r8770, 4;
|
||
add.s64 %rd2982, %rd45, %rd2981;
|
||
ld.shared.u32 %r8984, [%rd2982];
|
||
add.s32 %r8985, %r8984, %r8983;
|
||
// inline asm
|
||
bfe.u32 %r8774, %r8779, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2983, %r8774, 4;
|
||
add.s64 %rd2984, %rd48, %rd2983;
|
||
ld.shared.u32 %r8986, [%rd2984];
|
||
xor.b32 %r8987, %r8986, %r8985;
|
||
// inline asm
|
||
bfe.u32 %r8778, %r8779, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2985, %r8778, 4;
|
||
add.s64 %rd2986, %rd51, %rd2985;
|
||
ld.shared.u32 %r8988, [%rd2986];
|
||
add.s32 %r8989, %r8988, %r8987;
|
||
xor.b32 %r8990, %r10345, %r8763;
|
||
xor.b32 %r8795, %r8990, %r8989;
|
||
// inline asm
|
||
bfe.u32 %r8782, %r8795, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2987, %r8782, 4;
|
||
add.s64 %rd2988, %rd42, %rd2987;
|
||
ld.shared.u32 %r8991, [%rd2988];
|
||
// inline asm
|
||
bfe.u32 %r8786, %r8795, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2989, %r8786, 4;
|
||
add.s64 %rd2990, %rd45, %rd2989;
|
||
ld.shared.u32 %r8992, [%rd2990];
|
||
add.s32 %r8993, %r8992, %r8991;
|
||
// inline asm
|
||
bfe.u32 %r8790, %r8795, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2991, %r8790, 4;
|
||
add.s64 %rd2992, %rd48, %rd2991;
|
||
ld.shared.u32 %r8994, [%rd2992];
|
||
xor.b32 %r8995, %r8994, %r8993;
|
||
// inline asm
|
||
bfe.u32 %r8794, %r8795, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2993, %r8794, 4;
|
||
add.s64 %rd2994, %rd51, %rd2993;
|
||
ld.shared.u32 %r8996, [%rd2994];
|
||
add.s32 %r8997, %r8996, %r8995;
|
||
xor.b32 %r8998, %r10344, %r8779;
|
||
xor.b32 %r8811, %r8998, %r8997;
|
||
// inline asm
|
||
bfe.u32 %r8798, %r8811, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2995, %r8798, 4;
|
||
add.s64 %rd2996, %rd42, %rd2995;
|
||
ld.shared.u32 %r8999, [%rd2996];
|
||
// inline asm
|
||
bfe.u32 %r8802, %r8811, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2997, %r8802, 4;
|
||
add.s64 %rd2998, %rd45, %rd2997;
|
||
ld.shared.u32 %r9000, [%rd2998];
|
||
add.s32 %r9001, %r9000, %r8999;
|
||
// inline asm
|
||
bfe.u32 %r8806, %r8811, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd2999, %r8806, 4;
|
||
add.s64 %rd3000, %rd48, %rd2999;
|
||
ld.shared.u32 %r9002, [%rd3000];
|
||
xor.b32 %r9003, %r9002, %r9001;
|
||
// inline asm
|
||
bfe.u32 %r8810, %r8811, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3001, %r8810, 4;
|
||
add.s64 %rd3002, %rd51, %rd3001;
|
||
ld.shared.u32 %r9004, [%rd3002];
|
||
add.s32 %r9005, %r9004, %r9003;
|
||
xor.b32 %r9006, %r10343, %r8795;
|
||
xor.b32 %r8827, %r9006, %r9005;
|
||
// inline asm
|
||
bfe.u32 %r8814, %r8827, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3003, %r8814, 4;
|
||
add.s64 %rd3004, %rd42, %rd3003;
|
||
ld.shared.u32 %r9007, [%rd3004];
|
||
// inline asm
|
||
bfe.u32 %r8818, %r8827, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3005, %r8818, 4;
|
||
add.s64 %rd3006, %rd45, %rd3005;
|
||
ld.shared.u32 %r9008, [%rd3006];
|
||
add.s32 %r9009, %r9008, %r9007;
|
||
// inline asm
|
||
bfe.u32 %r8822, %r8827, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3007, %r8822, 4;
|
||
add.s64 %rd3008, %rd48, %rd3007;
|
||
ld.shared.u32 %r9010, [%rd3008];
|
||
xor.b32 %r9011, %r9010, %r9009;
|
||
// inline asm
|
||
bfe.u32 %r8826, %r8827, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3009, %r8826, 4;
|
||
add.s64 %rd3010, %rd51, %rd3009;
|
||
ld.shared.u32 %r9012, [%rd3010];
|
||
add.s32 %r9013, %r9012, %r9011;
|
||
xor.b32 %r9014, %r10342, %r8811;
|
||
xor.b32 %r8843, %r9014, %r9013;
|
||
// inline asm
|
||
bfe.u32 %r8830, %r8843, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3011, %r8830, 4;
|
||
add.s64 %rd3012, %rd42, %rd3011;
|
||
ld.shared.u32 %r9015, [%rd3012];
|
||
// inline asm
|
||
bfe.u32 %r8834, %r8843, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3013, %r8834, 4;
|
||
add.s64 %rd3014, %rd45, %rd3013;
|
||
ld.shared.u32 %r9016, [%rd3014];
|
||
add.s32 %r9017, %r9016, %r9015;
|
||
// inline asm
|
||
bfe.u32 %r8838, %r8843, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3015, %r8838, 4;
|
||
add.s64 %rd3016, %rd48, %rd3015;
|
||
ld.shared.u32 %r9018, [%rd3016];
|
||
xor.b32 %r9019, %r9018, %r9017;
|
||
// inline asm
|
||
bfe.u32 %r8842, %r8843, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3017, %r8842, 4;
|
||
add.s64 %rd3018, %rd51, %rd3017;
|
||
ld.shared.u32 %r9020, [%rd3018];
|
||
add.s32 %r9021, %r9020, %r9019;
|
||
xor.b32 %r9022, %r10341, %r8827;
|
||
xor.b32 %r8859, %r9022, %r9021;
|
||
// inline asm
|
||
bfe.u32 %r8846, %r8859, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3019, %r8846, 4;
|
||
add.s64 %rd3020, %rd42, %rd3019;
|
||
ld.shared.u32 %r9023, [%rd3020];
|
||
// inline asm
|
||
bfe.u32 %r8850, %r8859, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3021, %r8850, 4;
|
||
add.s64 %rd3022, %rd45, %rd3021;
|
||
ld.shared.u32 %r9024, [%rd3022];
|
||
add.s32 %r9025, %r9024, %r9023;
|
||
// inline asm
|
||
bfe.u32 %r8854, %r8859, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3023, %r8854, 4;
|
||
add.s64 %rd3024, %rd48, %rd3023;
|
||
ld.shared.u32 %r9026, [%rd3024];
|
||
xor.b32 %r9027, %r9026, %r9025;
|
||
// inline asm
|
||
bfe.u32 %r8858, %r8859, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3025, %r8858, 4;
|
||
add.s64 %rd3026, %rd51, %rd3025;
|
||
ld.shared.u32 %r9028, [%rd3026];
|
||
add.s32 %r9029, %r9028, %r9027;
|
||
xor.b32 %r9030, %r10340, %r8843;
|
||
xor.b32 %r8875, %r9030, %r9029;
|
||
// inline asm
|
||
bfe.u32 %r8862, %r8875, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3027, %r8862, 4;
|
||
add.s64 %rd3028, %rd42, %rd3027;
|
||
ld.shared.u32 %r9031, [%rd3028];
|
||
// inline asm
|
||
bfe.u32 %r8866, %r8875, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3029, %r8866, 4;
|
||
add.s64 %rd3030, %rd45, %rd3029;
|
||
ld.shared.u32 %r9032, [%rd3030];
|
||
add.s32 %r9033, %r9032, %r9031;
|
||
// inline asm
|
||
bfe.u32 %r8870, %r8875, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3031, %r8870, 4;
|
||
add.s64 %rd3032, %rd48, %rd3031;
|
||
ld.shared.u32 %r9034, [%rd3032];
|
||
xor.b32 %r9035, %r9034, %r9033;
|
||
// inline asm
|
||
bfe.u32 %r8874, %r8875, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3033, %r8874, 4;
|
||
add.s64 %rd3034, %rd51, %rd3033;
|
||
ld.shared.u32 %r9036, [%rd3034];
|
||
add.s32 %r9037, %r9036, %r9035;
|
||
xor.b32 %r9038, %r10339, %r8859;
|
||
xor.b32 %r8891, %r9038, %r9037;
|
||
// inline asm
|
||
bfe.u32 %r8878, %r8891, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3035, %r8878, 4;
|
||
add.s64 %rd3036, %rd42, %rd3035;
|
||
ld.shared.u32 %r9039, [%rd3036];
|
||
// inline asm
|
||
bfe.u32 %r8882, %r8891, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3037, %r8882, 4;
|
||
add.s64 %rd3038, %rd45, %rd3037;
|
||
ld.shared.u32 %r9040, [%rd3038];
|
||
add.s32 %r9041, %r9040, %r9039;
|
||
// inline asm
|
||
bfe.u32 %r8886, %r8891, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3039, %r8886, 4;
|
||
add.s64 %rd3040, %rd48, %rd3039;
|
||
ld.shared.u32 %r9042, [%rd3040];
|
||
xor.b32 %r9043, %r9042, %r9041;
|
||
// inline asm
|
||
bfe.u32 %r8890, %r8891, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3041, %r8890, 4;
|
||
add.s64 %rd3042, %rd51, %rd3041;
|
||
ld.shared.u32 %r9044, [%rd3042];
|
||
add.s32 %r9045, %r9044, %r9043;
|
||
xor.b32 %r9046, %r10338, %r8875;
|
||
xor.b32 %r8907, %r9046, %r9045;
|
||
// inline asm
|
||
bfe.u32 %r8894, %r8907, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3043, %r8894, 4;
|
||
add.s64 %rd3044, %rd42, %rd3043;
|
||
ld.shared.u32 %r9047, [%rd3044];
|
||
// inline asm
|
||
bfe.u32 %r8898, %r8907, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3045, %r8898, 4;
|
||
add.s64 %rd3046, %rd45, %rd3045;
|
||
ld.shared.u32 %r9048, [%rd3046];
|
||
add.s32 %r9049, %r9048, %r9047;
|
||
// inline asm
|
||
bfe.u32 %r8902, %r8907, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3047, %r8902, 4;
|
||
add.s64 %rd3048, %rd48, %rd3047;
|
||
ld.shared.u32 %r9050, [%rd3048];
|
||
xor.b32 %r9051, %r9050, %r9049;
|
||
// inline asm
|
||
bfe.u32 %r8906, %r8907, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3049, %r8906, 4;
|
||
add.s64 %rd3050, %rd51, %rd3049;
|
||
ld.shared.u32 %r9052, [%rd3050];
|
||
add.s32 %r9053, %r9052, %r9051;
|
||
xor.b32 %r9054, %r10337, %r8891;
|
||
xor.b32 %r8923, %r9054, %r9053;
|
||
// inline asm
|
||
bfe.u32 %r8910, %r8923, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3051, %r8910, 4;
|
||
add.s64 %rd3052, %rd42, %rd3051;
|
||
ld.shared.u32 %r9055, [%rd3052];
|
||
// inline asm
|
||
bfe.u32 %r8914, %r8923, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3053, %r8914, 4;
|
||
add.s64 %rd3054, %rd45, %rd3053;
|
||
ld.shared.u32 %r9056, [%rd3054];
|
||
add.s32 %r9057, %r9056, %r9055;
|
||
// inline asm
|
||
bfe.u32 %r8918, %r8923, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3055, %r8918, 4;
|
||
add.s64 %rd3056, %rd48, %rd3055;
|
||
ld.shared.u32 %r9058, [%rd3056];
|
||
xor.b32 %r9059, %r9058, %r9057;
|
||
// inline asm
|
||
bfe.u32 %r8922, %r8923, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3057, %r8922, 4;
|
||
add.s64 %rd3058, %rd51, %rd3057;
|
||
ld.shared.u32 %r9060, [%rd3058];
|
||
add.s32 %r9061, %r9060, %r9059;
|
||
xor.b32 %r9062, %r10336, %r8907;
|
||
xor.b32 %r8939, %r9062, %r9061;
|
||
// inline asm
|
||
bfe.u32 %r8926, %r8939, %r10258, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3059, %r8926, 4;
|
||
add.s64 %rd3060, %rd42, %rd3059;
|
||
ld.shared.u32 %r9063, [%rd3060];
|
||
// inline asm
|
||
bfe.u32 %r8930, %r8939, %r10260, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3061, %r8930, 4;
|
||
add.s64 %rd3062, %rd45, %rd3061;
|
||
ld.shared.u32 %r9064, [%rd3062];
|
||
add.s32 %r9065, %r9064, %r9063;
|
||
// inline asm
|
||
bfe.u32 %r8934, %r8939, %r10259, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3063, %r8934, 4;
|
||
add.s64 %rd3064, %rd48, %rd3063;
|
||
ld.shared.u32 %r9066, [%rd3064];
|
||
xor.b32 %r9067, %r9066, %r9065;
|
||
// inline asm
|
||
bfe.u32 %r8938, %r8939, %r10325, %r10259;
|
||
// inline asm
|
||
mul.wide.u32 %rd3065, %r8938, 4;
|
||
add.s64 %rd3066, %rd51, %rd3065;
|
||
ld.shared.u32 %r9068, [%rd3066];
|
||
add.s32 %r9069, %r9068, %r9067;
|
||
// ---- Tail of loop BB4_14: finish round 16, store, loop back ----------------
// New %r10326 = %r8923 ^ %r10335 ^ %r9069, where %r9069 is the table-lookup
// result computed just above from %r8939.
xor.b32 %r9070, %r10335, %r8923;
|
||
xor.b32 %r10326, %r9070, %r9069;
|
||
// New %r10327 = %r8939 ^ %r10334 (final XOR; no table lookup on this half).
xor.b32 %r10327, %r10334, %r8939;
|
||
// Write the pair as two consecutive 32-bit words in shared memory. The same
// two registers are the input state of the next iteration.
st.shared.u32 [%rd3499], %r10327;
|
||
st.shared.u32 [%rd3499+4], %r10326;
|
||
// Advance the store pointer past the 8 bytes just written.
add.s64 %rd3499, %rd3499, 8;
|
||
// Two words are produced per iteration, so the word counter steps by 2.
add.s32 %r10322, %r10322, 2;
|
||
// Repeat while the counter is below 256 (unsigned compare).
// NOTE(review): %r10322 is initialised outside this view; if it is 0 on entry
// this is 128 iterations writing 256 words (1024 bytes) from %rd3 -- confirm.
setp.lt.u32 %p8, %r10322, 256;
|
||
@%p8 bra BB4_14;
|
||
|
||
// ---- Pre-header of loop BB4_16 ---------------------------------------------
// %rd3500 is the running store pointer for this loop; it starts at %rd4.
// The state words %r10326 / %r10327 are NOT re-seeded here: they carry over
// from the last iteration of the preceding BB4_14 loop.
mov.u64 %rd3500, %rd4;
|
||
|
||
// ---- Loop BB4_16 -----------------------------------------------------------
// Same per-iteration computation as BB4_14 (initial XOR with %r10351, 16
// table-lookup rounds using %r10350 .. %r10335, final XOR with %r10334), but
// the two output words are stored through %rd3500 and the loop counter is
// %r10325.
// NOTE(review): looks like the Blowfish-style table regeneration continuing
// into the next 256-word table -- table layout is defined outside this view.
BB4_16:
|
||
// Constant 0: start bit of the lowest byte for the bfe instructions in this loop.
mov.u32 %r10328, 0;
|
||
// bfe start bit 16.
mov.u32 %r10264, 16;
|
||
// 8 serves both as the bfe field length and as start bit 8.
mov.u32 %r10263, 8;
|
||
// bfe start bit 24.
mov.u32 %r10262, 24;
|
||
// x = %r10327 ^ %r10351 : first state word combined with the first key word.
xor.b32 %r9084, %r10351, %r10327;
|
||
// inline asm
|
||
bfe.u32 %r9071, %r9084, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3070, %r9071, 4;
|
||
add.s64 %rd3071, %rd42, %rd3070;
|
||
ld.shared.u32 %r9328, [%rd3071];
|
||
// inline asm
|
||
bfe.u32 %r9075, %r9084, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3074, %r9075, 4;
|
||
add.s64 %rd3075, %rd45, %rd3074;
|
||
ld.shared.u32 %r9329, [%rd3075];
|
||
add.s32 %r9330, %r9329, %r9328;
|
||
// inline asm
|
||
bfe.u32 %r9079, %r9084, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3078, %r9079, 4;
|
||
add.s64 %rd3079, %rd48, %rd3078;
|
||
ld.shared.u32 %r9331, [%rd3079];
|
||
xor.b32 %r9332, %r9331, %r9330;
|
||
// inline asm
|
||
bfe.u32 %r9083, %r9084, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3082, %r9083, 4;
|
||
add.s64 %rd3083, %rd51, %rd3082;
|
||
ld.shared.u32 %r9333, [%rd3083];
|
||
add.s32 %r9334, %r9333, %r9332;
|
||
xor.b32 %r9335, %r10350, %r10326;
|
||
xor.b32 %r9100, %r9335, %r9334;
|
||
// inline asm
|
||
bfe.u32 %r9087, %r9100, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3084, %r9087, 4;
|
||
add.s64 %rd3085, %rd42, %rd3084;
|
||
ld.shared.u32 %r9336, [%rd3085];
|
||
// inline asm
|
||
bfe.u32 %r9091, %r9100, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3086, %r9091, 4;
|
||
add.s64 %rd3087, %rd45, %rd3086;
|
||
ld.shared.u32 %r9337, [%rd3087];
|
||
add.s32 %r9338, %r9337, %r9336;
|
||
// inline asm
|
||
bfe.u32 %r9095, %r9100, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3088, %r9095, 4;
|
||
add.s64 %rd3089, %rd48, %rd3088;
|
||
ld.shared.u32 %r9339, [%rd3089];
|
||
xor.b32 %r9340, %r9339, %r9338;
|
||
// inline asm
|
||
bfe.u32 %r9099, %r9100, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3090, %r9099, 4;
|
||
add.s64 %rd3091, %rd51, %rd3090;
|
||
ld.shared.u32 %r9341, [%rd3091];
|
||
add.s32 %r9342, %r9341, %r9340;
|
||
xor.b32 %r9343, %r10349, %r9084;
|
||
xor.b32 %r9116, %r9343, %r9342;
|
||
// inline asm
|
||
bfe.u32 %r9103, %r9116, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3092, %r9103, 4;
|
||
add.s64 %rd3093, %rd42, %rd3092;
|
||
ld.shared.u32 %r9344, [%rd3093];
|
||
// inline asm
|
||
bfe.u32 %r9107, %r9116, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3094, %r9107, 4;
|
||
add.s64 %rd3095, %rd45, %rd3094;
|
||
ld.shared.u32 %r9345, [%rd3095];
|
||
add.s32 %r9346, %r9345, %r9344;
|
||
// inline asm
|
||
bfe.u32 %r9111, %r9116, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3096, %r9111, 4;
|
||
add.s64 %rd3097, %rd48, %rd3096;
|
||
ld.shared.u32 %r9347, [%rd3097];
|
||
xor.b32 %r9348, %r9347, %r9346;
|
||
// inline asm
|
||
bfe.u32 %r9115, %r9116, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3098, %r9115, 4;
|
||
add.s64 %rd3099, %rd51, %rd3098;
|
||
ld.shared.u32 %r9349, [%rd3099];
|
||
add.s32 %r9350, %r9349, %r9348;
|
||
xor.b32 %r9351, %r10348, %r9100;
|
||
xor.b32 %r9132, %r9351, %r9350;
|
||
// inline asm
|
||
bfe.u32 %r9119, %r9132, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3100, %r9119, 4;
|
||
add.s64 %rd3101, %rd42, %rd3100;
|
||
ld.shared.u32 %r9352, [%rd3101];
|
||
// inline asm
|
||
bfe.u32 %r9123, %r9132, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3102, %r9123, 4;
|
||
add.s64 %rd3103, %rd45, %rd3102;
|
||
ld.shared.u32 %r9353, [%rd3103];
|
||
add.s32 %r9354, %r9353, %r9352;
|
||
// inline asm
|
||
bfe.u32 %r9127, %r9132, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3104, %r9127, 4;
|
||
add.s64 %rd3105, %rd48, %rd3104;
|
||
ld.shared.u32 %r9355, [%rd3105];
|
||
xor.b32 %r9356, %r9355, %r9354;
|
||
// inline asm
|
||
bfe.u32 %r9131, %r9132, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3106, %r9131, 4;
|
||
add.s64 %rd3107, %rd51, %rd3106;
|
||
ld.shared.u32 %r9357, [%rd3107];
|
||
add.s32 %r9358, %r9357, %r9356;
|
||
xor.b32 %r9359, %r10347, %r9116;
|
||
xor.b32 %r9148, %r9359, %r9358;
|
||
// inline asm
|
||
bfe.u32 %r9135, %r9148, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3108, %r9135, 4;
|
||
add.s64 %rd3109, %rd42, %rd3108;
|
||
ld.shared.u32 %r9360, [%rd3109];
|
||
// inline asm
|
||
bfe.u32 %r9139, %r9148, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3110, %r9139, 4;
|
||
add.s64 %rd3111, %rd45, %rd3110;
|
||
ld.shared.u32 %r9361, [%rd3111];
|
||
add.s32 %r9362, %r9361, %r9360;
|
||
// inline asm
|
||
bfe.u32 %r9143, %r9148, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3112, %r9143, 4;
|
||
add.s64 %rd3113, %rd48, %rd3112;
|
||
ld.shared.u32 %r9363, [%rd3113];
|
||
xor.b32 %r9364, %r9363, %r9362;
|
||
// inline asm
|
||
bfe.u32 %r9147, %r9148, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3114, %r9147, 4;
|
||
add.s64 %rd3115, %rd51, %rd3114;
|
||
ld.shared.u32 %r9365, [%rd3115];
|
||
add.s32 %r9366, %r9365, %r9364;
|
||
xor.b32 %r9367, %r10346, %r9132;
|
||
xor.b32 %r9164, %r9367, %r9366;
|
||
// inline asm
|
||
bfe.u32 %r9151, %r9164, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3116, %r9151, 4;
|
||
add.s64 %rd3117, %rd42, %rd3116;
|
||
ld.shared.u32 %r9368, [%rd3117];
|
||
// inline asm
|
||
bfe.u32 %r9155, %r9164, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3118, %r9155, 4;
|
||
add.s64 %rd3119, %rd45, %rd3118;
|
||
ld.shared.u32 %r9369, [%rd3119];
|
||
add.s32 %r9370, %r9369, %r9368;
|
||
// inline asm
|
||
bfe.u32 %r9159, %r9164, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3120, %r9159, 4;
|
||
add.s64 %rd3121, %rd48, %rd3120;
|
||
ld.shared.u32 %r9371, [%rd3121];
|
||
xor.b32 %r9372, %r9371, %r9370;
|
||
// inline asm
|
||
bfe.u32 %r9163, %r9164, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3122, %r9163, 4;
|
||
add.s64 %rd3123, %rd51, %rd3122;
|
||
ld.shared.u32 %r9373, [%rd3123];
|
||
add.s32 %r9374, %r9373, %r9372;
|
||
xor.b32 %r9375, %r10345, %r9148;
|
||
xor.b32 %r9180, %r9375, %r9374;
|
||
// inline asm
|
||
bfe.u32 %r9167, %r9180, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3124, %r9167, 4;
|
||
add.s64 %rd3125, %rd42, %rd3124;
|
||
ld.shared.u32 %r9376, [%rd3125];
|
||
// inline asm
|
||
bfe.u32 %r9171, %r9180, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3126, %r9171, 4;
|
||
add.s64 %rd3127, %rd45, %rd3126;
|
||
ld.shared.u32 %r9377, [%rd3127];
|
||
add.s32 %r9378, %r9377, %r9376;
|
||
// inline asm
|
||
bfe.u32 %r9175, %r9180, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3128, %r9175, 4;
|
||
add.s64 %rd3129, %rd48, %rd3128;
|
||
ld.shared.u32 %r9379, [%rd3129];
|
||
xor.b32 %r9380, %r9379, %r9378;
|
||
// inline asm
|
||
bfe.u32 %r9179, %r9180, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3130, %r9179, 4;
|
||
add.s64 %rd3131, %rd51, %rd3130;
|
||
ld.shared.u32 %r9381, [%rd3131];
|
||
add.s32 %r9382, %r9381, %r9380;
|
||
xor.b32 %r9383, %r10344, %r9164;
|
||
xor.b32 %r9196, %r9383, %r9382;
|
||
// inline asm
|
||
bfe.u32 %r9183, %r9196, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3132, %r9183, 4;
|
||
add.s64 %rd3133, %rd42, %rd3132;
|
||
ld.shared.u32 %r9384, [%rd3133];
|
||
// inline asm
|
||
bfe.u32 %r9187, %r9196, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3134, %r9187, 4;
|
||
add.s64 %rd3135, %rd45, %rd3134;
|
||
ld.shared.u32 %r9385, [%rd3135];
|
||
add.s32 %r9386, %r9385, %r9384;
|
||
// inline asm
|
||
bfe.u32 %r9191, %r9196, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3136, %r9191, 4;
|
||
add.s64 %rd3137, %rd48, %rd3136;
|
||
ld.shared.u32 %r9387, [%rd3137];
|
||
xor.b32 %r9388, %r9387, %r9386;
|
||
// inline asm
|
||
bfe.u32 %r9195, %r9196, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3138, %r9195, 4;
|
||
add.s64 %rd3139, %rd51, %rd3138;
|
||
ld.shared.u32 %r9389, [%rd3139];
|
||
add.s32 %r9390, %r9389, %r9388;
|
||
xor.b32 %r9391, %r10343, %r9180;
|
||
xor.b32 %r9212, %r9391, %r9390;
|
||
// inline asm
|
||
bfe.u32 %r9199, %r9212, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3140, %r9199, 4;
|
||
add.s64 %rd3141, %rd42, %rd3140;
|
||
ld.shared.u32 %r9392, [%rd3141];
|
||
// inline asm
|
||
bfe.u32 %r9203, %r9212, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3142, %r9203, 4;
|
||
add.s64 %rd3143, %rd45, %rd3142;
|
||
ld.shared.u32 %r9393, [%rd3143];
|
||
add.s32 %r9394, %r9393, %r9392;
|
||
// inline asm
|
||
bfe.u32 %r9207, %r9212, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3144, %r9207, 4;
|
||
add.s64 %rd3145, %rd48, %rd3144;
|
||
ld.shared.u32 %r9395, [%rd3145];
|
||
xor.b32 %r9396, %r9395, %r9394;
|
||
// inline asm
|
||
bfe.u32 %r9211, %r9212, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3146, %r9211, 4;
|
||
add.s64 %rd3147, %rd51, %rd3146;
|
||
ld.shared.u32 %r9397, [%rd3147];
|
||
add.s32 %r9398, %r9397, %r9396;
|
||
xor.b32 %r9399, %r10342, %r9196;
|
||
xor.b32 %r9228, %r9399, %r9398;
|
||
// inline asm
|
||
bfe.u32 %r9215, %r9228, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3148, %r9215, 4;
|
||
add.s64 %rd3149, %rd42, %rd3148;
|
||
ld.shared.u32 %r9400, [%rd3149];
|
||
// inline asm
|
||
bfe.u32 %r9219, %r9228, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3150, %r9219, 4;
|
||
add.s64 %rd3151, %rd45, %rd3150;
|
||
ld.shared.u32 %r9401, [%rd3151];
|
||
add.s32 %r9402, %r9401, %r9400;
|
||
// inline asm
|
||
bfe.u32 %r9223, %r9228, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3152, %r9223, 4;
|
||
add.s64 %rd3153, %rd48, %rd3152;
|
||
ld.shared.u32 %r9403, [%rd3153];
|
||
xor.b32 %r9404, %r9403, %r9402;
|
||
// inline asm
|
||
bfe.u32 %r9227, %r9228, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3154, %r9227, 4;
|
||
add.s64 %rd3155, %rd51, %rd3154;
|
||
ld.shared.u32 %r9405, [%rd3155];
|
||
add.s32 %r9406, %r9405, %r9404;
|
||
xor.b32 %r9407, %r10341, %r9212;
|
||
xor.b32 %r9244, %r9407, %r9406;
|
||
// inline asm
|
||
bfe.u32 %r9231, %r9244, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3156, %r9231, 4;
|
||
add.s64 %rd3157, %rd42, %rd3156;
|
||
ld.shared.u32 %r9408, [%rd3157];
|
||
// inline asm
|
||
bfe.u32 %r9235, %r9244, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3158, %r9235, 4;
|
||
add.s64 %rd3159, %rd45, %rd3158;
|
||
ld.shared.u32 %r9409, [%rd3159];
|
||
add.s32 %r9410, %r9409, %r9408;
|
||
// inline asm
|
||
bfe.u32 %r9239, %r9244, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3160, %r9239, 4;
|
||
add.s64 %rd3161, %rd48, %rd3160;
|
||
ld.shared.u32 %r9411, [%rd3161];
|
||
xor.b32 %r9412, %r9411, %r9410;
|
||
// inline asm
|
||
bfe.u32 %r9243, %r9244, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3162, %r9243, 4;
|
||
add.s64 %rd3163, %rd51, %rd3162;
|
||
ld.shared.u32 %r9413, [%rd3163];
|
||
add.s32 %r9414, %r9413, %r9412;
|
||
xor.b32 %r9415, %r10340, %r9228;
|
||
xor.b32 %r9260, %r9415, %r9414;
|
||
// inline asm
|
||
bfe.u32 %r9247, %r9260, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3164, %r9247, 4;
|
||
add.s64 %rd3165, %rd42, %rd3164;
|
||
ld.shared.u32 %r9416, [%rd3165];
|
||
// inline asm
|
||
bfe.u32 %r9251, %r9260, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3166, %r9251, 4;
|
||
add.s64 %rd3167, %rd45, %rd3166;
|
||
ld.shared.u32 %r9417, [%rd3167];
|
||
add.s32 %r9418, %r9417, %r9416;
|
||
// inline asm
|
||
bfe.u32 %r9255, %r9260, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3168, %r9255, 4;
|
||
add.s64 %rd3169, %rd48, %rd3168;
|
||
ld.shared.u32 %r9419, [%rd3169];
|
||
xor.b32 %r9420, %r9419, %r9418;
|
||
// inline asm
|
||
bfe.u32 %r9259, %r9260, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3170, %r9259, 4;
|
||
add.s64 %rd3171, %rd51, %rd3170;
|
||
ld.shared.u32 %r9421, [%rd3171];
|
||
add.s32 %r9422, %r9421, %r9420;
|
||
xor.b32 %r9423, %r10339, %r9244;
|
||
xor.b32 %r9276, %r9423, %r9422;
|
||
// inline asm
|
||
bfe.u32 %r9263, %r9276, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3172, %r9263, 4;
|
||
add.s64 %rd3173, %rd42, %rd3172;
|
||
ld.shared.u32 %r9424, [%rd3173];
|
||
// inline asm
|
||
bfe.u32 %r9267, %r9276, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3174, %r9267, 4;
|
||
add.s64 %rd3175, %rd45, %rd3174;
|
||
ld.shared.u32 %r9425, [%rd3175];
|
||
add.s32 %r9426, %r9425, %r9424;
|
||
// inline asm
|
||
bfe.u32 %r9271, %r9276, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3176, %r9271, 4;
|
||
add.s64 %rd3177, %rd48, %rd3176;
|
||
ld.shared.u32 %r9427, [%rd3177];
|
||
xor.b32 %r9428, %r9427, %r9426;
|
||
// inline asm
|
||
bfe.u32 %r9275, %r9276, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3178, %r9275, 4;
|
||
add.s64 %rd3179, %rd51, %rd3178;
|
||
ld.shared.u32 %r9429, [%rd3179];
|
||
add.s32 %r9430, %r9429, %r9428;
|
||
xor.b32 %r9431, %r10338, %r9260;
|
||
xor.b32 %r9292, %r9431, %r9430;
|
||
// inline asm
|
||
bfe.u32 %r9279, %r9292, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3180, %r9279, 4;
|
||
add.s64 %rd3181, %rd42, %rd3180;
|
||
ld.shared.u32 %r9432, [%rd3181];
|
||
// inline asm
|
||
bfe.u32 %r9283, %r9292, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3182, %r9283, 4;
|
||
add.s64 %rd3183, %rd45, %rd3182;
|
||
ld.shared.u32 %r9433, [%rd3183];
|
||
add.s32 %r9434, %r9433, %r9432;
|
||
// inline asm
|
||
bfe.u32 %r9287, %r9292, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3184, %r9287, 4;
|
||
add.s64 %rd3185, %rd48, %rd3184;
|
||
ld.shared.u32 %r9435, [%rd3185];
|
||
xor.b32 %r9436, %r9435, %r9434;
|
||
// inline asm
|
||
bfe.u32 %r9291, %r9292, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3186, %r9291, 4;
|
||
add.s64 %rd3187, %rd51, %rd3186;
|
||
ld.shared.u32 %r9437, [%rd3187];
|
||
add.s32 %r9438, %r9437, %r9436;
|
||
xor.b32 %r9439, %r10337, %r9276;
|
||
xor.b32 %r9308, %r9439, %r9438;
|
||
// inline asm
|
||
bfe.u32 %r9295, %r9308, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3188, %r9295, 4;
|
||
add.s64 %rd3189, %rd42, %rd3188;
|
||
ld.shared.u32 %r9440, [%rd3189];
|
||
// inline asm
|
||
bfe.u32 %r9299, %r9308, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3190, %r9299, 4;
|
||
add.s64 %rd3191, %rd45, %rd3190;
|
||
ld.shared.u32 %r9441, [%rd3191];
|
||
add.s32 %r9442, %r9441, %r9440;
|
||
// inline asm
|
||
bfe.u32 %r9303, %r9308, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3192, %r9303, 4;
|
||
add.s64 %rd3193, %rd48, %rd3192;
|
||
ld.shared.u32 %r9443, [%rd3193];
|
||
xor.b32 %r9444, %r9443, %r9442;
|
||
// inline asm
|
||
bfe.u32 %r9307, %r9308, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3194, %r9307, 4;
|
||
add.s64 %rd3195, %rd51, %rd3194;
|
||
ld.shared.u32 %r9445, [%rd3195];
|
||
add.s32 %r9446, %r9445, %r9444;
|
||
xor.b32 %r9447, %r10336, %r9292;
|
||
xor.b32 %r9324, %r9447, %r9446;
|
||
// inline asm
|
||
bfe.u32 %r9311, %r9324, %r10262, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3196, %r9311, 4;
|
||
add.s64 %rd3197, %rd42, %rd3196;
|
||
ld.shared.u32 %r9448, [%rd3197];
|
||
// inline asm
|
||
bfe.u32 %r9315, %r9324, %r10264, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3198, %r9315, 4;
|
||
add.s64 %rd3199, %rd45, %rd3198;
|
||
ld.shared.u32 %r9449, [%rd3199];
|
||
add.s32 %r9450, %r9449, %r9448;
|
||
// inline asm
|
||
bfe.u32 %r9319, %r9324, %r10263, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3200, %r9319, 4;
|
||
add.s64 %rd3201, %rd48, %rd3200;
|
||
ld.shared.u32 %r9451, [%rd3201];
|
||
xor.b32 %r9452, %r9451, %r9450;
|
||
// inline asm
|
||
bfe.u32 %r9323, %r9324, %r10328, %r10263;
|
||
// inline asm
|
||
mul.wide.u32 %rd3202, %r9323, 4;
|
||
add.s64 %rd3203, %rd51, %rd3202;
|
||
ld.shared.u32 %r9453, [%rd3203];
|
||
add.s32 %r9454, %r9453, %r9452;
|
||
xor.b32 %r9455, %r10335, %r9308;
|
||
xor.b32 %r10326, %r9455, %r9454;
|
||
xor.b32 %r10327, %r10334, %r9324;
|
||
st.shared.u32 [%rd3500], %r10327;
|
||
st.shared.u32 [%rd3500+4], %r10326;
|
||
add.s64 %rd3500, %rd3500, 8;
|
||
add.s32 %r10325, %r10325, 2;
|
||
setp.lt.u32 %p9, %r10325, 256;
|
||
@%p9 bra BB4_16;
|
||
|
||
// ---------------------------------------------------------------------------
// Loop BB4_18
//
// Each pass runs a 16-round Feistel-style network over the 32-bit pair
// (%r10327, %r10326) and stores the resulting pair to shared memory through
// %rd3501 (initialised from %rd5 just below, advanced by 8 bytes per pass).
// The pair is carried from one pass to the next, so every pass transforms the
// output of the previous one.
//
// Round function as coded below (all four table loads are ld.shared.u32):
//   F(x) = ((T0[x >> 24] + T1[(x >> 16) & 0xff]) ^ T2[(x >> 8) & 0xff])
//          + T3[x & 0xff]
// with table bases T0 = %rd42, T1 = %rd45, T2 = %rd48, T3 = %rd51, all of
// which are set outside this block.  This has the shape of the Blowfish F
// function; %r10351 .. %r10334 act as the 18 round-key words (presumably the
// P-array -- confirm against the kernel source).
//
// Register notes:
//   %r10328  pass counter, stepped by 2 while < 256 (unsigned).  It is not
//            initialised in this block; the preceding loop uses it as a bfe
//            bit-position operand, so it is presumably 0 on entry -- confirm.
//   %r10331  written with 0 on every pass and used as the bfe position of the
//            low byte.  Loop BB4_20 later uses %r10331 as its own counter and
//            does not reset it itself, so it depends on the 0 written here.
// ---------------------------------------------------------------------------
mov.u64 %rd3501, %rd5;
|
||
|
||
BB4_18:
|
||
// bfe operands, re-materialised on every pass: positions 0 / 16 / 24 and the
// value 8, which serves both as the field width and as the position of byte 1.
mov.u32 %r10331, 0;
|
||
mov.u32 %r10268, 16;
|
||
mov.u32 %r10267, 8;
|
||
mov.u32 %r10266, 24;
|
||
// Pre-whitening: mix the first key word into the left half.
xor.b32 %r9469, %r10351, %r10327;
|
||
// Round 1/16: %r9485 = (%r10350 ^ %r10326) ^ F(%r9469)
// inline asm
|
||
bfe.u32 %r9456, %r9469, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3207, %r9456, 4;
|
||
add.s64 %rd3208, %rd42, %rd3207;
|
||
ld.shared.u32 %r9713, [%rd3208];
|
||
// inline asm
|
||
bfe.u32 %r9460, %r9469, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3211, %r9460, 4;
|
||
add.s64 %rd3212, %rd45, %rd3211;
|
||
ld.shared.u32 %r9714, [%rd3212];
|
||
add.s32 %r9715, %r9714, %r9713;
|
||
// inline asm
|
||
bfe.u32 %r9464, %r9469, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3215, %r9464, 4;
|
||
add.s64 %rd3216, %rd48, %rd3215;
|
||
ld.shared.u32 %r9716, [%rd3216];
|
||
xor.b32 %r9717, %r9716, %r9715;
|
||
// inline asm
|
||
bfe.u32 %r9468, %r9469, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3219, %r9468, 4;
|
||
add.s64 %rd3220, %rd51, %rd3219;
|
||
ld.shared.u32 %r9718, [%rd3220];
|
||
add.s32 %r9719, %r9718, %r9717;
|
||
xor.b32 %r9720, %r10350, %r10326;
|
||
xor.b32 %r9485, %r9720, %r9719;
|
||
// Round 2/16: %r9501 = (%r10349 ^ %r9469) ^ F(%r9485)
// inline asm
|
||
bfe.u32 %r9472, %r9485, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3221, %r9472, 4;
|
||
add.s64 %rd3222, %rd42, %rd3221;
|
||
ld.shared.u32 %r9721, [%rd3222];
|
||
// inline asm
|
||
bfe.u32 %r9476, %r9485, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3223, %r9476, 4;
|
||
add.s64 %rd3224, %rd45, %rd3223;
|
||
ld.shared.u32 %r9722, [%rd3224];
|
||
add.s32 %r9723, %r9722, %r9721;
|
||
// inline asm
|
||
bfe.u32 %r9480, %r9485, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3225, %r9480, 4;
|
||
add.s64 %rd3226, %rd48, %rd3225;
|
||
ld.shared.u32 %r9724, [%rd3226];
|
||
xor.b32 %r9725, %r9724, %r9723;
|
||
// inline asm
|
||
bfe.u32 %r9484, %r9485, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3227, %r9484, 4;
|
||
add.s64 %rd3228, %rd51, %rd3227;
|
||
ld.shared.u32 %r9726, [%rd3228];
|
||
add.s32 %r9727, %r9726, %r9725;
|
||
xor.b32 %r9728, %r10349, %r9469;
|
||
xor.b32 %r9501, %r9728, %r9727;
|
||
// Round 3/16: %r9517 = (%r10348 ^ %r9485) ^ F(%r9501)
// inline asm
|
||
bfe.u32 %r9488, %r9501, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3229, %r9488, 4;
|
||
add.s64 %rd3230, %rd42, %rd3229;
|
||
ld.shared.u32 %r9729, [%rd3230];
|
||
// inline asm
|
||
bfe.u32 %r9492, %r9501, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3231, %r9492, 4;
|
||
add.s64 %rd3232, %rd45, %rd3231;
|
||
ld.shared.u32 %r9730, [%rd3232];
|
||
add.s32 %r9731, %r9730, %r9729;
|
||
// inline asm
|
||
bfe.u32 %r9496, %r9501, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3233, %r9496, 4;
|
||
add.s64 %rd3234, %rd48, %rd3233;
|
||
ld.shared.u32 %r9732, [%rd3234];
|
||
xor.b32 %r9733, %r9732, %r9731;
|
||
// inline asm
|
||
bfe.u32 %r9500, %r9501, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3235, %r9500, 4;
|
||
add.s64 %rd3236, %rd51, %rd3235;
|
||
ld.shared.u32 %r9734, [%rd3236];
|
||
add.s32 %r9735, %r9734, %r9733;
|
||
xor.b32 %r9736, %r10348, %r9485;
|
||
xor.b32 %r9517, %r9736, %r9735;
|
||
// Round 4/16: %r9533 = (%r10347 ^ %r9501) ^ F(%r9517)
// inline asm
|
||
bfe.u32 %r9504, %r9517, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3237, %r9504, 4;
|
||
add.s64 %rd3238, %rd42, %rd3237;
|
||
ld.shared.u32 %r9737, [%rd3238];
|
||
// inline asm
|
||
bfe.u32 %r9508, %r9517, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3239, %r9508, 4;
|
||
add.s64 %rd3240, %rd45, %rd3239;
|
||
ld.shared.u32 %r9738, [%rd3240];
|
||
add.s32 %r9739, %r9738, %r9737;
|
||
// inline asm
|
||
bfe.u32 %r9512, %r9517, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3241, %r9512, 4;
|
||
add.s64 %rd3242, %rd48, %rd3241;
|
||
ld.shared.u32 %r9740, [%rd3242];
|
||
xor.b32 %r9741, %r9740, %r9739;
|
||
// inline asm
|
||
bfe.u32 %r9516, %r9517, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3243, %r9516, 4;
|
||
add.s64 %rd3244, %rd51, %rd3243;
|
||
ld.shared.u32 %r9742, [%rd3244];
|
||
add.s32 %r9743, %r9742, %r9741;
|
||
xor.b32 %r9744, %r10347, %r9501;
|
||
xor.b32 %r9533, %r9744, %r9743;
|
||
// Round 5/16: %r9549 = (%r10346 ^ %r9517) ^ F(%r9533)
// inline asm
|
||
bfe.u32 %r9520, %r9533, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3245, %r9520, 4;
|
||
add.s64 %rd3246, %rd42, %rd3245;
|
||
ld.shared.u32 %r9745, [%rd3246];
|
||
// inline asm
|
||
bfe.u32 %r9524, %r9533, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3247, %r9524, 4;
|
||
add.s64 %rd3248, %rd45, %rd3247;
|
||
ld.shared.u32 %r9746, [%rd3248];
|
||
add.s32 %r9747, %r9746, %r9745;
|
||
// inline asm
|
||
bfe.u32 %r9528, %r9533, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3249, %r9528, 4;
|
||
add.s64 %rd3250, %rd48, %rd3249;
|
||
ld.shared.u32 %r9748, [%rd3250];
|
||
xor.b32 %r9749, %r9748, %r9747;
|
||
// inline asm
|
||
bfe.u32 %r9532, %r9533, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3251, %r9532, 4;
|
||
add.s64 %rd3252, %rd51, %rd3251;
|
||
ld.shared.u32 %r9750, [%rd3252];
|
||
add.s32 %r9751, %r9750, %r9749;
|
||
xor.b32 %r9752, %r10346, %r9517;
|
||
xor.b32 %r9549, %r9752, %r9751;
|
||
// Round 6/16: %r9565 = (%r10345 ^ %r9533) ^ F(%r9549)
// inline asm
|
||
bfe.u32 %r9536, %r9549, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3253, %r9536, 4;
|
||
add.s64 %rd3254, %rd42, %rd3253;
|
||
ld.shared.u32 %r9753, [%rd3254];
|
||
// inline asm
|
||
bfe.u32 %r9540, %r9549, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3255, %r9540, 4;
|
||
add.s64 %rd3256, %rd45, %rd3255;
|
||
ld.shared.u32 %r9754, [%rd3256];
|
||
add.s32 %r9755, %r9754, %r9753;
|
||
// inline asm
|
||
bfe.u32 %r9544, %r9549, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3257, %r9544, 4;
|
||
add.s64 %rd3258, %rd48, %rd3257;
|
||
ld.shared.u32 %r9756, [%rd3258];
|
||
xor.b32 %r9757, %r9756, %r9755;
|
||
// inline asm
|
||
bfe.u32 %r9548, %r9549, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3259, %r9548, 4;
|
||
add.s64 %rd3260, %rd51, %rd3259;
|
||
ld.shared.u32 %r9758, [%rd3260];
|
||
add.s32 %r9759, %r9758, %r9757;
|
||
xor.b32 %r9760, %r10345, %r9533;
|
||
xor.b32 %r9565, %r9760, %r9759;
|
||
// Round 7/16: %r9581 = (%r10344 ^ %r9549) ^ F(%r9565)
// inline asm
|
||
bfe.u32 %r9552, %r9565, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3261, %r9552, 4;
|
||
add.s64 %rd3262, %rd42, %rd3261;
|
||
ld.shared.u32 %r9761, [%rd3262];
|
||
// inline asm
|
||
bfe.u32 %r9556, %r9565, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3263, %r9556, 4;
|
||
add.s64 %rd3264, %rd45, %rd3263;
|
||
ld.shared.u32 %r9762, [%rd3264];
|
||
add.s32 %r9763, %r9762, %r9761;
|
||
// inline asm
|
||
bfe.u32 %r9560, %r9565, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3265, %r9560, 4;
|
||
add.s64 %rd3266, %rd48, %rd3265;
|
||
ld.shared.u32 %r9764, [%rd3266];
|
||
xor.b32 %r9765, %r9764, %r9763;
|
||
// inline asm
|
||
bfe.u32 %r9564, %r9565, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3267, %r9564, 4;
|
||
add.s64 %rd3268, %rd51, %rd3267;
|
||
ld.shared.u32 %r9766, [%rd3268];
|
||
add.s32 %r9767, %r9766, %r9765;
|
||
xor.b32 %r9768, %r10344, %r9549;
|
||
xor.b32 %r9581, %r9768, %r9767;
|
||
// Round 8/16: %r9597 = (%r10343 ^ %r9565) ^ F(%r9581)
// inline asm
|
||
bfe.u32 %r9568, %r9581, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3269, %r9568, 4;
|
||
add.s64 %rd3270, %rd42, %rd3269;
|
||
ld.shared.u32 %r9769, [%rd3270];
|
||
// inline asm
|
||
bfe.u32 %r9572, %r9581, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3271, %r9572, 4;
|
||
add.s64 %rd3272, %rd45, %rd3271;
|
||
ld.shared.u32 %r9770, [%rd3272];
|
||
add.s32 %r9771, %r9770, %r9769;
|
||
// inline asm
|
||
bfe.u32 %r9576, %r9581, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3273, %r9576, 4;
|
||
add.s64 %rd3274, %rd48, %rd3273;
|
||
ld.shared.u32 %r9772, [%rd3274];
|
||
xor.b32 %r9773, %r9772, %r9771;
|
||
// inline asm
|
||
bfe.u32 %r9580, %r9581, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3275, %r9580, 4;
|
||
add.s64 %rd3276, %rd51, %rd3275;
|
||
ld.shared.u32 %r9774, [%rd3276];
|
||
add.s32 %r9775, %r9774, %r9773;
|
||
xor.b32 %r9776, %r10343, %r9565;
|
||
xor.b32 %r9597, %r9776, %r9775;
|
||
// Round 9/16: %r9613 = (%r10342 ^ %r9581) ^ F(%r9597)
// inline asm
|
||
bfe.u32 %r9584, %r9597, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3277, %r9584, 4;
|
||
add.s64 %rd3278, %rd42, %rd3277;
|
||
ld.shared.u32 %r9777, [%rd3278];
|
||
// inline asm
|
||
bfe.u32 %r9588, %r9597, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3279, %r9588, 4;
|
||
add.s64 %rd3280, %rd45, %rd3279;
|
||
ld.shared.u32 %r9778, [%rd3280];
|
||
add.s32 %r9779, %r9778, %r9777;
|
||
// inline asm
|
||
bfe.u32 %r9592, %r9597, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3281, %r9592, 4;
|
||
add.s64 %rd3282, %rd48, %rd3281;
|
||
ld.shared.u32 %r9780, [%rd3282];
|
||
xor.b32 %r9781, %r9780, %r9779;
|
||
// inline asm
|
||
bfe.u32 %r9596, %r9597, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3283, %r9596, 4;
|
||
add.s64 %rd3284, %rd51, %rd3283;
|
||
ld.shared.u32 %r9782, [%rd3284];
|
||
add.s32 %r9783, %r9782, %r9781;
|
||
xor.b32 %r9784, %r10342, %r9581;
|
||
xor.b32 %r9613, %r9784, %r9783;
|
||
// Round 10/16: %r9629 = (%r10341 ^ %r9597) ^ F(%r9613)
// inline asm
|
||
bfe.u32 %r9600, %r9613, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3285, %r9600, 4;
|
||
add.s64 %rd3286, %rd42, %rd3285;
|
||
ld.shared.u32 %r9785, [%rd3286];
|
||
// inline asm
|
||
bfe.u32 %r9604, %r9613, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3287, %r9604, 4;
|
||
add.s64 %rd3288, %rd45, %rd3287;
|
||
ld.shared.u32 %r9786, [%rd3288];
|
||
add.s32 %r9787, %r9786, %r9785;
|
||
// inline asm
|
||
bfe.u32 %r9608, %r9613, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3289, %r9608, 4;
|
||
add.s64 %rd3290, %rd48, %rd3289;
|
||
ld.shared.u32 %r9788, [%rd3290];
|
||
xor.b32 %r9789, %r9788, %r9787;
|
||
// inline asm
|
||
bfe.u32 %r9612, %r9613, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3291, %r9612, 4;
|
||
add.s64 %rd3292, %rd51, %rd3291;
|
||
ld.shared.u32 %r9790, [%rd3292];
|
||
add.s32 %r9791, %r9790, %r9789;
|
||
xor.b32 %r9792, %r10341, %r9597;
|
||
xor.b32 %r9629, %r9792, %r9791;
|
||
// Round 11/16: %r9645 = (%r10340 ^ %r9613) ^ F(%r9629)
// inline asm
|
||
bfe.u32 %r9616, %r9629, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3293, %r9616, 4;
|
||
add.s64 %rd3294, %rd42, %rd3293;
|
||
ld.shared.u32 %r9793, [%rd3294];
|
||
// inline asm
|
||
bfe.u32 %r9620, %r9629, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3295, %r9620, 4;
|
||
add.s64 %rd3296, %rd45, %rd3295;
|
||
ld.shared.u32 %r9794, [%rd3296];
|
||
add.s32 %r9795, %r9794, %r9793;
|
||
// inline asm
|
||
bfe.u32 %r9624, %r9629, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3297, %r9624, 4;
|
||
add.s64 %rd3298, %rd48, %rd3297;
|
||
ld.shared.u32 %r9796, [%rd3298];
|
||
xor.b32 %r9797, %r9796, %r9795;
|
||
// inline asm
|
||
bfe.u32 %r9628, %r9629, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3299, %r9628, 4;
|
||
add.s64 %rd3300, %rd51, %rd3299;
|
||
ld.shared.u32 %r9798, [%rd3300];
|
||
add.s32 %r9799, %r9798, %r9797;
|
||
xor.b32 %r9800, %r10340, %r9613;
|
||
xor.b32 %r9645, %r9800, %r9799;
|
||
// Round 12/16: %r9661 = (%r10339 ^ %r9629) ^ F(%r9645)
// inline asm
|
||
bfe.u32 %r9632, %r9645, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3301, %r9632, 4;
|
||
add.s64 %rd3302, %rd42, %rd3301;
|
||
ld.shared.u32 %r9801, [%rd3302];
|
||
// inline asm
|
||
bfe.u32 %r9636, %r9645, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3303, %r9636, 4;
|
||
add.s64 %rd3304, %rd45, %rd3303;
|
||
ld.shared.u32 %r9802, [%rd3304];
|
||
add.s32 %r9803, %r9802, %r9801;
|
||
// inline asm
|
||
bfe.u32 %r9640, %r9645, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3305, %r9640, 4;
|
||
add.s64 %rd3306, %rd48, %rd3305;
|
||
ld.shared.u32 %r9804, [%rd3306];
|
||
xor.b32 %r9805, %r9804, %r9803;
|
||
// inline asm
|
||
bfe.u32 %r9644, %r9645, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3307, %r9644, 4;
|
||
add.s64 %rd3308, %rd51, %rd3307;
|
||
ld.shared.u32 %r9806, [%rd3308];
|
||
add.s32 %r9807, %r9806, %r9805;
|
||
xor.b32 %r9808, %r10339, %r9629;
|
||
xor.b32 %r9661, %r9808, %r9807;
|
||
// Round 13/16: %r9677 = (%r10338 ^ %r9645) ^ F(%r9661)
// inline asm
|
||
bfe.u32 %r9648, %r9661, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3309, %r9648, 4;
|
||
add.s64 %rd3310, %rd42, %rd3309;
|
||
ld.shared.u32 %r9809, [%rd3310];
|
||
// inline asm
|
||
bfe.u32 %r9652, %r9661, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3311, %r9652, 4;
|
||
add.s64 %rd3312, %rd45, %rd3311;
|
||
ld.shared.u32 %r9810, [%rd3312];
|
||
add.s32 %r9811, %r9810, %r9809;
|
||
// inline asm
|
||
bfe.u32 %r9656, %r9661, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3313, %r9656, 4;
|
||
add.s64 %rd3314, %rd48, %rd3313;
|
||
ld.shared.u32 %r9812, [%rd3314];
|
||
xor.b32 %r9813, %r9812, %r9811;
|
||
// inline asm
|
||
bfe.u32 %r9660, %r9661, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3315, %r9660, 4;
|
||
add.s64 %rd3316, %rd51, %rd3315;
|
||
ld.shared.u32 %r9814, [%rd3316];
|
||
add.s32 %r9815, %r9814, %r9813;
|
||
xor.b32 %r9816, %r10338, %r9645;
|
||
xor.b32 %r9677, %r9816, %r9815;
|
||
// Round 14/16: %r9693 = (%r10337 ^ %r9661) ^ F(%r9677)
// inline asm
|
||
bfe.u32 %r9664, %r9677, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3317, %r9664, 4;
|
||
add.s64 %rd3318, %rd42, %rd3317;
|
||
ld.shared.u32 %r9817, [%rd3318];
|
||
// inline asm
|
||
bfe.u32 %r9668, %r9677, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3319, %r9668, 4;
|
||
add.s64 %rd3320, %rd45, %rd3319;
|
||
ld.shared.u32 %r9818, [%rd3320];
|
||
add.s32 %r9819, %r9818, %r9817;
|
||
// inline asm
|
||
bfe.u32 %r9672, %r9677, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3321, %r9672, 4;
|
||
add.s64 %rd3322, %rd48, %rd3321;
|
||
ld.shared.u32 %r9820, [%rd3322];
|
||
xor.b32 %r9821, %r9820, %r9819;
|
||
// inline asm
|
||
bfe.u32 %r9676, %r9677, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3323, %r9676, 4;
|
||
add.s64 %rd3324, %rd51, %rd3323;
|
||
ld.shared.u32 %r9822, [%rd3324];
|
||
add.s32 %r9823, %r9822, %r9821;
|
||
xor.b32 %r9824, %r10337, %r9661;
|
||
xor.b32 %r9693, %r9824, %r9823;
|
||
// Round 15/16: %r9709 = (%r10336 ^ %r9677) ^ F(%r9693)
// inline asm
|
||
bfe.u32 %r9680, %r9693, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3325, %r9680, 4;
|
||
add.s64 %rd3326, %rd42, %rd3325;
|
||
ld.shared.u32 %r9825, [%rd3326];
|
||
// inline asm
|
||
bfe.u32 %r9684, %r9693, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3327, %r9684, 4;
|
||
add.s64 %rd3328, %rd45, %rd3327;
|
||
ld.shared.u32 %r9826, [%rd3328];
|
||
add.s32 %r9827, %r9826, %r9825;
|
||
// inline asm
|
||
bfe.u32 %r9688, %r9693, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3329, %r9688, 4;
|
||
add.s64 %rd3330, %rd48, %rd3329;
|
||
ld.shared.u32 %r9828, [%rd3330];
|
||
xor.b32 %r9829, %r9828, %r9827;
|
||
// inline asm
|
||
bfe.u32 %r9692, %r9693, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3331, %r9692, 4;
|
||
add.s64 %rd3332, %rd51, %rd3331;
|
||
ld.shared.u32 %r9830, [%rd3332];
|
||
add.s32 %r9831, %r9830, %r9829;
|
||
xor.b32 %r9832, %r10336, %r9677;
|
||
xor.b32 %r9709, %r9832, %r9831;
|
||
// Round 16/16: %r10326 = (%r10335 ^ %r9693) ^ F(%r9709)
// inline asm
|
||
bfe.u32 %r9696, %r9709, %r10266, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3333, %r9696, 4;
|
||
add.s64 %rd3334, %rd42, %rd3333;
|
||
ld.shared.u32 %r9833, [%rd3334];
|
||
// inline asm
|
||
bfe.u32 %r9700, %r9709, %r10268, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3335, %r9700, 4;
|
||
add.s64 %rd3336, %rd45, %rd3335;
|
||
ld.shared.u32 %r9834, [%rd3336];
|
||
add.s32 %r9835, %r9834, %r9833;
|
||
// inline asm
|
||
bfe.u32 %r9704, %r9709, %r10267, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3337, %r9704, 4;
|
||
add.s64 %rd3338, %rd48, %rd3337;
|
||
ld.shared.u32 %r9836, [%rd3338];
|
||
xor.b32 %r9837, %r9836, %r9835;
|
||
// inline asm
|
||
bfe.u32 %r9708, %r9709, %r10331, %r10267;
|
||
// inline asm
|
||
mul.wide.u32 %rd3339, %r9708, 4;
|
||
add.s64 %rd3340, %rd51, %rd3339;
|
||
ld.shared.u32 %r9838, [%rd3340];
|
||
add.s32 %r9839, %r9838, %r9837;
|
||
xor.b32 %r9840, %r10335, %r9693;
|
||
xor.b32 %r10326, %r9840, %r9839;
|
||
// Post-whitening with the last key word; the halves end up swapped relative
// to the round chain (%r10327 comes from round 15's output, %r10326 from round 16).
xor.b32 %r10327, %r10334, %r9709;
|
||
// Store the new pair as two consecutive 32-bit words, then advance the
// destination by 8 bytes and the counter by 2 words.
st.shared.u32 [%rd3501], %r10327;
|
||
st.shared.u32 [%rd3501+4], %r10326;
|
||
add.s64 %rd3501, %rd3501, 8;
|
||
add.s32 %r10328, %r10328, 2;
|
||
// Repeat while the (unsigned) word counter is below 256.
setp.lt.u32 %p10, %r10328, 256;
|
||
@%p10 bra BB4_18;
|
||
|
||
// ---------------------------------------------------------------------------
// Loop BB4_20
//
// Same structure as the loop before it: each pass runs a 16-round
// Feistel-style network over the 32-bit pair (%r10327, %r10326) and stores
// the resulting pair to shared memory through %rd3502 (initialised from %rd6
// just below, advanced by 8 bytes per pass).  The pair is carried from one
// pass to the next.
//
// Round function as coded below (all four table loads are ld.shared.u32):
//   F(x) = ((T0[x >> 24] + T1[(x >> 16) & 0xff]) ^ T2[(x >> 8) & 0xff])
//          + T3[x & 0xff]
// with table bases T0 = %rd42, T1 = %rd45, T2 = %rd48, T3 = %rd51, all of
// which are set outside this block.  This has the shape of the Blowfish F
// function; %r10351 .. %r10334 act as the 18 round-key words (presumably the
// P-array -- confirm against the kernel source).
//
// Register notes:
//   %r10331  pass counter, stepped by 2 while < 256 (unsigned).  It is not
//            initialised in this block: the preceding loop BB4_18 writes 0 to
//            it on every pass, and that is the value it holds on entry here.
// ---------------------------------------------------------------------------
mov.u64 %rd3502, %rd6;
|
||
|
||
BB4_20:
|
||
// bfe operands, re-materialised on every pass: positions 0 / 16 / 24 and the
// value 8, which serves both as the field width and as the position of byte 1.
mov.u32 %r10273, 0;
|
||
mov.u32 %r10272, 16;
|
||
mov.u32 %r10271, 8;
|
||
mov.u32 %r10270, 24;
|
||
// Pre-whitening: mix the first key word into the left half.
xor.b32 %r9854, %r10351, %r10327;
|
||
// Round 1/16: %r9870 = (%r10350 ^ %r10326) ^ F(%r9854)
// inline asm
|
||
bfe.u32 %r9841, %r9854, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3344, %r9841, 4;
|
||
add.s64 %rd3345, %rd42, %rd3344;
|
||
ld.shared.u32 %r10097, [%rd3345];
|
||
// inline asm
|
||
bfe.u32 %r9845, %r9854, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3348, %r9845, 4;
|
||
add.s64 %rd3349, %rd45, %rd3348;
|
||
ld.shared.u32 %r10098, [%rd3349];
|
||
add.s32 %r10099, %r10098, %r10097;
|
||
// inline asm
|
||
bfe.u32 %r9849, %r9854, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3352, %r9849, 4;
|
||
add.s64 %rd3353, %rd48, %rd3352;
|
||
ld.shared.u32 %r10100, [%rd3353];
|
||
xor.b32 %r10101, %r10100, %r10099;
|
||
// inline asm
|
||
bfe.u32 %r9853, %r9854, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3356, %r9853, 4;
|
||
add.s64 %rd3357, %rd51, %rd3356;
|
||
ld.shared.u32 %r10102, [%rd3357];
|
||
add.s32 %r10103, %r10102, %r10101;
|
||
xor.b32 %r10104, %r10350, %r10326;
|
||
xor.b32 %r9870, %r10104, %r10103;
|
||
// Round 2/16: %r9886 = (%r10349 ^ %r9854) ^ F(%r9870)
// inline asm
|
||
bfe.u32 %r9857, %r9870, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3358, %r9857, 4;
|
||
add.s64 %rd3359, %rd42, %rd3358;
|
||
ld.shared.u32 %r10105, [%rd3359];
|
||
// inline asm
|
||
bfe.u32 %r9861, %r9870, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3360, %r9861, 4;
|
||
add.s64 %rd3361, %rd45, %rd3360;
|
||
ld.shared.u32 %r10106, [%rd3361];
|
||
add.s32 %r10107, %r10106, %r10105;
|
||
// inline asm
|
||
bfe.u32 %r9865, %r9870, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3362, %r9865, 4;
|
||
add.s64 %rd3363, %rd48, %rd3362;
|
||
ld.shared.u32 %r10108, [%rd3363];
|
||
xor.b32 %r10109, %r10108, %r10107;
|
||
// inline asm
|
||
bfe.u32 %r9869, %r9870, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3364, %r9869, 4;
|
||
add.s64 %rd3365, %rd51, %rd3364;
|
||
ld.shared.u32 %r10110, [%rd3365];
|
||
add.s32 %r10111, %r10110, %r10109;
|
||
xor.b32 %r10112, %r10349, %r9854;
|
||
xor.b32 %r9886, %r10112, %r10111;
|
||
// Round 3/16: %r9902 = (%r10348 ^ %r9870) ^ F(%r9886)
// inline asm
|
||
bfe.u32 %r9873, %r9886, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3366, %r9873, 4;
|
||
add.s64 %rd3367, %rd42, %rd3366;
|
||
ld.shared.u32 %r10113, [%rd3367];
|
||
// inline asm
|
||
bfe.u32 %r9877, %r9886, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3368, %r9877, 4;
|
||
add.s64 %rd3369, %rd45, %rd3368;
|
||
ld.shared.u32 %r10114, [%rd3369];
|
||
add.s32 %r10115, %r10114, %r10113;
|
||
// inline asm
|
||
bfe.u32 %r9881, %r9886, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3370, %r9881, 4;
|
||
add.s64 %rd3371, %rd48, %rd3370;
|
||
ld.shared.u32 %r10116, [%rd3371];
|
||
xor.b32 %r10117, %r10116, %r10115;
|
||
// inline asm
|
||
bfe.u32 %r9885, %r9886, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3372, %r9885, 4;
|
||
add.s64 %rd3373, %rd51, %rd3372;
|
||
ld.shared.u32 %r10118, [%rd3373];
|
||
add.s32 %r10119, %r10118, %r10117;
|
||
xor.b32 %r10120, %r10348, %r9870;
|
||
xor.b32 %r9902, %r10120, %r10119;
|
||
// Round 4/16: %r9918 = (%r10347 ^ %r9886) ^ F(%r9902)
// inline asm
|
||
bfe.u32 %r9889, %r9902, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3374, %r9889, 4;
|
||
add.s64 %rd3375, %rd42, %rd3374;
|
||
ld.shared.u32 %r10121, [%rd3375];
|
||
// inline asm
|
||
bfe.u32 %r9893, %r9902, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3376, %r9893, 4;
|
||
add.s64 %rd3377, %rd45, %rd3376;
|
||
ld.shared.u32 %r10122, [%rd3377];
|
||
add.s32 %r10123, %r10122, %r10121;
|
||
// inline asm
|
||
bfe.u32 %r9897, %r9902, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3378, %r9897, 4;
|
||
add.s64 %rd3379, %rd48, %rd3378;
|
||
ld.shared.u32 %r10124, [%rd3379];
|
||
xor.b32 %r10125, %r10124, %r10123;
|
||
// inline asm
|
||
bfe.u32 %r9901, %r9902, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3380, %r9901, 4;
|
||
add.s64 %rd3381, %rd51, %rd3380;
|
||
ld.shared.u32 %r10126, [%rd3381];
|
||
add.s32 %r10127, %r10126, %r10125;
|
||
xor.b32 %r10128, %r10347, %r9886;
|
||
xor.b32 %r9918, %r10128, %r10127;
|
||
// Round 5/16: %r9934 = (%r10346 ^ %r9902) ^ F(%r9918)
// inline asm
|
||
bfe.u32 %r9905, %r9918, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3382, %r9905, 4;
|
||
add.s64 %rd3383, %rd42, %rd3382;
|
||
ld.shared.u32 %r10129, [%rd3383];
|
||
// inline asm
|
||
bfe.u32 %r9909, %r9918, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3384, %r9909, 4;
|
||
add.s64 %rd3385, %rd45, %rd3384;
|
||
ld.shared.u32 %r10130, [%rd3385];
|
||
add.s32 %r10131, %r10130, %r10129;
|
||
// inline asm
|
||
bfe.u32 %r9913, %r9918, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3386, %r9913, 4;
|
||
add.s64 %rd3387, %rd48, %rd3386;
|
||
ld.shared.u32 %r10132, [%rd3387];
|
||
xor.b32 %r10133, %r10132, %r10131;
|
||
// inline asm
|
||
bfe.u32 %r9917, %r9918, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3388, %r9917, 4;
|
||
add.s64 %rd3389, %rd51, %rd3388;
|
||
ld.shared.u32 %r10134, [%rd3389];
|
||
add.s32 %r10135, %r10134, %r10133;
|
||
xor.b32 %r10136, %r10346, %r9902;
|
||
xor.b32 %r9934, %r10136, %r10135;
|
||
// Round 6/16: %r9950 = (%r10345 ^ %r9918) ^ F(%r9934)
// inline asm
|
||
bfe.u32 %r9921, %r9934, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3390, %r9921, 4;
|
||
add.s64 %rd3391, %rd42, %rd3390;
|
||
ld.shared.u32 %r10137, [%rd3391];
|
||
// inline asm
|
||
bfe.u32 %r9925, %r9934, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3392, %r9925, 4;
|
||
add.s64 %rd3393, %rd45, %rd3392;
|
||
ld.shared.u32 %r10138, [%rd3393];
|
||
add.s32 %r10139, %r10138, %r10137;
|
||
// inline asm
|
||
bfe.u32 %r9929, %r9934, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3394, %r9929, 4;
|
||
add.s64 %rd3395, %rd48, %rd3394;
|
||
ld.shared.u32 %r10140, [%rd3395];
|
||
xor.b32 %r10141, %r10140, %r10139;
|
||
// inline asm
|
||
bfe.u32 %r9933, %r9934, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3396, %r9933, 4;
|
||
add.s64 %rd3397, %rd51, %rd3396;
|
||
ld.shared.u32 %r10142, [%rd3397];
|
||
add.s32 %r10143, %r10142, %r10141;
|
||
xor.b32 %r10144, %r10345, %r9918;
|
||
xor.b32 %r9950, %r10144, %r10143;
|
||
// Round 7/16: %r9966 = (%r10344 ^ %r9934) ^ F(%r9950)
// inline asm
|
||
bfe.u32 %r9937, %r9950, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3398, %r9937, 4;
|
||
add.s64 %rd3399, %rd42, %rd3398;
|
||
ld.shared.u32 %r10145, [%rd3399];
|
||
// inline asm
|
||
bfe.u32 %r9941, %r9950, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3400, %r9941, 4;
|
||
add.s64 %rd3401, %rd45, %rd3400;
|
||
ld.shared.u32 %r10146, [%rd3401];
|
||
add.s32 %r10147, %r10146, %r10145;
|
||
// inline asm
|
||
bfe.u32 %r9945, %r9950, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3402, %r9945, 4;
|
||
add.s64 %rd3403, %rd48, %rd3402;
|
||
ld.shared.u32 %r10148, [%rd3403];
|
||
xor.b32 %r10149, %r10148, %r10147;
|
||
// inline asm
|
||
bfe.u32 %r9949, %r9950, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3404, %r9949, 4;
|
||
add.s64 %rd3405, %rd51, %rd3404;
|
||
ld.shared.u32 %r10150, [%rd3405];
|
||
add.s32 %r10151, %r10150, %r10149;
|
||
xor.b32 %r10152, %r10344, %r9934;
|
||
xor.b32 %r9966, %r10152, %r10151;
|
||
// Round 8/16: %r9982 = (%r10343 ^ %r9950) ^ F(%r9966)
// inline asm
|
||
bfe.u32 %r9953, %r9966, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3406, %r9953, 4;
|
||
add.s64 %rd3407, %rd42, %rd3406;
|
||
ld.shared.u32 %r10153, [%rd3407];
|
||
// inline asm
|
||
bfe.u32 %r9957, %r9966, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3408, %r9957, 4;
|
||
add.s64 %rd3409, %rd45, %rd3408;
|
||
ld.shared.u32 %r10154, [%rd3409];
|
||
add.s32 %r10155, %r10154, %r10153;
|
||
// inline asm
|
||
bfe.u32 %r9961, %r9966, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3410, %r9961, 4;
|
||
add.s64 %rd3411, %rd48, %rd3410;
|
||
ld.shared.u32 %r10156, [%rd3411];
|
||
xor.b32 %r10157, %r10156, %r10155;
|
||
// inline asm
|
||
bfe.u32 %r9965, %r9966, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3412, %r9965, 4;
|
||
add.s64 %rd3413, %rd51, %rd3412;
|
||
ld.shared.u32 %r10158, [%rd3413];
|
||
add.s32 %r10159, %r10158, %r10157;
|
||
xor.b32 %r10160, %r10343, %r9950;
|
||
xor.b32 %r9982, %r10160, %r10159;
|
||
// Round 9/16: %r9998 = (%r10342 ^ %r9966) ^ F(%r9982)
// inline asm
|
||
bfe.u32 %r9969, %r9982, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3414, %r9969, 4;
|
||
add.s64 %rd3415, %rd42, %rd3414;
|
||
ld.shared.u32 %r10161, [%rd3415];
|
||
// inline asm
|
||
bfe.u32 %r9973, %r9982, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3416, %r9973, 4;
|
||
add.s64 %rd3417, %rd45, %rd3416;
|
||
ld.shared.u32 %r10162, [%rd3417];
|
||
add.s32 %r10163, %r10162, %r10161;
|
||
// inline asm
|
||
bfe.u32 %r9977, %r9982, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3418, %r9977, 4;
|
||
add.s64 %rd3419, %rd48, %rd3418;
|
||
ld.shared.u32 %r10164, [%rd3419];
|
||
xor.b32 %r10165, %r10164, %r10163;
|
||
// inline asm
|
||
bfe.u32 %r9981, %r9982, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3420, %r9981, 4;
|
||
add.s64 %rd3421, %rd51, %rd3420;
|
||
ld.shared.u32 %r10166, [%rd3421];
|
||
add.s32 %r10167, %r10166, %r10165;
|
||
xor.b32 %r10168, %r10342, %r9966;
|
||
xor.b32 %r9998, %r10168, %r10167;
|
||
// Round 10/16: %r10014 = (%r10341 ^ %r9982) ^ F(%r9998)
// inline asm
|
||
bfe.u32 %r9985, %r9998, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3422, %r9985, 4;
|
||
add.s64 %rd3423, %rd42, %rd3422;
|
||
ld.shared.u32 %r10169, [%rd3423];
|
||
// inline asm
|
||
bfe.u32 %r9989, %r9998, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3424, %r9989, 4;
|
||
add.s64 %rd3425, %rd45, %rd3424;
|
||
ld.shared.u32 %r10170, [%rd3425];
|
||
add.s32 %r10171, %r10170, %r10169;
|
||
// inline asm
|
||
bfe.u32 %r9993, %r9998, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3426, %r9993, 4;
|
||
add.s64 %rd3427, %rd48, %rd3426;
|
||
ld.shared.u32 %r10172, [%rd3427];
|
||
xor.b32 %r10173, %r10172, %r10171;
|
||
// inline asm
|
||
bfe.u32 %r9997, %r9998, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3428, %r9997, 4;
|
||
add.s64 %rd3429, %rd51, %rd3428;
|
||
ld.shared.u32 %r10174, [%rd3429];
|
||
add.s32 %r10175, %r10174, %r10173;
|
||
xor.b32 %r10176, %r10341, %r9982;
|
||
xor.b32 %r10014, %r10176, %r10175;
|
||
// Round 11/16: %r10030 = (%r10340 ^ %r9998) ^ F(%r10014)
// inline asm
|
||
bfe.u32 %r10001, %r10014, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3430, %r10001, 4;
|
||
add.s64 %rd3431, %rd42, %rd3430;
|
||
ld.shared.u32 %r10177, [%rd3431];
|
||
// inline asm
|
||
bfe.u32 %r10005, %r10014, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3432, %r10005, 4;
|
||
add.s64 %rd3433, %rd45, %rd3432;
|
||
ld.shared.u32 %r10178, [%rd3433];
|
||
add.s32 %r10179, %r10178, %r10177;
|
||
// inline asm
|
||
bfe.u32 %r10009, %r10014, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3434, %r10009, 4;
|
||
add.s64 %rd3435, %rd48, %rd3434;
|
||
ld.shared.u32 %r10180, [%rd3435];
|
||
xor.b32 %r10181, %r10180, %r10179;
|
||
// inline asm
|
||
bfe.u32 %r10013, %r10014, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3436, %r10013, 4;
|
||
add.s64 %rd3437, %rd51, %rd3436;
|
||
ld.shared.u32 %r10182, [%rd3437];
|
||
add.s32 %r10183, %r10182, %r10181;
|
||
xor.b32 %r10184, %r10340, %r9998;
|
||
xor.b32 %r10030, %r10184, %r10183;
|
||
// Round 12/16: %r10046 = (%r10339 ^ %r10014) ^ F(%r10030)
// inline asm
|
||
bfe.u32 %r10017, %r10030, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3438, %r10017, 4;
|
||
add.s64 %rd3439, %rd42, %rd3438;
|
||
ld.shared.u32 %r10185, [%rd3439];
|
||
// inline asm
|
||
bfe.u32 %r10021, %r10030, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3440, %r10021, 4;
|
||
add.s64 %rd3441, %rd45, %rd3440;
|
||
ld.shared.u32 %r10186, [%rd3441];
|
||
add.s32 %r10187, %r10186, %r10185;
|
||
// inline asm
|
||
bfe.u32 %r10025, %r10030, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3442, %r10025, 4;
|
||
add.s64 %rd3443, %rd48, %rd3442;
|
||
ld.shared.u32 %r10188, [%rd3443];
|
||
xor.b32 %r10189, %r10188, %r10187;
|
||
// inline asm
|
||
bfe.u32 %r10029, %r10030, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3444, %r10029, 4;
|
||
add.s64 %rd3445, %rd51, %rd3444;
|
||
ld.shared.u32 %r10190, [%rd3445];
|
||
add.s32 %r10191, %r10190, %r10189;
|
||
xor.b32 %r10192, %r10339, %r10014;
|
||
xor.b32 %r10046, %r10192, %r10191;
|
||
// Round 13/16: %r10062 = (%r10338 ^ %r10030) ^ F(%r10046)
// inline asm
|
||
bfe.u32 %r10033, %r10046, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3446, %r10033, 4;
|
||
add.s64 %rd3447, %rd42, %rd3446;
|
||
ld.shared.u32 %r10193, [%rd3447];
|
||
// inline asm
|
||
bfe.u32 %r10037, %r10046, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3448, %r10037, 4;
|
||
add.s64 %rd3449, %rd45, %rd3448;
|
||
ld.shared.u32 %r10194, [%rd3449];
|
||
add.s32 %r10195, %r10194, %r10193;
|
||
// inline asm
|
||
bfe.u32 %r10041, %r10046, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3450, %r10041, 4;
|
||
add.s64 %rd3451, %rd48, %rd3450;
|
||
ld.shared.u32 %r10196, [%rd3451];
|
||
xor.b32 %r10197, %r10196, %r10195;
|
||
// inline asm
|
||
bfe.u32 %r10045, %r10046, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3452, %r10045, 4;
|
||
add.s64 %rd3453, %rd51, %rd3452;
|
||
ld.shared.u32 %r10198, [%rd3453];
|
||
add.s32 %r10199, %r10198, %r10197;
|
||
xor.b32 %r10200, %r10338, %r10030;
|
||
xor.b32 %r10062, %r10200, %r10199;
|
||
// Round 14/16: %r10078 = (%r10337 ^ %r10046) ^ F(%r10062)
// inline asm
|
||
bfe.u32 %r10049, %r10062, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3454, %r10049, 4;
|
||
add.s64 %rd3455, %rd42, %rd3454;
|
||
ld.shared.u32 %r10201, [%rd3455];
|
||
// inline asm
|
||
bfe.u32 %r10053, %r10062, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3456, %r10053, 4;
|
||
add.s64 %rd3457, %rd45, %rd3456;
|
||
ld.shared.u32 %r10202, [%rd3457];
|
||
add.s32 %r10203, %r10202, %r10201;
|
||
// inline asm
|
||
bfe.u32 %r10057, %r10062, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3458, %r10057, 4;
|
||
add.s64 %rd3459, %rd48, %rd3458;
|
||
ld.shared.u32 %r10204, [%rd3459];
|
||
xor.b32 %r10205, %r10204, %r10203;
|
||
// inline asm
|
||
bfe.u32 %r10061, %r10062, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3460, %r10061, 4;
|
||
add.s64 %rd3461, %rd51, %rd3460;
|
||
ld.shared.u32 %r10206, [%rd3461];
|
||
add.s32 %r10207, %r10206, %r10205;
|
||
xor.b32 %r10208, %r10337, %r10046;
|
||
xor.b32 %r10078, %r10208, %r10207;
|
||
// Round 15/16: %r10094 = (%r10336 ^ %r10062) ^ F(%r10078)
// inline asm
|
||
bfe.u32 %r10065, %r10078, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3462, %r10065, 4;
|
||
add.s64 %rd3463, %rd42, %rd3462;
|
||
ld.shared.u32 %r10209, [%rd3463];
|
||
// inline asm
|
||
bfe.u32 %r10069, %r10078, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3464, %r10069, 4;
|
||
add.s64 %rd3465, %rd45, %rd3464;
|
||
ld.shared.u32 %r10210, [%rd3465];
|
||
add.s32 %r10211, %r10210, %r10209;
|
||
// inline asm
|
||
bfe.u32 %r10073, %r10078, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3466, %r10073, 4;
|
||
add.s64 %rd3467, %rd48, %rd3466;
|
||
ld.shared.u32 %r10212, [%rd3467];
|
||
xor.b32 %r10213, %r10212, %r10211;
|
||
// inline asm
|
||
bfe.u32 %r10077, %r10078, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3468, %r10077, 4;
|
||
add.s64 %rd3469, %rd51, %rd3468;
|
||
ld.shared.u32 %r10214, [%rd3469];
|
||
add.s32 %r10215, %r10214, %r10213;
|
||
xor.b32 %r10216, %r10336, %r10062;
|
||
xor.b32 %r10094, %r10216, %r10215;
|
||
// Round 16/16: %r10326 = (%r10335 ^ %r10078) ^ F(%r10094)
// inline asm
|
||
bfe.u32 %r10081, %r10094, %r10270, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3470, %r10081, 4;
|
||
add.s64 %rd3471, %rd42, %rd3470;
|
||
ld.shared.u32 %r10217, [%rd3471];
|
||
// inline asm
|
||
bfe.u32 %r10085, %r10094, %r10272, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3472, %r10085, 4;
|
||
add.s64 %rd3473, %rd45, %rd3472;
|
||
ld.shared.u32 %r10218, [%rd3473];
|
||
add.s32 %r10219, %r10218, %r10217;
|
||
// inline asm
|
||
bfe.u32 %r10089, %r10094, %r10271, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3474, %r10089, 4;
|
||
add.s64 %rd3475, %rd48, %rd3474;
|
||
ld.shared.u32 %r10220, [%rd3475];
|
||
xor.b32 %r10221, %r10220, %r10219;
|
||
// inline asm
|
||
bfe.u32 %r10093, %r10094, %r10273, %r10271;
|
||
// inline asm
|
||
mul.wide.u32 %rd3476, %r10093, 4;
|
||
add.s64 %rd3477, %rd51, %rd3476;
|
||
ld.shared.u32 %r10222, [%rd3477];
|
||
add.s32 %r10223, %r10222, %r10221;
|
||
xor.b32 %r10224, %r10335, %r10078;
|
||
xor.b32 %r10326, %r10224, %r10223;
|
||
// Post-whitening with the last key word; the halves end up swapped relative
// to the round chain (%r10327 comes from round 15's output, %r10326 from round 16).
xor.b32 %r10327, %r10334, %r10094;
|
||
// Store the new pair as two consecutive 32-bit words, then advance the
// destination by 8 bytes and the counter by 2 words.
st.shared.u32 [%rd3502], %r10327;
|
||
st.shared.u32 [%rd3502+4], %r10326;
|
||
add.s64 %rd3502, %rd3502, 8;
|
||
add.s32 %r10331, %r10331, 2;
|
||
// Repeat while the (unsigned) word counter is below 256.
setp.lt.u32 %p11, %r10331, 256;
|
||
@%p11 bra BB4_20;
|
||
|
||
ld.param.u32 %r10274, [m03200_loop_param_29];
|
||
add.s32 %r10309, %r10309, 1;
|
||
setp.lt.u32 %p12, %r10309, %r10274;
|
||
@%p12 bra BB4_5;
|
||
|
||
BB4_22:
|
||
st.global.u32 [%rd2+72], %r10351;
|
||
st.global.u32 [%rd2+76], %r10350;
|
||
st.global.u32 [%rd2+80], %r10349;
|
||
st.global.u32 [%rd2+84], %r10348;
|
||
st.global.u32 [%rd2+88], %r10347;
|
||
st.global.u32 [%rd2+92], %r10346;
|
||
st.global.u32 [%rd2+96], %r10345;
|
||
st.global.u32 [%rd2+100], %r10344;
|
||
st.global.u32 [%rd2+104], %r10343;
|
||
st.global.u32 [%rd2+108], %r10342;
|
||
st.global.u32 [%rd2+112], %r10341;
|
||
st.global.u32 [%rd2+116], %r10340;
|
||
st.global.u32 [%rd2+120], %r10339;
|
||
st.global.u32 [%rd2+124], %r10338;
|
||
st.global.u32 [%rd2+128], %r10337;
|
||
st.global.u32 [%rd2+132], %r10336;
|
||
st.global.u32 [%rd2+136], %r10335;
|
||
st.global.u32 [%rd2+140], %r10334;
|
||
mov.u32 %r10352, 0;
|
||
|
||
BB4_23:
|
||
mul.wide.u32 %rd3481, %r10352, 4;
|
||
add.s64 %rd3482, %rd42, %rd3481;
|
||
ld.shared.u32 %r10226, [%rd3482];
|
||
add.s64 %rd3485, %rd2, %rd3481;
|
||
ld.shared.u32 %r10227, [%rd3482+4];
|
||
ld.shared.u32 %r10228, [%rd3482+8];
|
||
ld.shared.u32 %r10229, [%rd3482+12];
|
||
ld.shared.u32 %r10230, [%rd3482+16];
|
||
ld.shared.u32 %r10231, [%rd3482+20];
|
||
ld.shared.u32 %r10232, [%rd3482+24];
|
||
ld.shared.u32 %r10233, [%rd3482+28];
|
||
st.global.u32 [%rd3485+144], %r10226;
|
||
add.s64 %rd3488, %rd45, %rd3481;
|
||
st.global.u32 [%rd3485+148], %r10227;
|
||
st.global.u32 [%rd3485+152], %r10228;
|
||
st.global.u32 [%rd3485+156], %r10229;
|
||
st.global.u32 [%rd3485+160], %r10230;
|
||
st.global.u32 [%rd3485+164], %r10231;
|
||
st.global.u32 [%rd3485+168], %r10232;
|
||
st.global.u32 [%rd3485+172], %r10233;
|
||
ld.shared.u32 %r10234, [%rd3488];
|
||
ld.shared.u32 %r10235, [%rd3488+4];
|
||
ld.shared.u32 %r10236, [%rd3488+8];
|
||
ld.shared.u32 %r10237, [%rd3488+12];
|
||
ld.shared.u32 %r10238, [%rd3488+16];
|
||
ld.shared.u32 %r10239, [%rd3488+20];
|
||
ld.shared.u32 %r10240, [%rd3488+24];
|
||
ld.shared.u32 %r10241, [%rd3488+28];
|
||
st.global.u32 [%rd3485+1168], %r10234;
|
||
add.s64 %rd3491, %rd48, %rd3481;
|
||
st.global.u32 [%rd3485+1172], %r10235;
|
||
st.global.u32 [%rd3485+1176], %r10236;
|
||
st.global.u32 [%rd3485+1180], %r10237;
|
||
st.global.u32 [%rd3485+1184], %r10238;
|
||
st.global.u32 [%rd3485+1188], %r10239;
|
||
st.global.u32 [%rd3485+1192], %r10240;
|
||
st.global.u32 [%rd3485+1196], %r10241;
|
||
ld.shared.u32 %r10242, [%rd3491];
|
||
ld.shared.u32 %r10243, [%rd3491+4];
|
||
ld.shared.u32 %r10244, [%rd3491+8];
|
||
ld.shared.u32 %r10245, [%rd3491+12];
|
||
ld.shared.u32 %r10246, [%rd3491+16];
|
||
ld.shared.u32 %r10247, [%rd3491+20];
|
||
ld.shared.u32 %r10248, [%rd3491+24];
|
||
ld.shared.u32 %r10249, [%rd3491+28];
|
||
st.global.u32 [%rd3485+2192], %r10242;
|
||
add.s64 %rd3494, %rd51, %rd3481;
|
||
st.global.u32 [%rd3485+2196], %r10243;
|
||
st.global.u32 [%rd3485+2200], %r10244;
|
||
st.global.u32 [%rd3485+2204], %r10245;
|
||
st.global.u32 [%rd3485+2208], %r10246;
|
||
st.global.u32 [%rd3485+2212], %r10247;
|
||
st.global.u32 [%rd3485+2216], %r10248;
|
||
st.global.u32 [%rd3485+2220], %r10249;
|
||
ld.shared.u32 %r10250, [%rd3494];
|
||
ld.shared.u32 %r10251, [%rd3494+4];
|
||
ld.shared.u32 %r10252, [%rd3494+8];
|
||
ld.shared.u32 %r10253, [%rd3494+12];
|
||
ld.shared.u32 %r10254, [%rd3494+16];
|
||
ld.shared.u32 %r10255, [%rd3494+20];
|
||
ld.shared.u32 %r10256, [%rd3494+24];
|
||
ld.shared.u32 %r10257, [%rd3494+28];
|
||
st.global.u32 [%rd3485+3216], %r10250;
|
||
st.global.u32 [%rd3485+3220], %r10251;
|
||
st.global.u32 [%rd3485+3224], %r10252;
|
||
st.global.u32 [%rd3485+3228], %r10253;
|
||
st.global.u32 [%rd3485+3232], %r10254;
|
||
st.global.u32 [%rd3485+3236], %r10255;
|
||
st.global.u32 [%rd3485+3240], %r10256;
|
||
st.global.u32 [%rd3485+3244], %r10257;
|
||
add.s32 %r10352, %r10352, 8;
|
||
setp.ne.s32 %p13, %r10352, 256;
|
||
@%p13 bra BB4_23;
|
||
|
||
BB4_24:
|
||
ret;
|
||
}
|
||
|
||
// .globl m03200_comp
|
||
.entry m03200_comp(
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_0,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_1,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_2,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_3,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_4,
|
||
.param .u64 .ptr .global .align 1 m03200_comp_param_5,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_6,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_7,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_8,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_9,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_10,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_11,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_12,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_13,
|
||
.param .u64 .ptr .global .align 8 m03200_comp_param_14,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_15,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_16,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_17,
|
||
.param .u64 .ptr .global .align 1 m03200_comp_param_18,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_19,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_20,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_21,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_22,
|
||
.param .u64 .ptr .global .align 4 m03200_comp_param_23,
|
||
.param .u32 m03200_comp_param_24,
|
||
.param .u32 m03200_comp_param_25,
|
||
.param .u32 m03200_comp_param_26,
|
||
.param .u32 m03200_comp_param_27,
|
||
.param .u32 m03200_comp_param_28,
|
||
.param .u32 m03200_comp_param_29,
|
||
.param .u32 m03200_comp_param_30,
|
||
.param .u32 m03200_comp_param_31,
|
||
.param .u32 m03200_comp_param_32,
|
||
.param .u32 m03200_comp_param_33,
|
||
.param .u64 m03200_comp_param_34
|
||
)
|
||
.reqntid 8, 1, 1
|
||
{
|
||
.reg .pred %p<28>;
|
||
.reg .b32 %r<971>;
|
||
.reg .b64 %rd<376>;
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_comp$S0_all[8192];
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_comp$S1_all[8192];
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_comp$S2_all[8192];
|
||
// demoted variable
|
||
.shared .align 4 .b8 m03200_comp$S3_all[8192];
|
||
|
||
ld.param.u64 %rd5, [m03200_comp_param_4];
|
||
ld.param.u64 %rd18, [m03200_comp_param_34];
|
||
mov.b32 %r61, %envreg3;
|
||
mov.u32 %r62, %ctaid.x;
|
||
mov.u32 %r63, %ntid.x;
|
||
mad.lo.s32 %r64, %r62, %r63, %r61;
|
||
mov.u32 %r65, %tid.x;
|
||
add.s32 %r1, %r64, %r65;
|
||
cvt.s64.s32 %rd1, %r1;
|
||
cvt.s64.s32 %rd2, %r65;
|
||
setp.ge.u64 %p1, %rd1, %rd18;
|
||
@%p1 bra BB5_34;
|
||
|
||
mul.wide.s32 %rd19, %r1, 4240;
|
||
add.s64 %rd20, %rd5, %rd19;
|
||
ld.global.u32 %r2, [%rd20+72];
|
||
ld.global.u32 %r3, [%rd20+76];
|
||
ld.global.u32 %r4, [%rd20+80];
|
||
ld.global.u32 %r5, [%rd20+84];
|
||
ld.global.u32 %r6, [%rd20+88];
|
||
ld.global.u32 %r7, [%rd20+92];
|
||
ld.global.u32 %r8, [%rd20+96];
|
||
ld.global.u32 %r9, [%rd20+100];
|
||
ld.global.u32 %r10, [%rd20+104];
|
||
ld.global.u32 %r11, [%rd20+108];
|
||
ld.global.u32 %r12, [%rd20+112];
|
||
ld.global.u32 %r13, [%rd20+116];
|
||
ld.global.u32 %r14, [%rd20+120];
|
||
ld.global.u32 %r15, [%rd20+124];
|
||
ld.global.u32 %r16, [%rd20+128];
|
||
ld.global.u32 %r17, [%rd20+132];
|
||
ld.global.u32 %r18, [%rd20+136];
|
||
ld.global.u32 %r19, [%rd20+140];
|
||
mov.u32 %r960, 0;
|
||
|
||
BB5_2:
|
||
mul.wide.u32 %rd23, %r960, 4;
|
||
add.s64 %rd24, %rd20, %rd23;
|
||
ld.global.u32 %r70, [%rd24+144];
|
||
shl.b64 %rd25, %rd2, 10;
|
||
mov.u64 %rd26, m03200_comp$S0_all;
|
||
add.s64 %rd27, %rd26, %rd25;
|
||
add.s64 %rd28, %rd27, %rd23;
|
||
ld.global.u32 %r71, [%rd24+1168];
|
||
ld.global.u32 %r72, [%rd24+2192];
|
||
ld.global.u32 %r73, [%rd24+3216];
|
||
ld.global.u32 %r74, [%rd24+148];
|
||
ld.global.u32 %r75, [%rd24+1172];
|
||
ld.global.u32 %r76, [%rd24+2196];
|
||
ld.global.u32 %r77, [%rd24+3220];
|
||
ld.global.u32 %r78, [%rd24+152];
|
||
ld.global.u32 %r79, [%rd24+1176];
|
||
ld.global.u32 %r80, [%rd24+2200];
|
||
ld.global.u32 %r81, [%rd24+3224];
|
||
ld.global.u32 %r82, [%rd24+156];
|
||
ld.global.u32 %r83, [%rd24+1180];
|
||
ld.global.u32 %r84, [%rd24+2204];
|
||
ld.global.u32 %r85, [%rd24+3228];
|
||
ld.global.u32 %r86, [%rd24+160];
|
||
ld.global.u32 %r87, [%rd24+1184];
|
||
ld.global.u32 %r88, [%rd24+2208];
|
||
ld.global.u32 %r89, [%rd24+3232];
|
||
ld.global.u32 %r90, [%rd24+164];
|
||
ld.global.u32 %r91, [%rd24+1188];
|
||
ld.global.u32 %r92, [%rd24+2212];
|
||
ld.global.u32 %r93, [%rd24+3236];
|
||
ld.global.u32 %r94, [%rd24+168];
|
||
ld.global.u32 %r95, [%rd24+1192];
|
||
ld.global.u32 %r96, [%rd24+2216];
|
||
ld.global.u32 %r97, [%rd24+3240];
|
||
ld.global.u32 %r98, [%rd24+172];
|
||
ld.global.u32 %r99, [%rd24+1196];
|
||
ld.global.u32 %r100, [%rd24+2220];
|
||
ld.global.u32 %r101, [%rd24+3244];
|
||
st.shared.u32 [%rd28], %r70;
|
||
mov.u64 %rd29, m03200_comp$S1_all;
|
||
add.s64 %rd30, %rd29, %rd25;
|
||
add.s64 %rd31, %rd30, %rd23;
|
||
st.shared.u32 [%rd28+4], %r74;
|
||
st.shared.u32 [%rd28+8], %r78;
|
||
st.shared.u32 [%rd28+12], %r82;
|
||
st.shared.u32 [%rd28+16], %r86;
|
||
st.shared.u32 [%rd28+20], %r90;
|
||
st.shared.u32 [%rd28+24], %r94;
|
||
st.shared.u32 [%rd28+28], %r98;
|
||
st.shared.u32 [%rd31], %r71;
|
||
mov.u64 %rd32, m03200_comp$S2_all;
|
||
add.s64 %rd33, %rd32, %rd25;
|
||
add.s64 %rd34, %rd33, %rd23;
|
||
st.shared.u32 [%rd31+4], %r75;
|
||
st.shared.u32 [%rd31+8], %r79;
|
||
st.shared.u32 [%rd31+12], %r83;
|
||
st.shared.u32 [%rd31+16], %r87;
|
||
st.shared.u32 [%rd31+20], %r91;
|
||
st.shared.u32 [%rd31+24], %r95;
|
||
st.shared.u32 [%rd31+28], %r99;
|
||
st.shared.u32 [%rd34], %r72;
|
||
mov.u64 %rd35, m03200_comp$S3_all;
|
||
add.s64 %rd36, %rd35, %rd25;
|
||
add.s64 %rd37, %rd36, %rd23;
|
||
st.shared.u32 [%rd34+4], %r76;
|
||
st.shared.u32 [%rd34+8], %r80;
|
||
st.shared.u32 [%rd34+12], %r84;
|
||
st.shared.u32 [%rd34+16], %r88;
|
||
st.shared.u32 [%rd34+20], %r92;
|
||
st.shared.u32 [%rd34+24], %r96;
|
||
st.shared.u32 [%rd34+28], %r100;
|
||
st.shared.u32 [%rd37], %r73;
|
||
st.shared.u32 [%rd37+4], %r77;
|
||
st.shared.u32 [%rd37+8], %r81;
|
||
st.shared.u32 [%rd37+12], %r85;
|
||
st.shared.u32 [%rd37+16], %r89;
|
||
st.shared.u32 [%rd37+20], %r93;
|
||
st.shared.u32 [%rd37+24], %r97;
|
||
st.shared.u32 [%rd37+28], %r101;
|
||
add.s32 %r960, %r960, 8;
|
||
mov.u32 %r963, 1332899944;
|
||
mov.u32 %r962, 1700884034;
|
||
mov.u32 %r961, -64;
|
||
setp.ne.s32 %p2, %r960, 256;
|
||
@%p2 bra BB5_2;
|
||
|
||
BB5_3:
|
||
mov.u32 %r959, %tid.x;
|
||
cvt.s64.s32 %rd375, %r959;
|
||
shl.b64 %rd374, %rd375, 10;
|
||
mov.u64 %rd373, m03200_comp$S3_all;
|
||
add.s64 %rd372, %rd373, %rd374;
|
||
mov.u32 %r957, %tid.x;
|
||
cvt.s64.s32 %rd367, %r957;
|
||
shl.b64 %rd366, %rd367, 10;
|
||
mov.u64 %rd365, m03200_comp$S2_all;
|
||
add.s64 %rd364, %rd365, %rd366;
|
||
mov.u64 %rd363, m03200_comp$S1_all;
|
||
add.s64 %rd362, %rd363, %rd366;
|
||
mov.u32 %r955, %tid.x;
|
||
cvt.s64.s32 %rd355, %r955;
|
||
shl.b64 %rd354, %rd355, 10;
|
||
mov.u64 %rd353, m03200_comp$S0_all;
|
||
add.s64 %rd352, %rd353, %rd354;
|
||
xor.b32 %r115, %r2, %r963;
|
||
mov.u32 %r344, 24;
|
||
mov.u32 %r357, 8;
|
||
// inline asm
|
||
bfe.u32 %r102, %r115, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd41, %r102, 4;
|
||
add.s64 %rd42, %rd352, %rd41;
|
||
ld.shared.u32 %r361, [%rd42];
|
||
mov.u32 %r348, 16;
|
||
// inline asm
|
||
bfe.u32 %r106, %r115, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd45, %r106, 4;
|
||
add.s64 %rd46, %rd362, %rd45;
|
||
ld.shared.u32 %r362, [%rd46];
|
||
add.s32 %r363, %r362, %r361;
|
||
// inline asm
|
||
bfe.u32 %r110, %r115, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd49, %r110, 4;
|
||
add.s64 %rd50, %rd364, %rd49;
|
||
ld.shared.u32 %r364, [%rd50];
|
||
xor.b32 %r365, %r364, %r363;
|
||
mov.u32 %r356, 0;
|
||
// inline asm
|
||
bfe.u32 %r114, %r115, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd53, %r114, 4;
|
||
add.s64 %rd54, %rd372, %rd53;
|
||
ld.shared.u32 %r366, [%rd54];
|
||
add.s32 %r367, %r366, %r365;
|
||
xor.b32 %r368, %r3, %r962;
|
||
xor.b32 %r131, %r368, %r367;
|
||
// inline asm
|
||
bfe.u32 %r118, %r131, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd55, %r118, 4;
|
||
add.s64 %rd56, %rd352, %rd55;
|
||
ld.shared.u32 %r369, [%rd56];
|
||
// inline asm
|
||
bfe.u32 %r122, %r131, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd57, %r122, 4;
|
||
add.s64 %rd58, %rd362, %rd57;
|
||
ld.shared.u32 %r370, [%rd58];
|
||
add.s32 %r371, %r370, %r369;
|
||
// inline asm
|
||
bfe.u32 %r126, %r131, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd59, %r126, 4;
|
||
add.s64 %rd60, %rd364, %rd59;
|
||
ld.shared.u32 %r372, [%rd60];
|
||
xor.b32 %r373, %r372, %r371;
|
||
// inline asm
|
||
bfe.u32 %r130, %r131, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd61, %r130, 4;
|
||
add.s64 %rd62, %rd372, %rd61;
|
||
ld.shared.u32 %r374, [%rd62];
|
||
add.s32 %r375, %r374, %r373;
|
||
xor.b32 %r376, %r4, %r115;
|
||
xor.b32 %r147, %r376, %r375;
|
||
// inline asm
|
||
bfe.u32 %r134, %r147, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd63, %r134, 4;
|
||
add.s64 %rd64, %rd352, %rd63;
|
||
ld.shared.u32 %r377, [%rd64];
|
||
// inline asm
|
||
bfe.u32 %r138, %r147, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd65, %r138, 4;
|
||
add.s64 %rd66, %rd362, %rd65;
|
||
ld.shared.u32 %r378, [%rd66];
|
||
add.s32 %r379, %r378, %r377;
|
||
// inline asm
|
||
bfe.u32 %r142, %r147, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd67, %r142, 4;
|
||
add.s64 %rd68, %rd364, %rd67;
|
||
ld.shared.u32 %r380, [%rd68];
|
||
xor.b32 %r381, %r380, %r379;
|
||
// inline asm
|
||
bfe.u32 %r146, %r147, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd69, %r146, 4;
|
||
add.s64 %rd70, %rd372, %rd69;
|
||
ld.shared.u32 %r382, [%rd70];
|
||
add.s32 %r383, %r382, %r381;
|
||
xor.b32 %r384, %r5, %r131;
|
||
xor.b32 %r163, %r384, %r383;
|
||
// inline asm
|
||
bfe.u32 %r150, %r163, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd71, %r150, 4;
|
||
add.s64 %rd72, %rd352, %rd71;
|
||
ld.shared.u32 %r385, [%rd72];
|
||
// inline asm
|
||
bfe.u32 %r154, %r163, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd73, %r154, 4;
|
||
add.s64 %rd74, %rd362, %rd73;
|
||
ld.shared.u32 %r386, [%rd74];
|
||
add.s32 %r387, %r386, %r385;
|
||
// inline asm
|
||
bfe.u32 %r158, %r163, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd75, %r158, 4;
|
||
add.s64 %rd76, %rd364, %rd75;
|
||
ld.shared.u32 %r388, [%rd76];
|
||
xor.b32 %r389, %r388, %r387;
|
||
// inline asm
|
||
bfe.u32 %r162, %r163, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd77, %r162, 4;
|
||
add.s64 %rd78, %rd372, %rd77;
|
||
ld.shared.u32 %r390, [%rd78];
|
||
add.s32 %r391, %r390, %r389;
|
||
xor.b32 %r392, %r6, %r147;
|
||
xor.b32 %r179, %r392, %r391;
|
||
// inline asm
|
||
bfe.u32 %r166, %r179, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd79, %r166, 4;
|
||
add.s64 %rd80, %rd352, %rd79;
|
||
ld.shared.u32 %r393, [%rd80];
|
||
// inline asm
|
||
bfe.u32 %r170, %r179, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd81, %r170, 4;
|
||
add.s64 %rd82, %rd362, %rd81;
|
||
ld.shared.u32 %r394, [%rd82];
|
||
add.s32 %r395, %r394, %r393;
|
||
// inline asm
|
||
bfe.u32 %r174, %r179, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd83, %r174, 4;
|
||
add.s64 %rd84, %rd364, %rd83;
|
||
ld.shared.u32 %r396, [%rd84];
|
||
xor.b32 %r397, %r396, %r395;
|
||
// inline asm
|
||
bfe.u32 %r178, %r179, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd85, %r178, 4;
|
||
add.s64 %rd86, %rd372, %rd85;
|
||
ld.shared.u32 %r398, [%rd86];
|
||
add.s32 %r399, %r398, %r397;
|
||
xor.b32 %r400, %r7, %r163;
|
||
xor.b32 %r195, %r400, %r399;
|
||
// inline asm
|
||
bfe.u32 %r182, %r195, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd87, %r182, 4;
|
||
add.s64 %rd88, %rd352, %rd87;
|
||
ld.shared.u32 %r401, [%rd88];
|
||
// inline asm
|
||
bfe.u32 %r186, %r195, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd89, %r186, 4;
|
||
add.s64 %rd90, %rd362, %rd89;
|
||
ld.shared.u32 %r402, [%rd90];
|
||
add.s32 %r403, %r402, %r401;
|
||
// inline asm
|
||
bfe.u32 %r190, %r195, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd91, %r190, 4;
|
||
add.s64 %rd92, %rd364, %rd91;
|
||
ld.shared.u32 %r404, [%rd92];
|
||
xor.b32 %r405, %r404, %r403;
|
||
// inline asm
|
||
bfe.u32 %r194, %r195, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd93, %r194, 4;
|
||
add.s64 %rd94, %rd372, %rd93;
|
||
ld.shared.u32 %r406, [%rd94];
|
||
add.s32 %r407, %r406, %r405;
|
||
xor.b32 %r408, %r8, %r179;
|
||
xor.b32 %r211, %r408, %r407;
|
||
// inline asm
|
||
bfe.u32 %r198, %r211, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd95, %r198, 4;
|
||
add.s64 %rd96, %rd352, %rd95;
|
||
ld.shared.u32 %r409, [%rd96];
|
||
// inline asm
|
||
bfe.u32 %r202, %r211, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd97, %r202, 4;
|
||
add.s64 %rd98, %rd362, %rd97;
|
||
ld.shared.u32 %r410, [%rd98];
|
||
add.s32 %r411, %r410, %r409;
|
||
// inline asm
|
||
bfe.u32 %r206, %r211, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd99, %r206, 4;
|
||
add.s64 %rd100, %rd364, %rd99;
|
||
ld.shared.u32 %r412, [%rd100];
|
||
xor.b32 %r413, %r412, %r411;
|
||
// inline asm
|
||
bfe.u32 %r210, %r211, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd101, %r210, 4;
|
||
add.s64 %rd102, %rd372, %rd101;
|
||
ld.shared.u32 %r414, [%rd102];
|
||
add.s32 %r415, %r414, %r413;
|
||
xor.b32 %r416, %r9, %r195;
|
||
xor.b32 %r227, %r416, %r415;
|
||
// inline asm
|
||
bfe.u32 %r214, %r227, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd103, %r214, 4;
|
||
add.s64 %rd104, %rd352, %rd103;
|
||
ld.shared.u32 %r417, [%rd104];
|
||
// inline asm
|
||
bfe.u32 %r218, %r227, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd105, %r218, 4;
|
||
add.s64 %rd106, %rd362, %rd105;
|
||
ld.shared.u32 %r418, [%rd106];
|
||
add.s32 %r419, %r418, %r417;
|
||
// inline asm
|
||
bfe.u32 %r222, %r227, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd107, %r222, 4;
|
||
add.s64 %rd108, %rd364, %rd107;
|
||
ld.shared.u32 %r420, [%rd108];
|
||
xor.b32 %r421, %r420, %r419;
|
||
// inline asm
|
||
bfe.u32 %r226, %r227, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd109, %r226, 4;
|
||
add.s64 %rd110, %rd372, %rd109;
|
||
ld.shared.u32 %r422, [%rd110];
|
||
add.s32 %r423, %r422, %r421;
|
||
xor.b32 %r424, %r10, %r211;
|
||
xor.b32 %r243, %r424, %r423;
|
||
// inline asm
|
||
bfe.u32 %r230, %r243, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd111, %r230, 4;
|
||
add.s64 %rd112, %rd352, %rd111;
|
||
ld.shared.u32 %r425, [%rd112];
|
||
// inline asm
|
||
bfe.u32 %r234, %r243, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd113, %r234, 4;
|
||
add.s64 %rd114, %rd362, %rd113;
|
||
ld.shared.u32 %r426, [%rd114];
|
||
add.s32 %r427, %r426, %r425;
|
||
// inline asm
|
||
bfe.u32 %r238, %r243, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd115, %r238, 4;
|
||
add.s64 %rd116, %rd364, %rd115;
|
||
ld.shared.u32 %r428, [%rd116];
|
||
xor.b32 %r429, %r428, %r427;
|
||
// inline asm
|
||
bfe.u32 %r242, %r243, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd117, %r242, 4;
|
||
add.s64 %rd118, %rd372, %rd117;
|
||
ld.shared.u32 %r430, [%rd118];
|
||
add.s32 %r431, %r430, %r429;
|
||
xor.b32 %r432, %r11, %r227;
|
||
xor.b32 %r259, %r432, %r431;
|
||
// inline asm
|
||
bfe.u32 %r246, %r259, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd119, %r246, 4;
|
||
add.s64 %rd120, %rd352, %rd119;
|
||
ld.shared.u32 %r433, [%rd120];
|
||
// inline asm
|
||
bfe.u32 %r250, %r259, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd121, %r250, 4;
|
||
add.s64 %rd122, %rd362, %rd121;
|
||
ld.shared.u32 %r434, [%rd122];
|
||
add.s32 %r435, %r434, %r433;
|
||
// inline asm
|
||
bfe.u32 %r254, %r259, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd123, %r254, 4;
|
||
add.s64 %rd124, %rd364, %rd123;
|
||
ld.shared.u32 %r436, [%rd124];
|
||
xor.b32 %r437, %r436, %r435;
|
||
// inline asm
|
||
bfe.u32 %r258, %r259, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd125, %r258, 4;
|
||
add.s64 %rd126, %rd372, %rd125;
|
||
ld.shared.u32 %r438, [%rd126];
|
||
add.s32 %r439, %r438, %r437;
|
||
xor.b32 %r440, %r12, %r243;
|
||
xor.b32 %r275, %r440, %r439;
|
||
// inline asm
|
||
bfe.u32 %r262, %r275, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd127, %r262, 4;
|
||
add.s64 %rd128, %rd352, %rd127;
|
||
ld.shared.u32 %r441, [%rd128];
|
||
// inline asm
|
||
bfe.u32 %r266, %r275, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd129, %r266, 4;
|
||
add.s64 %rd130, %rd362, %rd129;
|
||
ld.shared.u32 %r442, [%rd130];
|
||
add.s32 %r443, %r442, %r441;
|
||
// inline asm
|
||
bfe.u32 %r270, %r275, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd131, %r270, 4;
|
||
add.s64 %rd132, %rd364, %rd131;
|
||
ld.shared.u32 %r444, [%rd132];
|
||
xor.b32 %r445, %r444, %r443;
|
||
// inline asm
|
||
bfe.u32 %r274, %r275, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd133, %r274, 4;
|
||
add.s64 %rd134, %rd372, %rd133;
|
||
ld.shared.u32 %r446, [%rd134];
|
||
add.s32 %r447, %r446, %r445;
|
||
xor.b32 %r448, %r13, %r259;
|
||
xor.b32 %r291, %r448, %r447;
|
||
// inline asm
|
||
bfe.u32 %r278, %r291, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd135, %r278, 4;
|
||
add.s64 %rd136, %rd352, %rd135;
|
||
ld.shared.u32 %r449, [%rd136];
|
||
// inline asm
|
||
bfe.u32 %r282, %r291, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd137, %r282, 4;
|
||
add.s64 %rd138, %rd362, %rd137;
|
||
ld.shared.u32 %r450, [%rd138];
|
||
add.s32 %r451, %r450, %r449;
|
||
// inline asm
|
||
bfe.u32 %r286, %r291, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd139, %r286, 4;
|
||
add.s64 %rd140, %rd364, %rd139;
|
||
ld.shared.u32 %r452, [%rd140];
|
||
xor.b32 %r453, %r452, %r451;
|
||
// inline asm
|
||
bfe.u32 %r290, %r291, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd141, %r290, 4;
|
||
add.s64 %rd142, %rd372, %rd141;
|
||
ld.shared.u32 %r454, [%rd142];
|
||
add.s32 %r455, %r454, %r453;
|
||
xor.b32 %r456, %r14, %r275;
|
||
xor.b32 %r307, %r456, %r455;
|
||
// inline asm
|
||
bfe.u32 %r294, %r307, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd143, %r294, 4;
|
||
add.s64 %rd144, %rd352, %rd143;
|
||
ld.shared.u32 %r457, [%rd144];
|
||
// inline asm
|
||
bfe.u32 %r298, %r307, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd145, %r298, 4;
|
||
add.s64 %rd146, %rd362, %rd145;
|
||
ld.shared.u32 %r458, [%rd146];
|
||
add.s32 %r459, %r458, %r457;
|
||
// inline asm
|
||
bfe.u32 %r302, %r307, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd147, %r302, 4;
|
||
add.s64 %rd148, %rd364, %rd147;
|
||
ld.shared.u32 %r460, [%rd148];
|
||
xor.b32 %r461, %r460, %r459;
|
||
// inline asm
|
||
bfe.u32 %r306, %r307, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd149, %r306, 4;
|
||
add.s64 %rd150, %rd372, %rd149;
|
||
ld.shared.u32 %r462, [%rd150];
|
||
add.s32 %r463, %r462, %r461;
|
||
xor.b32 %r464, %r15, %r291;
|
||
xor.b32 %r323, %r464, %r463;
|
||
// inline asm
|
||
bfe.u32 %r310, %r323, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd151, %r310, 4;
|
||
add.s64 %rd152, %rd352, %rd151;
|
||
ld.shared.u32 %r465, [%rd152];
|
||
// inline asm
|
||
bfe.u32 %r314, %r323, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd153, %r314, 4;
|
||
add.s64 %rd154, %rd362, %rd153;
|
||
ld.shared.u32 %r466, [%rd154];
|
||
add.s32 %r467, %r466, %r465;
|
||
// inline asm
|
||
bfe.u32 %r318, %r323, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd155, %r318, 4;
|
||
add.s64 %rd156, %rd364, %rd155;
|
||
ld.shared.u32 %r468, [%rd156];
|
||
xor.b32 %r469, %r468, %r467;
|
||
// inline asm
|
||
bfe.u32 %r322, %r323, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd157, %r322, 4;
|
||
add.s64 %rd158, %rd372, %rd157;
|
||
ld.shared.u32 %r470, [%rd158];
|
||
add.s32 %r471, %r470, %r469;
|
||
xor.b32 %r472, %r16, %r307;
|
||
xor.b32 %r339, %r472, %r471;
|
||
// inline asm
|
||
bfe.u32 %r326, %r339, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd159, %r326, 4;
|
||
add.s64 %rd160, %rd352, %rd159;
|
||
ld.shared.u32 %r473, [%rd160];
|
||
// inline asm
|
||
bfe.u32 %r330, %r339, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd161, %r330, 4;
|
||
add.s64 %rd162, %rd362, %rd161;
|
||
ld.shared.u32 %r474, [%rd162];
|
||
add.s32 %r475, %r474, %r473;
|
||
// inline asm
|
||
bfe.u32 %r334, %r339, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd163, %r334, 4;
|
||
add.s64 %rd164, %rd364, %rd163;
|
||
ld.shared.u32 %r476, [%rd164];
|
||
xor.b32 %r477, %r476, %r475;
|
||
// inline asm
|
||
bfe.u32 %r338, %r339, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd165, %r338, 4;
|
||
add.s64 %rd166, %rd372, %rd165;
|
||
ld.shared.u32 %r478, [%rd166];
|
||
add.s32 %r479, %r478, %r477;
|
||
xor.b32 %r480, %r17, %r323;
|
||
xor.b32 %r355, %r480, %r479;
|
||
// inline asm
|
||
bfe.u32 %r342, %r355, %r344, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd167, %r342, 4;
|
||
add.s64 %rd168, %rd352, %rd167;
|
||
ld.shared.u32 %r481, [%rd168];
|
||
// inline asm
|
||
bfe.u32 %r346, %r355, %r348, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd169, %r346, 4;
|
||
add.s64 %rd170, %rd362, %rd169;
|
||
ld.shared.u32 %r482, [%rd170];
|
||
add.s32 %r483, %r482, %r481;
|
||
// inline asm
|
||
bfe.u32 %r350, %r355, %r357, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd171, %r350, 4;
|
||
add.s64 %rd172, %rd364, %rd171;
|
||
ld.shared.u32 %r484, [%rd172];
|
||
xor.b32 %r485, %r484, %r483;
|
||
// inline asm
|
||
bfe.u32 %r354, %r355, %r356, %r357;
|
||
// inline asm
|
||
mul.wide.u32 %rd173, %r354, 4;
|
||
add.s64 %rd174, %rd372, %rd173;
|
||
ld.shared.u32 %r486, [%rd174];
|
||
add.s32 %r487, %r486, %r485;
|
||
xor.b32 %r488, %r18, %r339;
|
||
xor.b32 %r962, %r488, %r487;
|
||
xor.b32 %r963, %r19, %r355;
|
||
add.s32 %r961, %r961, 1;
|
||
setp.ne.s32 %p3, %r961, 0;
|
||
@%p3 bra BB5_3;
|
||
|
||
mov.u32 %r966, 1701343084;
|
||
mov.u32 %r965, 1684370003;
|
||
mov.u32 %r964, -64;
|
||
|
||
BB5_5:
|
||
mov.u32 %r958, %tid.x;
|
||
cvt.s64.s32 %rd371, %r958;
|
||
shl.b64 %rd370, %rd371, 10;
|
||
mov.u64 %rd369, m03200_comp$S3_all;
|
||
add.s64 %rd368, %rd369, %rd370;
|
||
mov.u32 %r956, %tid.x;
|
||
cvt.s64.s32 %rd361, %r956;
|
||
shl.b64 %rd360, %rd361, 10;
|
||
mov.u64 %rd359, m03200_comp$S2_all;
|
||
add.s64 %rd358, %rd359, %rd360;
|
||
mov.u64 %rd357, m03200_comp$S1_all;
|
||
add.s64 %rd356, %rd357, %rd360;
|
||
mov.u32 %r943, %tid.x;
|
||
cvt.s64.s32 %rd347, %r943;
|
||
shl.b64 %rd346, %rd347, 10;
|
||
mov.u64 %rd345, m03200_comp$S0_all;
|
||
add.s64 %rd344, %rd345, %rd346;
|
||
mov.u32 %r936, 0;
|
||
mov.u32 %r935, 16;
|
||
mov.u32 %r934, 8;
|
||
mov.u32 %r933, 24;
|
||
xor.b32 %r502, %r2, %r966;
|
||
// inline asm
|
||
bfe.u32 %r489, %r502, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd178, %r489, 4;
|
||
add.s64 %rd179, %rd344, %rd178;
|
||
ld.shared.u32 %r745, [%rd179];
|
||
// inline asm
|
||
bfe.u32 %r493, %r502, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd182, %r493, 4;
|
||
add.s64 %rd183, %rd356, %rd182;
|
||
ld.shared.u32 %r746, [%rd183];
|
||
add.s32 %r747, %r746, %r745;
|
||
// inline asm
|
||
bfe.u32 %r497, %r502, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd186, %r497, 4;
|
||
add.s64 %rd187, %rd358, %rd186;
|
||
ld.shared.u32 %r748, [%rd187];
|
||
xor.b32 %r749, %r748, %r747;
|
||
// inline asm
|
||
bfe.u32 %r501, %r502, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd190, %r501, 4;
|
||
add.s64 %rd191, %rd368, %rd190;
|
||
ld.shared.u32 %r750, [%rd191];
|
||
add.s32 %r751, %r750, %r749;
|
||
xor.b32 %r752, %r3, %r965;
|
||
xor.b32 %r518, %r752, %r751;
|
||
// inline asm
|
||
bfe.u32 %r505, %r518, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd192, %r505, 4;
|
||
add.s64 %rd193, %rd344, %rd192;
|
||
ld.shared.u32 %r753, [%rd193];
|
||
// inline asm
|
||
bfe.u32 %r509, %r518, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd194, %r509, 4;
|
||
add.s64 %rd195, %rd356, %rd194;
|
||
ld.shared.u32 %r754, [%rd195];
|
||
add.s32 %r755, %r754, %r753;
|
||
// inline asm
|
||
bfe.u32 %r513, %r518, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd196, %r513, 4;
|
||
add.s64 %rd197, %rd358, %rd196;
|
||
ld.shared.u32 %r756, [%rd197];
|
||
xor.b32 %r757, %r756, %r755;
|
||
// inline asm
|
||
bfe.u32 %r517, %r518, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd198, %r517, 4;
|
||
add.s64 %rd199, %rd368, %rd198;
|
||
ld.shared.u32 %r758, [%rd199];
|
||
add.s32 %r759, %r758, %r757;
|
||
xor.b32 %r760, %r4, %r502;
|
||
xor.b32 %r534, %r760, %r759;
|
||
// inline asm
|
||
bfe.u32 %r521, %r534, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd200, %r521, 4;
|
||
add.s64 %rd201, %rd344, %rd200;
|
||
ld.shared.u32 %r761, [%rd201];
|
||
// inline asm
|
||
bfe.u32 %r525, %r534, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd202, %r525, 4;
|
||
add.s64 %rd203, %rd356, %rd202;
|
||
ld.shared.u32 %r762, [%rd203];
|
||
add.s32 %r763, %r762, %r761;
|
||
// inline asm
|
||
bfe.u32 %r529, %r534, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd204, %r529, 4;
|
||
add.s64 %rd205, %rd358, %rd204;
|
||
ld.shared.u32 %r764, [%rd205];
|
||
xor.b32 %r765, %r764, %r763;
|
||
// inline asm
|
||
bfe.u32 %r533, %r534, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd206, %r533, 4;
|
||
add.s64 %rd207, %rd368, %rd206;
|
||
ld.shared.u32 %r766, [%rd207];
|
||
add.s32 %r767, %r766, %r765;
|
||
xor.b32 %r768, %r5, %r518;
|
||
xor.b32 %r550, %r768, %r767;
|
||
// inline asm
|
||
bfe.u32 %r537, %r550, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd208, %r537, 4;
|
||
add.s64 %rd209, %rd344, %rd208;
|
||
ld.shared.u32 %r769, [%rd209];
|
||
// inline asm
|
||
bfe.u32 %r541, %r550, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd210, %r541, 4;
|
||
add.s64 %rd211, %rd356, %rd210;
|
||
ld.shared.u32 %r770, [%rd211];
|
||
add.s32 %r771, %r770, %r769;
|
||
// inline asm
|
||
bfe.u32 %r545, %r550, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd212, %r545, 4;
|
||
add.s64 %rd213, %rd358, %rd212;
|
||
ld.shared.u32 %r772, [%rd213];
|
||
xor.b32 %r773, %r772, %r771;
|
||
// inline asm
|
||
bfe.u32 %r549, %r550, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd214, %r549, 4;
|
||
add.s64 %rd215, %rd368, %rd214;
|
||
ld.shared.u32 %r774, [%rd215];
|
||
add.s32 %r775, %r774, %r773;
|
||
xor.b32 %r776, %r6, %r534;
|
||
xor.b32 %r566, %r776, %r775;
|
||
// inline asm
|
||
bfe.u32 %r553, %r566, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd216, %r553, 4;
|
||
add.s64 %rd217, %rd344, %rd216;
|
||
ld.shared.u32 %r777, [%rd217];
|
||
// inline asm
|
||
bfe.u32 %r557, %r566, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd218, %r557, 4;
|
||
add.s64 %rd219, %rd356, %rd218;
|
||
ld.shared.u32 %r778, [%rd219];
|
||
add.s32 %r779, %r778, %r777;
|
||
// inline asm
|
||
bfe.u32 %r561, %r566, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd220, %r561, 4;
|
||
add.s64 %rd221, %rd358, %rd220;
|
||
ld.shared.u32 %r780, [%rd221];
|
||
xor.b32 %r781, %r780, %r779;
|
||
// inline asm
|
||
bfe.u32 %r565, %r566, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd222, %r565, 4;
|
||
add.s64 %rd223, %rd368, %rd222;
|
||
ld.shared.u32 %r782, [%rd223];
|
||
add.s32 %r783, %r782, %r781;
|
||
xor.b32 %r784, %r7, %r550;
|
||
xor.b32 %r582, %r784, %r783;
|
||
// inline asm
|
||
bfe.u32 %r569, %r582, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd224, %r569, 4;
|
||
add.s64 %rd225, %rd344, %rd224;
|
||
ld.shared.u32 %r785, [%rd225];
|
||
// inline asm
|
||
bfe.u32 %r573, %r582, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd226, %r573, 4;
|
||
add.s64 %rd227, %rd356, %rd226;
|
||
ld.shared.u32 %r786, [%rd227];
|
||
add.s32 %r787, %r786, %r785;
|
||
// inline asm
|
||
bfe.u32 %r577, %r582, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd228, %r577, 4;
|
||
add.s64 %rd229, %rd358, %rd228;
|
||
ld.shared.u32 %r788, [%rd229];
|
||
xor.b32 %r789, %r788, %r787;
|
||
// inline asm
|
||
bfe.u32 %r581, %r582, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd230, %r581, 4;
|
||
add.s64 %rd231, %rd368, %rd230;
|
||
ld.shared.u32 %r790, [%rd231];
|
||
add.s32 %r791, %r790, %r789;
|
||
xor.b32 %r792, %r8, %r566;
|
||
xor.b32 %r598, %r792, %r791;
|
||
// inline asm
|
||
bfe.u32 %r585, %r598, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd232, %r585, 4;
|
||
add.s64 %rd233, %rd344, %rd232;
|
||
ld.shared.u32 %r793, [%rd233];
|
||
// inline asm
|
||
bfe.u32 %r589, %r598, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd234, %r589, 4;
|
||
add.s64 %rd235, %rd356, %rd234;
|
||
ld.shared.u32 %r794, [%rd235];
|
||
add.s32 %r795, %r794, %r793;
|
||
// inline asm
|
||
bfe.u32 %r593, %r598, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd236, %r593, 4;
|
||
add.s64 %rd237, %rd358, %rd236;
|
||
ld.shared.u32 %r796, [%rd237];
|
||
xor.b32 %r797, %r796, %r795;
|
||
// inline asm
|
||
bfe.u32 %r597, %r598, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd238, %r597, 4;
|
||
add.s64 %rd239, %rd368, %rd238;
|
||
ld.shared.u32 %r798, [%rd239];
|
||
add.s32 %r799, %r798, %r797;
|
||
xor.b32 %r800, %r9, %r582;
|
||
xor.b32 %r614, %r800, %r799;
|
||
// inline asm
|
||
bfe.u32 %r601, %r614, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd240, %r601, 4;
|
||
add.s64 %rd241, %rd344, %rd240;
|
||
ld.shared.u32 %r801, [%rd241];
|
||
// inline asm
|
||
bfe.u32 %r605, %r614, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd242, %r605, 4;
|
||
add.s64 %rd243, %rd356, %rd242;
|
||
ld.shared.u32 %r802, [%rd243];
|
||
add.s32 %r803, %r802, %r801;
|
||
// inline asm
|
||
bfe.u32 %r609, %r614, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd244, %r609, 4;
|
||
add.s64 %rd245, %rd358, %rd244;
|
||
ld.shared.u32 %r804, [%rd245];
|
||
xor.b32 %r805, %r804, %r803;
|
||
// inline asm
|
||
bfe.u32 %r613, %r614, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd246, %r613, 4;
|
||
add.s64 %rd247, %rd368, %rd246;
|
||
ld.shared.u32 %r806, [%rd247];
|
||
add.s32 %r807, %r806, %r805;
|
||
xor.b32 %r808, %r10, %r598;
|
||
xor.b32 %r630, %r808, %r807;
|
||
// inline asm
|
||
bfe.u32 %r617, %r630, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd248, %r617, 4;
|
||
add.s64 %rd249, %rd344, %rd248;
|
||
ld.shared.u32 %r809, [%rd249];
|
||
// inline asm
|
||
bfe.u32 %r621, %r630, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd250, %r621, 4;
|
||
add.s64 %rd251, %rd356, %rd250;
|
||
ld.shared.u32 %r810, [%rd251];
|
||
add.s32 %r811, %r810, %r809;
|
||
// inline asm
|
||
bfe.u32 %r625, %r630, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd252, %r625, 4;
|
||
add.s64 %rd253, %rd358, %rd252;
|
||
ld.shared.u32 %r812, [%rd253];
|
||
xor.b32 %r813, %r812, %r811;
|
||
// inline asm
|
||
bfe.u32 %r629, %r630, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd254, %r629, 4;
|
||
add.s64 %rd255, %rd368, %rd254;
|
||
ld.shared.u32 %r814, [%rd255];
|
||
add.s32 %r815, %r814, %r813;
|
||
xor.b32 %r816, %r11, %r614;
|
||
xor.b32 %r646, %r816, %r815;
|
||
// inline asm
|
||
bfe.u32 %r633, %r646, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd256, %r633, 4;
|
||
add.s64 %rd257, %rd344, %rd256;
|
||
ld.shared.u32 %r817, [%rd257];
|
||
// inline asm
|
||
bfe.u32 %r637, %r646, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd258, %r637, 4;
|
||
add.s64 %rd259, %rd356, %rd258;
|
||
ld.shared.u32 %r818, [%rd259];
|
||
add.s32 %r819, %r818, %r817;
|
||
// inline asm
|
||
bfe.u32 %r641, %r646, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd260, %r641, 4;
|
||
add.s64 %rd261, %rd358, %rd260;
|
||
ld.shared.u32 %r820, [%rd261];
|
||
xor.b32 %r821, %r820, %r819;
|
||
// inline asm
|
||
bfe.u32 %r645, %r646, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd262, %r645, 4;
|
||
add.s64 %rd263, %rd368, %rd262;
|
||
ld.shared.u32 %r822, [%rd263];
|
||
add.s32 %r823, %r822, %r821;
|
||
xor.b32 %r824, %r12, %r630;
|
||
xor.b32 %r662, %r824, %r823;
|
||
// inline asm
|
||
bfe.u32 %r649, %r662, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd264, %r649, 4;
|
||
add.s64 %rd265, %rd344, %rd264;
|
||
ld.shared.u32 %r825, [%rd265];
|
||
// inline asm
|
||
bfe.u32 %r653, %r662, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd266, %r653, 4;
|
||
add.s64 %rd267, %rd356, %rd266;
|
||
ld.shared.u32 %r826, [%rd267];
|
||
add.s32 %r827, %r826, %r825;
|
||
// inline asm
|
||
bfe.u32 %r657, %r662, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd268, %r657, 4;
|
||
add.s64 %rd269, %rd358, %rd268;
|
||
ld.shared.u32 %r828, [%rd269];
|
||
xor.b32 %r829, %r828, %r827;
|
||
// inline asm
|
||
bfe.u32 %r661, %r662, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd270, %r661, 4;
|
||
add.s64 %rd271, %rd368, %rd270;
|
||
ld.shared.u32 %r830, [%rd271];
|
||
add.s32 %r831, %r830, %r829;
|
||
xor.b32 %r832, %r13, %r646;
|
||
xor.b32 %r678, %r832, %r831;
|
||
// inline asm
|
||
bfe.u32 %r665, %r678, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd272, %r665, 4;
|
||
add.s64 %rd273, %rd344, %rd272;
|
||
ld.shared.u32 %r833, [%rd273];
|
||
// inline asm
|
||
bfe.u32 %r669, %r678, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd274, %r669, 4;
|
||
add.s64 %rd275, %rd356, %rd274;
|
||
ld.shared.u32 %r834, [%rd275];
|
||
add.s32 %r835, %r834, %r833;
|
||
// inline asm
|
||
bfe.u32 %r673, %r678, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd276, %r673, 4;
|
||
add.s64 %rd277, %rd358, %rd276;
|
||
ld.shared.u32 %r836, [%rd277];
|
||
xor.b32 %r837, %r836, %r835;
|
||
// inline asm
|
||
bfe.u32 %r677, %r678, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd278, %r677, 4;
|
||
add.s64 %rd279, %rd368, %rd278;
|
||
ld.shared.u32 %r838, [%rd279];
|
||
add.s32 %r839, %r838, %r837;
|
||
xor.b32 %r840, %r14, %r662;
|
||
xor.b32 %r694, %r840, %r839;
|
||
// inline asm
|
||
bfe.u32 %r681, %r694, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd280, %r681, 4;
|
||
add.s64 %rd281, %rd344, %rd280;
|
||
ld.shared.u32 %r841, [%rd281];
|
||
// inline asm
|
||
bfe.u32 %r685, %r694, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd282, %r685, 4;
|
||
add.s64 %rd283, %rd356, %rd282;
|
||
ld.shared.u32 %r842, [%rd283];
|
||
add.s32 %r843, %r842, %r841;
|
||
// inline asm
|
||
bfe.u32 %r689, %r694, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd284, %r689, 4;
|
||
add.s64 %rd285, %rd358, %rd284;
|
||
ld.shared.u32 %r844, [%rd285];
|
||
xor.b32 %r845, %r844, %r843;
|
||
// inline asm
|
||
bfe.u32 %r693, %r694, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd286, %r693, 4;
|
||
add.s64 %rd287, %rd368, %rd286;
|
||
ld.shared.u32 %r846, [%rd287];
|
||
add.s32 %r847, %r846, %r845;
|
||
xor.b32 %r848, %r15, %r678;
|
||
xor.b32 %r710, %r848, %r847;
|
||
// inline asm
|
||
bfe.u32 %r697, %r710, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd288, %r697, 4;
|
||
add.s64 %rd289, %rd344, %rd288;
|
||
ld.shared.u32 %r849, [%rd289];
|
||
// inline asm
|
||
bfe.u32 %r701, %r710, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd290, %r701, 4;
|
||
add.s64 %rd291, %rd356, %rd290;
|
||
ld.shared.u32 %r850, [%rd291];
|
||
add.s32 %r851, %r850, %r849;
|
||
// inline asm
|
||
bfe.u32 %r705, %r710, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd292, %r705, 4;
|
||
add.s64 %rd293, %rd358, %rd292;
|
||
ld.shared.u32 %r852, [%rd293];
|
||
xor.b32 %r853, %r852, %r851;
|
||
// inline asm
|
||
bfe.u32 %r709, %r710, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd294, %r709, 4;
|
||
add.s64 %rd295, %rd368, %rd294;
|
||
ld.shared.u32 %r854, [%rd295];
|
||
add.s32 %r855, %r854, %r853;
|
||
xor.b32 %r856, %r16, %r694;
|
||
xor.b32 %r726, %r856, %r855;
|
||
// inline asm
|
||
bfe.u32 %r713, %r726, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd296, %r713, 4;
|
||
add.s64 %rd297, %rd344, %rd296;
|
||
ld.shared.u32 %r857, [%rd297];
|
||
// inline asm
|
||
bfe.u32 %r717, %r726, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd298, %r717, 4;
|
||
add.s64 %rd299, %rd356, %rd298;
|
||
ld.shared.u32 %r858, [%rd299];
|
||
add.s32 %r859, %r858, %r857;
|
||
// inline asm
|
||
bfe.u32 %r721, %r726, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd300, %r721, 4;
|
||
add.s64 %rd301, %rd358, %rd300;
|
||
ld.shared.u32 %r860, [%rd301];
|
||
xor.b32 %r861, %r860, %r859;
|
||
// inline asm
|
||
bfe.u32 %r725, %r726, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd302, %r725, 4;
|
||
add.s64 %rd303, %rd368, %rd302;
|
||
ld.shared.u32 %r862, [%rd303];
|
||
add.s32 %r863, %r862, %r861;
|
||
xor.b32 %r864, %r17, %r710;
|
||
xor.b32 %r742, %r864, %r863;
|
||
// inline asm
|
||
bfe.u32 %r729, %r742, %r933, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd304, %r729, 4;
|
||
add.s64 %rd305, %rd344, %rd304;
|
||
ld.shared.u32 %r865, [%rd305];
|
||
// inline asm
|
||
bfe.u32 %r733, %r742, %r935, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd306, %r733, 4;
|
||
add.s64 %rd307, %rd356, %rd306;
|
||
ld.shared.u32 %r866, [%rd307];
|
||
add.s32 %r867, %r866, %r865;
|
||
// inline asm
|
||
bfe.u32 %r737, %r742, %r934, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd308, %r737, 4;
|
||
add.s64 %rd309, %rd358, %rd308;
|
||
ld.shared.u32 %r868, [%rd309];
|
||
xor.b32 %r869, %r868, %r867;
|
||
// inline asm
|
||
bfe.u32 %r741, %r742, %r936, %r934;
|
||
// inline asm
|
||
mul.wide.u32 %rd310, %r741, 4;
|
||
add.s64 %rd311, %rd368, %rd310;
|
||
ld.shared.u32 %r870, [%rd311];
|
||
add.s32 %r871, %r870, %r869;
|
||
xor.b32 %r872, %r18, %r726;
|
||
xor.b32 %r965, %r872, %r871;
|
||
xor.b32 %r966, %r19, %r742;
|
||
add.s32 %r964, %r964, 1;
|
||
setp.ne.s32 %p4, %r964, 0;
|
||
@%p4 bra BB5_5;
|
||
|
||
ld.param.u32 %r944, [m03200_comp_param_24];
|
||
ld.param.u64 %rd335, [m03200_comp_param_6];
|
||
ld.param.u32 %r937, [m03200_comp_param_25];
|
||
and.b32 %r34, %r937, 31;
|
||
shr.u32 %r873, %r963, %r34;
|
||
and.b32 %r874, %r873, %r944;
|
||
mul.wide.u32 %rd312, %r874, 4;
|
||
add.s64 %rd313, %rd335, %rd312;
|
||
and.b32 %r875, %r963, 31;
|
||
mov.u32 %r876, 1;
|
||
shl.b32 %r35, %r876, %r875;
|
||
ld.global.u32 %r877, [%rd313];
|
||
and.b32 %r878, %r877, %r35;
|
||
setp.eq.s32 %p5, %r878, 0;
|
||
@%p5 bra BB5_34;
|
||
|
||
ld.param.u64 %rd348, [m03200_comp_param_7];
|
||
ld.param.u32 %r945, [m03200_comp_param_24];
|
||
shr.u32 %r879, %r962, %r34;
|
||
and.b32 %r880, %r879, %r945;
|
||
mul.wide.u32 %rd314, %r880, 4;
|
||
add.s64 %rd315, %rd348, %rd314;
|
||
and.b32 %r881, %r962, 31;
|
||
shl.b32 %r36, %r876, %r881;
|
||
ld.global.u32 %r883, [%rd315];
|
||
and.b32 %r884, %r883, %r36;
|
||
setp.eq.s32 %p6, %r884, 0;
|
||
@%p6 bra BB5_34;
|
||
|
||
ld.param.u32 %r946, [m03200_comp_param_24];
|
||
ld.param.u64 %rd336, [m03200_comp_param_8];
|
||
shr.u32 %r885, %r966, %r34;
|
||
and.b32 %r886, %r885, %r946;
|
||
mul.wide.u32 %rd316, %r886, 4;
|
||
add.s64 %rd317, %rd336, %rd316;
|
||
and.b32 %r887, %r966, 31;
|
||
shl.b32 %r37, %r876, %r887;
|
||
ld.global.u32 %r889, [%rd317];
|
||
and.b32 %r890, %r889, %r37;
|
||
setp.eq.s32 %p7, %r890, 0;
|
||
@%p7 bra BB5_34;
|
||
|
||
ld.param.u32 %r947, [m03200_comp_param_24];
|
||
ld.param.u64 %rd337, [m03200_comp_param_9];
|
||
shr.u32 %r891, %r965, %r34;
|
||
and.b32 %r892, %r891, %r947;
|
||
mul.wide.u32 %rd318, %r892, 4;
|
||
add.s64 %rd319, %rd337, %rd318;
|
||
and.b32 %r893, %r965, 31;
|
||
shl.b32 %r38, %r876, %r893;
|
||
ld.global.u32 %r895, [%rd319];
|
||
and.b32 %r896, %r895, %r38;
|
||
setp.eq.s32 %p8, %r896, 0;
|
||
@%p8 bra BB5_34;
|
||
|
||
ld.param.u32 %r948, [m03200_comp_param_24];
|
||
ld.param.u64 %rd338, [m03200_comp_param_10];
|
||
ld.param.u32 %r938, [m03200_comp_param_26];
|
||
and.b32 %r39, %r938, 31;
|
||
shr.u32 %r897, %r963, %r39;
|
||
and.b32 %r898, %r897, %r948;
|
||
mul.wide.u32 %rd320, %r898, 4;
|
||
add.s64 %rd321, %rd338, %rd320;
|
||
ld.global.u32 %r899, [%rd321];
|
||
and.b32 %r900, %r899, %r35;
|
||
setp.eq.s32 %p9, %r900, 0;
|
||
@%p9 bra BB5_34;
|
||
|
||
ld.param.u32 %r949, [m03200_comp_param_24];
|
||
ld.param.u64 %rd339, [m03200_comp_param_11];
|
||
shr.u32 %r901, %r962, %r39;
|
||
and.b32 %r902, %r901, %r949;
|
||
mul.wide.u32 %rd322, %r902, 4;
|
||
add.s64 %rd323, %rd339, %rd322;
|
||
ld.global.u32 %r903, [%rd323];
|
||
and.b32 %r904, %r903, %r36;
|
||
setp.eq.s32 %p10, %r904, 0;
|
||
@%p10 bra BB5_34;
|
||
|
||
ld.param.u32 %r950, [m03200_comp_param_24];
|
||
ld.param.u64 %rd340, [m03200_comp_param_12];
|
||
shr.u32 %r905, %r966, %r39;
|
||
and.b32 %r906, %r905, %r950;
|
||
mul.wide.u32 %rd324, %r906, 4;
|
||
add.s64 %rd325, %rd340, %rd324;
|
||
ld.global.u32 %r907, [%rd325];
|
||
and.b32 %r908, %r907, %r37;
|
||
setp.eq.s32 %p11, %r908, 0;
|
||
@%p11 bra BB5_34;
|
||
|
||
ld.param.u32 %r951, [m03200_comp_param_24];
|
||
ld.param.u64 %rd341, [m03200_comp_param_13];
|
||
shr.u32 %r909, %r965, %r39;
|
||
and.b32 %r910, %r909, %r951;
|
||
mul.wide.u32 %rd326, %r910, 4;
|
||
add.s64 %rd327, %rd341, %rd326;
|
||
ld.global.u32 %r911, [%rd327];
|
||
and.b32 %r912, %r911, %r38;
|
||
setp.eq.s32 %p12, %r912, 0;
|
||
@%p12 bra BB5_34;
|
||
|
||
ld.param.u32 %r952, [m03200_comp_param_31];
|
||
ld.param.u32 %r939, [m03200_comp_param_32];
|
||
cvt.u64.u32 %rd3, %r939;
|
||
setp.eq.s32 %p13, %r952, 0;
|
||
mov.u32 %r913, -1;
|
||
@%p13 bra BB5_28;
|
||
|
||
ld.param.u32 %r967, [m03200_comp_param_31];
|
||
mov.u32 %r968, 0;
|
||
|
||
BB5_16:
|
||
ld.param.u64 %rd342, [m03200_comp_param_15];
|
||
shr.u32 %r42, %r967, 1;
|
||
add.s32 %r970, %r42, %r968;
|
||
cvt.u64.u32 %rd328, %r970;
|
||
add.s64 %rd329, %rd328, %rd3;
|
||
mul.lo.s64 %rd330, %rd329, 24;
|
||
add.s64 %rd4, %rd342, %rd330;
|
||
ld.global.u32 %r44, [%rd4+12];
|
||
setp.gt.u32 %p14, %r965, %r44;
|
||
mov.u32 %r969, %r876;
|
||
@%p14 bra BB5_26;
|
||
|
||
setp.lt.u32 %p15, %r965, %r44;
|
||
mov.u32 %r916, -1;
|
||
@%p15 bra BB5_18;
|
||
bra.uni BB5_19;
|
||
|
||
BB5_18:
|
||
mov.u32 %r969, %r916;
|
||
bra.uni BB5_26;
|
||
|
||
BB5_19:
|
||
ld.global.u32 %r45, [%rd4+8];
|
||
setp.gt.u32 %p16, %r966, %r45;
|
||
mov.u32 %r969, %r876;
|
||
@%p16 bra BB5_26;
|
||
|
||
setp.lt.u32 %p17, %r966, %r45;
|
||
@%p17 bra BB5_21;
|
||
bra.uni BB5_22;
|
||
|
||
BB5_21:
|
||
mov.u32 %r969, %r916;
|
||
bra.uni BB5_26;
|
||
|
||
BB5_22:
|
||
ld.global.u32 %r46, [%rd4+4];
|
||
setp.gt.u32 %p18, %r962, %r46;
|
||
mov.u32 %r969, %r876;
|
||
@%p18 bra BB5_26;
|
||
|
||
setp.lt.u32 %p19, %r962, %r46;
|
||
mov.u32 %r969, %r916;
|
||
@%p19 bra BB5_26;
|
||
|
||
ld.global.u32 %r47, [%rd4];
|
||
setp.gt.u32 %p20, %r963, %r47;
|
||
mov.u32 %r969, %r876;
|
||
@%p20 bra BB5_26;
|
||
|
||
setp.lt.u32 %p21, %r963, %r47;
|
||
selp.b32 %r969, -1, 0, %p21;
|
||
|
||
BB5_26:
|
||
add.s32 %r922, %r42, 1;
|
||
setp.gt.s32 %p22, %r969, 0;
|
||
selp.b32 %r923, %r922, 0, %p22;
|
||
add.s32 %r968, %r923, %r968;
|
||
selp.b32 %r924, -1, 0, %p22;
|
||
add.s32 %r925, %r924, %r967;
|
||
shr.u32 %r967, %r925, 1;
|
||
setp.eq.s32 %p23, %r969, 0;
|
||
@%p23 bra BB5_29;
|
||
|
||
setp.ne.s32 %p24, %r967, 0;
|
||
@%p24 bra BB5_16;
|
||
|
||
BB5_28:
|
||
mov.u32 %r970, %r913;
|
||
|
||
BB5_29:
|
||
setp.eq.s32 %p25, %r970, -1;
|
||
@%p25 bra BB5_34;
|
||
|
||
ld.param.u64 %rd343, [m03200_comp_param_16];
|
||
ld.param.u32 %r941, [m03200_comp_param_32];
|
||
add.s32 %r53, %r970, %r941;
|
||
mul.wide.u32 %rd331, %r53, 4;
|
||
add.s64 %rd332, %rd343, %rd331;
|
||
atom.global.add.u32 %r927, [%rd332], 1;
|
||
setp.ne.s32 %p26, %r927, 0;
|
||
@%p26 bra BB5_34;
|
||
|
||
ld.param.u64 %rd349, [m03200_comp_param_19];
|
||
ld.param.u32 %r954, [m03200_comp_param_31];
|
||
atom.global.add.u32 %r54, [%rd349], 1;
|
||
setp.lt.u32 %p27, %r54, %r954;
|
||
@%p27 bra BB5_33;
|
||
bra.uni BB5_32;
|
||
|
||
BB5_33:
|
||
ld.param.u64 %rd351, [m03200_comp_param_14];
|
||
ld.param.u32 %r942, [m03200_comp_param_27];
|
||
mul.wide.u32 %rd333, %r54, 24;
|
||
add.s64 %rd334, %rd351, %rd333;
|
||
st.global.v2.u32 [%rd334+16], {%r970, %r53};
|
||
mov.u32 %r929, 0;
|
||
st.global.v2.u32 [%rd334+8], {%r929, %r942};
|
||
st.global.u64 [%rd334], %rd1;
|
||
bra.uni BB5_34;
|
||
|
||
BB5_32:
|
||
ld.param.u64 %rd350, [m03200_comp_param_19];
|
||
atom.global.add.u32 %r928, [%rd350], -1;
|
||
|
||
BB5_34:
|
||
ret;
|
||
}
|
||
|
||
|
||
// Kernel attribute metadata for this module.
// The section holds three numbered .metadata entries (0, 1, 2). Each entry is
// a list of three strings: the tag "cl_kernel_attributes", a kernel name, and
// the attribute text "reqd_work_group_size(8,1,1)".
// NOTE(review): presumably this is how the OpenCL front end records a required
// work-group size of 8x1x1 for each kernel — confirm against the toolchain docs.
// NOTE(review): the lone `|` / `||` lines look like residue from the web export
// this file was mirrored from, not PTX syntax — verify before assembling.
.metadata_section {
|
||
|
||
// Entry 0: attribute string recorded for kernel "m03200_init".
.metadata 0 {
|
||
"cl_kernel_attributes",
|
||
"m03200_init",
|
||
"reqd_work_group_size(8,1,1)"
|
||
}
|
||
|
||
// Entry 1: attribute string recorded for kernel "m03200_loop".
.metadata 1 {
|
||
"cl_kernel_attributes",
|
||
"m03200_loop",
|
||
"reqd_work_group_size(8,1,1)"
|
||
}
|
||
|
||
// Entry 2: attribute string recorded for kernel "m03200_comp".
.metadata 2 {
|
||
"cl_kernel_attributes",
|
||
"m03200_comp",
|
||
"reqd_work_group_size(8,1,1)"
|
||
}
|
||
|
||
} // end of .metadata_section
|
||
|