promach
Advanced Member level 4
Could anyone point out which code snippets in the attached PDF correspond to loop unrolling (unrolling factor = 3, 4, or 5) and loop tiling (tiling factor = 20, 15, or 12)?
View attachment Loop_unrolling_and_tiling.pdf
Code Snippet #1
Code Snippet #2
Code Snippet #3
Code Snippet #4
View attachment Loop_unrolling_and_tiling.pdf
Code Snippet #1
#define NX 60
#define NY 60

/* Loop counters and operands are file-scope objects, as in the listing. */
int i;
int j;
int tmp[NX];
int y[NY];
int x[NY];
int A[NX][NY];

/*
 * Reference kernel with no loop transformation applied:
 *   tmp[i] = sum over j of A[i][j] * x[j]     (tmp = A * x)
 *   y[j]   = sum over i of A[i][j] * tmp[i]   (y = A^T * tmp)
 * Every loop advances one element per iteration.
 */
int main()
{
    /* Clear both accumulators before summing into them. */
    i = 0;
    while (i < NY) {
        y[i] = 0;
        i++;
    }

    i = 0;
    while (i < NX) {
        tmp[i] = 0;
        i++;
    }

    /* First pass: dot each row of A with x. */
    i = 0;
    while (i < NX) {
        j = 0;
        while (j < NY) {
            tmp[i] += A[i][j] * x[j];
            j++;
        }
        i++;
    }

    /* Second pass: scatter row i of A, scaled by tmp[i], into y. */
    i = 0;
    while (i < NX) {
        j = 0;
        while (j < NY) {
            y[j] += A[i][j] * tmp[i];
            j++;
        }
        i++;
    }

    return 0;
}
Code Snippet #2
#define NX 60
#define NY 60

int i;
int j;
int tmp[60];
int y[60];
int x[60];
int A[60][60];

/*
 * Transformed version of the kernel
 *   tmp[i] = sum over j of A[i][j] * x[j]
 *   y[j]   = sum over i of A[i][j] * tmp[i]
 *
 * Both loops of each nest advance by 3, and each body handles a 3x3 group
 * of elements (three accumulators, three terms each).  The outer loop of
 * each nest is additionally wrapped in a tile loop whose intra-tile extent
 * is written as 20*3 -- presumably tiling factor 20 times unroll factor 3.
 * With 60 elements and a tile step of 60, each tile loop runs exactly once.
 */
int main()
{
    for (i = 0; i < 60; i += 1) {
        y[i] = 0;
    }
    for (i = 0; i < 60; i += 1) {
        tmp[i] = 0;
    }

    int ii;
    for (ii = 0; ii <= 59; ii += 60) {
        /*
         * Last row index of this tile, clamped to the array.  The comparison
         * and the fallback must use the same extent (20*3); testing against
         * ii + 20 - 1 would leave the clamp unable to fire for a partial
         * tile and let the loop run past index 59.
         */
        int i_last = (59 < ii + 20*3 - 1) ? 59 : ii + 20*3 - 1;

        for (i = ii; i <= i_last; i += 3) {
            for (j = 0; j <= 59; j += 3) {
                tmp[i] = tmp[i] + A[i][j] * x[j]
                                + A[i][j + 1] * x[j + 1]
                                + A[i][j + 2] * x[j + 2];
                tmp[i + 1] = tmp[i + 1] + A[i + 1][j] * x[j]
                                        + A[i + 1][j + 1] * x[j + 1]
                                        + A[i + 1][j + 2] * x[j + 2];
                tmp[i + 2] = tmp[i + 2] + A[i + 2][j] * x[j]
                                        + A[i + 2][j + 1] * x[j + 1]
                                        + A[i + 2][j + 2] * x[j + 2];
            }
        }
    }

    int jj;
    for (jj = 0; jj <= 59; jj += 60) {
        /* Same clamp as above, applied to the column tile. */
        int j_last = (59 < jj + 20*3 - 1) ? 59 : jj + 20*3 - 1;

        for (j = jj; j <= j_last; j += 3) {
            for (i = 0; i <= 59; i += 3) {
                y[j] = y[j] + A[i][j] * tmp[i]
                            + A[i + 1][j] * tmp[i + 1]
                            + A[i + 2][j] * tmp[i + 2];
                y[j + 1] = y[j + 1] + A[i][j + 1] * tmp[i]
                                    + A[i + 1][j + 1] * tmp[i + 1]
                                    + A[i + 2][j + 1] * tmp[i + 2];
                y[j + 2] = y[j + 2] + A[i][j + 2] * tmp[i]
                                    + A[i + 1][j + 2] * tmp[i + 1]
                                    + A[i + 2][j + 2] * tmp[i + 2];
            }
        }
    }

    return 0;
}
Code Snippet #3
#define NX 60
#define NY 60

int i;
int j;
int tmp[60];
int y[60];
int x[60];
int A[60][60];

/*
 * Transformed version of the kernel
 *   tmp[i] = sum over j of A[i][j] * x[j]
 *   y[j]   = sum over i of A[i][j] * tmp[i]
 *
 * The outer loop of each nest advances by 4 and its body carries four
 * accumulator statements; the inner loop advances by 3 with three terms per
 * statement.  The intra-tile extent is written as 15*4 -- presumably tiling
 * factor 15 times unroll factor 4.  The tile loops step by 60 over 60
 * elements, so each runs a single time.
 */
int main()
{
    i = 0;
    while (i < 60) {
        y[i] = 0;
        i += 1;
    }

    i = 0;
    while (i < 60) {
        tmp[i] = 0;
        i += 1;
    }

    int row_tile = 0;
    while (row_tile <= 59) {
        /* Last row of the tile, never beyond the final valid index. */
        int row_last = row_tile + 15*4 - 1;
        if (59 < row_last) {
            row_last = 59;
        }

        i = row_tile;
        while (i <= row_last) {
            j = 0;
            while (j <= 59) {
                tmp[i] = tmp[i]
                    + A[i][j] * x[j]
                    + A[i][j + 1] * x[j + 1]
                    + A[i][j + 2] * x[j + 2];
                tmp[i + 1] = tmp[i + 1]
                    + A[i + 1][j] * x[j]
                    + A[i + 1][j + 1] * x[j + 1]
                    + A[i + 1][j + 2] * x[j + 2];
                tmp[i + 2] = tmp[i + 2]
                    + A[i + 2][j] * x[j]
                    + A[i + 2][j + 1] * x[j + 1]
                    + A[i + 2][j + 2] * x[j + 2];
                tmp[i + 3] = tmp[i + 3]
                    + A[i + 3][j] * x[j]
                    + A[i + 3][j + 1] * x[j + 1]
                    + A[i + 3][j + 2] * x[j + 2];
                j += 3;
            }
            i += 4;
        }
        row_tile += 60;
    }

    int col_tile = 0;
    while (col_tile <= 59) {
        /* Last column of the tile, clamped the same way. */
        int col_last = col_tile + 15*4 - 1;
        if (59 < col_last) {
            col_last = 59;
        }

        j = col_tile;
        while (j <= col_last) {
            i = 0;
            while (i <= 59) {
                y[j] = y[j]
                    + A[i][j] * tmp[i]
                    + A[i + 1][j] * tmp[i + 1]
                    + A[i + 2][j] * tmp[i + 2];
                y[j + 1] = y[j + 1]
                    + A[i][j + 1] * tmp[i]
                    + A[i + 1][j + 1] * tmp[i + 1]
                    + A[i + 2][j + 1] * tmp[i + 2];
                y[j + 2] = y[j + 2]
                    + A[i][j + 2] * tmp[i]
                    + A[i + 1][j + 2] * tmp[i + 1]
                    + A[i + 2][j + 2] * tmp[i + 2];
                y[j + 3] = y[j + 3]
                    + A[i][j + 3] * tmp[i]
                    + A[i + 1][j + 3] * tmp[i + 1]
                    + A[i + 2][j + 3] * tmp[i + 2];
                i += 3;
            }
            j += 4;
        }
        col_tile += 60;
    }

    return 0;
}
Code Snippet #4
#define NX 60
#define NY 60

int i;
int j;
int tmp[60];
int y[60];
int x[60];
int A[60][60];

/* Returns a when a < b, otherwise b; used to clamp a tile's last index. */
static int lesser(int a, int b)
{
    return (a < b) ? a : b;
}

/*
 * Transformed version of the kernel
 *   tmp[i] = sum over j of A[i][j] * x[j]
 *   y[j]   = sum over i of A[i][j] * tmp[i]
 *
 * The outer loop of each nest advances by 5 and its body carries five
 * accumulator statements; the inner loop advances by 3 with three terms per
 * statement.  The intra-tile extent is written as 12*5 -- presumably tiling
 * factor 12 times unroll factor 5.  The tile loops step by 60 over 60
 * elements, so each runs a single time.
 */
int main()
{
    for (i = 0; i < 60; i++) {
        y[i] = 0;
    }
    for (i = 0; i < 60; i++) {
        tmp[i] = 0;
    }

    int tile_row;
    for (tile_row = 0; tile_row < 60; tile_row += 60) {
        /* Final row index covered by this tile. */
        const int row_end = lesser(59, tile_row + 12*5 - 1);

        for (i = tile_row; i <= row_end; i += 5) {
            for (j = 0; j < 60; j += 3) {
                tmp[i]     = tmp[i]     + A[i][j] * x[j]     + A[i][j + 1] * x[j + 1]     + A[i][j + 2] * x[j + 2];
                tmp[i + 1] = tmp[i + 1] + A[i + 1][j] * x[j] + A[i + 1][j + 1] * x[j + 1] + A[i + 1][j + 2] * x[j + 2];
                tmp[i + 2] = tmp[i + 2] + A[i + 2][j] * x[j] + A[i + 2][j + 1] * x[j + 1] + A[i + 2][j + 2] * x[j + 2];
                tmp[i + 3] = tmp[i + 3] + A[i + 3][j] * x[j] + A[i + 3][j + 1] * x[j + 1] + A[i + 3][j + 2] * x[j + 2];
                tmp[i + 4] = tmp[i + 4] + A[i + 4][j] * x[j] + A[i + 4][j + 1] * x[j + 1] + A[i + 4][j + 2] * x[j + 2];
            }
        }
    }

    int tile_col;
    for (tile_col = 0; tile_col < 60; tile_col += 60) {
        /* Final column index covered by this tile. */
        const int col_end = lesser(59, tile_col + 12*5 - 1);

        for (j = tile_col; j <= col_end; j += 5) {
            for (i = 0; i < 60; i += 3) {
                y[j]     = y[j]     + A[i][j] * tmp[i]     + A[i + 1][j] * tmp[i + 1]     + A[i + 2][j] * tmp[i + 2];
                y[j + 1] = y[j + 1] + A[i][j + 1] * tmp[i] + A[i + 1][j + 1] * tmp[i + 1] + A[i + 2][j + 1] * tmp[i + 2];
                y[j + 2] = y[j + 2] + A[i][j + 2] * tmp[i] + A[i + 1][j + 2] * tmp[i + 1] + A[i + 2][j + 2] * tmp[i + 2];
                y[j + 3] = y[j + 3] + A[i][j + 3] * tmp[i] + A[i + 1][j + 3] * tmp[i + 1] + A[i + 2][j + 3] * tmp[i + 2];
                y[j + 4] = y[j + 4] + A[i][j + 4] * tmp[i] + A[i + 1][j + 4] * tmp[i + 1] + A[i + 2][j + 4] * tmp[i + 2];
            }
        }
    }

    return 0;
}