Loop unrolling and tiling

Status
Not open for further replies.

promach

Advanced Member level 4
Joined
Feb 22, 2016
Messages
1,199
Helped
2
Reputation
4
Reaction score
5
Trophy points
1,318
Activity points
11,636
Could anyone point out which code snippets in the attached PDF correspond to loop unrolling (unrolling factor = 3, 4, or 5) and to loop tiling (tiling factor = 20, 15, or 12)?

View attachment Loop_unrolling_and_tiling.pdf

Code Snippet #1


Code C++ - [expand]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#define NX 60   /* rows of A; length of tmp */
#define NY 60   /* columns of A; length of x and y */

/* File-scope loop counters; main() uses them as its induction variables. */
int i, j;

/* Operands of the kernel in main(): tmp = A * x, then y = A^T * tmp. */
int A[NX][NY];  /* matrix, indexed A[row][column] */
int x[NY];      /* input vector, one entry per column of A */
int tmp[NX];    /* intermediate vector, one entry per row of A */
int y[NY];      /* result vector, one entry per column of A */
 
/*
 * Reference (untransformed) kernel: forms tmp = A * x and then
 * y = A^T * tmp using plain unit-stride loops.  The file-scope counters
 * i and j serve as the induction variables of every loop.
 */
int main()
{
   /* Clear both accumulators before any sums are formed. */
   for (i = 0; i < NY; ++i) {
      y[i] = 0;
   }

   for (i = 0; i < NX; ++i) {
      tmp[i] = 0;
   }

   /* tmp[i] = sum over j of A[i][j] * x[j] */
   for (i = 0; i < NX; ++i) {
      const int *row = A[i];
      int acc = tmp[i];

      for (j = 0; j < NY; ++j) {
         acc += row[j] * x[j];
      }
      tmp[i] = acc;
   }

   /* y[j] = sum over i of A[i][j] * tmp[i] */
   for (i = 0; i < NX; ++i) {
      const int *row = A[i];
      const int scale = tmp[i];

      for (j = 0; j < NY; ++j) {
         y[j] += row[j] * scale;
      }
   }
}



Code Snippet #2


Code C++ - [expand]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 * Problem dimensions.  main() in this snippet hard-codes its loop bounds
 * as 60/59, so NX and NY must stay at 60 unless those bounds are changed
 * as well.
 */
#define NX 60   /* rows of A; length of tmp */
#define NY 60   /* columns of A; length of x and y */

/* File-scope loop counters; main() uses them as its induction variables. */
int i;
int j;

int tmp[NX];    /* intermediate vector, one entry per row of A */
int y[NY];      /* result vector, one entry per column of A */
int x[NY];      /* input vector, one entry per column of A */
int A[NX][NY];  /* matrix, indexed A[row][column] */
 
/*
 * Computes tmp = A * x and then y = A^T * tmp.
 *
 * Both multiply nests are tiled and unrolled: the tiled loop advances by 3
 * and a tile spans 20*3 = 60 indices, and the innermost loop is also
 * unrolled by 3.  Because the arrays hold 60 elements, each tile loop
 * (ii / jj) executes exactly once, starting at 0.
 */
int main()
{
    /* Clear both accumulators before any sums are formed. */
    for (i = 0; i < 60; i += 1) {
        y[i] = 0;
    }
    
    for (i = 0; i < 60; i += 1) {
        tmp[i] = 0;
    }
    
    int ii;

    /* tmp[i] = sum over j of A[i][j] * x[j], three rows per iteration. */
    for (ii = 0; ii <= 59; ii += 60) {
        /*
         * The upper bound is min(59, ii + 20*3 - 1): the last index of the
         * current tile, clamped to the last valid array index.  The tested
         * and the selected expression must be identical for this to be a
         * correct minimum.
         */
        for (i = ii; i <= ((59 < ii + 20*3 - 1?59 : ii + 20*3 - 1)); i += 3) {
            for (j = 0; j <= 59; j += 3) {
                tmp[i] = tmp[i] + A[i][j] * x[j] + A[i][j + 1] * x[j + 1] + A[i][j + 2] * x[j + 2];
                tmp[i + 1] = tmp[i + 1] + A[i + 1][j] * x[j] + A[i + 1][j + 1] * x[j + 1] + A[i + 1][j + 2] * x[j + 2];
                tmp[i + 2] = tmp[i + 2] + A[i + 2][j] * x[j] + A[i + 2][j + 1] * x[j + 1] + A[i + 2][j + 2] * x[j + 2];
            }
        }
    }
    
    int jj;

    /* y[j] = sum over i of A[i][j] * tmp[i], three columns per iteration. */
    for (jj = 0; jj <= 59; jj += 60) {
        /* Same clamped tile bound as above: min(59, jj + 20*3 - 1). */
        for (j = jj; j <= ((59 < jj + 20*3 - 1?59 : jj + 20*3 - 1)); j += 3) {
            for (i = 0; i <= 59; i += 3) {
                y[j] = y[j] + A[i][j] * tmp[i] + A[i + 1][j] * tmp[i + 1] + A[i + 2][j] * tmp[i + 2];
                y[j + 1] = y[j + 1] + A[i][j + 1] * tmp[i] + A[i + 1][j + 1] * tmp[i + 1] + A[i + 2][j + 1] * tmp[i + 2];
                y[j + 2] = y[j + 2] + A[i][j + 2] * tmp[i] + A[i + 1][j + 2] * tmp[i + 1] + A[i + 2][j + 2] * tmp[i + 2];
            }
        }
    }
}



Code Snippet #3


Code C++ - [expand]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/*
 * Problem dimensions.  main() in this snippet hard-codes its loop bounds
 * as 60/59, so NX and NY must stay at 60 unless those bounds are changed
 * as well.
 */
#define NX 60   /* rows of A; length of tmp */
#define NY 60   /* columns of A; length of x and y */

/* File-scope loop counters; main() uses them as its induction variables. */
int i;
int j;

int tmp[NX];    /* intermediate vector, one entry per row of A */
int y[NY];      /* result vector, one entry per column of A */
int x[NY];      /* input vector, one entry per column of A */
int A[NX][NY];  /* matrix, indexed A[row][column] */
 
int main()
{
    for (i = 0; i < 60; i += 1) {
        y[i] = 0;
    }
    
    for (i = 0; i < 60; i += 1) {
        tmp[i] = 0;
    }
    
    int ii;
    
    for (ii = 0; ii <= 59; ii += 60) {
        for (i = ii; i <= ((59 < ii + 15*4 - 1?59 : ii + 15*4 - 1)); i += 4) {
            for (j = 0; j <= 59; j += 3) {
                tmp[i] = tmp[i] + A[i][j] * x[j] + A[i][j + 1] * x[j + 1] + A[i][j + 2] * x[j + 2];
                tmp[i + 1] = tmp[i + 1] + A[i + 1][j] * x[j] + A[i + 1][j + 1] * x[j + 1] + A[i + 1][j + 2] * x[j + 2];
                tmp[i + 2] = tmp[i + 2] + A[i + 2][j] * x[j] + A[i + 2][j + 1] * x[j + 1] + A[i + 2][j + 2] * x[j + 2];
                tmp[i + 3] = tmp[i + 3] + A[i + 3][j] * x[j] + A[i + 3][j + 1] * x[j + 1] + A[i + 3][j + 2] * x[j + 2];
            }
        }
    }
    
    int jj;
 
    for (jj = 0; jj <= 59; jj += 60) {
        for (j = jj; j <= ((59 < jj + 15*4 - 1?59 : jj + 15*4 - 1)); j += 4) {
            for (i = 0; i <= 59; i += 3) {
                y[j] = y[j] + A[i][j] * tmp[i] + A[i + 1][j] * tmp[i + 1] + A[i + 2][j] * tmp[i + 2];
                y[j + 1] = y[j + 1] + A[i][j + 1] * tmp[i] + A[i + 1][j + 1] * tmp[i + 1] + A[i + 2][j + 1] * tmp[i + 2];
                y[j + 2] = y[j + 2] + A[i][j + 2] * tmp[i] + A[i + 1][j + 2] * tmp[i + 1] + A[i + 2][j + 2] * tmp[i + 2];
                y[j + 3] = y[j + 3] + A[i][j + 3] * tmp[i] + A[i + 1][j + 3] * tmp[i + 1] + A[i + 2][j + 3] * tmp[i + 2];
            }
        }
    }
}



Code Snippet #4


Code C++ - [expand]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/*
 * Problem dimensions.  main() in this snippet hard-codes its loop bounds
 * as 60/59, so NX and NY must stay at 60 unless those bounds are changed
 * as well.
 */
#define NX 60   /* rows of A; length of tmp */
#define NY 60   /* columns of A; length of x and y */

/* File-scope loop counters; main() uses them as its induction variables. */
int i;
int j;

int tmp[NX];    /* intermediate vector, one entry per row of A */
int y[NY];      /* result vector, one entry per column of A */
int x[NY];      /* input vector, one entry per column of A */
int A[NX][NY];  /* matrix, indexed A[row][column] */
 
int main()
{
    for (i = 0; i < 60; i += 1) {
        y[i] = 0;
    }
    
    for (i = 0; i < 60; i += 1) {
        tmp[i] = 0;
    }
    
    int ii;
    
    for (ii = 0; ii <= 59; ii += 60) {
        for (i = ii; i <= ((59 < ii + 12*5 - 1?59 : ii + 12*5 - 1)); i += 5) {
            for (j = 0; j <= 59; j += 3) {
                tmp[i] = tmp[i] + A[i][j] * x[j] + A[i][j + 1] * x[j + 1] + A[i][j + 2] * x[j + 2];
                tmp[i + 1] = tmp[i + 1] + A[i + 1][j] * x[j] + A[i + 1][j + 1] * x[j + 1] + A[i + 1][j + 2] * x[j + 2];
                tmp[i + 2] = tmp[i + 2] + A[i + 2][j] * x[j] + A[i + 2][j + 1] * x[j + 1] + A[i + 2][j + 2] * x[j + 2];
                tmp[i + 3] = tmp[i + 3] + A[i + 3][j] * x[j] + A[i + 3][j + 1] * x[j + 1] + A[i + 3][j + 2] * x[j + 2];
                tmp[i + 4] = tmp[i + 4] + A[i + 4][j] * x[j] + A[i + 4][j + 1] * x[j + 1] + A[i + 4][j + 2] * x[j + 2];
            }
        }
    }
    
    int jj;
    
    for (jj = 0; jj <= 59; jj += 60) {
        for (j = jj; j <= ((59 < jj + 12*5 - 1?59 : jj + 12*5 - 1)); j += 5) {
            for (i = 0; i <= 59; i += 3) {
                y[j] = y[j] + A[i][j] * tmp[i] + A[i + 1][j] * tmp[i + 1] + A[i + 2][j] * tmp[i + 2];
                y[j + 1] = y[j + 1] + A[i][j + 1] * tmp[i] + A[i + 1][j + 1] * tmp[i + 1] + A[i + 2][j + 1] * tmp[i + 2];
                y[j + 2] = y[j + 2] + A[i][j + 2] * tmp[i] + A[i + 1][j + 2] * tmp[i + 1] + A[i + 2][j + 2] * tmp[i + 2];
                y[j + 3] = y[j + 3] + A[i][j + 3] * tmp[i] + A[i + 1][j + 3] * tmp[i + 1] + A[i + 2][j + 3] * tmp[i + 2];
                y[j + 4] = y[j + 4] + A[i][j + 4] * tmp[i] + A[i + 1][j + 4] * tmp[i + 1] + A[i + 2][j + 4] * tmp[i + 2];
            }
        }
    }
}

 

Status
Not open for further replies.
Cookies are required to use this site. You must accept them to continue using the site. Learn more…