Continue to Site

Welcome to EDAboard.com

Welcome to our site! EDAboard.com is an international Electronics Discussion Forum focused on EDA software, circuits, schematics, books, theory, papers, asic, pld, 8051, DSP, Network, RF, Analog Design, PCB, Service Manuals... and a whole lot more! To participate you need to register. Registration is free. Click here to register now.

Loop unrolling and tiling

Status
Not open for further replies.

promach

Advanced Member level 4
Joined
Feb 22, 2016
Messages
1,199
Helped
2
Reputation
4
Reaction score
5
Trophy points
1,318
Activity points
11,636
Could anyone point out which code snippets in the attached PDF correspond to loop unrolling (unrolling factor = 3, 4 or 5) and to loop tiling (tiling factor = 20, 15 or 12)?

View attachment Loop_unrolling_and_tiling.pdf

Code Snippet #1


Code C++ - [expand]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/* Dimensions of A: NX rows by NY columns. */
#define NX 60
#define NY 60
 
/* File-scope loop counters shared by every loop in main(). */
int i;
int j;
/* tmp[i] accumulates the sum over j of A[i][j] * x[j]. */
int tmp[NX];
/* y[j] accumulates the sum over i of A[i][j] * tmp[i]. */
int y[NY];
/* Input vector; never assigned in the visible code, so it keeps its static zero initialization. */
int x[NY];
/* Input matrix; likewise never assigned in the visible code. */
int A[NX][NY];
 
/*
 * Reference (non-unrolled, non-tiled) version.
 * Computes tmp = A * x, then y = A^T * tmp, on the file-scope arrays.
 */
int main()
{
   /* Clear both accumulators before summing into them. */
   i = 0;
   while (i < NY) {
      y[i] = 0;
      i++;
   }

   i = 0;
   while (i < NX) {
      tmp[i] = 0;
      i++;
   }

   /* tmp[i] = sum over j of A[i][j] * x[j] */
   for (i = 0; i < NX; i++) {
      for (j = 0; j < NY; j++) {
         tmp[i] += A[i][j] * x[j];
      }
   }

   /* y[j] = sum over i of A[i][j] * tmp[i] */
   for (i = 0; i < NX; i++) {
      const int row_sum = tmp[i];

      for (j = 0; j < NY; j++) {
         y[j] += A[i][j] * row_sum;
      }
   }
}



Code Snippet #2


Code C++ - [expand]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/* NX and NY are defined but not referenced in this snippet; sizes and loop bounds use the literal 60. */
#define NX 60
#define NY 60
 
/* File-scope loop counters shared by every loop in main(). */
int i;
int j;
/* tmp[i] accumulates the sum over j of A[i][j] * x[j]. */
int tmp[60];
/* y[j] accumulates the sum over i of A[i][j] * tmp[i]. */
int y[60];
/* Input vector; never assigned in the visible code, so it keeps its static zero initialization. */
int x[60];
/* Input matrix; likewise never assigned in the visible code. */
int A[60][60];
 
/*
 * Computes tmp = A * x, then y = A^T * tmp, on the file-scope 60-element
 * arrays.  Each loop nest is unrolled by 3 in both dimensions and wrapped
 * in a tile loop whose tile covers 20 * 3 = 60 iterations, so the tile
 * loop body executes exactly once.
 */
int main()
{
    for (i = 0; i < 60; i += 1) {
        y[i] = 0;
    }

    for (i = 0; i < 60; i += 1) {
        tmp[i] = 0;
    }

    int ii;

    for (ii = 0; ii <= 59; ii += 60) {
        /*
         * Upper bound is min(59, last index of the tile).  Both arms of the
         * clamp must use the same tile extent (20 * 3): testing against
         * ii + 20 - 1 would select the unclamped bound for any tile start
         * from 1 to 40.  That case is unreachable with the current step of
         * 60, but it would index past 59 if the tile step were reduced.
         */
        for (i = ii; i <= ((59 < ii + 20*3 - 1 ? 59 : ii + 20*3 - 1)); i += 3) {
            /* 3x3 unrolled body: three rows of A against three entries of x. */
            for (j = 0; j <= 59; j += 3) {
                tmp[i] = tmp[i] + A[i][j] * x[j] + A[i][j + 1] * x[j + 1] + A[i][j + 2] * x[j + 2];
                tmp[i + 1] = tmp[i + 1] + A[i + 1][j] * x[j] + A[i + 1][j + 1] * x[j + 1] + A[i + 1][j + 2] * x[j + 2];
                tmp[i + 2] = tmp[i + 2] + A[i + 2][j] * x[j] + A[i + 2][j + 1] * x[j + 1] + A[i + 2][j + 2] * x[j + 2];
            }
        }
    }

    int jj;

    for (jj = 0; jj <= 59; jj += 60) {
        /* Same clamp as above, applied to the column index. */
        for (j = jj; j <= ((59 < jj + 20*3 - 1 ? 59 : jj + 20*3 - 1)); j += 3) {
            /* 3x3 unrolled body: three columns of A against three entries of tmp. */
            for (i = 0; i <= 59; i += 3) {
                y[j] = y[j] + A[i][j] * tmp[i] + A[i + 1][j] * tmp[i + 1] + A[i + 2][j] * tmp[i + 2];
                y[j + 1] = y[j + 1] + A[i][j + 1] * tmp[i] + A[i + 1][j + 1] * tmp[i + 1] + A[i + 2][j + 1] * tmp[i + 2];
                y[j + 2] = y[j + 2] + A[i][j + 2] * tmp[i] + A[i + 1][j + 2] * tmp[i + 1] + A[i + 2][j + 2] * tmp[i + 2];
            }
        }
    }
}



Code Snippet #3


Code C++ - [expand]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/* NX and NY are defined but not referenced in this snippet; sizes and loop bounds use the literal 60. */
#define NX 60
#define NY 60
 
/* File-scope loop counters shared by every loop in main(). */
int i;
int j;
/* tmp[i] accumulates the sum over j of A[i][j] * x[j]. */
int tmp[60];
/* y[j] accumulates the sum over i of A[i][j] * tmp[i]. */
int y[60];
/* Input vector; never assigned in the visible code, so it keeps its static zero initialization. */
int x[60];
/* Input matrix; likewise never assigned in the visible code. */
int A[60][60];
 
int main()
{
    for (i = 0; i < 60; i += 1) {
        y[i] = 0;
    }
    
    for (i = 0; i < 60; i += 1) {
        tmp[i] = 0;
    }
    
    int ii;
    
    for (ii = 0; ii <= 59; ii += 60) {
        for (i = ii; i <= ((59 < ii + 15*4 - 1?59 : ii + 15*4 - 1)); i += 4) {
            for (j = 0; j <= 59; j += 3) {
                tmp[i] = tmp[i] + A[i][j] * x[j] + A[i][j + 1] * x[j + 1] + A[i][j + 2] * x[j + 2];
                tmp[i + 1] = tmp[i + 1] + A[i + 1][j] * x[j] + A[i + 1][j + 1] * x[j + 1] + A[i + 1][j + 2] * x[j + 2];
                tmp[i + 2] = tmp[i + 2] + A[i + 2][j] * x[j] + A[i + 2][j + 1] * x[j + 1] + A[i + 2][j + 2] * x[j + 2];
                tmp[i + 3] = tmp[i + 3] + A[i + 3][j] * x[j] + A[i + 3][j + 1] * x[j + 1] + A[i + 3][j + 2] * x[j + 2];
            }
        }
    }
    
    int jj;
 
    for (jj = 0; jj <= 59; jj += 60) {
        for (j = jj; j <= ((59 < jj + 15*4 - 1?59 : jj + 15*4 - 1)); j += 4) {
            for (i = 0; i <= 59; i += 3) {
                y[j] = y[j] + A[i][j] * tmp[i] + A[i + 1][j] * tmp[i + 1] + A[i + 2][j] * tmp[i + 2];
                y[j + 1] = y[j + 1] + A[i][j + 1] * tmp[i] + A[i + 1][j + 1] * tmp[i + 1] + A[i + 2][j + 1] * tmp[i + 2];
                y[j + 2] = y[j + 2] + A[i][j + 2] * tmp[i] + A[i + 1][j + 2] * tmp[i + 1] + A[i + 2][j + 2] * tmp[i + 2];
                y[j + 3] = y[j + 3] + A[i][j + 3] * tmp[i] + A[i + 1][j + 3] * tmp[i + 1] + A[i + 2][j + 3] * tmp[i + 2];
            }
        }
    }
}



Code Snippet #4


Code C++ - [expand]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/* NX and NY are defined but not referenced in this snippet; sizes and loop bounds use the literal 60. */
#define NX 60
#define NY 60
 
/* File-scope loop counters shared by every loop in main(). */
int i;
int j;
/* tmp[i] accumulates the sum over j of A[i][j] * x[j]. */
int tmp[60];
/* y[j] accumulates the sum over i of A[i][j] * tmp[i]. */
int y[60];
/* Input vector; never assigned in the visible code, so it keeps its static zero initialization. */
int x[60];
/* Input matrix; likewise never assigned in the visible code. */
int A[60][60];
 
int main()
{
    for (i = 0; i < 60; i += 1) {
        y[i] = 0;
    }
    
    for (i = 0; i < 60; i += 1) {
        tmp[i] = 0;
    }
    
    int ii;
    
    for (ii = 0; ii <= 59; ii += 60) {
        for (i = ii; i <= ((59 < ii + 12*5 - 1?59 : ii + 12*5 - 1)); i += 5) {
            for (j = 0; j <= 59; j += 3) {
                tmp[i] = tmp[i] + A[i][j] * x[j] + A[i][j + 1] * x[j + 1] + A[i][j + 2] * x[j + 2];
                tmp[i + 1] = tmp[i + 1] + A[i + 1][j] * x[j] + A[i + 1][j + 1] * x[j + 1] + A[i + 1][j + 2] * x[j + 2];
                tmp[i + 2] = tmp[i + 2] + A[i + 2][j] * x[j] + A[i + 2][j + 1] * x[j + 1] + A[i + 2][j + 2] * x[j + 2];
                tmp[i + 3] = tmp[i + 3] + A[i + 3][j] * x[j] + A[i + 3][j + 1] * x[j + 1] + A[i + 3][j + 2] * x[j + 2];
                tmp[i + 4] = tmp[i + 4] + A[i + 4][j] * x[j] + A[i + 4][j + 1] * x[j + 1] + A[i + 4][j + 2] * x[j + 2];
            }
        }
    }
    
    int jj;
    
    for (jj = 0; jj <= 59; jj += 60) {
        for (j = jj; j <= ((59 < jj + 12*5 - 1?59 : jj + 12*5 - 1)); j += 5) {
            for (i = 0; i <= 59; i += 3) {
                y[j] = y[j] + A[i][j] * tmp[i] + A[i + 1][j] * tmp[i + 1] + A[i + 2][j] * tmp[i + 2];
                y[j + 1] = y[j + 1] + A[i][j + 1] * tmp[i] + A[i + 1][j + 1] * tmp[i + 1] + A[i + 2][j + 1] * tmp[i + 2];
                y[j + 2] = y[j + 2] + A[i][j + 2] * tmp[i] + A[i + 1][j + 2] * tmp[i + 1] + A[i + 2][j + 2] * tmp[i + 2];
                y[j + 3] = y[j + 3] + A[i][j + 3] * tmp[i] + A[i + 1][j + 3] * tmp[i + 1] + A[i + 2][j + 3] * tmp[i + 2];
                y[j + 4] = y[j + 4] + A[i][j + 4] * tmp[i] + A[i + 1][j + 4] * tmp[i + 1] + A[i + 2][j + 4] * tmp[i + 2];
            }
        }
    }
}

 

Status
Not open for further replies.

Part and Inventory Search

Welcome to EDABoard.com

Sponsor

Back
Top