Pluto 示例:heat-1d 优化分析
1)
原始代码
#pragma scop
for (t = 0; t < T; t++) {
    for (i = 1; i < N + 1; i++) {
        A[(t + 1) % 2][i] = 0.250 * (A[t % 2][i + 1] - 2.0 * A[t % 2][i] + A[t % 2][i - 1]);
    }
}
#pragma endscop
用 #pragma scop 和 #pragma endscop 包围起来的循环,就是交给 Pluto 分析并变换的代码区域。
2)heat1d-tile.c
经过 Pluto 变换之后,上面的循环变成了下面这样:
int t1, t2, t3, t4;
register int lbv, ubv;
for (t1=-1563;t1<=0;t1++) {
  for (t2=max(t1,-t1-1);t2<=min(-t1+1,t1+3125);t2++) {
    for (t3=max(max(0,512*t1+512*t2),1024*t1+1);t3<=min(999,512*t1+512*t2+1023);t3++) {
      lbv=max(max(1024*t2,t3+1),-1024*t1+2*t3-1023);
      ubv=min(min(-1024*t1+2*t3,1024*t2+1023),t3+1600000);
#pragma ivdep
#pragma vector always
      for (t4=lbv;t4<=ubv;t4++) {
        A[(t3 + 1) % 2][(-t3+t4)] = (0.250 * ((A[t3 % 2][(-t3+t4) + 1] - (2.0 * A[t3 % 2][(-t3+t4)])) + A[t3 % 2][(-t3+t4) - 1]));;
      }
    }
  }
}
3)
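感觉生成的代码变复杂了:原来只有 t、i 两层循环,现在变成了 t1~t4 四层循环,而且循环边界全是 max/min 表达式。其中 t3 对应原来的 t(数组下标仍然是 t3 % 2),t4 - t3 对应原来的 i;外面多出来的 t1、t2 两层循环应该是分块(tiling)引入的。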