|
|
|
|
|
by bhuztez
3569 days ago
|
|
The OpenCL generation is mostly stolen from ppcg ( http://ppcg.gforge.inria.fr/ ). Unlike ppcg, local memory is not yet handled properly, which is why you can see some complicated expressions here:
__kernel
/*
 * kernel0 -- reads the 8x8 array v0, accumulates eight-term sums of products
 * of v0 elements, and writes the results into the 8x8 array v2.
 *
 *   v0  input; only read (staged into local_v0 below).
 *   v2  output; only written, four elements per work-item.
 *
 * NOTE(review): the subscripts only stay inside the declared array bounds
 * when get_group_id() and get_local_id() return 0 or 1 in both dimensions
 * (a 2x2 grid of 2x2 work-groups) -- confirm against the host launch code.
 * Under that assumption the value stored works out to
 *   v2[r][c] = sum over k = 0..7 of v0[r][k] * v0[k][c]
 * for r = 2*b0 + t0 + 4*i and c = 2*b1 + t1 + 4*j, with i and j in {0, 1}.
 */
void
kernel0(
__global float v0[8][8],
__global float v2[8][8]){
/* Staging buffer in __local memory: 2*2*16 = 64 floats, the same element
 * count as v0. */
__local float local_v0[2][2][16];
/* Per-work-item accumulators, one for each v2 element written at the end. */
float private_v2[2][2];
/* b0/b1: work-group id, t0/t1: id within the work-group, for dimensions
 * 0 and 1 respectively. */
int b0 = get_group_id(0);
int b1 = get_group_id(1);
int t0 = get_local_id(0);
/*
 * The next line reads t1 and then opens the staging loop.
 * Slot local_v0[t0][t1][c2] receives the v0 element whose row-major position
 * is f = 2*t0 + t1 + 4*c2 (row f / 8, column f % 8).
 * The copy is skipped unless b1 == t0 or the mod-32 test passes. Because 30,
 * 31 and 28 are congruent to -2, -1 and -4 modulo 32, that test is equivalent
 * to (16*b0 - (f + 1)) mod 32 >= 16.
 * NOTE(review): for b0 in {0, 1} this appears to select rows 2*b0, 2*b0 + 1,
 * 2*b0 + 4 and 2*b0 + 5 of v0, which are the rows the accumulation below
 * reads as its first operand. Slots failing both tests are never written.
 */
int t1 = get_local_id(1); for(int c2 = 0; (c2 <= 15); c2 = c2 + 1){
if(((((((((30 * t0) + (31 * t1)) + (16 * b0)) + (28 * c2)) + 31) % 32) >= 16) || (b1 == t0))){
local_v0[t0][t1][c2] = (v0[((((2 * t0) + t1) + (4 * c2)) / 8)][((((2 * t0) + t1) + (4 * c2)) % 8)]);
}
/* The barrier after the closing brace separates the local_v0 writes above
 * from the reads of slots written by other work-items below. */
} barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
/*
 * Accumulation. c0 and c1 start at 2*b0 and 2*b1 and advance in steps of 4,
 * so (c0 - 2*b0) / 4 and (c1 - 2*b1) / 4 are the private_v2 subscripts.
 */
for(int c0 = (2 * b0); (c0 <= 7); c0 = c0 + 4){
for(int c1 = (2 * b1); (c1 <= 7); c1 = c1 + 4){
private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)] = 0.000000;
/* c2 = 0..3 with c5 = 2*c2, 2*c2 + 1 makes c5 visit 0..7 in increasing
 * order. */
for(int c2 = 0; (c2 <= 3); c2 = c2 + 1){
for(int c5 = (2 * c2); (c5 <= ((2 * c2) + 1)); c5 = c5 + 1){
/* Substituting the staging layout (slot [a][b][c] holds row-major position
 * 2*a + b + 4*c of v0): the first operand is the slot for v0[t0 + c0][c5],
 * the second is the slot for v0[c5][t1 + c1]. */
private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)] = ((private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)]) + ((local_v0[(c2 % 2)][((-2 * c2) + c5)][(((2 * t0) + (2 * c0)) + (c2 / 2))]) * (local_v0[b1][t1][((((-2 * b1) + c1) / 4) + (2 * c5))])));
}
}
/* Self-assignment: stores the accumulator back into itself, no effect. */
private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)] = (private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)]);
}
/* After the closing brace of the accumulation loop, the write-back loops copy
 * private_v2[c0][c1] to v2 at row 2*b0 + t0 + 4*c0, column 2*b1 + t1 + 4*c1.
 * Note that c0 and c1 are reused here as the accumulator subscripts. */
} for(int c0 = 0; (c0 <= 1); c0 = c0 + 1){
for(int c1 = 0; (c1 <= 1); c1 = c1 + 1){
v2[(((2 * b0) + t0) + (4 * c0))][(((2 * b1) + t1) + (4 * c1))] = (private_v2[c0][c1]);
}
/* Trailing barrier after the stores; no statement follows it in the kernel. */
} barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
} |
|