Hacker News new | ask | show | jobs
by bhuztez 3569 days ago
The OpenCL generation is mostly stolen from ppcg ( http://ppcg.gforge.inria.fr/ ). Unlike ppcg, the local memory is not yet handled properly; you can see that there are some complicated expressions here.

/*
 * kernel0 - machine-generated OpenCL kernel that reads the 8x8 array v0 and
 * writes the 8x8 array v2.
 *
 * Parameters:
 *   v0  8x8 float array in global memory; only read by this kernel.
 *   v2  8x8 float array in global memory; only written by this kernel.
 *
 * Working storage:
 *   local_v0    2x2x16 float staging buffer in __local memory, filled from v0.
 *   private_v2  2x2 float accumulator with no address-space qualifier; its
 *               values are copied to v2 at the end.
 *
 * b0/b1 are the work-group ids and t0/t1 the work-item local ids in
 * dimensions 0 and 1.
 * NOTE(review): t0, t1 and b1 are used directly as indices into extents of
 * size 2, so this presumably expects a 2x2 work-group and group ids in
 * {0, 1} -- confirm against the host launch code.
 */
__kernel void kernel0( __global float v0[8][8], __global float v2[8][8]){ __local float local_v0[2][2][16]; float private_v2[2][2]; int b0 = get_group_id(0); int b1 = get_group_id(1); int t0 = get_local_id(0); int t1 = get_local_id(1);

/*
 * Stage v0 into local memory. For each c2 in 0..15 the flat element index is
 * (2*t0 + t1 + 4*c2); it is split into row (/ 8) and column (% 8) of v0 and
 * stored at local_v0[t0][t1][c2]. The copy is guarded by the generated
 * modulo-32 predicate OR (b1 == t0), so not every iteration stores.
 * NOTE(review): because the store is conditional, it is not evident from this
 * code that every local_v0 element read later has been written -- verify.
 */
for(int c2 = 0; (c2 <= 15); c2 = c2 + 1){ if(((((((((30 * t0) + (31 * t1)) + (16 * b0)) + (28 * c2)) + 31) % 32) >= 16) || (b1 == t0))){ local_v0[t0][t1][c2] = (v0[((((2 * t0) + t1) + (4 * c2)) / 8)][((((2 * t0) + t1) + (4 * c2)) % 8)]); } }

/*
 * The barrier separates the local-memory stores above from the reads below.
 *
 * Accumulation: c0 starts at 2*b0 and c1 at 2*b1, each stepping by 4 while
 * <= 7. The accumulator slot is private_v2[(c0 - 2*b0) / 4][(c1 - 2*b1) / 4].
 * Each slot is zeroed, then for c2 in 0..3 and c5 in {2*c2, 2*c2 + 1} it adds
 *   local_v0[c2 % 2][c5 - 2*c2][2*t0 + 2*c0 + c2 / 2]
 *     * local_v0[b1][t1][(c1 - 2*b1) / 4 + 2*c5].
 * The last statement in the c1 body assigns the slot to itself and has no
 * effect.
 * NOTE(review): the first operand is indexed by [c2 % 2][c5 - 2*c2] rather
 * than by this work-item's own [t0][t1], so it can read elements stored by
 * other work-items in the group.
 */
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); for(int c0 = (2 * b0); (c0 <= 7); c0 = c0 + 4){ for(int c1 = (2 * b1); (c1 <= 7); c1 = c1 + 4){ private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)] = 0.000000; for(int c2 = 0; (c2 <= 3); c2 = c2 + 1){ for(int c5 = (2 * c2); (c5 <= ((2 * c2) + 1)); c5 = c5 + 1){ private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)] = ((private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)]) + ((local_v0[(c2 % 2)][((-2 * c2) + c5)][(((2 * t0) + (2 * c0)) + (c2 / 2))]) * (local_v0[b1][t1][((((-2 * b1) + c1) / 4) + (2 * c5))]))); } } private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)] = (private_v2[(((-2 * b0) + c0) / 4)][(((-2 * b1) + c1) / 4)]); } }

/*
 * Write-back: copy each of the four accumulator slots private_v2[c0][c1] to
 * v2[2*b0 + t0 + 4*c0][2*b1 + t1 + 4*c1].
 */
for(int c0 = 0; (c0 <= 1); c0 = c0 + 1){ for(int c1 = 0; (c1 <= 1); c1 = c1 + 1){ v2[(((2 * b0) + t0) + (4 * c0))][(((2 * b1) + t1) + (4 * c1))] = (private_v2[c0][c1]); } }

/*
 * Final barrier before the kernel returns.
 * NOTE(review): no memory access follows this barrier inside the kernel, so
 * it may be redundant -- confirm before removing.
 */
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); }

1 comments

Man, I think it dropped one of these: )