Ticket #18: permute_atomic.inl

File permute_atomic.inl, 1.3 kB (added by tmcdonell, 4 years ago)

permute write combining using atomic intrinsics

Line 
/* -----------------------------------------------------------------------------
 *
 * Module    : Permute
 * Copyright : (c) [2009..2010] Trevor L. McDonell
 * License   : BSD
 *
 * Forward permutation, characterised by a function that determines for each
 * element in the source array where it should go in the target. The output
 * array should be initialised with a default value, as the permutation may be
 * between arrays of different sizes and some positions may never be touched.
 *
 * Elements of the source array for which the permutation function yields the
 * magic index `ignore` are dropped.
 *
 * ---------------------------------------------------------------------------*/
16
17
/*
 * Forward-permutation kernel with write combining via atomicCAS.
 *
 * Every thread walks the source indices in a grid-stride loop: it starts at
 * its global thread index and advances by the total number of launched
 * threads until the index reaches `shape`. For each source index whose
 * destination `project(idx)` differs from `ignore`, the source element is
 * merged into d_out[dst] as `apply(current, source)`. The merge is retried
 * with atomicCAS until no other thread has modified the slot in between, so
 * concurrent writers to the same destination are all combined.
 *
 *   d_out  - output array, updated in place at the projected indices
 *   d_in0  - source array, read through get0
 *   shape  - exclusive upper bound on the source indices visited
 *
 * NOTE(review): ArrOut, ArrIn0, Ix, TyIn0, TyOut, get0, project, apply and
 * ignore are not defined in this file; presumably they are supplied by the
 * translation unit that includes it. TyOut must be a type for which an
 * atomicCAS overload exists -- confirm against the includer.
 */
extern "C"
__global__ void
permute
(
    ArrOut              d_out,
    const ArrIn0        d_in0,
    const Ix            shape
)
{
    Ix       dst;
    Ix       idx;

    /*
     * Use full 32-bit multiplies rather than __umul24: the intrinsic only
     * multiplies the low 24 bits of each operand, so block indices or grid
     * sizes of 2^24 and above would be truncated and source elements visited
     * (and combined) more than once. The result type is unsigned int either
     * way, so the conversion to Ix is unchanged.
     */
    const Ix gridSize = blockDim.x * gridDim.x;

    for (idx = blockDim.x * blockIdx.x + threadIdx.x; idx < shape; idx += gridSize)
    {
        dst = project(idx);

        if (dst != ignore)
        {
            TyIn0 x0  = get0(d_in0, idx);
            TyOut x1_ = get0(d_out, dst);   /* initial guess; may already be stale */
            TyOut x1;

            /*
             * Compare-and-swap retry loop: x1 is the value we expect to find,
             * x1_ receives what was actually stored. The swap took effect
             * exactly when the two agree; otherwise retry with the value
             * just observed.
             */
            do
            {
                x1  = x1_;
                x1_ = atomicCAS(&d_out[dst], x1, apply(x1, x0));
            }
            while (x1 != x1_);
        }
    }
}
50