Ticket #18: permute_tag.inl

File permute_tag.inl, 1.6 kB (added by tmcdonell, 4 years ago)

permute write combining using integer tagging

Line 
1/* -----------------------------------------------------------------------------
2 *
3 * Module    : Permute
4 * Copyright : (c) [2009..2010] Trevor L. McDonell
5 * License   : BSD
6 *
7 * Forward permutation, characterised by a function that determines for each
8 * element in the source array where it should go in the target. The output
9 * array should be initialised with a default value, as the permutation may be
10 * between arrays of different sizes and some positions may never be touched.
11 *
12 * Elements from the source array are dropped for which the permutation function
13 * yields the magic index `ignore`.
14 *
15 * ---------------------------------------------------------------------------*/
16
17
18#define TAG_MASK        ((1 << 27) - 1)
19#define TAG_THREAD      (threadIdx.x << 27)
20
21extern "C"
22__global__ void
23permute
24(
25    ArrOut              d_out,
26    const ArrIn0        d_in0,
27    const Ix            shape
28)
29{
30    Ix       dst;
31    Ix       idx;
32    const Ix gridSize = __umul24(blockDim.x, gridDim.x);
33
34    for (idx = __umul24(blockDim.x, blockIdx.x) + threadIdx.x; idx < shape; idx += gridSize)
35    {
36        dst = project(idx);
37
38        if (dst != ignore)
39        {
40            TyOut x1;
41            TyIn0 x0 = get0(d_in0, idx);
42
43            do
44            {
45                x1 = get0(d_out, dst) & TAG_MASK;
46                x1 = apply(x0, x1)    | TAG_THREAD;
47                set(d_out, dst, x1);
48
49                __syncthreads();
50            }
51            while (get0(d_out, dst) != x1);
52
53            __syncthreads();
54            set(d_out, dst, get0(d_out, dst) & TAG_MASK);
55        }
56    }
57}
58