1 | /* ----------------------------------------------------------------------------- |
---|

2 | * |
---|

3 | * Module : Permute |
---|

4 | * Copyright : (c) [2009..2010] Trevor L. McDonell |
---|

5 | * License : BSD |
---|

6 | * |
---|

7 | * Forward permutation, characterised by a function that determines for each |
---|

8 | * element in the source array where it should go in the target. The output |
---|

9 | * array should be initialised with a default value, as the permutation may be |
---|

10 | * between arrays of different sizes and some positions may never be touched. |
---|

11 | * |
---|

12 | * Elements from the source array are dropped for which the permutation function |
---|

13 | * yields the magic index `ignore`. |
---|

14 | * |
---|

15 | * ---------------------------------------------------------------------------*/ |
---|

16 | |
---|

17 | |
---|

/*
 * permute: combine each source element into the output array at the position
 * chosen by project().
 *
 * For every source index `idx` below `shape`, the destination `dst` is
 * computed with project(idx). Unless `dst` equals `ignore`, the source element
 * get0(d_in0, idx) is merged into d_out[dst] as apply(current, source) using
 * an atomicCAS retry loop, so that concurrent updates to the same destination
 * are not lost.
 *
 * Parameters:
 *   d_out  - output array; read via get0() and updated in place via atomicCAS
 *   d_in0  - source array; read via get0()
 *   shape  - upper bound (exclusive) on the source indices that are processed
 *
 * Launch layout: only the .x components of blockDim/blockIdx/gridDim/threadIdx
 * are used. Each thread starts at blockDim.x * blockIdx.x + threadIdx.x and
 * advances by blockDim.x * gridDim.x (grid-stride loop), so any 1D launch
 * configuration covers the whole index range.
 *
 * NOTE(review): project(), apply(), get0(), `ignore` and the types Ix, TyIn0,
 * TyOut, ArrIn0 and ArrOut are defined outside this block; their exact
 * semantics should be confirmed against their definitions.
 *
 * NOTE(review): the loop exit test `x1 != x1_` is a value comparison. If TyOut
 * were ever a floating-point type holding NaN the loop would not terminate;
 * presumably TyOut is an integer type supported by atomicCAS -- confirm.
 */
extern "C"
__global__ void
permute
(
    ArrOut              d_out,
    const ArrIn0        d_in0,
    const Ix            shape
)
{
    Ix idx;

    /*
     * Plain 32-bit multiplication instead of __umul24: the intrinsic only
     * multiplies the low 24 bits of each operand, which silently truncates
     * grid dimensions / block indices of 2^24 or more, and it is slower than
     * the native multiply on compute capability 2.x and later devices.
     */
    const Ix gridSize = blockDim.x * gridDim.x;

    for (idx = blockDim.x * blockIdx.x + threadIdx.x; idx < shape; idx += gridSize)
    {
        const Ix dst = project(idx);

        /* Source elements mapped to `ignore` are dropped. */
        if (dst != ignore)
        {
            TyIn0 x0  = get0(d_in0, idx);   /* element to merge in           */
            TyOut x1_ = get0(d_out, dst);   /* last observed output value    */
            TyOut x1;                       /* value the CAS expects to find */

            /*
             * Retry until the compare-and-swap succeeds: atomicCAS returns the
             * value that was actually stored at d_out[dst]. If it differs from
             * the expected value another thread updated the slot first, so
             * recompute apply() against the fresh value and try again.
             */
            do
            {
                x1  = x1_;
                x1_ = atomicCAS(&d_out[dst], x1, apply(x1, x0));
            }
            while (x1 != x1_);
        }
    }
}
---|

50 | |
---|