url: http://paste.minad.de/index.php?nid=403
Notizname: - von: - Sprache:

  1. import java.util.Arrays;
  2.  
  3. import static util.AparapiUtilities.*;
  4.  
  5. import com.amd.aparapi.*;
  6.  
  7. class Reduktion5 { // dreistufige Reduktion
  8. private static int n = 1 << 27;
  9.  
  10. public static void main(String[] args) {
  11. int[] globalMemory = new int[n];
  12. Arrays.fill(globalMemory, 1);
  13. OpenCLDevice device = selectDevice();
  14. // Local Memory: 2 Integer pro Work-Item, 4 Byte pro Integer
  15. int localSize = Math.min((int) device.getLocalMemSize() / 8,
  16. device.getMaxWorkGroupSize());
  17. int globalSize = 2 * localSize * localSize;
  18. if (n > 2 * globalSize) { // Stufe 1: sequentielle Reduktion
  19. Kernel k = new ReduceSeq(n, globalMemory);
  20. k.execute(Range.create(device, 2 * globalSize));
  21. addProfile(k);
  22. k.dispose();
  23. n = 2 * globalSize;
  24. }
  25. int[] localMemory = new int[localSize * 2];
  26. Kernel k = new ReducePar(globalMemory, localMemory);
  27. k.setExplicit(true);
  28. if (n > 2 * localSize) { // Stufe 2: mehrere Work-Groups
  29. globalSize = Math.min(globalSize, n / 2);
  30. k.execute(Range.create(device, globalSize, localSize));
  31. addProfile(k);
  32. n = globalSize / localSize;
  33. }
  34. // Stufe 3: 1 abschließende Work-Group
  35. k.execute(Range.create(device, n / 2, n / 2));
  36. k.get(globalMemory);
  37. addProfile(k);
  38. System.out.println("Summe = " + globalMemory[0]);
  39. showAccumulatedProfile();
  40. k.dispose();
  41. }
  42.  
  43. private static class ReduceSeq extends Kernel { // sequentielle Reduktion
  44. private int n;
  45. private int[] globalMemory;
  46.  
  47. ReduceSeq(int n, int[] globalMemory) {
  48. this.n = n;
  49. this.globalMemory = globalMemory;
  50. }
  51.  
  52. @Override
  53. public void run() {
  54. int id = getGlobalId(0);
  55. int size = getGlobalSize(0);
  56. int sum = 0;
  57. for (int i = id; i < n; i += size)
  58. sum += globalMemory[i];
  59. globalMemory[id] = sum;
  60. }
  61. }
  62.  
  63. private static class ReducePar extends Kernel { // parallele Reduktion
  64. private int[] gMem;
  65. @Local
  66. private int[] lMem;
  67.  
  68. ReducePar(int[] gMem, int[] lMem) {
  69. this.gMem = gMem;
  70. this.lMem = lMem;
  71. }
  72.  
  73. @Override
  74. public void run() {
  75. int lIndex = getLocalId(0);
  76. int gIndex = getGroupId(0) * getLocalSize(0) * 2 + lIndex;
  77. lMem[lIndex] = gMem[gIndex];
  78. lMem[lIndex + getLocalSize(0)] = gMem[gIndex + getLocalSize(0)];
  79. localBarrier();
  80. for (int s = getLocalSize(0); s >= 1; s /= 2) {
  81. if (lIndex < s)
  82. lMem[lIndex] += lMem[lIndex + s];
  83. localBarrier();
  84. }
  85. if (lIndex == 0)
  86. gMem[getGroupId(0)] = lMem[0];
  87. }
  88. }
  89. }
Pastelog:
erstellt 2014.11.19 11:07:28
Hits
508




© 2009-2010 rellig