module oclcv.sgm;

import core.stdc.stdio : printf;

import oclcv.clcore;

import dplug.core.nogc;

/**
 * Host-side driver for the kernels of the `CTKernel.KSGM` OpenCL program.
 *
 * An instance is bound to one image size (`height` x `width`) and one
 * disparity count (`disp_size`). `run` allocates the intermediate device
 * buffers, launches the kernels in a fixed order and returns one result buffer.
 *
 * NOTE(review): the kernel names ("matching_cost_kernel_128",
 * "winner_takes_all_kernel128") and the `BlockDim(128, 2)` launch suggest that
 * only `disp_size == 128` is supported - confirm against the kernel source.
 */
final class StereoSGMCL{
@nogc nothrow:
public:
    /**
     * Stores the problem dimensions and builds the program/kernels.
     *
     * Params:
     *   height    = image height in pixels
     *   width     = image width in pixels
     *   disp_size = number of disparities (third dimension of the cost buffers)
     *   ctx       = OpenCL context wrapper used for every allocation and launch
     *
     * A failed `initialize` is only reported through a `debug` assertion;
     * non-debug builds continue with an unusable object.
     */
    this(int height, int width, int disp_size, CLContext ctx){
        width_ = width;
        height_ = height;
        disp_size_ = disp_size;

        auto r = initialize(ctx);
        debug _assert(r, "error!");
    }

    /**
     * Builds the SGM program in `ctx` and looks up every kernel used by `run`.
     *
     * Calling this again releases the previously built program first, so
     * repeated initialization does not leak it.
     *
     * Returns: `false` when `ctx` is null (nothing is modified), `true` otherwise.
     */
    bool initialize(CLContext ctx){
        if(ctx is null)
            return false;

        // Release the program from an earlier initialize() call, if any.
        if(sgm_prog_ !is null){
            destroyFree(sgm_prog_);
            sgm_prog_ = null;
        }

        context_ = ctx;

        // initialize kernels
        sgm_prog_ = mallocNew!CLProgram(CTKernel.KSGM, context_);

        m_census_kernel = sgm_prog_.getKernel("census_kernel");
        m_matching_cost_kernel_128 = sgm_prog_.getKernel("matching_cost_kernel_128");

        m_compute_stereo_horizontal_dir_kernel_0 = sgm_prog_.getKernel("compute_stereo_horizontal_dir_kernel_0");
        m_compute_stereo_horizontal_dir_kernel_4 = sgm_prog_.getKernel("compute_stereo_horizontal_dir_kernel_4");
        m_compute_stereo_vertical_dir_kernel_2 = sgm_prog_.getKernel("compute_stereo_vertical_dir_kernel_2");
        m_compute_stereo_vertical_dir_kernel_6 = sgm_prog_.getKernel("compute_stereo_vertical_dir_kernel_6");

        m_compute_stereo_oblique_dir_kernel_1 = sgm_prog_.getKernel("compute_stereo_oblique_dir_kernel_1");
        m_compute_stereo_oblique_dir_kernel_3 = sgm_prog_.getKernel("compute_stereo_oblique_dir_kernel_3");
        m_compute_stereo_oblique_dir_kernel_5 = sgm_prog_.getKernel("compute_stereo_oblique_dir_kernel_5");
        m_compute_stereo_oblique_dir_kernel_7 = sgm_prog_.getKernel("compute_stereo_oblique_dir_kernel_7");

        m_winner_takes_all_kernel128 = sgm_prog_.getKernel("winner_takes_all_kernel128");
        m_check_consistency_left = sgm_prog_.getKernel("check_consistency_kernel_left");
        m_median_3x3 = sgm_prog_.getKernel("median3x3");
        m_copy_u8_to_u16 = sgm_prog_.getKernel("copy_u8_to_u16");
        m_clear_buffer = sgm_prog_.getKernel("clear_buffer");

        return true;
    }

    /**
     * Runs the kernel pipeline on a left/right image pair.
     *
     * Params:
     *   d_src_left  = left input buffer; asserted (debug builds only) to be
     *                 single-channel UBYTE
     *   d_src_right = right input buffer; same requirements
     *
     * Returns: a newly allocated USHORT `height_` x `width_` buffer written by
     * the median kernel from the left disparity buffer. It is the only buffer
     * allocated here that is not freed before returning, so the caller is
     * responsible for releasing it (it was created with `mallocNew`).
     *
     * All other intermediate buffers are freed before returning, and every
     * buffer field of this object is reset to null so that no dangling
     * reference to a freed or caller-owned buffer is kept.
     */
    CLBuffer run(CLBuffer d_src_left, CLBuffer d_src_right){
        debug _assert(d_src_left.metaData.dataType == UBYTE, "left data type should be ubyte");
        debug _assert(d_src_right.metaData.dataType == UBYTE, "right data type should be ubyte");

        debug _assert(d_src_left.metaData.numberOfChannels == 1, "Only single channel images are supported");
        debug _assert(d_src_right.metaData.numberOfChannels == 1, "Only single channel images are supported");

        this.d_src_left = d_src_left;
        this.d_src_right = d_src_right;

        // create buffers
        d_left = mallocNew!CLBuffer(context_, BufferMeta(ULONG, height_, width_));
        d_right = mallocNew!CLBuffer(context_, BufferMeta(ULONG, height_, width_));
        d_matching_cost = mallocNew!CLBuffer(context_, BufferMeta(UBYTE, height_, width_, disp_size_));
        d_scost = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_, disp_size_));
        d_left_disparity = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_));
        d_right_disparity = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_));
        d_tmp_left_disp = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_));
        d_tmp_right_disp = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_));

        scope(exit){
            // destroyFree takes the reference by value, so each field has to be
            // cleared explicitly; otherwise it would keep pointing at freed memory.
            destroyFree(d_left);            d_left = null;
            destroyFree(d_right);           d_right = null;
            destroyFree(d_matching_cost);   d_matching_cost = null;
            destroyFree(d_scost);           d_scost = null;
            destroyFree(d_left_disparity);  d_left_disparity = null;
            destroyFree(d_right_disparity); d_right_disparity = null;
            destroyFree(d_tmp_right_disp);  d_tmp_right_disp = null;

            // The inputs belong to the caller; do not keep references past this call.
            this.d_src_left = null;
            this.d_src_right = null;
        }

        // setup kernels
        m_matching_cost_kernel_128.setArgs(d_left, d_right, d_matching_cost, width_, height_);

        m_compute_stereo_horizontal_dir_kernel_0.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_horizontal_dir_kernel_4.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_vertical_dir_kernel_2.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_vertical_dir_kernel_6.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_oblique_dir_kernel_1.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_oblique_dir_kernel_3.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_oblique_dir_kernel_5.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_oblique_dir_kernel_7.setArgs(d_matching_cost, d_scost, width_, height_);

        m_winner_takes_all_kernel128.setArgs(d_left_disparity, d_right_disparity, d_scost, width_, height_);

        m_median_3x3.setArgs(d_left_disparity, d_tmp_left_disp, width_, height_);
        m_copy_u8_to_u16.setArgs(d_matching_cost, d_scost);

        m_census_kernel.setArgs(this.d_src_left, d_left, width_, height_);
        m_check_consistency_left.setArgs(d_tmp_left_disp, d_tmp_right_disp, this.d_src_left, width_, height_);

        census();
        mem_init();
        matching_cost();
        scan_cost();
        winner_takes_all();
        median();
        // NOTE(review): check_consistency_left() has its arguments set above but
        // is never launched here - confirm whether skipping it is intentional.
        context_.finish(0);

        // Hand ownership of the result to the caller and drop our alias to it.
        CLBuffer result = d_tmp_left_disp;
        d_tmp_left_disp = null;
        return result;
    }

    /// Releases the OpenCL program created by `initialize`.
    ~this(){
        destroyFree(sgm_prog_);
        sgm_prog_ = null;
    }

private:

@nogc nothrow:

    /// Launches the census kernel twice: left source -> d_left, then
    /// right source -> d_right, waiting on the context after each launch.
    void census(){
        m_census_kernel.setArgs(d_src_left, d_left);
        m_census_kernel.launch(0, GridDim((width_ + 16 - 1)/16, (height_ + 16 - 1)/16),
                               BlockDim(16,16));
        context_.finish(0);

        m_census_kernel.setArgs(d_src_right, d_right);
        m_census_kernel.launch(0, GridDim((width_ + 16 - 1)/16, (height_ + 16 - 1)/16),
                               BlockDim(16,16));
        context_.finish(0);
    }

    /// Launches the clear_buffer kernel on both disparity buffers and on d_scost.
    void mem_init(){
        // Widen before multiplying so the byte count is computed in size_t
        // instead of overflowing a 32-bit int for very large images.
        const size_t dispBytes = cast(size_t) width_ * cast(size_t) height_ * ushort.sizeof;
        const size_t costBytes = dispBytes * cast(size_t) disp_size_;

        // NOTE(review): the divisions truncate; when a byte count is not a
        // multiple of 32 * 256 the grid does not cover the remainder - confirm
        // against the clear_buffer kernel whether that tail matters.
        m_clear_buffer.setArgs(d_left_disparity);
        m_clear_buffer.launch(0, GridDim(cast(int)(dispBytes / 32 / 256)),
                              BlockDim(256));

        m_clear_buffer.setArgs(d_right_disparity);
        m_clear_buffer.launch(0, GridDim(cast(int)(dispBytes / 32 / 256)),
                              BlockDim(256));

        m_clear_buffer.setArgs(d_scost);
        m_clear_buffer.launch(0, GridDim(cast(int)(costBytes / 32 / 256)),
                              BlockDim(256));
    }

    /// Launches the matching-cost kernel with height_/2 groups of 128x2 work-items.
    void matching_cost(){
        m_matching_cost_kernel_128.launch(0, GridDim(height_/2), BlockDim(128,2));
    }

    /// Launches the eight directional cost kernels (two horizontal, two
    /// vertical, four oblique), each with 32 x PATHS_IN_BLOCK work-items per group.
    void scan_cost(){
        enum PATHS_IN_BLOCK = 8;
        const int obl_num_paths = width_ + height_;

        m_compute_stereo_horizontal_dir_kernel_0.launch(0,
            GridDim(height_ / PATHS_IN_BLOCK), BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_horizontal_dir_kernel_4.launch(0,
            GridDim(height_ / PATHS_IN_BLOCK), BlockDim(32, PATHS_IN_BLOCK));

        m_compute_stereo_vertical_dir_kernel_2.launch(0,
            GridDim(width_ / PATHS_IN_BLOCK), BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_vertical_dir_kernel_6.launch(0,
            GridDim(width_ / PATHS_IN_BLOCK), BlockDim(32, PATHS_IN_BLOCK));

        m_compute_stereo_oblique_dir_kernel_1.launch(0,
            GridDim(obl_num_paths / PATHS_IN_BLOCK), BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_oblique_dir_kernel_3.launch(0,
            GridDim(obl_num_paths / PATHS_IN_BLOCK), BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_oblique_dir_kernel_5.launch(0,
            GridDim(obl_num_paths / PATHS_IN_BLOCK), BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_oblique_dir_kernel_7.launch(0,
            GridDim(obl_num_paths / PATHS_IN_BLOCK), BlockDim(32, PATHS_IN_BLOCK));
    }

    /// Launches the winner-takes-all kernel over a (width_/8) x height_ grid.
    void winner_takes_all(){
        enum WTA_PIXEL_IN_BLOCK = 8;
        m_winner_takes_all_kernel128.launch(0,
            GridDim(width_ / WTA_PIXEL_IN_BLOCK, 1 * height_),
            BlockDim(32, WTA_PIXEL_IN_BLOCK));
    }

    /// Launches the 3x3 median kernel twice: left disparity -> d_tmp_left_disp,
    /// then right disparity -> d_tmp_right_disp.
    void median(){
        m_median_3x3.setArgs(d_left_disparity, d_tmp_left_disp);
        m_median_3x3.launch(0, GridDim((width_ + 16 - 1)/16, (height_ + 16 - 1)/16),
                            BlockDim(16,16));

        m_median_3x3.setArgs(d_right_disparity, d_tmp_right_disp);
        m_median_3x3.launch(0, GridDim((width_ + 16 - 1)/16, (height_ + 16 - 1)/16),
                            BlockDim(16,16));
    }

    /// Launches the left consistency-check kernel with 16x16 work-groups.
    /// Not called anywhere in this class at present.
    void check_consistency_left(){
        m_check_consistency_left.launch(0, GridDim((width_ + 16 - 1)/16,
                                        (height_ + 16 - 1)/16), BlockDim(16,16));
    }

    // Problem dimensions fixed at construction.
    int width_, height_, disp_size_;
    CLContext context_;   // supplied by the caller; not freed by this class
    CLProgram sgm_prog_;  // created in initialize(), freed in the destructor

    CLKernel m_census_kernel;
    CLKernel m_matching_cost_kernel_128;

    CLKernel m_compute_stereo_horizontal_dir_kernel_0;
    CLKernel m_compute_stereo_horizontal_dir_kernel_4;
    CLKernel m_compute_stereo_vertical_dir_kernel_2;
    CLKernel m_compute_stereo_vertical_dir_kernel_6;

    CLKernel m_compute_stereo_oblique_dir_kernel_1;
    CLKernel m_compute_stereo_oblique_dir_kernel_3;
    CLKernel m_compute_stereo_oblique_dir_kernel_5;
    CLKernel m_compute_stereo_oblique_dir_kernel_7;

    CLKernel m_winner_takes_all_kernel128;

    CLKernel m_check_consistency_left;

    CLKernel m_median_3x3;

    CLKernel m_copy_u8_to_u16;
    CLKernel m_clear_buffer;

    // Buffers are only valid while run() is executing; run() resets them to null.
    CLBuffer d_src_left, d_src_right, d_left, d_right, d_matching_cost,
             d_scost, d_left_disparity, d_right_disparity,
             d_tmp_left_disp, d_tmp_right_disp;

}