1 module oclcv.sgm;
2 
3 import core.stdc.stdio : printf;
4 
5 import oclcv.clcore;
6 
7 import dplug.core.nogc;
8 
/**
 * Stereo matcher that drives the kernels of the `CTKernel.KSGM` OpenCL program.
 *
 * An instance is bound to one image geometry (height, width, disparity count)
 * and one `CLContext`. `run` allocates its scratch buffers on every call,
 * launches the pipeline stages in order and releases the scratch buffers again
 * before returning.
 */
final class StereoSGMCL{
    @nogc nothrow:
public:
    /**
     * Stores the image geometry and builds the kernels through `initialize`.
     *
     * Params:
     *   height    = number of image rows
     *   width     = number of image columns
     *   disp_size = size of the third dimension of the cost buffers
     *               (presumably the disparity range; the kernel names suggest
     *               128 is expected - TODO confirm)
     *   ctx       = context the program and all buffers are created in
     *
     * A failed initialization is only reported through a debug assertion.
     */
    this(int height, int width, int disp_size, CLContext ctx){

        width_ = width; height_= height; disp_size_ = disp_size;
        auto r = initialize(ctx);
        debug _assert(r, "error!");
    }

    /**
     * Creates the `CTKernel.KSGM` program in `ctx` and looks up every kernel
     * used by the pipeline.
     *
     * Calling this again replaces the program; the previously created program
     * is released first so that it is not leaked.
     *
     * Returns: `false` when `ctx` is null (nothing is modified in that case),
     *          `true` otherwise.
     */
    bool initialize(CLContext ctx){
        if(!ctx)
            return false;

        // A repeated initialization would otherwise overwrite sgm_prog_ and
        // leak the program allocated by the earlier call.
        if(sgm_prog_ !is null){
            destroyFree(sgm_prog_);
            sgm_prog_ = null;
        }

        context_ = ctx;
        //initialize kernels
        sgm_prog_ = mallocNew!CLProgram(CTKernel.KSGM, context_);
        m_census_kernel = sgm_prog_.getKernel("census_kernel");
        m_matching_cost_kernel_128 = sgm_prog_.getKernel("matching_cost_kernel_128");
        m_compute_stereo_horizontal_dir_kernel_0 = sgm_prog_.getKernel("compute_stereo_horizontal_dir_kernel_0");
        m_compute_stereo_horizontal_dir_kernel_4 = sgm_prog_.getKernel("compute_stereo_horizontal_dir_kernel_4");
        m_compute_stereo_vertical_dir_kernel_2 = sgm_prog_.getKernel("compute_stereo_vertical_dir_kernel_2");
        m_compute_stereo_vertical_dir_kernel_6 = sgm_prog_.getKernel("compute_stereo_vertical_dir_kernel_6");
        m_compute_stereo_oblique_dir_kernel_1 = sgm_prog_.getKernel("compute_stereo_oblique_dir_kernel_1");
        m_compute_stereo_oblique_dir_kernel_3 = sgm_prog_.getKernel("compute_stereo_oblique_dir_kernel_3");
        m_compute_stereo_oblique_dir_kernel_5 = sgm_prog_.getKernel("compute_stereo_oblique_dir_kernel_5");
        m_compute_stereo_oblique_dir_kernel_7 = sgm_prog_.getKernel("compute_stereo_oblique_dir_kernel_7");
        m_winner_takes_all_kernel128 = sgm_prog_.getKernel("winner_takes_all_kernel128");
        m_check_consistency_left = sgm_prog_.getKernel("check_consistency_kernel_left");
        m_median_3x3 = sgm_prog_.getKernel("median3x3");
        m_copy_u8_to_u16 = sgm_prog_.getKernel("copy_u8_to_u16");
        m_clear_buffer = sgm_prog_.getKernel("clear_buffer");

        return true;
    }

    /**
     * Runs the pipeline on a left/right image pair.
     *
     * Params:
     *   d_src_left  = left image; debug builds assert it is single channel UBYTE
     *   d_src_right = right image; same requirements as the left image
     *
     * Returns: the newly allocated USHORT `height x width` buffer that receives
     *          the 3x3 median of the left disparity. It is not released by this
     *          class, so the caller is presumably expected to destroy it.
     *          `null` is returned when the instance was never successfully
     *          initialized or when either input is null.
     */
    CLBuffer run(CLBuffer d_src_left, CLBuffer d_src_right){
        // Without a program there are no kernels to launch, and the buffers
        // are dereferenced below; bail out instead of crashing.
        if(sgm_prog_ is null || d_src_left is null || d_src_right is null)
            return null;

        debug _assert(d_src_left.metaData.dataType == UBYTE, "left data type should be ubyte");
        debug _assert(d_src_right.metaData.dataType == UBYTE, "right data type should be ubyte");

        debug _assert(d_src_left.metaData.numberOfChannels == 1, "Only single channel images are supported");
        debug _assert(d_src_right.metaData.numberOfChannels == 1, "Only single channel images are supported");

        this.d_src_left = d_src_left;
        this.d_src_right = d_src_right;

        //create buffers

        d_left = mallocNew!CLBuffer(context_, BufferMeta(ULONG, height_, width_));
        d_right = mallocNew!CLBuffer(context_, BufferMeta(ULONG, height_, width_));
        d_matching_cost = mallocNew!CLBuffer(context_, BufferMeta(UBYTE, height_, width_, disp_size_));
        d_scost = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_, disp_size_));
        d_left_disparity = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_));
        d_right_disparity = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_));
        d_tmp_left_disp = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_));
        d_tmp_right_disp = mallocNew!CLBuffer(context_, BufferMeta(USHORT, height_, width_));

        // Release every scratch buffer on the way out. d_tmp_left_disp is
        // deliberately excluded because it is the return value. The fields are
        // reset so that no member keeps referring to a freed buffer.
        scope(exit){
            destroyFree(d_left);            d_left = null;
            destroyFree(d_right);           d_right = null;
            destroyFree(d_matching_cost);   d_matching_cost = null;
            destroyFree(d_scost);           d_scost = null;
            destroyFree(d_left_disparity);  d_left_disparity = null;
            destroyFree(d_right_disparity); d_right_disparity = null;
            destroyFree(d_tmp_right_disp);  d_tmp_right_disp = null;
        }

        //setup kernels

        m_matching_cost_kernel_128.setArgs(d_left, d_right, d_matching_cost, width_, height_);
        m_compute_stereo_horizontal_dir_kernel_0.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_horizontal_dir_kernel_4.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_vertical_dir_kernel_2.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_vertical_dir_kernel_6.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_oblique_dir_kernel_1.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_oblique_dir_kernel_3.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_oblique_dir_kernel_5.setArgs(d_matching_cost, d_scost, width_, height_);
        m_compute_stereo_oblique_dir_kernel_7.setArgs(d_matching_cost, d_scost, width_, height_);
        m_winner_takes_all_kernel128.setArgs(d_left_disparity, d_right_disparity, d_scost, width_, height_);

        // census() and median() later rebind only the two buffer arguments of
        // these kernels; the width/height arguments are supplied here.
        m_median_3x3.setArgs(d_left_disparity, d_tmp_left_disp, width_, height_);
        // NOTE(review): copy_u8_to_u16 gets its arguments but is never launched.
        m_copy_u8_to_u16.setArgs(d_matching_cost, d_scost);

        m_census_kernel.setArgs(d_src_left, d_left, width_, height_);
        // NOTE(review): the consistency check is configured but
        // check_consistency_left() is not part of the sequence below -
        // confirm whether that is intended.
        m_check_consistency_left.setArgs(d_tmp_left_disp, d_tmp_right_disp, d_src_left, width_, height_);

        census();
        mem_init();
        matching_cost();
        scan_cost();
        winner_takes_all();
        median();
        // Wait for queue 0 before the scope guard releases the scratch buffers.
        context_.finish(0);
        return d_tmp_left_disp;
    }

    /// Releases the OpenCL program created by `initialize`.
    ~this(){
        destroyFree(sgm_prog_);
    }

private:

@nogc nothrow:

    /// Edge length of the square work-groups used by the per-pixel kernels.
    enum TILE = 16;
    /// Work-group size used when launching `clear_buffer`.
    enum CLEAR_GROUP_SIZE = 256;
    /// Divisor applied to the byte count when sizing the `clear_buffer` grid
    /// (presumably the number of bytes one work-item clears - TODO confirm
    /// against the kernel source).
    enum CLEAR_BYTES_PER_ITEM = 32;

    /// Launches the census kernel for the left image and then for the right
    /// image, waiting for queue 0 after each launch.
    void census(){
        m_census_kernel.setArgs(d_src_left, d_left);
        m_census_kernel.launch(0, GridDim((width_ + TILE - 1)/TILE, (height_ + TILE - 1)/TILE),
                                                                    BlockDim(TILE,TILE));

        context_.finish(0);
        m_census_kernel.setArgs(d_src_right, d_right);
        m_census_kernel.launch(0, GridDim((width_ + TILE - 1)/TILE, (height_ + TILE - 1)/TILE),
                                                                    BlockDim(TILE,TILE));
        context_.finish(0);
    }

    /// Launches `clear_buffer` on `buf`, sizing the grid from `bytes`.
    /// The division truncates, so the grid covers only whole multiples of
    /// `CLEAR_BYTES_PER_ITEM * CLEAR_GROUP_SIZE` bytes.
    void clearBuffer(CLBuffer buf, size_t bytes){
        m_clear_buffer.setArgs(buf);
        m_clear_buffer.launch(0,
            GridDim(cast(int)(bytes / CLEAR_BYTES_PER_ITEM / CLEAR_GROUP_SIZE)),
            BlockDim(CLEAR_GROUP_SIZE));
    }

    /// Clears both disparity buffers and the aggregated cost buffer.
    void mem_init(){
        // Widen before multiplying so the byte count is computed in size_t.
        const size_t planeBytes = cast(size_t)width_ * height_ * ushort.sizeof;

        clearBuffer(d_left_disparity, planeBytes);
        clearBuffer(d_right_disparity, planeBytes);
        clearBuffer(d_scost, planeBytes * disp_size_);
    }

    /// Launches the matching-cost kernel with one work-group per two rows.
    /// NOTE(review): `height_/2` truncates for odd heights.
    void matching_cost(){
        m_matching_cost_kernel_128.launch(0, GridDim(height_/2), BlockDim(128,2));
    }

    /// Launches the eight directional cost kernels (0..7) on queue 0.
    /// NOTE(review): the grid sizes use truncating division by PATHS_IN_BLOCK.
    void scan_cost(){
        enum PATHS_IN_BLOCK = 8;
        const int obl_num_paths = width_ + height_ ;

        m_compute_stereo_horizontal_dir_kernel_0.launch(0,
        GridDim(height_ / PATHS_IN_BLOCK),BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_horizontal_dir_kernel_4.launch(0,
        GridDim(height_ / PATHS_IN_BLOCK),BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_vertical_dir_kernel_2.launch(0,
        GridDim(width_ / PATHS_IN_BLOCK),BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_vertical_dir_kernel_6.launch(0,
        GridDim(width_ / PATHS_IN_BLOCK),BlockDim(32, PATHS_IN_BLOCK));

        m_compute_stereo_oblique_dir_kernel_1.launch(0,
        GridDim(obl_num_paths / PATHS_IN_BLOCK),BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_oblique_dir_kernel_3.launch(0,
        GridDim(obl_num_paths / PATHS_IN_BLOCK),BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_oblique_dir_kernel_5.launch(0,
        GridDim(obl_num_paths / PATHS_IN_BLOCK),BlockDim(32, PATHS_IN_BLOCK));
        m_compute_stereo_oblique_dir_kernel_7.launch(0,
        GridDim(obl_num_paths / PATHS_IN_BLOCK),BlockDim(32, PATHS_IN_BLOCK));
    }

    /// Launches the winner-takes-all kernel that writes both disparity buffers.
    void winner_takes_all(){
        enum WTA_PIXEL_IN_BLOCK = 8;
        m_winner_takes_all_kernel128.launch(0,
        GridDim(width_ / WTA_PIXEL_IN_BLOCK,1 * height_),
        BlockDim(32, WTA_PIXEL_IN_BLOCK));
    }

    /// Applies the 3x3 median kernel to the left and then the right disparity,
    /// writing into the corresponding temporary buffers.
    void median(){
        m_median_3x3.setArgs(d_left_disparity, d_tmp_left_disp);
        m_median_3x3.launch(0, GridDim((width_ + TILE - 1)/TILE, (height_ + TILE - 1)/TILE),
                                                                    BlockDim(TILE,TILE));
        m_median_3x3.setArgs(d_right_disparity, d_tmp_right_disp);
        m_median_3x3.launch(0, GridDim((width_ + TILE - 1)/TILE, (height_ + TILE - 1)/TILE),
                                                                    BlockDim(TILE,TILE));
    }

    /// Launches the left consistency-check kernel with the arguments bound in
    /// `run`. Not called from within this class.
    void check_consistency_left(){
        m_check_consistency_left.launch(0,GridDim((width_ + TILE - 1)/TILE,
                                              (height_ + TILE - 1)/TILE),BlockDim(TILE,TILE));
    }

    // Image geometry fixed at construction time.
    int width_, height_, disp_size_;
    // Context supplied to initialize(); not released by this class.
    CLContext context_;
    // Program created by initialize(); released in the destructor.
    CLProgram sgm_prog_;

    CLKernel m_census_kernel;
    CLKernel m_matching_cost_kernel_128;

    CLKernel m_compute_stereo_horizontal_dir_kernel_0;
    CLKernel m_compute_stereo_horizontal_dir_kernel_4;
    CLKernel m_compute_stereo_vertical_dir_kernel_2;
    CLKernel m_compute_stereo_vertical_dir_kernel_6;

    CLKernel m_compute_stereo_oblique_dir_kernel_1;
    CLKernel m_compute_stereo_oblique_dir_kernel_3;
    CLKernel m_compute_stereo_oblique_dir_kernel_5;
    CLKernel m_compute_stereo_oblique_dir_kernel_7;


    CLKernel m_winner_takes_all_kernel128;

    CLKernel m_check_consistency_left;

    CLKernel m_median_3x3;

    CLKernel m_copy_u8_to_u16;
    CLKernel m_clear_buffer;

    // d_src_* refer to the caller's inputs of the current run() call; the
    // remaining buffers are allocated inside run() and, except for
    // d_tmp_left_disp (the return value), are null outside of it.
    CLBuffer d_src_left, d_src_right, d_left, d_right, d_matching_cost,
        d_scost, d_left_disparity, d_right_disparity,
        d_tmp_left_disp, d_tmp_right_disp;

}