1 /** 2 Module introduces methods that compute disparity maps for stereo pairs. 3 4 $(DL Stereo Matching Base API: 5 $(DD 6 $(LINK2 #emptyDisparityMap,emptyDisparityMap) 7 $(LINK2 #StereoMatcher,StereoMatcher) 8 $(LINK2 #StereoPipelineProperties,StereoPipelineProperties) 9 $(LINK2 #StereoPipeline,StereoPipeline) 10 $(LINK2 #sumAbsoluteDifferences,sumAbsoluteDifferences) 11 $(LINK2 #normalisedCrossCorrelation,normalisedCrossCorrelation) 12 $(LINK2 #absoluteDifference,absoluteDifference) 13 $(LINK2 #squaredDifference,squaredDifference) 14 $(LINK2 #semiGlobalAggregator,semiGlobalAggregator) 15 $(LINK2 #winnerTakesAll,winnerTakesAll) 16 $(LINK2 #medianDisparityFilter,medianDisparityFilter) 17 $(LINK2 #bilateralDisparityFilter,bilateralDisparityFilter) 18 ) 19 ) 20 $(DL Stereo Matching Pipelines: 21 $(DD 22 $(LINK2 #semiGlobalMatchingPipeline,semiGlobalMatchingPipeline) 23 ) 24 ) 25 26 Copyright: Copyright © 2016, Henry Gouk. 27 28 Authors: Henry Gouk 29 30 License: $(LINK3 http://www.boost.org/LICENSE_1_0.txt, Boost Software License - Version 1.0). 31 */ 32 module dcv.multiview.stereo.matching; 33 34 import mir.math.common; 35 import mir.math.sum; 36 37 import mir.ndslice.slice; 38 import mir.ndslice.allocation; 39 import mir.ndslice.algorithm; 40 import mir.ndslice.topology; 41 42 import dcv.core; 43 import dcv.core.image; 44 import dcv.core.utils : emptySlice, clip; 45 import dcv.imgproc; 46 47 alias DisparityType = uint; 48 alias CostType = float; 49 alias DisparityMap = Slice!(SliceKind.contiguous, [2], DisparityType *); 50 alias CostVolume = Slice!(SliceKind.contiguous, [3], CostType *); 51 52 /** 53 Creates an empty disparity map 54 */ 55 DisparityMap emptyDisparityMap() 56 { 57 return emptySlice!([2], DisparityType); 58 } 59 60 /** 61 Handles boilerplate code common to all stereo matching methods. 62 */ 63 class StereoMatcher 64 { 65 public 66 { 67 /** 68 Compute a disparity map using the method defined by the subclass. 69 70 This method assumes the images have been rectified. 71 */ 72 DisparityMap evaluate(inout Image left, inout Image right, DisparityMap prealloc = emptyDisparityMap()) 73 in 74 { 75 assert(left.width == right.width && left.height == right.height, "left and right must have the same dimensions"); 76 assert(left.channels == right.channels, "left and right must have the same number of channels"); 77 assert(left.format == right.format, "left and right must have the same pixel format"); 78 assert(left.depth == right.depth, "left and right must have the same bit depth"); 79 } 80 body 81 { 82 if(prealloc.empty || prealloc.length!0 != left.height || prealloc.length!1 != left.width) 83 { 84 prealloc = new uint[left.width * left.height].sliced(left.height, left.width); 85 } 86 87 evaluateImpl(left, right, prealloc); 88 89 return prealloc; 90 } 91 } 92 93 protected 94 { 95 abstract void evaluateImpl(inout Image left, inout Image right, DisparityMap disp); 96 } 97 } 98 99 alias StereoCostFunction = void delegate(const ref StereoPipelineProperties properties, inout Image left, inout Image right, CostVolume cost); 100 alias StereoCostAggregator = void delegate(const ref StereoPipelineProperties props, CostVolume costVol); 101 alias DisparityMethod = void delegate(const ref StereoPipelineProperties props, CostVolume costVol, DisparityMap disp); 102 alias DisparityRefiner = void delegate(const ref StereoPipelineProperties props, DisparityMap disp); 103 104 /** 105 Contains the properties required to build a StereoPipeline 106 */ 107 struct StereoPipelineProperties 108 { 109 size_t frameWidth; 110 size_t frameHeight; 111 size_t frameChannels; 112 DisparityType minDisparity; 113 DisparityType disparityRange; 114 115 this(size_t frameWidth, size_t frameHeight, size_t frameChannels = 3, DisparityType minDisparity = 0, DisparityType disparityRange = 64) 116 { 117 this.frameWidth = frameWidth; 118 this.frameHeight = frameHeight; 119 this.frameChannels = frameChannels; 120 this.minDisparity = minDisparity; 121 this.disparityRange = disparityRange; 122 } 123 } 124 125 /** 126 This class provides a framework for constructing stereo matching pipelines that conform 127 to the taxonomy laid out in Scharstein and Szeliski (2002). 128 129 According to this taxonomy, stereo matching can be divided into four steps: 130 131 - Matching cost computation 132 - Cost aggregation 133 - Disparity computation 134 - Disparity refinement 135 136 Implementations of various algorithms that conform to requirements of these components 137 can be found in this module, as well as several helper functions that will create 138 commonly used pipelines out of these building blocks. 139 */ 140 class StereoPipeline : StereoMatcher 141 { 142 public 143 { 144 this(const ref StereoPipelineProperties properties, StereoCostFunction costFunc, StereoCostAggregator aggregator, DisparityMethod dispMethod, DisparityRefiner refiner) 145 { 146 mProperties = properties; 147 mCostFunction = costFunc; 148 mCostAggregator = aggregator; 149 mDisparityMethod = dispMethod; 150 mDisparityRefiner = refiner; 151 152 //Preallocate the cost tensor 153 mCost = slice!CostType(properties.frameHeight, properties.frameWidth, properties.disparityRange - properties.minDisparity); 154 } 155 } 156 157 protected 158 { 159 CostVolume mCost; 160 StereoPipelineProperties mProperties; 161 StereoCostFunction mCostFunction; 162 StereoCostAggregator mCostAggregator; 163 DisparityMethod mDisparityMethod; 164 DisparityRefiner mDisparityRefiner; 165 166 override void evaluateImpl(inout Image left, inout Image right, DisparityMap disp) 167 { 168 mCostFunction(mProperties, left, right, mCost); 169 mCostAggregator(mProperties, mCost); 170 mDisparityMethod(mProperties, mCost, disp); 171 mDisparityRefiner(mProperties, disp); 172 } 173 } 174 } 175 176 /** 177 Computes the sum of absolute differences between two image patches in order to compute the matching cost. 178 */ 179 StereoCostFunction sumAbsoluteDifferences(uint windowSize = 5) 180 { 181 import mir.ndslice.internal; 182 183 static @fastmath CostType sad(CostType a, CostType b, CostType c) 184 { 185 return a + fabs(b - c); 186 } 187 188 return windowCost!((l, r) => reduce!sad(CostType(0), l, r))(windowSize); 189 } 190 191 /** 192 Computes the normalised cross correlation, also known as the cosine similarity, between image patches. 193 194 The resulting values are multiplied by negative one, so as to act as a loss rather than a fitness. 195 */ 196 StereoCostFunction normalisedCrossCorrelation(uint windowSize = 5) 197 { 198 import mir.ndslice.internal; 199 200 static @fastmath CostType fma(CostType c, CostType a, CostType b) 201 { 202 return c + a * b; 203 } 204 205 alias dot = reduce!fma; 206 207 static @fastmath CostType ncc(L, R)(L l, R r) 208 { 209 return -dot(CostType(0), l, r) / sqrt(dot(CostType(0), l, l) * dot(CostType(0), r, r)); 210 } 211 212 return windowCost!ncc(windowSize); 213 } 214 215 private StereoCostFunction windowCost(alias fun)(uint windowSize) 216 { 217 void costFunc(const ref StereoPipelineProperties props, inout Image left, inout Image right, CostVolume costVol) 218 { 219 //Get the images as slices 220 auto l = left 221 .asType!CostType 222 .sliced!CostType; 223 224 auto r = right 225 .asType!CostType 226 .sliced!CostType; 227 228 auto lpad = slice([l.shape[0] + windowSize - 1, l.shape[1] + windowSize - 1, l.shape[2]], CostType(0)); 229 auto rpad = slice([l.shape[0] + windowSize - 1, l.shape[1] + windowSize - 1, l.shape[2]], CostType(0)); 230 lpad[windowSize / 2 .. $ - windowSize / 2, windowSize / 2 .. $ - windowSize / 2, 0 .. $] = l[]; 231 rpad[windowSize / 2 .. $ - windowSize / 2, windowSize / 2 .. $ - windowSize / 2, 0 .. $] = r[]; 232 233 for(size_t d = 0; d < props.disparityRange; d++) 234 { 235 costVol[0 .. $, 0 .. d, d] = CostType.max; 236 import mir.ndslice.dynamic; 237 costVol[0 .. $, d .. $, d] = zip!true(lpad[0 .. $, d .. $], rpad[0 .. $, 0 .. $ - d]) 238 .pack!1 239 .windows(windowSize, windowSize) 240 .unpack 241 .universal 242 .transposed!(0, 1, 4) 243 .pack!2 244 .map!(x => fun(x.unzip!'a', x.unzip!'b')) 245 .pack!1 246 .map!sum 247 .unpack; 248 } 249 } 250 251 return &costFunc; 252 } 253 254 /** 255 Creates a StereoCostFunction that computes the pixelwise absolute difference between intensities in the left and right images 256 */ 257 StereoCostFunction absoluteDifference() 258 { 259 import std.functional: toDelegate; 260 import mir.functional; 261 return toDelegate(&pointwiseCost!(pipe!("a - b", fabs))); 262 } 263 264 /** 265 Creates a StereoCostFunction that computes the pixelwise squared difference between intensities in the left and right images 266 */ 267 StereoCostFunction squaredDifference() 268 { 269 import std.functional: toDelegate; 270 import mir.functional; 271 return toDelegate(&pointwiseCost!(pipe!("a - b", "a * a"))); 272 } 273 274 /** 275 Generic method that can be used for computing pointwise matching costs 276 */ 277 private void pointwiseCost(alias fun)(const ref StereoPipelineProperties properties, inout Image left, inout Image right, CostVolume costVol) 278 { 279 //Get the images as slices 280 auto l = left 281 .asType!CostType 282 .sliced!CostType; 283 284 auto r = right 285 .asType!CostType 286 .sliced!CostType; 287 288 for(size_t d = 0; d < properties.disparityRange; d++) 289 { 290 //Fill the invalid region of the cost volume with a very high cost 291 costVol[0 .. $, 0 .. d, d] = CostType.max; 292 293 //Compute the costs for the current disparity 294 costVol[0 .. $, d .. $, d] = zip!true(l[0 .. $, d .. $], r[0 .. $, 0 .. $ - d]) 295 .map!fun 296 .pack!1 297 .map!sum 298 .unpack; 299 } 300 } 301 302 /** 303 Implements the cost aggregation method described by Hirschmuller (2007), commonly known as Semi-Global Matching. 304 */ 305 StereoCostAggregator semiGlobalAggregator(size_t numPaths = 8, CostType p1 = 15, CostType p2 = 100) 306 in 307 { 308 assert(numPaths == 2 || numPaths == 4 || numPaths == 8 || numPaths == 16, "numPaths must be 2, 4, 8, or 16"); 309 } 310 body 311 { 312 struct Path 313 { 314 bool reverseY; 315 bool reverseX; 316 int deltaY; 317 int deltaX; 318 } 319 320 static Path[16] paths = [//Horizontal 321 Path(false, false, 0, 1), 322 Path(false, true, 0, 1), 323 //Vertical 324 Path(false, false, 1, 0), 325 Path(true, false, 1, 0), 326 //45 degree angle 327 Path(false, false, 1, 1), 328 Path(false, true, 1, 1), 329 Path(true, false, 1, 1), 330 Path(true, true, 1, 1), 331 //22.5 degree increments 332 Path(false, false, 1, 2), 333 Path(false, true, 1, 2), 334 Path(true, false, 1, 2), 335 Path(true, true, 1, 2), 336 Path(false, false, 2, 1), 337 Path(false, true, 2, 1), 338 Path(true, false, 2, 1), 339 Path(true, true, 2, 1)]; 340 341 void aggregator(const ref StereoPipelineProperties props, CostVolume costVol) 342 { 343 //Create a new cost volume full of zeros 344 auto totalCost = slice(costVol.shape, 0.0f); 345 346 auto height = cast(int)costVol.length!0; 347 auto width = cast(int)costVol.length!1; 348 349 foreach(path; paths[0 .. numPaths]) 350 { 351 auto tmpCostVol = costVol.universal; 352 import mir.ndslice.dynamic; 353 354 if(path.reverseY) 355 { 356 tmpCostVol = tmpCostVol.reversed!0; 357 } 358 359 if(path.reverseX) 360 { 361 tmpCostVol = tmpCostVol.reversed!1; 362 } 363 364 auto pathCost = tmpCostVol.slice; 365 366 //Iterate over the y coordinate 367 for(int y = path.deltaY; y < height; y++) 368 { 369 //Iterate over the x coordinate 370 for(int x = path.deltaX; x < width; x++) 371 { 372 CostType minCost = reduce!fmin(CostType.max, pathCost[y - path.deltaY, x - path.deltaX]); 373 374 //Iterate over each possible disparity 375 for(int d = 0; d < props.disparityRange; d++) 376 { 377 CostType cost = fmin(pathCost[y - path.deltaY, x - path.deltaX, d], minCost + p2); 378 379 if(d > 0) 380 { 381 cost = fmin(cost, pathCost[y - path.deltaY, x - path.deltaX, d - 1] + p1); 382 } 383 384 if(d < props.disparityRange - 1) 385 { 386 cost = fmin(cost, pathCost[y - path.deltaY, x - path.deltaX, d + 1] + p1); 387 } 388 389 pathCost[y, x, d] += cost - minCost; 390 } 391 } 392 } 393 394 tmpCostVol = pathCost.universal; 395 396 if(path.reverseY) 397 { 398 tmpCostVol = tmpCostVol.reversed!0; 399 } 400 401 if(path.reverseX) 402 { 403 tmpCostVol = tmpCostVol.reversed!1; 404 } 405 406 totalCost[] += tmpCostVol[]; 407 } 408 409 costVol[] = totalCost[]; 410 } 411 412 return &aggregator; 413 } 414 415 /** 416 Implements the naive winner takes all algorithm for computing a diparity map from a cost volume. 417 418 At each (x, y) coordinate the disparity with the lowest cost is selected. 419 */ 420 DisparityMethod winnerTakesAll() 421 { 422 void disparityMethod(const ref StereoPipelineProperties props, CostVolume costVol, DisparityMap disp) 423 { 424 import std.algorithm.searching: minPos; 425 disp[] = costVol 426 .pack!1 427 .map!(x => cast(uint)(x.length - minPos(x).length))[]; 428 } 429 430 return &disparityMethod; 431 } 432 433 /** 434 Applies a median filter to the disparity map in order to correct outliers. 435 */ 436 DisparityRefiner medianDisparityFilter(size_t windowSize = 3) 437 { 438 void disparityRefiner(const ref StereoPipelineProperties props, DisparityMap disp) 439 { 440 disp[] = medianFilter(disp, windowSize)[]; 441 } 442 443 return &disparityRefiner; 444 } 445 446 /** 447 Applies a bilateral filter to the disparity map in order to correct outliers. 448 */ 449 DisparityRefiner bilateralDisparityFilter(uint windowSize, float sigmaCol, float sigmaSpace) 450 { 451 void disparityRefiner(const ref StereoPipelineProperties props, DisparityMap disp) 452 { 453 disp[] = bilateralFilter!DisparityType(disp, sigmaCol, sigmaSpace, windowSize); 454 } 455 456 return &disparityRefiner; 457 } 458 459 /** 460 Creates a StereoPipeline that performs semi-global matching. 461 462 Absolute difference is used for computing costs, and 3x3 median filter is applied to the winner take all disparity map. 463 */ 464 StereoPipeline semiGlobalMatchingPipeline(const ref StereoPipelineProperties props, size_t numPaths = 8, CostType p1 = 15, CostType p2 = 100) 465 { 466 return new StereoPipeline(props, absoluteDifference(), semiGlobalAggregator(numPaths, p1, p2), winnerTakesAll(), medianDisparityFilter()); 467 } 468