1 /**
2 Module introduces $(LINK3 https://en.wikipedia.org/wiki/Kernel_(image_processing)#Convolution, image convolution) function.
3 
4 The following example loads the famous image of Lena Söderberg and performs Gaussian blurring by convolving the image with a Gaussian kernel.
5 
6 ----
7 import dcv.io.image : imread, ReadParams;
8 import dcv.core.image : Image, asType;
9 import dcv.imgproc.convolution : conv;
10 
11 Image lenaImage = imread("../data/lena.png", ReadParams(ImageFormat.IF_MONO, BitDepth.BD_8));
12 auto slice = lenaImage.sliced!ubyte;
13 ----
14 
15 ... this loads the following image:<br>
16 $(IMAGE https://github.com/libmir/dcv/blob/master/examples/data/lena.png?raw=true)
17 
18 ----
19 blurred = slice
20              .asType!float // convert ubyte data to float.
21              .conv(gaussian!float(0.84f, 5, 5)); // convolve image with gaussian kernel
22 
23 ----
24 
25 ... which gives the resulting image:<br>
26 $(IMAGE https://github.com/libmir/dcv/blob/master/examples/filter/result/outblur.png?raw=true)
27 
28 
29 Copyright: Copyright Relja Ljubobratovic 2016.
30 
31 Authors: Relja Ljubobratovic
32 
33 License: $(LINK3 http://www.boost.org/LICENSE_1_0.txt, Boost Software License - Version 1.0).
34 */
35 module dcv.imgproc.convolution;
36 
37 import std.traits : isAssignable, ReturnType;
38 import std.conv : to;
39 import std.parallelism : parallel, taskPool, TaskPool;
40 
41 import mir.math.common : fastmath;
42 
43 import mir.ndslice.slice;
44 import mir.ndslice.topology;
45 import mir.ndslice.algorithm : reduce;
46 import mir.ndslice.allocation: slice;
47 
48 import dcv.core.memory;
49 import dcv.core.utils;
50 
51 /**
52 Perform convolution to given tensor, using given kernel.
53 Convolution is supported for 1, 2, and 3 dimensional tensors.
54 
55 Params:
56     bc = (Template parameter) Boundary Condition function used while indexing the image matrix.
57     input = Input tensor.
58     kernel = Convolution kernel tensor. For 1D input, 1D kernel is expected.
59     For 2D input, 2D kernel is expected. For 3D input, 2D or 3D kernel is expected -
60     if 2D kernel is given, each item in kernel matrix is applied to each value in
61     corresponding 2D coordinate in the input.
62     prealloc = Pre-allocated buffer where the convolution result can be stored. Default
63     value is emptySlice, in which case the resulting array will be newly allocated. Also, if
64     prealloc is not of the same shape as the input, the resulting array will be newly allocated.
65     mask = Masking input. Convolution will skip each element where mask is 0. Default value
66     is empty slice, which tells that convolution will be performed on the whole input.
67     pool = Optional TaskPool instance used to parallelize computation.
68 
69 Returns:
70     Resulting image after convolution, of same type as input tensor.
71 
72 Note:
73     Input, mask and pre-allocated slices' strides must be the same.
74 */
InputTensor conv
    (alias bc = neumann, InputTensor, KernelTensor, MaskTensor = KernelTensor)
    (InputTensor input, KernelTensor kernel, InputTensor prealloc = InputTensor.init,
     MaskTensor mask = MaskTensor.init, TaskPool pool = taskPool)
in
{
    static assert(isSlice!InputTensor, "Input tensor has to be of type mir.ndslice.slice.Slice");
    static assert(isSlice!KernelTensor, "Kernel tensor has to be of type mir.ndslice.slice.Slice");
    static assert(isSlice!MaskTensor, "Mask tensor has to be of type mir.ndslice.slice.Slice");
    static assert(isBoundaryCondition!bc, "Invalid boundary condition test function.");
    static assert(isAssignable!(DeepElementType!InputTensor, DeepElementType!KernelTensor),
            "Incompatible types for input and kernel");

    immutable N = InputTensor.init.shape.length;
    immutable NK = KernelTensor.init.shape.length;

    // The mask matches the kernel's dimensionality, not necessarily the input's
    // (for 3D input with a 2D kernel the mask is applied per channel).
    static assert(MaskTensor.init.shape.length == NK, "Mask tensor has to be of same dimension as the kernel tensor.");

    immutable invalidKernelMsg = "Invalid kernel dimension";
    static if (N == 1)
        static assert(NK == 1, invalidKernelMsg);
    else static if (N == 2)
        static assert(NK == 2, invalidKernelMsg);
    else static if (N == 3)
        static assert(NK == 2, invalidKernelMsg);
    else
        static assert(0, "Convolution not implemented for given tensor dimension.");

    // In-place convolution is not supported: the sliding window would read
    // values already overwritten by earlier results.
    assert(input._iterator != prealloc._iterator, "Preallocated and input buffer cannot point to the same memory.");

    if (!mask.empty)
    {
        assert(mask.shape == input.shape, "Invalid mask size. Should be of same size as input tensor.");
        assert(input.strides == mask.strides, "Input tensor and mask need to have same strides.");
    }

    if (prealloc.empty)
        assert(input._stride!(N-1) == 1, "Input tensor has to be contiguous (i.e. input.stride!(N-1) == 1).");
    else
        assert(input.strides == prealloc.strides,
                "Input tensor and result (preallocated) buffer need to have same strides.");
}
body
{
    import mir.ndslice.allocation;

    // Allocate the result buffer when none was given, or when the given one
    // does not match the input's shape.
    static if (prealloc._strides.length == 0)
        if (prealloc.shape != input.shape)
            prealloc = uninitializedSlice!(DeepElementType!InputTensor)(input.shape);

    return convImpl!bc(input, kernel, prealloc, mask, pool);
}
127 
unittest
{
    // 1D convolution of a ramp with a central-difference kernel.
    import std.math : approxEqual;
    import std.algorithm.comparison : equal;

    auto signal = [0., 1., 2., 3., 4., 5.].sliced(6);
    auto diffKernel = [-1., 0., 1.].sliced(3);
    auto filtered = signal.conv(diffKernel);
    assert(filtered.equal!approxEqual([1., 2., 2., 2., 2., 1.]));
}
138 
unittest
{
    // 2D convolution keeps the input shape.
    auto img = slice!float(15, 15);
    auto filt = slice!float(3, 3);
    auto outSlice = img.conv(filt);
    assert(outSlice.shape == img.shape);
}
146 
unittest
{
    // 3D (multi-channel) convolution with a 2D kernel keeps the input shape.
    auto img = slice!float(15, 15, 3);
    auto filt = slice!float(3, 3);
    auto outSlice = img.conv(filt);
    assert(outSlice.shape == img.shape);
}
154 
/**
Kernel application step used as the reduction seed for the convolution
loops: accumulates the product `i * k` onto the running sum `r`.
*/
nothrow @nogc @fastmath auto kapply(T)(const T r, const T i, const T k)
{
    immutable contribution = i * k;
    return r + contribution;
}
159 
160 private:
161 
/**
1D convolution implementation.

Interior elements (where the kernel fits fully inside the input) are computed
in parallel over sliding windows; the borders are then filled sequentially
using the boundary condition `bc`.
*/
auto convImpl
    (alias bc = neumann, InputTensor, KernelTensor, MaskTensor)
    (InputTensor input, KernelTensor kernel, InputTensor prealloc, MaskTensor mask, TaskPool pool) 
if (InputTensor.init.shape.length == 1)
{
    alias InputType = DeepElementType!InputTensor;

    auto kl = kernel.length;
    auto kh = kl / 2; // kernel half-length (center offset within a window)

    if (mask.empty)
    {
        // Zip output and input so each sliding window carries its own
        // destination element (field .a) next to the input values (field .b).
        auto packedWindows = zip!true(prealloc, input).windows(kl);
        foreach (p; pool.parallel(packedWindows))
        {
            p[kh].a = reduce!(kapply!InputType)(0.0f, p.unzip!'b', kernel);
        }
    }
    else
    {
        // TODO: extract masked convolution as separate function?
        auto packedWindows = zip!true(prealloc, input, mask).windows(kl);
        foreach (p; pool.parallel(packedWindows))
        {
            // Only convolve where the mask's center element is non-zero;
            // masked-out elements keep their previous value.
            if (p[$ / 2].c)
                p[$ / 2].a = reduce!(kapply!InputType)(0.0f, p.unzip!'b', kernel);
        }
    }

    // Fix up the borders the window pass could not reach.
    handleEdgeConv1d!bc(input, prealloc, kernel, mask, 0, kl);
    handleEdgeConv1d!bc(input, prealloc, kernel, mask, input.length - 1 - kh, input.length);

    return prealloc;
}
196 
/**
2D convolution implementation.

Interior elements are computed in parallel over 2D sliding windows; the
four border strips are then filled sequentially using the boundary
condition `bc`.
*/
auto convImpl
    (alias bc = neumann, InputTensor, KernelTensor, MaskTensor)
    (InputTensor input, KernelTensor kernel, InputTensor prealloc, MaskTensor mask, TaskPool pool) 
if (InputTensor.init.shape.length == 2)
{
    auto krs = kernel.length!0; // kernel rows
    auto kcs = kernel.length!1; // kernel columns

    auto krh = krs / 2; // kernel half-height
    auto kch = kcs / 2; // kernel half-width

    auto useMask = !mask.empty;

    if (mask.empty)
    {
        // Zip output and input so each window carries its destination element.
        auto packedWindows = zip!true(prealloc, input).windows(krs, kcs);
        foreach (prow; pool.parallel(packedWindows))
            foreach (p; prow)
            {
                p[krh, kch].a = reduce!kapply(0.0f, p.unzip!'b', kernel);
            }
    }
    else
    {
        auto packedWindows = zip!true(prealloc, input, mask).windows(krs, kcs);
        foreach (prow; pool.parallel(packedWindows))
            foreach (p; prow)
                // Only convolve where the mask's center element is non-zero.
                if (p[krh, kch].c)
                    p[krh, kch].a = reduce!kapply(0.0f, p.unzip!'b', kernel);
    }

    handleEdgeConv2d!bc(input, prealloc, kernel, mask, [0, input.length!0], [0, kch]); // left column strip
    handleEdgeConv2d!bc(input, prealloc, kernel, mask, [0, input.length!0], [input.length!1 - kch, input.length!1]); // right column strip
    handleEdgeConv2d!bc(input, prealloc, kernel, mask, [0, krh], [0, input.length!1]); // top row strip
    handleEdgeConv2d!bc(input, prealloc, kernel, mask, [input.length!0 - krh, input.length!0], [0, input.length!1]); // bottom row strip

    return prealloc;
}
235 
/**
3D convolution implementation: applies the 2D kernel to every channel
(the trailing dimension) of the input independently.
*/
auto convImpl
    (alias bc = neumann, InputTensor, KernelTensor, MaskTensor)
    (InputTensor input, KernelTensor kernel, InputTensor prealloc, MaskTensor mask, TaskPool pool) 
if (InputTensor.init.shape.length == 3)
{
    foreach (channel; 0 .. input.length!2)
    {
        auto inputPlane = input[0 .. $, 0 .. $, channel];
        auto outputPlane = prealloc[0 .. $, 0 .. $, channel];
        inputPlane.conv(kernel, outputPlane, mask, pool);
    }

    return prealloc;
}
250 
/**
Sequentially convolves the 1D border region `[from, to)` of `input` into
`prealloc`, resolving out-of-range reads through the boundary condition `bc`.
Elements whose mask value is non-positive are left untouched.
*/
void handleEdgeConv1d(alias bc, T, K, M,
    SliceKind kindi,
    SliceKind kindp,
    SliceKind kindk,
    SliceKind kindm,
    )(
    Slice!(kindi, [1], T*) input,
    Slice!(kindp, [1], T*) prealloc,
    Slice!(kindk, [1], K*) kernel,
    Slice!(kindm, [1], M*) mask,
    size_t from, size_t to)
in
{
    assert(from < to);
}
body
{
    immutable klen = cast(int)kernel.length;
    immutable khalf = klen / 2;
    immutable masked = !mask.empty;

    foreach (pos; from .. to)
    {
        // Skip masked-out elements; they keep their previous value.
        if (masked && mask[pos] <= 0)
            continue;

        T acc = 0;
        // Offset runs from -khalf so the kernel is centered on `pos`;
        // signed arithmetic is required since the index may go negative.
        int offset = -khalf;
        foreach (kv; kernel)
        {
            acc += bc(input, cast(int)pos + offset) * kv;
            ++offset;
        }
        prealloc[pos] = acc;
    }
}
290 
/**
Sequentially convolves the 2D border region `rowRange` x `colRange` of
`input` into `prealloc`, resolving out-of-range reads through the boundary
condition `bc`. Elements whose mask value is non-positive are left untouched.
*/
void handleEdgeConv2d(alias bc, SliceKind kind0, SliceKind kind1, SliceKind kind2, SliceKind kind3, T, K, M)(
    Slice!(kind0, [2], T*) input,
    Slice!(kind1, [2], T*) prealloc,
    Slice!(kind2, [2], K*) kernel,
    Slice!(kind3, [2], M*) mask, 
    size_t[2] rowRange, size_t[2] colRange)
in
{
    assert(rowRange[0] < rowRange[1]);
    assert(colRange[0] < colRange[1]);
}
body
{
    immutable krows = cast(int)kernel.length!0;
    immutable kcols = cast(int)kernel.length!1;
    immutable krhalf = krows / 2;
    immutable kchalf = kcols / 2;
    immutable masked = !mask.empty;

    foreach (row; rowRange[0] .. rowRange[1])
    {
        foreach (col; colRange[0] .. colRange[1])
        {
            // Skip masked-out elements; they keep their previous value.
            if (masked && mask[row, col] <= 0)
                continue;

            T acc = 0;
            // Kernel is centered on (row, col); signed offsets may reach
            // outside the input, which `bc` resolves.
            int dr = -krhalf;
            foreach (krow; kernel)
            {
                int dc = -kchalf;
                foreach (kv; krow)
                {
                    acc += bc(input, cast(int)row + dr, cast(int)col + dc) * kv;
                    ++dc;
                }
                ++dr;
            }
            prealloc[row, col] = acc;
        }
    }
}