changeset 113:3b89aa6dc7f2 cell_based_test

Add apply_tiled!, which tiles the iteration to improve cache usage. It does not improve the runtime at all at the moment.
author Jonatan Werpers <jonatan@werpers.com>
date Fri, 08 Feb 2019 21:19:34 +0100
parents 98c788cba9bf
children d24497780ebd
files diffOp.jl
diffstat 1 files changed, 28 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/diffOp.jl	Fri Feb 08 21:14:39 2019 +0100
+++ b/diffOp.jl	Fri Feb 08 21:19:34 2019 +0100
@@ -67,6 +67,34 @@
     return nothing
 end
 
+function apply_tiled!(D::DiffOpCartesian{2}, u::AbstractArray{T,2}, v::AbstractArray{T,2}) where T
+    # Run apply_region_tiled! once for each of the nine (r1, r2) combinations of
+    # Lower, Interior and Upper, passing D, u and v through unchanged.
+    # The second region varies fastest. Returns nothing.
+    regions = (Lower, Interior, Upper)
+    for r1 ∈ regions
+        for r2 ∈ regions
+            apply_region_tiled!(D, u, v, r1, r2)
+        end
+    end
+    return nothing
+end
+
+using TiledIteration
+function apply_region_tiled!(D::DiffOpCartesian{2}, u::AbstractArray{T,2}, v::AbstractArray{T,2}, r1::Type{<:Region}, r2::Type{<:Region}) where T
+    # Evaluate apply(D, v, ...) at every index of the region (r1,r2) and store the results in u.
+    # The region's indices are visited tile by tile (TileIterator). Returns nothing.
+    ri = regionindices(D.grid.numberOfPointsPerDim, closureSize(D.op), (r1,r2))
+    # I = ri[i,j] is the index D is evaluated at, so the result is written to u at I, not at the region-local (i,j).
+    for tileaxs ∈ TileIterator(axes(ri), padded_tilesize(T, (5,5), 2)) # TBD: Is this the right way, the right size?
+        for j ∈ tileaxs[2], i ∈ tileaxs[1] # first index innermost: Julia arrays are column-major
+            I = ri[i,j]
+            u[I[1],I[2]] = apply(D, v, (Index{r1}(I[1]), Index{r2}(I[2])))
+        end
+    end
+    return nothing
+end
+
 function apply(D::DiffOp, v::AbstractVector)::AbstractVector
     u = zeros(eltype(v), size(v))
     apply!(D,v,u)