comparison diffOp.jl @ 113:3b89aa6dc7f2 cell_based_test

Add apply_tiled!, which tiles the iteration to optimize cache usage. It doesn't improve the runtime at all at the moment.
author Jonatan Werpers <jonatan@werpers.com>
date Fri, 08 Feb 2019 21:19:34 +0100
parents 98c788cba9bf
children d24497780ebd
comparison
equal deleted inserted replaced
112:98c788cba9bf 113:3b89aa6dc7f2
65 @inbounds u[I] = apply(D, v, indextuple) 65 @inbounds u[I] = apply(D, v, indextuple)
66 end 66 end
67 return nothing 67 return nothing
68 end 68 end
69 69
"""
    apply_tiled!(D::DiffOpCartesian{2}, u, v)

Apply `D` to `v` and store the result in `u`, one region pair at a time.

Every combination of `Lower`, `Interior` and `Upper` in the two dimensions is
forwarded to `apply_region_tiled!`. Returns `nothing`.
"""
function apply_tiled!(D::DiffOpCartesian{2}, u::AbstractArray{T,2}, v::AbstractArray{T,2}) where T
    regions = (Lower, Interior, Upper)
    # The first region varies slowest: (Lower, Lower), (Lower, Interior), ...
    for r1 ∈ regions, r2 ∈ regions
        apply_region_tiled!(D, u, v, r1, r2)
    end
    return nothing
end
82
83 using TiledIteration
"""
    apply_region_tiled!(D::DiffOpCartesian{2}, u, v, r1, r2)

Evaluate `D` on `v` at every grid index of the region pair `(r1, r2)` and store
each value at that same grid index of `u`.

The indices of the region are obtained from `regionindices` and visited tile by
tile through `TileIterator`. Returns `nothing`.
"""
function apply_region_tiled!(D::DiffOpCartesian{2}, u::AbstractArray{T,2}, v::AbstractArray{T,2}, r1::Type{<:Region}, r2::Type{<:Region}) where T
    N = D.grid.numberOfPointsPerDim
    closuresize = closureSize(D.op)
    ri = regionindices(N, closuresize, (r1,r2))

    for tileaxs ∈ TileIterator(axes(ri), padded_tilesize(T, (5,5), 2)) # TBD: Is this the right way, the right size?
        # The first index runs innermost, which suits column-major storage.
        for j ∈ tileaxs[2], i ∈ tileaxs[1]
            # (i, j) is a position inside `ri`; `I` is the grid index stored there.
            I = ri[i,j]
            # Write to the grid index `I` (as the untiled kernel does), not to
            # the region-local position (i, j).
            u[I] = apply(D, v, (Index{r1}(I[1]), Index{r2}(I[2])))
        end
    end
    return nothing
end
97
"""
    apply(D::DiffOp, v::AbstractVector)

Allocate a zero-filled array with the element type and size of `v`, pass it to
`apply!` together with `D` and `v`, and return it.
"""
function apply(D::DiffOp, v::AbstractVector)::AbstractVector
    result = zeros(eltype(v), size(v))
    # NOTE(review): the other mutating functions in this file take the output
    # array before the input (D, u, v) -- confirm that apply! expects (D, v, u).
    apply!(D, v, result)
    return result
end