Mercurial > repos > public > sbplib_julia
comparison diffOp.jl @ 113:3b89aa6dc7f2 cell_based_test
Add apply_tiled!, which tiles the iteration to optimize cache usage. It doesn't improve runtime at all at the moment.
author | Jonatan Werpers <jonatan@werpers.com> |
---|---|
date | Fri, 08 Feb 2019 21:19:34 +0100 |
parents | 98c788cba9bf |
children | d24497780ebd |
comparison
equal
deleted
inserted
replaced
112:98c788cba9bf | 113:3b89aa6dc7f2 |
---|---|
65 @inbounds u[I] = apply(D, v, indextuple) | 65 @inbounds u[I] = apply(D, v, indextuple) |
66 end | 66 end |
67 return nothing | 67 return nothing |
68 end | 68 end |
69 | 69 |
70 function apply_tiled!(D::DiffOpCartesian{2}, u::AbstractArray{T,2}, v::AbstractArray{T,2}) where T | |
71 apply_region_tiled!(D, u, v, Lower, Lower) | |
72 apply_region_tiled!(D, u, v, Lower, Interior) | |
73 apply_region_tiled!(D, u, v, Lower, Upper) | |
74 apply_region_tiled!(D, u, v, Interior, Lower) | |
75 apply_region_tiled!(D, u, v, Interior, Interior) | |
76 apply_region_tiled!(D, u, v, Interior, Upper) | |
77 apply_region_tiled!(D, u, v, Upper, Lower) | |
78 apply_region_tiled!(D, u, v, Upper, Interior) | |
79 apply_region_tiled!(D, u, v, Upper, Upper) | |
80 return nothing | |
81 end | |
82 | |
83 using TiledIteration | |
84 function apply_region_tiled!(D::DiffOpCartesian{2}, u::AbstractArray{T,2}, v::AbstractArray{T,2}, r1::Type{<:Region}, r2::Type{<:Region}) where T | |
85 N = D.grid.numberOfPointsPerDim | |
86 closuresize = closureSize(D.op) | |
87 ri = regionindices(N, closuresize, (r1,r2)) | |
88 | |
89 for tileaxs ∈ TileIterator(axes(ri), padded_tilesize(T, (5,5), 2)) # TBD: Is this the right way, the right size? | |
90 for i ∈ tileaxs[1], j ∈ tileaxs[2] | |
91 I = ri[i,j] | |
92 u[i,j] = apply(D, v, (Index{r1}(I[1]), Index{r2}(I[2]))) | |
93 end | |
94 end | |
95 return nothing | |
96 end | |
97 | |
70 function apply(D::DiffOp, v::AbstractVector)::AbstractVector | 98 function apply(D::DiffOp, v::AbstractVector)::AbstractVector |
71 u = zeros(eltype(v), size(v)) | 99 u = zeros(eltype(v), size(v)) |
72 apply!(D,v,u) | 100 apply!(D,v,u) |
73 return u | 101 return u |
74 end | 102 end |