diff benchmark/benchmark_laplace.jl @ 1376:2ad2de55061a feature/variable_derivatives

Add temp. benchmark script for constant and variable coeff Laplace
author Vidar Stiernström <vidar.stiernstrom@it.uu.se>
date Tue, 30 May 2023 17:27:45 +0200
parents
children 43aaf710463e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/benchmark/benchmark_laplace.jl	Tue May 30 17:27:45 2023 +0200
@@ -0,0 +1,220 @@
+using Sbplib
+using Sbplib.SbpOperators
+using Sbplib.Grids
+using Sbplib.RegionIndices
+using BenchmarkTools
+
+# TODO: Move the below benchmarks into the benchmark suite
+
+const operator_path = sbp_operators_path()*"standard_diagonal.toml"
+
+function benchmark_const_coeff_1d(;N = 100, order = 4)
+    stencil_set = read_stencil_set(operator_path; order=order)
+    g = equidistant_grid(N, 0., 1.)
+    D = second_derivative(g, stencil_set)
+    u = rand(size(g)...)
+    u_xx = rand(size(g)...)
+        
+    b_naive = @benchmark $u_xx .= $D*$u
+    b_reg = @benchmark $apply_region_1d!($u_xx,$u,$D)
+    b_thrd = @benchmark $apply_region_threaded_1d!($u_xx,$u,$D)
+    print_benchmark_result("Naive apply",b_naive)
+    print_benchmark_result("Region apply",b_reg)
+    print_benchmark_result("Threaded region apply",b_thrd)
+end
+
+function benchmark_var_coeff_1d(;N = 100, order = 4)
+    stencil_set = read_stencil_set(operator_path; order=order)
+    g = equidistant_grid(N, 0., 1.)
+    c = rand(size(g)...)
+    c_lz = eval_on(g, x -> 0.5)
+    D = second_derivative_variable(g, c, stencil_set)
+    D_lz = second_derivative_variable(g, c_lz, stencil_set)
+    u = rand(size(g)...)
+    u_xx = rand(size(g)...)
+    
+    b_naive = @benchmark $u_xx .= $D*$u
+    b_naive_lz = @benchmark $u_xx .= $D_lz*$u
+    b_reg = @benchmark $apply_region_1d!($u_xx,$u,$D)
+    b_reg_lz = @benchmark $apply_region_1d!($u_xx,$u,$D_lz)
+    b_thrd = @benchmark $apply_region_threaded_1d!($u_xx,$u,$D)
+    b_thrd_lz = @benchmark $apply_region_threaded_1d!($u_xx,$u,$D_lz)
+    print_benchmark_result("Naive apply",b_naive)
+    print_benchmark_result("Naive apply lazy coeff",b_naive_lz)
+    print_benchmark_result("Region apply",b_reg)
+    print_benchmark_result("Region apply lazy coeff",b_reg_lz)
+    print_benchmark_result("Threaded region apply",b_thrd)
+    print_benchmark_result("Threaded region apply lazy coeff",b_thrd_lz)
+end
+
+function benchmark_const_coeff_2d(;N = 100, order = 4)
+    stencil_set = read_stencil_set(operator_path; order=order)
+    g = equidistant_grid((N,N), (0.,0.,),(1.,1.))
+    D = Laplace(g, stencil_set)
+    u = rand(size(g)...)
+    u_xx = rand(size(g)...)
+    if order == 2
+        clz_sz = 1
+    elseif order == 4
+        clz_sz = 4
+    else
+        error()
+    end
+
+    b_naive = @benchmark $u_xx .= $D*$u
+    b_reg = @benchmark $apply_region_2d!($u_xx,$u,$D,$clz_sz)
+    b_thrd = @benchmark $apply_region_threaded_2d!($u_xx,$u,$D,$clz_sz)
+    print_benchmark_result("Naive apply",b_naive)
+    print_benchmark_result("Region apply",b_reg)
+    print_benchmark_result("Threaded region apply",b_thrd)
+end
+
+function benchmark_var_coeff_2d(;N = 100, order = 4)
+    stencil_set = read_stencil_set(operator_path; order=order)
+    g = equidistant_grid((N,N), (0.,0.,),(1.,1.))
+    c = rand(size(g)...)
+    c_lz = eval_on(g, x-> 0.5)
+    D = second_derivative_variable(g, c, stencil_set, 1) + second_derivative_variable(g, c, stencil_set, 2)
+    D_lz = second_derivative_variable(g, c_lz, stencil_set, 1) + second_derivative_variable(g, c_lz, stencil_set, 2)
+    u = rand(size(g)...)
+    u_xx = rand(size(g)...)
+
+    if order == 2
+        clz_sz = 1
+    elseif order == 4
+        clz_sz = 6
+    else
+        error()
+    end
+    
+    # Check correctnesss
+    # u_xx .= D*u
+    # u_xx_tmp = zeros(size(u_xx)...)
+    # u_xx_tmp .= u_xx
+    # apply_region_threaded_2d!(u_xx, u, D, clz_sz)
+
+    # @show sum(abs.(u_xx_tmp .- u_xx))
+    # @show pointer(u_xx_tmp) == pointer(u_xxs
+
+    
+    b_naive = @benchmark $u_xx .= $D*$u
+    b_naive_lz = @benchmark $u_xx .= $D_lz*$u
+    b_reg = @benchmark $apply_region_2d!($u_xx,$u,$D, $clz_sz)
+    b_reg_lz = @benchmark $apply_region_2d!($u_xx,$u,$D_lz, $clz_sz)
+    b_thrd = @benchmark $apply_region_threaded_2d!($u_xx,$u,$D, $clz_sz)
+    b_thrd_lz = @benchmark $apply_region_threaded_2d!($u_xx,$u,$D_lz, $clz_sz)
+    print_benchmark_result("Naive apply",b_naive)
+    print_benchmark_result("Naive apply lazy coeff",b_naive_lz)
+    print_benchmark_result("Region apply",b_reg)
+    print_benchmark_result("Region apply lazy coeff",b_reg_lz)
+    print_benchmark_result("Threaded region apply",b_thrd)
+    print_benchmark_result("Threaded region apply lazy coeff",b_thrd_lz)
+end
+
+function print_benchmark_result(title_str,res)
+    if title_str[1] != ' '
+        title_str = lpad(title_str,length(title_str)+1, " ")
+    end
+    if title_str[end] != ' '
+        title_str = rpad(title_str,length(title_str)+1, " ")
+    end
+    tot_len = 76
+    pad_len = Int(tot_len/2)
+    header = lpad(title_str,pad_len,"*")
+    header = rpad(header,tot_len,"*")
+    bottom = repeat("*",tot_len)
+    println(header)
+    display(res)
+    println(bottom)
+    return
+end
+
+function apply_region_1d!(u_xx, u, D)
+    clz_sz = SbpOperators.closure_size(D)
+    tm = D*u
+    for i ∈ @view eachindex(u)[1:clz_sz]
+        u_xx[i] = tm[Index{Lower}(i)]
+    end
+    for i ∈ @view eachindex(u)[clz_sz+1:end-clz_sz]
+        u_xx[i] = tm[Index{Interior}(i)]
+    end
+    for i ∈ @view eachindex(u)[end-clz_sz+1:end]
+        u_xx[i] = tm[Index{Upper}(i)]
+    end
+end
+
+function apply_region_threaded_1d!(u_xx, u, D)
+    clz_sz = SbpOperators.closure_size(D)
+    tm = D*u
+    for i ∈ @view eachindex(u)[1:clz_sz]
+        u_xx[i] = tm[Index{Lower}(i)]
+    end
+    Threads.@threads for i ∈ @view eachindex(u)[clz_sz+1:end-clz_sz]
+        u_xx[i] = tm[Index{Interior}(i)]
+    end
+    for i ∈ @view eachindex(u)[end-clz_sz+1:end]
+        u_xx[i] = tm[Index{Upper}(i)]
+    end
+end
+
+function apply_region_2d!(u_xx, u, D, clz_sz)
+    tm = D*u
+    for I ∈ @view CartesianIndices(u)[1:clz_sz,1:clz_sz]
+        u_xx[I] = tm[Index{Lower}(I[1]),Index{Lower}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[1:clz_sz,clz_sz+1:end-clz_sz]
+        u_xx[I] = tm[Index{Lower}(I[1]),Index{Interior}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[1:clz_sz,end-clz_sz+1:end]
+        u_xx[I] = tm[Index{Lower}(I[1]),Index{Upper}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[clz_sz+1:end-clz_sz,1:clz_sz]
+        u_xx[I] = tm[Index{Interior}(I[1]),Index{Lower}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[clz_sz+1:end-clz_sz,clz_sz+1:end-clz_sz]
+        u_xx[I] = tm[Index{Interior}(I[1]),Index{Interior}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[clz_sz+1:end-clz_sz,end-clz_sz+1:end]
+        u_xx[I] = tm[Index{Interior}(I[1]),Index{Upper}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[end-clz_sz+1:end,1:clz_sz]
+        u_xx[I] = tm[Index{Upper}(I[1]),Index{Lower}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[end-clz_sz+1:end,clz_sz+1:end-clz_sz]
+        u_xx[I] = tm[Index{Upper}(I[1]),Index{Interior}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[end-clz_sz+1:end,end-clz_sz+1:end]
+        u_xx[I] = tm[Index{Upper}(I[1]),Index{Upper}(I[2])]
+    end
+end
+
+function apply_region_threaded_2d!(u_xx, u, D, clz_sz)
+    tm = D*u
+    for I ∈ @view CartesianIndices(u)[1:clz_sz,1:clz_sz]
+        u_xx[I] = tm[Index{Lower}(I[1]),Index{Lower}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[1:clz_sz,clz_sz+1:end-clz_sz]
+        u_xx[I] = tm[Index{Lower}(I[1]),Index{Interior}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[1:clz_sz,end-clz_sz+1:end]
+        u_xx[I] = tm[Index{Lower}(I[1]),Index{Upper}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[clz_sz+1:end-clz_sz,1:clz_sz]
+        u_xx[I] = tm[Index{Interior}(I[1]),Index{Lower}(I[2])]
+    end
+    Threads.@threads for I ∈ @view CartesianIndices(u)[clz_sz+1:end-clz_sz,clz_sz+1:end-clz_sz]
+        u_xx[I] = tm[Index{Interior}(I[1]),Index{Interior}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[clz_sz+1:end-clz_sz,end-clz_sz+1:end]
+        u_xx[I] = tm[Index{Interior}(I[1]),Index{Upper}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[end-clz_sz+1:end,1:clz_sz]
+        u_xx[I] = tm[Index{Upper}(I[1]),Index{Lower}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[end-clz_sz+1:end,clz_sz+1:end-clz_sz]
+        u_xx[I] = tm[Index{Upper}(I[1]),Index{Interior}(I[2])]
+    end
+    for I ∈ @view CartesianIndices(u)[end-clz_sz+1:end,end-clz_sz+1:end]
+        u_xx[I] = tm[Index{Upper}(I[1]),Index{Upper}(I[2])]
+    end
+end
\ No newline at end of file