github-actions[bot] on v6.13.1
ChrisRackauckas on master
Update Project.toml (compare)
ChrisRackauckas on ChrisRackauckas-patch-7
ChrisRackauckas on master
Fix test of bignum on 853 Merge pull request #1677 from S… (compare)
github-actions[bot] on v6.11.2
ChrisRackauckas on ChrisRackauckas-patch-7
Fix test of bignum on 853 (compare)
ranocha on release-6-11-2
discretization/generate_finite_difference_rules.jl
CartesianIndex
[slack] <xtalax> the reason that it works is that this is essentially a compilation step, fast code is generated as a result.
See http://methodoflines.sciml.ai/dev/howitworks/ for a useful overview
gapf=gap_f(a3,a4,H_kin_x_diag_datad,cacheH_datad,dx,x_pos[1])
gapdfda2=a2->ForwardDiff.derivative(gapf,a2)
a2list=range(a21,a22,length=20)
lines(a2list,gapf.(a2list))
lines(a2list,gapdfda2.(a2list))
find_zeros(gapdfda2,a21,a22)
lines(a2list,gapf.(a2list))
function eigvals(A::Hermitian{<:Dual{Tg,T,N}}) where {Tg,T<:Real,N}
λ,Q = eigen(Hermitian(getproperty.(parent(A), :value)))
partials = ntuple(j -> diag(Q' * getindex.(getproperty.(A, :partials), j) * Q), N)
Dual{Tg}.(λ, tuple.(partials...))
end
Symmetric
to Hermitian
but I did not change the calculation steps inside.
dualcache
while one can still get the derivative of the function calculated from the matrix eigvals
? Notice that other element of the matrix can be calculated by FFT before I call the function.
[slack] <Baiyi> The full code:
```using QuantumOptics, DelimitedFiles, LaTeXStrings, GLMakie, CairoMakie, SparseArrays,ForwardDiff,PreallocationTools,Roots,
LinearAlgebra, BenchmarkTools, DataFrames, CSV, LoopVectorization, ProfileView, FFTW, Octavian, Interpolations
using MKL
import OrdinaryDiffEq
import ForwardDiff: Dual
import LinearAlgebra: eigvals
function eigvals(A::Hermitian{<:Dual{Tg,T,N}}) where {Tg,T<:Real,N}
λ,Q = eigen(Hermitian(getproperty.(parent(A), :value)))
partials = ntuple(j -> diag(Q' getindex.(getproperty.(A, :partials), j) Q), N)
Dual{Tg}.(λ, tuple.(partials...))
end
include("turbo.jl")
include("QuantumOpticsExtra.jl")
CairoMakie.activate!()
const mₑ = 9.1093837015e-31 # electron mass
const e = 1.602176634e-19 # elementary charge
const ħ = 1.054571817e-34 # reduced Planck constant
const L_unit = 1e-6 # 1L_unit:1
const T_unit = mₑ L_unit^2 / ħ # 1T_unit:1
const E_unit = ħ^2 / (mₑ L_unit^2)# 1E_unit:1
function potential(x;a2, a3, a4)
V = a2 x^2 + a3 x^3 + a4 * x^4
end
d=20e-6 # m
ω=2π300e6 # Hz
a20=1/2mₑ*ω^2/e
a3=2a20/3d
a4=3a3/4d
d=20 # μm
ω=2π300e6 # Hz
a20=1/2mₑω^2/E_unitL_unit^2 # E/L^2
a3=2a20/3d # E/L^3
a4=3a3/4d # E/L^4
N=2048
x_pos = range(-28, 9, length=N)
b_position = PositionBasis(x_pos[1], x_pos[end], N)
dx=spacing(b_position)
b_momentum = MomentumBasis(b_position)
x = position(b_position)
p = momentum(b_momentum)
Tpx = QuantumOptics.transform(b_momentum, b_position)
Txp = dagger(Tpx)
H_kin = p^2 / 2
H_kin_x = Txp dense(H_kin) Tpx
Hermitian!(H_kin_x)
H_kin_lazy = LazyProduct(Txp, H_kin, Tpx)
cacheH_datad=dualcache(H_kin_x.data)
H_kin_x_diag_datad=dualcache(1.0diag(H_kin_x.data))
function eigvalue(a2::T;a3,a4,H_kin_x_diag_data,cacheH_data,dx,xmin) where {T}
cacheH_data=get_tmp(cacheH_data,a2)
H_kin_x_diag_data=get_tmp(H_kin_x_diag_data,a2)
for i in 1:length(H_kin_x_diag_data)
cacheH_data[i,i]=H_kin_x_diag_data[i]+potential(xmin + (i-1)*dx;a2=a2,a3=a3,a4=a4)
end
eigvals(Hermitian(cacheH_data))
end
function smallest_gap(a2::T;a3,a4,H_kin_x_diag_data,cacheH_data,dx,xmin) where {T}
eigcache=eigvalue(a2;a3=a3,a4=a4,H_kin_x_diag_data=H_kin_x_diag_data,cacheH_data=cacheHdata,dx=dx,xmin=xmin)
val=sqrt(a2/2)
,index=findmin(x->abs(x-val),eigcache)
if index == 1
abs(eigcache[index+1]-eigcache[index])
elseif index==length(eigcache)
abs(eigcache[index]-eigcache[index-1])
else
min(abs(eigcache[index]-eigcache[index-1]),abs(eigcache[index+1]-eigcache[index]))
end
end
function gap_f(a3,a4,H_kin_x_diag_data,cacheH_data,dx,xmin)
a2->smallest_gap(a2,a3=a3,a4=a4,H_kin_x_diag_data=H_kin_x_diag_data,cacheH_data=cacheH_data,dx=dx,xmin=xmin)
end
gapf=gap_f(a3,a4,H_kin_x_diag_datad,cacheH_datad,dx,x_pos[1])
gapdfda2=a2->ForwardDiff.derivative(gapf,a2)
a21=5.712e+05e/E_unitL_unit^2
a22=5.694e+05e/E_unitL_unit^2
a2list=range(a21,a22,length=20)
scatter(a2list,gapf.(a2list)*(E_unit/ħ/2π),axis =(xreversed=true,))
scatter(a2list,gapdfda2.(a2list),axis =(xreversed=true,))```
[slack] <Or> ```function qliouville!(drho::Matrix{T}, rho::Matrix{T}, p, t::Float64) where {N, T}
H, C = p
mul!(C, rho, 1.0im)
commutator!(C, H(t), drho)
end
function mul!(A::Matrix, B::Matrix, c::Number)
for i in eachindex(B)
A[i] = c * B[i]
end
end
function commutator!(A::Matrix, B::Matrix, C::Matrix)
mul!(C, A, B)
mul!(C, B, A, -1.0, 1.0)
end```
[slack] <Or> ```julia> @benchmark qliouville!(drho, rho.mat, H, 0.3)
BenchmarkTools.Trial: 10000 samples with 755 evaluations.
Range (min … max): 167.246 ns … 8.041 μs ┊ GC (min … max): 0.00% … 96.76%
Time (median): 185.132 ns ┊ GC (median): 0.00%
Time (mean ± σ): 211.281 ns ± 263.241 ns ┊ GC (mean ± σ): 4.69% ± 3.73%
█▆▄▃▄▃▃▄▆▅▅▄▃▃▂▂▂▁▁▁▁▁▁ ▂
██████████████████████████▇▇██▇▇▇▇▇▆▅▅▆▆▇▅▅▆▆▅▅▆▅▅▄▅▅▅▄▅▄▅▅▅▅ █
167 ns Histogram: log(frequency) by time 426 ns <
Memory estimate: 144 bytes, allocs estimate: 2.```
rho.mat
could also be allocating (not sure what type rho
is?). To see the difference, compare $rho.mat
and $(rho.mat)
.
H(t)
return a new matrix?
[slack] <Or> ```julia> @benchmark qliouville!($drho, $(rho.mat), $H, 0.3)
BenchmarkTools.Trial: 10000 samples with 888 evaluations.
Range (min … max): 130.472 ns … 6.011 μs ┊ GC (min … max): 0.00% … 96.94%
Time (median): 132.940 ns ┊ GC (median): 0.00%
Time (mean ± σ): 156.117 ns ± 227.667 ns ┊ GC (mean ± σ): 5.96% ± 3.99%
▆█▄▂ ▁▁ ▁ ▃▄▄▅▄▃▃▃▃▂ ▂
██████████▆▅▇██▆▇▆▅█████████████▇▇▇▆▆▆▆▆▆▅▅▆▆▅▆▅▆▆▆▆▆▄▅▅▄▄▁▄▅ █
130 ns Histogram: log(frequency) by time 228 ns <
Memory estimate: 144 bytes, allocs estimate: 2.```
this didn't change the allocations.
By the way, I gave the wrong function definition for qliouville:function qliouville!(drho::Matrix{T}, rho::Matrix{T}, H::TimeDependentHamiltonian{N}, t::Float64) where {N, T}
commutator!(1.0im * rho, H(t), drho)
end
H generates a new matrix, but it's not clear why:
```julia> @benchmark $H(0.3)
BenchmarkTools.Trial: 10000 samples with 981 evaluations.
Range (min … max): 63.925 ns … 5.178 μs ┊ GC (min … max): 0.00% … 98.45%
Time (median): 67.286 ns ┊ GC (median): 0.00%
Time (mean ± σ): 68.263 ns ± 70.956 ns ┊ GC (mean ± σ): 1.46% ± 1.39%
▅█▆▂ ▁▁▁▄▄▅▇▆▆▄▂▂▂▁ ▁▁ ▂
████▇██████████████▇█▇█▇▇▅▆▅▆▆▇████▇▆▅▄▃▄▃▃▄▅▃▃▄▅▄▄▅▅▅▃▅▁▃▅ █
63.9 ns Histogram: log(frequency) by time 85.1 ns <
Memory estimate: 16 bytes, allocs estimate: 1.
function (H::TimeDependentHamiltonian)(t::Float64)
mapsum!(H.operators, t, H.cache)
add!(H.cache, H.time_independent_part.mat)
H.cache
end
struct TimeDependentHamiltonian{N}
n_body::Int
operators::Vector{TimeDependentOperator}
time_independent_part::TimeIndependentHamiltonian
cache::Matrix{ComplexF64}
end```
[slack] <Or> ```julia> @benchmark $H.operators1
BenchmarkTools.Trial: 10000 samples with 990 evaluations.
Range (min … max): 43.083 ns … 155.301 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 44.206 ns ┊ GC (median): 0.00%
Time (mean ± σ): 45.148 ns ± 5.653 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
▄▄█▃▃▃ ▁
██████▆▆▅▄▁▄▅█▆▄▄▄▄▃▄▄▄▄▃▁▃▁▃▄▁▄▁▁▃▁▅▆█▇▆▅▁▃▃▆▇█▇▅▄▃▁▁▁▃▁▃▄▄ █
43.1 ns Histogram: log(frequency) by time 70.9 ns <
Memory estimate: 0 bytes, allocs estimate: 0.```
H(t)
whose type can't be inferred at compile time, so the appropriate method of mapsum!
and/or add!
has to be looked up at runtime. That's often associated with a small allocation of 16, 32, or 48 bytes (though, in my experience, performance wise the bigger problem is the runtime lookup, not the allocation itself).
@code_warntype H(t)
output?
maxiters
limit. As I understand it, this limit is cumulative across steps. Is there some way to disable it (maxiters=:forever
) or at least reset it every step to 0?
for i in 1:20_000
step!(integrator, dt, true)
end
dt=1.0e-6
, final time 1.0
and low frequency (e.g. 0.3
) sinusoidal input. Rodas5 allocates proportionally to solution length, which is a problem for infinite/"very long" simulations. Is there some way to preallocate this? Saving intermediate steps is not required.