performance - Speed of dereferencing class properties in fortran -

I expect the reply will be "don't worry, the compiler will take care of that", but I want to be sure.

When I create a method in a custom type/class in Fortran, is there a performance hit due to referencing/dereferencing the fields of the object, as in this%a(i) = this%b(i) + this%c(i), compared with working on plain arrays, a(i) = b(i) + c(i)?

A more complex example:

For illustration, I have a function which should interpolate a value on a 3D grid and which is performance critical (it is called within a triple loop over another 3D array). I'm wondering whether it is better (for performance) to implement it as a method of the class, or rather as a normal subroutine which takes the array as an argument.

! Excerpt from module t_grid3dvec. The module header and the definition of
! fastflooroffset are not part of this listing.

type grid3d                                          ! 3d grid maps of observables
  real,    dimension(3) :: rmin, rmax, rspan, step   ! grid size and spacing (x,y,z)
  integer, dimension(3) :: n                         ! dimension in x,y,z
  ! An allocatable array must be deferred-shape in every dimension; rank 3
  ! matches the three-subscript access in grid3d_interpolate.
  real, dimension(:,:,:), allocatable :: f           ! array storing values of the observable
contains
  procedure :: interpolate => grid3d_interpolate
end type grid3d

contains

!> Trilinear interpolation of this%f at position r on a periodic grid.
!!
!! this : grid providing step, n and the sampled values f
!! r    : position (x,y,z) at which to interpolate
!! ff   : interpolated value
!!
!! The cell index is obtained with the "fast floor" trick: adding
!! fastflooroffset before truncating with int() and subtracting it again.
!! NOTE(review): this presumes fastflooroffset is a large enough non-negative
!! constant defined elsewhere in the module -- confirm.
function grid3d_interpolate(this, r) result(ff)
  implicit none
  ! variables
  class(grid3d),      intent(in) :: this
  real, dimension(3), intent(in) :: r
  real :: ff
  integer :: ix0, iy0, iz0
  integer :: ix1, iy1, iz1
  real :: dx, dy, dz
  real :: mx, my, mz

  ! function body
  ! unwrapped index of the cell containing r
  ix0 = int( (r(1)/this%step(1)) + fastflooroffset ) - fastflooroffset
  iy0 = int( (r(2)/this%step(2)) + fastflooroffset ) - fastflooroffset
  iz0 = int( (r(3)/this%step(3)) + fastflooroffset ) - fastflooroffset

  ! fractional position inside the cell; expressed in units of the grid step
  ! so that the weights dx and mx = 1 - dx are valid for any spacing
  dx = r(1)/this%step(1) - ix0
  dy = r(2)/this%step(2) - iy0
  dz = r(3)/this%step(3) - iz0

  ! periodic wrap to 1-based indices; the upper neighbour is computed first
  ! because it needs the still unwrapped lower index
  ix1 = modulo( ix0+1 , this%n(1) )+1
  iy1 = modulo( iy0+1 , this%n(2) )+1
  iz1 = modulo( iz0+1 , this%n(3) )+1
  ix0 = modulo( ix0   , this%n(1) )+1
  iy0 = modulo( iy0   , this%n(2) )+1
  iz0 = modulo( iz0   , this%n(3) )+1

  mx = 1.0-dx
  my = 1.0-dy
  mz = 1.0-dz

  ! blend the eight corner values: x first, then y, then z
  ff =    mz*(my*(mx*this%f(ix0,iy0,iz0)     &
                 +dx*this%f(ix1,iy0,iz0))    &
             +dy*(mx*this%f(ix0,iy1,iz0)     &
                 +dx*this%f(ix1,iy1,iz0)))   &
         +dz*(my*(mx*this%f(ix0,iy0,iz1)     &
                 +dx*this%f(ix1,iy0,iz1))    &
             +dy*(mx*this%f(ix0,iy1,iz1)     &
                 +dx*this%f(ix1,iy1,iz1)))
end function grid3d_interpolate

end module t_grid3dvec

not really.

As long as the code construction is quite clear (to the compiler), it can optimize this away quite easily. Once the OOP structures get complicated, or the level of dereferencing gets large, you might get an improvement out of a manual dereferencing scheme. (I use this quite a lot, although mainly to keep the code human-readable. I had a little improvement here once, with code using >5 levels of dereferencing.)

Here is an example:

!> Small vector types plus four ways of summing an array of vectors, used to
!! compare direct component access with access through local pointers.
module vec_mod
  implicit none

  type t_vector
    real :: x = 0.
    real :: y = 0.
    real :: z = 0.
  end type

  type t_group
    type(t_vector),allocatable :: vecs(:)
  end type

contains

  !> Component-wise sum of vecs, accessing vecs(i)%x etc. directly.
  subroutine sum_vec( vecs, res )
    implicit none
    type(t_vector),intent(in)   :: vecs(:)
    type(t_vector),intent(out)  :: res
    integer                     :: i

    res%x = 0. ; res%y = 0. ; res%z = 0.

    do i=1,size(vecs)
      res%x = res%x + vecs(i)%x
      res%y = res%y + vecs(i)%y
      res%z = res%z + vecs(i)%z
    enddo
  end subroutine

  !> Same sum as sum_vec, but each element is first bound to a local pointer.
  subroutine sum_vec_ptr( vecs, res )
    implicit none
    type(t_vector),intent(in),target   :: vecs(:)
    type(t_vector),intent(out)         :: res
    integer                            :: i
    type(t_vector),pointer             :: curvec

    res%x = 0. ; res%y = 0. ; res%z = 0.

    do i=1,size(vecs)
      curvec => vecs(i)
      res%x = res%x + curvec%x
      res%y = res%y + curvec%y
      res%z = res%z + curvec%z
    enddo
  end subroutine

  !> Component-wise sum of vecgrp%vecs with two levels of direct access.
  subroutine sum_vecgrp( vecgrp, res )
    implicit none
    type(t_group),intent(in)    :: vecgrp
    type(t_vector),intent(out)  :: res
    integer                     :: i

    res%x = 0. ; res%y = 0. ; res%z = 0.

    do i=1,size(vecgrp%vecs)
      res%x = res%x + vecgrp%vecs(i)%x
      res%y = res%y + vecgrp%vecs(i)%y
      res%z = res%z + vecgrp%vecs(i)%z
    enddo
  end subroutine

  !> Same sum as sum_vecgrp, but the array and each element are first bound
  !! to local pointers ("manual dereferencing").
  subroutine sum_vecgrp_ptr( vecgrp, res )
    implicit none
    type(t_group),intent(in),target    :: vecgrp
    type(t_vector),intent(out)         :: res
    integer                            :: i
    type(t_vector),pointer             :: curvec, vecs(:)

    res%x = 0. ; res%y = 0. ; res%z = 0.

    vecs => vecgrp%vecs
    do i=1,size(vecs)
      curvec => vecs(i)
      res%x = res%x + curvec%x
      res%y = res%y + curvec%y
      res%z = res%z + curvec%z
    enddo
  end subroutine
end module

!> Benchmark driver: fills n random vectors and times the four summation
!! routines of vec_mod with omp_get_wtime.
program test
  use omp_lib
  use vec_mod
  use,intrinsic :: iso_fortran_env
  implicit none
  type(t_vector),allocatable :: vecs(:)
  type(t_vector)             :: res
  type(t_group)              :: vecgrp
  ! NOTE(review): with a 32-bit default real the running sums stop growing at
  ! 2**24 = 16777216, so the printed sums are not the true totals; this does
  ! not matter for the timing comparison.
  integer,parameter          :: n=100000000
  integer                    :: i, stat
  real(real64)               :: t1, t2

  allocate( vecs(n), vecgrp%vecs(n), stat=stat )
  if (stat /= 0) stop 'cannot allocate memory'

  do i=1,n
     call random_number(vecs(i)%x)
     call random_number(vecs(i)%y)
     call random_number(vecs(i)%z)
  enddo

  print *,''
  print *,'1 level'
  t1 = omp_get_wtime()
  call sum_vec( vecs, res )
  print *,res
  t2 = omp_get_wtime()
  print *,'normal  [s]:', t2-t1

  t1 = omp_get_wtime()
  call sum_vec_ptr( vecs, res )
  print *,res
  t2 = omp_get_wtime()
  print *,'pointer [s]:', t2-t1

  print *,''
  print *,'2 levels'
  vecgrp%vecs = vecs

  t1 = omp_get_wtime()
  call sum_vecgrp( vecgrp, res )
  print *,res
  t2 = omp_get_wtime()
  print *,'normal  [s]:', t2-t1

  t1 = omp_get_wtime()
  call sum_vecgrp_ptr( vecgrp, res )
  print *,res
  t2 = omp_get_wtime()
  print *,'pointer [s]:', t2-t1

end program

Compiled with default options (gfortran test.f90 -fopenmp), there is a slight benefit from manually dereferencing, especially for 2 levels of dereferencing:

omp_num_threads=1 ./a.out    1 level    16777216.0       16777216.0       16777216.0      normal  [s]:  0.69216769299237058         16777216.0       16777216.0       16777216.0      pointer [s]:  0.67321390099823475        2 levels    16777216.0       16777216.0       16777216.0      normal  [s]:  0.84902219301147852         16777216.0       16777216.0       16777216.0      pointer [s]:  0.71247501399193425

Once you turn on optimization (gfortran test.f90 -fopenmp -O3), you can see that the compiler does a better job automatically:

omp_num_threads=1 ./a.out    1 level    16777216.0       16777216.0       16777216.0      normal  [s]:  0.13888958499592263         16777216.0       16777216.0       16777216.0      pointer [s]:  0.19099253200693056        2 levels    16777216.0       16777216.0       16777216.0      normal  [s]:  0.13436777899914887         16777216.0       16777216.0       16777216.0      pointer [s]:  0.21104205500159878

performance methods properties fortran dereference

Search This Blog

Three

performance - Speed of dereferencing class properties in fortran -

Comments

Post a Comment

Popular posts from this blog

model view controller - MVC Rails Planning -

html - Submenu setup with jquery and effect 'fold' -

ruby on rails - Devise Logout Error in RoR -