diff --git a/docs/implementation-status.md b/docs/implementation-status.md index 0c66ef8e7..09a5c5908 100644 --- a/docs/implementation-status.md +++ b/docs/implementation-status.md @@ -113,20 +113,18 @@ in the following sections. --- ## Strided Coarray Access -### Support = no +### Support = partial - +| `prif_get_strided` | **YES** | | +| `prif_get_strided_indirect` | **YES** | | +| `prif_put_strided` | **YES** | | +| `prif_put_strided_indirect` | **YES** | | +| `prif_put_strided_with_notify` | no | | +| `prif_put_strided_with_notify_indirect` | no | | +| `prif_put_strided_indirect_with_notify` | no | | +| `prif_put_strided_indirect_with_notify_indirect` | no | | --- diff --git a/src/caffeine/caffeine.c b/src/caffeine/caffeine.c index be4c44d22..51a14d30f 100644 --- a/src/caffeine/caffeine.c +++ b/src/caffeine/caffeine.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "gasnet_safe.h" #include #include @@ -149,6 +150,7 @@ intptr_t caf_convert_base_addr(void* addr, int image) return (intptr_t)((byte*)segment_start_remote_image + offset); } +// _______________________ Contiguous RMA ____________________________ void caf_put(int image, intptr_t dest, void* src, size_t size) { gex_RMA_PutBlocking(myworldteam, image-1, (void*)dest, src, size, 0); @@ -159,6 +161,31 @@ void caf_get(int image, void* dest, intptr_t src, size_t size) gex_RMA_GetBlocking(myworldteam, dest, image-1, (void*)src, size, 0); } +// _______________________ Strided RMA ____________________________ +void caf_put_strided(int dims, int image_num, + intptr_t remote_ptr, void* remote_stride, + void *current_image_buffer, void * current_image_stride, + size_t element_size, void *extent) { + gex_VIS_StridedPutBlocking(myworldteam, + image_num-1, + (void *)remote_ptr, remote_stride, + current_image_buffer, current_image_stride, + element_size, extent, dims, 0); +} + +void caf_get_strided(int dims, int image_num, + intptr_t remote_ptr, void* remote_stride, + void *current_image_buffer, void * current_image_stride, + size_t element_size, void *extent) { + gex_VIS_StridedGetBlocking(myworldteam, + current_image_buffer, current_image_stride, + image_num-1, + (void *)remote_ptr, remote_stride, + element_size, extent, dims, 0); +} + +//------------------------------------------------------------------- + void caf_sync_all() { gasnet_barrier_notify(0,GASNET_BARRIERFLAG_ANONYMOUS); diff --git a/src/caffeine/coarray_access_s.F90 b/src/caffeine/coarray_access_s.F90 index f8c5e9416..3ea07bb9b 100644 --- a/src/caffeine/coarray_access_s.F90 +++ b/src/caffeine/coarray_access_s.F90 @@ -1,5 +1,8 @@ ! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt + +#include "assert_macros.h" + submodule(prif:prif_private_s) coarray_access_s use iso_c_binding, only: c_loc @@ -7,6 +10,7 @@ contains + ! _______________________ Contiguous Put RMA ____________________________ module procedure prif_put integer(c_intptr_t) :: remote_base @@ -42,6 +46,7 @@ call unimplemented("prif_put_indirect_with_notify_indirect") end procedure + ! _______________________ Contiguous Get RMA ____________________________ module procedure prif_get integer(c_intptr_t) :: remote_base @@ -64,20 +69,132 @@ size = size_in_bytes) end procedure + ! _______________________ Strided Get RMA ____________________________ + ! This helper ensures the metadata arrays are contiguous (RMA data may still be non-contiguous) + subroutine get_strided_helper( & + image_num, remote_ptr, remote_stride, current_image_buffer, current_image_stride, element_size, extent, & + stat, errmsg, errmsg_alloc) + implicit none + integer(c_int), intent(in) :: image_num + integer(c_intptr_t), intent(in) :: remote_ptr + integer(c_ptrdiff_t), intent(in), target, contiguous :: remote_stride(:) + type(c_ptr), intent(in) :: current_image_buffer + integer(c_ptrdiff_t), intent(in), target, contiguous :: current_image_stride(:) + integer(c_size_t), intent(in) :: element_size + integer(c_size_t), intent(in), target, contiguous :: extent(:) + integer(c_int), intent(out), optional :: stat + character(len=*), intent(inout), optional :: errmsg + character(len=:), intent(inout), allocatable, optional :: errmsg_alloc + + call_assert(size(remote_stride) == size(extent)) + call_assert(size(current_image_stride) == size(extent)) + + call caf_get_strided( & + dims = size(extent), & + image_num = image_num, & + remote_ptr = remote_ptr, & + remote_stride = c_loc(remote_stride), & + current_image_buffer = current_image_buffer, & + current_image_stride = c_loc(current_image_stride), & + element_size = element_size, & + extent = c_loc(extent)) + + if (present(stat)) stat = 0 + end subroutine + module procedure prif_get_strided - call unimplemented("prif_get_strided") + integer(c_intptr_t) :: remote_base + + call base_pointer(coarray_handle, image_num, remote_base) + call prif_get_strided_indirect( & + image_num = image_num, & + remote_ptr = remote_base + offset, & + remote_stride = remote_stride, & + current_image_buffer = current_image_buffer, & + current_image_stride = current_image_stride, & + element_size = element_size, & + extent = extent, & + stat = stat, & + errmsg = errmsg, & + errmsg_alloc = errmsg_alloc) end procedure module procedure prif_get_strided_indirect - call unimplemented("prif_get_strided_indirect") + call get_strided_helper( & + image_num = image_num, & + remote_ptr = remote_ptr, & + remote_stride = remote_stride, & + current_image_buffer = current_image_buffer, & + current_image_stride = current_image_stride, & + element_size = element_size, & + extent = extent, & + stat = stat, & + errmsg = errmsg, & + errmsg_alloc = errmsg_alloc) end procedure + ! _______________________ Strided Put RMA ____________________________ + ! This helper ensures the metadata arrays are contiguous (RMA data may still be non-contiguous) + subroutine put_strided_helper( & + image_num, remote_ptr, remote_stride, current_image_buffer, current_image_stride, element_size, extent, & + stat, errmsg, errmsg_alloc) + implicit none + integer(c_int), intent(in) :: image_num + integer(c_intptr_t), intent(in) :: remote_ptr + integer(c_ptrdiff_t), intent(in), target, contiguous :: remote_stride(:) + type(c_ptr), intent(in) :: current_image_buffer + integer(c_ptrdiff_t), intent(in), target, contiguous :: current_image_stride(:) + integer(c_size_t), intent(in) :: element_size + integer(c_size_t), intent(in), target, contiguous :: extent(:) + integer(c_int), intent(out), optional :: stat + character(len=*), intent(inout), optional :: errmsg + character(len=:), intent(inout), allocatable, optional :: errmsg_alloc + + call_assert(size(remote_stride) == size(extent)) + call_assert(size(current_image_stride) == size(extent)) + + call caf_put_strided( & + dims = size(extent), & + image_num = image_num, & + remote_ptr = remote_ptr, & + remote_stride = c_loc(remote_stride), & + current_image_buffer = current_image_buffer, & + current_image_stride = c_loc(current_image_stride), & + element_size = element_size, & + extent = c_loc(extent)) + + if (present(stat)) stat = 0 + end subroutine + module procedure prif_put_strided - call unimplemented("prif_put_strided") + integer(c_intptr_t) :: remote_base + + call base_pointer(coarray_handle, image_num, remote_base) + call prif_put_strided_indirect( & + image_num = image_num, & + remote_ptr = remote_base + offset, & + remote_stride = remote_stride, & + current_image_buffer = current_image_buffer, & + current_image_stride = current_image_stride, & + element_size = element_size, & + extent = extent, & + stat = stat, & + errmsg = errmsg, & + errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_strided_indirect - call unimplemented("prif_put_strided_indirect") + call put_strided_helper( & + image_num = image_num, & + remote_ptr = remote_ptr, & + remote_stride = remote_stride, & + current_image_buffer = current_image_buffer, & + current_image_stride = current_image_stride, & + element_size = element_size, & + extent = extent, & + stat = stat, & + errmsg = errmsg, & + errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_strided_with_notify diff --git a/src/caffeine/prif_private_s.F90 b/src/caffeine/prif_private_s.F90 index 92918a78c..ffec6d8bf 100644 --- a/src/caffeine/prif_private_s.F90 +++ b/src/caffeine/prif_private_s.F90 @@ -104,7 +104,7 @@ module function caf_convert_base_addr(addr, image) result(ptr) bind(c) end function - ! _______________________ RMA ____________________________ + ! _______________________ Contiguous RMA ____________________________ subroutine caf_put(image, dest, src, size) bind(c) !! void caf_put(int image, intptr_t dest, void* src, size_t size) import c_ptr, c_int, c_intptr_t, c_size_t @@ -124,6 +124,46 @@ subroutine caf_get(image, dest, src, size) bind(c) integer(c_intptr_t), intent(in), value :: src integer(c_size_t), intent(in), value :: size end subroutine + + ! _______________________ Strided RMA ____________________________ + subroutine caf_put_strided(dims, image_num, remote_ptr, remote_stride, & + current_image_buffer, current_image_stride, & + element_size, extent) bind(c) + !! void caf_put_strided(int dims, int image_num, + !! intptr_t remote_ptr, void* remote_stride, + !! void *current_image_buffer, void * current_image_stride, + !! size_t element_size, void *extent) + import c_ptr, c_int, c_intptr_t, c_size_t + implicit none + integer(c_int), intent(in), value :: dims + integer(c_int), intent(in), value :: image_num + integer(c_intptr_t), intent(in), value :: remote_ptr + type(c_ptr), intent(in), value :: remote_stride + type(c_ptr), intent(in), value :: current_image_buffer + type(c_ptr), intent(in), value :: current_image_stride + integer(c_size_t), intent(in), value :: element_size + type(c_ptr), intent(in), value :: extent + end subroutine + + subroutine caf_get_strided(dims, image_num, remote_ptr, remote_stride, & + current_image_buffer, current_image_stride, & + element_size, extent) bind(c) + !! void caf_get_strided(int dims, int image_num, + !! intptr_t remote_ptr, void* remote_stride, + !! void *current_image_buffer, void * current_image_stride, + !! size_t element_size, void *extent) + import c_ptr, c_int, c_intptr_t, c_size_t + implicit none + integer(c_int), intent(in), value :: dims + integer(c_int), intent(in), value :: image_num + integer(c_intptr_t), intent(in), value :: remote_ptr + type(c_ptr), intent(in), value :: remote_stride + type(c_ptr), intent(in), value :: current_image_buffer + type(c_ptr), intent(in), value :: current_image_stride + integer(c_size_t), intent(in), value :: element_size + type(c_ptr), intent(in), value :: extent + end subroutine + ! __________________ Synchronization _____________________ subroutine caf_sync_all() bind(C) diff --git a/test/main.F90 b/test/main.F90 index 92cbdd22f..08838cf0c 100644 --- a/test/main.F90 +++ b/test/main.F90 @@ -51,6 +51,8 @@ function run() result(passed) use caf_rma_test, only: & caf_rma_prif_rma => & test_prif_rma + use caf_strided_test, only: & + test_prif_rma_strided use caf_stop_test, only: & caf_stop_prif_this_image => & test_prif_this_image @@ -91,6 +93,7 @@ function run() result(passed) individual_tests = [individual_tests, caf_image_index_prif_image_index()] individual_tests = [individual_tests, caf_num_images_prif_num_images()] individual_tests = [individual_tests, caf_rma_prif_rma()] + individual_tests = [individual_tests, test_prif_rma_strided()] #if !__flang__ individual_tests = [individual_tests, caf_stop_prif_this_image()] #endif diff --git a/test/prif_strided_test.F90 b/test/prif_strided_test.F90 new file mode 100644 index 000000000..72a07b7d1 --- /dev/null +++ b/test/prif_strided_test.F90 @@ -0,0 +1,320 @@ +module caf_strided_test + use iso_c_binding, only: & + c_ptr, c_int64_t, c_intptr_t, c_size_t, c_null_funptr, c_f_pointer, c_loc, c_sizeof + use prif, only: & + prif_coarray_handle, & + prif_allocate_coarray, & + prif_deallocate_coarray, & + prif_allocate, & + prif_deallocate, & + prif_num_images, & + prif_get, & + prif_put_strided, & + prif_put_strided_indirect, & + prif_get_strided, & + prif_get_strided_indirect, & + prif_sync_all, & + prif_this_image_no_coarray + use veggies, only: result_t, test_item_t, assert_equals, describe, it, succeed, fail + + implicit none + private + public :: test_prif_rma_strided +contains + function test_prif_rma_strided() result(tests) + type(test_item_t) :: tests + + tests = describe( & + "PRIF Strided RMA", & + [ it("can put strided data to another image", check_put) & + , it("can put strided data with indirect interface", check_put_indirect) & + , it("can get strided data from another image", check_get) & + , it("can get strided data with indirect interface", check_get_indirect) & + ]) + end function + + function assert_equals_array2d(expected, actual) result(result_) + integer, intent(in) :: expected(:,:) + integer, intent(in) :: actual(:,:) + type(result_t) :: result_ + integer :: i,j + + result_ = succeed("") + result_ = result_ .and. assert_equals(size(expected,1), size(actual,1)) + result_ = result_ .and. assert_equals(size(expected,2), size(actual,2)) + + do i = lbound(actual,1), ubound(actual,1) + do j = lbound(actual,2), ubound(actual,2) + block + character(len=100) :: result_string + + write(result_string, '("At position (", I0, ",", I0, ") expected=", I0, " actual=", I0)') & + i, j, expected(i,j), actual(i,j) + + result_ = result_ .and. & + assert_equals(expected(i,j), actual(i,j), result_string) + end block + end do + end do + + end function + + function check_put() result(result_) + type(result_t) :: result_ + + integer :: me, num_imgs, neighbor + type(prif_coarray_handle) :: coarray_handle + type(c_ptr) :: allocated_memory + integer, target :: mydata(1:4, 1:4) + integer, target :: expected(1:4, 1:4) + integer, pointer :: local_slice(:,:) + integer(c_int64_t) :: lcobounds(1), ucobounds(1) + integer(c_size_t) :: sizeof_int + + sizeof_int = storage_size(me)/8 + call prif_num_images(num_images=num_imgs) + call prif_this_image_no_coarray(this_image=me) + neighbor = merge(me+1, 1, me < num_imgs) + + lcobounds(1) = 1 + ucobounds(1) = num_imgs + call prif_allocate_coarray( & + lcobounds = lcobounds, & + ucobounds = ucobounds, & + size_in_bytes = sizeof_int*product(shape(mydata)), & + final_func = c_null_funptr, & + coarray_handle = coarray_handle, & + allocated_memory = allocated_memory) + call c_f_pointer(allocated_memory, local_slice, shape(mydata)) + + ! init data arrays to known values + local_slice = -1 + expected = -1 + mydata = 0 + + call prif_sync_all + + ! simple example: we set, then copy the interior rectangle of a 4x4 array + mydata(2:3, 2:3) = me + expected(2:3, 2:3) = merge(me-1, num_imgs, me > 1) + + call prif_put_strided( & + image_num = neighbor, & + coarray_handle = coarray_handle, & + offset = 5*sizeof_int, & + remote_stride = [4*sizeof_int, sizeof_int], & + current_image_buffer = c_loc(mydata(2,2)), & + current_image_stride = [4*sizeof_int, sizeof_int], & + element_size = sizeof_int, & + extent = [2_c_size_t, 2_c_size_t]) + + call prif_sync_all + + result_ = assert_equals_array2d(expected, local_slice) + + call prif_deallocate_coarray([coarray_handle]) + end function + + function check_put_indirect() result(result_) + type(result_t) :: result_ + + type :: my_type + type(c_ptr) :: my_component + end type + + type(my_type), target :: dummy_element + integer, pointer :: component_access(:,:) + integer :: me, num_imgs, neighbor + integer, target :: mydata(1:4, 1:4) + integer, target :: expected(1:4, 1:4) + type(prif_coarray_handle) :: coarray_handle + type(c_ptr) :: allocated_memory + type(my_type), pointer :: local_slice + integer(c_int64_t) :: lcobounds(1), ucobounds(1) + integer(c_intptr_t) :: base_addr + integer(c_size_t) :: sizeof_int + + sizeof_int = storage_size(me)/8 + call prif_num_images(num_images=num_imgs) + call prif_this_image_no_coarray(this_image=me) + neighbor = merge(me+1, 1, me < num_imgs) + + lcobounds(1) = 1 + ucobounds(1) = num_imgs + call prif_allocate_coarray( & + lcobounds = lcobounds, & + ucobounds = ucobounds, & + size_in_bytes = int(storage_size(dummy_element)/8, c_size_t), & + final_func = c_null_funptr, & + coarray_handle = coarray_handle, & + allocated_memory = allocated_memory) + call c_f_pointer(allocated_memory, local_slice) + call prif_allocate( & + size_in_bytes = int(sizeof_int*product(shape(mydata)), c_size_t), & + allocated_memory = local_slice%my_component) + call c_f_pointer(local_slice%my_component, component_access, shape(mydata)) + + ! init data arrays to known values + component_access = -1 + expected = -1 + mydata = 0 + + call prif_sync_all + + ! simple example: we set, then copy the interior rectangle of a 4x4 array + mydata(2:3, 2:3) = me + expected(2:3, 2:3) = merge(me-1, num_imgs, me > 1) + + call prif_get( & + image_num = neighbor, & + coarray_handle = coarray_handle, & + offset = 0_c_size_t, & + current_image_buffer = c_loc(dummy_element), & + size_in_bytes = int(storage_size(dummy_element)/8, c_size_t)) + base_addr = transfer(dummy_element%my_component, base_addr) + + call prif_put_strided_indirect( & + image_num = neighbor, & + remote_ptr = base_addr + 5*sizeof_int, & + remote_stride = [4*sizeof_int, sizeof_int], & + current_image_buffer = c_loc(mydata(2,2)), & + current_image_stride = [4*sizeof_int, sizeof_int], & + element_size = sizeof_int, & + extent = [2_c_size_t, 2_c_size_t]) + + call prif_sync_all + + result_ = assert_equals_array2d(expected, component_access) + + call prif_deallocate(local_slice%my_component) + call prif_deallocate_coarray([coarray_handle]) + end function + + function check_get() result(result_) + type(result_t) :: result_ + + integer :: me, num_imgs, neighbor + type(prif_coarray_handle) :: coarray_handle + type(c_ptr) :: allocated_memory + integer, target :: mydata(1:4, 1:4) + integer, target :: expected(1:4, 1:4) + integer, pointer :: local_slice(:,:) + integer(c_int64_t) :: lcobounds(1), ucobounds(1) + integer(c_size_t) :: sizeof_int + + sizeof_int = storage_size(me)/8 + call prif_num_images(num_images=num_imgs) + call prif_this_image_no_coarray(this_image=me) + neighbor = merge(me+1, 1, me < num_imgs) + + lcobounds(1) = 1 + ucobounds(1) = num_imgs + call prif_allocate_coarray( & + lcobounds = lcobounds, & + ucobounds = ucobounds, & + size_in_bytes = sizeof_int*product(shape(mydata)), & + final_func = c_null_funptr, & + coarray_handle = coarray_handle, & + allocated_memory = allocated_memory) + call c_f_pointer(allocated_memory, local_slice, shape(mydata)) + + ! simple example: we copy the interior rectangle of a 4x4 array + local_slice = -1 + local_slice(2:3, 2:3) = me + expected = 0 + expected(2:3, 2:3) = neighbor + mydata = 0 + + call prif_sync_all + + call prif_get_strided( & + image_num = neighbor, & + coarray_handle = coarray_handle, & + offset = 5*sizeof_int, & + remote_stride = [4*sizeof_int, sizeof_int], & + current_image_buffer = c_loc(mydata(2,2)), & + current_image_stride = [4*sizeof_int, sizeof_int], & + element_size = sizeof_int, & + extent = [2_c_size_t, 2_c_size_t]) + + call prif_sync_all + + result_ = assert_equals_array2d(expected, mydata) + + call prif_deallocate_coarray([coarray_handle]) + end function + + function check_get_indirect() result(result_) + type(result_t) :: result_ + + type :: my_type + type(c_ptr) :: my_component + end type + + type(my_type), target :: dummy_element + integer, pointer :: component_access(:,:) + integer :: me, num_imgs, neighbor + integer, target :: mydata(1:4, 1:4) + integer, target :: expected(1:4, 1:4) + type(prif_coarray_handle) :: coarray_handle + type(c_ptr) :: allocated_memory + type(my_type), pointer :: local_slice + integer(c_int64_t) :: lcobounds(1), ucobounds(1) + integer(c_intptr_t) :: base_addr + integer(c_size_t) :: sizeof_int + + sizeof_int = storage_size(me)/8 + call prif_num_images(num_images=num_imgs) + call prif_this_image_no_coarray(this_image=me) + neighbor = merge(me+1, 1, me < num_imgs) + + lcobounds(1) = 1 + ucobounds(1) = num_imgs + call prif_allocate_coarray( & + lcobounds = lcobounds, & + ucobounds = ucobounds, & + size_in_bytes = int(storage_size(dummy_element)/8, c_size_t), & + final_func = c_null_funptr, & + coarray_handle = coarray_handle, & + allocated_memory = allocated_memory) + call c_f_pointer(allocated_memory, local_slice) + call prif_allocate( & + size_in_bytes = int(sizeof_int*product(shape(mydata)), c_size_t), & + allocated_memory = local_slice%my_component) + call c_f_pointer(local_slice%my_component, component_access, shape(mydata)) + + ! simple example: we copy the interior rectangle of a 4x4 array + component_access = -1 + component_access(2:3, 2:3) = me + expected = 0 + expected(2:3, 2:3) = neighbor + mydata = 0 + + call prif_sync_all + + call prif_get( & + image_num = neighbor, & + coarray_handle = coarray_handle, & + offset = 0_c_size_t, & + current_image_buffer = c_loc(dummy_element), & + size_in_bytes = int(storage_size(dummy_element)/8, c_size_t)) + base_addr = transfer(dummy_element%my_component, base_addr) + + call prif_get_strided_indirect( & + image_num = neighbor, & + remote_ptr = base_addr + 5*sizeof_int, & + remote_stride = [4*sizeof_int, sizeof_int], & + current_image_buffer = c_loc(mydata(2,2)), & + current_image_stride = [4*sizeof_int, sizeof_int], & + element_size = sizeof_int, & + extent = [2_c_size_t, 2_c_size_t]) + + call prif_sync_all + + result_ = assert_equals_array2d(expected, mydata) + + call prif_deallocate(local_slice%my_component) + call prif_deallocate_coarray([coarray_handle]) + end function + +end module