// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +ls64 -S -emit-llvm -x c %s -o - | FileCheck --check-prefixes=CHECK-C %s
// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +ls64 -S -emit-llvm -x c++ %s -o - | FileCheck --check-prefixes=CHECK-CXX %s
// RUN: %clang_cc1 -triple aarch64_be-eabi -target-feature +ls64 -S -emit-llvm -x c %s -o - | FileCheck  --check-prefixes=CHECK-C %s
// RUN: %clang_cc1 -triple aarch64_be-eabi -target-feature +ls64 -S -emit-llvm -x c++ %s -o - | FileCheck  --check-prefixes=CHECK-CXX %s

#include <arm_acle.h>

#ifdef __cplusplus
#define EXTERN_C extern "C"
#else
#define EXTERN_C
#endif

data512_t val;
void *addr;
uint64_t status;

// CHECK-C-LABEL: define {{[^@]+}}@test_ld64b(
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
// CHECK-C-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
// CHECK-C-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
// CHECK-C-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]])
// CHECK-C-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8, !noalias !2
// CHECK-C-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8, !noalias !2
// CHECK-C-NEXT:    [[TMP2:%.*]] = call { i64, i64, i64, i64, i64, i64, i64, i64 } @llvm.aarch64.ld64b(ptr [[TMP1]]), !noalias !2
// CHECK-C-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 0
// CHECK-C-NEXT:    store i64 [[TMP3]], ptr [[TMP]], align 8, !alias.scope !2
// CHECK-C-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[TMP]], i32 1
// CHECK-C-NEXT:    [[TMP5:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 1
// CHECK-C-NEXT:    store i64 [[TMP5]], ptr [[TMP4]], align 8, !alias.scope !2
// CHECK-C-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[TMP]], i32 2
// CHECK-C-NEXT:    [[TMP7:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 2
// CHECK-C-NEXT:    store i64 [[TMP7]], ptr [[TMP6]], align 8, !alias.scope !2
// CHECK-C-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP]], i32 3
// CHECK-C-NEXT:    [[TMP9:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 3
// CHECK-C-NEXT:    store i64 [[TMP9]], ptr [[TMP8]], align 8, !alias.scope !2
// CHECK-C-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[TMP]], i32 4
// CHECK-C-NEXT:    [[TMP11:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 4
// CHECK-C-NEXT:    store i64 [[TMP11]], ptr [[TMP10]], align 8, !alias.scope !2
// CHECK-C-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[TMP]], i32 5
// CHECK-C-NEXT:    [[TMP13:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 5
// CHECK-C-NEXT:    store i64 [[TMP13]], ptr [[TMP12]], align 8, !alias.scope !2
// CHECK-C-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[TMP]], i32 6
// CHECK-C-NEXT:    [[TMP15:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 6
// CHECK-C-NEXT:    store i64 [[TMP15]], ptr [[TMP14]], align 8, !alias.scope !2
// CHECK-C-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[TMP]], i32 7
// CHECK-C-NEXT:    [[TMP17:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 7
// CHECK-C-NEXT:    store i64 [[TMP17]], ptr [[TMP16]], align 8, !alias.scope !2
// CHECK-C-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 @val, ptr align 8 [[TMP]], i64 64, i1 false)
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define {{[^@]+}}@test_ld64b(
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
// CHECK-CXX-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
// CHECK-CXX-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]])
// CHECK-CXX-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8, !noalias !2
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8, !noalias !2
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = call { i64, i64, i64, i64, i64, i64, i64, i64 } @llvm.aarch64.ld64b(ptr [[TMP1]]), !noalias !2
// CHECK-CXX-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 0
// CHECK-CXX-NEXT:    store i64 [[TMP3]], ptr [[REF_TMP]], align 8, !alias.scope !2
// CHECK-CXX-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 1
// CHECK-CXX-NEXT:    [[TMP5:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 1
// CHECK-CXX-NEXT:    store i64 [[TMP5]], ptr [[TMP4]], align 8, !alias.scope !2
// CHECK-CXX-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 2
// CHECK-CXX-NEXT:    [[TMP7:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 2
// CHECK-CXX-NEXT:    store i64 [[TMP7]], ptr [[TMP6]], align 8, !alias.scope !2
// CHECK-CXX-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 3
// CHECK-CXX-NEXT:    [[TMP9:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 3
// CHECK-CXX-NEXT:    store i64 [[TMP9]], ptr [[TMP8]], align 8, !alias.scope !2
// CHECK-CXX-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 4
// CHECK-CXX-NEXT:    [[TMP11:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 4
// CHECK-CXX-NEXT:    store i64 [[TMP11]], ptr [[TMP10]], align 8, !alias.scope !2
// CHECK-CXX-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 5
// CHECK-CXX-NEXT:    [[TMP13:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 5
// CHECK-CXX-NEXT:    store i64 [[TMP13]], ptr [[TMP12]], align 8, !alias.scope !2
// CHECK-CXX-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 6
// CHECK-CXX-NEXT:    [[TMP15:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 6
// CHECK-CXX-NEXT:    store i64 [[TMP15]], ptr [[TMP14]], align 8, !alias.scope !2
// CHECK-CXX-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 7
// CHECK-CXX-NEXT:    [[TMP17:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 7
// CHECK-CXX-NEXT:    store i64 [[TMP17]], ptr [[TMP16]], align 8, !alias.scope !2
// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 @val, ptr align 8 [[REF_TMP]], i64 64, i1 false)
// CHECK-CXX-NEXT:    ret void
//
EXTERN_C void test_ld64b(void)
{
    val = __arm_ld64b(addr);
}

// CHECK-C-LABEL: define {{[^@]+}}@test_st64b(
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
// CHECK-C-NEXT:    [[VALUE_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
// CHECK-C-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
// CHECK-C-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
// CHECK-C-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[BYVAL_TEMP]], ptr align 8 @val, i64 64, i1 false)
// CHECK-C-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
// CHECK-C-NEXT:    store ptr [[BYVAL_TEMP]], ptr [[VALUE_INDIRECT_ADDR]], align 8
// CHECK-C-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
// CHECK-C-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BYVAL_TEMP]], align 8
// CHECK-C-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 1
// CHECK-C-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
// CHECK-C-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 2
// CHECK-C-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
// CHECK-C-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 3
// CHECK-C-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
// CHECK-C-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 4
// CHECK-C-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-C-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 5
// CHECK-C-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
// CHECK-C-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 6
// CHECK-C-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
// CHECK-C-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 7
// CHECK-C-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
// CHECK-C-NEXT:    call void @llvm.aarch64.st64b(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define {{[^@]+}}@test_st64b(
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
// CHECK-CXX-NEXT:    [[VALUE_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
// CHECK-CXX-NEXT:    [[AGG_TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[AGG_TMP]], ptr align 8 @val, i64 64, i1 false)
// CHECK-CXX-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
// CHECK-CXX-NEXT:    store ptr [[AGG_TMP]], ptr [[VALUE_INDIRECT_ADDR]], align 8
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = load i64, ptr [[AGG_TMP]], align 8
// CHECK-CXX-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 1
// CHECK-CXX-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
// CHECK-CXX-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 2
// CHECK-CXX-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
// CHECK-CXX-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 3
// CHECK-CXX-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
// CHECK-CXX-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 4
// CHECK-CXX-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-CXX-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 5
// CHECK-CXX-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
// CHECK-CXX-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 6
// CHECK-CXX-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
// CHECK-CXX-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 7
// CHECK-CXX-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
// CHECK-CXX-NEXT:    call void @llvm.aarch64.st64b(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
// CHECK-CXX-NEXT:    ret void
//
EXTERN_C void test_st64b(void)
{
    __arm_st64b(addr, val);
}

// CHECK-C-LABEL: define {{[^@]+}}@test_st64bv(
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
// CHECK-C-NEXT:    [[VALUE_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
// CHECK-C-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
// CHECK-C-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
// CHECK-C-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[BYVAL_TEMP]], ptr align 8 @val, i64 64, i1 false)
// CHECK-C-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
// CHECK-C-NEXT:    store ptr [[BYVAL_TEMP]], ptr [[VALUE_INDIRECT_ADDR]], align 8
// CHECK-C-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
// CHECK-C-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BYVAL_TEMP]], align 8
// CHECK-C-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 1
// CHECK-C-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
// CHECK-C-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 2
// CHECK-C-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
// CHECK-C-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 3
// CHECK-C-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
// CHECK-C-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 4
// CHECK-C-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-C-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 5
// CHECK-C-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
// CHECK-C-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 6
// CHECK-C-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
// CHECK-C-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 7
// CHECK-C-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
// CHECK-C-NEXT:    [[TMP17:%.*]] = call i64 @llvm.aarch64.st64bv(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
// CHECK-C-NEXT:    store i64 [[TMP17]], ptr @status, align 8
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define {{[^@]+}}@test_st64bv(
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
// CHECK-CXX-NEXT:    [[VALUE_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
// CHECK-CXX-NEXT:    [[AGG_TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[AGG_TMP]], ptr align 8 @val, i64 64, i1 false)
// CHECK-CXX-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
// CHECK-CXX-NEXT:    store ptr [[AGG_TMP]], ptr [[VALUE_INDIRECT_ADDR]], align 8
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = load i64, ptr [[AGG_TMP]], align 8
// CHECK-CXX-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 1
// CHECK-CXX-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
// CHECK-CXX-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 2
// CHECK-CXX-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
// CHECK-CXX-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 3
// CHECK-CXX-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
// CHECK-CXX-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 4
// CHECK-CXX-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-CXX-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 5
// CHECK-CXX-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
// CHECK-CXX-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 6
// CHECK-CXX-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
// CHECK-CXX-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 7
// CHECK-CXX-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
// CHECK-CXX-NEXT:    [[TMP17:%.*]] = call noundef i64 @llvm.aarch64.st64bv(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
// CHECK-CXX-NEXT:    store i64 [[TMP17]], ptr @status, align 8
// CHECK-CXX-NEXT:    ret void
//
EXTERN_C void test_st64bv(void)
{
    status = __arm_st64bv(addr, val);
}

// CHECK-C-LABEL: define {{[^@]+}}@test_st64bv0(
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
// CHECK-C-NEXT:    [[VALUE_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
// CHECK-C-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
// CHECK-C-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
// CHECK-C-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[BYVAL_TEMP]], ptr align 8 @val, i64 64, i1 false)
// CHECK-C-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
// CHECK-C-NEXT:    store ptr [[BYVAL_TEMP]], ptr [[VALUE_INDIRECT_ADDR]], align 8
// CHECK-C-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
// CHECK-C-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BYVAL_TEMP]], align 8
// CHECK-C-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 1
// CHECK-C-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
// CHECK-C-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 2
// CHECK-C-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
// CHECK-C-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 3
// CHECK-C-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
// CHECK-C-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 4
// CHECK-C-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-C-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 5
// CHECK-C-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
// CHECK-C-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 6
// CHECK-C-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
// CHECK-C-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 7
// CHECK-C-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
// CHECK-C-NEXT:    [[TMP17:%.*]] = call i64 @llvm.aarch64.st64bv0(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
// CHECK-C-NEXT:    store i64 [[TMP17]], ptr @status, align 8
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define {{[^@]+}}@test_st64bv0(
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
// CHECK-CXX-NEXT:    [[VALUE_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
// CHECK-CXX-NEXT:    [[AGG_TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[AGG_TMP]], ptr align 8 @val, i64 64, i1 false)
// CHECK-CXX-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
// CHECK-CXX-NEXT:    store ptr [[AGG_TMP]], ptr [[VALUE_INDIRECT_ADDR]], align 8
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = load i64, ptr [[AGG_TMP]], align 8
// CHECK-CXX-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 1
// CHECK-CXX-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
// CHECK-CXX-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 2
// CHECK-CXX-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
// CHECK-CXX-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 3
// CHECK-CXX-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
// CHECK-CXX-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 4
// CHECK-CXX-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-CXX-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 5
// CHECK-CXX-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
// CHECK-CXX-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 6
// CHECK-CXX-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
// CHECK-CXX-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 7
// CHECK-CXX-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
// CHECK-CXX-NEXT:    [[TMP17:%.*]] = call noundef i64 @llvm.aarch64.st64bv0(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
// CHECK-CXX-NEXT:    store i64 [[TMP17]], ptr @status, align 8
// CHECK-CXX-NEXT:    ret void
//
EXTERN_C void test_st64bv0(void)
{
    status = __arm_st64bv0(addr, val);
}
