Some fixing of msvc_win32_arm64 UNO bridge

For one, the Windows ABI deviates from the generic aarch64 ABI regarding
returning class instances by value from non-static member functions, see
<https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#return-values>:
"The caller shall reserve a block of memory of sufficient size and alignment to
hold the result.  The address of the memory block shall be passed as an
additional argument to the function in x0, or x1 if $this is passed in x0.  The
callee may modify the result memory block at any point during the execution of
the subroutine.  The callee returns the address of the memory block in x0."
That means RETURN_KIND_HFA_FLOAT and RETURN_KIND_HFA_DOUBLE are not needed, and
can be cleaned up in a follow-up commit.

And for another, setting up a call stack frame in call() in uno2cpp.cxx for
callVirtualFunction() didn't actually work, so go with a slightly less ambitious
approach (as also used by the gcc_linux_aarch64 bridge) and explicitly copy the
arguments that end up on the stack around in callVirtualFunction().

This allows CustomTarget_testtools/uno_test to proceed at least as far as the
call of getRaiseAttr1(), which still leads to an uncaught
css::uno::RuntimeException.

Change-Id: I4a8ec09c270864ac4de246d7e8d1f923198236b1
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/166585
Tested-by: Jenkins
Reviewed-by: Stephan Bergmann <stephan.bergmann@allotropia.de>
diff --git a/bridges/source/cpp_uno/msvc_win32_arm64/abi.cxx b/bridges/source/cpp_uno/msvc_win32_arm64/abi.cxx
index c888731..b8a1c73 100644
--- a/bridges/source/cpp_uno/msvc_win32_arm64/abi.cxx
+++ b/bridges/source/cpp_uno/msvc_win32_arm64/abi.cxx
@@ -144,13 +144,13 @@ ReturnKind getReturnKind(typelib_TypeDescription const* type)
            switch (getStructKind(reinterpret_cast<typelib_CompoundTypeDescription const*>(type)))
            {
                case STRUCT_KIND_FLOAT:
                    return RETURN_KIND_HFA_FLOAT;
                    return RETURN_KIND_INDIRECT;
                case STRUCT_KIND_DOUBLE:
                    return RETURN_KIND_HFA_DOUBLE;
                    return RETURN_KIND_INDIRECT;
                case STRUCT_KIND_DTOR:
                    return RETURN_KIND_INDIRECT;
                default:
                    return RETURN_KIND_REG;
                    return RETURN_KIND_INDIRECT;
            }
    }
}
diff --git a/bridges/source/cpp_uno/msvc_win32_arm64/callvirtualfunction.S b/bridges/source/cpp_uno/msvc_win32_arm64/callvirtualfunction.S
index a058e47d..546c02c 100644
--- a/bridges/source/cpp_uno/msvc_win32_arm64/callvirtualfunction.S
+++ b/bridges/source/cpp_uno/msvc_win32_arm64/callvirtualfunction.S
@@ -19,53 +19,63 @@
/*
   extern void callVirtualFunction

   x0 stack
   x1 frame
   x2 function
   x3 return
   x0 regs
   x1 stack
   x2 stack count
   x3 function
*/

    NESTED_ENTRY callVirtualFunction_fake
    NESTED_ENTRY callVirtualFunction

        // for unwind information, Windows has to store fp and lr
        PROLOG_SAVE_REG_PAIR	x29, x30, #-32!
        sub   sp, sp, #32
        stp   fp, lr, [sp]
        mov   fp, sp

        ALTERNATE_ENTRY callVirtualFunction
        // Stack space for arguments >= 8 (16-byte aligned):
        lsl   x2, x2, #3
        sub   x9, sp, x2
        bfc   x9, #0, #4
        mov   sp, x9

        // use a stack frame allocated by our caller
        stp   x29, x30, [x1]
        mov   x29, x1
        mov   sp, x0
        // Copy arguments >= 8:
        cbz   x2, done
loop
        sub   x2, x2, #8
        ldr   x9, [x1, x2]
        str   x9, [sp, x2]
        cbnz  x2, loop
done

        mov   x9, x2                  // function
        mov   x8, x3                  // complex return
        str   x3, [x29, #16]          // save rvalue
        mov   x9, x3                  // function

        mov   x10, x0
        str   x10, [fp, #16]

        // load the core argument passing registers
        ldp   x0, x1, [sp, #-128]
        ldp   x2, x3, [sp, #-112]
        ldp   x4, x5, [sp, #-96]
        ldp   x6, x7, [sp, #-80]
        ldp   x0, x1, [x10, #0]
        ldp   x2, x3, [x10, #16]
        ldp   x4, x5, [x10, #32]
        ldp   x6, x7, [x10, #48]

        ldp   d0, d1, [sp, #-64]
        ldp   d2, d3, [sp, #-48]
        ldp   d4, d5, [sp, #-32]
        ldp   d6, d7, [sp, #-16]
        ldp   d0, d1, [x10, #64]
        ldp   d2, d3, [x10, #80]
        ldp   d4, d5, [x10, #96]
        ldp   d6, d7, [x10, #112]

        blr   x9                      // call

        ldr   x3, [x29, #16]          // reload rvalue

        // partially deconstruct the stack frame
        mov   sp, x29
        ldp   x29, x30, [x29]
        ldr   x10, [fp, #16]

        // save the simple return values
        stp   x0, x1, [sp, #0]
        stp   d0, d1, [sp, #64]
        stp   d2, d3, [sp, #80]
        stp   x0, x1, [x10, #0]
        stp   d0, d1, [x10, #64]
        stp   d2, d3, [x10, #80]

        NESTED_END callVirtualFunction_fake
        add   sp, fp, #32
        ldp   fp, lr, [sp, #-32]
        ret

        NESTED_END callVirtualFunction

    END

diff --git a/bridges/source/cpp_uno/msvc_win32_arm64/cpp2uno.cxx b/bridges/source/cpp_uno/msvc_win32_arm64/cpp2uno.cxx
index fc95612..da86946 100644
--- a/bridges/source/cpp_uno/msvc_win32_arm64/cpp2uno.cxx
+++ b/bridges/source/cpp_uno/msvc_win32_arm64/cpp2uno.cxx
@@ -71,7 +71,7 @@ void call(bridges::cpp_uno::shared::CppInterfaceProxy* proxy,
    typelib_TypeDescription** argtds
        = static_cast<typelib_TypeDescription**>(alloca(count * sizeof(typelib_TypeDescription*)));

    sal_Int32 ngpr = 1;
    sal_Int32 ngpr = retKind == RETURN_KIND_INDIRECT ? 2 : 1;
    sal_Int32 nfpr = 0;
    sal_Int32 sp = 0;
    for (sal_Int32 i = 0; i != count; ++i)
@@ -229,6 +229,7 @@ void call(bridges::cpp_uno::shared::CppInterfaceProxy* proxy,
            break;
        case RETURN_KIND_INDIRECT:
            retout = indirectRet;
            gpr[0] = reinterpret_cast<sal_uInt64>(retout);
            break;
    }

@@ -243,11 +244,12 @@ void call(bridges::cpp_uno::shared::CppInterfaceProxy* proxy,
}

extern "C" void vtableCall(sal_Int32 functionIndex, sal_Int32 vtableOffset, sal_uInt64* gpr,
                           sal_uInt64* fpr, sal_uInt64* stack, void* indirectRet)
                           sal_uInt64* fpr, sal_uInt64* stack)
{
    bridges::cpp_uno::shared::CppInterfaceProxy* proxy
        = bridges::cpp_uno::shared::CppInterfaceProxy::castInterfaceToProxy(
            reinterpret_cast<char*>(gpr[0]) - vtableOffset);
    void* indirectRet = reinterpret_cast<void*>(gpr[1]);
    typelib_InterfaceTypeDescription* pInterfaceTD = proxy->getTypeDescr();
    assert(functionIndex < pInterfaceTD->nMapFunctionIndexToMemberIndex);
    sal_Int32 nMemberPos = pInterfaceTD->pMapFunctionIndexToMemberIndex[functionIndex];
diff --git a/bridges/source/cpp_uno/msvc_win32_arm64/uno2cpp.cxx b/bridges/source/cpp_uno/msvc_win32_arm64/uno2cpp.cxx
index a0c2adc..abb9114 100644
--- a/bridges/source/cpp_uno/msvc_win32_arm64/uno2cpp.cxx
+++ b/bridges/source/cpp_uno/msvc_win32_arm64/uno2cpp.cxx
@@ -45,8 +45,8 @@

namespace
{
extern "C" void callVirtualFunction(sal_uInt64* stack, sal_uInt64* frame, sal_uInt64 function,
                                    void* ret);
extern "C" void callVirtualFunction(sal_uInt64* regs, sal_uInt64* stack, sal_Int32 sp,
                                    sal_uInt64 function);

void pushArgument(sal_uInt64 value, sal_uInt64* stack, sal_Int32& sp, sal_uInt64* regs,
                  sal_Int32& nregs)
@@ -68,18 +68,22 @@ void call(bridges::cpp_uno::shared::UnoInterfaceProxy* pProxy,

    sal_uInt64** thisPtr = reinterpret_cast<sal_uInt64**>(pProxy->getCppI()) + slot.offset;

    sal_uInt64* gpr = static_cast<sal_uInt64*>(alloca((count + 16) * sizeof(sal_uInt64) + 32));
    sal_uInt64* fpr = &gpr[8];
    sal_uInt64* stack = &gpr[16];
    sal_uInt64* frame = &gpr[16 + count];
    void** cppArgs = static_cast<void**>(alloca(count * sizeof(void*)));
    typelib_TypeDescription** ptds
        = static_cast<typelib_TypeDescription**>(alloca(count * sizeof(typelib_TypeDescription*)));

    sal_uInt64* gpr = static_cast<sal_uInt64*>(alloca(16 * sizeof(sal_uInt64)));
    sal_uInt64* fpr = &gpr[8];
    sal_uInt64* stack = static_cast<sal_uInt64*>(alloca(count * sizeof(sal_uInt64)));

    sal_Int32 sp = 0;
    sal_Int32 nGPR = 0;
    sal_Int32 nFPR = 0;
    gpr[nGPR++] = reinterpret_cast<sal_uInt64>(thisPtr);
    if (eRetKind == RETURN_KIND_INDIRECT)
    {
        gpr[nGPR++] = reinterpret_cast<sal_uInt64>(ret);
    }

    for (sal_Int32 i = 0; i != count; ++i)
    {
@@ -156,7 +160,7 @@ void call(bridges::cpp_uno::shared::UnoInterfaceProxy* pProxy,

    __try
    {
        callVirtualFunction(stack, frame, (*thisPtr)[slot.index], ret);
        callVirtualFunction(gpr, stack, sp, (*thisPtr)[slot.index]);
    }
    __except (msvc_filterCppException(GetExceptionInformation(), *exception,
                                      pProxy->getBridge()->getCpp2Uno()))
diff --git a/bridges/source/cpp_uno/msvc_win32_arm64/vtableslotcall.S b/bridges/source/cpp_uno/msvc_win32_arm64/vtableslotcall.S
index cda427c..9a1acf3 100644
--- a/bridges/source/cpp_uno/msvc_win32_arm64/vtableslotcall.S
+++ b/bridges/source/cpp_uno/msvc_win32_arm64/vtableslotcall.S
@@ -46,7 +46,6 @@
        mov x3, x20
        mov x2, x19
        stp x4, x5, [sp, 96]
        mov x5, x8
        mov x4, x11
        stp x6, x7, [sp, 112]
        stp d0, d1, [sp, 128]