/** * Copyright (c) 2024 Huawei Technologies Co., Ltd. * This file is a part of the CANN Open Software. * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). * Please refer to the License for details. You may not use this file except in compliance with the License. * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. * See LICENSE in the root of the software repository for the full text of the License. */ /*! * \file xor.h * \brief */ #ifndef LIB_XOR_XOR_H #define LIB_XOR_XOR_H #include "kernel_tensor.h" #include "kernel_operator_intf.h" #include "kernel_pop_stack_buffer.h" #include "../../impl/math/xor/xor_common_impl.h" #if ASCENDC_CPU_DEBUG #include "kernel_log.h" #include #endif #if __CCE_AICORE__ >= 200 namespace AscendC { #pragma begin_pipe(V) /* * @brief Xor Computes the element-wise logical XOR of the given input tensors. Zeros are treated as False and nonzeros * are treated as True. Mathematical formulas: 0^0=0；0^1=1；1^0=1；1^1=0 * @ingroup xor * @param [out] dstTensor, output LocalTensor * @param [in] srcTensor0, input LocalTensor * @param [in] srcTensor1, input LocalTensor * @param [in] sharedTmpBuffer, input local temporary Tensor * @param [in] calCount, amount of input data to be calculated */ template __aicore__ inline void Xor(const LocalTensor& dstTensor, const LocalTensor &src0Tensor, const LocalTensor &src1Tensor, const LocalTensor& sharedTmpBuffer, const uint32_t calCount) { // Only for AI Vector Core. if ASCEND_IS_AIC { return; } uint32_t tmpBufferSize = sharedTmpBuffer.GetSize() / sizeof(T); uint32_t stackSize = tmpBufferSize / ONE_BLK_SIZE * ONE_BLK_SIZE; // integer multiple of 32 bytes #if ASCENDC_CPU_DEBUG ASCENDC_ASSERT(((QuePosition)dstTensor.GetPosition() == TPosition::VECIN || (QuePosition)dstTensor.GetPosition() == TPosition::VECOUT || (QuePosition)dstTensor.GetPosition() == TPosition::VECCALC), { KERNEL_LOG(KERNEL_ERROR, "dst position not support, just support position is VECIN, VECOUT, VECCALC."); }); bool result = (calCount <= src0Tensor.GetSize()) && (calCount <= src1Tensor.GetSize()) && (calCount <= dstTensor.GetSize() && (calCount > 0)); ASCENDC_ASSERT(result, { KERNEL_LOG(KERNEL_ERROR, "calCount must be less than or equal to src/dst tensor"); }); result = (std::is_same::value) || (std::is_same::value); ASCENDC_ASSERT(result, { KERNEL_LOG(KERNEL_ERROR, "type must be int16_t or unt16_t"); }); ASCENDC_ASSERT((tmpBufferSize > 0), { KERNEL_LOG(KERNEL_ERROR, "tmpBufferSize must > 0!"); }); ASCENDC_ASSERT((stackSize > 0), { KERNEL_LOG(KERNEL_ERROR, "stackSize must > 0!"); }); #endif const uint32_t round = calCount / stackSize; const uint32_t tail = calCount % stackSize; LocalTensor tmpTensor = sharedTmpBuffer.ReinterpretCast(); SetMaskCount(); SetVectorMask(0, stackSize); uint32_t offset = 0; for (uint32_t i = 0; i < round; i++) { XorCalcSimplified(dstTensor[offset], src0Tensor[offset], src1Tensor[offset], tmpTensor); offset = offset + stackSize; } if (tail != 0) { SetVectorMask(0, tail); XorCalcSimplified(dstTensor[offset], src0Tensor[offset], src1Tensor[offset], tmpTensor); } SetMaskNorm(); SetVectorMask(FULL_MASK, FULL_MASK); } /* * @brief Xor Computes the element-wise logical XOR of the given input tensors. Zeros are treated as False and nonzeros * are treated as True. Mathematical formulas: 0^0=0；0^1=1；1^0=1；1^1=0 * @ingroup xor * @param [out] dstTensor, output LocalTensor * @param [in] srcTensor0, input LocalTensor * @param [in] srcTensor1, input LocalTensor * @param [in] sharedTmpBuffer, input local temporary Tensor */ template __aicore__ inline void Xor(const LocalTensor& dstTensor, const LocalTensor &src0Tensor, const LocalTensor &src1Tensor, const LocalTensor& sharedTmpBuffer) { #if ASCENDC_CPU_DEBUG bool result = (src0Tensor.GetSize() == src1Tensor.GetSize()); ASCENDC_ASSERT(result, { KERNEL_LOG(KERNEL_ERROR, "operands must be equal in size"); }); #endif Xor(dstTensor, src0Tensor, src1Tensor, sharedTmpBuffer, src0Tensor.GetSize()); } /* * @brief Xor Computes the element-wise logical XOR of the given input tensors. Zeros are treated as False and nonzeros * are treated as True. Mathematical formulas: 0^0=0；0^1=1；1^0=1；1^1=0 * @ingroup xor * @param [out] dstTensor, output LocalTensor * @param [in] srcTensor0, input LocalTensor * @param [in] srcTensor1, input LocalTensor * @param [in] calCount, amount of input data to be calculated */ template __aicore__ inline void Xor(const LocalTensor &dstTensor, const LocalTensor &src0Tensor, const LocalTensor &src1Tensor, const uint32_t calCount) { LocalTensor sharedTmpBuffer; bool ans = PopStackBuffer(sharedTmpBuffer); ASCENDC_ASSERT((ans), { KERNEL_LOG(KERNEL_ERROR, "PopStackBuffer Error!"); }); Xor(dstTensor, src0Tensor, src1Tensor, sharedTmpBuffer, calCount); } /* * @brief Xor Computes the element-wise logical XOR of the given input tensors. Zeros are treated as False and nonzeros * are treated as True. Mathematical formulas: 0^0=0；0^1=1；1^0=1；1^1=0 * @ingroup xor * @param [out] dstTensor, output LocalTensor * @param [in] srcTensor0, input LocalTensor * @param [in] srcTensor1, input LocalTensor */ template __aicore__ inline void Xor(const LocalTensor &dstTensor, const LocalTensor &src0Tensor, const LocalTensor &src1Tensor) { #if ASCENDC_CPU_DEBUG bool result = (src0Tensor.GetSize() == src1Tensor.GetSize()); ASCENDC_ASSERT(result, { KERNEL_LOG(KERNEL_ERROR, "operands must be equal in size"); }); #endif Xor(dstTensor, src0Tensor, src1Tensor, src0Tensor.GetSize()); } #pragma end_pipe } // namespace AscendC #endif #endif // LIB_XOR_XOR_H