# one/one.cpp

/*******************************************************************************
    Copyright (c) 2020-2022 Qualcomm Technologies, Inc.
    All rights reserved.
    
    Redistribution and use in source and binary forms, with or without
    modification, are permitted (subject to the limitations in the disclaimer
    below) provided that the following conditions are met:
    
    * Redistributions of source code must retain the above copyright notice, this
        list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice,
        this list of conditions and the following disclaimer in the documentation
        and/or other materials provided with the distribution.
    * Neither the name of Qualcomm Technologies, Inc. nor the names of its
        contributors may be used to endorse or promote products derived from this
        software without specific prior written permission.
    
    NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
    THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
    CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
    NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
    ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    
    @brief
    Program to run each feature of FastADAS once.
    *******************************************************************************/
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <fadas.h>
    
    // Round x up to the nearest multiple of 128; already-aligned values are unchanged.
    #define ALIGN_128(x)   (((x) + 127) & ~127)
    // Smaller of x and y (y when x is not less than y). An argument may be evaluated twice.
    #define MIN(x,y)  (!((x) < (y)) ? (y) : (x))
    
    /**
     * Read rows of raw bytes from a file into a strided buffer.
     *
     * @param path    File to read; opened in binary mode.
     * @param w       Number of bytes read from the file for each row.
     * @param h       Number of rows to read.
     * @param stride  Distance in bytes between the starts of consecutive rows in img.
     * @param img     Destination buffer; row y is written starting at img[y * stride].
     * @param offset  Byte offset from the start of the file at which reading begins
     *                (e.g. to skip a file header); 0 reads from the beginning.
     *
     * Nothing is returned; every failure is reported on stderr. If the seek to
     * offset fails, no rows are read and img is left untouched.
     */
    void ReadRAW( const char* path, uint32_t w, uint32_t h, uint32_t stride, uint8_t* img, int offset = 0 )
    {
        if( ( NULL == path ) || ( NULL == img ) )
        {
            fprintf( stderr, "Invalid input arguments\n" );
            return;
        }

        FILE* fp = fopen( path, "rb" );
        if( !fp )
        {
            fprintf( stderr, "Could not open %s for reading\n", path );
            return;
        }

        // A failed seek would leave the stream at the wrong position and the rows
        // below would silently be filled from the wrong bytes, so stop here instead.
        if( ( offset != 0 ) && ( fseek( fp, offset, SEEK_SET ) != 0 ) )
        {
            fprintf( stderr, "Could not seek to offset %d in %s\n", offset, path );
            fclose( fp );
            return;
        }

        for( uint32_t y = 0; y < h; y++ )
        {
            // Compute the row start in size_t so y * stride cannot wrap in 32 bits.
            uint8_t* row = img + (size_t)y * stride;

            // One item of w bytes: a short row counts as a failed read.
            if( fread( row, w, 1, fp ) != 1 )
                fprintf( stderr, "Could not read line %u from %s\n", (unsigned)y, path );
        }

        fclose( fp );
    } /* ReadRAW */
    
    /**
     * Read comma-separated floating-point values from a text file into a map buffer.
     *
     * For each of the h rows, one leading value is parsed, followed by w values
     * that are each preceded by a comma; the w comma-prefixed values are stored
     * at map[w * i + j] for row i and column j.
     *
     * @param path  Text file to read.
     * @param w     Number of map entries stored per row.
     * @param h     Number of rows to read.
     * @param map   Destination buffer; presumably holds at least w * h entries
     *              (verify against the caller).
     *
     * Nothing is returned; failures are reported on stderr and reading continues.
     */
    void ReadMap( const char* path, int w, int h, float32_t* map )
    {
        if( ( NULL == path ) || ( NULL == map ) )
        {
            fprintf( stderr, "Invalid input arguments\n" );
            return;
        }

        FILE* fp = fopen( path, "r" );
        if( !fp )
        {
            fprintf( stderr, "Error opening %s\n", path );
            return;
        }

        for( int i = 0; i < h; ++i )
        {
            const int idx_row = w * i;

            // fscanf returns the number of converted items: 0 means the text did
            // not match and EOF (< 0) means the input ran out, so anything other
            // than 1 is a failed read.
            if( fscanf( fp, "%f", &map[idx_row] ) != 1 )
                fprintf( stderr, "Error reading to %s\n", path );

            // NOTE(review): the j == 0 iteration overwrites the value stored just
            // above, so each row consumes w + 1 values from the file. Confirm the
            // file layout (e.g. a leading index column) before changing this.
            for( int j = 0; j < w; ++j )
            {
                if( fscanf( fp, ", %f", &map[idx_row + j] ) != 1 )
                    fprintf( stderr, "Error reading to %s\n", path );
            }

            // "\n" converts nothing, so only EOF (< 0) can signal a failure here.
            if( ( i < ( h - 1 ) ) && ( fscanf( fp, "\n" ) < 0 ) )
                fprintf( stderr, "Error reading to %s\n", path );
        }

        fclose( fp );
    } /* ReadMap */
    
    int main( int argc, char** argv )
    {
        FadasError_e ans;
    
        const char* version = FadasVersion();
        if( version != nullptr )
            printf( "FadasVersion = %s\n", version );
    
        ans = FadasInit( nullptr );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasInit failed\n" );
    
        // Multi-Threading parameters
        FadasImage_t srcImg, dstImg;
    
        // Downscaling
        uint32_t w = 1920;
        uint32_t h = 1080;
        uint32_t wOrg = w;
        uint32_t hOrg = h;
        uint32_t srcStride = ALIGN_128(w);
        uint32_t memsz = 3 * srcStride * h;
        const uint64_t byte_alignment = 128;
        uint8_t* src = (uint8_t*)FadasMemAlloc( memsz, byte_alignment, nullptr );
        uint8_t* rgb = (uint8_t*)FadasMemAlloc( memsz, byte_alignment, nullptr );
        uint8_t* dst = (uint8_t*)FadasMemAlloc( memsz, byte_alignment, nullptr );
        uint8_t* dst2 = (uint8_t*)FadasMemAlloc( memsz, byte_alignment, nullptr );
        uint8_t* dstS2 = (uint8_t*)FadasMemAlloc( memsz, byte_alignment, nullptr );
        uint8_t* dst_uv = (uint8_t*)FadasMemAlloc( memsz, byte_alignment, nullptr );
    
        if( ( NULL == src ) || ( NULL == rgb )  ||
            ( NULL == dst ) || ( NULL == dst2 ) ||
            ( NULL == dstS2 ) || ( NULL == dst_uv ) )
        {
            fprintf( stderr, "Failed to allocate memory\n" );
            // Try and free memory if it is already allocated
            (void) FadasMemFree( src );
            (void) FadasMemFree( rgb );
            (void) FadasMemFree( dst );
            (void) FadasMemFree( dst2 );
            (void) FadasMemFree( dstS2 );
            (void) FadasMemFree( dst_uv );
            return -1;
        }
    
        // Register I/O buffers with FastADAS
        ans = FadasRegBuf(FADAS_BUF_TYPE_IN, (void *)src, size_t(memsz));
        if(FADAS_ERROR_NONE == ans)
        {
            ans = FadasRegBuf(FADAS_BUF_TYPE_INOUT, (void *)rgb, size_t(memsz));
            // de-register input buffer
            if(FADAS_ERROR_NONE != ans)
            {
                (void)FadasDeregBuf(src);
            }
        }
        if(FADAS_ERROR_NONE == ans)
        {
            ans = FadasRegBuf(FADAS_BUF_TYPE_INOUT, (void *)dst, size_t(memsz));
            if(FADAS_ERROR_NONE != ans)
            {
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
            }
        }
        if(FADAS_ERROR_NONE == ans)
        {
            ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dst2, size_t(memsz));
            if(FADAS_ERROR_NONE != ans)
            {
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst2);
            }
        }
        if(FADAS_ERROR_NONE == ans)
        {
            ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dstS2, size_t(memsz));
            if(FADAS_ERROR_NONE != ans)
            {
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
            }
        }
    
        if(FADAS_ERROR_NONE == ans)
        {
            ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dst_uv, size_t(memsz));
            if(FADAS_ERROR_NONE != ans)
            {
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
            }
        }

        if(FADAS_ERROR_NONE != ans)
        {
            fprintf( stderr, "Failed to register buffer\n" );
    
            (void)FadasMemFree(src);
            (void)FadasMemFree(rgb);
            (void)FadasMemFree(dst);
            (void)FadasMemFree(dstS2);
            (void)FadasMemFree(dst2);
            (void)FadasMemFree(dst_uv);
    
            return false;
        }
    
        w = 1920, h = 1020;
        srcStride = ALIGN_128(w);
        ReadRAW( "img2.pgm", w, h, srcStride, src, 19 );
        uint32_t dstStride = ALIGN_128(w/2);
        FadasImgProps_t srcProps = { w, h, FADAS_IMAGE_FORMAT_Y, {srcStride, 0, 0, 0}, 1 };
        FadasImgProps_t dstProps = { w/2, h/2, FADAS_IMAGE_FORMAT_Y, {dstStride, 0, 0, 0}, 1 };
        ans = FadasCvtYUV_DownscaleBy2( src, srcProps, dst, &dstProps );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_DownscaleBy2 failed\n" );
    
        dstProps.width = (w * 46341) >> 16;
        dstProps.stride[0] = ALIGN_128((w * 46341) >> 16);
        ans = FadasCvtYUV_DownscaleBySqrt2( src, srcProps, dst, &dstProps );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_DownscaleBySqrt2 failed\n" );

        // Downscale from ROI (upper-left quarter) of input image
        FadasROI_t roi = { 0, 0, w / 2, h / 2 };
    
        srcProps.width = roi.width;
        srcProps.height = roi.height;
        srcProps.stride[0] = srcStride;
        dstProps.width = roi.width/2;
        dstProps.height = roi.height/2;
        dstProps.stride[0] = ALIGN_128(dstProps.width);
        ans = FadasCvtYUV_DownscaleBy2( src, srcProps, dst, &dstProps );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_DownscaleBy2 failed\n" );
    
        // Downscale to arbitrary dimensions and ratios
        srcProps.width = w;
        srcProps.height = h;
        srcProps.stride[0] = srcStride;
        dstProps.width = 640;
        dstProps.height = 480;
        dstProps.stride[0] = ALIGN_128(dstProps.width);
        ans = FadasCvtYUV_Downscale( src, srcProps, dst, dstProps );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_Downscale failed\n" );
    
        // Normalize Y image using fake mean, scale, and standard deviation parameters
        FadasNormlzParams_t normlz = { 30.123f, 1.3121f, 31.51f };
    
        srcProps.width = (uint32_t)w;
        srcProps.height = (uint32_t)h;
        srcProps.stride[0] = srcStride;
        dstStride = srcStride;
        ans = FadasCvtYUV_Renormalize( src, srcProps, normlz, dst, dstStride );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_Renormalize failed\n" );
    
        // YUV conversions
        w = 1920, h = 1024;
        srcStride = ALIGN_128((5 * w) / 2);
        ReadRAW( "img3.10uyvy", (5 * w) / 2, h, srcStride, src );
        int dst_uv_stride = ALIGN_128(w);
    
        srcProps.width = (uint32_t)w;
        srcProps.height = (uint32_t)h;
        srcProps.stride[0] = srcStride;
        srcProps.format = FADAS_IMAGE_FORMAT_UYVY10;
        ans = FadasCvtYUV_UYVY10toNV12( src, srcProps, dst, w, dst_uv, dst_uv_stride );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_UYVY10toNV12 failed ans = %d \n", ans );
    
        dstStride = ALIGN_128(w * 3);
        ans = FadasCvtYUV_UYVY10toRGB( src, srcProps, dst, dstStride );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_UYVY10toRGB failed ans = %d \n", ans );
    
        w = 1920, h = 1020;
        srcStride = ALIGN_128(2 * w);
        ReadRAW( "img1.uyvy", (2 * w), h, srcStride, src );
        dst_uv_stride = ALIGN_128(w);
    
        srcProps.width = (uint32_t)w;
        srcProps.height = (uint32_t)h;
        srcProps.stride[0] = srcStride;
        srcProps.format = FADAS_IMAGE_FORMAT_UYVY;
        ans = FadasCvtYUV_UYVYtoNV12( src, srcProps, dst, w, dst_uv, dst_uv_stride );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_UYVYtoNV12 failed ans = %d \n", ans );
    
        dstStride = ALIGN_128(w * 3);
        ans = FadasCvtYUV_UYVYtoRGB( src, srcProps, rgb, dstStride );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_UYVYtoRGB failed ans = %d \n",ans );

        uint32_t rgb_stride = ALIGN_128(3 * w);
        srcProps.width = (uint32_t)w;
        srcProps.height = (uint32_t)h;
        srcProps.format = FADAS_IMAGE_FORMAT_RGB888;
        srcProps.stride[0] = rgb_stride;
        dstProps.format = FADAS_IMAGE_FORMAT_RGB888;
        dstProps.stride[0] = ALIGN_128((w*3)/2);
        ans = FadasCvtYUV_Downscale888by2( rgb, srcProps, dst, &dstProps );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_Downscale888by2 failed\n" );
    
        uint64_t s2width = (w * 46341) >> 16;
        dstProps.stride[0] = ALIGN_128(s2width * 3);
        dstProps.width = s2width;
        dstProps.height = (h * 46341) >> 16;
        ans = FadasCvtYUV_Downscale888bySqrt2( rgb, srcProps, dst, &dstProps );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_Downscale888bySqrt2 failed\n" );
    
        // Downscale to arbitrary dimensions and ratios
        srcProps.width = w;
        srcProps.height = h;
        srcProps.stride[0] = rgb_stride;
        dstProps.width = 640;
        dstProps.height = 480;
        dstProps.stride[0] = ALIGN_128(dstProps.width * 3);
        ans = FadasCvtYUV_Downscale888( src, srcProps, dst, dstProps );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_Downscale888 failed\n" );

        // Downscale YUV888 and convert to RGB
        srcStride = ALIGN_128( w * 2);
        dstStride = ALIGN_128( w * 3);
        int w_2 = (w * 2) / 3;
        int h_2 = (h * 2) / 3;
        srcProps = { (uint32_t)w, (uint32_t)h, FADAS_IMAGE_FORMAT_UYVY,   {(uint32_t)srcStride, 0, 0, 0}, 1 };
        dstProps = { (uint32_t)w, (uint32_t)h, FADAS_IMAGE_FORMAT_YUV888, {(uint32_t)dstStride, 0, 0, 0}, 1 };
        FadasImgProps_t dst2Props = { ( uint32_t )w_2, ( uint32_t )h_2, FADAS_IMAGE_FORMAT_RGB888, {( uint32_t )( w_2 * 3 ), 0, 0, 0}, 1 };
        ans = FadasCvtYUV_UYVYtoYUV( src, srcProps, dst, dstStride );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_UYVYtoYUV failed in downscale YUV88 and convert to RGB pipeline\n" );
        ans = FadasCvtYUV_DownscaleYUV888AndRGB888( dst, dstProps, dst2, dst2Props );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_DownscaleYUV888AndRGB888 failed in downscale YUV88 and convert to RGB pipeline\n" );
    
        // MT version
        srcImg.plane[0] = dst;
        dstImg.plane[0] = dst2;
        srcImg.props = dstProps;
        dstImg.props = dst2Props;
        roi = { 0 };
    
        int32_t n_threads_affinity[] = { 0,1,2,3 };
    
        void* wrkrsYUV = FadasCvtYUV_CreateWorkers( 4, n_threads_affinity, FADAS_CVTYUV_PIPELINE_DownscaleYUV888AndRGB888 );
        if( nullptr == wrkrsYUV )
        {
            fprintf( stderr, "Error in creating FADAS_CVTYUV_PIPELINE_DownscaleYUV888AndRGB888 wrkrs" );
        }
        else
        {
            ans = FadasCvtYUV_UYVYtoYUV( src, srcProps, dst, dstStride );
            if( FADAS_ERROR_NONE != ans )
            {
                fprintf( stderr, "FadasCvtYUV_UYVYtoYUV failed in downscale YUV88 and convert to RGB pipeline MT version\n" );
            }
            else
            {
                ans = FadasCvtYUV_RunMT( wrkrsYUV, &srcImg, &dstImg, &roi );
    
                if( FADAS_ERROR_NONE != ans )
                {
                    fprintf( stderr, "FadasCvtYUV_RunMT failed\n" );
                }
            }
    
            ans = FadasCvtYUV_DestroyWorkers( wrkrsYUV );
            if( FADAS_ERROR_NONE != ans )
            {
                fprintf( stderr, "FadasCvtYUV_DestroyWorkers for downscale YUV88 and convert to RGB pipeline failed\n" );
            }
        }

        // Downscale UYVY and convert to RGB
        memset( dst2, 0, memsz );
        srcProps = { (uint32_t)w, (uint32_t)h, FADAS_IMAGE_FORMAT_UYVY, {(uint32_t)srcStride, 0, 0, 0}, 1 };
        dstProps = { (uint32_t)w_2, (uint32_t)h_2, FADAS_IMAGE_FORMAT_RGB888, {(uint32_t)(w_2 * 3), 0, 0, 0}, 1 };
    
        srcImg.plane[0] = src;
        dstImg.plane[0] = dst2;
        srcImg.props = srcProps;
        dstImg.props = dst2Props;
        roi = { 0 };
    
        wrkrsYUV = FadasCvtYUV_CreateWorkers( 4, n_threads_affinity, FADAS_CVTYUV_PIPELINE_DownscaleUYVYAndRGB888 );
        if( nullptr == wrkrsYUV )
        {
            fprintf(stderr,  "creating FADAS_CVTYUV_PIPELINE_DownscaleUYVYAndRGB888 wrkrs" );
        }
        else
        {
            ans = FadasCvtYUV_RunMT( wrkrsYUV, &srcImg, &dstImg, &roi );
            if( FADAS_ERROR_NONE != ans )
            {
                fprintf( stderr, "FADAS_CVTYUV_PIPELINE_DownscaleUYVYAndRGB888 MT failed\n" );
            }
    
            ans = FadasCvtYUV_DestroyWorkers( wrkrsYUV );
            if( FADAS_ERROR_NONE != ans )
            {
                fprintf( stderr, "FadasCvtYUV_DestroyWorkers for FADAS_CVTYUV_PIPELINE_DownscaleUYVYAndRGB888 MT pipeline failed\n" );
            }
        }
    
        // Normalize RGB image (rgb) into RGB destination (dst) using fake mean, scale, and standard deviation parameters
        FadasNormlzParams_t normlzR = { 30.999f, 1.111f, 30.321f };
        FadasNormlzParams_t normlzG = { 40.888f, 1.222f, 40.432f };
        FadasNormlzParams_t normlzB = { 50.777f, 1.333f, 50.654f };
        srcProps.width = (uint32_t)w;
        srcProps.height = (uint32_t)h;
        srcProps.format = FADAS_IMAGE_FORMAT_RGB888;
        srcProps.stride[0] = rgb_stride;
        ans = FadasCvtYUV_Renormalize888( rgb, srcProps, normlzR, normlzG, normlzB, dst, dstStride );
        if( FADAS_ERROR_NONE != ans )
        {
            fprintf( stderr, "FadasCvtYUV_Renormalize888 failed\n" );
        }
    
        // Do same operation but feed into one layer of float32 DL network (dst32)
        int sz32 = rgb_stride * h * sizeof( float32_t );
        int dst32_stride = dstStride * sizeof( float32_t );
        float32_t* dst32 = (float32_t*)FadasMemAlloc( sz32, 128, nullptr );
        srcProps.width = (uint32_t)w;
        srcProps.height = (uint32_t)h;
        srcProps.format = FADAS_IMAGE_FORMAT_RGB888;
        srcProps.stride[0] = rgb_stride;
    
        // Register I/O buffers with FastADAS
        ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dst32, size_t(sz32));
    
        if(FADAS_ERROR_NONE != ans)
        {
            fprintf( stderr, "Failed to register buffer\n" );
    
            (void)FadasDeregBuf(src);
            (void)FadasDeregBuf(rgb);
            (void)FadasDeregBuf(dst);
            (void)FadasDeregBuf(dst2);
            (void)FadasDeregBuf(dstS2);
            (void)FadasDeregBuf(dst32);
            (void)FadasDeregBuf(dst_uv);
    
            (void)FadasMemFree(src);
            (void)FadasMemFree(rgb);
            (void)FadasMemFree(dst);
            (void)FadasMemFree(dst2);
            (void)FadasMemFree(dstS2);
            (void)FadasMemFree(dst32);
            (void)FadasMemFree(dst_uv);
    
            return false;
        }
    
        ans = FadasCvtYUV_Renormalize888u8f32( rgb, srcProps, normlzR, normlzG, normlzB, dst32, dst32_stride );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_Renormalize888u8f32 failed\n" );
    
        // Combined downscaling and YUV conversion
        dstStride = ALIGN_128(w * 3);
        srcProps.width = (uint32_t)w;
        srcProps.height = (uint32_t)h;
        srcProps.stride[0] = srcStride;
        srcProps.format = FADAS_IMAGE_FORMAT_UYVY;
        FadasImgProps_t dstS2Props = { w / 2, h / 2, FADAS_IMAGE_FORMAT_UYVY, {ALIGN_128( ( w * 3 ) / 2 ), 0, 0, 0}, 1 };
        uint64_t widths2 = ((w * 46341) >> 16);
        uint64_t strides2 = ALIGN_128(widths2 * 3);
        dst2Props = { ( uint32_t )widths2, ( ( h * 46341 ) >> 16 ), FADAS_IMAGE_FORMAT_RGB888, {( uint32_t )strides2, 0, 0, 0}, 1 };
        dstS2Props.format = FADAS_IMAGE_FORMAT_RGB888;
        FadasError_e ret = FadasCvtYUV_UYVYtoRGB888andScale( src, srcProps, dst, dstStride,
                                                            dstS2, &dst2Props,
                                                            dst2, &dstS2Props );
        if( FADAS_ERROR_NONE != ret )
            fprintf( stderr, "FadasCvtYUV_UYVYtoRGB888andScale failed\n" );

        // Remap grayscale
        w = 640, h = 480;
        srcStride = ALIGN_128(w);
        ReadRAW( "img0.pgm", w, h, srcStride, src, 15 );
        int sz = w * h * sizeof( float32_t );
        float32_t* mapX = (float32_t*)FadasMemAlloc( sz, 16, nullptr );
        float32_t* mapY = (float32_t*)FadasMemAlloc( sz, 16, nullptr );
        ReadMap( "img0_map_x.csv", w, h, mapX );
        ReadMap( "img0_map_y.csv", w, h, mapY );
    
        uint32_t mapStride = w * sizeof( float32_t );
        memset( &roi, 0, sizeof( FadasROI_t ) );
        FadasRemapMap_t* map = FadasRemap_CreateMapFromMap( w, h, w, h, mapStride, mapX, mapY,
                                                            FADAS_REMAP_PIPELINE_1C8, 0 );
    
        srcImg.plane[0] = src;
        dstImg.plane[0] = dst;
        srcImg.props = dstImg.props = { ( uint32_t )w, ( uint32_t )h, FADAS_IMAGE_FORMAT_Y,
                                        {( uint32_t )w, 0, 0, 0}, 1 };
    
        ans = FadasRemap_Run( map, &srcImg, &dstImg, &roi );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasRemap_Run failed\n" );
    
        // MT versions of Remap grayscale
        void* wrkrsRemap = FadasRemap_CreateWorkers( 4, n_threads_affinity, FADAS_REMAP_PIPELINE_1C8 );
        if( wrkrsRemap == nullptr )
        {
            printf( "creating wrkrsRemap 0" );
        }
        else
        {
            ans = FadasRemap_RunMT( wrkrsRemap, map, &srcImg, &dstImg, &roi );
            if( ans != FADAS_ERROR_NONE )
                fprintf( stderr, "FadasRemap_Run MT grayscale failed\n" );
    
            ans = FadasRemap_DestroyWorkers( wrkrsRemap );
            if( ans != FADAS_ERROR_NONE )
                fprintf( stderr, "FadasRemap_DestroyWorkers for grayscale failed\n" );
        }

        // Remap from fisheye calibration
        FadasCameraProps_t camProps = { 279.886772, 279.406252, 313.537817, 248.773836 };
        FadasDistCoeffs_t distCoeffs = { 0.003908, -0.009574, 0.010173, -0.003329,
                                        0.0, 0.0, 0.0, 0.0 };
    
        FadasRemapMap_t* map1 = FadasRemap_CreateMapFromCalib(
                                    w, h, camProps, FADAS_LDC_FISHYEYE4,
                                    distCoeffs, FADAS_REMAP_PIPELINE_1C8, w, h, 0 );
    
        srcImg.plane[0] = src;
        dstImg.plane[0] = dst;
        srcImg.props = { ( uint32_t )w, ( uint32_t )h, FADAS_IMAGE_FORMAT_Y,
                        {( uint32_t )srcStride, 0, 0, 0}, 1 };
        dstImg.props = { ( uint32_t )w, ( uint32_t )h, FADAS_IMAGE_FORMAT_Y,
                        {( uint32_t )dstStride, 0, 0, 0}, 1 };
    
        ans = FadasRemap_Run( map1, &srcImg, &dstImg, &roi );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasRemap_Run failed\n" );
    
        // Remap on ROI (lower-right quarter)
        // upper-left corner of ROI
        roi.x = w / 2;
        roi.y = h / 2;
        // width and height of ROI
        roi.width = w / 2;
        roi.height = h / 2;
        ans = FadasRemap_Run( map, &srcImg, &dstImg, &roi );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasRemap_Run failed\n" );
    
        // Remap RGB on same ROI if FadasRemap_CreateMap or FadasRemap_CreateMapFromCalib
        // called with image format of "3" instead of "1"
    #if 0
        int roi_strideRGB = ALIGN_128(3 * roi_w);
        FadasRemap_Run( map, srcRGB, strideRGB, dstRGB, roi_strideRGB, roi );
    #endif

        // Remap UYVY + DS + Color conversion
        roi = {0};
        uint32_t wOut = 640;
        uint32_t hOut = 542;
        srcStride = ALIGN_128(2*wOrg);
        dstStride = ALIGN_128(3*wOrg);
    
        srcImg.plane[0] = src;
        dstImg.plane[0] = dst;
        srcImg.props = { ( uint32_t )wOrg, ( uint32_t )hOrg, FADAS_IMAGE_FORMAT_UYVY,
                        {( uint32_t )srcStride, 0, 0, 0}, 1 };
        dstImg.props = { ( uint32_t )wOut, ( uint32_t )hOut, FADAS_IMAGE_FORMAT_RGB888,
                        {( uint32_t )dstStride, 0, 0, 0}, 1 };
    
        map1 = FadasRemap_CreateMapFromCalib( wOrg, hOrg, camProps, FADAS_LDC_FISHYEYE4,
                                            distCoeffs, FADAS_REMAP_PIPELINE_UYVY_TO_RGB888,
                                            wOut, hOut, 0 );
    
        ans = FadasRemap_Run( map1, &srcImg, &dstImg, &roi );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasRemap_Run UYVY failed\n" );

        // Remap UYVY + DS + Color conversion  (MT version)
        wrkrsRemap = FadasRemap_CreateWorkers( 4, n_threads_affinity, FADAS_REMAP_PIPELINE_UYVY_TO_RGB888 );
    
        if( wrkrsRemap == nullptr )
            fprintf( stderr, "creating FADAS_REMAP_PIPELINE_UYVY_TO_RGB888 wrkrs" );
    
        memset( dst, 0, dstStride * hOut );
    
        ans = FadasRemap_RunMT( wrkrsRemap, map1, &srcImg, &dstImg, &roi );
        if( ans != FADAS_ERROR_NONE )
            fprintf( stderr, "FadasRemap_Run UYVY MT version failed\n" );
    
        ans = FadasRemap_DestroyWorkers( wrkrsRemap );
            if( ans != FADAS_ERROR_NONE )
                fprintf( stderr, "FadasRemap_DestroyWorkers for UYVY MT version failed\n" );

        // Pyramid API usage from NV12 source
        FadasPyramid_Level_t pyramid[3];
        srcStride = ALIGN_128(wOrg * 2);
        dstStride = ALIGN_128(hOrg);
        srcProps = { ( uint32_t )w, ( uint32_t )h, FADAS_IMAGE_FORMAT_UYVY,
                    {( uint32_t )srcStride, 0, 0, 0}, 1 };
        dstProps = { ( uint32_t )w, ( uint32_t )h, FADAS_IMAGE_FORMAT_Y,
                    {( uint32_t )dstStride, 0, 0, 0}, 1 };
        dst2Props = { 0, 0, FADAS_IMAGE_FORMAT_Y, {0, 0, 0, 0}, 0 };
    
        // convert UYVY to NV12
        ans = FadasCvtYUV_UYVYtoNV12( src, srcProps, dst, dstStride, dst_uv, dstStride );
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_UYVYtoNV12 failed in pyramid pipeline ans = %d \n", ans );
    
        if ( FADAS_ERROR_NONE == ans )
        {
            pyramid[0].data.plane[0] = (void *)dst;
            pyramid[0].data.props.width = w;
            pyramid[0].data.props.height = h;
            pyramid[0].data.props.stride[0] = dstStride;
            pyramid[0].data.props.format = FADAS_IMAGE_FORMAT_Y;
            pyramid[0].data.props.numPlanes = 1;
    
            pyramid[1].data.plane[0] = nullptr;
            pyramid[1].data.props.width = w/2;
            pyramid[1].data.props.height = h/2;
            pyramid[1].data.props.stride[0] = ALIGN_128( w/2 );
            pyramid[1].data.props.format = FADAS_IMAGE_FORMAT_Y;
            pyramid[1].data.props.numPlanes = 1;
    
            pyramid[2].data.plane[0] = nullptr;
            pyramid[2].data.props.width = w/4;
            pyramid[2].data.props.height = h/4;
            pyramid[2].data.props.stride[0] = ALIGN_128( w/4 );
            pyramid[2].data.props.format = FADAS_IMAGE_FORMAT_Y;
            pyramid[2].data.props.numPlanes = 1;
    
            ans = FadasPyramid_Allocate( pyramid[0].data.props, pyramid, 3 );
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "FadasPyramid_Allocate failed in pyramid pipeline\n" );
    
            if ( FADAS_ERROR_NONE == ans )
            {
            ans = FadasPyramid_Run( &(pyramid[0].data), pyramid, 3 );
            }
    
            FadasPyramid_Free( pyramid, 3 );
    
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "FadasPyramid_Run failed in pyramid pipeline\n" );
        }

        // Pyramid API usage from RGB source
        srcStride = ALIGN_128(wOrg * 2);
        dstStride = ALIGN_128(hOrg * 3);
        srcProps = { ( uint32_t )w, ( uint32_t )h, FADAS_IMAGE_FORMAT_UYVY,
                    {( uint32_t )srcStride, 0, 0, 0}, 1 };
        dstProps = { ( uint32_t )w, ( uint32_t )h, FADAS_IMAGE_FORMAT_RGB888,
                    {( uint32_t )dstStride, 0, 0, 0}, 1 };
        dst2Props = { 0, 0, FADAS_IMAGE_FORMAT_RGB888, {0, 0, 0, 0}, 0  };
    
        ans = FadasCvtYUV_UYVYtoRGB( src, srcProps, dst, dstStride ); // convert UYVY to RGB
        if( FADAS_ERROR_NONE != ans )
            fprintf( stderr, "FadasCvtYUV_UYVYtoRGB failed in pyramid pipeline\n" );
        if ( FADAS_ERROR_NONE == ans )
        {
            pyramid[0].data.plane[0] = (void *)dst;
            pyramid[0].data.props.width = w;
            pyramid[0].data.props.height = h;
            pyramid[0].data.props.stride[0] = dstStride;
            pyramid[0].data.props.format = FADAS_IMAGE_FORMAT_RGB888;
            pyramid[0].data.props.numPlanes = 1;
    
            pyramid[1].data.plane[0] = nullptr;
            pyramid[1].data.props.width = w/2;
            pyramid[1].data.props.height = h/2;
            pyramid[1].data.props.stride[0] = ALIGN_128( pyramid[1].data.props.width * 3 );
            pyramid[1].data.props.format = FADAS_IMAGE_FORMAT_RGB888;
            pyramid[1].data.props.numPlanes = 1;

            pyramid[2].data.plane[0] = nullptr;
            pyramid[2].data.props.width = w/4;
            pyramid[2].data.props.height = h/4;
            pyramid[2].data.props.stride[0] = ALIGN_128( pyramid[2].data.props.width * 3 );
            pyramid[2].data.props.format = FADAS_IMAGE_FORMAT_RGB888;
            pyramid[2].data.props.numPlanes = 1;

            ans = FadasPyramid_Allocate( pyramid[0].data.props, pyramid, 3 );
    
            if(FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "Failed to register buffer\n" );
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
                (void)FadasDeregBuf(dst32);
                (void)FadasDeregBuf(dst_uv);
    
                (void)FadasMemFree(src);
                (void)FadasMemFree(rgb);
                (void)FadasMemFree(dst);
                (void)FadasMemFree(dst2);
                (void)FadasMemFree(dstS2);
                (void)FadasMemFree(dst32);
                (void)FadasMemFree(dst_uv);
                FadasPyramid_Free( pyramid, 3 );
    
                return false;
            }
    
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "FadasPyramid_Allocate failed in pyramid pipeline\n" );
    
            if ( FADAS_ERROR_NONE == ans )
            {
            ans = FadasPyramid_Run( &(pyramid[0].data), pyramid, 3 );
            }
    
            (void)FadasPyramid_Free( pyramid, 3 );
    
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "FadasPyramid_Run failed in pyramid pipeline\n" );
        }

        // RT Transformation
        const int nPts = 3;
        const int srcPtsStride = 4 * sizeof( float32_t );
        const int dstPtsStride = srcPtsStride;
        const int memsz_pts = nPts * srcPtsStride;
        float32_t* src_pts = (float32_t*)FadasMemAlloc( memsz_pts, byte_alignment, nullptr );
        float32_t* dst_pts = (float32_t*)FadasMemAlloc( memsz_pts, byte_alignment, nullptr );
    
        if(( NULL != src_pts ) && ( NULL != dst_pts ) )
        {
    
            ans = FadasRegBuf(FADAS_BUF_TYPE_IN, (void *)src_pts, size_t(memsz_pts));
    
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dst_pts, size_t(memsz_pts));
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(src_pts);
                }
    
            }
    
            if(FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "Failed to register buffer\n" );
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
                (void)FadasDeregBuf(dst32);
                (void)FadasDeregBuf(dst_uv);
    
                (void)FadasMemFree(src);
                (void)FadasMemFree(rgb);
                (void)FadasMemFree(dst);
                (void)FadasMemFree(dst2);
                (void)FadasMemFree(dstS2);
                (void)FadasMemFree(dst32);
                (void)FadasMemFree(dst_uv);
                (void)FadasMemFree(src_pts);
                (void)FadasMemFree(dst_pts);
    
                return false;
            }
    
            memset( src_pts, 0, memsz_pts );
            memset( dst_pts, 0, memsz_pts );
    
            // fake data
            for( int i = 0; i < nPts; ++i )
            {
                int idx = i * 4;
                src_pts[idx + 0] = float32_t(i * 100);
                src_pts[idx + 1] = float32_t(i);
                src_pts[idx + 2] = float32_t(i);
                src_pts[idx + 3] = 1.f;
            }
    
            // Rotate group of points around z-axis and translate along (45,45) degree line.
            uint32_t RT_stride = 4 * sizeof( float32_t );
            float32_t RTz[4][4] = { { 0, -1, 0, 100 },
                                    { 1,  0, 0, 100 },
                                    { 0,  0, 1, 100 },
                                    { 0,  0, 0,   1 } };
            ans = Fadas3D_MultiplyPtsWth4x4RT( &RTz[0][0], RT_stride, nPts, src_pts, srcPtsStride,
                                            dst_pts, dstPtsStride );
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "Fadas3D_MultiplyPtsWth4x4RT failed\n" );
        }
    
        // Test Tensor APIs
        int mT = 3;
        int nT = 3;
        int nPtsT = 2;
        int mnT = mT * nT;
        int srcStride_T = mnT * sizeof( float32_t );
        int dstStride_T = mT * sizeof( float32_t );
        const int memszT = nPtsT * srcStride_T;
        const uint64_t byte_alignment_T = 128;
        float32_t* src1T = (float32_t*)FadasMemAlloc( memszT, byte_alignment_T, nullptr );
        float32_t* src2T = (float32_t*)FadasMemAlloc( memszT, byte_alignment_T, nullptr );
        float32_t* dst_T = (float32_t*)FadasMemAlloc( memszT, byte_alignment_T, nullptr );
    
        if ( ( NULL != src1T ) && ( NULL != src2T ) && ( NULL != dst_T ) )
        {
    
            ans = FadasRegBuf(FADAS_BUF_TYPE_IN, (void *)src1T, size_t(memszT));
    
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf(FADAS_BUF_TYPE_IN, (void *)src2T, size_t(memszT));
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(src1T);
                }
            }
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dst_T, size_t(memszT));
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(src1T);
                    (void)FadasDeregBuf(src2T);
                }
            }
            if(FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "Failed to register buffer\n" );
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
                (void)FadasDeregBuf(dst32);
                (void)FadasDeregBuf(dst_uv);
                (void)FadasDeregBuf(src_pts);
                (void)FadasDeregBuf(dst_pts);
    
                (void)FadasMemFree(src);
                (void)FadasMemFree(rgb);
                (void)FadasMemFree(dst);
                (void)FadasMemFree(dstS2);
                (void)FadasMemFree(dst2);
                (void)FadasMemFree(dst32);
                (void)FadasMemFree(dst_uv);
                (void)FadasMemFree(src_pts);
                (void)FadasMemFree(dst_pts);
                (void)FadasMemFree(src1T);
                (void)FadasMemFree(src2T);
                (void)FadasMemFree(dst_T);
    
                return false;
            }
    
            memset( src1T, 0, memszT );
            memset( src2T, 0, memszT );
            memset( dst_T, 0, memszT );
    
            for( int k = 0; k < nPtsT; ++k )
            {
                src1T[mnT * k + mT * 0 + 0] = 1;
                src1T[mnT * k + mT * 0 + 1] = 2;
                src1T[mnT * k + mT * 0 + 2] = 3;
                src1T[mnT * k + mT * 1 + 0] = 4;
                src1T[mnT * k + mT * 1 + 1] = 5;
                src1T[mnT * k + mT * 1 + 2] = 6;
                src1T[mnT * k + mT * 2 + 0] = 7;
                src1T[mnT * k + mT * 2 + 1] = 8;
                src1T[mnT * k + mT * 2 + 2] = 9;
    
                src2T[mnT * k + mT * 0 + 0] = 9;
                src2T[mnT * k + mT * 0 + 1] = 8;
                src2T[mnT * k + mT * 0 + 2] = 7;
                src2T[mnT * k + mT * 1 + 0] = 6;
                src2T[mnT * k + mT * 1 + 1] = 5;
                src2T[mnT * k + mT * 1 + 2] = 4;
                src2T[mnT * k + mT * 2 + 0] = 3;
                src2T[mnT * k + mT * 2 + 1] = 2;
                src2T[mnT * k + mT * 2 + 2] = 1;
            }
    
            // FadasTensor_Dot2Dx
            for( int k = 0; k < nPtsT; ++k )
            {
                dst_T[nT * k + 0] = 0;
                dst_T[nT * k + 1] = 0;
                dst_T[nT * k + 2] = 0;
            }
    
            ans = FadasTensor_Dot2Dx( nPtsT, nT, mT, src1T, srcStride_T, src2T, srcStride_T, dst_T,
                                    dstStride_T );
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "FadasTensor_Dot2Dx failed\n" );

            // FadasTensor_Dot2Dy
            for( int k = 0; k < nPtsT; ++k )
            {
                dst_T[nT * k + 0] = 0;
                dst_T[nT * k + 1] = 0;
                dst_T[nT * k + 2] = 0;
            }
            ans = FadasTensor_Dot2Dy( nPtsT, nT, mT, src1T, srcStride_T, src2T, srcStride_T, dst_T,
                                    dstStride_T );
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "FadasTensor_Dot2Dy failed\n" );
        }
    
        // Matrix multiply 3x3 with multiple 3x1
        const int mM3 = 3;
        const int n_ptsM3 = 2;
        const int srcStrideM3 = mM3 * sizeof( float32_t );
        const int dstStrideM3 = srcStrideM3;
        const int memszM3 = n_ptsM3 * srcStrideM3;
        const uint64_t byte_alignmentM3 = 128;
        float32_t* srcM3 = (float32_t*)FadasMemAlloc( memszM3, byte_alignmentM3, nullptr );
        float32_t* dstM3 = (float32_t*)FadasMemAlloc( memszM3, byte_alignmentM3, nullptr );
    
        if( ( NULL != srcM3  ) && ( NULL != dstM3  ) )
        {
    
            ans = FadasRegBuf(FADAS_BUF_TYPE_IN, (void *)srcM3, size_t(memszM3));
    
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dstM3, size_t(memszM3));
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(srcM3);
                }
            }
    
            if(FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "Failed to register buffer\n" );
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
                (void)FadasDeregBuf(src_pts);
                (void)FadasDeregBuf(dst_pts);
                (void)FadasDeregBuf(src1T);
                (void)FadasDeregBuf(src2T);
                (void)FadasDeregBuf(dst_T);
                (void)FadasDeregBuf(dst32);
                (void)FadasDeregBuf(dst_uv);
    
                (void)FadasMemFree(dst32);
                (void)FadasMemFree(src);
                (void)FadasMemFree(rgb);
                (void)FadasMemFree(dst);
                (void)FadasMemFree(dstS2);
                (void)FadasMemFree(dst2);
                (void)FadasMemFree(src_pts);
                (void)FadasMemFree(dst_pts);
                (void)FadasMemFree(src1T);
                (void)FadasMemFree(src2T);
                (void)FadasMemFree(dst_T);
                (void)FadasMemFree(srcM3);
                (void)FadasMemFree(dstM3);
                (void)FadasMemFree(dst_uv);
    
                return false;
            }
    
            memset( srcM3, 0, memszM3 );
            memset( dstM3, 0, memszM3 );
            srcM3[mM3 * 0 + 0] = 17;
            srcM3[mM3 * 0 + 1] = 18;
            srcM3[mM3 * 0 + 2] = 19;
            srcM3[mM3 * 1 + 0] = 21;
            srcM3[mM3 * 1 + 1] = 22;
            srcM3[mM3 * 1 + 2] = 23;
            uint32_t M_strideM3 = mM3 * sizeof( float32_t );
            float32_t MM3[mM3][mM3] = { {  1,  2,  3 },
                                    {  4,  5,  6 },
                                    {  7,  8,  9 } };
            ans = FadasTensor_Multiply3x3byNx3( &MM3[0][0], M_strideM3, n_ptsM3, srcM3, srcStrideM3,
                                                dstM3, dstStrideM3 );
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "FadasTensor_Multiply3x3byNx3 failed\n" );
        }
    
        // Matrix multiply 4x4 with multiple 4x1
        const int mM4 = 4;
        const int n_ptsM4 = 2;
        const int srcStrideM4 = mM4 * sizeof( float32_t );
        const int dstStrideM4 = srcStrideM4;
        const int memszM4 = n_ptsM4 * srcStrideM4;
        const uint64_t byte_alignmentM4 = 128;
        float32_t* srcM4 = (float32_t*)FadasMemAlloc( memszM4, byte_alignmentM4, nullptr );
        float32_t* dstM4 = (float32_t*)FadasMemAlloc( memszM4, byte_alignmentM4, nullptr );
    
        if( ( NULL != srcM4  ) && ( NULL != dstM4  ) )
        {
            ans = FadasRegBuf(FADAS_BUF_TYPE_IN, (void *)srcM4, size_t(memszM4));
    
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dstM4, size_t(memszM4));
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(srcM4);
                }
            }
    
            if(FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "Failed to register buffer\n" );
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
                (void)FadasDeregBuf(src_pts);
                (void)FadasDeregBuf(dst_pts);
                (void)FadasDeregBuf(src1T);
                (void)FadasDeregBuf(src2T);
                (void)FadasDeregBuf(dst_T);
                (void)FadasDeregBuf(srcM3);
                (void)FadasDeregBuf(dstM3);
                (void)FadasDeregBuf(dst32);
                (void)FadasDeregBuf(dst_uv);
    
                (void)FadasMemFree(dst32);
                (void)FadasMemFree(src);
                (void)FadasMemFree(rgb);
                (void)FadasMemFree(dst);
                (void)FadasMemFree(dstS2);
                (void)FadasMemFree(dst2);
                (void)FadasMemFree(src_pts);
                (void)FadasMemFree(dst_pts);
                (void)FadasMemFree(src1T);
                (void)FadasMemFree(src2T);
                (void)FadasMemFree(dst_T);
                (void)FadasMemFree(srcM3);
                (void)FadasMemFree(dstM3);
                (void)FadasMemFree(srcM4);
                (void)FadasMemFree(dstM4);
                (void)FadasMemFree(dst_uv);
    
                return false;
            }
    
            memset( srcM4, 0, memszM4 );
            memset( dstM4, 0, memszM4 );
            srcM4[mM4 * 0 + 0] = 17;
            srcM4[mM4 * 0 + 1] = 18;
            srcM4[mM4 * 0 + 2] = 19;
            srcM4[mM4 * 0 + 3] = 20;
            srcM4[mM4 * 1 + 0] = 21;
            srcM4[mM4 * 1 + 1] = 22;
            srcM4[mM4 * 1 + 2] = 23;
            srcM4[mM4 * 1 + 3] = 24;
            uint32_t M_strideM4 = mM4 * sizeof( float32_t );
            float32_t MM4[mM4][mM4] = { {  1,  2,  3,  4 },
                                    {  5,  6,  7,  8 },
                                    {  9, 10, 11, 12 },
                                    { 13, 14, 15, 16 } };
    
            ans = FadasTensor_Multiply4x4byNx4( &MM4[0][0], M_strideM4, n_ptsM4, srcM4, srcStrideM4,
                                                dstM4, dstStrideM4 );
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "FadasTensor_Multiply4x4byNx4 failed\n" );
        }

        // Test NCHW2NHWCf32
        FadasTensorFormat_t srcFormat = {2, 8, 4, 2,
                                        8 * 4 * 2 * sizeof(float32_t),
                                        1 * sizeof(float32_t),
                                        8 * sizeof(float32_t),
                                        8 * 4 * sizeof(float32_t)};
    
        FadasTensorFormat_t dstFormat = {2, 8, 4, 2,
                                        8 * 2 * 4 * sizeof(float32_t),
                                        2 * sizeof(float32_t),
                                        8 * 2 * sizeof(float32_t),
                                        1 * sizeof(float32_t)};
    
        float32_t *src1Ptr = ( float32_t* ) FadasMemAlloc( 2 * 8 * 4 * 2 * sizeof(float32_t),
                                                        byte_alignment, nullptr );
        float32_t *dst1Ptr = ( float32_t* ) FadasMemAlloc( 2 * 8 * 8 * sizeof(float32_t),
                                                        byte_alignment, nullptr );
    
        if( ( NULL != src1Ptr  ) && ( NULL != dst1Ptr  ) )
        {
            ans = FadasRegBuf(FADAS_BUF_TYPE_IN, (void *)src1Ptr, size_t(128 * sizeof(float32_t)));
    
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dst1Ptr, size_t(128 * sizeof(float32_t)));
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(src1Ptr);
                }
            }
    
            if(FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "Failed to register buffer\n" );
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
                (void)FadasDeregBuf(src_pts);
                (void)FadasDeregBuf(dst_pts);
                (void)FadasDeregBuf(src1T);
                (void)FadasDeregBuf(src2T);
                (void)FadasDeregBuf(dst_T);
                (void)FadasDeregBuf(srcM3);
                (void)FadasDeregBuf(dstM3);
                (void)FadasDeregBuf(srcM4);
                (void)FadasDeregBuf(dstM4);
                (void)FadasDeregBuf(dst32);
                (void)FadasDeregBuf(dst_uv);
    
                (void)FadasMemFree(dst32);
                (void)FadasMemFree(src);
                (void)FadasMemFree(rgb);
                (void)FadasMemFree(dst);
                (void)FadasMemFree(dstS2);
                (void)FadasMemFree(dst2);
                (void)FadasMemFree(src_pts);
                (void)FadasMemFree(dst_pts);
                (void)FadasMemFree(src1T);
                (void)FadasMemFree(src2T);
                (void)FadasMemFree(dst_T);
                (void)FadasMemFree(srcM3);
                (void)FadasMemFree(dstM3);
                (void)FadasMemFree(srcM4);
                (void)FadasMemFree(dstM4);
                (void)FadasMemFree(src1Ptr);
                (void)FadasMemFree(dst1Ptr);
                (void)FadasMemFree(dst_uv);
    
                return false;
            }
    
            float32_t src[2 * 8 * 4 * 2] =
            { 1,  2, 3, 4, 5, 6, 900, 901,
            7,  8, 9,10,11,12, 902, 903,
            13,14,15,16,17,18, 904, 905,
            19,20,21,22,23,24, 906, 907,
    
            25,26,27,28,29,30, 908, 909,
            31,32,33,34,35,36, 910, 911,
            37,38,39,40,41,42, 912, 913,
            43,44,45,46,47,48, 914, 915,
    
            60,61,62,63,64,65, 1000, 1001,
            66,67,68,69,70,71, 1002, 1003,
            72,73,74,75,76,77, 1004, 1005,
            78,79,80,81,82,83, 1006, 1007,
    
            84,85,86,87,88,89, 1008, 1009,
            90, 91,92,93,94,95, 1010, 1011,
            96, 97, 98, 99, 100, 101, 1012, 1013,
            102,103,104,105,106, 107 ,1014, 1015};
    
            memcpy(src1Ptr,src,2 * 8 * 4 * 2 * sizeof(float32_t));
    
            memset(dst1Ptr, 0, sizeof(float32_t) * 2 * 8 * 4 * 2);
    
            ans = FadasTensor_NCHW2NHWCf32(src1Ptr, &srcFormat, dst1Ptr, &dstFormat);
    
            if (FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "FadasTensor_NCHW2NHWCf32 failed\n" );
            }
        }

        // Test NHWC2NCHWf32
        dstFormat = { 2, 8, 4, 2,
                    8 * 4 * 2 * sizeof(float32_t),
                    1 * sizeof(float32_t),
                    8 * sizeof(float32_t),
                    8 * 4 * sizeof(float32_t) };
    
        srcFormat = { 2, 8, 4, 2,
                    8 * 2 * 4 * sizeof(float32_t),
                    2 * sizeof(float32_t),
                    8 * 2 * sizeof(float32_t),
                    1 * sizeof(float32_t) };
    
        uint32_t  src2Size = srcFormat.n * srcFormat.n_stride;
        float32_t *src2Ptr = ( float32_t* ) FadasMemAlloc( src2Size, byte_alignment, nullptr );
    
        if (src2Ptr == nullptr)
        {
            return false;
        }
    
        uint32_t  dst2Size = dstFormat.n * dstFormat.n_stride;
        float32_t *dst2Ptr = ( float32_t* ) FadasMemAlloc( dst2Size, byte_alignment, nullptr );
    
        if (dst2Ptr == nullptr)
        {
            return false;
        }

        if( ( NULL != src2Ptr  ) && ( NULL != dst2Ptr  ) )
        {
            float32_t src[2 * 8 * 2 * 4] =
            {
                1,25,2,26,3,27,4,28,5,29,6,30,900,908,901,909,
                7,31,8,32,9,33,10,34,11,35,12,36,902,910,903,911,
                13,37,14,38,15,39,16,40,17,41,18,42,904,912,905,913,
                19,43,20,44,21,45,22,46,23,47,24,48,906,914,907,915,
    
                60,84,61,85,62,86,63,87,64,88,65,89,1000,1008,1001,1009,
                66,90,67,91,68,92,69,93,70,94,71,95,1002,1010,1003,1011,
                72,96,73,97,74,98,75,99,76,100,77,101,1004,1012,1005,1013,
                78, 102, 79, 103, 80, 104, 81, 105, 82, 106, 83, 107, 1006,1014,1007,1015
            };
    
            memcpy(src2Ptr, src, src2Size);
    
            memset(dst2Ptr, 0, dst2Size);
    
            ans = FadasRegBuf(FADAS_BUF_TYPE_IN, (void *)src2Ptr, size_t(src2Size));
    
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)dst2Ptr, size_t(dst2Size));
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(src2Ptr);
                }
            }
    
            if(FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "Failed to register buffer\n" );
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
                (void)FadasDeregBuf(src_pts);
                (void)FadasDeregBuf(dst_pts);
                (void)FadasDeregBuf(src1T);
                (void)FadasDeregBuf(src2T);
                (void)FadasDeregBuf(dst_T);
                (void)FadasDeregBuf(srcM3);
                (void)FadasDeregBuf(dstM3);
                (void)FadasDeregBuf(srcM4);
                (void)FadasDeregBuf(dstM4);
                (void)FadasDeregBuf(src1Ptr);
                (void)FadasDeregBuf(dst1Ptr);
                (void)FadasDeregBuf(dst32);
                (void)FadasDeregBuf(dst_uv);
    
                (void)FadasMemFree(dst32);
                (void)FadasMemFree(src);
                (void)FadasMemFree(rgb);
                (void)FadasMemFree(dst);
                (void)FadasMemFree(dstS2);
                (void)FadasMemFree(dst2);
                (void)FadasMemFree(src_pts);
                (void)FadasMemFree(dst_pts);
                (void)FadasMemFree(src1T);
                (void)FadasMemFree(src2T);
                (void)FadasMemFree(dst_T);
                (void)FadasMemFree(srcM3);
                (void)FadasMemFree(dstM3);
                (void)FadasMemFree(srcM4);
                (void)FadasMemFree(dstM4);
                (void)FadasMemFree(src1Ptr);
                (void)FadasMemFree(dst1Ptr);
                (void)FadasMemFree(dst_uv);
    
                return false;
            }
    
            ans = FadasTensor_NHWC2NCHWf32(src2Ptr, &srcFormat, dst2Ptr, &dstFormat);
    
            if (FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "FadasTensor_NHWC2NCHWf32 failed\n" );
            }
        }
    
        // SV decomposition
        int mSV;
        int nSV;
        const char *path = "vector_0001_src.data"; // Read A from file
        FILE* fin = fopen(path, "rb");
        if ( nullptr == fin )
        {
        fprintf( stderr,"error reading file %s", path );
        return false;
        }
        char buff[255];
        char buff1[255];
        (void)fgets(buff, 255, fin);  // Read MetaData // type
        (void)fgets(buff1, 255, fin); // width height
        sscanf(buff1, "%d %d\nSV", &mSV, &nSV);
        (void)fgets(buff, 255, fin); // number of channels
    
        uint32_t ASize = (mSV * nSV * sizeof(float32_t));
        uint32_t wSVSize = (nSV * sizeof(float32_t));
        uint32_t USize  = (mSV * MIN(mSV, nSV) * sizeof(float32_t));
        uint32_t VtSize = (nSV * MIN(mSV, nSV) * sizeof(float32_t));
        uint32_t tmpUSize = (mSV * nSV * sizeof(float32_t));
        uint32_t tmpVSize = (nSV * nSV * sizeof(float32_t));
    
        float32_t* A = (float32_t*) FadasMemAlloc( ASize, byte_alignment, nullptr );
        float32_t* wSV = (float32_t*) FadasMemAlloc( wSVSize, byte_alignment, nullptr );
        float32_t* U = (float32_t*) FadasMemAlloc( USize, byte_alignment, nullptr );
        float32_t* Vt = (float32_t*) FadasMemAlloc( VtSize, byte_alignment, nullptr);
        float32_t* tmpU = (float32_t*) FadasMemAlloc( tmpUSize, byte_alignment, nullptr);
        float32_t* tmpV = (float32_t*) FadasMemAlloc( tmpVSize, byte_alignment, nullptr);
    
        if( ( NULL != A ) && ( NULL != wSV ) && ( NULL != U ) &&
            ( NULL != Vt ) && ( NULL != tmpU )  && ( NULL != tmpV ) )
        {
            ans = FadasRegBuf( FADAS_BUF_TYPE_IN, (void *)A, size_t(ASize) );
    
            if(FADAS_ERROR_NONE == ans)
            {
    
                ans = FadasRegBuf( FADAS_BUF_TYPE_OUT, (void *)wSV, size_t(wSVSize) );
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(A);
                }
            }
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf( FADAS_BUF_TYPE_OUT, (void *)U, size_t(USize) );
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(A);
                    (void)FadasDeregBuf(wSV);
                }
            }
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf( FADAS_BUF_TYPE_OUT, (void *)Vt, size_t(VtSize) );
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(A);
                    (void)FadasDeregBuf(wSV);
                    (void)FadasDeregBuf(U);
                }
            }
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf( FADAS_BUF_TYPE_OUT, (void *)tmpU, size_t(tmpUSize) );
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(A);
                    (void)FadasDeregBuf(wSV);
                    (void)FadasDeregBuf(U);
                    (void)FadasDeregBuf(Vt);
                }
            }
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf( FADAS_BUF_TYPE_OUT, (void *)tmpV, size_t(tmpVSize) );
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(A);
                    (void)FadasDeregBuf(wSV);
                    (void)FadasDeregBuf(U);
                    (void)FadasDeregBuf(Vt);
                    (void)FadasDeregBuf(tmpU);
                }
            }
            if(FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "Failed to register buffer\n" );
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
                (void)FadasDeregBuf(src_pts);
                (void)FadasDeregBuf(dst_pts);
                (void)FadasDeregBuf(src1T);
                (void)FadasDeregBuf(src2T);
                (void)FadasDeregBuf(dst_T);
                (void)FadasDeregBuf(srcM3);
                (void)FadasDeregBuf(dstM3);
                (void)FadasDeregBuf(srcM4);
                (void)FadasDeregBuf(dstM4);
                (void)FadasDeregBuf(src1Ptr);
                (void)FadasDeregBuf(dst1Ptr);
                (void)FadasDeregBuf(src2Ptr);
                (void)FadasDeregBuf(dst2Ptr);
                (void)FadasDeregBuf(dst32);
                (void)FadasDeregBuf(dst_uv);
    
                (void)FadasMemFree(dst32);
                (void)FadasMemFree(src);
                (void)FadasMemFree(rgb);
                (void)FadasMemFree(dst);
                (void)FadasMemFree(dstS2);
                (void)FadasMemFree(dst2);
                (void)FadasMemFree(src_pts);
                (void)FadasMemFree(dst_pts);
                (void)FadasMemFree(src1T);
                (void)FadasMemFree(src2T);
                (void)FadasMemFree(dst_T);
                (void)FadasMemFree(srcM3);
                (void)FadasMemFree(dstM3);
                (void)FadasMemFree(srcM4);
                (void)FadasMemFree(dstM4);
                (void)FadasMemFree(src1Ptr);
                (void)FadasMemFree(dst1Ptr);
                (void)FadasMemFree(src2Ptr);
                (void)FadasMemFree(dst2Ptr);
                (void)FadasMemFree(A);
                (void)FadasMemFree(wSV);
                (void)FadasMemFree(U);
                (void)FadasMemFree(Vt);
                (void)FadasMemFree(tmpU);
                (void)FadasMemFree(tmpV);
                (void)FadasMemFree(dst_uv);
    
                return false;
            }
    
            memset(A, 0, ASize);
            memset(wSV, 0, wSVSize);
            memset(U, 0, USize);
            memset(Vt, 0, VtSize);
            memset(tmpU, 0, tmpUSize);
            memset(tmpV, 0, tmpVSize);
    
            fread(A, sizeof(float32_t), mSV * nSV, fin);  // Read A data
            fclose(fin);
            fin = nullptr;
    
            ans = FadasSVD_SVDf32(A, mSV, nSV, wSV, U, Vt, tmpU, tmpV);
            if( FADAS_ERROR_NONE != ans )
                fprintf( stderr, "FadasSVD_SVDf32 failed\n" );
        }
    
        // LU decomposition
        int mLU;
        int nLU;
        path = "A_0.data";
        fin = fopen(path, "rb");
        if ( nullptr == fin )
        {
        fprintf( stderr, "error reading file %s",path);
        return false;
        }
    
        (void)fgets(buff, 255, fin);  // Read MetaData // type
        (void)fgets(buff1, 255, fin); // width height
        sscanf(buff1, "%d %d\nLU", &mLU, &nLU);
        if(mLU != nLU)
            fprintf( stderr, "mLU != nLU, provide same values\n" );
    
        (void)fgets(buff, 255, fin); // number of channels
    
        uint32_t A2Size = (nLU * nLU * sizeof(float32_t));
        uint32_t bSize = (nLU * sizeof(float32_t));
        uint32_t pivotSize = (nLU * sizeof(uint8_t));
        uint32_t xSize = (nLU * sizeof(float32_t));
        float32_t* A2 = (float32_t*) FadasMemAlloc( A2Size, byte_alignment, nullptr);
        float32_t* b = (float32_t*) FadasMemAlloc( bSize, byte_alignment, nullptr);
        uint8_t* pivot = (uint8_t*) FadasMemAlloc( pivotSize, byte_alignment, nullptr);
        float32_t* x = (float32_t*) FadasMemAlloc( xSize, byte_alignment, nullptr);
    
        if( ( NULL != A2  ) && ( NULL != b  ) && ( NULL != pivot  ) && ( NULL != x  ) )
        {
            ans = FadasRegBuf( FADAS_BUF_TYPE_INOUT, (void *)A2, size_t(A2Size) );
    
            if(FADAS_ERROR_NONE == ans)
            {
    
                ans = FadasRegBuf( FADAS_BUF_TYPE_INOUT, (void *)b, size_t(bSize) );
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(A2);
                }
            }
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)pivot, size_t(pivotSize));
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(A2);
                    (void)FadasDeregBuf(b);
                }
            }
            if(FADAS_ERROR_NONE == ans)
            {
                ans = FadasRegBuf(FADAS_BUF_TYPE_OUT, (void *)x, size_t(xSize));
                if(FADAS_ERROR_NONE != ans)
                {
                    (void)FadasDeregBuf(A2);
                    (void)FadasDeregBuf(b);
                    (void)FadasDeregBuf(pivot);
                }
            }
            if(FADAS_ERROR_NONE != ans)
            {
                fprintf( stderr, "Failed to register buffer\n" );
                (void)FadasDeregBuf(src);
                (void)FadasDeregBuf(rgb);
                (void)FadasDeregBuf(dst);
                (void)FadasDeregBuf(dst2);
                (void)FadasDeregBuf(dstS2);
                (void)FadasDeregBuf(src_pts);
                (void)FadasDeregBuf(dst_pts);
                (void)FadasDeregBuf(src1T);
                (void)FadasDeregBuf(src2T);
                (void)FadasDeregBuf(dst_T);
                (void)FadasDeregBuf(srcM3);
                (void)FadasDeregBuf(dstM3);
                (void)FadasDeregBuf(srcM4);
                (void)FadasDeregBuf(dstM4);
                (void)FadasDeregBuf(src1Ptr);
                (void)FadasDeregBuf(dst1Ptr);
                (void)FadasDeregBuf(src2Ptr);
                (void)FadasDeregBuf(dst2Ptr);
                (void)FadasDeregBuf(A);
                (void)FadasDeregBuf(wSV);
                (void)FadasDeregBuf(U);
                (void)FadasDeregBuf(Vt);
                (void)FadasDeregBuf(tmpU);
                (void)FadasDeregBuf(tmpV);
                (void)FadasDeregBuf(dst32);
                (void)FadasDeregBuf(dst_uv);
    
                (void)FadasMemFree(dst32);
                (void)FadasMemFree(src);
                (void)FadasMemFree(rgb);
                (void)FadasMemFree(dst);
                (void)FadasMemFree(dstS2);
                (void)FadasMemFree(dst2);
                (void)FadasMemFree(src_pts);
                (void)FadasMemFree(dst_pts);
                (void)FadasMemFree(src1T);
                (void)FadasMemFree(src2T);
                (void)FadasMemFree(dst_T);
                (void)FadasMemFree(srcM3);
                (void)FadasMemFree(dstM3);
                (void)FadasMemFree(srcM4);
                (void)FadasMemFree(dstM4);
                (void)FadasMemFree(src1Ptr);
                (void)FadasMemFree(dst1Ptr);
                (void)FadasMemFree(src2Ptr);
                (void)FadasMemFree(dst2Ptr);
                (void)FadasMemFree(A);
                (void)FadasMemFree(wSV);
                (void)FadasMemFree(U);
                (void)FadasMemFree(Vt);
                (void)FadasMemFree(tmpU);
                (void)FadasMemFree(tmpV);
                (void)FadasMemFree(A2);
                (void)FadasMemFree(b);
                (void)FadasMemFree(pivot);
                (void)FadasMemFree(x);
                (void)FadasMemFree(dst_uv);
    
                return false;
            }
    
            memset(A2, 0, A2Size);
            memset(b, 0, bSize);
            memset(pivot, 0, pivotSize);
            memset(x, 0, xSize);
    
            fread(A2, sizeof(float32_t), nLU * nLU, fin); // Read A data
            fclose(fin);
            fin = nullptr;
    
            // Read 'b' data
            fin = fopen("b_0.data", "rb");
            if ( nullptr == fin )
            {
                fprintf( stderr, "error reading file b_0.data" );
            }
            else
            {
                int32_t elem_read   = 0;
                int32_t m2          = 0;
                int32_t n2          = 0;
    
                // Read MetaData
                (void)fgets(buff, 255, fin); // type
                (void)fgets(buff1, 255, fin); // width height
                elem_read = sscanf(buff1, "%d %d\n", &m2, &n2);
                if(2 == elem_read)
                {
                    (void)fgets(buff, 255, fin); // number of channels
    
                    // Read b var data
                    fread(b, sizeof(float32_t), m2, fin);
    
                    ans = FadasSVD_SolveLUf32(A2, b, nLU, pivot, x);
                    if( FADAS_ERROR_NONE != ans )
                        fprintf( stderr, "FadasSVD_SolveLUf32 failed\n" );
                }
    
                fclose(fin);
                fin = nullptr;
            }
        }
    
        // cleanup
        (void)FadasDeregBuf(src);
        (void)FadasDeregBuf(rgb);
        (void)FadasDeregBuf(dst);
        (void)FadasDeregBuf(dst2);
        (void)FadasDeregBuf(dstS2);
        (void)FadasDeregBuf(src_pts);
        (void)FadasDeregBuf(dst_pts);
        (void)FadasDeregBuf(src1T);
        (void)FadasDeregBuf(src2T);
        (void)FadasDeregBuf(dst_T);
        (void)FadasDeregBuf(srcM3);
        (void)FadasDeregBuf(dstM3);
        (void)FadasDeregBuf(srcM4);
        (void)FadasDeregBuf(dstM4);
        (void)FadasDeregBuf(src1Ptr);
        (void)FadasDeregBuf(dst1Ptr);
        (void)FadasDeregBuf(src2Ptr);
        (void)FadasDeregBuf(dst2Ptr);
        (void)FadasDeregBuf(A);
        (void)FadasDeregBuf(wSV);
        (void)FadasDeregBuf(U);
        (void)FadasDeregBuf(Vt);
        (void)FadasDeregBuf(tmpU);
        (void)FadasDeregBuf(tmpV);
        (void)FadasDeregBuf(A2);
        (void)FadasDeregBuf(b);
        (void)FadasDeregBuf(pivot);
        (void)FadasDeregBuf(x);
        (void)FadasDeregBuf(dst32);
        (void)FadasDeregBuf(dst_uv);
    
        (void)FadasRemap_DestroyMap( map );
        (void)FadasRemap_DestroyMap( map1 );
        (void)FadasMemFree( mapX );
        (void)FadasMemFree( mapY );
        (void)FadasMemFree( src );
        (void)FadasMemFree( rgb );
        (void)FadasMemFree( dst );
        (void)FadasMemFree( dst2 );
        (void)FadasMemFree( dstS2 );
        (void)FadasMemFree( src_pts );
        (void)FadasMemFree( dst_pts );
    
        (void)FadasMemFree( src1T );
        (void)FadasMemFree( src2T );
        (void)FadasMemFree( dst_T );
        (void)FadasMemFree( srcM3 );
        (void)FadasMemFree( dstM3 );
        (void)FadasMemFree( srcM4 );
        (void)FadasMemFree( dstM4 );
        (void)FadasMemFree( src1Ptr );
        (void)FadasMemFree( dst1Ptr );
        (void)FadasMemFree( src2Ptr );
        (void)FadasMemFree( dst2Ptr );
    
        (void)FadasMemFree(A);
        (void)FadasMemFree(wSV);
        (void)FadasMemFree(U);
        (void)FadasMemFree(Vt);
        (void)FadasMemFree(tmpU);
        (void)FadasMemFree(tmpV);
    
        (void)FadasMemFree(A2);
        (void)FadasMemFree(b);
        (void)FadasMemFree(pivot);
        (void)FadasMemFree(x);
    
        (void)FadasMemFree(dst32);
        (void)FadasMemFree(dst_uv);
    
        FadasDeInit();
    
        return 0;
    }
Copy to clipboard

Last Published: Sep 30, 2024

[Previous Topic
Examples](https://docs.qualcomm.com/bundle/publicresource/80-63309-1/topics/examples.md) [Next Topic
cqp\_basic/app.cpp](https://docs.qualcomm.com/bundle/publicresource/80-63309-1/topics/cqp-basic.md)