/*
 * By downloading, copying, installing or using the software you agree to this license.
 * If you do not agree to this license, do not download, install,
 * copy or use the software.
 *
 *
 *                           License Agreement
 *                For Open Source Computer Vision Library
 *                        (3-clause BSD License)
 *
 * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
 * Third party copyrights are property of their respective owners.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the names of the copyright holders nor the names of the contributors
 *     may be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 *
 * This software is provided by the copyright holders and contributors "as is" and
 * any express or implied warranties, including, but not limited to, the implied
 * warranties of merchantability and fitness for a particular purpose are disclaimed.
 * In no event shall copyright holders or contributors be liable for any direct,
 * indirect, incidental, special, exemplary, or consequential damages
 * (including, but not limited to, procurement of substitute goods or services;
 * loss of use, data, or profits; or business interruption) however caused
 * and on any theory of liability, whether in contract, strict liability,
 * or tort (including negligence or otherwise) arising in any way out of
 * the use of this software, even if advised of the possibility of such damage.
 */

#include "common.hpp"

#include <algorithm>
#include <limits>
#include <vector>
#include <cstring>

namespace CAROTENE_NS {

bool isMorph3x3Supported(const Size2D &size, BORDER_MODE border)
{
    return isSupportedConfiguration() && size.width >= 16 &&
        (border == BORDER_MODE_CONSTANT ||
            border == BORDER_MODE_REPLICATE);
}

#ifdef CAROTENE_NEON

namespace {

struct ErodeVecOp
{
    ErodeVecOp():borderValue(0){}

    ErodeVecOp(BORDER_MODE border, u8 borderValue_) :
        borderValue(borderValue_)
    {
        if (border == BORDER_MODE_REPLICATE)
            borderValue = std::numeric_limits<u8>::max();
    }

    inline uint8x16_t operator()(uint8x16_t a, uint8x16_t b) const
    {
        return vminq_u8(a, b);
    }

    inline uint8x8_t operator()(uint8x8_t a, uint8x8_t b) const
    {
        return vmin_u8(a, b);
    }

    inline u8 operator()(u8 a, u8 b) const
    {
        return std::min(a, b);
    }

    u8 borderValue;
};

struct DilateVecOp
{
    DilateVecOp():borderValue(0){}

    DilateVecOp(BORDER_MODE border, u8 borderValue_) :
        borderValue(borderValue_)
    {
        if (border == BORDER_MODE_REPLICATE)
            borderValue = std::numeric_limits<u8>::min();
    }

    inline uint8x16_t operator()(uint8x16_t a, uint8x16_t b) const
    {
        return vmaxq_u8(a, b);
    }

    inline uint8x8_t operator()(uint8x8_t a, uint8x8_t b) const
    {
        return vmax_u8(a, b);
    }

    inline u8 operator()(u8 a, u8 b) const
    {
        return std::max(a, b);
    }

    u8 borderValue;
};

template <typename VecOp>
void morph3x3(const Size2D &size,
              const u8 * srcBase, ptrdiff_t srcStride,
              u8 * dstBase, ptrdiff_t dstStride,
              BORDER_MODE border, const VecOp & vop)
{
    u8 borderValue = vop.borderValue;
    ptrdiff_t width = (ptrdiff_t)size.width, height = (ptrdiff_t)size.height;

    const uint8x16_t v_zero = vdupq_n_u8(0);
    const uint8x16_t v_border = vdupq_n_u8(borderValue);

    uint8x16_t tprev = v_zero, tcurr = v_zero, tnext = v_zero;
    uint8x16_t t0 = v_zero, t1 = v_zero, t2 = v_zero;

    for (ptrdiff_t y = 0; y < height; ++y)
    {
        const u8 * srow0 = y == 0 && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::max<ptrdiff_t>(y - 1, 0));
        const u8 * srow1 = internal::getRowPtr(srcBase, srcStride, y);
        const u8 * srow2 = y + 1 == height && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::min(y + 1, height - 1));
        u8 * drow = internal::getRowPtr(dstBase, dstStride, y);

        u8 prevx = 0, currx = 0, nextx = 0;
        ptrdiff_t x = 0;
        const ptrdiff_t bwidth = y + 2 < height ? width : (width - 16);

        // perform vertical convolution
        for ( ; x <= bwidth; x += 16)
        {
            internal::prefetch(srow0 + x);
            internal::prefetch(srow1 + x);
            internal::prefetch(srow2 + x);

            uint8x16_t x0 = !srow0 ? v_border : vld1q_u8(srow0 + x);
            uint8x16_t x1 = vld1q_u8(srow1 + x);
            uint8x16_t x2 = !srow2 ? v_border : vld1q_u8(srow2 + x);

            // calculate values for plain CPU part below if needed
            if (x + 16 >= bwidth)
            {
                ptrdiff_t x3 = x == width ? width - 1 : x;
                ptrdiff_t x4 = border == BORDER_MODE_CONSTANT ? x3 - 1 : std::max<ptrdiff_t>(x3 - 1, 0);

                if (border == BORDER_MODE_CONSTANT && x4 < 0)
                    prevx = borderValue;
                else
                    prevx = vop(srow1[x4],
                                vop(srow2 ? srow2[x4] : borderValue,
                                    srow0 ? srow0[x4] : borderValue));

                currx = vop(srow2 ? srow2[x3] : borderValue, vop(srow1[x3], srow0 ? srow0[x3] : borderValue));
            }

            // make shift
            if (x)
            {
                tprev = tcurr;
                tcurr = tnext;
            }

            // and calculate next value
            tnext = vop(vop(x0, x1), x2);

            // make extrapolation for the first elements
            if (!x)
            {
                // make border
                if (border == BORDER_MODE_CONSTANT)
                    tcurr = v_border;
                else if (border == BORDER_MODE_REPLICATE)
                    tcurr = vdupq_n_u8(vgetq_lane_u8(tnext, 0));

                continue;
            }

            // combine 3 "shifted" vectors
            t0 = vextq_u8(tprev, tcurr, 15);
            t1 = tcurr;
            t2 = vextq_u8(tcurr, tnext, 1);

            // and add them
            t0 = vop(t0, vop(t1, t2));

            vst1q_u8(drow + x - 16, t0);
        }

        x -= 16;
        if (x == width)
            --x;

        for ( ; x < width; ++x)
        {
            // make extrapolation for the last elements
            if (x + 1 >= width)
            {
                if (border == BORDER_MODE_CONSTANT)
                    nextx = borderValue;
                else if (border == BORDER_MODE_REPLICATE)
                    nextx = vop(srow2[x], vop(srow1[x], srow0[x]));
            }
            else
                nextx = vop(vop(srow2 ? srow2[x + 1] : borderValue,
                                srow0 ? srow0[x + 1] : borderValue),
                            srow1[x + 1]);

            drow[x] = vop(prevx, vop(currx, nextx));

            // make shift
            prevx = currx;
            currx = nextx;
        }
    }
}

} // namespace

#endif

void erode3x3(const Size2D &size,
              const u8 * srcBase, ptrdiff_t srcStride,
              u8 * dstBase, ptrdiff_t dstStride,
              BORDER_MODE border, u8 borderValue)
{
    internal::assertSupportedConfiguration(isMorph3x3Supported(size, border));
#ifdef CAROTENE_NEON
    morph3x3(size,
             srcBase, srcStride,
             dstBase, dstStride,
             border, ErodeVecOp(border, borderValue));
#else
    (void)size;
    (void)srcBase;
    (void)srcStride;
    (void)dstBase;
    (void)dstStride;
    (void)border;
    (void)borderValue;
#endif
}

void dilate3x3(const Size2D &size,
               const u8 * srcBase, ptrdiff_t srcStride,
               u8 * dstBase, ptrdiff_t dstStride,
               BORDER_MODE border, u8 borderValue)
{
    internal::assertSupportedConfiguration(isMorph3x3Supported(size, border));
#ifdef CAROTENE_NEON
    morph3x3(size,
             srcBase, srcStride,
             dstBase, dstStride,
             border, DilateVecOp(border, borderValue));
#else
    (void)size;
    (void)srcBase;
    (void)srcStride;
    (void)dstBase;
    (void)dstStride;
    (void)border;
    (void)borderValue;
#endif
}

#ifdef CAROTENE_NEON
namespace {

template<class VecUpdate>
void MorphRow(const u8* src, u8* dst, size_t width, s32 cn, size_t ksize)
{
    size_t i, j, k;
    size_t width16 = (width & -16) * cn;
    size_t width8 = (width & -8) * cn;
    width *= cn;

    if (ksize == 1)
    {
        for (i = 0; i < width; i++)
            dst[i] = src[i];
        return;
    }

    ksize = ksize*cn;
    VecUpdate updateOp;
    switch(cn)
    {
    case 1:
        for (i = 0; i < width16; i += 16)
        {
            const u8* sptr = src + i;
            uint8x16_t s = vld1q_u8(sptr);
            internal::prefetch(sptr);

            for( k = 1; k < ksize; ++k)
                s = updateOp(s, vld1q_u8(sptr + k));

            vst1q_u8(dst + i, s);
        }

        for (; i < width8; i += 8)
        {
            const u8* sptr = src + i;
            uint8x8_t s = vld1_u8(sptr);
            internal::prefetch(sptr);

            for( k = 1; k < ksize; ++k)
                s = updateOp(s, vld1_u8(sptr + k));

            vst1_u8(dst + i, s);
        }
        break;
    default:
        for (i = 0; i < width16; i += 16)
        {
            uint8x16_t s = vld1q_u8(src + i);
            internal::prefetch(src + i);

            for (k = cn; k < ksize; k += cn)
                s = updateOp(s, vld1q_u8(src + i + k));

            vst1q_u8(dst + i, s);
        }

        for (; i < width8; i += 8)
        {
            uint8x8_t s = vld1_u8(src + i);
            internal::prefetch(src + i);

            for (k = cn; k < ksize; k += cn)
                s = updateOp(s, vld1_u8(src + i + k));

            vst1_u8(dst + i, s);
        }
        break;
    }

    ptrdiff_t i0 = i;
    for( k = 0; k < (size_t)cn; k++, src++, dst++ )
    {
        for( i = i0; i <= width - cn*2; i += cn*2 )
        {
            const u8* s = src + i;
            u8 m = s[cn];
            for( j = cn*2; j < ksize; j += cn )
                m = updateOp(m, s[j]);
            dst[i] = updateOp(m, s[0]);
            dst[i+cn] = updateOp(m, s[j]);
        }

        for( ; i < width; i += cn )
        {
            const u8* s = src + i;
            u8 m = s[0];
            for( j = cn; j < ksize; j += cn )
                m = updateOp(m, s[j]);
            dst[i] = m;
        }
    }
}

template<class VecUpdate>
void MorphColumn(const u8** src, u8* dst, ptrdiff_t dststep, size_t count, size_t width, size_t ksize)
{
    size_t i, k;
    size_t width32 = width & -32;
    VecUpdate updateOp;

    uint8x16_t x0,x1,s0,s1;
    if (ksize == 3)
    {
        for (; count > 1; count -= 2, dst += dststep * 2, src += 2)
        {
            for (i = 0; i < width32; i += 32)
            {
                const u8* sptr = src[1] + i;
                s0 = vld1q_u8(sptr);
                s1 = vld1q_u8(sptr + 16);
                internal::prefetch(sptr);

                sptr = src[2] + i;
                x0 = vld1q_u8(sptr);
                x1 = vld1q_u8(sptr + 16);
                internal::prefetch(sptr);

                s0 = updateOp(s0, x0);
                s1 = updateOp(s1, x1);

                sptr = src[0] + i;
                x0 = vld1q_u8(sptr);
                x1 = vld1q_u8(sptr + 16);
                internal::prefetch(sptr);

                vst1q_u8(dst+i, updateOp(s0, x0));
                vst1q_u8(dst+i+16, updateOp(s1, x1));

                sptr = src[3] + i;
                x0 = vld1q_u8(sptr);
                x1 = vld1q_u8(sptr + 16);
                internal::prefetch(sptr);
                vst1q_u8(dst + dststep + i, updateOp(s0, x0));
                vst1q_u8(dst + dststep + i + 16, updateOp(s1, x1));

            }
            for(; i < width; i++ )
            {
                u8 s = src[1][i];

                for( k = 2; k < ksize; k++ )
                    s = updateOp(s, src[k][i]);

                dst[i] = updateOp(s, src[0][i]);
                dst[i+dststep] = updateOp(s, src[k][i]);
            }
        }
    }
    else if (ksize > 1)
        for (; count > 1; count -= 2, dst += dststep*2, src += 2)
        {
            for (i = 0; i < width32; i += 32)
            {
                const u8* sptr = src[1] + i;
                s0 = vld1q_u8(sptr);
                s1 = vld1q_u8(sptr + 16);
                internal::prefetch(sptr);
                for (k = 2; k < ksize; k++)
                {
                    sptr = src[k] + i;
                    x0 = vld1q_u8(sptr);
                    x1 = vld1q_u8(sptr + 16);
                    internal::prefetch(sptr);

                    s0 = updateOp(s0, x0);
                    s1 = updateOp(s1, x1);
                }

                sptr = src[0] + i;
                x0 = vld1q_u8(sptr);
                x1 = vld1q_u8(sptr + 16);
                internal::prefetch(sptr);

                vst1q_u8(dst+i, updateOp(s0, x0));
                vst1q_u8(dst+i+16, updateOp(s1, x1));

                sptr = src[k] + i;
                x0 = vld1q_u8(sptr);
                x1 = vld1q_u8(sptr + 16);
                internal::prefetch(sptr);
                vst1q_u8(dst + dststep + i, updateOp(s0, x0));
                vst1q_u8(dst + dststep + i + 16, updateOp(s1, x1));
            }
            for(; i < width; i++ )
            {
                u8 s = src[1][i];

                for( k = 2; k < ksize; k++ )
                    s = updateOp(s, src[k][i]);

                dst[i] = updateOp(s, src[0][i]);
                dst[i+dststep] = updateOp(s, src[k][i]);
            }
        }

    for (; count > 0; count--, dst += dststep, src++)
    {
        for (i = 0; i < width32; i += 32)
        {
            const u8* sptr = src[0] + i;
            s0 = vld1q_u8(sptr);
            s1 = vld1q_u8(sptr + 16);
            internal::prefetch(sptr);

            for (k = 1; k < ksize; k++)
            {
                sptr = src[k] + i;
                x0 = vld1q_u8(sptr);
                x1 = vld1q_u8(sptr + 16);
                internal::prefetch(sptr);
                s0 = updateOp(s0, x0);
                s1 = updateOp(s1, x1);
            }

            vst1q_u8(dst + i, s0);
            vst1q_u8(dst + i + 16, s1);
        }
        for(; i < width; i++ )
        {
            u8 s = src[0][i];
            for( k = 1; k < ksize; k++ )
                s = updateOp(s, src[k][i]);
            dst[i] = s;
        }
    }
}

template <class Op>
inline void morphology(const Size2D &ssize, u32 cn,
                       const u8 * srcBase, ptrdiff_t srcStride,
                       u8 * dstBase, ptrdiff_t dstStride,
                       const Size2D &ksize,
                       size_t anchorX, size_t anchorY,
                       BORDER_MODE rowBorderType, BORDER_MODE columnBorderType,
                       const u8 * borderValues, Margin borderMargin)
{
    //Temporary buffers common for all iterations
    std::vector<u8> _srcRow(cn*(ssize.width + ksize.width - 1));
    u8* srcRow = &_srcRow[0];

    size_t bufRows = std::max<size_t>(ksize.height + 3, std::max<size_t>(anchorY, ksize.height-anchorY-1)*2+1);
    std::vector<u8*> _rows(bufRows);
    u8** rows = &_rows[0];

    // adjust swidthcn so that the used part of buffers stays compact in memory
    ptrdiff_t swidthcn = cn*((ssize.width + 15) & -16);// cn * (aligned ssize.width size)
    std::vector<u8> _ringBuf(swidthcn*bufRows+16);
    u8 * ringBuf = internal::alignPtr(&_ringBuf[0], 16);

    size_t borderLength = std::max<size_t>(ksize.width - 1, 1) * cn;
    std::vector<ptrdiff_t> _borderTab(borderLength);
    ptrdiff_t * borderTab = &_borderTab[0];

    std::vector<u8> _constBorderValue;
    std::vector<u8> _constBorderRow;
    u8 * constBorderValue = NULL;
    u8 * constBorderRow = NULL;
    if( rowBorderType == BORDER_MODE_CONSTANT || columnBorderType == BORDER_MODE_CONSTANT )
    {
        _constBorderValue.resize(borderLength);
        constBorderValue = &_constBorderValue[0];
        size_t i;
        for(i = 0; i < cn; i++)
            constBorderValue[i] = borderValues[i];
        for(; i < borderLength; i++)
            constBorderValue[i] = constBorderValue[i-cn];

        if( columnBorderType == BORDER_MODE_CONSTANT )
        {
            _constBorderRow.resize(cn*(ssize.width + ksize.width - 1 + 16));
            constBorderRow = internal::alignPtr(&_constBorderRow[0], 16);
            size_t N = (ssize.width + ksize.width - 1)*cn;
            for( i = 0; i < N; i += borderLength )
            {
                size_t n = std::min( borderLength, N - i );
                for(size_t j = 0; j < n; j++)
                    srcRow[i+j] = constBorderValue[j];
            }
            MorphRow<Op>(srcRow, constBorderRow, ssize.width, cn, ksize.width);
        }
    }

    Size2D wholeSize(ssize.width + borderMargin.left + borderMargin.right,
                     ssize.height + borderMargin.top + borderMargin.bottom);

    ptrdiff_t dx1 = std::max<ptrdiff_t>(anchorX - (ptrdiff_t)borderMargin.left, 0);
    ptrdiff_t dx2 = std::max<ptrdiff_t>((ptrdiff_t)ksize.width - anchorX - 1 - (ptrdiff_t)borderMargin.right, 0);
    // recompute border tables
    if( dx1 > 0 || dx2 > 0 )
    {
        if( rowBorderType == BORDER_MODE_CONSTANT )
        {
            memcpy( srcRow, &constBorderValue[0], dx1*cn );
            memcpy( srcRow + (ssize.width + ksize.width - 1 - dx2)*cn, &constBorderValue[0], dx2*cn );
        }
        else
        {
            ptrdiff_t xofs1 = std::min<ptrdiff_t>(borderMargin.left, anchorX) - borderMargin.left;

            ptrdiff_t wholeWidth = wholeSize.width;

            ptrdiff_t i, j;
            for( i = 0; i < dx1; i++ )
            {
                ptrdiff_t p0 = (internal::borderInterpolate(i-dx1, wholeWidth, rowBorderType) + xofs1)*cn;
                for( j = 0; j < (ptrdiff_t)cn; j++ )
                    borderTab[i*cn + j] = p0 + j;
            }

            for( i = 0; i < dx2; i++ )
            {
                ptrdiff_t p0 = (internal::borderInterpolate(wholeWidth + i, wholeWidth, rowBorderType) + xofs1)*cn;
                for( j = 0; j < (ptrdiff_t)cn; j++ )
                    borderTab[(i + dx1)*cn + j] = p0 + j;
            }
        }
    }

    ptrdiff_t startY, startY0, endY, rowCount;
    startY = startY0 = std::max<ptrdiff_t>(borderMargin.top - anchorY, 0);
    endY = std::min<ptrdiff_t>(borderMargin.top + ssize.height + ksize.height - anchorY - 1, wholeSize.height);

    const u8* src = srcBase + (startY - borderMargin.top)*srcStride;
    u8* dst = dstBase;

    ptrdiff_t width = ssize.width, kwidth = ksize.width;
    ptrdiff_t kheight = ksize.height, ay = anchorY;
    ptrdiff_t width1 = ssize.width + kwidth - 1;
    ptrdiff_t xofs1 = std::min<ptrdiff_t>(borderMargin.left, anchorX);
    bool makeBorder = (dx1 > 0 || dx2 > 0) && rowBorderType != BORDER_MODE_CONSTANT;
    ptrdiff_t dy = 0, i = 0;

    src -= xofs1*cn;
    ptrdiff_t count = endY - startY;

    rowCount = 0;
    for(;; dst += dstStride*i, dy += i)
    {
        ptrdiff_t dcount = bufRows - ay - startY - rowCount + borderMargin.top;
        dcount = dcount > 0 ? dcount : bufRows - kheight + 1;
        dcount = std::min(dcount, count);
        count -= dcount;
        for( ; dcount-- > 0; src += srcStride )
        {
            ptrdiff_t bi = (startY - startY0 + rowCount) % bufRows;
            u8* brow = ringBuf + bi*swidthcn;

            if( (size_t)(++rowCount) > bufRows )
            {
                --rowCount;
                ++startY;
            }

            memcpy( srcRow + dx1*cn, src, (width1 - dx2 - dx1)*cn );

            if( makeBorder )
            {
                    for( i = 0; i < (ptrdiff_t)(dx1*cn); i++ )
                        srcRow[i] = src[borderTab[i]];
                    for( i = 0; i < (ptrdiff_t)(dx2*cn); i++ )
                        srcRow[i + (width1 - dx2)*cn] = src[borderTab[i+dx1*cn]];
            }

            MorphRow<Op>(srcRow, brow, width, cn, ksize.width);
        }

        ptrdiff_t max_i = std::min<ptrdiff_t>(bufRows, ssize.height - dy + (kheight - 1));
        for( i = 0; i < max_i; i++ )
        {
            ptrdiff_t srcY = internal::borderInterpolate(dy + i + borderMargin.top - ay,
                                               wholeSize.height, columnBorderType);
            if( srcY < 0 ) // can happen only with constant border type
                rows[i] = constBorderRow;
            else
            {
                if( srcY >= startY + rowCount )
                    break;
                ptrdiff_t bi = (srcY - startY0) % bufRows;
                rows[i] = ringBuf + bi*swidthcn;
            }
        }
        if( i < kheight )
            break;
        i -= kheight - 1;
        MorphColumn<Op>((const u8**)rows, dst, dstStride, i, ssize.width*cn, ksize.height);
    }
}

} // namespace
#endif // CAROTENE_NEON

void erode(const Size2D &ssize, u32 cn,
           const u8 * srcBase, ptrdiff_t srcStride,
           u8 * dstBase, ptrdiff_t dstStride,
           const Size2D &ksize,
           size_t anchorX, size_t anchorY,
           BORDER_MODE rowBorderType, BORDER_MODE columnBorderType,
           const u8 * borderValues, Margin borderMargin)
{
    internal::assertSupportedConfiguration(ssize.width > 0 && ssize.height > 0 &&
                                           anchorX < ksize.width && anchorY < ksize.height);
#ifdef CAROTENE_NEON
    morphology<ErodeVecOp>(ssize, cn, srcBase, srcStride, dstBase, dstStride,
                           ksize, anchorX, anchorY, rowBorderType, columnBorderType,
                           borderValues, borderMargin);
#else
    (void)cn;
    (void)srcBase;
    (void)srcStride;
    (void)dstBase;
    (void)dstStride;
    (void)rowBorderType;
    (void)columnBorderType;
    (void)borderValues;
    (void)borderMargin;
#endif
}

void dilate(const Size2D &ssize, u32 cn,
            const u8 * srcBase, ptrdiff_t srcStride,
            u8 * dstBase, ptrdiff_t dstStride,
            const Size2D &ksize,
            size_t anchorX, size_t anchorY,
            BORDER_MODE rowBorderType, BORDER_MODE columnBorderType,
            const u8 * borderValues, Margin borderMargin)
{
    internal::assertSupportedConfiguration(ssize.width > 0 && ssize.height > 0 &&
                                           anchorX < ksize.width && anchorY < ksize.height);
#ifdef CAROTENE_NEON
    morphology<DilateVecOp>(ssize, cn, srcBase, srcStride, dstBase, dstStride,
                            ksize, anchorX, anchorY, rowBorderType, columnBorderType,
                            borderValues, borderMargin);
#else
    (void)cn;
    (void)srcBase;
    (void)srcStride;
    (void)dstBase;
    (void)dstStride;
    (void)rowBorderType;
    (void)columnBorderType;
    (void)borderValues;
    (void)borderMargin;
#endif
}

} // namespace CAROTENE_NS