/*
 *  Copyright 2015 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

package org.webrtc;

import android.graphics.Matrix;
import android.opengl.GLES20;
import android.opengl.GLException;
import androidx.annotation.Nullable;
import java.nio.ByteBuffer;
import org.webrtc.VideoFrame.I420Buffer;
import org.webrtc.VideoFrame.TextureBuffer;

/**
 * Class for converting OES textures to a YUV ByteBuffer. It can be constructed on any thread, but
 * should only be operated from a single thread with an active EGL context.
 */
public final class YuvConverter {
  private static final String TAG = "YuvConverter";

  private static final String FRAGMENT_SHADER =
      // Difference in texture coordinate corresponding to one
      // sub-pixel in the x direction.
      "uniform vec2 xUnit;\n"
      // Color conversion coefficients, including constant term
      + "uniform vec4 coeffs;\n"
      + "\n"
      + "void main() {\n"
      // Since the alpha read from the texture is always 1, this could
      // be written as a mat4 x vec4 multiply. However, that seems to
      // give a worse framerate, possibly because the additional
      // multiplies by 1.0 consume resources.
      + "  gl_FragColor.r = coeffs.a + dot(coeffs.rgb,\n"
      + "      sample(tc - 1.5 * xUnit).rgb);\n"
      + "  gl_FragColor.g = coeffs.a + dot(coeffs.rgb,\n"
      + "      sample(tc - 0.5 * xUnit).rgb);\n"
      + "  gl_FragColor.b = coeffs.a + dot(coeffs.rgb,\n"
      + "      sample(tc + 0.5 * xUnit).rgb);\n"
      + "  gl_FragColor.a = coeffs.a + dot(coeffs.rgb,\n"
      + "      sample(tc + 1.5 * xUnit).rgb);\n"
      + "}\n";

  private static class ShaderCallbacks implements GlGenericDrawer.ShaderCallbacks {
    // Y'UV444 to RGB888, see https://en.wikipedia.org/wiki/YUV#Y%E2%80%B2UV444_to_RGB888_conversion
    // We use the ITU-R BT.601 coefficients for Y, U and V.
    // The values in Wikipedia are inaccurate, the accurate values derived from the spec are:
    // Y = 0.299 * R + 0.587 * G + 0.114 * B
    // U = -0.168736 * R - 0.331264 * G + 0.5 * B + 0.5
    // V = 0.5 * R - 0.418688 * G - 0.0813124 * B + 0.5
    // To map the Y-values to range [16-235] and U- and V-values to range [16-240], the matrix has
    // been multiplied with matrix:
    // {{219 / 255, 0, 0, 16 / 255},
    // {0, 224 / 255, 0, 16 / 255},
    // {0, 0, 224 / 255, 16 / 255},
    // {0, 0, 0, 1}}
    private static final float[] yCoeffs =
        new float[] {0.256788f, 0.504129f, 0.0979059f, 0.0627451f};
    private static final float[] uCoeffs =
        new float[] {-0.148223f, -0.290993f, 0.439216f, 0.501961f};
    private static final float[] vCoeffs =
        new float[] {0.439216f, -0.367788f, -0.0714274f, 0.501961f};

    private int xUnitLoc;
    private int coeffsLoc;

    private float[] coeffs;
    private float stepSize;

    public void setPlaneY() {
      coeffs = yCoeffs;
      stepSize = 1.0f;
    }

    public void setPlaneU() {
      coeffs = uCoeffs;
      stepSize = 2.0f;
    }

    public void setPlaneV() {
      coeffs = vCoeffs;
      stepSize = 2.0f;
    }

    @Override
    public void onNewShader(GlShader shader) {
      xUnitLoc = shader.getUniformLocation("xUnit");
      coeffsLoc = shader.getUniformLocation("coeffs");
    }

    @Override
    public void onPrepareShader(GlShader shader, float[] texMatrix, int frameWidth, int frameHeight,
        int viewportWidth, int viewportHeight) {
      GLES20.glUniform4fv(coeffsLoc, /* count= */ 1, coeffs, /* offset= */ 0);
      // Matrix * (1;0;0;0) / (width / stepSize). Note that OpenGL uses column major order.
      GLES20.glUniform2f(
          xUnitLoc, stepSize * texMatrix[0] / frameWidth, stepSize * texMatrix[1] / frameWidth);
    }
  }

  private final ThreadUtils.ThreadChecker threadChecker = new ThreadUtils.ThreadChecker();
  private final GlTextureFrameBuffer i420TextureFrameBuffer =
      new GlTextureFrameBuffer(GLES20.GL_RGBA);
  private final ShaderCallbacks shaderCallbacks = new ShaderCallbacks();
  private final GlGenericDrawer drawer = new GlGenericDrawer(FRAGMENT_SHADER, shaderCallbacks);
  private final VideoFrameDrawer videoFrameDrawer;

  /**
   * This class should be constructed on a thread that has an active EGL context.
   */
  public YuvConverter() {
    this(new VideoFrameDrawer());
  }

  public YuvConverter(VideoFrameDrawer videoFrameDrawer) {
    this.videoFrameDrawer = videoFrameDrawer;
    threadChecker.detachThread();
  }

  /** Converts the texture buffer to I420. */
  @Nullable
  public I420Buffer convert(TextureBuffer inputTextureBuffer) {
    try {
      return convertInternal(inputTextureBuffer);
    } catch (GLException e) {
      Logging.w(TAG, "Failed to convert TextureBuffer", e);
    }
    return null;
  }

  private I420Buffer convertInternal(TextureBuffer inputTextureBuffer) {
    TextureBuffer preparedBuffer = (TextureBuffer) videoFrameDrawer.prepareBufferForViewportSize(
        inputTextureBuffer, inputTextureBuffer.getWidth(), inputTextureBuffer.getHeight());

    // We draw into a buffer laid out like
    //
    //    +---------+
    //    |         |
    //    |  Y      |
    //    |         |
    //    |         |
    //    +----+----+
    //    | U  | V  |
    //    |    |    |
    //    +----+----+
    //
    // In memory, we use the same stride for all of Y, U and V. The
    // U data starts at offset `height` * `stride` from the Y data,
    // and the V data starts at at offset |stride/2| from the U
    // data, with rows of U and V data alternating.
    //
    // Now, it would have made sense to allocate a pixel buffer with
    // a single byte per pixel (EGL10.EGL_COLOR_BUFFER_TYPE,
    // EGL10.EGL_LUMINANCE_BUFFER,), but that seems to be
    // unsupported by devices. So do the following hack: Allocate an
    // RGBA buffer, of width `stride`/4. To render each of these
    // large pixels, sample the texture at 4 different x coordinates
    // and store the results in the four components.
    //
    // Since the V data needs to start on a boundary of such a
    // larger pixel, it is not sufficient that `stride` is even, it
    // has to be a multiple of 8 pixels.
    final int frameWidth = preparedBuffer.getWidth();
    final int frameHeight = preparedBuffer.getHeight();
    final int stride = ((frameWidth + 7) / 8) * 8;
    final int uvHeight = (frameHeight + 1) / 2;
    // Total height of the combined memory layout.
    final int totalHeight = frameHeight + uvHeight;
    final ByteBuffer i420ByteBuffer = JniCommon.nativeAllocateByteBuffer(stride * totalHeight);
    // Viewport width is divided by four since we are squeezing in four color bytes in each RGBA
    // pixel.
    final int viewportWidth = stride / 4;

    // Produce a frame buffer starting at top-left corner, not bottom-left.
    final Matrix renderMatrix = new Matrix();
    renderMatrix.preTranslate(0.5f, 0.5f);
    renderMatrix.preScale(1f, -1f);
    renderMatrix.preTranslate(-0.5f, -0.5f);

    i420TextureFrameBuffer.setSize(viewportWidth, totalHeight);

    // Bind our framebuffer.
    GLES20.glBindFramebuffer(GLES20.GL_FRAMEBUFFER, i420TextureFrameBuffer.getFrameBufferId());
    GlUtil.checkNoGLES2Error("glBindFramebuffer");

    // Draw Y.
    shaderCallbacks.setPlaneY();
    VideoFrameDrawer.drawTexture(drawer, preparedBuffer, renderMatrix, frameWidth, frameHeight,
        /* viewportX= */ 0, /* viewportY= */ 0, viewportWidth,
        /* viewportHeight= */ frameHeight);

    // Draw U.
    shaderCallbacks.setPlaneU();
    VideoFrameDrawer.drawTexture(drawer, preparedBuffer, renderMatrix, frameWidth, frameHeight,
        /* viewportX= */ 0, /* viewportY= */ frameHeight, viewportWidth / 2,
        /* viewportHeight= */ uvHeight);

    // Draw V.
    shaderCallbacks.setPlaneV();
    VideoFrameDrawer.drawTexture(drawer, preparedBuffer, renderMatrix, frameWidth, frameHeight,
        /* viewportX= */ viewportWidth / 2, /* viewportY= */ frameHeight, viewportWidth / 2,
        /* viewportHeight= */ uvHeight);

    GLES20.glReadPixels(0, 0, i420TextureFrameBuffer.getWidth(), i420TextureFrameBuffer.getHeight(),
        GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, i420ByteBuffer);

    GlUtil.checkNoGLES2Error("YuvConverter.convert");

    // Restore normal framebuffer.
    GLES20.glBindFramebuffer(GLES20.GL_FRAMEBUFFER, 0);

    // Prepare Y, U, and V ByteBuffer slices.
    final int yPos = 0;
    final int uPos = yPos + stride * frameHeight;
    // Rows of U and V alternate in the buffer, so V data starts after the first row of U.
    final int vPos = uPos + stride / 2;

    i420ByteBuffer.position(yPos);
    i420ByteBuffer.limit(yPos + stride * frameHeight);
    final ByteBuffer dataY = i420ByteBuffer.slice();

    i420ByteBuffer.position(uPos);
    // The last row does not have padding.
    final int uvSize = stride * (uvHeight - 1) + stride / 2;
    i420ByteBuffer.limit(uPos + uvSize);
    final ByteBuffer dataU = i420ByteBuffer.slice();

    i420ByteBuffer.position(vPos);
    i420ByteBuffer.limit(vPos + uvSize);
    final ByteBuffer dataV = i420ByteBuffer.slice();

    preparedBuffer.release();

    return JavaI420Buffer.wrap(frameWidth, frameHeight, dataY, stride, dataU, stride, dataV, stride,
        () -> { JniCommon.nativeFreeByteBuffer(i420ByteBuffer); });
  }

  public void release() {
    threadChecker.checkIsOnValidThread();
    drawer.release();
    i420TextureFrameBuffer.release();
    videoFrameDrawer.release();
    // Allow this class to be reused.
    threadChecker.detachThread();
  }
}
