mirror of
https://git.ryujinx.app/ryubing/ryujinx.git
synced 2026-06-02 02:19:14 +00:00
Memory Changes part 2 (ryubing/ryujinx!123)
See merge request ryubing/ryujinx!123
This commit is contained in:
@@ -21,49 +21,67 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
private static void ReadTxModeProbs(ref Vp9EntropyProbs txProbs, ref Reader r)
|
||||
{
|
||||
Span<Array1<byte>> tx8x8ProbSpan1 = txProbs.Tx8x8Prob.AsSpan();
|
||||
Span<Array2<byte>> tx16x16ProbSpan1 = txProbs.Tx16x16Prob.AsSpan();
|
||||
Span<Array3<byte>> tx32x32ProbSpan1 = txProbs.Tx32x32Prob.AsSpan();
|
||||
|
||||
for (int i = 0; i < EntropyMode.TxSizeContexts; ++i)
|
||||
{
|
||||
Span<byte> tx8x8ProbSpan2 = tx8x8ProbSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < (int)TxSize.TxSizes - 3; ++j)
|
||||
{
|
||||
r.DiffUpdateProb(ref txProbs.Tx8x8Prob[i][j]);
|
||||
r.DiffUpdateProb(ref tx8x8ProbSpan2[j]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < EntropyMode.TxSizeContexts; ++i)
|
||||
{
|
||||
Span<byte> tx16x16ProbSpan2 = tx16x16ProbSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < (int)TxSize.TxSizes - 2; ++j)
|
||||
{
|
||||
r.DiffUpdateProb(ref txProbs.Tx16x16Prob[i][j]);
|
||||
r.DiffUpdateProb(ref tx16x16ProbSpan2[j]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < EntropyMode.TxSizeContexts; ++i)
|
||||
{
|
||||
Span<byte> tx32x32ProbSpan2 = tx32x32ProbSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < (int)TxSize.TxSizes - 1; ++j)
|
||||
{
|
||||
r.DiffUpdateProb(ref txProbs.Tx32x32Prob[i][j]);
|
||||
r.DiffUpdateProb(ref tx32x32ProbSpan2[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void ReadSwitchableInterpProbs(ref Vp9EntropyProbs fc, ref Reader r)
|
||||
{
|
||||
for (int j = 0; j < Constants.SwitchableFilterContexts; ++j)
|
||||
Span<Array2<byte>> switchableInterpProbSpan1 = fc.SwitchableInterpProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.SwitchableFilterContexts; ++i)
|
||||
{
|
||||
for (int i = 0; i < Constants.SwitchableFilters - 1; ++i)
|
||||
Span<byte> switchableInterpProbSpan2 = switchableInterpProbSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < Constants.SwitchableFilters - 1; ++j)
|
||||
{
|
||||
r.DiffUpdateProb(ref fc.SwitchableInterpProb[j][i]);
|
||||
r.DiffUpdateProb(ref switchableInterpProbSpan2[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void ReadInterModeProbs(ref Vp9EntropyProbs fc, ref Reader r)
|
||||
{
|
||||
Span<Array3<byte>> interModeProbSpan1 = fc.InterModeProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.InterModeContexts; ++i)
|
||||
{
|
||||
Span<byte> interModeProbSpan2 = interModeProbSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < Constants.InterModes - 1; ++j)
|
||||
{
|
||||
r.DiffUpdateProb(ref fc.InterModeProb[i][j]);
|
||||
r.DiffUpdateProb(ref interModeProbSpan2[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -72,30 +90,43 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
r.UpdateMvProbs(ctx.Joints.AsSpan(), EntropyMv.Joints - 1);
|
||||
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
r.UpdateMvProbs(MemoryMarshal.CreateSpan(ref ctx.Sign[i], 1), 1);
|
||||
r.UpdateMvProbs(ctx.Classes[i].AsSpan(), EntropyMv.Classes - 1);
|
||||
r.UpdateMvProbs(ctx.Class0[i].AsSpan(), EntropyMv.Class0Size - 1);
|
||||
r.UpdateMvProbs(ctx.Bits[i].AsSpan(), EntropyMv.OffsetBits);
|
||||
}
|
||||
Span<byte> signSpan = ctx.Sign.AsSpan();
|
||||
Span<Array10<byte>> classesSpan = ctx.Classes.AsSpan();
|
||||
Span<Array1<byte>> class0Span = ctx.Class0.AsSpan();
|
||||
Span<Array10<byte>> bitsSpan = ctx.Bits.AsSpan();
|
||||
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
r.UpdateMvProbs(MemoryMarshal.CreateSpan(ref signSpan[i], 1), 1);
|
||||
r.UpdateMvProbs(classesSpan[i].AsSpan(), EntropyMv.Classes - 1);
|
||||
r.UpdateMvProbs(class0Span[i].AsSpan(), EntropyMv.Class0Size - 1);
|
||||
r.UpdateMvProbs(bitsSpan[i].AsSpan(), EntropyMv.OffsetBits);
|
||||
}
|
||||
|
||||
Span<Array2<Array3<byte>>> class0FpSpan1 = ctx.Class0Fp.AsSpan();
|
||||
Span<Array3<byte>> fpSpan = ctx.Fp.AsSpan();
|
||||
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
Span<Array3<byte>> class0FpSpan2 = class0FpSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < EntropyMv.Class0Size; ++j)
|
||||
{
|
||||
r.UpdateMvProbs(ctx.Class0Fp[i][j].AsSpan(), EntropyMv.FpSize - 1);
|
||||
r.UpdateMvProbs(class0FpSpan2[j].AsSpan(), EntropyMv.FpSize - 1);
|
||||
}
|
||||
|
||||
r.UpdateMvProbs(ctx.Fp[i].AsSpan(), 3);
|
||||
r.UpdateMvProbs(fpSpan[i].AsSpan(), 3);
|
||||
}
|
||||
|
||||
if (allowHp)
|
||||
{
|
||||
Span<byte> class0HpSpan = ctx.Class0Hp.AsSpan();
|
||||
Span<byte> hpSpan = ctx.Hp.AsSpan();
|
||||
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
r.UpdateMvProbs(MemoryMarshal.CreateSpan(ref ctx.Class0Hp[i], 1), 1);
|
||||
r.UpdateMvProbs(MemoryMarshal.CreateSpan(ref ctx.Hp[i], 1), 1);
|
||||
r.UpdateMvProbs(MemoryMarshal.CreateSpan(ref class0HpSpan[i], 1), 1);
|
||||
r.UpdateMvProbs(MemoryMarshal.CreateSpan(ref hpSpan[i], 1), 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -751,10 +782,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
int refr;
|
||||
bool isScaled;
|
||||
|
||||
Span<sbyte> refFrameSpan = mi.RefFrame.AsSpan();
|
||||
Span<Mv> mvSpan = mi.Mv.AsSpan();
|
||||
Span<RefBuffer> frameRefsSpan = cm.FrameRefs.AsSpan();
|
||||
Span<Ptr<RefBuffer>> blockRefsSpan = xd.BlockRefs.AsSpan();
|
||||
Span<MacroBlockDPlane> planeSpan = xd.Plane.AsSpan();
|
||||
|
||||
for (refr = 0; refr < 1 + isCompound; ++refr)
|
||||
{
|
||||
int frame = mi.RefFrame[refr];
|
||||
ref RefBuffer refBuf = ref cm.FrameRefs[frame - Constants.LastFrame];
|
||||
int frame = refFrameSpan[refr];
|
||||
ref RefBuffer refBuf = ref frameRefsSpan[frame - Constants.LastFrame];
|
||||
ref ScaleFactors sf = ref refBuf.Sf;
|
||||
ref Surface refFrameBuf = ref refBuf.Buf;
|
||||
|
||||
@@ -767,13 +804,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
isScaled = sf.IsScaled();
|
||||
ReconInter.SetupPrePlanes(ref xd, refr, ref refFrameBuf, miRow, miCol,
|
||||
isScaled ? new Ptr<ScaleFactors>(ref sf) : Ptr<ScaleFactors>.Null);
|
||||
xd.BlockRefs[refr] = new Ptr<RefBuffer>(ref refBuf);
|
||||
blockRefsSpan[refr] = new Ptr<RefBuffer>(ref refBuf);
|
||||
|
||||
if (sbType < BlockSize.Block8X8)
|
||||
{
|
||||
for (plane = 0; plane < Constants.MaxMbPlane; ++plane)
|
||||
{
|
||||
ref MacroBlockDPlane pd = ref xd.Plane[plane];
|
||||
ref MacroBlockDPlane pd = ref planeSpan[plane];
|
||||
ref Buf2D dstBuf = ref pd.Dst;
|
||||
int num4X4W = pd.N4W;
|
||||
int num4X4H = pd.N4H;
|
||||
@@ -811,10 +848,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
else
|
||||
{
|
||||
Mv mv = mi.Mv[refr];
|
||||
Mv mv = mvSpan[refr];
|
||||
for (plane = 0; plane < Constants.MaxMbPlane; ++plane)
|
||||
{
|
||||
ref MacroBlockDPlane pd = ref xd.Plane[plane];
|
||||
ref MacroBlockDPlane pd = ref planeSpan[plane];
|
||||
ref Buf2D dstBuf = ref pd.Dst;
|
||||
int num4X4W = pd.N4W;
|
||||
int num4X4H = pd.N4H;
|
||||
@@ -847,12 +884,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
private static void SetPlaneN4(ref MacroBlockD xd, int bw, int bh, int bwl, int bhl)
|
||||
{
|
||||
Span<MacroBlockDPlane> planeSpan = xd.Plane.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.MaxMbPlane; i++)
|
||||
{
|
||||
xd.Plane[i].N4W = (ushort)((bw << 1) >> xd.Plane[i].SubsamplingX);
|
||||
xd.Plane[i].N4H = (ushort)((bh << 1) >> xd.Plane[i].SubsamplingY);
|
||||
xd.Plane[i].N4Wl = (byte)(bwl - xd.Plane[i].SubsamplingX);
|
||||
xd.Plane[i].N4Hl = (byte)(bhl - xd.Plane[i].SubsamplingY);
|
||||
planeSpan[i].N4W = (ushort)((bw << 1) >> planeSpan[i].SubsamplingX);
|
||||
planeSpan[i].N4H = (ushort)((bh << 1) >> planeSpan[i].SubsamplingY);
|
||||
planeSpan[i].N4Wl = (byte)(bwl - planeSpan[i].SubsamplingX);
|
||||
planeSpan[i].N4Hl = (byte)(bhl - planeSpan[i].SubsamplingY);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -892,7 +931,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// as they are always compared to values that are in 1/8th pel units
|
||||
xd.SetMiRowCol(ref tile, miRow, bh, miCol, bw, cm.MiRows, cm.MiCols);
|
||||
|
||||
ReconInter.SetupDstPlanes(ref xd.Plane, ref xd.CurBuf, miRow, miCol);
|
||||
ReconInter.SetupDstPlanes(xd.Plane.AsSpan(), ref xd.CurBuf, miRow, miCol);
|
||||
return ref xd.Mi[0].Value;
|
||||
}
|
||||
|
||||
@@ -933,10 +972,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
if (!mi.IsInterBlock())
|
||||
{
|
||||
int plane;
|
||||
for (plane = 0; plane < Constants.MaxMbPlane; ++plane)
|
||||
Span<MacroBlockDPlane> planeSpan = xd.Plane.AsSpan();
|
||||
|
||||
for (int plane = 0; plane < Constants.MaxMbPlane; ++plane)
|
||||
{
|
||||
ref MacroBlockDPlane pd = ref xd.Plane[plane];
|
||||
ref MacroBlockDPlane pd = ref planeSpan[plane];
|
||||
TxSize txSize = plane != 0 ? mi.GetUvTxSize(ref pd) : mi.TxSize;
|
||||
int num4X4W = pd.N4W;
|
||||
int num4X4H = pd.N4H;
|
||||
@@ -967,12 +1007,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// Reconstruction
|
||||
if (mi.Skip == 0)
|
||||
{
|
||||
Span<MacroBlockDPlane> planeSpan = xd.Plane.AsSpan();
|
||||
|
||||
int eobtotal = 0;
|
||||
int plane;
|
||||
|
||||
for (plane = 0; plane < Constants.MaxMbPlane; ++plane)
|
||||
for (int plane = 0; plane < Constants.MaxMbPlane; ++plane)
|
||||
{
|
||||
ref MacroBlockDPlane pd = ref xd.Plane[plane];
|
||||
ref MacroBlockDPlane pd = ref planeSpan[plane];
|
||||
TxSize txSize = plane != 0 ? mi.GetUvTxSize(ref pd) : mi.TxSize;
|
||||
int num4X4W = pd.N4W;
|
||||
int num4X4H = pd.N4H;
|
||||
@@ -1159,22 +1200,30 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
}
|
||||
|
||||
private static void ReadCoefProbsCommon(ref Array2<Array2<Array6<Array6<Array3<byte>>>>> coefProbs,
|
||||
private static void ReadCoefProbsCommon(ReadOnlySpan<Array2<Array6<Array6<Array3<byte>>>>> coefProbs1,
|
||||
ref Reader r, int txSize)
|
||||
{
|
||||
if (r.ReadBit() != 0)
|
||||
{
|
||||
for (int i = 0; i < Constants.PlaneTypes; ++i)
|
||||
{
|
||||
Span<Array6<Array6<Array3<byte>>>> coefProbs2 = coefProbs1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < Entropy.RefTypes; ++j)
|
||||
{
|
||||
Span<Array6<Array3<byte>>> coefProbs3 = coefProbs2[j].AsSpan();
|
||||
|
||||
for (int k = 0; k < Entropy.CoefBands; ++k)
|
||||
{
|
||||
Span<Array3<byte>> coefProbs4 = coefProbs3[k].AsSpan();
|
||||
|
||||
for (int l = 0; l < Entropy.BAND_COEFF_CONTEXTS(k); ++l)
|
||||
{
|
||||
Span<byte> coefProbs5 = coefProbs4[l].AsSpan();
|
||||
|
||||
for (int m = 0; m < Entropy.UnconstrainedNodes; ++m)
|
||||
{
|
||||
r.DiffUpdateProb(ref coefProbs[i][j][k][l][m]);
|
||||
r.DiffUpdateProb(ref coefProbs5[m]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1185,10 +1234,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
private static void ReadCoefProbs(ref Vp9EntropyProbs fc, TxMode txMode, ref Reader r)
|
||||
{
|
||||
Span<Array2<Array2<Array6<Array6<Array3<byte>>>>>> coefProbsSpan = fc.CoefProbs.AsSpan();
|
||||
|
||||
int maxTxSize = (int)Luts.TxModeToBiggestTxSize[(int)txMode];
|
||||
|
||||
for (int txSize = (int)TxSize.Tx4X4; txSize <= maxTxSize; ++txSize)
|
||||
{
|
||||
ReadCoefProbsCommon(ref fc.CoefProbs[txSize], ref r, txSize);
|
||||
ReadCoefProbsCommon(coefProbsSpan[txSize].AsSpan(), ref r, txSize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1207,11 +1259,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
lf.ModeRefDeltaUpdate = rb.ReadBit() != 0;
|
||||
if (lf.ModeRefDeltaUpdate)
|
||||
{
|
||||
Span<sbyte> refDeltasSpan = lf.RefDeltas.AsSpan();
|
||||
Span<sbyte> modeDeltasSpan = lf.ModeDeltas.AsSpan();
|
||||
|
||||
for (int i = 0; i < LoopFilter.MaxRefLfDeltas; i++)
|
||||
{
|
||||
if (rb.ReadBit() != 0)
|
||||
{
|
||||
lf.RefDeltas[i] = (sbyte)rb.ReadSignedLiteral(6);
|
||||
refDeltasSpan[i] = (sbyte)rb.ReadSignedLiteral(6);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1219,7 +1274,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
if (rb.ReadBit() != 0)
|
||||
{
|
||||
lf.ModeDeltas[i] = (sbyte)rb.ReadSignedLiteral(6);
|
||||
modeDeltasSpan[i] = (sbyte)rb.ReadSignedLiteral(6);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1267,6 +1322,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
cm.ResizeContextBuffers(allocator, width, height);
|
||||
SetupRenderSize(ref cm, ref rb);
|
||||
|
||||
Span<RefCntBuffer> frameBuffsSpan = pool.FrameBufs.AsSpan();
|
||||
|
||||
if (cm.GetFrameNewBuffer().ReallocFrameBuffer(
|
||||
allocator,
|
||||
cm.Width,
|
||||
@@ -1276,21 +1333,21 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
cm.UseHighBitDepth,
|
||||
Surface.DecBorderInPixels,
|
||||
cm.ByteAlignment,
|
||||
new Ptr<VpxCodecFrameBuffer>(ref pool.FrameBufs[cm.NewFbIdx].RawFrameBuffer),
|
||||
new Ptr<VpxCodecFrameBuffer>(ref frameBuffsSpan[cm.NewFbIdx].RawFrameBuffer),
|
||||
FrameBuffers.GetFrameBuffer,
|
||||
pool.CbPriv) != 0)
|
||||
{
|
||||
cm.Error.InternalError(CodecErr.MemError, "Failed to allocate frame buffer");
|
||||
}
|
||||
|
||||
pool.FrameBufs[cm.NewFbIdx].Released = 0;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.SubsamplingX = cm.SubsamplingX;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.SubsamplingY = cm.SubsamplingY;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.BitDepth = (uint)cm.BitDepth;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.ColorSpace = cm.ColorSpace;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.ColorRange = cm.ColorRange;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.RenderWidth = cm.RenderWidth;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.RenderHeight = cm.RenderHeight;
|
||||
frameBuffsSpan[cm.NewFbIdx].Released = 0;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.SubsamplingX = cm.SubsamplingX;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.SubsamplingY = cm.SubsamplingY;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.BitDepth = (uint)cm.BitDepth;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.ColorSpace = cm.ColorSpace;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.ColorRange = cm.ColorRange;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.RenderWidth = cm.RenderWidth;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.RenderHeight = cm.RenderHeight;
|
||||
}
|
||||
|
||||
private static bool ValidRefFrameImgFmt(
|
||||
@@ -1311,13 +1368,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
bool hasValidRefFrame = false;
|
||||
ref BufferPool pool = ref cm.BufferPool.Value;
|
||||
Span<RefBuffer> frameRefsSpan = cm.FrameRefs.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.RefsPerFrame; ++i)
|
||||
{
|
||||
if (rb.ReadBit() != 0)
|
||||
{
|
||||
if (cm.FrameRefs[i].Idx != RefBuffer.InvalidIdx)
|
||||
if (frameRefsSpan[i].Idx != RefBuffer.InvalidIdx)
|
||||
{
|
||||
ref Surface buf = ref cm.FrameRefs[i].Buf;
|
||||
ref Surface buf = ref frameRefsSpan[i].Buf;
|
||||
width = buf.YCropWidth;
|
||||
height = buf.YCropHeight;
|
||||
found = true;
|
||||
@@ -1342,7 +1401,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// has valid dimensions.
|
||||
for (int i = 0; i < Constants.RefsPerFrame; ++i)
|
||||
{
|
||||
ref RefBuffer refFrame = ref cm.FrameRefs[i];
|
||||
ref RefBuffer refFrame = ref frameRefsSpan[i];
|
||||
hasValidRefFrame |=
|
||||
refFrame.Idx != RefBuffer.InvalidIdx &&
|
||||
ScaleFactors.ValidRefFrameSize(refFrame.Buf.YCropWidth, refFrame.Buf.YCropHeight, width,
|
||||
@@ -1356,7 +1415,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
for (int i = 0; i < Constants.RefsPerFrame; ++i)
|
||||
{
|
||||
ref RefBuffer refFrame = ref cm.FrameRefs[i];
|
||||
ref RefBuffer refFrame = ref frameRefsSpan[i];
|
||||
if (refFrame.Idx == RefBuffer.InvalidIdx ||
|
||||
!ValidRefFrameImgFmt(
|
||||
(BitDepth)refFrame.Buf.BitDepth,
|
||||
@@ -1373,6 +1432,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
cm.ResizeContextBuffers(allocator, width, height);
|
||||
SetupRenderSize(ref cm, ref rb);
|
||||
|
||||
Span<RefCntBuffer> frameBuffsSpan = pool.FrameBufs.AsSpan();
|
||||
|
||||
if (cm.GetFrameNewBuffer().ReallocFrameBuffer(
|
||||
allocator,
|
||||
@@ -1383,21 +1444,21 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
cm.UseHighBitDepth,
|
||||
Surface.DecBorderInPixels,
|
||||
cm.ByteAlignment,
|
||||
new Ptr<VpxCodecFrameBuffer>(ref pool.FrameBufs[cm.NewFbIdx].RawFrameBuffer),
|
||||
new Ptr<VpxCodecFrameBuffer>(ref frameBuffsSpan[cm.NewFbIdx].RawFrameBuffer),
|
||||
FrameBuffers.GetFrameBuffer,
|
||||
pool.CbPriv) != 0)
|
||||
{
|
||||
cm.Error.InternalError(CodecErr.MemError, "Failed to allocate frame buffer");
|
||||
}
|
||||
|
||||
pool.FrameBufs[cm.NewFbIdx].Released = 0;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.SubsamplingX = cm.SubsamplingX;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.SubsamplingY = cm.SubsamplingY;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.BitDepth = (uint)cm.BitDepth;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.ColorSpace = cm.ColorSpace;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.ColorRange = cm.ColorRange;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.RenderWidth = cm.RenderWidth;
|
||||
pool.FrameBufs[cm.NewFbIdx].Buf.RenderHeight = cm.RenderHeight;
|
||||
frameBuffsSpan[cm.NewFbIdx].Released = 0;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.SubsamplingX = cm.SubsamplingX;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.SubsamplingY = cm.SubsamplingY;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.BitDepth = (uint)cm.BitDepth;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.ColorSpace = cm.ColorSpace;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.ColorRange = cm.ColorRange;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.RenderWidth = cm.RenderWidth;
|
||||
frameBuffsSpan[cm.NewFbIdx].Buf.RenderHeight = cm.RenderHeight;
|
||||
}
|
||||
|
||||
// Reads the next tile returning its size and adjusting '*data' accordingly
|
||||
@@ -1437,7 +1498,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
|
||||
private static void GetTileBuffers(ref Vp9Common cm, ArrayPtr<byte> data, int tileCols,
|
||||
ref Array64<TileBuffer> tileBuffers)
|
||||
Span<TileBuffer> tileBuffers)
|
||||
{
|
||||
for (int c = 0; c < tileCols; ++c)
|
||||
{
|
||||
@@ -1453,14 +1514,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ArrayPtr<byte> data,
|
||||
int tileCols,
|
||||
int tileRows,
|
||||
ref Array4<Array64<TileBuffer>> tileBuffers)
|
||||
Span<Array64<TileBuffer>> tileBuffers1)
|
||||
{
|
||||
for (int r = 0; r < tileRows; ++r)
|
||||
{
|
||||
Span<TileBuffer> tileBuffers2 = tileBuffers1[r].AsSpan();
|
||||
|
||||
for (int c = 0; c < tileCols; ++c)
|
||||
{
|
||||
bool isLast = r == tileRows - 1 && c == tileCols - 1;
|
||||
ref TileBuffer buf = ref tileBuffers[r][c];
|
||||
ref TileBuffer buf = ref tileBuffers2[c];
|
||||
GetTileBuffer(isLast, ref cm.Error, ref data, ref buf);
|
||||
}
|
||||
}
|
||||
@@ -1484,15 +1547,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
MemoryUtil.Fill(cm.AboveSegContext.ToPointer(), (sbyte)0, alignedCols);
|
||||
|
||||
LoopFilter.ResetLfm(ref cm);
|
||||
|
||||
Span<Array64<TileBuffer>> tileBuffers1 = tileBuffers.AsSpan();
|
||||
Span<TileWorkerData> tileWorkerDataSpan = cm.TileWorkerData.AsSpan();
|
||||
|
||||
GetTileBuffers(ref cm, data, tileCols, tileRows, ref tileBuffers);
|
||||
GetTileBuffers(ref cm, data, tileCols, tileRows, tileBuffers1);
|
||||
// Load all tile information into tile_data.
|
||||
for (tileRow = 0; tileRow < tileRows; ++tileRow)
|
||||
{
|
||||
Span<TileBuffer> tileBuffers2 = tileBuffers1[tileRow].AsSpan();
|
||||
|
||||
for (tileCol = 0; tileCol < tileCols; ++tileCol)
|
||||
{
|
||||
ref TileBuffer buf = ref tileBuffers[tileRow][tileCol];
|
||||
ref TileWorkerData tileData = ref cm.TileWorkerData[(tileCols * tileRow) + tileCol];
|
||||
ref TileBuffer buf = ref tileBuffers2[tileCol];
|
||||
ref TileWorkerData tileData = ref tileWorkerDataSpan[(tileCols * tileRow) + tileCol];
|
||||
tileData.Xd = cm.Mb;
|
||||
tileData.Xd.Corrupted = false;
|
||||
tileData.Xd.Counts = cm.Counts;
|
||||
@@ -1512,7 +1580,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
for (tileCol = 0; tileCol < tileCols; ++tileCol)
|
||||
{
|
||||
int col = tileCol;
|
||||
ref TileWorkerData tileData = ref cm.TileWorkerData[(tileCols * tileRow) + col];
|
||||
ref TileWorkerData tileData = ref tileWorkerDataSpan[(tileCols * tileRow) + col];
|
||||
tile.SetCol(ref cm, col);
|
||||
tileData.Xd.LeftContext = new Array3<Array16<sbyte>>();
|
||||
tileData.Xd.LeftSegContext = new Array8<sbyte>();
|
||||
@@ -1531,11 +1599,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
|
||||
// Get last tile data.
|
||||
return cm.TileWorkerData[(tileCols * tileRows) - 1].BitReader.FindEnd();
|
||||
return tileWorkerDataSpan[(tileCols * tileRows) - 1].BitReader.FindEnd();
|
||||
}
|
||||
|
||||
private static bool DecodeTileCol(ref TileWorkerData tileData, ref Vp9Common cm,
|
||||
ref Array64<TileBuffer> tileBuffers)
|
||||
Span<TileBuffer> tileBuffers)
|
||||
{
|
||||
ref TileInfo tile = ref tileData.Xd.Tile;
|
||||
int finalCol = (1 << cm.Log2TileCols) - 1;
|
||||
@@ -1593,10 +1661,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
cm.AboveContext.AsSpan().Clear();
|
||||
cm.AboveSegContext.AsSpan().Clear();
|
||||
|
||||
Span<TileWorkerData> tileWorkerDataSpan = cm.TileWorkerData.AsSpan();
|
||||
|
||||
for (n = 0; n < numWorkers; ++n)
|
||||
{
|
||||
ref TileWorkerData tileData = ref cm.TileWorkerData[n + totalTiles];
|
||||
ref TileWorkerData tileData = ref tileWorkerDataSpan[n + totalTiles];
|
||||
|
||||
tileData.Xd = cm.Mb;
|
||||
tileData.Xd.Counts = new Ptr<Vp9BackwardUpdates>(ref tileData.Counts);
|
||||
@@ -1604,17 +1674,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
|
||||
Array64<TileBuffer> tileBuffers = new();
|
||||
Span<TileBuffer> tileBuffersSpan = tileBuffers.AsSpan();
|
||||
|
||||
GetTileBuffers(ref cm, data, tileCols, ref tileBuffers);
|
||||
GetTileBuffers(ref cm, data, tileCols, tileBuffersSpan);
|
||||
|
||||
tileBuffers.AsSpan()[..tileCols].Sort(CompareTileBuffers);
|
||||
tileBuffersSpan[..tileCols].Sort(CompareTileBuffers);
|
||||
|
||||
if (numWorkers == tileCols)
|
||||
{
|
||||
TileBuffer largest = tileBuffers[0];
|
||||
Span<TileBuffer> buffers = tileBuffers.AsSpan();
|
||||
buffers[1..].CopyTo(buffers[..(tileBuffers.Length - 1)]);
|
||||
tileBuffers[tileCols - 1] = largest;
|
||||
TileBuffer largest = tileBuffersSpan[0];
|
||||
tileBuffersSpan[1..].CopyTo(tileBuffersSpan[..^1]);
|
||||
tileBuffersSpan[tileCols - 1] = largest;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1625,9 +1695,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// larger tile implies it is more difficult to decode.
|
||||
while (start < end)
|
||||
{
|
||||
tmp = tileBuffers[start];
|
||||
tileBuffers[start] = tileBuffers[end];
|
||||
tileBuffers[end] = tmp;
|
||||
tmp = tileBuffersSpan[start];
|
||||
tileBuffersSpan[start] = tileBuffersSpan[end];
|
||||
tileBuffersSpan[end] = tmp;
|
||||
start += 2;
|
||||
end -= 2;
|
||||
}
|
||||
@@ -1640,7 +1710,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
for (n = 0; n < numWorkers; ++n)
|
||||
{
|
||||
int count = baseVal + ((remain + n) / numWorkers);
|
||||
ref TileWorkerData tileData = ref cm.TileWorkerData[n + totalTiles];
|
||||
ref TileWorkerData tileData = ref tileWorkerDataSpan[n + totalTiles];
|
||||
|
||||
tileData.BufStart = bufStart;
|
||||
tileData.BufEnd = bufStart + count - 1;
|
||||
@@ -1654,7 +1724,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
ref TileWorkerData tileData = ref cmPtr.Value.TileWorkerData[n + totalTiles];
|
||||
|
||||
if (!DecodeTileCol(ref tileData, ref cmPtr.Value, ref tileBuffers))
|
||||
if (!DecodeTileCol(ref tileData, ref cmPtr.Value, tileBuffers.AsSpan()))
|
||||
{
|
||||
cmPtr.Value.Mb.Corrupted = true;
|
||||
}
|
||||
@@ -1664,14 +1734,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
if (bitReaderEnd.IsNull)
|
||||
{
|
||||
ref TileWorkerData tileData = ref cm.TileWorkerData[n - 1 + totalTiles];
|
||||
ref TileWorkerData tileData = ref tileWorkerDataSpan[n - 1 + totalTiles];
|
||||
bitReaderEnd = tileData.DataEnd;
|
||||
}
|
||||
}
|
||||
|
||||
for (n = 0; n < numWorkers; ++n)
|
||||
{
|
||||
ref TileWorkerData tileData = ref cm.TileWorkerData[n + totalTiles];
|
||||
ref TileWorkerData tileData = ref tileWorkerDataSpan[n + totalTiles];
|
||||
AccumulateFrameCounts(ref cm.Counts.Value, ref tileData.Counts);
|
||||
}
|
||||
|
||||
@@ -1705,7 +1775,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
if (cm.FrameType == FrameType.KeyFrame && cm.CurrentVideoFrame > 0)
|
||||
{
|
||||
ref Array12<RefCntBuffer> frameBufs = ref cm.BufferPool.Value.FrameBufs;
|
||||
Span<RefCntBuffer> frameBuffs = cm.BufferPool.Value.FrameBufs.AsSpan();
|
||||
ref BufferPool pool = ref cm.BufferPool.Value;
|
||||
|
||||
for (int i = 0; i < Constants.FrameBuffers; ++i)
|
||||
@@ -1715,11 +1785,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
continue;
|
||||
}
|
||||
|
||||
frameBufs[i].RefCount = 0;
|
||||
if (frameBufs[i].Released == 0)
|
||||
frameBuffs[i].RefCount = 0;
|
||||
if (frameBuffs[i].Released == 0)
|
||||
{
|
||||
FrameBuffers.ReleaseFrameBuffer(pool.CbPriv, ref frameBufs[i].RawFrameBuffer);
|
||||
frameBufs[i].Released = 1;
|
||||
FrameBuffers.ReleaseFrameBuffer(pool.CbPriv, ref frameBuffs[i].RawFrameBuffer);
|
||||
frameBuffs[i].Released = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1742,7 +1812,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
readSyncCode2 == SyncCode2;
|
||||
}
|
||||
|
||||
private static void RefCntFb(ref Array12<RefCntBuffer> bufs, ref int idx, int newIdx)
|
||||
private static void RefCntFb(Span<RefCntBuffer> bufs, ref int idx, int newIdx)
|
||||
{
|
||||
int refIndex = idx;
|
||||
|
||||
@@ -1761,13 +1831,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
ref Vp9Common cm = ref pbi.Common;
|
||||
ref BufferPool pool = ref cm.BufferPool.Value;
|
||||
ref Array12<RefCntBuffer> frameBufs = ref pool.FrameBufs;
|
||||
Span<RefCntBuffer> frameBuffs = pool.FrameBufs.AsSpan();
|
||||
int mask, refIndex = 0;
|
||||
ulong sz;
|
||||
|
||||
cm.LastFrameType = cm.FrameType;
|
||||
cm.LastIntraOnly = cm.IntraOnly;
|
||||
|
||||
Span<int> refFrameSpan = cm.RefFrameMap.AsSpan();
|
||||
|
||||
if (rb.ReadLiteral(2) != FrameMarker)
|
||||
{
|
||||
cm.Error.InternalError(CodecErr.UnsupBitstream, "Invalid frame marker");
|
||||
@@ -1783,14 +1855,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
if (cm.ShowExistingFrame != 0)
|
||||
{
|
||||
// Show an existing frame directly.
|
||||
int frameToShow = cm.RefFrameMap[rb.ReadLiteral(3)];
|
||||
if (frameToShow < 0 || frameBufs[frameToShow].RefCount < 1)
|
||||
int frameToShow = refFrameSpan[rb.ReadLiteral(3)];
|
||||
if (frameToShow < 0 || frameBuffs[frameToShow].RefCount < 1)
|
||||
{
|
||||
cm.Error.InternalError(CodecErr.UnsupBitstream,
|
||||
$"Buffer {frameToShow} does not contain a decoded frame");
|
||||
}
|
||||
|
||||
RefCntFb(ref frameBufs, ref cm.NewFbIdx, frameToShow);
|
||||
RefCntFb(frameBuffs, ref cm.NewFbIdx, frameToShow);
|
||||
pbi.RefreshFrameFlags = 0;
|
||||
cm.Lf.FilterLevel = 0;
|
||||
cm.ShowFrame = 1;
|
||||
@@ -1812,16 +1884,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
cm.ReadBitdepthColorspaceSampling(ref rb);
|
||||
pbi.RefreshFrameFlags = (1 << Constants.RefFrames) - 1;
|
||||
|
||||
Span<RefBuffer> frameRefsSpan = cm.FrameRefs.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.RefsPerFrame; ++i)
|
||||
{
|
||||
cm.FrameRefs[i].Idx = RefBuffer.InvalidIdx;
|
||||
cm.FrameRefs[i].Buf = default;
|
||||
frameRefsSpan[i].Idx = RefBuffer.InvalidIdx;
|
||||
frameRefsSpan[i].Buf = default;
|
||||
}
|
||||
|
||||
SetupFrameSize(allocator, ref cm, ref rb);
|
||||
if (pbi.NeedResync != 0)
|
||||
{
|
||||
cm.RefFrameMap.AsSpan().Fill(-1);
|
||||
refFrameSpan.Fill(-1);
|
||||
FlushAllFbOnKey(ref cm);
|
||||
pbi.NeedResync = 0;
|
||||
}
|
||||
@@ -1860,22 +1934,25 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
SetupFrameSize(allocator, ref cm, ref rb);
|
||||
if (pbi.NeedResync != 0)
|
||||
{
|
||||
cm.RefFrameMap.AsSpan().Fill(-1);
|
||||
refFrameSpan.Fill(-1);
|
||||
pbi.NeedResync = 0;
|
||||
}
|
||||
}
|
||||
else if (pbi.NeedResync != 1)
|
||||
{
|
||||
Span<RefBuffer> frameRefsSpan = cm.FrameRefs.AsSpan();
|
||||
Span<sbyte> refFrameSignBiasSpan = cm.RefFrameSignBias.AsSpan();
|
||||
|
||||
/* Skip if need resync */
|
||||
pbi.RefreshFrameFlags = rb.ReadLiteral(Constants.RefFrames);
|
||||
for (int i = 0; i < Constants.RefsPerFrame; ++i)
|
||||
{
|
||||
int refr = rb.ReadLiteral(Constants.RefFramesLog2);
|
||||
int idx = cm.RefFrameMap[refr];
|
||||
ref RefBuffer refFrame = ref cm.FrameRefs[i];
|
||||
int idx = refFrameSpan[refr];
|
||||
ref RefBuffer refFrame = ref frameRefsSpan[i];
|
||||
refFrame.Idx = idx;
|
||||
refFrame.Buf = frameBufs[idx].Buf;
|
||||
cm.RefFrameSignBias[Constants.LastFrame + i] = (sbyte)rb.ReadBit();
|
||||
refFrame.Buf = frameBuffs[idx].Buf;
|
||||
refFrameSignBiasSpan[Constants.LastFrame + i] = (sbyte)rb.ReadBit();
|
||||
}
|
||||
|
||||
SetupFrameSizeWithRefs(allocator, ref cm, ref rb);
|
||||
@@ -1885,7 +1962,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
for (int i = 0; i < Constants.RefsPerFrame; ++i)
|
||||
{
|
||||
ref RefBuffer refBuf = ref cm.FrameRefs[i];
|
||||
ref RefBuffer refBuf = ref frameRefsSpan[i];
|
||||
refBuf.Sf.SetupScaleFactorsForFrame(
|
||||
refBuf.Buf.YCropWidth,
|
||||
refBuf.Buf.YCropHeight,
|
||||
@@ -1926,23 +2003,25 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// below, forcing the use of context 0 for those frame types.
|
||||
cm.FrameContextIdx = (uint)rb.ReadLiteral(Constants.FrameContextsLog2);
|
||||
|
||||
Span<int> nextRefFrameMapSpan = cm.NextRefFrameMap.AsSpan();
|
||||
|
||||
// Generate next_ref_frame_map.
|
||||
for (mask = pbi.RefreshFrameFlags; mask != 0; mask >>= 1)
|
||||
{
|
||||
if ((mask & 1) != 0)
|
||||
{
|
||||
cm.NextRefFrameMap[refIndex] = cm.NewFbIdx;
|
||||
++frameBufs[cm.NewFbIdx].RefCount;
|
||||
nextRefFrameMapSpan[refIndex] = cm.NewFbIdx;
|
||||
++frameBuffs[cm.NewFbIdx].RefCount;
|
||||
}
|
||||
else
|
||||
{
|
||||
cm.NextRefFrameMap[refIndex] = cm.RefFrameMap[refIndex];
|
||||
nextRefFrameMapSpan[refIndex] = cm.RefFrameMap[refIndex];
|
||||
}
|
||||
|
||||
// Current thread holds the reference frame.
|
||||
if (cm.RefFrameMap[refIndex] >= 0)
|
||||
{
|
||||
++frameBufs[cm.RefFrameMap[refIndex]].RefCount;
|
||||
++frameBuffs[cm.RefFrameMap[refIndex]].RefCount;
|
||||
}
|
||||
|
||||
++refIndex;
|
||||
@@ -1950,11 +2029,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
for (; refIndex < Constants.RefFrames; ++refIndex)
|
||||
{
|
||||
cm.NextRefFrameMap[refIndex] = cm.RefFrameMap[refIndex];
|
||||
nextRefFrameMapSpan[refIndex] = refFrameSpan[refIndex];
|
||||
// Current thread holds the reference frame.
|
||||
if (cm.RefFrameMap[refIndex] >= 0)
|
||||
if (refFrameSpan[refIndex] >= 0)
|
||||
{
|
||||
++frameBufs[cm.RefFrameMap[refIndex]].RefCount;
|
||||
++frameBuffs[refFrameSpan[refIndex]].RefCount;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2001,9 +2080,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
ReadCoefProbs(ref fc, cm.TxMode, ref r);
|
||||
|
||||
Span<byte> skipProbSpan = fc.SkipProb.AsSpan();
|
||||
|
||||
for (int k = 0; k < Constants.SkipContexts; ++k)
|
||||
{
|
||||
r.DiffUpdateProb(ref fc.SkipProb[k]);
|
||||
r.DiffUpdateProb(ref skipProbSpan[k]);
|
||||
}
|
||||
|
||||
if (!cm.FrameIsIntraOnly())
|
||||
@@ -2014,10 +2095,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
ReadSwitchableInterpProbs(ref fc, ref r);
|
||||
}
|
||||
|
||||
Span<byte> intraInterProbSpan = fc.IntraInterProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.IntraInterContexts; i++)
|
||||
{
|
||||
r.DiffUpdateProb(ref fc.IntraInterProb[i]);
|
||||
r.DiffUpdateProb(ref intraInterProbSpan[i]);
|
||||
}
|
||||
|
||||
cm.ReferenceMode = cm.ReadFrameReferenceMode(ref r);
|
||||
@@ -2027,20 +2110,28 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
|
||||
cm.ReadFrameReferenceModeProbs(ref r);
|
||||
|
||||
Span<Array9<byte>> yModeProbSpan1 = fc.YModeProb.AsSpan();
|
||||
|
||||
for (int j = 0; j < EntropyMode.BlockSizeGroups; j++)
|
||||
{
|
||||
Span<byte> yModeProbSpan2 = yModeProbSpan1[j].AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.IntraModes - 1; ++i)
|
||||
{
|
||||
r.DiffUpdateProb(ref fc.YModeProb[j][i]);
|
||||
r.DiffUpdateProb(ref yModeProbSpan2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
Span<Array3<byte>> partitionProbSpan1 = fc.PartitionProb.AsSpan();
|
||||
|
||||
for (int j = 0; j < Constants.PartitionContexts; ++j)
|
||||
{
|
||||
Span<byte> partitionProbSpan2 = partitionProbSpan1[j].AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.PartitionTypes - 1; ++i)
|
||||
{
|
||||
r.DiffUpdateProb(ref fc.PartitionProb[j][i]);
|
||||
r.DiffUpdateProb(ref partitionProbSpan2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2098,7 +2189,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
xd.SetupBlockPlanes(cm.SubsamplingX, cm.SubsamplingY);
|
||||
|
||||
cm.Fc = new Ptr<Vp9EntropyProbs>(ref cm.FrameContexts[(int)cm.FrameContextIdx]);
|
||||
Span<Vp9EntropyProbs> frameContextsSpan = cm.FrameContexts.AsSpan();
|
||||
|
||||
cm.Fc = new Ptr<Vp9EntropyProbs>(ref frameContextsSpan[(int)cm.FrameContextIdx]);
|
||||
|
||||
xd.Corrupted = false;
|
||||
newFb.Corrupted = ReadCompressedHeader(ref pbi, data, firstPartitionSize) ? 1 : 0;
|
||||
@@ -2167,7 +2260,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// Non frame parallel update frame context here.
|
||||
if (cm.RefreshFrameContext != 0 && contextUpdated == 0)
|
||||
{
|
||||
cm.FrameContexts[(int)cm.FrameContextIdx] = cm.Fc.Value;
|
||||
frameContextsSpan[(int)cm.FrameContextIdx] = cm.Fc.Value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,9 +50,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
return PredictionMode.NearestMv + mode;
|
||||
}
|
||||
|
||||
private static int ReadSegmentId(ref Reader r, ref Array7<byte> segTreeProbs)
|
||||
private static int ReadSegmentId(ref Reader r, ReadOnlySpan<byte> segTreeProbs)
|
||||
{
|
||||
return r.ReadTree(Luts.SegmentTree, segTreeProbs.AsSpan());
|
||||
return r.ReadTree(Luts.SegmentTree, segTreeProbs);
|
||||
}
|
||||
|
||||
private static ReadOnlySpan<byte> GetTxProbs(ref Vp9EntropyProbs fc, TxSize maxTxSize, int ctx)
|
||||
@@ -187,7 +187,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
return 0;
|
||||
}
|
||||
|
||||
segmentId = ReadSegmentId(ref r, ref cm.Fc.Value.SegTreeProb);
|
||||
segmentId = ReadSegmentId(ref r, cm.Fc.Value.SegTreeProb.AsSpan());
|
||||
SetSegmentId(ref cm, miOffset, xMis, yMis, segmentId);
|
||||
return segmentId;
|
||||
}
|
||||
@@ -223,15 +223,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
if (seg.TemporalUpdate)
|
||||
{
|
||||
byte predProb = Segmentation.GetPredProbSegId(ref cm.Fc.Value.SegPredProb, ref xd);
|
||||
byte predProb = Segmentation.GetPredProbSegId(cm.Fc.Value.SegPredProb.AsSpan(), ref xd);
|
||||
mi.SegIdPredicted = (sbyte)r.Read(predProb);
|
||||
segmentId = mi.SegIdPredicted != 0
|
||||
? predictedSegmentId
|
||||
: ReadSegmentId(ref r, ref cm.Fc.Value.SegTreeProb);
|
||||
: ReadSegmentId(ref r, cm.Fc.Value.SegTreeProb.AsSpan());
|
||||
}
|
||||
else
|
||||
{
|
||||
segmentId = ReadSegmentId(ref r, ref cm.Fc.Value.SegTreeProb);
|
||||
segmentId = ReadSegmentId(ref r, cm.Fc.Value.SegTreeProb.AsSpan());
|
||||
}
|
||||
|
||||
SetSegmentId(ref cm, miOffset, xMis, yMis, segmentId);
|
||||
@@ -272,10 +272,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
int n = (int)mvClass + Constants.Class0Bits - 1; // Number of bits
|
||||
|
||||
Span<byte> bitsSpan = fc.Bits[mvcomp].AsSpan();
|
||||
|
||||
d = 0;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
d |= r.Read(fc.Bits[mvcomp][i]) << i;
|
||||
d |= r.Read(bitsSpan[i]) << i;
|
||||
}
|
||||
|
||||
mag = Constants.Class0Size << ((int)mvClass + 2);
|
||||
@@ -343,7 +345,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ref MacroBlockD xd,
|
||||
ref Reader r,
|
||||
int segmentId,
|
||||
ref Array2<sbyte> refFrame)
|
||||
Span<sbyte> refFrame)
|
||||
{
|
||||
ref Vp9EntropyProbs fc = ref cm.Fc.Value;
|
||||
|
||||
@@ -418,23 +420,25 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
BlockSize bsize = mi.SbType;
|
||||
|
||||
Span<BModeInfo> bmiSpan = mi.Bmi.AsSpan();
|
||||
|
||||
switch (bsize)
|
||||
{
|
||||
case BlockSize.Block4X4:
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
mi.Bmi[i].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
bmiSpan[i].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
}
|
||||
|
||||
mi.Mode = mi.Bmi[3].Mode;
|
||||
mi.Mode = bmiSpan[3].Mode;
|
||||
break;
|
||||
case BlockSize.Block4X8:
|
||||
mi.Bmi[0].Mode = mi.Bmi[2].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
mi.Bmi[1].Mode = mi.Bmi[3].Mode = mi.Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
bmiSpan[0].Mode = bmiSpan[2].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
bmiSpan[1].Mode = bmiSpan[3].Mode = mi.Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
break;
|
||||
case BlockSize.Block8X4:
|
||||
mi.Bmi[0].Mode = mi.Bmi[1].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
mi.Bmi[2].Mode = mi.Bmi[3].Mode = mi.Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
bmiSpan[0].Mode = bmiSpan[1].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
bmiSpan[2].Mode = bmiSpan[3].Mode = mi.Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
|
||||
break;
|
||||
default:
|
||||
mi.Mode = ReadIntraModeY(ref cm, ref xd, ref r, Luts.SizeGroupLookup[(int)bsize]);
|
||||
@@ -447,29 +451,19 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// modes in GetPredContextSwitchableInterp()
|
||||
mi.InterpFilter = Constants.SwitchableFilters;
|
||||
|
||||
mi.RefFrame[0] = Constants.IntraFrame;
|
||||
mi.RefFrame[1] = Constants.None;
|
||||
}
|
||||
|
||||
private static void CopyPair(ref Array2<Mv> dst, ref Array2<Mv> src)
|
||||
{
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
}
|
||||
|
||||
private static void ZeroPair(ref Array2<Mv> dst)
|
||||
{
|
||||
dst[0] = new Mv();
|
||||
dst[1] = new Mv();
|
||||
Span<sbyte> refFrameSpan = mi.RefFrame.AsSpan();
|
||||
|
||||
refFrameSpan[0] = Constants.IntraFrame;
|
||||
refFrameSpan[1] = Constants.None;
|
||||
}
|
||||
|
||||
private static bool Assign(
|
||||
ref Vp9Common cm,
|
||||
ref MacroBlockD xd,
|
||||
PredictionMode mode,
|
||||
ref Array2<Mv> mv,
|
||||
ref Array2<Mv> refMv,
|
||||
ref Array2<Mv> nearNearestMv,
|
||||
Span<Mv> mv,
|
||||
Span<Mv> refMv,
|
||||
Span<Mv> nearNearestMv,
|
||||
int isCompound,
|
||||
bool allowHp,
|
||||
ref Reader r)
|
||||
@@ -491,12 +485,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
case PredictionMode.NearMv:
|
||||
case PredictionMode.NearestMv:
|
||||
{
|
||||
CopyPair(ref mv, ref nearNearestMv);
|
||||
nearNearestMv.CopyTo(mv);
|
||||
break;
|
||||
}
|
||||
case PredictionMode.ZeroMv:
|
||||
{
|
||||
ZeroPair(ref mv);
|
||||
mv.Clear();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -559,26 +553,29 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
private static bool IsDiffRefFrameAddEb(
|
||||
ref ModeInfo mbmi,
|
||||
sbyte refFrame,
|
||||
ref Array4<sbyte> refSignBias,
|
||||
Span<sbyte> refSignBias,
|
||||
ref int refmvCount,
|
||||
Span<Mv> mvRefList,
|
||||
bool earlyBreak)
|
||||
{
|
||||
if (mbmi.IsInterBlock())
|
||||
{
|
||||
if (mbmi.RefFrame[0] != refFrame)
|
||||
Span<sbyte> refFrameSpan = mbmi.RefFrame.AsSpan();
|
||||
Span<Mv> mvSpan = mbmi.Mv.AsSpan();
|
||||
|
||||
if (refFrameSpan[0] != refFrame)
|
||||
{
|
||||
if (AddRefListEb(mbmi.ScaleMv(0, refFrame, ref refSignBias), ref refmvCount, mvRefList,
|
||||
if (AddRefListEb(mbmi.ScaleMv(0, refFrame, refSignBias), ref refmvCount, mvRefList,
|
||||
earlyBreak))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (mbmi.HasSecondRef() && mbmi.RefFrame[1] != refFrame &&
|
||||
Unsafe.As<Mv, int>(ref mbmi.Mv[1]) != Unsafe.As<Mv, int>(ref mbmi.Mv[0]))
|
||||
if (mbmi.HasSecondRef() && refFrameSpan[1] != refFrame &&
|
||||
Unsafe.As<Mv, int>(ref mvSpan[1]) != Unsafe.As<Mv, int>(ref mvSpan[0]))
|
||||
{
|
||||
if (AddRefListEb(mbmi.ScaleMv(1, refFrame, ref refSignBias), ref refmvCount, mvRefList,
|
||||
if (AddRefListEb(mbmi.ScaleMv(1, refFrame, refSignBias), ref refmvCount, mvRefList,
|
||||
earlyBreak))
|
||||
{
|
||||
return true;
|
||||
@@ -603,7 +600,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
int block,
|
||||
int isSub8X8)
|
||||
{
|
||||
ref Array4<sbyte> refSignBias = ref cm.RefFrameSignBias;
|
||||
Span<sbyte> refSignBias = cm.RefFrameSignBias.AsSpan();
|
||||
int i, refmvCount = 0;
|
||||
bool differentRefFound = false;
|
||||
Ptr<MvRef> prevFrameMvs = cm.UsePrevFrameMvs
|
||||
@@ -616,7 +613,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
// Blank the reference vector list
|
||||
mvRefList[..Constants.MaxMvRefCandidates].Clear();
|
||||
|
||||
|
||||
i = 0;
|
||||
if (isSub8X8 != 0)
|
||||
{
|
||||
@@ -630,7 +627,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ref ModeInfo candidateMi = ref xd.Mi[mvRef.Col + (mvRef.Row * xd.MiStride)].Value;
|
||||
differentRefFound = true;
|
||||
|
||||
if (candidateMi.RefFrame[0] == refFrame)
|
||||
Span<sbyte> refFrameSpan = candidateMi.RefFrame.AsSpan();
|
||||
|
||||
if (refFrameSpan[0] == refFrame)
|
||||
{
|
||||
if (AddRefListEb(candidateMi.GetSubBlockMv(0, mvRef.Col, block), ref refmvCount,
|
||||
mvRefList, earlyBreak))
|
||||
@@ -638,7 +637,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
goto Done;
|
||||
}
|
||||
}
|
||||
else if (candidateMi.RefFrame[1] == refFrame)
|
||||
else if (refFrameSpan[1] == refFrame)
|
||||
{
|
||||
if (AddRefListEb(candidateMi.GetSubBlockMv(1, mvRef.Col, block), ref refmvCount,
|
||||
mvRefList, earlyBreak))
|
||||
@@ -660,17 +659,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
ref ModeInfo candidate = ref xd.Mi[mvRef.Col + (mvRef.Row * xd.MiStride)].Value;
|
||||
differentRefFound = true;
|
||||
|
||||
Span<sbyte> refFrameSpan = candidate.RefFrame.AsSpan();
|
||||
Span<Mv> mvSpan = candidate.Mv.AsSpan();
|
||||
|
||||
if (candidate.RefFrame[0] == refFrame)
|
||||
if (refFrameSpan[0] == refFrame)
|
||||
{
|
||||
if (AddRefListEb(candidate.Mv[0], ref refmvCount, mvRefList, earlyBreak))
|
||||
if (AddRefListEb(mvSpan[0], ref refmvCount, mvRefList, earlyBreak))
|
||||
{
|
||||
goto Done;
|
||||
}
|
||||
}
|
||||
else if (candidate.RefFrame[1] == refFrame)
|
||||
else if (refFrameSpan[1] == refFrame)
|
||||
{
|
||||
if (AddRefListEb(candidate.Mv[1], ref refmvCount, mvRefList, earlyBreak))
|
||||
if (AddRefListEb(mvSpan[1], ref refmvCount, mvRefList, earlyBreak))
|
||||
{
|
||||
goto Done;
|
||||
}
|
||||
@@ -681,16 +683,19 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// Check the last frame's mode and mv info.
|
||||
if (!prevFrameMvs.IsNull)
|
||||
{
|
||||
if (prevFrameMvs.Value.RefFrame[0] == refFrame)
|
||||
Span<sbyte> refFrameSpan = prevFrameMvs.Value.RefFrame.AsSpan();
|
||||
Span<Mv> mvSpan = prevFrameMvs.Value.Mv.AsSpan();
|
||||
|
||||
if (refFrameSpan[0] == refFrame)
|
||||
{
|
||||
if (AddRefListEb(prevFrameMvs.Value.Mv[0], ref refmvCount, mvRefList, earlyBreak))
|
||||
if (AddRefListEb(mvSpan[0], ref refmvCount, mvRefList, earlyBreak))
|
||||
{
|
||||
goto Done;
|
||||
}
|
||||
}
|
||||
else if (prevFrameMvs.Value.RefFrame[1] == refFrame)
|
||||
else if (refFrameSpan[1] == refFrame)
|
||||
{
|
||||
if (AddRefListEb(prevFrameMvs.Value.Mv[1], ref refmvCount, mvRefList, earlyBreak))
|
||||
if (AddRefListEb(mvSpan[1], ref refmvCount, mvRefList, earlyBreak))
|
||||
{
|
||||
goto Done;
|
||||
}
|
||||
@@ -710,7 +715,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ref ModeInfo candidate = ref xd.Mi[mvRef.Col + (mvRef.Row * xd.MiStride)].Value;
|
||||
|
||||
// If the candidate is Intra we don't want to consider its mv.
|
||||
if (IsDiffRefFrameAddEb(ref candidate, refFrame, ref refSignBias, ref refmvCount, mvRefList,
|
||||
if (IsDiffRefFrameAddEb(ref candidate, refFrame, refSignBias, ref refmvCount, mvRefList,
|
||||
earlyBreak))
|
||||
{
|
||||
goto Done;
|
||||
@@ -722,10 +727,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// Since we still don't have a candidate we'll try the last frame.
|
||||
if (!prevFrameMvs.IsNull)
|
||||
{
|
||||
if (prevFrameMvs.Value.RefFrame[0] != refFrame && prevFrameMvs.Value.RefFrame[0] > Constants.IntraFrame)
|
||||
Span<sbyte> refFrameSpan = prevFrameMvs.Value.RefFrame.AsSpan();
|
||||
Span<Mv> mvSpan = prevFrameMvs.Value.Mv.AsSpan();
|
||||
|
||||
if (refFrameSpan[0] != refFrame && refFrameSpan[0] > Constants.IntraFrame)
|
||||
{
|
||||
Mv mv = prevFrameMvs.Value.Mv[0];
|
||||
if (refSignBias[prevFrameMvs.Value.RefFrame[0]] != refSignBias[refFrame])
|
||||
Mv mv = mvSpan[0];
|
||||
if (refSignBias[refFrameSpan[0]] != refSignBias[refFrame])
|
||||
{
|
||||
mv.Row *= -1;
|
||||
mv.Col *= -1;
|
||||
@@ -737,13 +745,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
}
|
||||
|
||||
if (prevFrameMvs.Value.RefFrame[1] > Constants.IntraFrame &&
|
||||
prevFrameMvs.Value.RefFrame[1] != refFrame &&
|
||||
Unsafe.As<Mv, int>(ref prevFrameMvs.Value.Mv[1]) !=
|
||||
Unsafe.As<Mv, int>(ref prevFrameMvs.Value.Mv[0]))
|
||||
if (refFrameSpan[1] > Constants.IntraFrame &&
|
||||
refFrameSpan[1] != refFrame &&
|
||||
Unsafe.As<Mv, int>(ref mvSpan[1]) !=
|
||||
Unsafe.As<Mv, int>(ref mvSpan[0]))
|
||||
{
|
||||
Mv mv = prevFrameMvs.Value.Mv[1];
|
||||
if (refSignBias[prevFrameMvs.Value.RefFrame[1]] != refSignBias[refFrame])
|
||||
Mv mv = mvSpan[1];
|
||||
if (refSignBias[refFrameSpan[1]] != refSignBias[refFrame])
|
||||
{
|
||||
mv.Row *= -1;
|
||||
mv.Col *= -1;
|
||||
@@ -789,7 +797,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
Span<Mv> mvList = stackalloc Mv[Constants.MaxMvRefCandidates];
|
||||
ref ModeInfo mi = ref xd.Mi[0].Value;
|
||||
ref Array4<BModeInfo> bmi = ref mi.Bmi;
|
||||
Span<BModeInfo> bmi = mi.Bmi.AsSpan();
|
||||
int refmvCount;
|
||||
|
||||
Debug.Assert(Constants.MaxMvRefCandidates == 2);
|
||||
@@ -884,11 +892,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
BlockSize bsize = mi.SbType;
|
||||
bool allowHp = cm.AllowHighPrecisionMv;
|
||||
Array2<Mv> bestRefMvs = new();
|
||||
Span<Mv> bestRefMvsSpan = bestRefMvs.AsSpan();
|
||||
int refr, isCompound;
|
||||
byte interModeCtx;
|
||||
Span<Position> mvRefSearch = Luts.MvRefBlocks[(int)bsize];
|
||||
|
||||
ReadRefFrames(ref cm, ref xd, ref r, mi.SegmentId, ref mi.RefFrame);
|
||||
ReadRefFrames(ref cm, ref xd, ref r, mi.SegmentId, mi.RefFrame.AsSpan());
|
||||
isCompound = mi.HasSecondRef() ? 1 : 0;
|
||||
interModeCtx = GetModeContext(ref cm, ref xd, mvRefSearch, miRow, miCol);
|
||||
|
||||
@@ -920,16 +929,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
if (mi.Mode != PredictionMode.ZeroMv)
|
||||
{
|
||||
Span<Mv> tmpMvs = stackalloc Mv[Constants.MaxMvRefCandidates];
|
||||
Span<sbyte> refFrameSpan = mi.RefFrame.AsSpan();
|
||||
|
||||
for (refr = 0; refr < 1 + isCompound; ++refr)
|
||||
{
|
||||
sbyte frame = mi.RefFrame[refr];
|
||||
sbyte frame = refFrameSpan[refr];
|
||||
int refmvCount;
|
||||
|
||||
refmvCount = DecFindRefs(ref cm, ref xd, mi.Mode, frame, mvRefSearch, tmpMvs, miRow, miCol,
|
||||
-1, 0);
|
||||
|
||||
DecFindBestRefs(allowHp, tmpMvs, ref bestRefMvs[refr], refmvCount);
|
||||
DecFindBestRefs(allowHp, tmpMvs, ref bestRefMvsSpan[refr], refmvCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -945,10 +955,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
int idx, idy;
|
||||
PredictionMode bMode = 0;
|
||||
Array2<Mv> bestSub8X8 = new();
|
||||
Span<Mv> bestSub8X8Span = bestSub8X8.AsSpan();
|
||||
Span<BModeInfo> bmiSpan = mi.Bmi.AsSpan();
|
||||
const uint InvalidMv = 0x80008000;
|
||||
// Initialize the 2nd element as even though it won't be used meaningfully
|
||||
// if isCompound is false.
|
||||
Unsafe.As<Mv, uint>(ref bestSub8X8[1]) = InvalidMv;
|
||||
Unsafe.As<Mv, uint>(ref bestSub8X8Span[1]) = InvalidMv;
|
||||
for (idy = 0; idy < 2; idy += num4X4H)
|
||||
{
|
||||
for (idx = 0; idx < 2; idx += num4X4W)
|
||||
@@ -961,11 +973,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
for (refr = 0; refr < 1 + isCompound; ++refr)
|
||||
{
|
||||
AppendSub8X8ForIdx(ref cm, ref xd, mvRefSearch, bMode, j, refr, miRow, miCol,
|
||||
ref bestSub8X8[refr]);
|
||||
ref bestSub8X8Span[refr]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!Assign(ref cm, ref xd, bMode, ref mi.Bmi[j].Mv, ref bestRefMvs, ref bestSub8X8,
|
||||
if (!Assign(ref cm, ref xd, bMode, bmiSpan[j].Mv.AsSpan(), bestRefMvs.AsSpan(), bestSub8X8.AsSpan(),
|
||||
isCompound, allowHp, ref r))
|
||||
{
|
||||
xd.Corrupted |= true;
|
||||
@@ -974,23 +986,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
if (num4X4H == 2)
|
||||
{
|
||||
mi.Bmi[j + 2] = mi.Bmi[j];
|
||||
bmiSpan[j + 2] = bmiSpan[j];
|
||||
}
|
||||
|
||||
if (num4X4W == 2)
|
||||
{
|
||||
mi.Bmi[j + 1] = mi.Bmi[j];
|
||||
bmiSpan[j + 1] = bmiSpan[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mi.Mode = bMode;
|
||||
|
||||
CopyPair(ref mi.Mv, ref mi.Bmi[3].Mv);
|
||||
bmiSpan[3].Mv.AsSpan().CopyTo(mi.Mv.AsSpan());
|
||||
}
|
||||
else
|
||||
{
|
||||
xd.Corrupted |= !Assign(ref cm, ref xd, mi.Mode, ref mi.Mv, ref bestRefMvs, ref bestRefMvs,
|
||||
xd.Corrupted |= !Assign(ref cm, ref xd, mi.Mode, mi.Mv.AsSpan(), bestRefMvs.AsSpan(), bestRefMvs.AsSpan(),
|
||||
isCompound, allowHp, ref r);
|
||||
}
|
||||
}
|
||||
@@ -1082,33 +1093,42 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
int miOffset = (miRow * cm.MiCols) + miCol;
|
||||
|
||||
Span<sbyte> refFrameSpan = mi.Value.RefFrame.AsSpan();
|
||||
|
||||
mi.Value.SegmentId = (sbyte)ReadIntraSegmentId(ref cm, miOffset, xMis, yMis, ref r);
|
||||
mi.Value.Skip = (sbyte)ReadSkip(ref cm, ref xd, mi.Value.SegmentId, ref r);
|
||||
mi.Value.TxSize = ReadTxSize(ref cm, ref xd, true, ref r);
|
||||
mi.Value.RefFrame[0] = Constants.IntraFrame;
|
||||
mi.Value.RefFrame[1] = Constants.None;
|
||||
refFrameSpan[0] = Constants.IntraFrame;
|
||||
refFrameSpan[1] = Constants.None;
|
||||
|
||||
Span<BModeInfo> bmiSpan;
|
||||
switch (bsize)
|
||||
{
|
||||
case BlockSize.Block4X4:
|
||||
bmiSpan = mi.Value.Bmi.AsSpan();
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
mi.Value.Bmi[i].Mode =
|
||||
bmiSpan[i].Mode =
|
||||
ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, i));
|
||||
}
|
||||
|
||||
mi.Value.Mode = mi.Value.Bmi[3].Mode;
|
||||
mi.Value.Mode = bmiSpan[3].Mode;
|
||||
break;
|
||||
case BlockSize.Block4X8:
|
||||
mi.Value.Bmi[0].Mode = mi.Value.Bmi[2].Mode =
|
||||
bmiSpan = mi.Value.Bmi.AsSpan();
|
||||
|
||||
bmiSpan[0].Mode = bmiSpan[2].Mode =
|
||||
ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 0));
|
||||
mi.Value.Bmi[1].Mode = mi.Value.Bmi[3].Mode = mi.Value.Mode =
|
||||
bmiSpan[1].Mode = bmiSpan[3].Mode = mi.Value.Mode =
|
||||
ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 1));
|
||||
break;
|
||||
case BlockSize.Block8X4:
|
||||
mi.Value.Bmi[0].Mode = mi.Value.Bmi[1].Mode =
|
||||
bmiSpan = mi.Value.Bmi.AsSpan();
|
||||
|
||||
bmiSpan[0].Mode = bmiSpan[1].Mode =
|
||||
ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 0));
|
||||
mi.Value.Bmi[2].Mode = mi.Value.Bmi[3].Mode = mi.Value.Mode =
|
||||
bmiSpan[2].Mode = bmiSpan[3].Mode = mi.Value.Mode =
|
||||
ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 2));
|
||||
break;
|
||||
default:
|
||||
@@ -1119,12 +1139,6 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
mi.Value.UvMode = ReadIntraMode(ref r, cm.Fc.Value.KfUvModeProb[(int)mi.Value.Mode].AsSpan());
|
||||
}
|
||||
|
||||
private static void CopyRefFramePair(ref Array2<sbyte> dst, ref Array2<sbyte> src)
|
||||
{
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
}
|
||||
|
||||
public static void ReadModeInfo(
|
||||
ref TileWorkerData twd,
|
||||
ref Vp9Common cm,
|
||||
@@ -1151,8 +1165,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
for (int w = 0; w < xMis; ++w)
|
||||
{
|
||||
ref MvRef mv = ref frameMvs[w];
|
||||
CopyRefFramePair(ref mv.RefFrame, ref mi.RefFrame);
|
||||
CopyPair(ref mv.Mv, ref mi.Mv);
|
||||
mi.RefFrame.AsSpan().CopyTo(mv.RefFrame.AsSpan());
|
||||
mi.Mv.AsSpan().CopyTo(mv.Mv.AsSpan());
|
||||
}
|
||||
|
||||
frameMvs = frameMvs.Slice(cm.MiCols);
|
||||
|
||||
@@ -89,9 +89,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
cm.Lf.RefDeltas = pictureInfo.RefDeltas;
|
||||
cm.Lf.ModeDeltas = pictureInfo.ModeDeltas;
|
||||
|
||||
cm.FrameRefs[0].Buf = (Surface)pictureInfo.LastReference;
|
||||
cm.FrameRefs[1].Buf = (Surface)pictureInfo.GoldenReference;
|
||||
cm.FrameRefs[2].Buf = (Surface)pictureInfo.AltReference;
|
||||
Span<RefBuffer> frameRefsSpan = cm.FrameRefs.AsSpan();
|
||||
frameRefsSpan[0].Buf = (Surface)pictureInfo.LastReference;
|
||||
frameRefsSpan[1].Buf = (Surface)pictureInfo.GoldenReference;
|
||||
frameRefsSpan[2].Buf = (Surface)pictureInfo.AltReference;
|
||||
cm.Mb.CurBuf = (Surface)output;
|
||||
|
||||
cm.Mb.SetupBlockPlanes(1, 1);
|
||||
|
||||
@@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
PlaneType type,
|
||||
Span<int> dqcoeff,
|
||||
TxSize txSize,
|
||||
ref Array2<short> dq,
|
||||
ReadOnlySpan<short> dq,
|
||||
int ctx,
|
||||
ReadOnlySpan<short> scan,
|
||||
ReadOnlySpan<short> nb,
|
||||
@@ -39,7 +39,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ref Vp9EntropyProbs fc = ref xd.Fc.Value;
|
||||
int refr = xd.Mi[0].Value.IsInterBlock() ? 1 : 0;
|
||||
int band, c = 0;
|
||||
ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr];
|
||||
ReadOnlySpan<Array6<Array3<byte>>> coefProbs = fc.CoefProbs[(int)txSize][(int)type][refr].AsSpan();
|
||||
Span<byte> tokenCache = stackalloc byte[32 * 32];
|
||||
ReadOnlySpan<byte> bandTranslate = Luts.GetBandTranslate(txSize);
|
||||
int dqShift = txSize == TxSize.Tx32X32 ? 1 : 0;
|
||||
@@ -56,23 +56,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ulong value = r.Value;
|
||||
uint range = r.Range;
|
||||
int count = r.Count;
|
||||
|
||||
ReadOnlySpan<Array6<Array4<uint>>> coefSpan = counts.Coef[(int)txSize][(int)type][refr].AsSpan();
|
||||
ReadOnlySpan<Array6<uint>> eobBranchSpan = counts.EobBranch[(int)txSize][(int)type][refr].AsSpan();
|
||||
|
||||
while (c < maxEob)
|
||||
{
|
||||
int val = -1;
|
||||
band = bandTranslate[0];
|
||||
bandTranslate = bandTranslate[1..];
|
||||
ref Array3<byte> prob = ref coefProbs[band][ctx];
|
||||
ReadOnlySpan<byte> prob = coefProbs[band][ctx].AsSpan();
|
||||
if (!xd.Counts.IsNull)
|
||||
{
|
||||
++counts.EobBranch[(int)txSize][(int)type][refr][band][ctx];
|
||||
++eobBranchSpan[band][ctx];
|
||||
}
|
||||
|
||||
if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0)
|
||||
{
|
||||
if (!xd.Counts.IsNull)
|
||||
{
|
||||
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.EobModelToken];
|
||||
++coefSpan[band][ctx][Constants.EobModelToken];
|
||||
}
|
||||
|
||||
break;
|
||||
@@ -82,7 +85,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
if (!xd.Counts.IsNull)
|
||||
{
|
||||
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.ZeroToken];
|
||||
++coefSpan[band][ctx][Constants.ZeroToken];
|
||||
}
|
||||
|
||||
dqv = dq[1];
|
||||
@@ -99,7 +102,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ctx = GetCoefContext(nb, tokenCache, c);
|
||||
band = bandTranslate[0];
|
||||
bandTranslate = bandTranslate[1..];
|
||||
prob = ref coefProbs[band][ctx];
|
||||
prob = coefProbs[band][ctx].AsSpan();
|
||||
}
|
||||
|
||||
if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0)
|
||||
@@ -107,7 +110,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ReadOnlySpan<byte> p = Luts.Pareto8Full[prob[Constants.PivotNode] - 1];
|
||||
if (!xd.Counts.IsNull)
|
||||
{
|
||||
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken];
|
||||
++coefSpan[band][ctx][Constants.TwoToken];
|
||||
}
|
||||
|
||||
if (r.ReadBool(p[0], ref value, ref count, ref range) != 0)
|
||||
@@ -175,7 +178,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
if (!xd.Counts.IsNull)
|
||||
{
|
||||
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.OneToken];
|
||||
++coefSpan[band][ctx][Constants.OneToken];
|
||||
}
|
||||
|
||||
tokenCache[scan[c]] = 1;
|
||||
@@ -232,7 +235,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ref Reader r = ref twd.BitReader;
|
||||
ref MacroBlockD xd = ref twd.Xd;
|
||||
ref MacroBlockDPlane pd = ref xd.Plane[plane];
|
||||
ref Array2<short> dequant = ref pd.SegDequant[segId];
|
||||
Span<short> dequant = pd.SegDequant[segId].AsSpan();
|
||||
int eob;
|
||||
Span<sbyte> a = pd.AboveContext.AsSpan()[x..];
|
||||
Span<sbyte> l = pd.LeftContext.AsSpan()[y..];
|
||||
@@ -250,7 +253,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
GetPlaneType(plane),
|
||||
pd.DqCoeff.AsSpan(),
|
||||
txSize,
|
||||
ref dequant,
|
||||
dequant,
|
||||
ctx,
|
||||
sc.Scan,
|
||||
sc.Neighbors,
|
||||
@@ -266,7 +269,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
GetPlaneType(plane),
|
||||
pd.DqCoeff.AsSpan(),
|
||||
txSize,
|
||||
ref dequant,
|
||||
dequant,
|
||||
ctx,
|
||||
sc.Scan,
|
||||
sc.Neighbors,
|
||||
@@ -283,7 +286,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
GetPlaneType(plane),
|
||||
pd.DqCoeff.AsSpan(),
|
||||
txSize,
|
||||
ref dequant,
|
||||
dequant,
|
||||
ctx,
|
||||
sc.Scan,
|
||||
sc.Neighbors,
|
||||
@@ -303,7 +306,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
GetPlaneType(plane),
|
||||
pd.DqCoeff.AsSpan(),
|
||||
txSize,
|
||||
ref dequant,
|
||||
dequant,
|
||||
ctx,
|
||||
sc.Scan,
|
||||
sc.Neighbors,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
@@ -129,7 +130,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
byte* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
Span<short> xFilter = xFilters[xQ4 & SubpelMask].AsSpan();
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
@@ -164,7 +165,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
byte* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
Span<short> xFilter = xFilters[xQ4 & SubpelMask].AsSpan();
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
@@ -275,7 +276,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
Span<short> yFilter = yFilters[yQ4 & SubpelMask].AsSpan();
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
@@ -310,7 +311,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
Span<short> yFilter = yFilters[yQ4 & SubpelMask].AsSpan();
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
@@ -619,7 +620,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
ushort* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
Span<short> xFilter = xFilters[xQ4 & SubpelMask].AsSpan();
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
@@ -655,7 +656,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
ushort* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
Span<short> xFilter = xFilters[xQ4 & SubpelMask].AsSpan();
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
@@ -692,7 +693,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
Span<short> yFilter = yFilters[yQ4 & SubpelMask].AsSpan();
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
@@ -728,7 +729,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
Span<short> yFilter = yFilters[yQ4 & SubpelMask].AsSpan();
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
|
||||
@@ -304,31 +304,40 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
|
||||
internal static void TxCountsToBranchCounts32X32(ReadOnlySpan<uint> txCount32X32P,
|
||||
ref Array3<Array2<uint>> ct32X32P)
|
||||
ReadOnlySpan<Array2<uint>> ct32X32P)
|
||||
{
|
||||
ct32X32P[0][0] = txCount32X32P[(int)TxSize.Tx4X4];
|
||||
ct32X32P[0][1] = txCount32X32P[(int)TxSize.Tx8X8] + txCount32X32P[(int)TxSize.Tx16X16] +
|
||||
Span<uint> ct32X32PSpan0 = ct32X32P[0].AsSpan();
|
||||
Span<uint> ct32X32PSpan1 = ct32X32P[1].AsSpan();
|
||||
Span<uint> ct32X32PSpan2 = ct32X32P[2].AsSpan();
|
||||
|
||||
ct32X32PSpan0[0] = txCount32X32P[(int)TxSize.Tx4X4];
|
||||
ct32X32PSpan0[1] = txCount32X32P[(int)TxSize.Tx8X8] + txCount32X32P[(int)TxSize.Tx16X16] +
|
||||
txCount32X32P[(int)TxSize.Tx32X32];
|
||||
ct32X32P[1][0] = txCount32X32P[(int)TxSize.Tx8X8];
|
||||
ct32X32P[1][1] = txCount32X32P[(int)TxSize.Tx16X16] + txCount32X32P[(int)TxSize.Tx32X32];
|
||||
ct32X32P[2][0] = txCount32X32P[(int)TxSize.Tx16X16];
|
||||
ct32X32P[2][1] = txCount32X32P[(int)TxSize.Tx32X32];
|
||||
ct32X32PSpan1[0] = txCount32X32P[(int)TxSize.Tx8X8];
|
||||
ct32X32PSpan1[1] = txCount32X32P[(int)TxSize.Tx16X16] + txCount32X32P[(int)TxSize.Tx32X32];
|
||||
ct32X32PSpan2[0] = txCount32X32P[(int)TxSize.Tx16X16];
|
||||
ct32X32PSpan2[1] = txCount32X32P[(int)TxSize.Tx32X32];
|
||||
}
|
||||
|
||||
internal static void TxCountsToBranchCounts16X16(ReadOnlySpan<uint> txCount16X16P,
|
||||
ref Array2<Array2<uint>> ct16X16P)
|
||||
ReadOnlySpan<Array2<uint>> ct16X16P)
|
||||
{
|
||||
ct16X16P[0][0] = txCount16X16P[(int)TxSize.Tx4X4];
|
||||
ct16X16P[0][1] = txCount16X16P[(int)TxSize.Tx8X8] + txCount16X16P[(int)TxSize.Tx16X16];
|
||||
ct16X16P[1][0] = txCount16X16P[(int)TxSize.Tx8X8];
|
||||
ct16X16P[1][1] = txCount16X16P[(int)TxSize.Tx16X16];
|
||||
Span<uint> ct16X16PSpan0 = ct16X16P[0].AsSpan();
|
||||
Span<uint> ct16X16PSpan1 = ct16X16P[1].AsSpan();
|
||||
|
||||
ct16X16PSpan0[0] = txCount16X16P[(int)TxSize.Tx4X4];
|
||||
ct16X16PSpan0[1] = txCount16X16P[(int)TxSize.Tx8X8] + txCount16X16P[(int)TxSize.Tx16X16];
|
||||
ct16X16PSpan1[0] = txCount16X16P[(int)TxSize.Tx8X8];
|
||||
ct16X16PSpan1[1] = txCount16X16P[(int)TxSize.Tx16X16];
|
||||
}
|
||||
|
||||
internal static void TxCountsToBranchCounts8X8(ReadOnlySpan<uint> txCount8X8P,
|
||||
ref Array1<Array2<uint>> ct8X8P)
|
||||
ReadOnlySpan<Array2<uint>> ct8X8P)
|
||||
{
|
||||
ct8X8P[0][0] = txCount8X8P[(int)TxSize.Tx4X4];
|
||||
ct8X8P[0][1] = txCount8X8P[(int)TxSize.Tx8X8];
|
||||
Span<uint> ct8X8PSpan = ct8X8P[0].AsSpan();
|
||||
|
||||
ct8X8PSpan[0] = txCount8X8P[(int)TxSize.Tx4X4];
|
||||
ct8X8PSpan[1] = txCount8X8P[(int)TxSize.Tx8X8];
|
||||
}
|
||||
|
||||
public static unsafe void SetupPastIndependence(ref Vp9Common cm)
|
||||
|
||||
@@ -378,9 +378,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
int index = shiftY;
|
||||
|
||||
Span<byte> lflYSpan = lfm.LflY.AsSpan();
|
||||
|
||||
for (int i = 0; i < bh; i++)
|
||||
{
|
||||
MemoryMarshal.CreateSpan(ref lfm.LflY[index], 64 - index)[..bw].Fill((byte)filterLevel);
|
||||
MemoryMarshal.CreateSpan(ref lflYSpan[index], 64 - index)[..bw].Fill((byte)filterLevel);
|
||||
index += 8;
|
||||
}
|
||||
|
||||
@@ -444,25 +446,30 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
const ulong AboveBorder = 0x000000ff000000ffUL;
|
||||
const ushort LeftBorderUv = 0x1111;
|
||||
const ushort AboveBorderUv = 0x000f;
|
||||
|
||||
Span<ulong> leftYSpan = lfm.LeftY.AsSpan();
|
||||
Span<ulong> aboveYSpan = lfm.AboveY.AsSpan();
|
||||
Span<ushort> leftUvSpan = lfm.LeftUv.AsSpan();
|
||||
Span<ushort> aboveUvSpan = lfm.AboveUv.AsSpan();
|
||||
|
||||
// The largest loopfilter we have is 16x16 so we use the 16x16 mask
|
||||
// for 32x32 transforms also.
|
||||
lfm.LeftY[(int)TxSize.Tx16X16] |= lfm.LeftY[(int)TxSize.Tx32X32];
|
||||
lfm.AboveY[(int)TxSize.Tx16X16] |= lfm.AboveY[(int)TxSize.Tx32X32];
|
||||
lfm.LeftUv[(int)TxSize.Tx16X16] |= lfm.LeftUv[(int)TxSize.Tx32X32];
|
||||
lfm.AboveUv[(int)TxSize.Tx16X16] |= lfm.AboveUv[(int)TxSize.Tx32X32];
|
||||
leftYSpan[(int)TxSize.Tx16X16] |= leftYSpan[(int)TxSize.Tx32X32];
|
||||
aboveYSpan[(int)TxSize.Tx16X16] |= aboveYSpan[(int)TxSize.Tx32X32];
|
||||
leftUvSpan[(int)TxSize.Tx16X16] |= leftUvSpan[(int)TxSize.Tx32X32];
|
||||
aboveUvSpan[(int)TxSize.Tx16X16] |= aboveUvSpan[(int)TxSize.Tx32X32];
|
||||
|
||||
// We do at least 8 tap filter on every 32x32 even if the transform size
|
||||
// is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
|
||||
// remove it from the 4x4.
|
||||
lfm.LeftY[(int)TxSize.Tx8X8] |= lfm.LeftY[(int)TxSize.Tx4X4] & LeftBorder;
|
||||
lfm.LeftY[(int)TxSize.Tx4X4] &= ~LeftBorder;
|
||||
lfm.AboveY[(int)TxSize.Tx8X8] |= lfm.AboveY[(int)TxSize.Tx4X4] & AboveBorder;
|
||||
lfm.AboveY[(int)TxSize.Tx4X4] &= ~AboveBorder;
|
||||
lfm.LeftUv[(int)TxSize.Tx8X8] |= (ushort)(lfm.LeftUv[(int)TxSize.Tx4X4] & LeftBorderUv);
|
||||
lfm.LeftUv[(int)TxSize.Tx4X4] &= unchecked((ushort)~LeftBorderUv);
|
||||
lfm.AboveUv[(int)TxSize.Tx8X8] |= (ushort)(lfm.AboveUv[(int)TxSize.Tx4X4] & AboveBorderUv);
|
||||
lfm.AboveUv[(int)TxSize.Tx4X4] &= unchecked((ushort)~AboveBorderUv);
|
||||
leftYSpan[(int)TxSize.Tx8X8] |= leftYSpan[(int)TxSize.Tx4X4] & LeftBorder;
|
||||
leftYSpan[(int)TxSize.Tx4X4] &= ~LeftBorder;
|
||||
aboveYSpan[(int)TxSize.Tx8X8] |= aboveYSpan[(int)TxSize.Tx4X4] & AboveBorder;
|
||||
aboveYSpan[(int)TxSize.Tx4X4] &= ~AboveBorder;
|
||||
leftUvSpan[(int)TxSize.Tx8X8] |= (ushort)(leftUvSpan[(int)TxSize.Tx4X4] & LeftBorderUv);
|
||||
leftUvSpan[(int)TxSize.Tx4X4] &= unchecked((ushort)~LeftBorderUv);
|
||||
aboveUvSpan[(int)TxSize.Tx8X8] |= (ushort)(aboveUvSpan[(int)TxSize.Tx4X4] & AboveBorderUv);
|
||||
aboveUvSpan[(int)TxSize.Tx4X4] &= unchecked((ushort)~AboveBorderUv);
|
||||
|
||||
// We do some special edge handling.
|
||||
if (miRow + Constants.MiBlockSize > cm.MiRows)
|
||||
@@ -476,10 +483,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// Remove values completely outside our border.
|
||||
for (int i = 0; i < (int)TxSize.Tx32X32; i++)
|
||||
{
|
||||
lfm.LeftY[i] &= maskY;
|
||||
lfm.AboveY[i] &= maskY;
|
||||
lfm.LeftUv[i] &= maskUv;
|
||||
lfm.AboveUv[i] &= maskUv;
|
||||
leftYSpan[i] &= maskY;
|
||||
aboveYSpan[i] &= maskY;
|
||||
leftUvSpan[i] &= maskUv;
|
||||
aboveUvSpan[i] &= maskUv;
|
||||
}
|
||||
|
||||
lfm.Int4X4Y &= maskY;
|
||||
@@ -489,14 +496,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// apply the shorter one instead.
|
||||
if (rows == 1)
|
||||
{
|
||||
lfm.AboveUv[(int)TxSize.Tx8X8] |= lfm.AboveUv[(int)TxSize.Tx16X16];
|
||||
lfm.AboveUv[(int)TxSize.Tx16X16] = 0;
|
||||
aboveUvSpan[(int)TxSize.Tx8X8] |= aboveUvSpan[(int)TxSize.Tx16X16];
|
||||
aboveUvSpan[(int)TxSize.Tx16X16] = 0;
|
||||
}
|
||||
|
||||
if (rows == 5)
|
||||
{
|
||||
lfm.AboveUv[(int)TxSize.Tx8X8] |= (ushort)(lfm.AboveUv[(int)TxSize.Tx16X16] & 0xff00);
|
||||
lfm.AboveUv[(int)TxSize.Tx16X16] &= (ushort)~(lfm.AboveUv[(int)TxSize.Tx16X16] & 0xff00);
|
||||
aboveUvSpan[(int)TxSize.Tx8X8] |= (ushort)(aboveUvSpan[(int)TxSize.Tx16X16] & 0xff00);
|
||||
aboveUvSpan[(int)TxSize.Tx16X16] &= (ushort)~(aboveUvSpan[(int)TxSize.Tx16X16] & 0xff00);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -516,10 +523,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// Remove the bits outside the image edge.
|
||||
for (int i = 0; i < (int)TxSize.Tx32X32; i++)
|
||||
{
|
||||
lfm.LeftY[i] &= maskY;
|
||||
lfm.AboveY[i] &= maskY;
|
||||
lfm.LeftUv[i] &= maskUv;
|
||||
lfm.AboveUv[i] &= maskUv;
|
||||
leftYSpan[i] &= maskY;
|
||||
aboveYSpan[i] &= maskY;
|
||||
leftUvSpan[i] &= maskUv;
|
||||
aboveUvSpan[i] &= maskUv;
|
||||
}
|
||||
|
||||
lfm.Int4X4Y &= maskY;
|
||||
@@ -529,14 +536,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// apply the shorter one instead.
|
||||
if (columns == 1)
|
||||
{
|
||||
lfm.LeftUv[(int)TxSize.Tx8X8] |= lfm.LeftUv[(int)TxSize.Tx16X16];
|
||||
lfm.LeftUv[(int)TxSize.Tx16X16] = 0;
|
||||
leftUvSpan[(int)TxSize.Tx8X8] |= leftUvSpan[(int)TxSize.Tx16X16];
|
||||
leftUvSpan[(int)TxSize.Tx16X16] = 0;
|
||||
}
|
||||
|
||||
if (columns == 5)
|
||||
{
|
||||
lfm.LeftUv[(int)TxSize.Tx8X8] |= (ushort)(lfm.LeftUv[(int)TxSize.Tx16X16] & 0xcccc);
|
||||
lfm.LeftUv[(int)TxSize.Tx16X16] &= (ushort)~(lfm.LeftUv[(int)TxSize.Tx16X16] & 0xcccc);
|
||||
leftUvSpan[(int)TxSize.Tx8X8] |= (ushort)(leftUvSpan[(int)TxSize.Tx16X16] & 0xcccc);
|
||||
leftUvSpan[(int)TxSize.Tx16X16] &= (ushort)~(leftUvSpan[(int)TxSize.Tx16X16] & 0xcccc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -546,28 +553,28 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
for (int i = 0; i < (int)TxSize.Tx32X32; i++)
|
||||
{
|
||||
lfm.LeftY[i] &= 0xfefefefefefefefeUL;
|
||||
lfm.LeftUv[i] &= 0xeeee;
|
||||
leftYSpan[i] &= 0xfefefefefefefefeUL;
|
||||
leftUvSpan[i] &= 0xeeee;
|
||||
}
|
||||
}
|
||||
|
||||
// Assert if we try to apply 2 different loop filters at the same position.
|
||||
Debug.Assert((lfm.LeftY[(int)TxSize.Tx16X16] & lfm.LeftY[(int)TxSize.Tx8X8]) == 0);
|
||||
Debug.Assert((lfm.LeftY[(int)TxSize.Tx16X16] & lfm.LeftY[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.LeftY[(int)TxSize.Tx8X8] & lfm.LeftY[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.Int4X4Y & lfm.LeftY[(int)TxSize.Tx16X16]) == 0);
|
||||
Debug.Assert((lfm.LeftUv[(int)TxSize.Tx16X16] & lfm.LeftUv[(int)TxSize.Tx8X8]) == 0);
|
||||
Debug.Assert((lfm.LeftUv[(int)TxSize.Tx16X16] & lfm.LeftUv[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.LeftUv[(int)TxSize.Tx8X8] & lfm.LeftUv[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.Int4X4Uv & lfm.LeftUv[(int)TxSize.Tx16X16]) == 0);
|
||||
Debug.Assert((lfm.AboveY[(int)TxSize.Tx16X16] & lfm.AboveY[(int)TxSize.Tx8X8]) == 0);
|
||||
Debug.Assert((lfm.AboveY[(int)TxSize.Tx16X16] & lfm.AboveY[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.AboveY[(int)TxSize.Tx8X8] & lfm.AboveY[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.Int4X4Y & lfm.AboveY[(int)TxSize.Tx16X16]) == 0);
|
||||
Debug.Assert((lfm.AboveUv[(int)TxSize.Tx16X16] & lfm.AboveUv[(int)TxSize.Tx8X8]) == 0);
|
||||
Debug.Assert((lfm.AboveUv[(int)TxSize.Tx16X16] & lfm.AboveUv[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.AboveUv[(int)TxSize.Tx8X8] & lfm.AboveUv[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.Int4X4Uv & lfm.AboveUv[(int)TxSize.Tx16X16]) == 0);
|
||||
Debug.Assert((leftYSpan[(int)TxSize.Tx16X16] & leftYSpan[(int)TxSize.Tx8X8]) == 0);
|
||||
Debug.Assert((leftYSpan[(int)TxSize.Tx16X16] & leftYSpan[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((leftYSpan[(int)TxSize.Tx8X8] & leftYSpan[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.Int4X4Y & leftYSpan[(int)TxSize.Tx16X16]) == 0);
|
||||
Debug.Assert((leftUvSpan[(int)TxSize.Tx16X16] & leftUvSpan[(int)TxSize.Tx8X8]) == 0);
|
||||
Debug.Assert((leftUvSpan[(int)TxSize.Tx16X16] & leftUvSpan[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((leftUvSpan[(int)TxSize.Tx8X8] & leftUvSpan[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.Int4X4Uv & leftUvSpan[(int)TxSize.Tx16X16]) == 0);
|
||||
Debug.Assert((aboveYSpan[(int)TxSize.Tx16X16] & aboveYSpan[(int)TxSize.Tx8X8]) == 0);
|
||||
Debug.Assert((aboveYSpan[(int)TxSize.Tx16X16] & aboveYSpan[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((aboveYSpan[(int)TxSize.Tx8X8] & aboveYSpan[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.Int4X4Y & aboveYSpan[(int)TxSize.Tx16X16]) == 0);
|
||||
Debug.Assert((aboveUvSpan[(int)TxSize.Tx16X16] & aboveUvSpan[(int)TxSize.Tx8X8]) == 0);
|
||||
Debug.Assert((aboveUvSpan[(int)TxSize.Tx16X16] & aboveUvSpan[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((aboveUvSpan[(int)TxSize.Tx8X8] & aboveUvSpan[(int)TxSize.Tx4X4]) == 0);
|
||||
Debug.Assert((lfm.Int4X4Uv & aboveUvSpan[(int)TxSize.Tx16X16]) == 0);
|
||||
}
|
||||
|
||||
public static unsafe void ResetLfm(ref Vp9Common cm)
|
||||
@@ -583,6 +590,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
int lvl;
|
||||
|
||||
Span<LoopFilterThresh> lFThrSpan = lfi.Lfthr.AsSpan();
|
||||
|
||||
// For each possible value for the loop filter fill out limits
|
||||
for (lvl = 0; lvl <= MaxLoopFilter; lvl++)
|
||||
{
|
||||
@@ -602,8 +611,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
blockInsideLimit = 1;
|
||||
}
|
||||
|
||||
lfi.Lfthr[lvl].Lim.AsSpan().Fill((byte)blockInsideLimit);
|
||||
lfi.Lfthr[lvl].Mblim.AsSpan().Fill((byte)((2 * (lvl + 2)) + blockInsideLimit));
|
||||
lFThrSpan[lvl].Lim.AsSpan().Fill((byte)blockInsideLimit);
|
||||
lFThrSpan[lvl].Mblim.AsSpan().Fill((byte)((2 * (lvl + 2)) + blockInsideLimit));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -625,6 +634,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
lf.LastSharpnessLevel = lf.SharpnessLevel;
|
||||
}
|
||||
|
||||
Span<Array4<Array2<byte>>> lvlSpan = lfi.Lvl.AsSpan();
|
||||
Span<sbyte> refDeltasSpan = lf.RefDeltas.AsSpan();
|
||||
Span<sbyte> modeDeltasSpan = lf.ModeDeltas.AsSpan();
|
||||
sbyte intraFrameRefDelta = refDeltasSpan[Constants.IntraFrame];
|
||||
|
||||
for (segId = 0; segId < Constants.MaxSegments; segId++)
|
||||
{
|
||||
int lvlSeg = defaultFiltLvl;
|
||||
@@ -639,20 +653,24 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
// We could get rid of this if we assume that deltas are set to
|
||||
// zero when not in use; encoder always uses deltas
|
||||
MemoryMarshal.Cast<Array2<byte>, byte>(lfi.Lvl[segId].AsSpan()).Fill((byte)lvlSeg);
|
||||
MemoryMarshal.Cast<Array2<byte>, byte>(lvlSpan[segId].AsSpan()).Fill((byte)lvlSeg);
|
||||
}
|
||||
else
|
||||
{
|
||||
int refr, mode;
|
||||
int intraLvl = lvlSeg + (lf.RefDeltas[Constants.IntraFrame] * scale);
|
||||
lfi.Lvl[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLvl, 0, MaxLoopFilter);
|
||||
int intraLvl = lvlSeg + (intraFrameRefDelta * scale);
|
||||
lvlSpan[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLvl, 0, MaxLoopFilter);
|
||||
|
||||
Span<Array2<byte>> lvlSpan2 = lvlSpan[segId].AsSpan();
|
||||
|
||||
for (refr = Constants.LastFrame; refr < Constants.MaxRefFrames; ++refr)
|
||||
{
|
||||
Span<byte> lvlSpan3 = lvlSpan2[refr].AsSpan();
|
||||
|
||||
for (mode = 0; mode < MaxModeLfDeltas; ++mode)
|
||||
{
|
||||
int interLvl = lvlSeg + (lf.RefDeltas[refr] * scale) + (lf.ModeDeltas[mode] * scale);
|
||||
lfi.Lvl[segId][refr][mode] = (byte)Math.Clamp(interLvl, 0, MaxLoopFilter);
|
||||
int interLvl = lvlSeg + (refDeltasSpan[refr] * scale) + (modeDeltasSpan[mode] * scale);
|
||||
lvlSpan3[mode] = (byte)Math.Clamp(interLvl, 0, MaxLoopFilter);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -811,18 +829,32 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
lfis[1] = lfthr[lfl[lflForward]];
|
||||
ss[1] = ss[0].Slice(8 * pitch);
|
||||
|
||||
Span<byte> mblim0Span = lfis[0].Mblim.AsSpan();
|
||||
Span<byte> lim0Span = lfis[0].Lim.AsSpan();
|
||||
Span<byte> hevThr0Span = lfis[0].HevThr.AsSpan();
|
||||
Span<byte> mblim1Span = lfis[1].Mblim.AsSpan();
|
||||
Span<byte> lim1Span = lfis[1].Lim.AsSpan();
|
||||
Span<byte> hevThr1Span = lfis[1].HevThr.AsSpan();
|
||||
|
||||
if ((mask16X16 & dualOne) != 0)
|
||||
{
|
||||
if ((mask16X16 & dualOne) == dualOne)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical16Dual(ss[0], pitch, lfis[0].Mblim[0], lfis[0].Lim[0],
|
||||
lfis[0].HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfVertical16Dual(ss[0], pitch, mblim0Span[0], lim0Span[0],
|
||||
hevThr0Span[0], bd);
|
||||
}
|
||||
else
|
||||
{
|
||||
ref LoopFilterThresh lfi = ref lfis[(mask16X16 & 1) == 0 ? 1 : 0];
|
||||
LoopFilterScalar.HighBdLpfVertical16(ss[(mask16X16 & 1) == 0 ? 1 : 0], pitch, lfi.Mblim[0],
|
||||
lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
if ((mask16X16 & 1) == 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical16(ss[1], pitch, mblim1Span[0],
|
||||
lim1Span[0], hevThr1Span[0], bd);
|
||||
}
|
||||
else
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical16(ss[0], pitch, mblim0Span[0],
|
||||
lim0Span[0], hevThr0Span[0], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -833,24 +865,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
LoopFilterScalar.HighBdLpfVertical8Dual(
|
||||
ss[0],
|
||||
pitch,
|
||||
lfis[0].Mblim[0],
|
||||
lfis[0].Lim[0],
|
||||
lfis[0].HevThr[0],
|
||||
lfis[1].Mblim[0],
|
||||
lfis[1].Lim[0],
|
||||
lfis[1].HevThr[0],
|
||||
mblim0Span[0],
|
||||
lim0Span[0],
|
||||
hevThr0Span[0],
|
||||
mblim1Span[0],
|
||||
lim1Span[0],
|
||||
hevThr1Span[0],
|
||||
bd);
|
||||
}
|
||||
else
|
||||
{
|
||||
ref LoopFilterThresh lfi = ref lfis[(mask8X8 & 1) == 0 ? 1 : 0];
|
||||
LoopFilterScalar.HighBdLpfVertical8(
|
||||
ss[(mask8X8 & 1) == 0 ? 1 : 0],
|
||||
pitch,
|
||||
lfi.Mblim[0],
|
||||
lfi.Lim[0],
|
||||
lfi.HevThr[0],
|
||||
bd);
|
||||
if ((mask8X8 & 1) == 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical8(ss[1], pitch, mblim1Span[0],
|
||||
lim1Span[0], hevThr1Span[0], bd);
|
||||
}
|
||||
else
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical8(ss[0], pitch, mblim0Span[0],
|
||||
lim0Span[0], hevThr0Span[0], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -861,19 +895,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
LoopFilterScalar.HighBdLpfVertical4Dual(
|
||||
ss[0],
|
||||
pitch,
|
||||
lfis[0].Mblim[0],
|
||||
lfis[0].Lim[0],
|
||||
lfis[0].HevThr[0],
|
||||
lfis[1].Mblim[0],
|
||||
lfis[1].Lim[0],
|
||||
lfis[1].HevThr[0],
|
||||
mblim0Span[0],
|
||||
lim0Span[0],
|
||||
hevThr0Span[0],
|
||||
mblim1Span[0],
|
||||
lim1Span[0],
|
||||
hevThr1Span[0],
|
||||
bd);
|
||||
}
|
||||
else
|
||||
{
|
||||
ref LoopFilterThresh lfi = ref lfis[(mask4X4 & 1) == 0 ? 1 : 0];
|
||||
LoopFilterScalar.HighBdLpfVertical4(ss[(mask4X4 & 1) == 0 ? 1 : 0], pitch, lfi.Mblim[0],
|
||||
lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
if ((mask4X4 & 1) == 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical4(ss[1], pitch, mblim1Span[0],
|
||||
lim1Span[0], hevThr1Span[0], bd);
|
||||
}
|
||||
else
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical4(ss[0], pitch, mblim0Span[0],
|
||||
lim0Span[0], hevThr0Span[0], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -884,19 +925,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
LoopFilterScalar.HighBdLpfVertical4Dual(
|
||||
ss[0].Slice(4),
|
||||
pitch,
|
||||
lfis[0].Mblim[0],
|
||||
lfis[0].Lim[0],
|
||||
lfis[0].HevThr[0],
|
||||
lfis[1].Mblim[0],
|
||||
lfis[1].Lim[0],
|
||||
lfis[1].HevThr[0],
|
||||
mblim0Span[0],
|
||||
lim0Span[0],
|
||||
hevThr0Span[0],
|
||||
mblim1Span[0],
|
||||
lim1Span[0],
|
||||
hevThr1Span[0],
|
||||
bd);
|
||||
}
|
||||
else
|
||||
{
|
||||
ref LoopFilterThresh lfi = ref lfis[(mask4X4Int & 1) == 0 ? 1 : 0];
|
||||
LoopFilterScalar.HighBdLpfVertical4(ss[(mask4X4Int & 1) == 0 ? 1 : 0].Slice(4), pitch,
|
||||
lfi.Mblim[0], lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
if ((mask4X4Int & 1) == 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical4(ss[1].Slice(4), pitch, mblim1Span[0],
|
||||
lim1Span[0], hevThr1Span[0], bd);
|
||||
}
|
||||
else
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical4(ss[0].Slice(4), pitch, mblim0Span[0],
|
||||
lim0Span[0], hevThr0Span[0], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1086,18 +1134,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
if ((mask & 1) != 0)
|
||||
{
|
||||
LoopFilterThresh lfi = lfthr[lfl[0]];
|
||||
|
||||
Span<byte> mblimSpan = lfi.Mblim.AsSpan();
|
||||
Span<byte> limSpan = lfi.Lim.AsSpan();
|
||||
Span<byte> hevThrSpan = lfi.HevThr.AsSpan();
|
||||
|
||||
if ((mask16X16 & 1) != 0)
|
||||
{
|
||||
if ((mask16X16 & 3) == 3)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal16Dual(s, pitch, lfi.Mblim[0], lfi.Lim[0],
|
||||
lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfHorizontal16Dual(s, pitch, mblimSpan[0], limSpan[0],
|
||||
hevThrSpan[0], bd);
|
||||
count = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal16(s, pitch, lfi.Mblim[0], lfi.Lim[0], lfi.HevThr[0],
|
||||
LoopFilterScalar.HighBdLpfHorizontal16(s, pitch, mblimSpan[0], limSpan[0], hevThrSpan[0],
|
||||
bd);
|
||||
}
|
||||
}
|
||||
@@ -1107,16 +1159,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
// Next block's thresholds.
|
||||
LoopFilterThresh lfin = lfthr[lfl[1]];
|
||||
|
||||
Span<byte> nMblimSpan = lfin.Mblim.AsSpan();
|
||||
Span<byte> nLimSpan = lfin.Lim.AsSpan();
|
||||
Span<byte> nHevThrSpan = lfin.HevThr.AsSpan();
|
||||
|
||||
LoopFilterScalar.HighBdLpfHorizontal8Dual(
|
||||
s,
|
||||
pitch,
|
||||
lfi.Mblim[0],
|
||||
lfi.Lim[0],
|
||||
lfi.HevThr[0],
|
||||
lfin.Mblim[0],
|
||||
lfin.Lim[0],
|
||||
lfin.HevThr[0],
|
||||
mblimSpan[0],
|
||||
limSpan[0],
|
||||
hevThrSpan[0],
|
||||
nMblimSpan[0],
|
||||
nLimSpan[0],
|
||||
nHevThrSpan[0],
|
||||
bd);
|
||||
|
||||
if ((mask4X4Int & 3) == 3)
|
||||
@@ -1124,36 +1180,36 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
LoopFilterScalar.HighBdLpfHorizontal4Dual(
|
||||
s.Slice(4 * pitch),
|
||||
pitch,
|
||||
lfi.Mblim[0],
|
||||
lfi.Lim[0],
|
||||
lfi.HevThr[0],
|
||||
lfin.Mblim[0],
|
||||
lfin.Lim[0],
|
||||
lfin.HevThr[0],
|
||||
mblimSpan[0],
|
||||
limSpan[0],
|
||||
hevThrSpan[0],
|
||||
nMblimSpan[0],
|
||||
nLimSpan[0],
|
||||
nHevThrSpan[0],
|
||||
bd);
|
||||
}
|
||||
else if ((mask4X4Int & 1) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, lfi.Mblim[0],
|
||||
lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, mblimSpan[0],
|
||||
limSpan[0], hevThrSpan[0], bd);
|
||||
}
|
||||
else if ((mask4X4Int & 2) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(8 + (4 * pitch)), pitch, lfin.Mblim[0],
|
||||
lfin.Lim[0], lfin.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(8 + (4 * pitch)), pitch, nMblimSpan[0],
|
||||
nLimSpan[0], nHevThrSpan[0], bd);
|
||||
}
|
||||
|
||||
count = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal8(s, pitch, lfi.Mblim[0], lfi.Lim[0], lfi.HevThr[0],
|
||||
LoopFilterScalar.HighBdLpfHorizontal8(s, pitch, mblimSpan[0], limSpan[0], hevThrSpan[0],
|
||||
bd);
|
||||
|
||||
if ((mask4X4Int & 1) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, lfi.Mblim[0],
|
||||
lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, mblimSpan[0],
|
||||
limSpan[0], hevThrSpan[0], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1163,16 +1219,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
// Next block's thresholds.
|
||||
LoopFilterThresh lfin = lfthr[lfl[1]];
|
||||
|
||||
Span<byte> nMblimSpan = lfin.Mblim.AsSpan();
|
||||
Span<byte> nLimSpan = lfin.Lim.AsSpan();
|
||||
Span<byte> nHevThrSpan = lfin.HevThr.AsSpan();
|
||||
|
||||
LoopFilterScalar.HighBdLpfHorizontal4Dual(
|
||||
s,
|
||||
pitch,
|
||||
lfi.Mblim[0],
|
||||
lfi.Lim[0],
|
||||
lfi.HevThr[0],
|
||||
lfin.Mblim[0],
|
||||
lfin.Lim[0],
|
||||
lfin.HevThr[0],
|
||||
mblimSpan[0],
|
||||
limSpan[0],
|
||||
hevThrSpan[0],
|
||||
nMblimSpan[0],
|
||||
nLimSpan[0],
|
||||
nHevThrSpan[0],
|
||||
bd);
|
||||
|
||||
if ((mask4X4Int & 3) == 3)
|
||||
@@ -1180,43 +1240,43 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
LoopFilterScalar.HighBdLpfHorizontal4Dual(
|
||||
s.Slice(4 * pitch),
|
||||
pitch,
|
||||
lfi.Mblim[0],
|
||||
lfi.Lim[0],
|
||||
lfi.HevThr[0],
|
||||
lfin.Mblim[0],
|
||||
lfin.Lim[0],
|
||||
lfin.HevThr[0],
|
||||
mblimSpan[0],
|
||||
limSpan[0],
|
||||
hevThrSpan[0],
|
||||
nMblimSpan[0],
|
||||
nLimSpan[0],
|
||||
nHevThrSpan[0],
|
||||
bd);
|
||||
}
|
||||
else if ((mask4X4Int & 1) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, lfi.Mblim[0],
|
||||
lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, mblimSpan[0],
|
||||
limSpan[0], hevThrSpan[0], bd);
|
||||
}
|
||||
else if ((mask4X4Int & 2) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(8 + (4 * pitch)), pitch, lfin.Mblim[0],
|
||||
lfin.Lim[0], lfin.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(8 + (4 * pitch)), pitch, nMblimSpan[0],
|
||||
nLimSpan[0], nHevThrSpan[0], bd);
|
||||
}
|
||||
|
||||
count = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s, pitch, lfi.Mblim[0], lfi.Lim[0], lfi.HevThr[0],
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s, pitch, mblimSpan[0], limSpan[0], hevThrSpan[0],
|
||||
bd);
|
||||
|
||||
if ((mask4X4Int & 1) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, lfi.Mblim[0],
|
||||
lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, mblimSpan[0],
|
||||
limSpan[0], hevThrSpan[0], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, lfi.Mblim[0], lfi.Lim[0],
|
||||
lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfHorizontal4(s.Slice(4 * pitch), pitch, mblimSpan[0], limSpan[0],
|
||||
hevThrSpan[0], bd);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1291,26 +1351,30 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
for (uint mask = mask16X16 | mask8X8 | mask4X4 | mask4X4Int; mask != 0; mask >>= 1)
|
||||
{
|
||||
LoopFilterThresh lfi = lfthr[lfl[0]];
|
||||
|
||||
Span<byte> mblimSpan = lfi.Mblim.AsSpan();
|
||||
Span<byte> limSpan = lfi.Lim.AsSpan();
|
||||
Span<byte> hevThrSpan = lfi.HevThr.AsSpan();
|
||||
|
||||
if ((mask & 1) != 0)
|
||||
{
|
||||
if ((mask16X16 & 1) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical16(s, pitch, lfi.Mblim[0], lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfVertical16(s, pitch, mblimSpan[0], limSpan[0], hevThrSpan[0], bd);
|
||||
}
|
||||
else if ((mask8X8 & 1) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical8(s, pitch, lfi.Mblim[0], lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfVertical8(s, pitch, mblimSpan[0], limSpan[0], hevThrSpan[0], bd);
|
||||
}
|
||||
else if ((mask4X4 & 1) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical4(s, pitch, lfi.Mblim[0], lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfVertical4(s, pitch, mblimSpan[0], limSpan[0], hevThrSpan[0], bd);
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask4X4Int & 1) != 0)
|
||||
{
|
||||
LoopFilterScalar.HighBdLpfVertical4(s.Slice(4), pitch, lfi.Mblim[0], lfi.Lim[0], lfi.HevThr[0], bd);
|
||||
LoopFilterScalar.HighBdLpfVertical4(s.Slice(4), pitch, mblimSpan[0], limSpan[0], hevThrSpan[0], bd);
|
||||
}
|
||||
|
||||
s = s.Slice(8);
|
||||
@@ -1555,9 +1619,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
ref Buf2D dst = ref plane.Dst;
|
||||
ArrayPtr<byte> dst0 = dst.Buf;
|
||||
ulong mask16X16 = lfm.LeftY[(int)TxSize.Tx16X16];
|
||||
ulong mask8X8 = lfm.LeftY[(int)TxSize.Tx8X8];
|
||||
ulong mask4X4 = lfm.LeftY[(int)TxSize.Tx4X4];
|
||||
Span<ulong> leftYSpan = lfm.LeftY.AsSpan();
|
||||
ulong mask16X16 = leftYSpan[(int)TxSize.Tx16X16];
|
||||
ulong mask8X8 = leftYSpan[(int)TxSize.Tx8X8];
|
||||
ulong mask4X4 = leftYSpan[(int)TxSize.Tx4X4];
|
||||
ulong mask4X4Int = lfm.Int4X4Y;
|
||||
|
||||
Debug.Assert(plane.SubsamplingX == 0 && plane.SubsamplingY == 0);
|
||||
@@ -1604,9 +1669,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
// Horizontal pass
|
||||
dst.Buf = dst0;
|
||||
mask16X16 = lfm.AboveY[(int)TxSize.Tx16X16];
|
||||
mask8X8 = lfm.AboveY[(int)TxSize.Tx8X8];
|
||||
mask4X4 = lfm.AboveY[(int)TxSize.Tx4X4];
|
||||
Span<ulong> aboveYSpan = lfm.AboveY.AsSpan();
|
||||
mask16X16 = aboveYSpan[(int)TxSize.Tx16X16];
|
||||
mask8X8 = aboveYSpan[(int)TxSize.Tx8X8];
|
||||
mask4X4 = aboveYSpan[(int)TxSize.Tx4X4];
|
||||
mask4X4Int = lfm.Int4X4Y;
|
||||
|
||||
for (int r = 0; r < Constants.MiBlockSize && miRow + r < cm.MiRows; r++)
|
||||
@@ -1669,10 +1735,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ArrayPtr<byte> dst0 = dst.Buf;
|
||||
|
||||
Span<byte> lflUv = stackalloc byte[16];
|
||||
Span<byte> lflY = lfm.LflY.AsSpan();
|
||||
|
||||
ushort mask16X16 = lfm.LeftUv[(int)TxSize.Tx16X16];
|
||||
ushort mask8X8 = lfm.LeftUv[(int)TxSize.Tx8X8];
|
||||
ushort mask4X4 = lfm.LeftUv[(int)TxSize.Tx4X4];
|
||||
Span<ushort> leftUvSpan = lfm.LeftUv.AsSpan();
|
||||
ushort mask16X16 = leftUvSpan[(int)TxSize.Tx16X16];
|
||||
ushort mask8X8 = leftUvSpan[(int)TxSize.Tx8X8];
|
||||
ushort mask4X4 = leftUvSpan[(int)TxSize.Tx4X4];
|
||||
ushort mask4X4Int = lfm.Int4X4Uv;
|
||||
|
||||
Debug.Assert(plane.SubsamplingX == 1 && plane.SubsamplingY == 1);
|
||||
@@ -1682,8 +1750,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
for (int c = 0; c < Constants.MiBlockSize >> 1; c++)
|
||||
{
|
||||
lflUv[(r << 1) + c] = lfm.LflY[(r << 3) + (c << 1)];
|
||||
lflUv[((r + 2) << 1) + c] = lfm.LflY[((r + 2) << 3) + (c << 1)];
|
||||
lflUv[(r << 1) + c] = lflY[(r << 3) + (c << 1)];
|
||||
lflUv[((r + 2) << 1) + c] = lflY[((r + 2) << 3) + (c << 1)];
|
||||
}
|
||||
|
||||
if (cm.UseHighBitDepth)
|
||||
@@ -1725,9 +1793,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
// Horizontal pass
|
||||
dst.Buf = dst0;
|
||||
mask16X16 = lfm.AboveUv[(int)TxSize.Tx16X16];
|
||||
mask8X8 = lfm.AboveUv[(int)TxSize.Tx8X8];
|
||||
mask4X4 = lfm.AboveUv[(int)TxSize.Tx4X4];
|
||||
Span<ushort> aboveUvSpan = lfm.AboveUv.AsSpan();
|
||||
mask16X16 = aboveUvSpan[(int)TxSize.Tx16X16];
|
||||
mask8X8 = aboveUvSpan[(int)TxSize.Tx8X8];
|
||||
mask4X4 = aboveUvSpan[(int)TxSize.Tx4X4];
|
||||
mask4X4Int = lfm.Int4X4Uv;
|
||||
|
||||
for (int r = 0; r < Constants.MiBlockSize && miRow + r < cm.MiRows; r += 2)
|
||||
@@ -1806,16 +1875,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
int sbCols = TileInfo.MiColsAlignedToSb(cm.MiCols) >> Constants.MiBlockSizeLog2;
|
||||
LfPath path;
|
||||
int miRow, miCol;
|
||||
Span<MacroBlockDPlane> planesSpan = planes.AsSpan();
|
||||
|
||||
if (yOnly)
|
||||
{
|
||||
path = LfPath.LfPath444;
|
||||
}
|
||||
else if (planes[1].SubsamplingY == 1 && planes[1].SubsamplingX == 1)
|
||||
else if (planesSpan[1].SubsamplingY == 1 && planesSpan[1].SubsamplingX == 1)
|
||||
{
|
||||
path = LfPath.LfPath420;
|
||||
}
|
||||
else if (planes[1].SubsamplingY == 0 && planes[1].SubsamplingX == 0)
|
||||
else if (planesSpan[1].SubsamplingY == 0 && planesSpan[1].SubsamplingX == 0)
|
||||
{
|
||||
path = LfPath.LfPath444;
|
||||
}
|
||||
@@ -1837,23 +1907,23 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
lfSync.SyncRead(r, c);
|
||||
|
||||
ReconInter.SetupDstPlanes(ref planes, ref frameBuffer, miRow, miCol);
|
||||
ReconInter.SetupDstPlanes(planesSpan, ref frameBuffer, miRow, miCol);
|
||||
|
||||
AdjustMask(ref cm, miRow, miCol, ref lfm[0]);
|
||||
|
||||
FilterBlockPlaneSs00(ref cm, ref planes[0], miRow, ref lfm[0]);
|
||||
FilterBlockPlaneSs00(ref cm, ref planesSpan[0], miRow, ref lfm[0]);
|
||||
for (plane = 1; plane < numPlanes; ++plane)
|
||||
{
|
||||
switch (path)
|
||||
{
|
||||
case LfPath.LfPath420:
|
||||
FilterBlockPlaneSs11(ref cm, ref planes[plane], miRow, ref lfm[0]);
|
||||
FilterBlockPlaneSs11(ref cm, ref planesSpan[plane], miRow, ref lfm[0]);
|
||||
break;
|
||||
case LfPath.LfPath444:
|
||||
FilterBlockPlaneSs00(ref cm, ref planes[plane], miRow, ref lfm[0]);
|
||||
FilterBlockPlaneSs00(ref cm, ref planesSpan[plane], miRow, ref lfm[0]);
|
||||
break;
|
||||
case LfPath.LfPathSlow:
|
||||
FilterBlockPlaneNon420(ref cm, ref planes[plane], mi.Slice(miCol), miRow,
|
||||
FilterBlockPlaneNon420(ref cm, ref planesSpan[plane], mi.Slice(miCol), miRow,
|
||||
miCol);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -270,17 +270,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
private static Array8<short> NewArray8Short(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7)
|
||||
{
|
||||
Array8<short> output = new()
|
||||
{
|
||||
[0] = e0,
|
||||
[1] = e1,
|
||||
[2] = e2,
|
||||
[3] = e3,
|
||||
[4] = e4,
|
||||
[5] = e5,
|
||||
[6] = e6,
|
||||
[7] = e7
|
||||
};
|
||||
Array8<short> output = new();
|
||||
Span<short> outputSpan = output.AsSpan();
|
||||
outputSpan[0] = e0;
|
||||
outputSpan[1] = e1;
|
||||
outputSpan[2] = e2;
|
||||
outputSpan[3] = e3;
|
||||
outputSpan[4] = e4;
|
||||
outputSpan[5] = e5;
|
||||
outputSpan[6] = e6;
|
||||
outputSpan[7] = e7;
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
@@ -109,15 +110,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
sbyte vrfa = aSg ? xd.AboveMi.Value.RefFrame[0] : xd.AboveMi.Value.RefFrame[varRefIdx];
|
||||
sbyte vrfl = lSg ? xd.LeftMi.Value.RefFrame[0] : xd.LeftMi.Value.RefFrame[varRefIdx];
|
||||
|
||||
if (vrfa == vrfl && cm.CompVarRef[1] == vrfa)
|
||||
Span<sbyte> compVarRefSpan = cm.CompVarRef.AsSpan();
|
||||
|
||||
if (vrfa == vrfl && compVarRefSpan[1] == vrfa)
|
||||
{
|
||||
predContext = 0;
|
||||
}
|
||||
else if (lSg && aSg)
|
||||
{
|
||||
// Single/Single
|
||||
if ((vrfa == cm.CompFixedRef && vrfl == cm.CompVarRef[0]) ||
|
||||
(vrfl == cm.CompFixedRef && vrfa == cm.CompVarRef[0]))
|
||||
if ((vrfa == cm.CompFixedRef && vrfl == compVarRefSpan[0]) ||
|
||||
(vrfl == cm.CompFixedRef && vrfa == compVarRefSpan[0]))
|
||||
{
|
||||
predContext = 4;
|
||||
}
|
||||
@@ -135,11 +138,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// Single/Comp
|
||||
sbyte vrfc = lSg ? vrfa : vrfl;
|
||||
sbyte rfs = aSg ? vrfa : vrfl;
|
||||
if (vrfc == cm.CompVarRef[1] && rfs != cm.CompVarRef[1])
|
||||
if (vrfc == compVarRefSpan[1] && rfs != compVarRefSpan[1])
|
||||
{
|
||||
predContext = 1;
|
||||
}
|
||||
else if (rfs == cm.CompVarRef[1] && vrfc != cm.CompVarRef[1])
|
||||
else if (rfs == compVarRefSpan[1] && vrfc != compVarRefSpan[1])
|
||||
{
|
||||
predContext = 2;
|
||||
}
|
||||
@@ -212,14 +215,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
// Intra/Inter or Inter/Intra
|
||||
ref ModeInfo edgeMi = ref aboveIntra ? ref xd.LeftMi.Value : ref xd.AboveMi.Value;
|
||||
Span<sbyte> refFrameSpan = edgeMi.RefFrame.AsSpan();
|
||||
if (!edgeMi.HasSecondRef())
|
||||
{
|
||||
predContext = 4 * (edgeMi.RefFrame[0] == Constants.LastFrame ? 1 : 0);
|
||||
predContext = 4 * (refFrameSpan[0] == Constants.LastFrame ? 1 : 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
predContext = 1 + (edgeMi.RefFrame[0] == Constants.LastFrame ||
|
||||
edgeMi.RefFrame[1] == Constants.LastFrame
|
||||
predContext = 1 + (refFrameSpan[0] == Constants.LastFrame ||
|
||||
refFrameSpan[1] == Constants.LastFrame
|
||||
? 1
|
||||
: 0);
|
||||
}
|
||||
@@ -229,10 +233,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// Inter/Inter
|
||||
bool aboveHasSecond = xd.AboveMi.Value.HasSecondRef();
|
||||
bool leftHasSecond = xd.LeftMi.Value.HasSecondRef();
|
||||
sbyte above0 = xd.AboveMi.Value.RefFrame[0];
|
||||
sbyte above1 = xd.AboveMi.Value.RefFrame[1];
|
||||
sbyte left0 = xd.LeftMi.Value.RefFrame[0];
|
||||
sbyte left1 = xd.LeftMi.Value.RefFrame[1];
|
||||
Span<sbyte> aRefFrameSpan = xd.AboveMi.Value.RefFrame.AsSpan();
|
||||
sbyte above0 = aRefFrameSpan[0];
|
||||
sbyte above1 = aRefFrameSpan[1];
|
||||
Span<sbyte> lRefFrameSpan = xd.LeftMi.Value.RefFrame.AsSpan();
|
||||
sbyte left0 = lRefFrameSpan[0];
|
||||
sbyte left1 = lRefFrameSpan[1];
|
||||
|
||||
if (aboveHasSecond && leftHasSecond)
|
||||
{
|
||||
@@ -281,8 +287,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
else
|
||||
{
|
||||
predContext = 1 + (edgeMi.RefFrame[0] == Constants.LastFrame ||
|
||||
edgeMi.RefFrame[1] == Constants.LastFrame
|
||||
Span<sbyte> refFrameSpan = edgeMi.RefFrame.AsSpan();
|
||||
|
||||
predContext = 1 + (refFrameSpan[0] == Constants.LastFrame ||
|
||||
refFrameSpan[1] == Constants.LastFrame
|
||||
? 1
|
||||
: 0);
|
||||
}
|
||||
@@ -321,21 +329,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
// Intra/Inter or Inter/Intra
|
||||
ref ModeInfo edgeMi = ref aboveIntra ? ref xd.LeftMi.Value : ref xd.AboveMi.Value;
|
||||
Span<sbyte> refFrameSpan = edgeMi.RefFrame.AsSpan();
|
||||
if (!edgeMi.HasSecondRef())
|
||||
{
|
||||
if (edgeMi.RefFrame[0] == Constants.LastFrame)
|
||||
if (refFrameSpan[0] == Constants.LastFrame)
|
||||
{
|
||||
predContext = 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
predContext = 4 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ? 1 : 0);
|
||||
predContext = 4 * (refFrameSpan[0] == Constants.GoldenFrame ? 1 : 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
predContext = 1 + (2 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ||
|
||||
edgeMi.RefFrame[1] == Constants.GoldenFrame
|
||||
predContext = 1 + (2 * (refFrameSpan[0] == Constants.GoldenFrame ||
|
||||
refFrameSpan[1] == Constants.GoldenFrame
|
||||
? 1
|
||||
: 0));
|
||||
}
|
||||
@@ -345,10 +354,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
// Inter/Inter
|
||||
bool aboveHasSecond = xd.AboveMi.Value.HasSecondRef();
|
||||
bool leftHasSecond = xd.LeftMi.Value.HasSecondRef();
|
||||
sbyte above0 = xd.AboveMi.Value.RefFrame[0];
|
||||
sbyte above1 = xd.AboveMi.Value.RefFrame[1];
|
||||
sbyte left0 = xd.LeftMi.Value.RefFrame[0];
|
||||
sbyte left1 = xd.LeftMi.Value.RefFrame[1];
|
||||
Span<sbyte> aRefFrameSpan = xd.AboveMi.Value.RefFrame.AsSpan();
|
||||
sbyte above0 = aRefFrameSpan[0];
|
||||
sbyte above1 = aRefFrameSpan[1];
|
||||
Span<sbyte> lRefFrameSpan = xd.LeftMi.Value.RefFrame.AsSpan();
|
||||
sbyte left0 = lRefFrameSpan[0];
|
||||
sbyte left1 = lRefFrameSpan[1];
|
||||
|
||||
if (aboveHasSecond && leftHasSecond)
|
||||
{
|
||||
@@ -407,19 +418,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
// One edge available
|
||||
ref ModeInfo edgeMi = ref !xd.AboveMi.IsNull ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
|
||||
Span<sbyte> refFrameSpan = edgeMi.RefFrame.AsSpan();
|
||||
|
||||
if (!edgeMi.IsInterBlock() || (edgeMi.RefFrame[0] == Constants.LastFrame && !edgeMi.HasSecondRef()))
|
||||
if (!edgeMi.IsInterBlock() || (refFrameSpan[0] == Constants.LastFrame && !edgeMi.HasSecondRef()))
|
||||
{
|
||||
predContext = 2;
|
||||
}
|
||||
else if (!edgeMi.HasSecondRef())
|
||||
{
|
||||
predContext = 4 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ? 1 : 0);
|
||||
predContext = 4 * (refFrameSpan[0] == Constants.GoldenFrame ? 1 : 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
predContext = 3 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ||
|
||||
edgeMi.RefFrame[1] == Constants.GoldenFrame
|
||||
predContext = 3 * (refFrameSpan[0] == Constants.GoldenFrame ||
|
||||
refFrameSpan[1] == Constants.GoldenFrame
|
||||
? 1
|
||||
: 0);
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
return (byte)BitUtils.RoundPowerOfTwo((prob1 * (256 - factor)) + (prob2 * factor), 8);
|
||||
}
|
||||
|
||||
public static byte MergeProbs(byte preProb, ref Array2<uint> ct, uint countSat, uint maxUpdateFactor)
|
||||
public static byte MergeProbs(byte preProb, ReadOnlySpan<uint> ct, uint countSat, uint maxUpdateFactor)
|
||||
{
|
||||
byte prob = GetBinaryProb(ct[0], ct[1]);
|
||||
uint count = Math.Min(ct[0] + ct[1], countSat);
|
||||
@@ -53,7 +53,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
private const int ModeMvCountSat = 20;
|
||||
|
||||
public static byte ModeMvMergeProbs(byte preProb, ref Array2<uint> ct)
|
||||
public static byte ModeMvMergeProbs(byte preProb, ReadOnlySpan<uint> ct)
|
||||
{
|
||||
uint den = ct[0] + ct[1];
|
||||
if (den == 0)
|
||||
@@ -81,7 +81,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
Array2<uint> ct = new();
|
||||
ct[0] = leftCount;
|
||||
ct[1] = rightCount;
|
||||
probs[(int)(i >> 1)] = ModeMvMergeProbs(preProbs[(int)(i >> 1)], ref ct);
|
||||
probs[(int)(i >> 1)] = ModeMvMergeProbs(preProbs[(int)(i >> 1)], ct.AsSpan());
|
||||
return leftCount + rightCount;
|
||||
}
|
||||
|
||||
|
||||
@@ -164,7 +164,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
}
|
||||
|
||||
public static void SetupDstPlanes(
|
||||
ref Array3<MacroBlockDPlane> planes,
|
||||
Span<MacroBlockDPlane> planes,
|
||||
ref Surface src,
|
||||
int miRow,
|
||||
int miCol)
|
||||
@@ -205,9 +205,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
strides[1] = src.UvStride;
|
||||
strides[2] = src.UvStride;
|
||||
|
||||
Span<MacroBlockDPlane> planeSpan = xd.Plane.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.MaxMbPlane; ++i)
|
||||
{
|
||||
ref MacroBlockDPlane pd = ref xd.Plane[i];
|
||||
ref MacroBlockDPlane pd = ref planeSpan[i];
|
||||
SetupPredPlanes(ref pd.Pre[idx], buffers[i], strides[i], miRow, miCol, sf, pd.SubsamplingX,
|
||||
pd.SubsamplingY);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
@@ -28,13 +29,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
ModeRefDeltaEnabled = true;
|
||||
ModeRefDeltaUpdate = true;
|
||||
|
||||
Span<sbyte> refDeltasSpan = RefDeltas.AsSpan();
|
||||
Span<sbyte> modeDeltasSpan = ModeDeltas.AsSpan();
|
||||
|
||||
RefDeltas[Constants.IntraFrame] = 1;
|
||||
RefDeltas[Constants.LastFrame] = 0;
|
||||
RefDeltas[Constants.GoldenFrame] = -1;
|
||||
RefDeltas[Constants.AltRefFrame] = -1;
|
||||
ModeDeltas[0] = 0;
|
||||
ModeDeltas[1] = 0;
|
||||
refDeltasSpan[Constants.IntraFrame] = 1;
|
||||
refDeltasSpan[Constants.LastFrame] = 0;
|
||||
refDeltasSpan[Constants.GoldenFrame] = -1;
|
||||
refDeltasSpan[Constants.AltRefFrame] = -1;
|
||||
modeDeltasSpan[0] = 0;
|
||||
modeDeltasSpan[1] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Video;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
@@ -147,10 +148,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
public void SetupBlockPlanes(int ssX, int ssY)
|
||||
{
|
||||
Span<MacroBlockDPlane> planeSpan = Plane.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.MaxMbPlane; i++)
|
||||
{
|
||||
Plane[i].SubsamplingX = i != 0 ? ssX : 0;
|
||||
Plane[i].SubsamplingY = i != 0 ? ssY : 0;
|
||||
planeSpan[i].SubsamplingX = i != 0 ? ssX : 0;
|
||||
planeSpan[i].SubsamplingY = i != 0 ? ssY : 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -158,12 +161,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
int aboveIdx = miCol * 2;
|
||||
int leftIdx = (miRow * 2) & 15;
|
||||
|
||||
Span<MacroBlockDPlane> planeSpan = Plane.AsSpan();
|
||||
Span<ArrayPtr<sbyte>> aboveContextSpan = AboveContext.AsSpan();
|
||||
Span<Array16<sbyte>> leftContextSpan = LeftContext.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.MaxMbPlane; ++i)
|
||||
{
|
||||
ref MacroBlockDPlane pd = ref Plane[i];
|
||||
pd.AboveContext = AboveContext[i].Slice(aboveIdx >> pd.SubsamplingX);
|
||||
pd.LeftContext = new ArrayPtr<sbyte>(ref LeftContext[i][leftIdx >> pd.SubsamplingY],
|
||||
ref MacroBlockDPlane pd = ref planeSpan[i];
|
||||
pd.AboveContext = aboveContextSpan[i].Slice(aboveIdx >> pd.SubsamplingX);
|
||||
pd.LeftContext = new ArrayPtr<sbyte>(ref leftContextSpan[i][leftIdx >> pd.SubsamplingY],
|
||||
16 - (leftIdx >> pd.SubsamplingY));
|
||||
}
|
||||
}
|
||||
@@ -182,9 +189,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
public unsafe void DecResetSkipContext()
|
||||
{
|
||||
Span<MacroBlockDPlane> planeSpan = Plane.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.MaxMbPlane; i++)
|
||||
{
|
||||
ref MacroBlockDPlane pd = ref Plane[i];
|
||||
ref MacroBlockDPlane pd = ref planeSpan[i];
|
||||
MemoryUtil.Fill(pd.AboveContext.ToPointer(), (sbyte)0, pd.N4W);
|
||||
MemoryUtil.Fill(pd.LeftContext.ToPointer(), (sbyte)0, pd.N4H);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
@@ -65,36 +66,41 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
public Mv MvPredQ4(int idx)
|
||||
{
|
||||
Span<BModeInfo> bmiSpan = Bmi.AsSpan();
|
||||
|
||||
Mv res = new()
|
||||
{
|
||||
Row = (short)ReconInter.RoundMvCompQ4(
|
||||
Bmi[0].Mv[idx].Row + Bmi[1].Mv[idx].Row +
|
||||
Bmi[2].Mv[idx].Row + Bmi[3].Mv[idx].Row),
|
||||
bmiSpan[0].Mv[idx].Row + bmiSpan[1].Mv[idx].Row +
|
||||
bmiSpan[2].Mv[idx].Row + bmiSpan[3].Mv[idx].Row),
|
||||
Col = (short)ReconInter.RoundMvCompQ4(
|
||||
Bmi[0].Mv[idx].Col + Bmi[1].Mv[idx].Col +
|
||||
Bmi[2].Mv[idx].Col + Bmi[3].Mv[idx].Col)
|
||||
bmiSpan[0].Mv[idx].Col + bmiSpan[1].Mv[idx].Col +
|
||||
bmiSpan[2].Mv[idx].Col + bmiSpan[3].Mv[idx].Col)
|
||||
};
|
||||
return res;
|
||||
}
|
||||
|
||||
public Mv MvPredQ2(int idx, int block0, int block1)
|
||||
{
|
||||
Span<BModeInfo> bmiSpan = Bmi.AsSpan();
|
||||
|
||||
Mv res = new()
|
||||
{
|
||||
Row = (short)ReconInter.RoundMvCompQ2(
|
||||
Bmi[block0].Mv[idx].Row +
|
||||
Bmi[block1].Mv[idx].Row),
|
||||
bmiSpan[block0].Mv[idx].Row +
|
||||
bmiSpan[block1].Mv[idx].Row),
|
||||
Col = (short)ReconInter.RoundMvCompQ2(
|
||||
Bmi[block0].Mv[idx].Col +
|
||||
Bmi[block1].Mv[idx].Col)
|
||||
bmiSpan[block0].Mv[idx].Col +
|
||||
bmiSpan[block1].Mv[idx].Col)
|
||||
};
|
||||
return res;
|
||||
}
|
||||
|
||||
// Performs mv sign inversion if indicated by the reference frame combination.
|
||||
public Mv ScaleMv(int refr, sbyte thisRefFrame, ref Array4<sbyte> refSignBias)
|
||||
public Mv ScaleMv(int refr, sbyte thisRefFrame, Span<sbyte> refSignBias)
|
||||
{
|
||||
Mv mv = Mv[refr];
|
||||
|
||||
if (refSignBias[RefFrame[refr]] != refSignBias[thisRefFrame])
|
||||
{
|
||||
mv.Row *= -1;
|
||||
|
||||
@@ -99,10 +99,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
}
|
||||
else
|
||||
{
|
||||
Span<Array2<uint>> bitsSpan = counts.Bits[comp].AsSpan();
|
||||
|
||||
int b = c + Constants.Class0Bits - 1; // Number of bits
|
||||
for (int i = 0; i < b; ++i)
|
||||
{
|
||||
counts.Bits[comp][i][(d >> i) & 1] += (uint)incr;
|
||||
bitsSpan[i][(d >> i) & 1] += (uint)incr;
|
||||
}
|
||||
|
||||
counts.Fp[comp][f] += (uint)incr;
|
||||
|
||||
@@ -29,7 +29,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
public Array8<uint> FeatureMask;
|
||||
public int AqAvOffset;
|
||||
|
||||
public static byte GetPredProbSegId(ref Array3<byte> segPredProbs, ref MacroBlockD xd)
|
||||
public static byte GetPredProbSegId(ReadOnlySpan<byte> segPredProbs, ref MacroBlockD xd)
|
||||
{
|
||||
return segPredProbs[xd.GetPredContextSegId()];
|
||||
}
|
||||
@@ -105,9 +105,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
UpdateMap = rb.ReadBit() != 0;
|
||||
if (UpdateMap)
|
||||
{
|
||||
Span<byte> segTreeProbSpan = fc.SegTreeProb.AsSpan();
|
||||
Span<byte> segPredProbSpan = fc.SegPredProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < SegTreeProbs; i++)
|
||||
{
|
||||
fc.SegTreeProb[i] = rb.ReadBit() != 0
|
||||
segTreeProbSpan[i] = rb.ReadBit() != 0
|
||||
? (byte)rb.ReadLiteral(8)
|
||||
: (byte)Prob.MaxProb;
|
||||
}
|
||||
@@ -117,7 +120,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
for (int i = 0; i < PredictionProbs; i++)
|
||||
{
|
||||
fc.SegPredProb[i] = rb.ReadBit() != 0
|
||||
segPredProbSpan[i] = rb.ReadBit() != 0
|
||||
? (byte)rb.ReadLiteral(8)
|
||||
: (byte)Prob.MaxProb;
|
||||
}
|
||||
@@ -126,7 +129,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
for (int i = 0; i < PredictionProbs; i++)
|
||||
{
|
||||
fc.SegPredProb[i] = Prob.MaxProb;
|
||||
segPredProbSpan[i] = Prob.MaxProb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -155,9 +155,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
public bool CompoundReferenceAllowed()
|
||||
{
|
||||
Span<sbyte> refFrameSignBiasSpan = RefFrameSignBias.AsSpan();
|
||||
|
||||
for (int i = 1; i < Constants.RefsPerFrame; ++i)
|
||||
{
|
||||
if (RefFrameSignBias[i + 1] != RefFrameSignBias[1])
|
||||
if (refFrameSignBiasSpan[i + 1] != refFrameSignBiasSpan[1])
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@@ -173,13 +175,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
public readonly int GetFreeFb()
|
||||
{
|
||||
ref Array12<RefCntBuffer> frameBufs = ref BufferPool.Value.FrameBufs;
|
||||
Span<RefCntBuffer> frameBuffs = BufferPool.Value.FrameBufs.AsSpan();
|
||||
|
||||
int i;
|
||||
|
||||
for (i = 0; i < Constants.FrameBuffers; ++i)
|
||||
{
|
||||
if (frameBufs[i].RefCount == 0)
|
||||
if (frameBuffs[i].RefCount == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
@@ -187,7 +189,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
if (i != Constants.FrameBuffers)
|
||||
{
|
||||
frameBufs[i].RefCount = 1;
|
||||
frameBuffs[i].RefCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -240,25 +242,29 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
private void AllocSegMap(MemoryAllocator allocator, int segMapSize)
|
||||
{
|
||||
Span<ArrayPtr<byte>> segMapArraySpan = SegMapArray.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.NumPingPongBuffers; ++i)
|
||||
{
|
||||
SegMapArray[i] = allocator.Allocate<byte>(segMapSize);
|
||||
segMapArraySpan[i] = allocator.Allocate<byte>(segMapSize);
|
||||
}
|
||||
|
||||
// Init the index.
|
||||
SegMapIdx = 0;
|
||||
PrevSegMapIdx = 1;
|
||||
|
||||
CurrentFrameSegMap = SegMapArray[SegMapIdx];
|
||||
LastFrameSegMap = SegMapArray[PrevSegMapIdx];
|
||||
CurrentFrameSegMap = segMapArraySpan[SegMapIdx];
|
||||
LastFrameSegMap = segMapArraySpan[PrevSegMapIdx];
|
||||
}
|
||||
|
||||
private void FreeSegMap(MemoryAllocator allocator)
|
||||
{
|
||||
Span<ArrayPtr<byte>> segMapArraySpan = SegMapArray.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.NumPingPongBuffers; ++i)
|
||||
{
|
||||
allocator.Free(SegMapArray[i]);
|
||||
SegMapArray[i] = ArrayPtr<byte>.Null;
|
||||
allocator.Free(segMapArraySpan[i]);
|
||||
segMapArraySpan[i] = ArrayPtr<byte>.Null;
|
||||
}
|
||||
|
||||
CurrentFrameSegMap = ArrayPtr<byte>.Null;
|
||||
@@ -366,18 +372,21 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
internal void InitMacroBlockD(ref MacroBlockD xd, ArrayPtr<int> dqcoeff)
|
||||
{
|
||||
Span<MacroBlockDPlane> planeSpan = xd.Plane.AsSpan();
|
||||
Span<ArrayPtr<sbyte>> aboveContextSpan = xd.AboveContext.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.MaxMbPlane; ++i)
|
||||
{
|
||||
xd.Plane[i].DqCoeff = dqcoeff;
|
||||
xd.AboveContext[i] = AboveContext.Slice(i * 2 * TileInfo.MiColsAlignedToSb(MiCols));
|
||||
planeSpan[i].DqCoeff = dqcoeff;
|
||||
aboveContextSpan[i] = AboveContext.Slice(i * 2 * TileInfo.MiColsAlignedToSb(MiCols));
|
||||
|
||||
if (i == 0)
|
||||
{
|
||||
MemoryUtil.Copy(ref xd.Plane[i].SegDequant, ref YDequant);
|
||||
MemoryUtil.Copy(ref planeSpan[i].SegDequant, ref YDequant);
|
||||
}
|
||||
else
|
||||
{
|
||||
MemoryUtil.Copy(ref xd.Plane[i].SegDequant, ref UvDequant);
|
||||
MemoryUtil.Copy(ref planeSpan[i].SegDequant, ref UvDequant);
|
||||
}
|
||||
|
||||
xd.Fc = new Ptr<Vp9EntropyProbs>(ref Fc.Value);
|
||||
@@ -395,32 +404,43 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
// Build y/uv dequant values based on segmentation.
|
||||
if (Seg.Enabled)
|
||||
{
|
||||
Span<Array2<short>> yDequantSpan1 = YDequant.AsSpan();
|
||||
Span<Array2<short>> uvDequantSpan1 = UvDequant.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.MaxSegments; ++i)
|
||||
{
|
||||
Span<short> yDequantSpan2 = yDequantSpan1[i].AsSpan();
|
||||
Span<short> uvDequantSpan2 = uvDequantSpan1[i].AsSpan();
|
||||
|
||||
int qindex = Seg.GetQIndex(i, BaseQindex);
|
||||
YDequant[i][0] = QuantCommon.DcQuant(qindex, YDcDeltaQ, BitDepth);
|
||||
YDequant[i][1] = QuantCommon.AcQuant(qindex, 0, BitDepth);
|
||||
UvDequant[i][0] = QuantCommon.DcQuant(qindex, UvDcDeltaQ, BitDepth);
|
||||
UvDequant[i][1] = QuantCommon.AcQuant(qindex, UvAcDeltaQ, BitDepth);
|
||||
yDequantSpan2[0] = QuantCommon.DcQuant(qindex, YDcDeltaQ, BitDepth);
|
||||
yDequantSpan2[1] = QuantCommon.AcQuant(qindex, 0, BitDepth);
|
||||
uvDequantSpan2[0] = QuantCommon.DcQuant(qindex, UvDcDeltaQ, BitDepth);
|
||||
uvDequantSpan2[1] = QuantCommon.AcQuant(qindex, UvAcDeltaQ, BitDepth);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Span<short> yDequantSpan = YDequant[0].AsSpan();
|
||||
Span<short> uvDequantSpan = UvDequant[0].AsSpan();
|
||||
|
||||
int qindex = BaseQindex;
|
||||
// When segmentation is disabled, only the first value is used. The
|
||||
// remaining are don't cares.
|
||||
YDequant[0][0] = QuantCommon.DcQuant(qindex, YDcDeltaQ, BitDepth);
|
||||
YDequant[0][1] = QuantCommon.AcQuant(qindex, 0, BitDepth);
|
||||
UvDequant[0][0] = QuantCommon.DcQuant(qindex, UvDcDeltaQ, BitDepth);
|
||||
UvDequant[0][1] = QuantCommon.AcQuant(qindex, UvAcDeltaQ, BitDepth);
|
||||
yDequantSpan[0] = QuantCommon.DcQuant(qindex, YDcDeltaQ, BitDepth);
|
||||
yDequantSpan[1] = QuantCommon.AcQuant(qindex, 0, BitDepth);
|
||||
uvDequantSpan[0] = QuantCommon.DcQuant(qindex, UvDcDeltaQ, BitDepth);
|
||||
uvDequantSpan[1] = QuantCommon.AcQuant(qindex, UvAcDeltaQ, BitDepth);
|
||||
}
|
||||
}
|
||||
|
||||
public void SetupScaleFactors()
|
||||
{
|
||||
Span<RefBuffer> frameRefsSpan = FrameRefs.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.RefsPerFrame; ++i)
|
||||
{
|
||||
ref RefBuffer refBuf = ref FrameRefs[i];
|
||||
ref RefBuffer refBuf = ref frameRefsSpan[i];
|
||||
refBuf.Sf.SetupScaleFactorsForFrame(refBuf.Buf.Width, refBuf.Buf.Height, Width, Height);
|
||||
}
|
||||
}
|
||||
@@ -431,26 +451,34 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
if (ReferenceMode == ReferenceMode.Select)
|
||||
{
|
||||
Span<byte> compInterProbSpan = fc.CompInterProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.CompInterContexts; ++i)
|
||||
{
|
||||
r.DiffUpdateProb(ref fc.CompInterProb[i]);
|
||||
r.DiffUpdateProb(ref compInterProbSpan[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (ReferenceMode != ReferenceMode.Compound)
|
||||
{
|
||||
Span<Array2<byte>> singleRefProbSpan1 = fc.SingleRefProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.RefContexts; ++i)
|
||||
{
|
||||
r.DiffUpdateProb(ref fc.SingleRefProb[i][0]);
|
||||
r.DiffUpdateProb(ref fc.SingleRefProb[i][1]);
|
||||
Span<byte> singleRefProbSpan2 = singleRefProbSpan1[i].AsSpan();
|
||||
|
||||
r.DiffUpdateProb(ref singleRefProbSpan2[0]);
|
||||
r.DiffUpdateProb(ref singleRefProbSpan2[1]);
|
||||
}
|
||||
}
|
||||
|
||||
if (ReferenceMode != ReferenceMode.Single)
|
||||
{
|
||||
Span<byte> compRefProbSpan = fc.CompRefProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.RefContexts; ++i)
|
||||
{
|
||||
r.DiffUpdateProb(ref fc.CompRefProb[i]);
|
||||
r.DiffUpdateProb(ref compRefProbSpan[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -469,99 +497,124 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
public void SetupCompoundReferenceMode()
|
||||
{
|
||||
if (RefFrameSignBias[Constants.LastFrame] == RefFrameSignBias[Constants.GoldenFrame])
|
||||
Span<sbyte> refFrameSignBiasSpan = RefFrameSignBias.AsSpan();
|
||||
Span<sbyte> compVarRefSpan = CompVarRef.AsSpan();
|
||||
|
||||
if (refFrameSignBiasSpan[Constants.LastFrame] == refFrameSignBiasSpan[Constants.GoldenFrame])
|
||||
{
|
||||
CompFixedRef = Constants.AltRefFrame;
|
||||
CompVarRef[0] = Constants.LastFrame;
|
||||
CompVarRef[1] = Constants.GoldenFrame;
|
||||
compVarRefSpan[0] = Constants.LastFrame;
|
||||
compVarRefSpan[1] = Constants.GoldenFrame;
|
||||
}
|
||||
else if (RefFrameSignBias[Constants.LastFrame] == RefFrameSignBias[Constants.AltRefFrame])
|
||||
else if (refFrameSignBiasSpan[Constants.LastFrame] == refFrameSignBiasSpan[Constants.AltRefFrame])
|
||||
{
|
||||
CompFixedRef = Constants.GoldenFrame;
|
||||
CompVarRef[0] = Constants.LastFrame;
|
||||
CompVarRef[1] = Constants.AltRefFrame;
|
||||
compVarRefSpan[0] = Constants.LastFrame;
|
||||
compVarRefSpan[1] = Constants.AltRefFrame;
|
||||
}
|
||||
else
|
||||
{
|
||||
CompFixedRef = Constants.LastFrame;
|
||||
CompVarRef[0] = Constants.GoldenFrame;
|
||||
CompVarRef[1] = Constants.AltRefFrame;
|
||||
compVarRefSpan[0] = Constants.GoldenFrame;
|
||||
compVarRefSpan[1] = Constants.AltRefFrame;
|
||||
}
|
||||
}
|
||||
|
||||
public readonly void InitMvProbs()
|
||||
{
|
||||
Fc.Value.Joints[0] = 32;
|
||||
Fc.Value.Joints[1] = 64;
|
||||
Fc.Value.Joints[2] = 96;
|
||||
Span<byte> jointsSpan = Fc.Value.Joints.AsSpan();
|
||||
Span<byte> signSpan = Fc.Value.Sign.AsSpan();
|
||||
Span<Array10<byte>> classesSpan = Fc.Value.Classes.AsSpan();
|
||||
Span<byte> classes0Span = classesSpan[0].AsSpan();
|
||||
Span<byte> classes1Span = classesSpan[1].AsSpan();
|
||||
Span<Array1<byte>> class0Span = Fc.Value.Class0.AsSpan();
|
||||
Span<Array10<byte>> bitsSpan = Fc.Value.Bits.AsSpan();
|
||||
Span<byte> bits0Span = bitsSpan[0].AsSpan();
|
||||
Span<byte> bits1Span = bitsSpan[1].AsSpan();
|
||||
Span<Array2<Array3<byte>>> class0FpSpan = Fc.Value.Class0Fp.AsSpan();
|
||||
Span<Array3<byte>> class0Fp0Span = class0FpSpan[0].AsSpan();
|
||||
Span<Array3<byte>> class0Fp1Span = class0FpSpan[1].AsSpan();
|
||||
Span<byte> class0Fp00Span = class0Fp0Span[0].AsSpan();
|
||||
Span<byte> class0Fp01Span = class0Fp0Span[1].AsSpan();
|
||||
Span<byte> class0Fp10Span = class0Fp1Span[0].AsSpan();
|
||||
Span<byte> class0Fp11Span = class0Fp1Span[1].AsSpan();
|
||||
Span<Array3<byte>> fpSpan = Fc.Value.Fp.AsSpan();
|
||||
Span<byte> fp0Span = fpSpan[0].AsSpan();
|
||||
Span<byte> fp1Span = fpSpan[1].AsSpan();
|
||||
Span<byte> class0HpSpan = Fc.Value.Class0Hp.AsSpan();
|
||||
Span<byte> hpSpan = Fc.Value.Hp.AsSpan();
|
||||
|
||||
jointsSpan[0] = 32;
|
||||
jointsSpan[1] = 64;
|
||||
jointsSpan[2] = 96;
|
||||
|
||||
Fc.Value.Sign[0] = 128;
|
||||
Fc.Value.Classes[0][0] = 224;
|
||||
Fc.Value.Classes[0][1] = 144;
|
||||
Fc.Value.Classes[0][2] = 192;
|
||||
Fc.Value.Classes[0][3] = 168;
|
||||
Fc.Value.Classes[0][4] = 192;
|
||||
Fc.Value.Classes[0][5] = 176;
|
||||
Fc.Value.Classes[0][6] = 192;
|
||||
Fc.Value.Classes[0][7] = 198;
|
||||
Fc.Value.Classes[0][8] = 198;
|
||||
Fc.Value.Classes[0][9] = 245;
|
||||
Fc.Value.Class0[0][0] = 216;
|
||||
Fc.Value.Bits[0][0] = 136;
|
||||
Fc.Value.Bits[0][1] = 140;
|
||||
Fc.Value.Bits[0][2] = 148;
|
||||
Fc.Value.Bits[0][3] = 160;
|
||||
Fc.Value.Bits[0][4] = 176;
|
||||
Fc.Value.Bits[0][5] = 192;
|
||||
Fc.Value.Bits[0][6] = 224;
|
||||
Fc.Value.Bits[0][7] = 234;
|
||||
Fc.Value.Bits[0][8] = 234;
|
||||
Fc.Value.Bits[0][9] = 240;
|
||||
Fc.Value.Class0Fp[0][0][0] = 128;
|
||||
Fc.Value.Class0Fp[0][0][1] = 128;
|
||||
Fc.Value.Class0Fp[0][0][2] = 64;
|
||||
Fc.Value.Class0Fp[0][1][0] = 96;
|
||||
Fc.Value.Class0Fp[0][1][1] = 112;
|
||||
Fc.Value.Class0Fp[0][1][2] = 64;
|
||||
Fc.Value.Fp[0][0] = 64;
|
||||
Fc.Value.Fp[0][1] = 96;
|
||||
Fc.Value.Fp[0][2] = 64;
|
||||
Fc.Value.Class0Hp[0] = 160;
|
||||
Fc.Value.Hp[0] = 128;
|
||||
signSpan[0] = 128;
|
||||
classes0Span[0] = 224;
|
||||
classes0Span[1] = 144;
|
||||
classes0Span[2] = 192;
|
||||
classes0Span[3] = 168;
|
||||
classes0Span[4] = 192;
|
||||
classes0Span[5] = 176;
|
||||
classes0Span[6] = 192;
|
||||
classes0Span[7] = 198;
|
||||
classes0Span[8] = 198;
|
||||
classes0Span[9] = 245;
|
||||
class0Span[0][0] = 216;
|
||||
bits0Span[0] = 136;
|
||||
bits0Span[1] = 140;
|
||||
bits0Span[2] = 148;
|
||||
bits0Span[3] = 160;
|
||||
bits0Span[4] = 176;
|
||||
bits0Span[5] = 192;
|
||||
bits0Span[6] = 224;
|
||||
bits0Span[7] = 234;
|
||||
bits0Span[8] = 234;
|
||||
bits0Span[9] = 240;
|
||||
class0Fp00Span[0] = 128;
|
||||
class0Fp00Span[1] = 128;
|
||||
class0Fp00Span[2] = 64;
|
||||
class0Fp01Span[0] = 96;
|
||||
class0Fp01Span[1] = 112;
|
||||
class0Fp01Span[2] = 64;
|
||||
fp0Span[0] = 64;
|
||||
fp0Span[1] = 96;
|
||||
fp0Span[2] = 64;
|
||||
class0HpSpan[0] = 160;
|
||||
hpSpan[0] = 128;
|
||||
|
||||
Fc.Value.Sign[1] = 128;
|
||||
Fc.Value.Classes[1][0] = 216;
|
||||
Fc.Value.Classes[1][1] = 128;
|
||||
Fc.Value.Classes[1][2] = 176;
|
||||
Fc.Value.Classes[1][3] = 160;
|
||||
Fc.Value.Classes[1][4] = 176;
|
||||
Fc.Value.Classes[1][5] = 176;
|
||||
Fc.Value.Classes[1][6] = 192;
|
||||
Fc.Value.Classes[1][7] = 198;
|
||||
Fc.Value.Classes[1][8] = 198;
|
||||
Fc.Value.Classes[1][9] = 208;
|
||||
Fc.Value.Class0[1][0] = 208;
|
||||
Fc.Value.Bits[1][0] = 136;
|
||||
Fc.Value.Bits[1][1] = 140;
|
||||
Fc.Value.Bits[1][2] = 148;
|
||||
Fc.Value.Bits[1][3] = 160;
|
||||
Fc.Value.Bits[1][4] = 176;
|
||||
Fc.Value.Bits[1][5] = 192;
|
||||
Fc.Value.Bits[1][6] = 224;
|
||||
Fc.Value.Bits[1][7] = 234;
|
||||
Fc.Value.Bits[1][8] = 234;
|
||||
Fc.Value.Bits[1][9] = 240;
|
||||
Fc.Value.Class0Fp[1][0][0] = 128;
|
||||
Fc.Value.Class0Fp[1][0][1] = 128;
|
||||
Fc.Value.Class0Fp[1][0][2] = 64;
|
||||
Fc.Value.Class0Fp[1][1][0] = 96;
|
||||
Fc.Value.Class0Fp[1][1][1] = 112;
|
||||
Fc.Value.Class0Fp[1][1][2] = 64;
|
||||
Fc.Value.Fp[1][0] = 64;
|
||||
Fc.Value.Fp[1][1] = 96;
|
||||
Fc.Value.Fp[1][2] = 64;
|
||||
Fc.Value.Class0Hp[1] = 160;
|
||||
Fc.Value.Hp[1] = 128;
|
||||
signSpan[1] = 128;
|
||||
classes1Span[0] = 216;
|
||||
classes1Span[1] = 128;
|
||||
classes1Span[2] = 176;
|
||||
classes1Span[3] = 160;
|
||||
classes1Span[4] = 176;
|
||||
classes1Span[5] = 176;
|
||||
classes1Span[6] = 192;
|
||||
classes1Span[7] = 198;
|
||||
classes1Span[8] = 198;
|
||||
classes1Span[9] = 208;
|
||||
class0Span[1][0] = 208;
|
||||
bits1Span[0] = 136;
|
||||
bits1Span[1] = 140;
|
||||
bits1Span[2] = 148;
|
||||
bits1Span[3] = 160;
|
||||
bits1Span[4] = 176;
|
||||
bits1Span[5] = 192;
|
||||
bits1Span[6] = 224;
|
||||
bits1Span[7] = 234;
|
||||
bits1Span[8] = 234;
|
||||
bits1Span[9] = 240;
|
||||
class0Fp10Span[0] = 128;
|
||||
class0Fp10Span[1] = 128;
|
||||
class0Fp10Span[2] = 64;
|
||||
class0Fp11Span[0] = 96;
|
||||
class0Fp11Span[1] = 112;
|
||||
class0Fp11Span[2] = 64;
|
||||
fp1Span[0] = 64;
|
||||
fp1Span[1] = 96;
|
||||
fp1Span[2] = 64;
|
||||
class0HpSpan[1] = 160;
|
||||
hpSpan[1] = 128;
|
||||
}
|
||||
|
||||
public void AdaptMvProbs(bool allowHp)
|
||||
@@ -576,41 +629,74 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
counts.Joints.AsSpan(),
|
||||
fc.Joints.AsSpan());
|
||||
|
||||
Span<byte> fSignSpan = fc.Sign.AsSpan();
|
||||
Span<byte> pSignSpan = preFc.Sign.AsSpan();
|
||||
Span<Array2<uint>> cSignSpan = counts.Sign.AsSpan();
|
||||
Span<Array10<byte>> fClassesSpan = fc.Classes.AsSpan();
|
||||
Span<Array10<byte>> pClassesSpan = preFc.Classes.AsSpan();
|
||||
Span<Array11<uint>> cClassesSpan = counts.Classes.AsSpan();
|
||||
Span<Array1<byte>> fClass0Span = fc.Class0.AsSpan();
|
||||
Span<Array1<byte>> pClass0Span = preFc.Class0.AsSpan();
|
||||
Span<Array2<uint>> cClass0Span = counts.Class0.AsSpan();
|
||||
Span<Array10<byte>> fBitsSpan1 = fc.Bits.AsSpan();
|
||||
Span<Array10<byte>> pBitsSpan1 = preFc.Bits.AsSpan();
|
||||
Span<Array10<Array2<uint>>> cBitsSpan1 = counts.Bits.AsSpan();
|
||||
Span<Array2<Array3<byte>>> fClass0FpSpan1 = fc.Class0Fp.AsSpan();
|
||||
Span<Array2<Array3<byte>>> pClass0FpSpan1 = preFc.Class0Fp.AsSpan();
|
||||
Span<Array2<Array4<uint>>> cClass0FpSpan1 = counts.Class0Fp.AsSpan();
|
||||
Span<Array3<byte>> fFpSpan = fc.Fp.AsSpan();
|
||||
Span<Array3<byte>> pFpSpan = preFc.Fp.AsSpan();
|
||||
Span<Array4<uint>> cFpSpan = counts.Fp.AsSpan();
|
||||
Span<byte> fClass0HpSpan = fc.Class0Hp.AsSpan();
|
||||
Span<byte> pClass0HpSpan = preFc.Class0Hp.AsSpan();
|
||||
Span<Array2<uint>> cClass0HpSpan = counts.Class0Hp.AsSpan();
|
||||
Span<byte> fHpSpan = fc.Hp.AsSpan();
|
||||
Span<byte> pHpSpan = preFc.Hp.AsSpan();
|
||||
Span<Array2<uint>> cHpSpan = counts.Hp.AsSpan();
|
||||
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
fc.Sign[i] = Prob.ModeMvMergeProbs(preFc.Sign[i], ref counts.Sign[i]);
|
||||
fSignSpan[i] = Prob.ModeMvMergeProbs(pSignSpan[i], cSignSpan[i].AsSpan());
|
||||
Prob.VpxTreeMergeProbs(
|
||||
EntropyMv.ClassTree,
|
||||
preFc.Classes[i].AsSpan(),
|
||||
counts.Classes[i].AsSpan(),
|
||||
fc.Classes[i].AsSpan());
|
||||
pClassesSpan[i].AsSpan(),
|
||||
cClassesSpan[i].AsSpan(),
|
||||
fClassesSpan[i].AsSpan());
|
||||
Prob.VpxTreeMergeProbs(
|
||||
EntropyMv.Class0Tree,
|
||||
preFc.Class0[i].AsSpan(),
|
||||
counts.Class0[i].AsSpan(),
|
||||
fc.Class0[i].AsSpan());
|
||||
pClass0Span[i].AsSpan(),
|
||||
cClass0Span[i].AsSpan(),
|
||||
fClass0Span[i].AsSpan());
|
||||
|
||||
Span<byte> fBitsSpan2 = fBitsSpan1[i].AsSpan();
|
||||
Span<byte> pBitsSpan2 = pBitsSpan1[i].AsSpan();
|
||||
Span<Array2<uint>> cBitsSpan2 = cBitsSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < EntropyMv.OffsetBits; ++j)
|
||||
{
|
||||
fc.Bits[i][j] = Prob.ModeMvMergeProbs(preFc.Bits[i][j], ref counts.Bits[i][j]);
|
||||
fBitsSpan2[j] = Prob.ModeMvMergeProbs(pBitsSpan2[j], cBitsSpan2[j].AsSpan());
|
||||
}
|
||||
|
||||
Span<Array3<byte>> fClass0FpSpan2 = fClass0FpSpan1[i].AsSpan();
|
||||
Span<Array3<byte>> pClass0FpSpan2 = pClass0FpSpan1[i].AsSpan();
|
||||
Span<Array4<uint>> cClass0FpSpan2 = cClass0FpSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < EntropyMv.Class0Size; ++j)
|
||||
{
|
||||
Prob.VpxTreeMergeProbs(
|
||||
EntropyMv.FpTree,
|
||||
preFc.Class0Fp[i][j].AsSpan(),
|
||||
counts.Class0Fp[i][j].AsSpan(),
|
||||
fc.Class0Fp[i][j].AsSpan());
|
||||
pClass0FpSpan2[j].AsSpan(),
|
||||
cClass0FpSpan2[j].AsSpan(),
|
||||
fClass0FpSpan2[j].AsSpan());
|
||||
}
|
||||
|
||||
Prob.VpxTreeMergeProbs(EntropyMv.FpTree, preFc.Fp[i].AsSpan(), counts.Fp[i].AsSpan(),
|
||||
fc.Fp[i].AsSpan());
|
||||
Prob.VpxTreeMergeProbs(EntropyMv.FpTree, pFpSpan[i].AsSpan(), cFpSpan[i].AsSpan(),
|
||||
fFpSpan[i].AsSpan());
|
||||
|
||||
if (allowHp)
|
||||
{
|
||||
fc.Class0Hp[i] = Prob.ModeMvMergeProbs(preFc.Class0Hp[i], ref counts.Class0Hp[i]);
|
||||
fc.Hp[i] = Prob.ModeMvMergeProbs(preFc.Hp[i], ref counts.Hp[i]);
|
||||
fClass0HpSpan[i] = Prob.ModeMvMergeProbs(pClass0HpSpan[i], cClass0HpSpan[i].AsSpan());
|
||||
fHpSpan[i] = Prob.ModeMvMergeProbs(pHpSpan[i], cHpSpan[i].AsSpan());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -769,75 +855,115 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
ref Vp9EntropyProbs preFc = ref FrameContexts[(int)FrameContextIdx];
|
||||
ref Vp9BackwardUpdates counts = ref Counts.Value;
|
||||
|
||||
Span<byte> fIntraInterProbSpan = fc.IntraInterProb.AsSpan();
|
||||
Span<byte> pIntraInterProbSpan = preFc.IntraInterProb.AsSpan();
|
||||
Span<Array2<uint>> cIntraInterSpan = counts.IntraInter.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.IntraInterContexts; i++)
|
||||
{
|
||||
fc.IntraInterProb[i] = Prob.ModeMvMergeProbs(preFc.IntraInterProb[i], ref counts.IntraInter[i]);
|
||||
fIntraInterProbSpan[i] = Prob.ModeMvMergeProbs(pIntraInterProbSpan[i], cIntraInterSpan[i].AsSpan());
|
||||
}
|
||||
|
||||
Span<byte> fCompInterProbSpan = fc.CompInterProb.AsSpan();
|
||||
Span<byte> pCompInterProbSpan = preFc.CompInterProb.AsSpan();
|
||||
Span<Array2<uint>> cCompInterSpan = counts.CompInter.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.CompInterContexts; i++)
|
||||
{
|
||||
fc.CompInterProb[i] = Prob.ModeMvMergeProbs(preFc.CompInterProb[i], ref counts.CompInter[i]);
|
||||
fCompInterProbSpan[i] = Prob.ModeMvMergeProbs(pCompInterProbSpan[i], cCompInterSpan[i].AsSpan());
|
||||
}
|
||||
|
||||
Span<byte> fCompRefProbSpan = fc.CompRefProb.AsSpan();
|
||||
Span<byte> pCompRefProbSpan = preFc.CompRefProb.AsSpan();
|
||||
Span<Array2<uint>> cCompRefSpan = counts.CompRef.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.RefContexts; i++)
|
||||
{
|
||||
fc.CompRefProb[i] = Prob.ModeMvMergeProbs(preFc.CompRefProb[i], ref counts.CompRef[i]);
|
||||
fCompRefProbSpan[i] = Prob.ModeMvMergeProbs(pCompRefProbSpan[i], cCompRefSpan[i].AsSpan());
|
||||
}
|
||||
|
||||
Span<Array2<byte>> fSingleRefProbSpan1 = fc.SingleRefProb.AsSpan();
|
||||
Span<Array2<byte>> pSingleRefProbSpan1 = preFc.SingleRefProb.AsSpan();
|
||||
Span<Array2<Array2<uint>>> cSingleRefSpan1 = counts.SingleRef.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.RefContexts; i++)
|
||||
{
|
||||
Span<byte> fSingleRefProbSpan2 = fSingleRefProbSpan1[i].AsSpan();
|
||||
Span<byte> pSingleRefProbSpan2 = pSingleRefProbSpan1[i].AsSpan();
|
||||
Span<Array2<uint>> cSingleRefSpan2 = cSingleRefSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < 2; j++)
|
||||
{
|
||||
fc.SingleRefProb[i][j] =
|
||||
Prob.ModeMvMergeProbs(preFc.SingleRefProb[i][j], ref counts.SingleRef[i][j]);
|
||||
fSingleRefProbSpan2[j] =
|
||||
Prob.ModeMvMergeProbs(pSingleRefProbSpan2[j], cSingleRefSpan2[j].AsSpan());
|
||||
}
|
||||
}
|
||||
|
||||
Span<Array3<byte>> fInterModeProbSpan = fc.InterModeProb.AsSpan();
|
||||
Span<Array3<byte>> pInterModeProbSpan = preFc.InterModeProb.AsSpan();
|
||||
Span<Array4<uint>> cInterModeSpan = counts.InterMode.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.InterModeContexts; i++)
|
||||
{
|
||||
Prob.VpxTreeMergeProbs(
|
||||
EntropyMode.InterModeTree,
|
||||
preFc.InterModeProb[i].AsSpan(),
|
||||
counts.InterMode[i].AsSpan(),
|
||||
fc.InterModeProb[i].AsSpan());
|
||||
pInterModeProbSpan[i].AsSpan(),
|
||||
cInterModeSpan[i].AsSpan(),
|
||||
fInterModeProbSpan[i].AsSpan());
|
||||
}
|
||||
|
||||
Span<Array9<byte>> fYModeProbSpan = fc.YModeProb.AsSpan();
|
||||
Span<Array9<byte>> pYModeProbSpan = preFc.YModeProb.AsSpan();
|
||||
Span<Array10<uint>> cYModeSpan = counts.YMode.AsSpan();
|
||||
|
||||
for (int i = 0; i < EntropyMode.BlockSizeGroups; i++)
|
||||
{
|
||||
Prob.VpxTreeMergeProbs(
|
||||
EntropyMode.IntraModeTree,
|
||||
preFc.YModeProb[i].AsSpan(),
|
||||
counts.YMode[i].AsSpan(),
|
||||
fc.YModeProb[i].AsSpan());
|
||||
pYModeProbSpan[i].AsSpan(),
|
||||
cYModeSpan[i].AsSpan(),
|
||||
fYModeProbSpan[i].AsSpan());
|
||||
}
|
||||
|
||||
Span<Array9<byte>> fUvModeProbSpan = fc.UvModeProb.AsSpan();
|
||||
Span<Array9<byte>> pUvModeProbSpan = preFc.UvModeProb.AsSpan();
|
||||
Span<Array10<uint>> cUvModeSpan = counts.UvMode.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.IntraModes; ++i)
|
||||
{
|
||||
Prob.VpxTreeMergeProbs(
|
||||
EntropyMode.IntraModeTree,
|
||||
preFc.UvModeProb[i].AsSpan(),
|
||||
counts.UvMode[i].AsSpan(),
|
||||
fc.UvModeProb[i].AsSpan());
|
||||
pUvModeProbSpan[i].AsSpan(),
|
||||
cUvModeSpan[i].AsSpan(),
|
||||
fUvModeProbSpan[i].AsSpan());
|
||||
}
|
||||
|
||||
Span<Array3<byte>> fPartitionProbSpan = fc.PartitionProb.AsSpan();
|
||||
Span<Array3<byte>> pPartitionProbSpan = preFc.PartitionProb.AsSpan();
|
||||
Span<Array4<uint>> cPartitionSpan = counts.Partition.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.PartitionContexts; i++)
|
||||
{
|
||||
Prob.VpxTreeMergeProbs(
|
||||
EntropyMode.PartitionTree,
|
||||
preFc.PartitionProb[i].AsSpan(),
|
||||
counts.Partition[i].AsSpan(),
|
||||
fc.PartitionProb[i].AsSpan());
|
||||
pPartitionProbSpan[i].AsSpan(),
|
||||
cPartitionSpan[i].AsSpan(),
|
||||
fPartitionProbSpan[i].AsSpan());
|
||||
}
|
||||
|
||||
if (InterpFilter == Constants.Switchable)
|
||||
{
|
||||
Span<Array2<byte>> fSwitchableInterpProbSpan = fc.SwitchableInterpProb.AsSpan();
|
||||
Span<Array2<byte>> pSwitchableInterpProbSpan = preFc.SwitchableInterpProb.AsSpan();
|
||||
Span<Array3<uint>> cSwitchableInterpSpan = counts.SwitchableInterp.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.SwitchableFilterContexts; i++)
|
||||
{
|
||||
Prob.VpxTreeMergeProbs(
|
||||
EntropyMode.SwitchableInterpTree,
|
||||
preFc.SwitchableInterpProb[i].AsSpan(),
|
||||
counts.SwitchableInterp[i].AsSpan(),
|
||||
fc.SwitchableInterpProb[i].AsSpan());
|
||||
pSwitchableInterpProbSpan[i].AsSpan(),
|
||||
cSwitchableInterpSpan[i].AsSpan(),
|
||||
fSwitchableInterpProbSpan[i].AsSpan());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -846,34 +972,62 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
Array1<Array2<uint>> branchCt8X8P = new();
|
||||
Array2<Array2<uint>> branchCt16X16P = new();
|
||||
Array3<Array2<uint>> branchCt32X32P = new();
|
||||
|
||||
Span<Array2<uint>> branchCt8X8PSpan = branchCt8X8P.AsSpan();
|
||||
Span<Array2<uint>> branchCt16X16PSpan = branchCt16X16P.AsSpan();
|
||||
Span<Array2<uint>> branchCt32X32PSpan = branchCt32X32P.AsSpan();
|
||||
|
||||
// Each transform size must read its own counter table: the code being replaced passed
|
||||
// counts.Tx16x16[i] and counts.Tx32x32[i] to the 16x16 / 32x32 branch-count helpers.
|
||||
// NOTE(review): Array3<uint> / Array4<uint> element types are assumed from the
|
||||
// "N counts per N-1 probabilities" layout used elsewhere; confirm against Vp9BackwardUpdates.
|
||||
Span<Array2<uint>> tx8x8Span = counts.Tx8x8.AsSpan();
|
||||
Span<Array3<uint>> tx16x16Span = counts.Tx16x16.AsSpan();
|
||||
Span<Array4<uint>> tx32x32Span = counts.Tx32x32.AsSpan();
|
||||
|
||||
// No second-level span is needed for Tx8x8Prob, as its inner loop only ever runs one iteration.
|
||||
Span<Array1<byte>> fTx8x8ProbSpan = fc.Tx8x8Prob.AsSpan();
|
||||
Span<Array1<byte>> pTx8x8ProbSpan = preFc.Tx8x8Prob.AsSpan();
|
||||
|
||||
Span<Array2<byte>> fTx16x16ProbSpan1 = fc.Tx16x16Prob.AsSpan();
|
||||
Span<Array2<byte>> pTx16x16ProbSpan1 = preFc.Tx16x16Prob.AsSpan();
|
||||
|
||||
Span<Array3<byte>> fTx32x32ProbSpan1 = fc.Tx32x32Prob.AsSpan();
|
||||
Span<Array3<byte>> pTx32x32ProbSpan1 = preFc.Tx32x32Prob.AsSpan();
|
||||
|
||||
for (int i = 0; i < EntropyMode.TxSizeContexts; ++i)
|
||||
{
|
||||
EntropyMode.TxCountsToBranchCounts8X8(counts.Tx8x8[i].AsSpan(), ref branchCt8X8P);
|
||||
EntropyMode.TxCountsToBranchCounts8X8(tx8x8Span[i].AsSpan(), branchCt8X8P.AsSpan());
|
||||
for (int j = 0; j < (int)TxSize.TxSizes - 3; ++j)
|
||||
{
|
||||
fc.Tx8x8Prob[i][j] = Prob.ModeMvMergeProbs(preFc.Tx8x8Prob[i][j], ref branchCt8X8P[j]);
|
||||
fTx8x8ProbSpan[i][j] = Prob.ModeMvMergeProbs(pTx8x8ProbSpan[i][j], branchCt8X8PSpan[j].AsSpan());
|
||||
}
|
||||
|
||||
Span<byte> fTx16x16ProbSpan2 = fTx16x16ProbSpan1[i].AsSpan();
|
||||
Span<byte> pTx16x16ProbSpan2 = pTx16x16ProbSpan1[i].AsSpan();
|
||||
|
||||
EntropyMode.TxCountsToBranchCounts16X16(counts.Tx16x16[i].AsSpan(), ref branchCt16X16P);
|
||||
EntropyMode.TxCountsToBranchCounts16X16(tx16x16Span[i].AsSpan(), branchCt16X16P.AsSpan());
|
||||
for (int j = 0; j < (int)TxSize.TxSizes - 2; ++j)
|
||||
{
|
||||
fc.Tx16x16Prob[i][j] =
|
||||
Prob.ModeMvMergeProbs(preFc.Tx16x16Prob[i][j], ref branchCt16X16P[j]);
|
||||
fTx16x16ProbSpan2[j] =
|
||||
Prob.ModeMvMergeProbs(pTx16x16ProbSpan2[j], branchCt16X16PSpan[j].AsSpan());
|
||||
}
|
||||
|
||||
Span<byte> fTx32x32ProbSpan2 = fTx32x32ProbSpan1[i].AsSpan();
|
||||
Span<byte> pTx32x32ProbSpan2 = pTx32x32ProbSpan1[i].AsSpan();
|
||||
|
||||
EntropyMode.TxCountsToBranchCounts32X32(counts.Tx32x32[i].AsSpan(), ref branchCt32X32P);
|
||||
EntropyMode.TxCountsToBranchCounts32X32(tx32x32Span[i].AsSpan(), branchCt32X32P.AsSpan());
|
||||
for (int j = 0; j < (int)TxSize.TxSizes - 1; ++j)
|
||||
{
|
||||
fc.Tx32x32Prob[i][j] =
|
||||
Prob.ModeMvMergeProbs(preFc.Tx32x32Prob[i][j], ref branchCt32X32P[j]);
|
||||
fTx32x32ProbSpan2[j] =
|
||||
Prob.ModeMvMergeProbs(pTx32x32ProbSpan2[j], branchCt32X32PSpan[j].AsSpan());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Span<byte> fSkipProbSpan = fc.SkipProb.AsSpan();
|
||||
Span<byte> pSkipProbSpan = preFc.SkipProb.AsSpan();
|
||||
Span<Array2<uint>> cSkipSpan = counts.Skip.AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.SkipContexts; ++i)
|
||||
{
|
||||
fc.SkipProb[i] = Prob.ModeMvMergeProbs(preFc.SkipProb[i], ref counts.Skip[i]);
|
||||
fSkipProbSpan[i] = Prob.ModeMvMergeProbs(pSkipProbSpan[i], cSkipSpan[i].AsSpan());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -916,13 +1070,19 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
ref MvRef mv = ref PrevFrameMvs[i];
|
||||
|
||||
mv.Mv[0].Row = mvs[i].Mvs[0].Row;
|
||||
mv.Mv[0].Col = mvs[i].Mvs[0].Col;
|
||||
mv.Mv[1].Row = mvs[i].Mvs[1].Row;
|
||||
mv.Mv[1].Col = mvs[i].Mvs[1].Col;
|
||||
Span<Mv> mvSpan = mv.Mv.AsSpan();
|
||||
Span<Vp9Mv> mvsSpan = mvs[i].Mvs.AsSpan();
|
||||
|
||||
mv.RefFrame[0] = (sbyte)mvs[i].RefFrames[0];
|
||||
mv.RefFrame[1] = (sbyte)mvs[i].RefFrames[1];
|
||||
mvSpan[0].Row = mvsSpan[0].Row;
|
||||
mvSpan[0].Col = mvsSpan[0].Col;
|
||||
mvSpan[1].Row = mvsSpan[1].Row;
|
||||
mvSpan[1].Col = mvsSpan[1].Col;
|
||||
|
||||
Span<sbyte> refFrameSpan = mv.RefFrame.AsSpan();
|
||||
Span<int> refFramesSpan = mvs[i].RefFrames.AsSpan();
|
||||
|
||||
refFrameSpan[0] = (sbyte)refFramesSpan[0];
|
||||
refFrameSpan[1] = (sbyte)refFramesSpan[1];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -937,47 +1097,76 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
for (int i = 0; i < mvs.Length; i++)
|
||||
{
|
||||
ref MvRef mv = ref CurFrameMvs[i];
|
||||
|
||||
Span<Mv> mvSpan = mv.Mv.AsSpan();
|
||||
Span<Vp9Mv> mvsSpan = mvs[i].Mvs.AsSpan();
|
||||
|
||||
mvs[i].Mvs[0].Row = mv.Mv[0].Row;
|
||||
mvs[i].Mvs[0].Col = mv.Mv[0].Col;
|
||||
mvs[i].Mvs[1].Row = mv.Mv[1].Row;
|
||||
mvs[i].Mvs[1].Col = mv.Mv[1].Col;
|
||||
mvsSpan[0].Row = mvSpan[0].Row;
|
||||
mvsSpan[0].Col = mvSpan[0].Col;
|
||||
mvsSpan[1].Row = mvSpan[1].Row;
|
||||
mvsSpan[1].Col = mvSpan[1].Col;
|
||||
|
||||
Span<sbyte> refFrameSpan = mv.RefFrame.AsSpan();
|
||||
Span<int> refFramesSpan = mvs[i].RefFrames.AsSpan();
|
||||
|
||||
mvs[i].RefFrames[0] = mv.RefFrame[0];
|
||||
mvs[i].RefFrames[1] = mv.RefFrame[1];
|
||||
refFramesSpan[0] = refFrameSpan[0];
|
||||
refFramesSpan[1] = refFrameSpan[1];
|
||||
}
|
||||
}
|
||||
|
||||
private void AdaptCoefProbs(byte txSize, uint countSat, uint updateFactor)
|
||||
{
|
||||
ref Vp9EntropyProbs preFc = ref FrameContexts[(int)FrameContextIdx];
|
||||
ref Array2<Array2<Array6<Array6<Array3<byte>>>>> probs = ref Fc.Value.CoefProbs[txSize];
|
||||
ref Array2<Array2<Array6<Array6<Array3<byte>>>>> preProbs = ref preFc.CoefProbs[txSize];
|
||||
ref Array2<Array2<Array6<Array6<Array4<uint>>>>> counts = ref Counts.Value.Coef[txSize];
|
||||
ref Array2<Array2<Array6<Array6<uint>>>> eobCounts = ref Counts.Value.EobBranch[txSize];
|
||||
Span<Array2<Array6<Array6<Array3<byte>>>>> probsSpan1 = Fc.Value.CoefProbs[txSize].AsSpan();
|
||||
Span<Array2<Array6<Array6<Array3<byte>>>>> preProbsSpan1 = preFc.CoefProbs[txSize].AsSpan();
|
||||
Span<Array2<Array6<Array6<Array4<uint>>>>> countsSpan1 = Counts.Value.Coef[txSize].AsSpan();
|
||||
Span<Array2<Array6<Array6<uint>>>> eobCountsSpan1 = Counts.Value.EobBranch[txSize].AsSpan();
|
||||
|
||||
for (int i = 0; i < Constants.PlaneTypes; ++i)
|
||||
{
|
||||
Span<Array6<Array6<Array3<byte>>>> probsSpan2 = probsSpan1[i].AsSpan();
|
||||
Span<Array6<Array6<Array3<byte>>>> preProbsSpan2 = preProbsSpan1[i].AsSpan();
|
||||
Span<Array6<Array6<Array4<uint>>>> countsSpan2 = countsSpan1[i].AsSpan();
|
||||
Span<Array6<Array6<uint>>> eobCountsSpan2 = eobCountsSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < Entropy.RefTypes; ++j)
|
||||
{
|
||||
Span<Array6<Array3<byte>>> probsSpan3 = probsSpan2[j].AsSpan();
|
||||
Span<Array6<Array3<byte>>> preProbsSpan3 = preProbsSpan2[j].AsSpan();
|
||||
Span<Array6<Array4<uint>>> countsSpan3 = countsSpan2[j].AsSpan();
|
||||
Span<Array6<uint>> eobCountsSpan3 = eobCountsSpan2[j].AsSpan();
|
||||
|
||||
for (int k = 0; k < Entropy.CoefBands; ++k)
|
||||
{
|
||||
Span<Array3<byte>> probsSpan4 = probsSpan3[k].AsSpan();
|
||||
Span<Array3<byte>> preProbsSpan4 = preProbsSpan3[k].AsSpan();
|
||||
Span<Array4<uint>> countsSpan4 = countsSpan3[k].AsSpan();
|
||||
Span<uint> eobCountsSpan4 = eobCountsSpan3[k].AsSpan();
|
||||
|
||||
for (int l = 0; l < Entropy.BAND_COEFF_CONTEXTS(k); ++l)
|
||||
{
|
||||
int n0 = (int)counts[i][j][k][l][Entropy.ZeroToken];
|
||||
int n1 = (int)counts[i][j][k][l][Entropy.OneToken];
|
||||
int n2 = (int)counts[i][j][k][l][Entropy.TwoToken];
|
||||
int neob = (int)counts[i][j][k][l][Entropy.EobModelToken];
|
||||
Span<byte> probsSpan5 = probsSpan4[l].AsSpan();
|
||||
Span<byte> preProbsSpan5 = preProbsSpan4[l].AsSpan();
|
||||
Span<uint> countsSpan5 = countsSpan4[l].AsSpan();
|
||||
|
||||
int n0 = (int)countsSpan5[Entropy.ZeroToken];
|
||||
int n1 = (int)countsSpan5[Entropy.OneToken];
|
||||
int n2 = (int)countsSpan5[Entropy.TwoToken];
|
||||
int neob = (int)countsSpan5[Entropy.EobModelToken];
|
||||
Array3<Array2<uint>> branchCt = new();
|
||||
branchCt[0][0] = (uint)neob;
|
||||
branchCt[0][1] = (uint)(eobCounts[i][j][k][l] - neob);
|
||||
branchCt[1][0] = (uint)n0;
|
||||
branchCt[1][1] = (uint)(n1 + n2);
|
||||
branchCt[2][0] = (uint)n1;
|
||||
branchCt[2][1] = (uint)n2;
|
||||
Span<Array2<uint>> branchCtSpan = branchCt.AsSpan();
|
||||
Span<uint> branchCt0Span = branchCtSpan[0].AsSpan();
|
||||
Span<uint> branchCt1Span = branchCtSpan[1].AsSpan();
|
||||
Span<uint> branchCt2Span = branchCtSpan[2].AsSpan();
|
||||
branchCt0Span[0] = (uint)neob;
|
||||
branchCt0Span[1] = (uint)(eobCountsSpan4[l] - neob);
|
||||
branchCt1Span[0] = (uint)n0;
|
||||
branchCt1Span[1] = (uint)(n1 + n2);
|
||||
branchCt2Span[0] = (uint)n1;
|
||||
branchCt2Span[1] = (uint)n2;
|
||||
for (int m = 0; m < Entropy.UnconstrainedNodes; ++m)
|
||||
{
|
||||
probs[i][j][k][l][m] = Prob.MergeProbs(preProbs[i][j][k][l][m], ref branchCt[m],
|
||||
probsSpan5[m] = Prob.MergeProbs(preProbsSpan5[m], branchCt[m].AsSpan(),
|
||||
countSat, updateFactor);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Video;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
@@ -16,20 +17,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
public int NeedResync; // Wait for key/intra-only frame.
|
||||
public int HoldRefBuf; // Hold the reference buffer.
|
||||
|
||||
private static void DecreaseRefCount(int idx, ref Array12<RefCntBuffer> frameBufs, ref BufferPool pool)
|
||||
private static void DecreaseRefCount(int idx, Span<RefCntBuffer> frameBuffs, ref BufferPool pool)
|
||||
{
|
||||
if (idx >= 0 && frameBufs[idx].RefCount > 0)
|
||||
if (idx >= 0 && frameBuffs[idx].RefCount > 0)
|
||||
{
|
||||
--frameBufs[idx].RefCount;
|
||||
--frameBuffs[idx].RefCount;
|
||||
// A worker may only get a free framebuffer index when calling GetFreeFb.
|
||||
// But the private buffer is not set up until finish decoding header.
|
||||
// So any error happens during decoding header, the frame_bufs will not
|
||||
// have valid priv buffer.
|
||||
if (frameBufs[idx].Released == 0 && frameBufs[idx].RefCount == 0 &&
|
||||
!frameBufs[idx].RawFrameBuffer.Priv.IsNull)
|
||||
if (frameBuffs[idx].Released == 0 && frameBuffs[idx].RefCount == 0 &&
|
||||
!frameBuffs[idx].RawFrameBuffer.Priv.IsNull)
|
||||
{
|
||||
FrameBuffers.ReleaseFrameBuffer(pool.CbPriv, ref frameBufs[idx].RawFrameBuffer);
|
||||
frameBufs[idx].Released = 1;
|
||||
FrameBuffers.ReleaseFrameBuffer(pool.CbPriv, ref frameBuffs[idx].RawFrameBuffer);
|
||||
frameBuffs[idx].Released = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -43,22 +44,32 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
cm.CheckMemError(ref cm.FrameContexts,
|
||||
allocator.Allocate<Vp9EntropyProbs>(Constants.FrameContexts));
|
||||
|
||||
Span<Array10<Array9<byte>>> cKfYModeProbSpan1 = cm.Fc.Value.KfYModeProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < EntropyMode.KfYModeProb.Length; i++)
|
||||
{
|
||||
Span<Array9<byte>> cKfYModeProbSpan2 = cKfYModeProbSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < EntropyMode.KfYModeProb[i].Length; j++)
|
||||
{
|
||||
Span<byte> cKfYModeProbSpan3 = cKfYModeProbSpan2[j].AsSpan();
|
||||
|
||||
for (int k = 0; k < EntropyMode.KfYModeProb[i][j].Length; k++)
|
||||
{
|
||||
cm.Fc.Value.KfYModeProb[i][j][k] = EntropyMode.KfYModeProb[i][j][k];
|
||||
cKfYModeProbSpan3[k] = EntropyMode.KfYModeProb[i][j][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Span<Array9<byte>> cKfUvModeProbSpan1 = cm.Fc.Value.KfUvModeProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < EntropyMode.KfUvModeProb.Length; i++)
|
||||
{
|
||||
Span<byte> cKfUvModeProbSpan2 = cKfUvModeProbSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < EntropyMode.KfUvModeProb[i].Length; j++)
|
||||
{
|
||||
cm.Fc.Value.KfUvModeProb[i][j] = EntropyMode.KfUvModeProb[i][j];
|
||||
cKfUvModeProbSpan2[j] = EntropyMode.KfUvModeProb[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,12 +99,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
[57, 15, 9], // l split, a not split
|
||||
[12, 3, 3] // a/l both split
|
||||
];
|
||||
|
||||
Span<Array3<byte>> cKfPartitionProbSpan1 = cm.Fc.Value.KfPartitionProb.AsSpan();
|
||||
|
||||
for (int i = 0; i < kfPartitionProbs.Length; i++)
|
||||
{
|
||||
Span<byte> cKfPartitionProbSpan2 = cKfPartitionProbSpan1[i].AsSpan();
|
||||
|
||||
for (int j = 0; j < kfPartitionProbs[i].Length; j++)
|
||||
{
|
||||
cm.Fc.Value.KfPartitionProb[i][j] = kfPartitionProbs[i][j];
|
||||
cKfPartitionProbSpan2[j] = kfPartitionProbs[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,11 +116,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
|
||||
NeedResync = 1;
|
||||
|
||||
Span<int> refFrameMapSpan = cm.RefFrameMap.AsSpan();
|
||||
Span<int> nextRefFrameMapSpan = cm.NextRefFrameMap.AsSpan();
|
||||
|
||||
// Initialize the references to not point to any frame buffers.
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
cm.RefFrameMap[i] = -1;
|
||||
cm.NextRefFrameMap[i] = -1;
|
||||
refFrameMapSpan[i] = -1;
|
||||
nextRefFrameMapSpan[i] = -1;
|
||||
}
|
||||
|
||||
cm.CurrentVideoFrame = 0;
|
||||
@@ -124,30 +142,34 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
int refIndex = 0, mask;
|
||||
ref Vp9Common cm = ref Common;
|
||||
ref BufferPool pool = ref cm.BufferPool.Value;
|
||||
ref Array12<RefCntBuffer> frameBufs = ref cm.BufferPool.Value.FrameBufs;
|
||||
Span<RefCntBuffer> frameBufs = cm.BufferPool.Value.FrameBufs.AsSpan();
|
||||
|
||||
Span<int> refFrameMapSpan = cm.RefFrameMap.AsSpan();
|
||||
Span<int> nextRefFrameMapSpan = cm.NextRefFrameMap.AsSpan();
|
||||
Span<RefBuffer> frameRefsSpan = cm.FrameRefs.AsSpan();
|
||||
|
||||
for (mask = RefreshFrameFlags; mask != 0; mask >>= 1)
|
||||
{
|
||||
int oldIdx = cm.RefFrameMap[refIndex];
|
||||
int oldIdx = refFrameMapSpan[refIndex];
|
||||
// Current thread releases the holding of reference frame.
|
||||
DecreaseRefCount(oldIdx, ref frameBufs, ref pool);
|
||||
DecreaseRefCount(oldIdx, frameBufs, ref pool);
|
||||
|
||||
// Release the reference frame in reference map.
|
||||
if ((mask & 1) != 0)
|
||||
{
|
||||
DecreaseRefCount(oldIdx, ref frameBufs, ref pool);
|
||||
DecreaseRefCount(oldIdx, frameBufs, ref pool);
|
||||
}
|
||||
|
||||
cm.RefFrameMap[refIndex] = cm.NextRefFrameMap[refIndex];
|
||||
refFrameMapSpan[refIndex] = nextRefFrameMapSpan[refIndex];
|
||||
++refIndex;
|
||||
}
|
||||
|
||||
// Current thread releases the holding of reference frame.
|
||||
for (; refIndex < Constants.RefFrames && cm.ShowExistingFrame == 0; ++refIndex)
|
||||
{
|
||||
int oldIdx = cm.RefFrameMap[refIndex];
|
||||
DecreaseRefCount(oldIdx, ref frameBufs, ref pool);
|
||||
cm.RefFrameMap[refIndex] = cm.NextRefFrameMap[refIndex];
|
||||
int oldIdx = refFrameMapSpan[refIndex];
|
||||
DecreaseRefCount(oldIdx, frameBufs, ref pool);
|
||||
refFrameMapSpan[refIndex] = nextRefFrameMapSpan[refIndex];
|
||||
}
|
||||
|
||||
HoldRefBuf = 0;
|
||||
@@ -158,7 +180,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
// Invalidate these references until the next frame starts.
|
||||
for (refIndex = 0; refIndex < 3; refIndex++)
|
||||
{
|
||||
cm.FrameRefs[refIndex].Idx = RefBuffer.InvalidIdx;
|
||||
frameRefsSpan[refIndex].Idx = RefBuffer.InvalidIdx;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,7 +188,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
ref Vp9Common cm = ref Common;
|
||||
ref BufferPool pool = ref cm.BufferPool.Value;
|
||||
ref Array12<RefCntBuffer> frameBufs = ref cm.BufferPool.Value.FrameBufs;
|
||||
Span<RefCntBuffer> frameBufs = cm.BufferPool.Value.FrameBufs.AsSpan();
|
||||
ArrayPtr<byte> source = psource;
|
||||
CodecErr retcode = 0;
|
||||
cm.Error.ErrorCode = CodecErr.Ok;
|
||||
@@ -177,10 +199,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
// We do not know if the missing frame(s) was supposed to update
|
||||
// any of the reference buffers, but we act conservative and
|
||||
// mark only the last buffer as corrupted.
|
||||
|
||||
Span<RefBuffer> frameRefsSpan = cm.FrameRefs.AsSpan();
|
||||
|
||||
if (cm.FrameRefs[0].Idx > 0)
|
||||
if (frameRefsSpan[0].Idx > 0)
|
||||
{
|
||||
cm.FrameRefs[0].Buf.Corrupted = 1;
|
||||
frameRefsSpan[0].Buf.Corrupted = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -279,8 +303,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
ArrayPtr<byte> dataStart = data;
|
||||
CodecErr res;
|
||||
Array8<uint> frameSizes = new();
|
||||
|
||||
res = Decoder.ParseSuperframeIndex(data, (ulong)data.Length, ref frameSizes, out int frameCount);
|
||||
Span<uint> frameSizesSpan = frameSizes.AsSpan();
|
||||
|
||||
res = Decoder.ParseSuperframeIndex(data, (ulong)data.Length, frameSizesSpan, out int frameCount);
|
||||
if (res != CodecErr.Ok)
|
||||
{
|
||||
return res;
|
||||
@@ -292,7 +317,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
for (int i = 0; i < frameCount; ++i)
|
||||
{
|
||||
ArrayPtr<byte> dataStartCopy = dataStart;
|
||||
uint frameSize = frameSizes[i];
|
||||
uint frameSize = frameSizesSpan[i];
|
||||
if (frameSize > (uint)dataStart.Length)
|
||||
{
|
||||
return CodecErr.CorruptFrame;
|
||||
@@ -343,7 +368,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
return data[0];
|
||||
}
|
||||
|
||||
public static CodecErr ParseSuperframeIndex(ArrayPtr<byte> data, ulong dataSz, ref Array8<uint> sizes, out int count)
|
||||
public static CodecErr ParseSuperframeIndex(ArrayPtr<byte> data, ulong dataSz, Span<uint> sizes, out int count)
|
||||
{
|
||||
// A chunk ending with a byte matching 0xc0 is an invalid chunk unless
|
||||
// it is a super frame index. If the last byte of real video compression
|
||||
|
||||
Reference in New Issue
Block a user