Skip to content

Commit

Permalink
Added Whisper Factory Options
Browse files Browse the repository at this point in the history
  • Loading branch information
sandrohanea committed Dec 20, 2024
1 parent 78e1651 commit 96a8d4a
Show file tree
Hide file tree
Showing 9 changed files with 280 additions and 186 deletions.
31 changes: 0 additions & 31 deletions Whisper.net/Ggml/GgmlType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,34 +17,3 @@ public enum GgmlType
LargeV3,
LargeV3Turbo
}

/// <summary>
/// Alignment-heads presets. Each member is translated to a native
/// <c>WHISPER_AHEADS_*</c> value by <c>ModelLoaderUtils.Map</c>.
/// </summary>
/// <remarks>
/// The numeric values are the same ones the compiler assigns implicitly from
/// declaration order; they are only spelled out here.
/// </remarks>
public enum WhisperAlignmentHeadsPreset
{
    None = 0,

    // All heads from the N-top-most text-layers
    NTopMost = 1,

    Custom = 2,

    TinyEn = 3,
    Tiny = 4,

    BaseEn = 5,
    Base = 6,

    SmallEn = 7,
    Small = 8,

    MediumEn = 9,
    Medium = 10,

    LargeV1 = 11,
    LargeV2 = 12,
    LargeV3 = 13,
    LargeV3Turbo = 14
}

/// <summary>
/// A single alignment head, described by a text layer value and a head value.
/// </summary>
public class WhisperAlignmentHead
{
    /// <summary>The text layer value passed to the constructor.</summary>
    public int TextLayer;

    /// <summary>The head value passed to the constructor.</summary>
    public int Head;

    /// <summary>
    /// Creates an alignment head from its two components.
    /// </summary>
    /// <param name="textLayer">Value stored in <see cref="TextLayer"/>.</param>
    /// <param name="head">Value stored in <see cref="Head"/>.</param>
    public WhisperAlignmentHead(int textLayer, int head)
    {
        this.Head = head;
        this.TextLayer = textLayer;
    }
}
64 changes: 64 additions & 0 deletions Whisper.net/Internals/ModelLoader/ModelLoaderUtils.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed under the MIT license: https://opensource.org/licenses/MIT

using System.Runtime.InteropServices;
using Whisper.net.Native;
using NativeHeadsPreset = Whisper.net.Native.WhisperAlignmentHeadsPreset;

namespace Whisper.net.Internals.ModelLoader;

/// <summary>
/// Helpers used by the model loaders to build the native context parameters.
/// </summary>
internal static class ModelLoaderUtils
{
    /// <summary>
    /// Translates an alignment-heads preset into the matching native preset value.
    /// </summary>
    /// <param name="preset">The preset to translate.</param>
    /// <returns>The native <c>WHISPER_AHEADS_*</c> value with the same name.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="preset"/> is not one of the known members.
    /// </exception>
    public static NativeHeadsPreset Map(WhisperAlignmentHeadsPreset preset)
    {
        return preset switch
        {
            WhisperAlignmentHeadsPreset.None => NativeHeadsPreset.WHISPER_AHEADS_NONE,
            WhisperAlignmentHeadsPreset.NTopMost => NativeHeadsPreset.WHISPER_AHEADS_N_TOP_MOST,
            WhisperAlignmentHeadsPreset.Custom => NativeHeadsPreset.WHISPER_AHEADS_CUSTOM,
            WhisperAlignmentHeadsPreset.TinyEn => NativeHeadsPreset.WHISPER_AHEADS_TINY_EN,
            WhisperAlignmentHeadsPreset.Tiny => NativeHeadsPreset.WHISPER_AHEADS_TINY,
            WhisperAlignmentHeadsPreset.Base => NativeHeadsPreset.WHISPER_AHEADS_BASE,
            WhisperAlignmentHeadsPreset.BaseEn => NativeHeadsPreset.WHISPER_AHEADS_BASE_EN,
            WhisperAlignmentHeadsPreset.Small => NativeHeadsPreset.WHISPER_AHEADS_SMALL,
            WhisperAlignmentHeadsPreset.SmallEn => NativeHeadsPreset.WHISPER_AHEADS_SMALL_EN,
            WhisperAlignmentHeadsPreset.Medium => NativeHeadsPreset.WHISPER_AHEADS_MEDIUM,
            WhisperAlignmentHeadsPreset.MediumEn => NativeHeadsPreset.WHISPER_AHEADS_MEDIUM_EN,
            WhisperAlignmentHeadsPreset.LargeV1 => NativeHeadsPreset.WHISPER_AHEADS_LARGE_V1,
            WhisperAlignmentHeadsPreset.LargeV2 => NativeHeadsPreset.WHISPER_AHEADS_LARGE_V2,
            WhisperAlignmentHeadsPreset.LargeV3 => NativeHeadsPreset.WHISPER_AHEADS_LARGE_V3,
            WhisperAlignmentHeadsPreset.LargeV3Turbo => NativeHeadsPreset.WHISPER_AHEADS_LARGE_V3_TURBO,
            _ => throw new ArgumentOutOfRangeException(nameof(preset), preset, null)
        };
    }

    /// <summary>
    /// Flattens the given alignment heads into a pinned <c>int</c> array of
    /// (text layer, head) pairs and returns a native descriptor pointing at it.
    /// </summary>
    /// <param name="alignmentHeads">The heads to flatten; may be <c>null</c> or empty.</param>
    /// <param name="aHeadsHandle">
    /// Receives the handle pinning the flattened array, or <c>null</c> when there are no heads.
    /// The caller must free a non-null handle once the returned descriptor is no longer used.
    /// </param>
    /// <returns>
    /// A descriptor holding the head count and the pinned array address, or the default
    /// value when <paramref name="alignmentHeads"/> is <c>null</c> or empty.
    /// </returns>
    public static WhisperAheads GetWhisperAlignmentHeads(WhisperAlignmentHead[]? alignmentHeads, out GCHandle? aHeadsHandle)
    {
        aHeadsHandle = null;

        if (alignmentHeads is null || alignmentHeads.Length == 0)
        {
            return default;
        }

        var nHeads = alignmentHeads.Length;
        var aHeads = new int[nHeads * 2];

        // Fill the array BEFORE pinning it: if an element is null the loop throws,
        // and a handle allocated earlier would never reach a caller able to free it.
        for (var i = 0; i < nHeads; i++)
        {
            var head = alignmentHeads[i];
            aHeads[i * 2] = head.TextLayer;
            aHeads[i * 2 + 1] = head.Head;
        }

        var pinned = GCHandle.Alloc(aHeads, GCHandleType.Pinned);
        aHeadsHandle = pinned;

        return new WhisperAheads()
        {
            NHeads = (UIntPtr)nHeads,
            Heads = pinned.AddrOfPinnedObject()
        };
    }

    /// <summary>
    /// Converts a boolean to the byte representation used by the native structs.
    /// </summary>
    /// <param name="value">The boolean to convert.</param>
    /// <returns><c>1</c> when <paramref name="value"/> is <c>true</c>; otherwise <c>0</c>.</returns>
    public static byte AsByte(this bool value)
    {
        return value ? (byte)1 : (byte)0;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,50 @@

using System.Runtime.InteropServices;
using Whisper.net.Internals.Native;
using Whisper.net.LibraryLoader;
using Whisper.net.Native;

namespace Whisper.net.Internals.ModelLoader;

/// <summary>
/// Model loader that creates a native context from a model held in a managed byte buffer.
/// The buffer (and the custom alignment heads, when present) stay pinned until
/// <see cref="Dispose"/> is called.
/// </summary>
internal class WhisperProcessorModelBufferLoader : IWhisperProcessorModelLoader
{
    private readonly GCHandle pinnedBuffer;
    private readonly WhisperAheads aHeads;
    private readonly GCHandle? aheadsHandle;
    private readonly UIntPtr bufferLength;

    private readonly WhisperFactoryOptions options;

    // Free() on a readonly GCHandle field (or on Nullable<GCHandle>.Value) runs on a copy,
    // so the stored handles never look released; this flag is what makes Dispose safe to repeat.
    private bool disposed;

    /// <summary>
    /// Creates the loader and pins everything the native call will read.
    /// </summary>
    /// <param name="buffer">The model bytes.</param>
    /// <param name="options">Factory options used to fill the native context parameters.</param>
    public WhisperProcessorModelBufferLoader(byte[] buffer, WhisperFactoryOptions options)
    {
        this.options = options;

        // Do the steps that can throw before pinning the buffer, so a failure here
        // does not leave a pinned handle behind with no object to dispose.
        bufferLength = new UIntPtr((uint)buffer.Length);
        aHeads = ModelLoaderUtils.GetWhisperAlignmentHeads(options.CustomAlignmentHeads, out aheadsHandle);
        pinnedBuffer = GCHandle.Alloc(buffer, GCHandleType.Pinned);
    }

    /// <summary>
    /// Releases the pinned buffer and, when present, the pinned alignment heads.
    /// Calling it more than once has no further effect.
    /// </summary>
    public void Dispose()
    {
        if (disposed)
        {
            return;
        }

        disposed = true;

        pinnedBuffer.Free();
        if (aheadsHandle.HasValue)
        {
            aheadsHandle.Value.Free();
        }
    }

    /// <summary>
    /// Initializes a native context from the pinned buffer using the configured options.
    /// </summary>
    /// <param name="nativeWhisper">The native API used to create the context.</param>
    /// <returns>The value returned by the native initialization call.</returns>
    public IntPtr LoadNativeContext(INativeWhisper nativeWhisper)
    {
        return nativeWhisper.Whisper_Init_From_Buffer_With_Params_No_State(pinnedBuffer.AddrOfPinnedObject(), bufferLength,
            new WhisperContextParams()
            {
                UseGpu = options.UseGpu.AsByte(),
                FlashAttention = options.UseFlashAttention.AsByte(),
                GpuDevice = options.GpuDevice,
                DtwTokenLevelTimestamp = options.UseDtwTimeStamps.AsByte(),
                HeadsPreset = ModelLoaderUtils.Map(options.HeadsPreset),
                DtwNTop = options.DtwNTop,
                WhisperAheads = aHeads,
                Dtw_mem_size = new UIntPtr(options.DtwMemSize),
            });
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,67 +2,47 @@

using System.Runtime.InteropServices;
using Whisper.net.Internals.Native;
using Whisper.net.LibraryLoader;
using Whisper.net.Native;

namespace Whisper.net.Internals.ModelLoader;

/// <summary>
/// Model loader that creates a native context from a model file on disk.
/// Custom alignment heads, when present, stay pinned until <see cref="Dispose"/> is called.
/// </summary>
internal sealed class WhisperProcessorModelFileLoader : IWhisperProcessorModelLoader
{
    private readonly string pathModel;
    private readonly WhisperFactoryOptions options;
    private readonly WhisperAheads aHeads;
    private readonly GCHandle? aheadsHandle;

    // Free() on Nullable<GCHandle>.Value runs on a copy, so the stored handle never
    // looks released; this flag is what makes Dispose safe to repeat.
    private bool disposed;

    /// <summary>
    /// Creates the loader and pins the custom alignment heads, if any.
    /// </summary>
    /// <param name="pathModel">Path of the model file passed to the native call.</param>
    /// <param name="options">Factory options used to fill the native context parameters.</param>
    public WhisperProcessorModelFileLoader(string pathModel, WhisperFactoryOptions options)
    {
        this.pathModel = pathModel;
        this.options = options;
        aHeads = ModelLoaderUtils.GetWhisperAlignmentHeads(options.CustomAlignmentHeads, out aheadsHandle);
    }

    /// <summary>
    /// Releases the pinned alignment heads, when present.
    /// Calling it more than once has no further effect.
    /// </summary>
    public void Dispose()
    {
        if (disposed)
        {
            return;
        }

        disposed = true;

        if (aheadsHandle.HasValue)
        {
            aheadsHandle.Value.Free();
        }
    }

    /// <summary>
    /// Initializes a native context from the model file using the configured options.
    /// </summary>
    /// <param name="nativeWhisper">The native API used to create the context.</param>
    /// <returns>The value returned by the native initialization call.</returns>
    public IntPtr LoadNativeContext(INativeWhisper nativeWhisper)
    {
        // The alignment heads were pinned once in the constructor. Do not rebuild them
        // here: a second call would pin a new array on every load with nothing to free it.
        return nativeWhisper.Whisper_Init_From_File_With_Params_No_State(pathModel,
            new WhisperContextParams()
            {
                UseGpu = options.UseGpu.AsByte(),
                FlashAttention = options.UseFlashAttention.AsByte(),
                GpuDevice = options.GpuDevice,
                DtwTokenLevelTimestamp = options.UseDtwTimeStamps.AsByte(),
                HeadsPreset = ModelLoaderUtils.Map(options.HeadsPreset),
                DtwNTop = options.DtwNTop,
                WhisperAheads = aHeads,
                Dtw_mem_size = new UIntPtr(options.DtwMemSize)
            });
    }
}
10 changes: 5 additions & 5 deletions Whisper.net/Internals/Native/Data.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ internal struct WhisperAhead
// Sequential-layout descriptor for a set of alignment heads, embedded in WhisperContextParams.
[StructLayout(LayoutKind.Sequential)]
internal struct WhisperAheads
{
// Number of alignment heads. This diff shows the field twice because the commit
// changes its declared type from nuint to UIntPtr.
public nuint NHeads;
public UIntPtr NHeads;
// Address of the head data; ModelLoaderUtils.GetWhisperAlignmentHeads sets it to a pinned
// int array of (text layer, head) pairs.
public IntPtr Heads;
}

Expand All @@ -94,7 +94,7 @@ internal struct WhisperContextParams
public WhisperAlignmentHeadsPreset HeadsPreset;
public int DtwNTop;
public WhisperAheads WhisperAheads;
public nuint Dtw_mem_size;
public UIntPtr Dtw_mem_size;
}

[StructLayout(LayoutKind.Sequential)]
Expand Down Expand Up @@ -235,9 +235,9 @@ internal struct WhisperFullParams

public IntPtr WhisperGrammarElement;

public nuint NGrammarRules;
public UIntPtr NGrammarRules;

public nuint StartGrammarRule;
public UIntPtr StartGrammarRule;

public float GrammarPenalty;
}
Expand All @@ -260,4 +260,4 @@ internal struct WhisperTokenData
public long t1;
public long t_dtw;
public float vlen;
}
}
Loading

0 comments on commit 96a8d4a

Please sign in to comment.