Add basic speech synthesis, with a Linux Speech Dispatcher backend.

Signed-off-by: Ritchie Frodomar <alkalinethunder@gmail.com>
This commit is contained in:
Ritchie Frodomar 2025-01-21 22:01:18 -05:00
parent 19c0e6ce2c
commit fca8889c38
No known key found for this signature in database
GPG key ID: 47384A02C174B15F
5 changed files with 147 additions and 0 deletions

View file

@ -0,0 +1,15 @@
namespace Ritchie.Toolbox.Accessibility;
/// <summary>
/// Interface for a class that can be used as a speech synthesis backend for the <see cref="TextToSpeech"/> accessibility API.
/// </summary>
public interface ISpeechHandler
{
/// <summary>
/// Speak the specified text aloud.
/// </summary>
/// <param name="textToSpeak">The text to speak.</param>
/// <param name="interruptToken">A <see cref="CancellationToken"/> that will be cancelled if speech is interrupted.</param>
/// <returns>A task representing the asynchronous speech operation.</returns>
Task SpeakAsync(string textToSpeak, CancellationToken interruptToken);
}

View file

@ -0,0 +1,42 @@
using System.Diagnostics;
namespace Ritchie.Toolbox.Accessibility;
/// <summary>
/// Implementation of <see cref="ISpeechHandler"/> that uses the Speech Dispatcher daemon (speechd)
/// by launching its <c>spd-say</c> command-line client once per utterance.
/// </summary>
public sealed class SpeechDispatcherBackend : ISpeechHandler
{
    /// <summary>
    /// Absolute path of the <c>spd-say</c> executable that is launched to speak text.
    /// </summary>
    private const string SayCommandName = "/usr/bin/spd-say";

    /// <inheritdoc />
    /// <remarks>
    /// The text is streamed to the child process through standard input rather than passed
    /// as a command-line argument. If <paramref name="interruptToken"/> is cancelled, the
    /// child process is killed and the method returns normally instead of throwing.
    /// </remarks>
    /// <exception cref="InvalidOperationException">The spd-say process could not be started.</exception>
    public async Task SpeakAsync(string textToSpeak, CancellationToken interruptToken)
    {
        // An interruption is reported by returning quietly (see the catch below), so a token
        // that is already cancelled should not spawn a process at all.
        if (interruptToken.IsCancellationRequested)
            return;

        var startInfo = new ProcessStartInfo
        {
            FileName = SayCommandName,
            UseShellExecute = false,
            RedirectStandardInput = true,
        };
        startInfo.ArgumentList.Add("-w"); // Tells it to wait for speech to finish, since the game expects that.
        startInfo.ArgumentList.Add("--pipe-mode"); // Allows us to stream text in, instead of passing it by CLI args, which is safer.

        // The using declaration disposes the process handle on every exit path.
        using var process = Process.Start(startInfo);
        if (process is null)
            throw new InvalidOperationException($"Could not start the spd-say process. Ensure that Speech Dispatcher is installed on this computer and that {SayCommandName} exists.");

        try
        {
            // Pass the token to the write as well, so an interruption that arrives while the
            // text is still being streamed is observed instead of only being noticed afterwards.
            await process.StandardInput.WriteAsync(textToSpeak.AsMemory(), interruptToken);

            // Closing stdin signals end of input to spd-say.
            process.StandardInput.Close();
            await process.WaitForExitAsync(interruptToken);
        }
        catch (OperationCanceledException)
        {
            // Speech was interrupted: terminate the client rather than waiting for it to finish.
            process.Kill();
        }
    }
}

View file

@ -0,0 +1,57 @@
using System.Runtime.CompilerServices;
using Silk.NET.SDL;
namespace Ritchie.Toolbox.Accessibility;
/// <summary>
/// Accessibility API for reading text aloud with a speech synthesis backend.
/// </summary>
public static class TextToSpeech
{
    // Backend currently used to speak text; null means speech synthesis is disabled.
    private static ISpeechHandler? speechHandler;

    // Cancellation source of the utterance currently in progress, if any.
    // Whoever removes it from this field is responsible for disposing it.
    private static CancellationTokenSource? currentSpeechToken;

    /// <summary>
    /// Switch the current speech backend. Any speech in progress is interrupted first, and the
    /// previous backend is disposed if it implements <see cref="IDisposable"/>.
    /// </summary>
    /// <param name="newSpeechHandler">The new backend to use. If null is provided, speech synthesis support will be disabled.</param>
    public static void SetSpeechHandler(ISpeechHandler? newSpeechHandler)
    {
        StopSpeaking();

        // Re-installing the backend that is already active must not dispose it,
        // otherwise a disposed instance would remain in use.
        if (ReferenceEquals(speechHandler, newSpeechHandler))
            return;

        if (speechHandler is IDisposable disposable)
            disposable.Dispose();

        speechHandler = newSpeechHandler;
    }

    /// <summary>
    /// Tries to read the specified text aloud with the current speech backend.
    /// Any speech already in progress is interrupted first.
    /// </summary>
    /// <param name="textToSpeak">The text to speak.</param>
    /// <returns>
    /// A task that waits for speech to finish, and returns true if speech was successful.
    /// Returns false when no backend is set or when the backend throws.
    /// </returns>
    public static async Task<bool> TrySpeakAsync(string textToSpeak)
    {
        // Capture the backend once so the null check and the call use the same instance.
        var handler = speechHandler;
        if (handler is null)
            return false;

        StopSpeaking();

        var speechToken = new CancellationTokenSource();
        currentSpeechToken = speechToken;
        try
        {
            await handler.SpeakAsync(textToSpeak, speechToken.Token);
            return true;
        }
        catch (Exception ex)
        {
            // Best-effort API: failures are reported through the return value, but the
            // cause is written to the debug output instead of being dropped silently.
            System.Diagnostics.Debug.WriteLine($"Speech synthesis failed: {ex}");
            return false;
        }
        finally
        {
            // Dispose the source only if it is still the registered one. If StopSpeaking
            // already took it out of the field, StopSpeaking disposes it.
            var previous = System.Threading.Interlocked.CompareExchange(ref currentSpeechToken, null, speechToken);
            if (ReferenceEquals(previous, speechToken))
                speechToken.Dispose();
        }
    }

    /// <summary>
    /// Interrupts the utterance currently in progress, if any, by cancelling its token.
    /// </summary>
    private static void StopSpeaking()
    {
        // Taking the source out of the field transfers ownership, so it is disposed exactly once.
        var activeToken = System.Threading.Interlocked.Exchange(ref currentSpeechToken, null);
        if (activeToken is null)
            return;

        try
        {
            activeToken.Cancel();
        }
        finally
        {
            activeToken.Dispose();
        }
    }
}

View file

@ -0,0 +1,27 @@
using Ritchie.Toolbox.Accessibility;
using SociallyDistant.Architecture;
using SociallyDistant.Core.Modules;
using SociallyDistant.Core.OS.Tasks;
namespace SociallyDistant.Commands.Misc;
/// <summary>
/// Command registered as "say" that joins its arguments into one string and asks
/// <see cref="TextToSpeech"/> to read it aloud.
/// </summary>
[Command("say")]
public sealed class SayCommand : ScriptableCommand
{
    /// <summary>
    /// Creates the command, forwarding the game context to the base class.
    /// </summary>
    /// <param name="gameContext">The game context passed on to <see cref="ScriptableCommand"/>.</param>
    public SayCommand(IGameContext gameContext) : base(gameContext)
    {
    }

    /// <summary>
    /// The name of this command.
    /// </summary>
    public override string Name => "say";

    /// <summary>
    /// Speaks the space-separated arguments, and prints a notice when speech did not succeed.
    /// </summary>
    protected override async Task DefaultHandler()
    {
        var spokenText = string.Join(" ", Arguments);

        // Nothing more to do once the text was spoken successfully.
        if (await TextToSpeech.TrySpeakAsync(spokenText))
            return;

        Console.WriteLine("Speech service is unavailable.");
    }
}

View file

@ -1,6 +1,7 @@
using System.Diagnostics; using System.Diagnostics;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using Microsoft.Xna.Framework; using Microsoft.Xna.Framework;
using Ritchie.Toolbox.Accessibility;
using Serilog; using Serilog;
using Silk.NET.SDL; using Silk.NET.SDL;
using SociallyDistant.Core.Config; using SociallyDistant.Core.Config;
@ -23,6 +24,11 @@ internal sealed class GameApplication : Application
public GameApplication() public GameApplication()
{ {
if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
{
TextToSpeech.SetSpeechHandler(new SpeechDispatcherBackend());
}
var videoDriversSupported = GetSdlDrivers(); var videoDriversSupported = GetSdlDrivers();
var sdlSupportsWayland = videoDriversSupported.Contains("wayland"); var sdlSupportsWayland = videoDriversSupported.Contains("wayland");