Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Silero tts #74

Merged
merged 18 commits into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public TOutput PredictResult(TInput input)
{
throw new InvalidOperationException("Model is not loaded");
}

// TODO: Predict does not return a result here; the call just freezes
return _predictor.Predict(input);
}

Expand Down
97 changes: 97 additions & 0 deletions src/Translumo.Infrastructure/Python/PythonEngineWrapper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
using System;
using System.IO;
using Python.Runtime;
using Translumo.Infrastructure.Constants;

namespace Translumo.Infrastructure.Python;

/// <summary>
/// Reference-counted wrapper around the process-wide pythonnet <see cref="PythonEngine"/>.
/// Every consumer calls <see cref="Init"/> before using Python and <see cref="Dispose()"/>
/// when it is done; the engine is shut down when the last consumer disposes the wrapper.
/// </summary>
public class PythonEngineWrapper : IDisposable
{
    // TODO: move to common place, also used in EasyOCR
    private const string PythonDllName = "python38.dll";

    // Guards _countUsage and the initialize/shutdown transitions, because one wrapper
    // instance is shared by several consumers.
    private readonly object _syncRoot = new object();

    private bool _disposedValue;
    private int _countUsage;
    private IntPtr _threadState;

    /// <summary>
    /// Points pythonnet at the Python distribution located under <c>Global.PythonPath</c>.
    /// The engine itself is not started until <see cref="Init"/> is called.
    /// </summary>
    public PythonEngineWrapper()
    {
        Runtime.PythonDLL = Path.Combine(Global.PythonPath, PythonDllName);
        PythonEngine.PythonHome = Global.PythonPath;
    }

    /// <summary>
    /// Imports a Python module by name via <c>Py.Import</c>.
    /// This method does not take the GIL itself; call it from inside <see cref="Execute{T}"/>.
    /// </summary>
    /// <param name="libName">Name of the Python module to import.</param>
    /// <returns>The imported module object.</returns>
    public PyObject Import(string libName) => Py.Import(libName);

    /// <summary>
    /// Runs <paramref name="action"/> while holding the Python GIL.
    /// </summary>
    /// <param name="action">Code that touches Python objects.</param>
    /// <exception cref="ArgumentNullException"><paramref name="action"/> is <c>null</c>.</exception>
    public void Execute(Action action)
    {
        if (action is null)
        {
            throw new ArgumentNullException(nameof(action));
        }

        Execute<object?>(() => { action(); return null; });
    }

    /// <summary>
    /// Runs <paramref name="func"/> while holding the Python GIL and returns its result.
    /// </summary>
    /// <typeparam name="T">Type of the value produced by <paramref name="func"/>.</typeparam>
    /// <param name="func">Code that touches Python objects.</param>
    /// <returns>The value returned by <paramref name="func"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="func"/> is <c>null</c>.</exception>
    public T Execute<T>(Func<T> func)
    {
        if (func is null)
        {
            throw new ArgumentNullException(nameof(func));
        }

        using (Py.GIL())
        {
            return func();
        }
    }

    /// <summary>
    /// Registers one more consumer of the engine; the first registration starts the engine.
    /// Each successful call must be balanced by one call to <see cref="Dispose()"/>.
    /// </summary>
    public void Init()
    {
        lock (_syncRoot)
        {
            if (_countUsage == 0)
            {
                InitInternal();
            }

            // Count the consumer only after initialization succeeded. Otherwise a failed
            // start would make every later Init() return early with a dead engine.
            _countUsage++;
        }
    }

    /// <summary>
    /// Starts the Python engine if it is not already running.
    /// </summary>
    private void InitInternal()
    {
        _disposedValue = false;

        if (!PythonEngine.IsInitialized)
        {
            Runtime.PythonDLL = Path.Combine(Global.PythonPath, PythonDllName);
            PythonEngine.Initialize();

            // Keep the returned thread state: it is the argument EndAllowThreads needs,
            // should the shutdown workaround in Dispose(bool) ever be lifted.
            _threadState = PythonEngine.BeginAllowThreads();
        }
    }

    /// <summary>
    /// Shuts the Python engine down if it is running. Safe to call more than once.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>, <c>false</c> when called from the finalizer.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposedValue)
        {
            return;
        }

        // NOTE(review): the shutdown below also runs when invoked from the finalizer
        // (disposing == false), as in the original code - confirm that is intended.
        if (PythonEngine.IsInitialized)
        {
            // Causes PythonEngine.Shutdown() hanging (https://github.com/pythonnet/pythonnet/issues/1701)
            // PythonEngine.EndAllowThreads(_threadState);
            PythonEngine.Shutdown();
            _threadState = IntPtr.Zero;
        }

        _disposedValue = true;
    }

    /// <summary>
    /// Last-resort cleanup when the wrapper was never disposed explicitly.
    /// </summary>
    ~PythonEngineWrapper()
    {
        Dispose(disposing: false);
    }

    /// <summary>
    /// Releases one consumer registration; the engine is shut down when none remain.
    /// </summary>
    public void Dispose()
    {
        lock (_syncRoot)
        {
            // Never let an unmatched Dispose() push the counter below zero; that would
            // unbalance every subsequent Init()/Dispose() pair.
            if (_countUsage > 0)
            {
                _countUsage--;
            }

            if (_countUsage > 0)
            {
                return;
            }

            Dispose(disposing: true);
        }

        GC.SuppressFinalize(this);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
<ItemGroup>
<PackageReference Include="Microsoft.ML" Version="2.0.1" />
<PackageReference Include="Microsoft.PowerShell.SDK" Version="7.1.7" />
<PackageReference Include="pythonnet" Version="3.0.1" />
</ItemGroup>

<ItemGroup>
Expand Down
43 changes: 16 additions & 27 deletions src/Translumo.OCR/EasyOCR/EasyOCREngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using Python.Runtime;
using Translumo.Infrastructure.Constants;
using Translumo.Infrastructure.Language;
using Translumo.Infrastructure.Python;

namespace Translumo.OCR.EasyOCR
{
Expand All @@ -16,27 +17,26 @@ public class EasyOCREngine : IOCREngine, IDisposable
public int Confidence => 9;
public Languages DetectionLanguage => _languageDescriptor.Language;

private IntPtr _threadState;
private bool _objectsInitialized;
private bool _readerIsUsed;

private readonly object _obj = new object();
private readonly object _obj = new object();
private readonly LanguageDescriptor _languageDescriptor;
private readonly PythonEngineWrapper _pythonEngine;
private readonly string _modelPath = Path.Combine(Global.ModelsPath, "easyocr");
private readonly ILogger _logger;

#region Python objects
private PyObject _builtinsLib;
private dynamic _builtinsLib;
private dynamic _easyOcrLib;
private dynamic _reader;
private dynamic _bytes;
#endregion

public EasyOCREngine(LanguageDescriptor languageDescriptor, ILogger logger)
public EasyOCREngine(LanguageDescriptor languageDescriptor, PythonEngineWrapper pythonEngine, ILogger logger)
{
Runtime.PythonDLL = Path.Combine(Global.PythonPath, "python38.dll");
PythonEngine.PythonHome = Global.PythonPath;
_languageDescriptor = languageDescriptor;
_pythonEngine = pythonEngine;
_logger = logger;

_logger.LogTrace($"Initialization EasyOCR from path: '{Global.PythonPath}'");
Expand All @@ -54,13 +54,13 @@ public string[] GetTextLines(byte[] image)
throw new InvalidOperationException($"EasyOCR is not initialized");
}

using (Py.GIL())
return _pythonEngine.Execute(() =>
{
dynamic ocrResult = _reader.readtext(_bytes.Invoke(image.ToPython()), detail: 0, paragraph: true);
_readerIsUsed = true;

return (string[])ocrResult;
}
});
}
}

Expand All @@ -70,11 +70,7 @@ private void EnsureInitialized()
{
try
{
if (!PythonEngine.IsInitialized)
{
PythonEngine.Initialize();
_threadState = PythonEngine.BeginAllowThreads();
}
_pythonEngine.Init();

if (!_objectsInitialized)
{
Expand All @@ -88,18 +84,16 @@ private void EnsureInitialized()
}
}

private void InitializeObjects()
{
using (Py.GIL())
private void InitializeObjects() =>
_pythonEngine.Execute(() =>
{
_builtinsLib = Py.Import("builtins");
_easyOcrLib = Py.Import("easyocr");
_builtinsLib = _pythonEngine.Import("builtins");
_easyOcrLib = _pythonEngine.Import("easyocr");
_reader = _easyOcrLib.Reader(new[] { _languageDescriptor.EasyOcrCode }, model_storage_directory: _modelPath, download_enabled: false, recog_network: _languageDescriptor.EasyOcrModel);
_bytes = _builtinsLib.GetAttr("bytes");
_bytes = _builtinsLib.bytes;

_objectsInitialized = true;
}
}
});

public void Dispose()
{
Expand All @@ -118,12 +112,7 @@ public void Dispose()
_objectsInitialized = false;
}

if (PythonEngine.IsInitialized)
{
//Causes PythonEngine.Shutdown() hanging (https://github.com/pythonnet/pythonnet/issues/1701)
//PythonEngine.EndAllowThreads(_threadState);
PythonEngine.Shutdown();
}
_pythonEngine.Dispose();
}
}
}
Expand Down
11 changes: 7 additions & 4 deletions src/Translumo.OCR/OcrEnginesFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Linq;
using Microsoft.Extensions.Logging;
using Translumo.Infrastructure.Language;
using Translumo.Infrastructure.Python;
using Translumo.OCR.Configuration;
using Translumo.OCR.EasyOCR;
using Translumo.OCR.Tesseract;
Expand All @@ -15,11 +16,13 @@ public class OcrEnginesFactory
private IList<IOCREngine> _cachedEngines;

private readonly LanguageService _languageService;
private readonly PythonEngineWrapper _pythonEngine;
private readonly ILogger _logger;

public OcrEnginesFactory(LanguageService languageService, ILogger<OcrEnginesFactory> logger)
public OcrEnginesFactory(LanguageService languageService, PythonEngineWrapper pythonEngine, ILogger<OcrEnginesFactory> logger)
{
this._languageService = languageService;
_pythonEngine = pythonEngine;
this._logger = logger;
this._cachedEngines = new List<IOCREngine>();
}
Expand All @@ -36,7 +39,7 @@ public IEnumerable<IOCREngine> GetEngines(IEnumerable<OcrConfiguration> ocrConfi
{
if (!TryRemoveIfDisabled<WindowsOCREngine>(ocrConfiguration))
yield return GetEngine(() => new WindowsOCREngine(langDescriptor), detectionLanguage);

if (!TryRemoveIfDisabled<WinOCREngineWithPreprocess>(ocrConfiguration))
yield return GetEngine(() => new WinOCREngineWithPreprocess(langDescriptor), detectionLanguage);
}
Expand All @@ -53,7 +56,7 @@ public IEnumerable<IOCREngine> GetEngines(IEnumerable<OcrConfiguration> ocrConfi
if (confType == typeof(EasyOCRConfiguration))
{
if (!TryRemoveIfDisabled<EasyOCREngine>(ocrConfiguration))
yield return GetEngine(() => new EasyOCREngine(langDescriptor, _logger), detectionLanguage);
yield return GetEngine(() => new EasyOCREngine(langDescriptor, _pythonEngine, _logger), detectionLanguage);
}
}

Expand All @@ -72,7 +75,7 @@ bool TryRemoveIfDisabled<TEngine>(OcrConfiguration configuration)
}

private IOCREngine GetEngine<TEngine>(Func<TEngine> ocrFactoryFunc, Languages detectionLanguage)
where TEngine: IOCREngine
where TEngine : IOCREngine
{
var cachedEngine = _cachedEngines.FirstOrDefault(engine => engine.GetType() == typeof(TEngine));
if (cachedEngine == null)
Expand Down
1 change: 0 additions & 1 deletion src/Translumo.OCR/Translumo.OCR.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
<PackageReference Include="OpenCvSharp4" Version="4.7.0.20230115" />
<PackageReference Include="OpenCvSharp4.Extensions" Version="4.7.0.20230115" />
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.7.0.20230115" />
<PackageReference Include="pythonnet" Version="3.0.1" />
<PackageReference Include="Tesseract" Version="5.2.0" />
</ItemGroup>

Expand Down
2 changes: 1 addition & 1 deletion src/Translumo.Processing/TranslationProcessingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ private void TranslateOnceInternal(RectangleF captureArea)
{
byte[] screenshot = _onceTimeCapturer.CaptureScreen(captureArea);
var taskResults = _engines.Select(engine => _textProvider.GetTextAsync(engine, screenshot)).ToArray();

// TODO: sometimes one of the tasks (win tts) does not complete for a long time, and translation stops working
Task.WaitAll(taskResults);
TextDetectionResult bestDetected = GetBestDetectionResult(taskResults, 3);
translationTask = TranslateTextAsync(bestDetected.Text, Guid.NewGuid());
Expand Down
Loading