Detects the language of the page from a list of languages that is provided by the user.
public string[] DetectLanguages(
string[] languages
)
Function DetectLanguages( _
ByVal languages() As String _
) As String()
string[] DetectLanguages(
string[] languages
)
- (NSArray<NSNumber *> *)detectLanguages:(NSArray<NSNumber *> *)languages error:(NSError **)error
function Leadtools.Forms.Ocr.IOcrPage.DetectLanguages(
languages
)
array<String^>^ DetectLanguages(
array<String^>^ languages
)
languages
An array of String objects that contains the names of the languages to check for. The language values used throughout the LEADTOOLS OCR toolkit are string values based on RFC 4646 (Windows Vista and later). The name can be an ISO 639 two-letter lowercase culture code associated with a language, or a combination of an ISO 639 two-letter lowercase culture code and an ISO 3166 two-letter uppercase subculture code associated with a country or region.
It is recommended to pass only a few (fewer than 5) languages. The recognition engine will use all of those language dictionaries to recognize the words in the document.
An array of String objects that contains the names of the detected page language(s).
DetectLanguages is only supported in the LEADTOOLS Professional and Advantage OCR engines.
The page may contain a single language only, but it can be any Latin-alphabet or Asian language.
Note: Automatic detection of Greek and Cyrillic is not supported.
If the language cannot be determined, the language of the previous page will be used.
To improve the detection accuracy, set an array with possible languages to be used, then pass it through the languages parameter.
Automatic language detection is performed as a relatively fast initial processing step before character recognition. Therefore, the accuracy will depend on the image quality and many other conditions.
Use the GetSupportedLanguages method to obtain a list of the languages supported by the IOcrEngine.
Use the IsLanguageSupported method to check if a given language is supported by the IOcrEngine.
This example detects the language of a page, checking for English, French, and German.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Forms.Ocr;
using Leadtools.Forms;
using Leadtools.Forms.DocumentWriters;
using Leadtools.WinForms;
using Leadtools.Drawing;
using Leadtools.ImageProcessing;
using Leadtools.ImageProcessing.Color;
/// <summary>
/// Adds OCR1.tif from the images directory to a new OCR document, asks the page
/// to detect its language among English, French and German, and writes the
/// English name of every detected language to the console.
/// </summary>
public void DetectLanguagesExample()
{
    string imagePath = Path.Combine(LEAD_VARS.ImagesDir, "OCR1.tif");

    // Languages the detector is asked to choose from
    string[] candidates = new string[] { "en", "fr", "de" };

    // Professional engine instance; disposed when the using block exits
    using (IOcrEngine engine = OcrEngineManager.CreateEngine(OcrEngineType.Professional, false))
    {
        // Default startup parameters
        engine.Startup(null, null, null, null);

        using (IOcrDocument document = engine.DocumentManager.CreateDocument())
        {
            // Add the image as a page, then run language detection on it
            IOcrPage page = document.Pages.AddPage(imagePath, null);
            string[] detected = page.DetectLanguages(candidates);

            // Treat a null result the same as an empty one
            int detectedCount = (detected == null) ? 0 : detected.Length;

            if (detectedCount > 0)
            {
                Console.WriteLine("Page language(s):{0}", Environment.NewLine);

                for (int i = 0; i < detectedCount; i++)
                {
                    // CultureInfo turns the language name into a friendly English name
                    CultureInfo culture = new CultureInfo(detected[i]);
                    Console.WriteLine("{0}{1}", culture.EnglishName, Environment.NewLine);
                }
            }
        }

        // Explicit shutdown; disposing the engine would also shut it down if it has been started
        engine.Shutdown();
    }
}
// Holder for constants shared by the example code.
static class LEAD_VARS
{
// Directory that DetectLanguagesExample combines with "OCR1.tif" to build the input image path.
public const string ImagesDir = @"C:\Users\Public\Documents\LEADTOOLS Images";
}
Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Forms.Ocr
Imports Leadtools.Forms.DocumentWriters
''' <summary>
''' Adds OCR1.tif from the images directory to a new OCR document, asks the page
''' to detect its language among English, French and German, and writes the
''' English name of every detected language to the console.
''' </summary>
Public Sub DetectLanguagesExample()
   Dim imagePath As String = Path.Combine(LEAD_VARS.ImagesDir, "OCR1.tif")

   ' Languages the detector is asked to choose from
   Dim candidates() As String = New String() {"en", "fr", "de"}

   ' Professional engine instance; disposed when the Using block exits
   Using engine As IOcrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Professional, False)
      ' Default startup parameters
      engine.Startup(Nothing, Nothing, Nothing, Nothing)

      Using ocrDoc As IOcrDocument = engine.DocumentManager.CreateDocument()
         ' Add the image as a page, then run language detection on it
         Dim firstPage As IOcrPage = ocrDoc.Pages.AddPage(imagePath, Nothing)
         Dim detected() As String = firstPage.DetectLanguages(candidates)

         ' Treat a Nothing result the same as an empty one
         Dim detectedCount As Integer = 0
         If detected IsNot Nothing Then
            detectedCount = detected.Length
         End If

         If detectedCount > 0 Then
            Console.WriteLine("Page language(s):{0}", Environment.NewLine)

            For i As Integer = 0 To detectedCount - 1
               ' CultureInfo turns the language name into a friendly English name
               Dim culture As New CultureInfo(detected(i))
               Console.WriteLine("{0}{1}", culture.EnglishName, Environment.NewLine)
            Next
         End If
      End Using

      ' Explicit shutdown; disposing the engine would also shut it down if it has been started
      engine.Shutdown()
   End Using
End Sub
' Holder for constants shared by the example code.
Public NotInheritable Class LEAD_VARS
' Directory that DetectLanguagesExample combines with "OCR1.tif" to build the input image path.
Public Const ImagesDir As String = "C:\Users\Public\Documents\LEADTOOLS Images"
End Class
Raster .NET | C API | C++ Class Library | JavaScript HTML5
Document .NET | C API | C++ Class Library | JavaScript HTML5
Medical .NET | C API | C++ Class Library | JavaScript HTML5
Medical Web Viewer .NET