Detects the language of the page from a list of languages that is provided by the user.
#include "ltocr.h"
L_LTOCR_API L_INT EXT_FUNCTION L_OcrPage_DetectLanguages(page, languages, languageCount, detectedLanguages, detectedLanguageCount)
Handle to the L_OcrPage whose language(s) will be detected.
An array of the languages to check for. It is recommended to use only a few (fewer than 5) languages. The recognition engine will use all of those language dictionaries to recognize the words in the document.
Number of elements in the 'languages' array; this should be _countof(languages).
Pointer to an L_OcrLanguage* variable to be allocated and updated with the detected page language(s).
Pointer to L_UINT variable to be updated with the number of detected page languages.
Value | Meaning |
---|---|
SUCCESS | The function was successful. |
< 1 | An error occurred. Refer to Return Codes. |
The page may contain a single language only, but it can be any Latin-alphabet or Asian language.
It is recommended to use only a few (fewer than 5) languages. The recognition engine will use all of those language dictionaries to recognize the words in the document.
Note: Automatic detection of Greek and Cyrillic is not supported.
If the language cannot be determined, the language of the previous page will be used.
To improve the detection accuracy, set an array with possible languages to be used, then pass it through the languages parameter.
Automatic language detection is performed as a relatively fast initial processing step before character recognition. Therefore, the accuracy will depend on the bitmap quality and many other conditions.
Use L_OcrLanguageManager_GetSupportedLanguages to obtain a list of the languages supported by the OCR engine.
Use L_OcrLanguageManager_IsLanguageSupported to check if a given language is supported by the OCR engine.
L_INT L_OcrPage_DetectLanguagesExample()
{
BITMAPHANDLE bitmap = { 0 };
L_OcrEngine ocrEngine = NULL;
L_OcrPage ocrPage = NULL;
// Create an instance of the engine
L_INT retCode = L_OcrEngineManager_CreateEngine(L_OcrEngineType_LEAD, &ocrEngine);
if(retCode != SUCCESS)
return retCode;
// Start the engine using default parameters
retCode = L_OcrEngine_Startup(ocrEngine, NULL, OCR_LEAD_RUNTIME_DIR);
// Load an image to process
L_LoadBitmap(MAKE_IMAGE_PATH(L_TEXT("Ocr1.tif")), &bitmap, sizeof(BITMAPHANDLE), 0, ORDER_RGB, NULL, NULL);
// Add the page to an OCR page
if(L_OcrPage_FromBitmap(ocrEngine, &ocrPage, &bitmap, L_OcrBitmapSharingMode_AutoFree, NULL, NULL) == SUCCESS)
{
// Tranfer ownership of image to OCR page
bitmap.Flags.Allocated = 0;
// Auto detect page language.
//const L_UINT count = 3;
L_OcrLanguage languages[3] = {L_OcrLanguage_EN, L_OcrLanguage_FR, L_OcrLanguage_DE };
L_OcrLanguage* detectedLanguages = NULL;
L_UINT detectedLanguagesCount = 0;
L_OcrPage_DetectLanguages(ocrPage, languages, 3, &detectedLanguages, &detectedLanguagesCount);
if(detectedLanguages != NULL && detectedLanguagesCount > 0)
{
std::wcout << L"Page language(s):\n";
for(L_UINT langIndex = 0; langIndex < detectedLanguagesCount; langIndex++)
{
// Print the friendly name of this language
switch ((L_OcrLanguage)detectedLanguages[langIndex])
{
case L_OcrLanguage_None: // Special value means "no language"
std::wcout << L_TEXT("no language") << std::endl;
break;
case L_OcrLanguage_EN:
std::wcout << L_TEXT("English") << std::endl;
break;
case L_OcrLanguage_ES:
std::wcout << L_TEXT("Spanish") << std::endl;
break;
case L_OcrLanguage_FR:
std::wcout << L_TEXT("French") << std::endl;
break;
case L_OcrLanguage_DE:
std::wcout << L_TEXT("German") << std::endl;
break;
case L_OcrLanguage_IT:
std::wcout << L_TEXT("Italian") << std::endl;
break;
case L_OcrLanguage_BG:
std::wcout << L_TEXT("Bulgarian") << std::endl;
break;
case L_OcrLanguage_CA:
std::wcout << L_TEXT("Catalan") << std::endl;
break;
case L_OcrLanguage_CS:
std::wcout << L_TEXT("Czech") << std::endl;
break;
case L_OcrLanguage_DA:
std::wcout << L_TEXT("Danish") << std::endl;
break;
case L_OcrLanguage_EL:
std::wcout << L_TEXT("Greek") << std::endl;
break;
case L_OcrLanguage_FI:
std::wcout << L_TEXT("Finnish") << std::endl;
break;
case L_OcrLanguage_HU:
std::wcout << L_TEXT("Hungarian") << std::endl;
break;
case L_OcrLanguage_ID:
std::wcout << L_TEXT("Indonesian") << std::endl;
break;
case L_OcrLanguage_LT:
std::wcout << L_TEXT("Lithuanian") << std::endl;
break;
case L_OcrLanguage_LV:
std::wcout << L_TEXT("Latvian") << std::endl;
break;
case L_OcrLanguage_NL:
std::wcout << L_TEXT("Dutch") << std::endl;
break;
case L_OcrLanguage_NO:
std::wcout << L_TEXT("Norwegian") << std::endl;
break;
case L_OcrLanguage_PL:
std::wcout << L_TEXT("Polish") << std::endl;
break;
case L_OcrLanguage_PT:
std::wcout << L_TEXT("Portuguese") << std::endl;
break;
case L_OcrLanguage_RO:
std::wcout << L_TEXT("Romanian") << std::endl;
break;
case L_OcrLanguage_RU:
std::wcout << L_TEXT("Russian") << std::endl;
break;
case L_OcrLanguage_SK:
std::wcout << L_TEXT("Slovak") << std::endl;
break;
case L_OcrLanguage_SL:
std::wcout << L_TEXT("Slovenian") << std::endl;
break;
case L_OcrLanguage_SR:
std::wcout << L_TEXT("Serbian") << std::endl;
break;
case L_OcrLanguage_SV:
std::wcout << L_TEXT("Swedish") << std::endl;
break;
case L_OcrLanguage_TR:
std::wcout << L_TEXT("Turkish") << std::endl;
break;
case L_OcrLanguage_UK:
std::wcout << L_TEXT("Ukrainian") << std::endl;
break;
case L_OcrLanguage_VI:
std::wcout << L_TEXT("Vietnamese") << std::endl;
break;
case L_OcrLanguage_JA:
std::wcout << L_TEXT("Japanese") << std::endl;
break;
case L_OcrLanguage_KO:
std::wcout << L_TEXT("Korean") << std::endl;
break;
case L_OcrLanguage_ZH_HANS:
std::wcout << L_TEXT("Chinese (Simplified)") << std::endl;
break;
case L_OcrLanguage_ZH_HANT:
std::wcout << L_TEXT("Chinese (Traditional)") << std::endl;
break;
case L_OcrLanguage_AF:
std::wcout << L_TEXT("Afrikaans") << std::endl;
break;
case L_OcrLanguage_AZ:
std::wcout << L_TEXT("Azerbaijani") << std::endl;
break;
case L_OcrLanguage_BE:
std::wcout << L_TEXT("Belarusian") << std::endl;
break;
case L_OcrLanguage_ET:
std::wcout << L_TEXT("Estonian") << std::endl;
break;
case L_OcrLanguage_EU:
std::wcout << L_TEXT("Basque") << std::endl;
break;
case L_OcrLanguage_GL:
std::wcout << L_TEXT("Galician") << std::endl;
break;
case L_OcrLanguage_HR:
std::wcout << L_TEXT("Croatian") << std::endl;
break;
case L_OcrLanguage_IS:
std::wcout << L_TEXT("Icelandic") << std::endl;
break;
case L_OcrLanguage_MK:
std::wcout << L_TEXT("Macedonian") << std::endl;
break;
case L_OcrLanguage_MT:
std::wcout << L_TEXT("Maltese") << std::endl;
break;
case L_OcrLanguage_MS:
std::wcout << L_TEXT("Malay") << std::endl;
break;
case L_OcrLanguage_SQ:
std::wcout << L_TEXT("Albanian") << std::endl;
break;
case L_OcrLanguage_SW:
std::wcout << L_TEXT("Swahili") << std::endl;
break;
case L_OcrLanguage_TH:
std::wcout << L_TEXT("Thai") << std::endl;
break;
case L_OcrLanguage_TE:
std::wcout << L_TEXT("Telugu") << std::endl;
break;
}
}
}
}
//CLEAN UP
//Free the bitmap if error creating OCR page
if(bitmap.Flags.Allocated)
L_FreeBitmap(&bitmap);
//Destroy the page & free bitmap
if(ocrPage != NULL)
L_OcrPage_Destroy(ocrPage);
// Shutdown the engine
L_OcrEngine_Destroy(ocrEngine);
return SUCCESS;
}