LEADTOOLS defines the following data type for referencing an OCR setting manager handle:
Data Type | Definition and Usage |
---|---|
L_OcrSettingManager | Defined as Handle, used as a handle of the OCR setting manager. |
// Writes every setting exposed by an OCR setting manager to the console:
// its name, value type and friendly name, followed by the details that apply
// to that value type (range, units, enum member names, string limits).
//
// settingManager: handle of the OCR setting manager to enumerate.
//
// A setting whose name or descriptor cannot be retrieved is skipped, so the
// descriptor fields are only read after the toolkit has filled them in.
void DumpAllSettings(L_OcrSettingManager settingManager)
{
   L_UINT settingsCount = 0;
   L_TCHAR settingName[80] = { 0 };

   // Use the wide stream for the header as well; every other line below is
   // written with std::wcout and mixing narrow and wide output on the same
   // console stream is not portable.
   std::wcout << L"-----------------------------------------------------\n"
              << L"Settings\n";

   if (L_OcrSettingManager_GetSettingCount(settingManager, &settingsCount) != SUCCESS)
      return;

   for (L_UINT settingIndex = 0; settingIndex < settingsCount; settingIndex++)
   {
      // Get the name of this setting
      if (L_OcrSettingManager_GetSettingName(settingManager, settingIndex, settingName, _countof(settingName)) != SUCCESS)
         continue;

      // Get information about this setting
      L_OcrSettingDescriptor descriptor;
      descriptor.StructSize = sizeof(L_OcrSettingDescriptor);
      if (L_OcrSettingManager_GetSettingDescriptor(settingManager, settingName, &descriptor) != SUCCESS)
         continue;

      // Show the information common to all value types
      std::wcout << L" Name: " << descriptor.Name << std::endl;
      std::wcout << L" ValueType: " << descriptor.ValueType << std::endl;
      std::wcout << L" FriendlyName: " << descriptor.FriendlyName << std::endl;

      switch (descriptor.ValueType)
      {
      case L_OcrSettingValueType_BeginCategory:
         std::wcout << L"-------------------------------------\n";
         break;

      case L_OcrSettingValueType_Integer:
         std::wcout << L" Units: " << descriptor.Units << std::endl;
         std::wcout << L" IntegerMinimumValue: " << descriptor.IntegerMinimumValue << std::endl;
         std::wcout << L" IntegerMaximumValue: " << descriptor.IntegerMaximumValue << std::endl << std::endl;
         break;

      case L_OcrSettingValueType_Enum:
         std::wcout << L" EnumIsFlags: " << descriptor.EnumIsFlags << std::endl;
         std::wcout << L" EnumMemberFriendlyNames" << std::endl;
         {
            // The friendly names arrive as one comma-separated list. Walk the
            // list in place and print each piece straight from the descriptor:
            // no fixed-size scratch buffers, so the length of a name or of the
            // whole list cannot overflow anything.
            const L_TCHAR* cursor = descriptor.EnumMemberFriendlyNames;
            while (cursor != NULL)
            {
               const L_TCHAR* separator = wcschr(cursor, L',');
               std::wcout << L" ";
               if (separator != NULL)
               {
                  // Print only the characters before the comma
                  std::wcout.write(cursor, static_cast<std::streamsize>(separator - cursor));
                  cursor = separator + 1;
               }
               else
               {
                  // Last (or only) value: the rest of the list
                  std::wcout << cursor;
                  cursor = NULL;
               }
               std::wcout << std::endl;
            }
         }
         break;

      case L_OcrSettingValueType_Double:
         std::wcout << L" Units: " << descriptor.Units << std::endl;
         std::wcout << L" DoubleMinimumValue: " << descriptor.DoubleMinimumValue << std::endl;
         std::wcout << L" DoubleMaximumValue: " << descriptor.DoubleMaximumValue << std::endl << std::endl;
         break;

      case L_OcrSettingValueType_String:
         std::wcout << L" StringMaximumLength: " << descriptor.StringMaximumLength << std::endl;
         std::wcout << L" StringNullAllowed: " << descriptor.StringNullAllowed << std::endl << std::endl;
         break;

      case L_OcrSettingValueType_Boolean:
      case L_OcrSettingValueType_EndCategory:
         // Nothing beyond the common information to show
         break;
      }
   }
}
// Example: obtains the setting manager of an OCR engine, dumps all of its
// settings, then recognizes Ocr1.tif twice -- once saved as text with the font
// style/attribute detection switched off, once saved as PDF with the engine's
// own settings.
//
// Returns SUCCESS when every step succeeded, otherwise the code returned by
// the first toolkit call that failed. The engine, and every page, document
// and string obtained along the way, is released on all paths.
L_INT L_OcrEngine_GetSettingManagerExample()
{
   L_TCHAR outFileName[MAX_PATH] = { 0 },
           tifFileName[MAX_PATH] = { 0 };
   L_TCHAR fontStylesSetting[] = L_TEXT("Recognition.Fonts.DetectFontStyles");
   L_TCHAR fontAttributesSetting[] = L_TEXT("Recognition.Fonts.RecognizeFontAttributes");
   L_OcrEngine ocrEngine = NULL;
   L_OcrSettingManager ocrSettingManager = NULL;
   L_OcrDocumentManager ocrDocumentManager = NULL;

   // Create an instance of the engine
   L_INT retCode = L_OcrEngineManager_CreateEngine(L_OcrEngineType_Advantage, &ocrEngine);
   if (retCode != SUCCESS)
      return retCode;

   // Start the engine using default parameters
   retCode = L_OcrEngine_Startup(ocrEngine, NULL, OCR_ADVANTAGE_RUNTIME_DIR);

   // Get the manager to modify global OCR Engine settings
   if (retCode == SUCCESS)
      retCode = L_OcrEngine_GetSettingManager(ocrEngine, &ocrSettingManager);

   // Get the manager used to create the output documents
   if (retCode == SUCCESS)
      retCode = L_OcrEngine_GetDocumentManager(ocrEngine, &ocrDocumentManager);

   if (retCode != SUCCESS)
   {
      L_OcrEngine_Destroy(ocrEngine);
      return retCode;
   }

   // Dump all the settings supported by this engine to console output
   DumpAllSettings(ocrSettingManager);

   // Image file to OCR
   wcscpy_s(tifFileName, MAX_PATH, MAKE_IMAGE_PATH(L_TEXT("Ocr1.tif")));

   // File formats to save
   DOCWRTFORMAT formats[2] = { DOCUMENTFORMAT_TXT, DOCUMENTFORMAT_PDF };

   // Stop at the first format that fails; retCode then carries the error out
   for (size_t formatIndex = 0; retCode == SUCCESS && formatIndex < _countof(formats); formatIndex++)
   {
      const bool isText = (formats[formatIndex] == DOCUMENTFORMAT_TXT);

      BITMAPHANDLE bitmap = { 0 };
      L_OcrPage ocrPage = NULL;
      L_OcrDocument ocrDocument = NULL;
      L_TCHAR* savedFontStyles = NULL;    // value to restore; allocated by the toolkit
      L_TCHAR* activeFontStyles = NULL;   // value in effect; allocated by the toolkit
      L_BOOL savedFontAttributes = false;
      L_BOOL activeFontAttributes = false;
      bool fontStylesChanged = false;
      bool fontAttributesChanged = false;

      if (isText)
      {
         // Generate & set the output file name
         wcscpy_s(outFileName, MAX_PATH, MAKE_IMAGE_PATH(L_TEXT("Ocr1.txt")));
         std::wcout << L"Format: " << L_TEXT("DOCUMENTFORMAT_TXT") << L"\nOutput file: " << outFileName << std::endl;

         // This is 'text' format, we do not need to recognize font attributes
         // such as bold and italic. This makes the recognition process faster.
         std::wcout << L"Turning off font attributes\n";

         // Save the old settings, then turn them off. Each "changed" flag is
         // only raised once the new value is in place, so the restore step
         // below never writes back a value that was not read.
         retCode = L_OcrSettingManager_GetEnumValueAsString(ocrSettingManager, fontStylesSetting, &savedFontStyles);
         if (retCode == SUCCESS)
         {
            retCode = L_OcrSettingManager_SetEnumValueAsString(ocrSettingManager, fontStylesSetting, L_TEXT("None"));
            fontStylesChanged = (retCode == SUCCESS);
         }
         if (retCode == SUCCESS)
            retCode = L_OcrSettingManager_GetBooleanValue(ocrSettingManager, fontAttributesSetting, &savedFontAttributes);
         if (retCode == SUCCESS)
         {
            retCode = L_OcrSettingManager_SetBooleanValue(ocrSettingManager, fontAttributesSetting, false);
            fontAttributesChanged = (retCode == SUCCESS);
         }
      }
      else
      {
         // Generate & set the output file name
         wcscpy_s(outFileName, MAX_PATH, MAKE_IMAGE_PATH(L_TEXT("Ocr1.pdf")));
         std::wcout << L"Format: " << L_TEXT("DOCUMENTFORMAT_PDF") << L"\nOutput file: " << outFileName << std::endl;
      }

      // Show the settings we are using
      if (retCode == SUCCESS)
         retCode = L_OcrSettingManager_GetEnumValueAsString(ocrSettingManager, fontStylesSetting, &activeFontStyles);
      if (retCode == SUCCESS)
         retCode = L_OcrSettingManager_GetBooleanValue(ocrSettingManager, fontAttributesSetting, &activeFontAttributes);
      if (retCode == SUCCESS)
      {
         std::wcout << L"Recognizing using these font attributes settings:\n";
         std::wcout << L"Recognition.Fonts.DetectFontStyles: " << activeFontStyles << std::endl;
         std::wcout << L"Recognition.Fonts.RecognizeFontAttributes: " << activeFontAttributes << std::endl;
      }
      // The string was allocated by the toolkit; release it once displayed
      if (activeFontStyles != NULL)
         L_OcrMemory_Free(activeFontStyles);

      // Load an image to process
      if (retCode == SUCCESS)
         retCode = L_LoadBitmap(tifFileName, &bitmap, sizeof(BITMAPHANDLE), 0, ORDER_RGB, NULL, NULL);

      // Add image to OCR page
      if (retCode == SUCCESS)
      {
         retCode = L_OcrPage_FromBitmap(ocrEngine, &ocrPage, &bitmap, L_OcrBitmapSharingMode_AutoFree, NULL, NULL);
         if (retCode == SUCCESS)
            bitmap.Flags.Allocated = 0;   // Transfer ownership to OCR page
         else
            L_FreeBitmap(&bitmap);        // No page was created, so the image is still ours to release
      }

      // Recognize the page (automatically zoned)
      if (retCode == SUCCESS)
         retCode = L_OcrPage_Recognize(ocrPage, NULL, NULL);

      // Create an OCR document
      if (retCode == SUCCESS)
         retCode = L_OcrDocumentManager_CreateDocument(ocrDocumentManager, &ocrDocument, L_OcrCreateDocumentOptions_AutoDeleteFile, NULL);

      // In Document File Mode, add OcrPage to OcrDocument after recognition
      if (retCode == SUCCESS)
         retCode = L_OcrDocument_AddPage(ocrDocument, ocrPage);
      if (ocrPage != NULL)
         L_OcrPage_Destroy(ocrPage);

      // Save the document we have
      if (retCode == SUCCESS)
         retCode = L_OcrDocument_Save(ocrDocument, outFileName, formats[formatIndex], NULL, NULL);
      if (ocrDocument != NULL)
         L_OcrDocument_Destroy(ocrDocument);

      // Re-set the original settings, even when recognition failed, so the
      // engine is not left with the text-only configuration
      if (fontStylesChanged || fontAttributesChanged)
      {
         std::wcout << L"Resetting original settings\n";
         if (fontStylesChanged)
         {
            const L_INT restoreCode = L_OcrSettingManager_SetEnumValueAsString(ocrSettingManager, fontStylesSetting, savedFontStyles);
            if (retCode == SUCCESS)
               retCode = restoreCode;
         }
         if (fontAttributesChanged)
         {
            const L_INT restoreCode = L_OcrSettingManager_SetBooleanValue(ocrSettingManager, fontAttributesSetting, savedFontAttributes);
            if (retCode == SUCCESS)
               retCode = restoreCode;
         }
      }
      // The saved string was allocated by the toolkit; release it after the restore
      if (savedFontStyles != NULL)
         L_OcrMemory_Free(savedFontStyles);
   }

   //CLEANUP
   L_OcrEngine_Destroy(ocrEngine);
   return retCode;
}