#include "ltocr.h"

#include <iostream>
L_LTOCR_API L_INT EXT_FUNCTION L_OcrDocumentManager_CreateDocument(documentManager, document, options, fileName)
L_OcrDocumentManager documentManager; | handle to the OCR engine document manager |
L_OcrDocument* document; | address to L_OcrDocument variable to be updated |
L_OcrCreateDocumentOptions options; | options to control how the document is created or loaded |
const L_TCHAR* fileName; | the document file name. This value can be NULL. |
Creates a new file-based or memory-based OCR document handle.
Parameter | Description |
---|---|
documentManager | Handle to the OCR engine document manager. |
document | Address to L_OcrDocument variable to be updated with the created document handle that can participate in recognition and saving operations. |
options | Options to control how the document is created or loaded. |
fileName | The document file name. This value can be NULL. |
SUCCESS | The function was successful. |
< 1 | An error occurred. Refer to Return Codes. |
This method can either create a file or memory-based OCR document, or load a previously created file-based document based on the values of fileName and options as follows:
To create a memory-based document, pass L_OcrCreateDocumentOptions_InMemory to options. fileName is not used and the engine will not use a disk file to store the document data.
To create a file-based document that will not be re-used, pass NULL to fileName and L_OcrCreateDocumentOptions_AutoDeleteFile to options. In this case, the engine will create a temporary file on disk to use as the store for the document file. The file is deleted when L_OcrDocument is destroyed. Note that if you use your own file name in fileName along with L_OcrCreateDocumentOptions_AutoDeleteFile, the engine will overwrite this file if it exists and automatically delete it when the document is destroyed.
To create a file-based document that will be re-used, pass a file name to fileName and L_OcrCreateDocumentOptions_None to options. In this case, the engine will overwrite this file if it exists but will not delete it when L_OcrDocument is destroyed.
To re-load a document that was created with the previous option, pass the same file name to fileName and L_OcrCreateDocumentOptions_LoadExisting to options. In this case, the engine will re-generate the document from data found in the file.
Use L_OcrDocument_IsInMemory to test whether a document is memory or file-based and L_OcrDocument_GetFileName to get the name of the disk-file used by a file-based document. This will be set to the same value passed to fileName or the name of the temp file created.
For more information on memory and file-based documents, refer to Programming with LEADTOOLS OCR Advantage.
A typical OCR operation involves starting up the OCR engine, creating an OCR document using the L_OcrDocumentManager_CreateDocument method, adding the pages to it, and performing either automatic or manual zoning. Once this is done, L_OcrPage_Recognize is called on each page to collect the recognition data and have it stored internally in the page. After the recognition data is collected, you use the various L_OcrDocument_Save or L_OcrDocument_SaveXml methods to save the document to its final format.
When you are done using the L_OcrDocument handle created by this method, you should destroy it as soon as possible to free its resources. Destroying an L_OcrDocument handle will free all the pages stored inside its internal pages list.
Notes:
1. When you are done with the L_OcrDocument handle you must destroy it using L_OcrDocument_Destroy.
2. In memory-based documents, destroying the L_OcrDocument handle will free the document pages, so you should not call L_OcrPage_Destroy on the added pages since the OCR document owns them.
Required DLLs and Libraries
LTOCR For a listing of the exact DLLs and Libraries needed, based on the toolkit version, refer to Files To Be Included With Your Application. |
// Builds the full path of a sample image by placing pFileName directly after the images-directory
// string literal. The two literals are joined by adjacent string-literal concatenation, so pFileName
// must itself be a string literal (for example L_TEXT("Ocr1.tif")), not a pointer variable.
#define MAKE_IMAGE_PATH(pFileName) TEXT("C:\\Users\\Public\\Documents\\LEADTOOLS Images\\")pFileName
// Directory string handed to L_OcrEngine_Startup by the example in this file
// (presumably the install location of the OCR Advantage runtime - adjust for your machine).
#define OCR_ADVANTAGE_RUNTIME_DIR TEXT("C:\\LEADTOOLS 19\\Bin\\Common\\OcrAdvantageRuntime")
// Prints the "file-based" status line together with the document file name.
// Two overloads exist so that overload resolution picks the stream matching the character type
// of L_TCHAR: streaming a wide-character buffer into std::cout would print a pointer value
// instead of the text.
inline void L_OcrCreateDocumentExample_PrintFileName(const char* fileName)
{
   std::cout << "OCR document type: File-based, " << "Document file path: " << fileName << '\n';
}

inline void L_OcrCreateDocumentExample_PrintFileName(const wchar_t* fileName)
{
   std::wcout << L"OCR document type: File-based, " << L"Document file path: " << fileName << L'\n';
}

// Example: recognizes the sample image Ocr1.tif and saves the result as Ocr1.pdf through an
// OCR document created with L_OcrDocumentManager_CreateDocument.
//
// inMemoryMode - true creates the document with L_OcrCreateDocumentOptions_InMemory;
//                false creates it with L_OcrCreateDocumentOptions_AutoDeleteFile and a NULL
//                file name.
//
// Returns SUCCESS when every step succeeded; otherwise the return code of the first call that
// failed. All handles created along the way are released before returning, on every path.
L_INT L_OcrDocumentManager_CreateDocumentExample(bool inMemoryMode)
{
   // Every local is declared before the first "goto CLEANUP" so that no jump bypasses an
   // initialization.
   BITMAPHANDLE bitmap = { 0 };
   L_OcrEngine ocrEngine = NULL;
   L_OcrPage ocrPage = NULL;
   L_OcrDocumentManager ocrDocumentManager = NULL;
   L_OcrDocument ocrDocument = NULL;
   L_BOOL isInMemory = L_FALSE;

   // Create an instance of the engine
   L_INT retCode = L_OcrEngineManager_CreateEngine(L_OcrEngineType_Advantage, &ocrEngine);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Start the engine using default parameters.
   // On failure go through CLEANUP so the engine handle created above is destroyed.
   retCode = L_OcrEngine_Startup(ocrEngine, NULL, OCR_ADVANTAGE_RUNTIME_DIR);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Load a page to be recognized
   retCode = L_LoadBitmap(MAKE_IMAGE_PATH(L_TEXT("Ocr1.tif")), &bitmap, sizeof(BITMAPHANDLE), 0, ORDER_RGB, NULL, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Add an image to OCR page. Transfer ownership of the bitmap to the page
   retCode = L_OcrPage_FromBitmap(ocrEngine, &ocrPage, &bitmap, L_OcrBitmapSharingMode_AutoFree, NULL, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // We have a valid page and bitmap ownership has transferred. So, we do not need to free the bitmap anymore.
   // Bitmap will be freed when ocrPage is destroyed.
   bitmap.Flags.Allocated = 0;

   // Automatically find areas/zones on the page where text is located
   retCode = L_OcrPage_AutoZone(ocrPage, NULL, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Recognize the page
   // Note: Recognize can be called without calling AutoZone or manually adding zones.
   // The engine will check and automatically auto-zones the page.
   retCode = L_OcrPage_Recognize(ocrPage, NULL, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Get the document manager
   retCode = L_OcrEngine_GetDocumentManager(ocrEngine, &ocrDocumentManager);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Create an OCR document
   if(inMemoryMode)
      retCode = L_OcrDocumentManager_CreateDocument(ocrDocumentManager, &ocrDocument, L_OcrCreateDocumentOptions_InMemory, NULL);
   else
      retCode = L_OcrDocumentManager_CreateDocument(ocrDocumentManager, &ocrDocument, L_OcrCreateDocumentOptions_AutoDeleteFile, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Show created OCR document type. The result also decides who owns the page after
   // L_OcrDocument_AddPage, so a failure here must not be ignored.
   retCode = L_OcrDocument_IsInMemory(ocrDocument, &isInMemory);
   if(retCode != SUCCESS)
      goto CLEANUP;

   if(isInMemory)
   {
      std::cout << "OCR document type: Memory-based" << '\n';
   }
   else
   {
      L_TCHAR documentFileName[MAX_PATH] = {0};
      retCode = L_OcrDocument_GetFileName(ocrDocument, documentFileName, _countof(documentFileName));
      if(retCode != SUCCESS)
         goto CLEANUP;
      L_OcrCreateDocumentExample_PrintFileName(documentFileName);
   }

   // Add page to the document. Note: When using the OcrDocument in file mode,
   // adding the page to the document must come after recognition. If the OcrPage,
   // is added before it is recognized, no recognition data will be associated with
   // the OcrPage in the OcrDocument.
   retCode = L_OcrDocument_AddPage(ocrDocument, ocrPage);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // File-based document: adding the page takes a snapshot of the recognition data and stores
   // it in the document, so the page is no longer needed and is destroyed to free memory.
   // Memory-based document: the document owns the added page and frees it when the document is
   // destroyed, so L_OcrPage_Destroy must NOT be called on it.
   if(!isInMemory)
      L_OcrPage_Destroy(ocrPage);

   // In both cases this function no longer owns the page; clear the handle so the clean-up
   // code does not free it.
   ocrPage = NULL;

   // Save the document we have as PDF
   retCode = L_OcrDocument_Save(ocrDocument, MAKE_IMAGE_PATH(L_TEXT("Ocr1.pdf")), DOCUMENTFORMAT_PDF, NULL, NULL);

CLEANUP:
   // Release whatever is still owned by this function.
   if(bitmap.Flags.Allocated)
      L_FreeBitmap(&bitmap);

   if(ocrPage != NULL)
      L_OcrPage_Destroy(ocrPage);

   if(ocrDocument != NULL)
      L_OcrDocument_Destroy(ocrDocument);

   if(ocrEngine != NULL)
      L_OcrEngine_Destroy(ocrEngine);

   return retCode;
}