Creates a new OCR file or memory-based document handle.
#include "ltocr.h"
L_LTOCR_API L_INT EXT_FUNCTION L_OcrDocumentManager_CreateDocument(documentManager, document, options, fileName)
Handle to the OCR engine document manager.
Address to L_OcrDocument variable to be updated with the created document handle that can participate in recognition and saving operations.
Options to control how the document is created or loaded.
The document file name. This value can be NULL.
Value | Meaning |
---|---|
SUCCESS | The function was successful. |
< 1 | An error occurred. Refer to Return Codes. |
This method can either create a file or memory-based OCR document, or load a previously created file-based document based on the values of fileName and options as follows:
To create a memory-based document, pass L_OcrCreateDocumentOptions_InMemory to options. fileName is not used and the engine will not use a disk file to store the document data.
To create a file-based document that will not be re-used, pass NULL to fileName and L_OcrCreateDocumentOptions_AutoDeleteFile to options. In this case, the engine will create a temporary file on disk to use as the store for the document file. The file is deleted when L_OcrDocument is destroyed. Note that if you use your own file name in fileName along with L_OcrCreateDocumentOptions_AutoDeleteFile, the engine will overwrite this file if it exists and automatically delete it when the document is destroyed.
To create a file-based document that will be re-used, pass a file name to fileName and L_OcrCreateDocumentOptions_None to options. In this case, the engine will overwrite this file if it exists but will not delete it when L_OcrDocument is destroyed.
To re-load a document that was created with the previous option, pass the same file name to fileName and L_OcrCreateDocumentOptions_LoadExisting to options. In this case, the engine will re-generate the document from data found in the file.
Use L_OcrDocument_IsInMemory to test whether a document is memory or file-based and L_OcrDocument_GetFileName to get the name of the disk-file used by a file-based document. This will be set to the same value passed to fileName or the name of the temp file created.
For more information on memory and file-based documents, refer to Programming with LEADTOOLS OCR Module - LEAD Engine.
A typical OCR operation using the OCR engine involves starting up the engine, creating an OCR document using the L_OcrDocumentManager_CreateDocument method, then adding the pages into it and performing either automatic or manual zoning. Once this is done, L_OcrPage_Recognize is called on each page to collect the recognition data and have it stored internally in the page. After the recognition data is collected, you use the various L_OcrDocument_Save or L_OcrDocument_SaveXml methods to save the document to its final format.
When you are done using the L_OcrDocument handle created by this method, you should destroy it as soon as possible to free its resources. Destroying an L_OcrDocument handle will free all the pages stored inside its internal pages list.
Notes:
/*
 * Example: recognize a single image and save it as PDF through an OCR document.
 *
 * Steps performed:
 *   1. Create and start a LEAD OCR engine.
 *   2. Load "Ocr1.tif", wrap it in an OCR page, auto-zone and recognize it.
 *   3. Create an OCR document (memory-based or auto-deleted file-based),
 *      report which kind was created, and add the recognized page to it.
 *   4. Save the document as "Ocr1.pdf".
 *
 * Parameters:
 *   inMemoryMode - true to create the document with
 *                  L_OcrCreateDocumentOptions_InMemory; false to create it with
 *                  L_OcrCreateDocumentOptions_AutoDeleteFile and a NULL file name.
 *
 * Returns:
 *   SUCCESS when every step succeeded; otherwise the return code of the first
 *   call that failed.
 *
 * Every handle acquired here is released at the CLEANUP label, and every failure
 * path goes through that label so nothing is leaked.
 */
L_INT L_OcrDocumentManager_CreateDocumentExample(bool inMemoryMode)
{
   // All locals are declared before the first goto so that no jump to CLEANUP
   // crosses an initialized declaration.
   BITMAPHANDLE bitmap = { 0 };
   L_OcrEngine ocrEngine = NULL;
   L_OcrPage ocrPage = NULL;
   L_OcrDocumentManager ocrDocumentManager = NULL;
   L_OcrDocument ocrDocument = NULL;
   L_BOOL isInMemory = L_FALSE;

   // Create an instance of the engine
   L_INT retCode = L_OcrEngineManager_CreateEngine(L_OcrEngineType_LEAD, &ocrEngine);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Start the engine using default parameters.
   // On failure go through CLEANUP (not a bare return) so the engine handle
   // created above is destroyed instead of leaked.
   retCode = L_OcrEngine_Startup(ocrEngine, NULL, OCR_LEAD_RUNTIME_DIR);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Load a page to be recognized
   retCode = L_LoadBitmap(MAKE_IMAGE_PATH(L_TEXT("Ocr1.tif")), &bitmap, sizeof(BITMAPHANDLE), 0, ORDER_RGB, NULL, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Add an image to OCR page. Transfer ownership of the bitmap to the page
   retCode = L_OcrPage_FromBitmap(ocrEngine, &ocrPage, &bitmap, L_OcrBitmapSharingMode_AutoFree, NULL, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // We have a valid page and bitmap ownership has transferred. So, we do not need to free the bitmap anymore.
   // Bitmap will be freed when ocrPage is destroyed.
   bitmap.Flags.Allocated = 0;

   // Automatically find areas/zones on the page where text is located
   retCode = L_OcrPage_AutoZone(ocrPage, NULL, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Recognize the page
   // Note: Recognize can be called without calling AutoZone or manually adding zones.
   // The engine will check and automatically auto-zone the page.
   retCode = L_OcrPage_Recognize(ocrPage, NULL, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Get the document manager
   retCode = L_OcrEngine_GetDocumentManager(ocrEngine, &ocrDocumentManager);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Create an OCR document
   if(inMemoryMode)
      retCode = L_OcrDocumentManager_CreateDocument(ocrDocumentManager, &ocrDocument, L_OcrCreateDocumentOptions_InMemory, NULL);
   else
      retCode = L_OcrDocumentManager_CreateDocument(ocrDocumentManager, &ocrDocument, L_OcrCreateDocumentOptions_AutoDeleteFile, NULL);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Show created OCR document type.
   // The return codes of the two query calls below are not checked; the output
   // is informational only and does not affect the rest of the example.
   L_OcrDocument_IsInMemory(ocrDocument, &isInMemory);
   if(isInMemory)
      std::cout << "OCR document type: Memory-based";
   else
   {
      L_TCHAR documentFileName[MAX_PATH] = {0};
      L_OcrDocument_GetFileName(ocrDocument, documentFileName, _countof(documentFileName));
      // NOTE(review): if L_TCHAR is a wide character type in this build, std::cout
      // will print the pointer value rather than the text - confirm and switch
      // to std::wcout if so.
      std::cout << "OCR document type: File-based, " << "Document file path: " << documentFileName;
   }

   // Add page to the document. Note: When using the OcrDocument in file mode,
   // adding the page to the document must come after recognition. If the OcrPage
   // is added before it is recognized, no recognition data will be associated with
   // the OcrPage in the OcrDocument.
   retCode = L_OcrDocument_AddPage(ocrDocument, ocrPage);
   if(retCode != SUCCESS)
      goto CLEANUP;

   // Adding the page to a file-based document will take a snapshot of the recognition data and store it in the document. At this
   // point, the page is no longer needed. So destroy it to free up memory not used anymore
   L_OcrPage_Destroy(ocrPage);
   // Set the handle to NULL so we do not free it again in our clean-up code
   ocrPage = NULL;

   // Save the document we have as PDF
   retCode = L_OcrDocument_Save(ocrDocument, MAKE_IMAGE_PATH(L_TEXT("Ocr1.pdf")), DOCUMENTFORMAT_PDF, NULL, NULL);

CLEANUP:
   // Release only what is still owned here
   if(bitmap.Flags.Allocated)
      L_FreeBitmap(&bitmap);
   if(ocrPage != NULL)
      L_OcrPage_Destroy(ocrPage);
   if(ocrDocument != NULL)
      L_OcrDocument_Destroy(ocrDocument);
   if(ocrEngine != NULL)
      L_OcrEngine_Destroy(ocrEngine);
   return retCode;
}