Welcome Guest! To enable all features, please Login or Register.

Notification

Icon
Error

Options
View
Last Go to last post Unread Go to first unread post
#1 Posted : Friday, October 25, 2019 1:12:32 PM(UTC)
Pawel L.

Groups: Registered, Manager, Tech Support, Administrators
Posts: 14


OCR to Speech


By combining LEADTOOLS OCR with System.Speech.Synthesis, we can OCR an image and have the recognized text read back to us as audio.

Code:

using System;
using System.Collections.Generic;
using System.Windows.Forms;
using System.Speech.Synthesis;
using System.IO;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Controls;
using Leadtools.Annotations.Engine;
using Leadtools.Annotations.Automation;
using Leadtools.Annotations.WinForms;
using Leadtools.Ocr;



namespace ImageToSpeechDemo
{
   public partial class MainForm : Form
   {
      #region variables
      // Image viewer created in InitControls and docked inside panel1.
      private ImageViewer viewer;
      // Interactive mode that is added to the viewer's InteractiveModes in InitAutomation.
      private AutomationInteractiveMode annInteractiveMode;
      // Automation control whose ImageViewer property is set to the viewer above.
      private IAnnAutomationControl automationControl;
      // Automation manager; InitAutomation sets its UserMode to AnnUserMode.Run.
      private AnnAutomationManager annManager;
      // Automation object whose Container receives the yellow highlight rectangles.
      private AnnAutomation annAutomation;
      // Result of ocrPage.GetRecognizedCharacters() for the current page.
      private IOcrPageCharacters _ocrPageCharacters;
      // Words gathered from every zone of _ocrPageCharacters (see OCRImage).
      private List<OcrWord> ocrZoneWords;
      // OCR engine created and started in OCRImage.
      private IOcrEngine ocrEngine;
      // OCR page built from viewer.Image; null until an image has been loaded.
      private IOcrPage ocrPage;
      // Speech synthesizer that outputs to the default audio device.
      private SpeechSynthesizer synth;
      // Image-space bounds of the word picked by a mouse click; LeadRectD.Empty
      // means no clicked word is pending and Synth_SpeakProgress searches by text.
      private LeadRectD wordBounds = LeadRectD.Empty;
      #endregion variables
      /// <summary>
      /// Builds the designer-generated UI, then the viewer/synthesizer, then the
      /// annotation automation.
      /// </summary>
      public MainForm()
      {
         InitializeComponent();
         // InitControls must run before InitAutomation: InitAutomation attaches
         // to the viewer instance that InitControls creates.
         InitControls();
         InitAutomation();
      }
      /// <summary>
      /// Sets the LEADTOOLS license, adds the image viewer to <c>panel1</c> and
      /// creates the speech synthesizer together with its event handlers.
      /// </summary>
      private void InitControls()
      {
         // License file path plus the key text read from the matching .key file.
         const string licensePath = @"C:\LEADTOOLS 20\Support\Common\License\eval-license-files.lic";
         string developerKey = File.ReadAllText(@"C:\LEADTOOLS 20\Support\Common\License\eval-license-files.lic.key");
         RasterSupport.SetLicense(licensePath, developerKey);

         // Viewer that fills its host panel.
         viewer = new ImageViewer { Dock = DockStyle.Fill };
         panel1.Controls.Add(viewer);

         // Synthesizer that plays through the default audio device.
         synth = new SpeechSynthesizer();
         synth.SetOutputToDefaultAudioDevice();

         // Show a wait cursor for the duration of a speak operation.
         synth.SpeakStarted += (sender, args) => Cursor = Cursors.WaitCursor;
         synth.SpeakCompleted += (sender, args) => Cursor = Cursors.Default;

         // Highlights words as they are spoken.
         synth.SpeakProgress += Synth_SpeakProgress;
      }
      /// <summary>
      /// Creates the annotation automation manager, connects it to the image
      /// viewer through an automation control and interactive mode, and
      /// subscribes to the viewer's mouse-down event.
      /// </summary>
      void InitAutomation()
      {
         // Manager in run mode.
         annManager = new AnnAutomationManager { UserMode = AnnUserMode.Run };
         AutomationManagerHelper managerHelper = new AutomationManagerHelper(annManager);

         // Automation control bound to the viewer.
         var viewerControl = new ImageViewerAutomationControl();
         automationControl = viewerControl;
         viewerControl.ImageViewer = viewer;

         // Interactive mode that drives the automation control.
         annInteractiveMode = new AutomationInteractiveMode();
         annInteractiveMode.AutomationControl = automationControl;

         var modes = viewer.InteractiveModes;
         modes.BeginUpdate();
         modes.Add(annInteractiveMode);
         modes.EndUpdate();

         // Active automation whose container holds the highlights.
         annAutomation = new AnnAutomation(annManager, automationControl);
         annAutomation.Active = true;

         viewer.MouseDown += Viewer_MouseDown;

         viewer.Zoom(ControlSizeMode.FitWidth, 1, viewer.DefaultZoomOrigin);
      }

      /// <summary>
      /// Speaks the recognized word under a left mouse click.
      /// </summary>
      /// <remarks>
      /// Nothing happens when no page has been recognized, when the synthesizer
      /// is already speaking, or when the click does not land on a word with
      /// text. In those cases <c>wordBounds</c> is left untouched so a later
      /// SpeakProgress event is not directed at a rectangle that was never spoken.
      /// </remarks>
      /// <param name="sender">The image viewer raising the event.</param>
      /// <param name="e">Mouse data; the location is in control coordinates.</param>
      private void Viewer_MouseDown(object sender, MouseEventArgs e)
      {
         if (e.Button != MouseButtons.Left || ocrPage == null || ocrZoneWords == null)
            return;

         if (synth.State == SynthesizerState.Speaking)
            return;

         synth.SpeakAsyncCancelAll();

         // Convert the click from control coordinates to image coordinates,
         // which is the space the OCR word bounds are compared against.
         var imgPoint = viewer.ConvertPoint(null, ImageViewerCoordinateType.Control, ImageViewerCoordinateType.Image, LeadPointD.Create(e.Location.X, e.Location.Y));
         var hitPoint = imgPoint.ToLeadPoint();

         // FindIndex reports a miss as -1, so a click on empty space is
         // detected explicitly instead of operating on a default/null word.
         int index = ocrZoneWords.FindIndex(w => w.Bounds.Contains(hitPoint));
         if (index < 0)
            return;

         var word = ocrZoneWords[index];
         if (string.IsNullOrEmpty(word.Value))
            return;

         // Only remember the bounds when the word is actually going to be spoken.
         wordBounds = LeadRectD.Create(word.Bounds.X, word.Bounds.Y, word.Bounds.Width, word.Bounds.Height);
         synth.SpeakAsync(word.Value);
      }
      /// <summary>
      /// Stops any speech, clears existing highlights, lets the user pick an
      /// image file, loads it into the viewer and runs OCR on it.
      /// </summary>
      /// <param name="sender">The menu item raising the event.</param>
      /// <param name="e">Unused event data.</param>
      private void loadImageToolStripMenuItem_Click(object sender, EventArgs e)
      {
         synth.SpeakAsyncCancelAll();
         if (annAutomation.Container.Children.Count != 0)
            annAutomation.Container.Children.Clear();

         // The dialog is IDisposable; release it as soon as the choice is known.
         using (OpenFileDialog dlg = new OpenFileDialog())
         {
            if (dlg.ShowDialog() != DialogResult.OK)
               return;

            using (RasterCodecs codecs = new RasterCodecs())
            {
               viewer.Image = codecs.Load(dlg.FileName);

               // A clicked-word rectangle from the previous image is meaningless
               // for the new one.
               wordBounds = LeadRectD.Empty;

               annAutomation.Container.Size = annAutomation.Container.Mapper.SizeToContainerCoordinates(viewer.ImageSize.ToLeadSizeD());
               OCRImage();
            }
         }
      }
      /// <summary>
      /// Reads the text of the recognized page aloud.
      /// </summary>
      /// <remarks>
      /// Does nothing when no image has been recognized yet or when the
      /// synthesizer is already speaking.
      /// </remarks>
      /// <param name="sender">The menu item raising the event.</param>
      /// <param name="e">Unused event data.</param>
      private void readTextToolStripMenuItem_Click(object sender, EventArgs e)
      {
         // Without a loaded image there is no OCR page to read from.
         if (ocrPage == null)
            return;

         if (synth.State == SynthesizerState.Speaking)
            return;

         if (annAutomation.Container.Children.Count != 0)
            annAutomation.Container.Children.Clear();

         // Make sure Synth_SpeakProgress matches highlights by spoken text
         // rather than by a rectangle left over from an earlier click.
         wordBounds = LeadRectD.Empty;

         string text = ocrPage.GetText(-1);

         if (!string.IsNullOrEmpty(text))
            synth.SpeakAsync(text);
      }
      /// <summary>
      /// Highlights the word currently being spoken.
      /// </summary>
      /// <remarks>
      /// When <c>wordBounds</c> is empty, the first not-yet-highlighted OCR word
      /// whose text contains the spoken text is highlighted. Otherwise the
      /// rectangle stored by the mouse click is highlighted and
      /// <c>wordBounds</c> is reset. The container mapper is switched to the
      /// identity transform while rectangles are computed and is always
      /// restored afterwards, even if highlighting throws.
      /// </remarks>
      /// <param name="sender">The speech synthesizer raising the event.</param>
      /// <param name="e">Progress data; <c>e.Text</c> is the text being spoken.</param>
      private void Synth_SpeakProgress(object sender, SpeakProgressEventArgs e)
      {
         this.Cursor = Cursors.WaitCursor;

         string spokenWord = e.Text;

         LeadMatrix matrix = annAutomation.Container.Mapper.Transform;
         annAutomation.Container.Mapper.UpdateTransform(LeadMatrix.Identity);

         try
         {
            if (wordBounds == LeadRectD.Empty)
            {
               // An empty search string would match every word and a null one
               // would make Contains throw, so skip the search in both cases.
               if (!string.IsNullOrEmpty(spokenWord) && ocrZoneWords != null)
               {
                  // Value is null-checked before it is dereferenced.
                  var words = ocrZoneWords.FindAll(s => s.Value != null && s.Value.Contains(spokenWord) && s.Bounds != null);
                  foreach (var word in words)
                  {
                     // Stop at the first match that was not highlighted before.
                     if (TryAddHighlight(word.Bounds.ToLeadRectD()))
                        break;
                  }
               }
            }
            else  //we clicked on a single word
            {
               if (TryAddHighlight(wordBounds))
                  wordBounds = LeadRectD.Empty;
            }
         }
         finally
         {
            annAutomation.Container.Mapper.UpdateTransform(matrix);
            this.Cursor = Cursors.Default;
         }
      }

      /// <summary>
      /// Adds a semi-transparent yellow rectangle over <paramref name="imageRect"/>
      /// unless the container already has an object at that location.
      /// </summary>
      /// <param name="imageRect">Rectangle to highlight, before conversion to container coordinates.</param>
      /// <returns><c>true</c> when a highlight was added; <c>false</c> when one was already there.</returns>
      private bool TryAddHighlight(LeadRectD imageRect)
      {
         LeadRectD contCoords = annAutomation.Container.Mapper.RectToContainerCoordinates(imageRect);
         if (annAutomation.Container.HitTestRect(contCoords).Length != 0) // see if it's highlighted already
            return false;

         AnnRectangleObject hilite = new AnnRectangleObject();
         hilite.Stroke = AnnStroke.Create(AnnSolidColorBrush.Create("Transparent"), LeadLengthD.Create(1));
         hilite.Fill = AnnSolidColorBrush.Create("Yellow");
         hilite.Opacity = .5;
         hilite.Rect = contCoords;
         annAutomation.Container.Children.Add(hilite);
         annAutomation.Invalidate(LeadRectD.Empty);
         return true;
      }
      /// <summary>
      /// Recognizes the image currently shown in the viewer and collects the
      /// recognized words of every zone into <c>ocrZoneWords</c>.
      /// </summary>
      /// <remarks>
      /// The OCR engine is created and started only once and is reused for
      /// later images. Creating a new engine per image would leave every
      /// previous engine running without being shut down or disposed.
      /// </remarks>
      private void OCRImage()
      {
         if (ocrEngine == null)
         {
            IOcrEngine engine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD, false);
            try
            {
               engine.Startup(null, null, null, @"C:\LEADTOOLS 20\Bin\Common\OcrLEADRuntime");
            }
            catch
            {
               // Do not keep a half-initialized engine around.
               engine.Dispose();
               throw;
            }

            ocrEngine = engine;
         }

         if (ocrPage != null)
         {
            ocrPage.Dispose();
            ocrPage = null;
         }

         // Drop the words of the previous page before recognizing the new one
         // so a failed recognition cannot leave stale words behind.
         ocrZoneWords = new List<OcrWord>();

         ocrPage = ocrEngine.CreatePage(viewer.Image, OcrImageSharingMode.None);
         ocrPage.Recognize(null);
         _ocrPageCharacters = ocrPage.GetRecognizedCharacters();

         foreach (IOcrZoneCharacters zoneCharacters in _ocrPageCharacters)
            ocrZoneWords.AddRange(zoneCharacters.GetWords());
      }

      /// <summary>
      /// Cancels all queued and in-progress speech and empties the annotation
      /// container.
      /// </summary>
      /// <param name="sender">The menu item raising the event.</param>
      /// <param name="e">Unused event data.</param>
      private void stopReadingToolStripMenuItem_Click(object sender, EventArgs e)
      {
         synth.SpeakAsyncCancelAll();

         // Only clear when the container actually holds objects.
         bool hasHighlights = annAutomation.Container.Children.Count > 0;
         if (hasHighlights)
         {
            annAutomation.Container.Children.Clear();
         }
      }

      /// <summary>
      /// Stops speech and releases the synthesizer, the viewer and its image,
      /// the OCR page and the OCR engine when the form closes.
      /// </summary>
      /// <param name="sender">The form raising the event.</param>
      /// <param name="e">Form-closing data (not modified here).</param>
      private void MainForm_FormClosing(object sender, FormClosingEventArgs e)
      {
         if (synth != null)
         {
            synth.SpeakAsyncCancelAll();
            synth.Dispose();
         }

         // Check the viewer for null before touching any of its members.
         if (viewer != null)
         {
            if (viewer.HasImage)
               viewer.Image.Dispose();
            viewer.Dispose();
         }

         // The page is released before the engine that created it.
         if (ocrPage != null)
            ocrPage.Dispose();

         if (ocrEngine != null)
         {
            // Only shut down an engine that was actually started.
            if (ocrEngine.IsStarted)
               ocrEngine.Shutdown();
            ocrEngine.Dispose();
         }
      }
   }
}


You can download the full V20 sample project below:
File Attachment(s):
imagetospeechdemov20.zip (298kb) downloaded 54 time(s).

Troubleshooting:
To get this example working, you will need a LEADTOOLS license and key file. These are located under "C:\LEADTOOLS 20\Support\Common\License" after you install the evaluation of the main SDK.
ltLic.png
After setting the license, the next important path to verify is the path to the OCR engine runtimes, which is passed as the fourth parameter of the Startup method:
ltocrrun.png

Important:
If an image isn't working with the project, make sure that the DLL required for that file format is included in your references. This will be one of the many Leadtools.Codecs DLLs, which
you can find in the following path after installing the SDK: C:\LEADTOOLS 20\Bin\Dotnet4\Win32
Pawel Lyko
Developer Support Engineer
LEAD Technologies, Inc.

 

Try the latest version of LEADTOOLS for free for 60 days by downloading the evaluation: https://www.leadtools.com/downloads

Wanna join the discussion? Login to your LEADTOOLS Support account or Register a new forum account.

You cannot post new topics in this forum.
You cannot reply to topics in this forum.
You cannot delete your posts in this forum.
You cannot edit your posts in this forum.
You cannot create polls in this forum.
You cannot vote in polls in this forum.

Powered by YAF.NET | YAF.NET © 2003-2025, Yet Another Forum.NET
This page was generated in 0.060 seconds.