image2data

Der Tech-Blog

...weitermachen, wo OCR aufhört

Positionsdatenerkennung (sample_6.i2dspt)

Dez 01 2014
// *************************************************************************
// * sample_6.i2dspt                                                       *
// *                                                                       *
// * - Retrieving the position data from a single- or multi-page invoice   *
// *   using OCR                                                           *
// * - Showing the result in the i2dTextViewer                             *
// *                                                                       *
// * Press F9 to execute the code or F7/F8 to debug it                     *
// *                                                                       *
// * Be sure that the sample data was installed!                           *
// *                                                                       *
// * Contact www.norpa.eu for more information                             *
// *************************************************************************
 
// Converts a 1-based character offset within Data.Text into a zero-based
// line index by counting the line feeds (#10) in the first Position
// characters of the text.
//
// Position: 1-based character offset into Data.Text. Values <= 0 (e.g. the
//           result of Pos() when nothing was found) yield 0; values beyond
//           the end of the text are clamped to the text length.
// Data:     string list whose Text property the offset refers to.
// Result:   number of #10 characters in Data.Text[1..Position].
function TextPosToLine(Position: Integer; Data: TStringList): Integer;
var
  i: Integer;
  iLast: Integer;
  sText: String;

begin
  Result := 0;

  // Read the Text property only once: the list rebuilds the whole string on
  // every access, which made the original per-character lookup quadratic.
  sText := Data.Text;

  // Never scan past the end of the text
  iLast := Position;
  if iLast > Length(sText) then
    iLast := Length(sText);

  for i := 1 to iLast do
    if sText[i] = #10 then
      Inc(Result);
end;
 
// Tells whether Line starts a new invoice position: the line must contain
// something other than whitespace, and its first 20 characters must not be
// exactly 20 blanks.
function IsNewPosition(Line: String): Boolean;
begin
  Result := False;

  // Blank lines never start a position
  if Trim(Line) <> '' then
    Result := Copy(Line, 1, 20) <> StringOfChar(#32, 20);
end;
 
// Reports whether the line at Index exists in Data, is not blank and does not
// start a new position, i.e. whether it carries additional text for the
// position line that precedes it.
function IsContinuationLine(Data: TStringList; Index: Integer): Boolean;
begin
  Result := False;

  // Nested checks instead of one "and" chain: the list is never indexed out
  // of range, even if the script engine evaluates both operands of "and".
  if (Index >= 0) and (Index < Data.Count) then
    if Trim(Data[Index]) > '' then
      Result := not IsNewPosition(Data[Index]);
end;

var
  // Declare some variables
  i: Integer;
  s, t, u: String;

  iPageCount: Integer;
  iPageNo: Integer;
  iLineNo: Integer;
  iStartLine: Integer;
  iEndLine: Integer;
  iStartPos: Integer;
  iEndPos: Integer;

  sFileName: String;
  sPosText: String;
  sPageText: String;

  oSL: TStringList;
  oText: TStringList;
  oCSVData: TStringList;

  oPage: TBitmap;

  rProcessSettings: Ti2dProcessSettings;

begin
  // Get the configured process settings. We must know the configured "in"-folder because that's
  // where the sample docs are located in!
  i2dGetProcessSettings(rProcessSettings);

  // Create some objects
  oSL := TStringList.Create;
  oText := TStringList.Create;
  oCSVData := TStringList.Create;

  oPage := TBitmap.Create;

  try
    // Choose the desired document
    i := i2dMessageBox('Extract the position data from the multi page invoice (select "No" for the single page invoice or "Cancel" to exit)?', mtConfirmation, [mbYes, mbNo, mbCancel]);
    if i = mrCancel then
      exit
    else if i = mrNo then
      sFileName := rProcessSettings.InPath + 'sample_multipos_singlepage_invoice.tif'
    else
      sFileName := rProcessSettings.InPath + 'sample_multipos_multipage_invoice.tif';

    // Get the number of pages from the image file
    iPageCount := i2dGetImagePageCount(sFileName);

    // Iterate over all pages
    for iPageNo := 1 to iPageCount do begin
      // Load page number iPageNo of the image into the bitmap object
      i2dLoadBitmap(sFileName, iPageNo, oPage);

      // Perform a character recognition (ocr) with format preservation on the bitmap object
      oText.Text := i2dOCRBitmap(oPage, 0, True);

      // Search the markers in the text as the string list returns it, because
      // TextPosToLine counts line feeds in exactly that string.
      sPageText := oText.Text;

      // The table header marks the line before the first position
      iStartPos := Pos('Gesamtpreis', sPageText);

      // The carry-over line (all but the last page) or the payment terms
      // (last page) mark the line behind the last position
      if iPageNo < iPageCount then
        iEndPos := Pos('Übertrag', sPageText)
      else
        iEndPos := Pos('Zahlungskond', sPageText);

      // Skip the page if one of the markers was not recognized; without them
      // the line range would be meaningless.
      if (iStartPos > 0) and (iEndPos > 0) then begin
        iStartLine := TextPosToLine(iStartPos, oText) + 1;
        iEndLine := TextPosToLine(iEndPos, oText) - 1;

        // Find the first position line (never run past the end line)
        while iStartLine < iEndLine do begin
          if Trim(oText[iStartLine]) > '' then
            Break;
          Inc(iStartLine);
        end;

        iLineNo := iStartLine;
        sPosText := '';

        // Iterate over the lines; a while loop so that an empty range
        // processes no line at all
        while iLineNo < iEndLine do begin
          // Store the old position, if a new position is found
          if IsNewPosition(oText[iLineNo]) then begin
            if sPosText > '' then begin
              oCSVData.Add(sPosText);
              sPosText := '';
            end;

            // Start without continuation text, otherwise a position without
            // extra lines would inherit the text of the previous position
            u := '';

            // Find max. three more position data lines
            if IsContinuationLine(oText, iLineNo + 1) then begin
              u := #32 + Trim(oText[iLineNo + 1]);
              if IsContinuationLine(oText, iLineNo + 2) then begin
                u := u + #32 + Trim(oText[iLineNo + 2]);
                if IsContinuationLine(oText, iLineNo + 3) then
                  u := u + #32 + Trim(oText[iLineNo + 3]);
              end;
            end;

            // More than one blank represents a new data field
            s := i2dReplaceString(Trim(oText[iLineNo]), #32#32, ';', [rfReplaceAll]);

            // Clean up more than one ;
            repeat
              t := s;
              s := i2dReplaceString(s, ';;', ';', [rfReplaceAll]);
            until s = t;

            i2dSplitString(s, ';', oSL);

            // Build result line; the continuation text is appended to the
            // third field (index 2)
            for i := 0 to oSL.Count - 1 do
              if i <> 2 then
                sPosText := sPosText + Trim(oSL[i]) + ';'
              else
                sPosText := sPosText + Trim(oSL[i]) + u + ';';
          end;

          Inc(iLineNo);
        end;

        // Store the last position of the page
        if sPosText > '' then
          oCSVData.Add(sPosText);
      end;
    end;

    i2dShowInTextViewer(oCSVData.Text);
  finally
    // Destroy the objects and release memory
    oText.Free;
    oCSVData.Free;
    oSL.Free;

    oPage.Free;
  end;
end.

Atom

powered by Nibbleblog