forked from nvaccess/nvda
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support for the OCR engine included in Windows 10. (nvaccess#7361)
This accesses the UWP OCR API via code in the nvdaHelperLocalWin10 C++/CX dll. Users press NVDA+r to recognize the text of the current navigator object. Once recognition is complete, the result is presented in a document which can be read with the cursor keys, etc. Enter can also be pressed to click the text at the cursor. Much of the base content recognition functionality has been abstracted into the new contentRecog framework, allowing other recognizers to be easily implemented in future (in both NVDA core and add-ons).
- Loading branch information
Showing
22 changed files
with
1,070 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
Code for utilities for use in nvdaHelperLocalWin10 modules. | ||
This file is a part of the NVDA project. | ||
URL: http://www.nvaccess.org/ | ||
Copyright 2017 Tyler Spivey, NV Access Limited. | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License version 2.0, as published by | ||
the Free Software Foundation. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
This license can be found at: | ||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | ||
*/ | ||
|
||
#include <collection.h> | ||
#include <wrl.h> | ||
#include <robuffer.h> | ||
#include <common/log.h> | ||
|
||
using namespace Windows::Storage::Streams; | ||
using namespace Microsoft::WRL; | ||
|
||
/** Get direct (non-copying) access to the bytes backing an IBuffer.
 * @param buffer The WinRT buffer whose storage is wanted.
 * @return A pointer to the buffer's own mutable storage,
 * or nullptr if buffer is null or its storage could not be obtained.
 */
byte* getBytes(IBuffer^ buffer) {
	if (buffer == nullptr) {
		// Querying a null object below would dereference a null pointer.
		LOG_ERROR(L"Can't get bytes from a null IBuffer");
		return nullptr;
	}
	// We want direct access to the buffer rather than copying it.
	// To do this, we need to get to the IBufferByteAccess interface.
	// See http://cm-bloggers.blogspot.com/2012/09/accessing-image-pixel-data-in-ccx.html
	ComPtr<IInspectable> insp = reinterpret_cast<IInspectable*>(buffer);
	ComPtr<IBufferByteAccess> bufferByteAccess;
	if (FAILED(insp.As(&bufferByteAccess))) {
		LOG_ERROR(L"Couldn't get IBufferByteAccess from IBuffer");
		return nullptr;
	}
	byte* bytes = nullptr;
	// Buffer() reports failure through its HRESULT; don't hand back a pointer
	// we were never actually given.
	const HRESULT res = bufferByteAccess->Buffer(&bytes);
	if (FAILED(res)) {
		LOG_ERROR(L"IBufferByteAccess::Buffer failed with HRESULT " << res);
		return nullptr;
	}
	return bytes;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
/* | ||
Header for utilities for use in nvdaHelperLocalWin10 modules. | ||
This file is a part of the NVDA project. | ||
URL: http://www.nvaccess.org/ | ||
Copyright 2017 Tyler Spivey, NV Access Limited. | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License version 2.0, as published by | ||
the Free Software Foundation. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
This license can be found at: | ||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | ||
*/ | ||
|
||
#include <robuffer.h> | ||
|
||
/** Get access to the raw byte array backing an IBuffer object. | ||
* This is necessary when interoperating with non-WinRT components; | ||
* e.g. returning bytes from an IBuffer to a C caller. | ||
* This byte array is mutable; it is *not* a copy. | ||
*/ | ||
byte* getBytes(Windows::Storage::Streams::IBuffer^ buffer); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/* | ||
Code for C dll bridge to UWP OCR. | ||
This file is a part of the NVDA project. | ||
URL: http://www.nvaccess.org/ | ||
Copyright 2017 NV Access Limited. | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License version 2.0, as published by | ||
the Free Software Foundation. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
This license can be found at: | ||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | ||
*/ | ||
|
||
#include <collection.h> | ||
#include <ppltasks.h> | ||
#include <wrl.h> | ||
#include <robuffer.h> | ||
#include <windows.h> | ||
#include <cstring> | ||
#include <common/log.h> | ||
#include "utils.h" | ||
#include "uwpOcr.h" | ||
|
||
using namespace std; | ||
using namespace Platform; | ||
using namespace concurrency; | ||
using namespace Windows::Storage::Streams; | ||
using namespace Microsoft::WRL; | ||
using namespace Windows::Media::Ocr; | ||
using namespace Windows::Foundation::Collections; | ||
using namespace Windows::Globalization; | ||
using namespace Windows::Graphics::Imaging; | ||
using namespace Windows::Data::Json; | ||
|
||
/** Create an OCR instance for a given language.
 * @param language The language tag to recognize (null terminated UTF-16).
 * @param callback The function to call with the result of each recognition.
 * @return A new instance which must be released with uwpOcr_terminate,
 * or nullptr if no OCR engine could be created for the language.
 */
UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback) {
	try {
		auto engine = OcrEngine::TryCreateFromLanguage(ref new Language(ref new String(language)));
		if (!engine)
			return nullptr;
		auto instance = new UwpOcr;
		instance->engine = engine;
		instance->callback = callback;
		return instance;
	} catch (Platform::Exception^ e) {
		// This is an exported C function, so a WinRT exception
		// (e.g. from constructing Language with a bad tag) must not escape it.
		// Report it the same way as any other failure to create an engine.
		LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
		return nullptr;
	}
}
|
||
/** Destroy an instance previously returned by uwpOcr_initialize.
 * Deleting the structure also destroys the engine handle stored in it.
 * @param instance The instance to destroy; passing nullptr does nothing.
 */
void __stdcall uwpOcr_terminate(UwpOcr* instance) {
	if (instance == nullptr) {
		return;
	}
	delete instance;
}
|
||
/** Start recognizing the text in an image.
 * Recognition is asynchronous; the result is delivered via the callback
 * that was supplied to uwpOcr_initialize.
 * On success, the callback receives a JSON string: an array of lines,
 * each line being an array of word objects with
 * "x", "y", "width", "height" and "text" members.
 * On failure, the callback receives NULL.
 * If recognition can't even be started, the callback is called with NULL
 * before this function returns.
 * The string passed to the callback comes from a temporary,
 * so the callback should copy it if it needs to keep it.
 * @param instance The instance returned by uwpOcr_initialize.
 * @param image The pixels of the image, width * height RGBQUADs.
 * @param width The width of the image in pixels.
 * @param height The height of the image in pixels.
 */
void __stdcall uwpOcr_recognize(UwpOcr* instance, const RGBQUAD* image, unsigned int width, unsigned int height) {
	// Copy the callback now so that the continuations below never dereference
	// instance, which the caller might free before recognition completes.
	const uwpOcr_Callback callback = instance->callback;
	try {
		// Work out the size in 64 bits first so that a huge image
		// is rejected rather than silently truncated to 32 bits.
		const unsigned long long numPixels = static_cast<unsigned long long>(width) * height;
		if (!image || numPixels > 0xFFFFFFFFull / sizeof(RGBQUAD)) {
			LOG_ERROR(L"Invalid image supplied for recognition");
			callback(NULL);
			return;
		}
		const unsigned int numBytes = static_cast<unsigned int>(numPixels * sizeof(RGBQUAD));
		auto buf = ref new Buffer(numBytes);
		buf->Length = numBytes;
		BYTE* bytes = getBytes(buf);
		if (!bytes) {
			// getBytes has already logged the reason.
			callback(NULL);
			return;
		}
		memcpy(bytes, image, numBytes);
		auto sbmp = SoftwareBitmap::CreateCopyFromBuffer(buf, BitmapPixelFormat::Bgra8, width, height, BitmapAlphaMode::Ignore);
		task<OcrResult^> ocrTask = create_task(instance->engine->RecognizeAsync(sbmp));
		// sbmp is captured although it is not used in the continuation;
		// presumably this keeps the bitmap alive until recognition has finished.
		ocrTask.then([callback, sbmp] (OcrResult^ result) {
			auto lines = result->Lines;
			auto jLines = ref new JsonArray();
			// Size is an unsigned int, so the indexes must be too;
			// a narrower index would wrap around and never reach a large Size.
			for (unsigned int l = 0; l < lines->Size; ++l) {
				auto words = lines->GetAt(l)->Words;
				auto jWords = ref new JsonArray();
				for (unsigned int w = 0; w < words->Size; ++w) {
					auto word = words->GetAt(w);
					auto jWord = ref new JsonObject();
					auto rect = word->BoundingRect;
					jWord->Insert("x", JsonValue::CreateNumberValue(rect.X));
					jWord->Insert("y", JsonValue::CreateNumberValue(rect.Y));
					jWord->Insert("width", JsonValue::CreateNumberValue(rect.Width));
					jWord->Insert("height", JsonValue::CreateNumberValue(rect.Height));
					jWord->Insert("text", JsonValue::CreateStringValue(word->Text));
					jWords->Append(jWord);
				}
				jLines->Append(jWords);
			}
			callback(jLines->Stringify()->Data());
		}).then([callback] (task<void> previous) {
			// Catch any unhandled exceptions that occurred during these tasks.
			try {
				previous.get();
			} catch (Platform::Exception^ e) {
				LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
				callback(NULL);
			}
		});
	} catch (Platform::Exception^ e) {
		// Setting up recognition failed before any task was started.
		// This is an exported C function, so the exception must not escape it;
		// report it the same way as an asynchronous failure.
		LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
		callback(NULL);
	}
}
|
||
// The result is returned as a BSTR:
// the string has to outlive this call until the caller has finished with it,
// and the caller is then responsible for freeing it.
// malloc isn't an option because the caller might be using a different CRT,
// and it isn't safe to malloc in one CRT and free in another.
BSTR __stdcall uwpOcr_getLanguages() {
	auto available = OcrEngine::AvailableRecognizerLanguages;
	wstring joined;
	// Every language tag, including the last, is followed by a semi-colon.
	for (unsigned int idx = 0; idx < available->Size; ++idx) {
		joined.append(available->GetAt(idx)->LanguageTag->Data());
		joined.push_back(L';');
	}
	return SysAllocString(joined.c_str());
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
Header for C dll bridge to UWP OCR. | ||
This file is a part of the NVDA project. | ||
URL: http://www.nvaccess.org/ | ||
Copyright 2017 NV Access Limited. | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License version 2.0, as published by | ||
the Free Software Foundation. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
This license can be found at: | ||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | ||
*/ | ||
|
||
#pragma once | ||
#define export __declspec(dllexport) | ||
|
||
typedef void (*uwpOcr_Callback)(const char16* result); | ||
typedef struct { | ||
Windows::Media::Ocr::OcrEngine^ engine; | ||
uwpOcr_Callback callback; | ||
} UwpOcr; | ||
|
||
extern "C" { | ||
export UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback); | ||
export void __stdcall uwpOcr_terminate(UwpOcr* instance); | ||
export void __stdcall uwpOcr_recognize(UwpOcr* instance, const RGBQUAD* image, unsigned int width, unsigned int height); | ||
// Returns a BSTR of language codes terminated by semi-colons; | ||
// e.g. "de-de;en-us;". | ||
export BSTR __stdcall uwpOcr_getLanguages(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.