Skip to content

Commit

Permalink
Support for the OCR engine included in Windows 10. (nvaccess#7361)
Browse files Browse the repository at this point in the history
This accesses the UWP OCR API via code in the nvdaHelperLocalWin10 C++/CX dll.
Users press NVDA+r to recognize the text of the current navigator object. Once recognition is complete, the result is presented in a document which can be read with the cursor keys, etc. Enter can also be pressed to click the text at the cursor.
Much of the base content recognition functionality has been abstracted into the new contentRecog framework, allowing other recognizers to be easily implemented in future (in both NVDA core and add-ons).
  • Loading branch information
jcsteh authored Aug 1, 2017
1 parent cc0aa5d commit 5527f27
Show file tree
Hide file tree
Showing 22 changed files with 1,070 additions and 34 deletions.
16 changes: 1 addition & 15 deletions nvdaHelper/localWin10/oneCoreSpeech.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ This license can be found at:
#include <wrl.h>
#include <robuffer.h>
#include <common/log.h>
#include "utils.h"
#include "oneCoreSpeech.h"

using namespace std;
Expand All @@ -30,21 +31,6 @@ using namespace Microsoft::WRL;
using namespace Windows::Media;
using namespace Windows::Foundation::Collections;

/** Get direct (non-copying) access to the bytes backing an IBuffer.
 * @param buffer The WinRT buffer whose storage is wanted.
 * @return A pointer to the buffer's own storage (not a copy),
 *   or nullptr if buffer is null, it does not expose IBufferByteAccess
 *   or its byte array could not be retrieved. Each failure is logged.
 */
byte* getBytes(IBuffer^ buffer) {
	if (buffer == nullptr) {
		// Querying a null object below would dereference a null pointer.
		LOG_ERROR(L"Can't get bytes from a null IBuffer");
		return nullptr;
	}
	// We want direct access to the buffer rather than copying it.
	// To do this, we need to get to the IBufferByteAccess interface.
	// See http://cm-bloggers.blogspot.com/2012/09/accessing-image-pixel-data-in-ccx.html
	ComPtr<IInspectable> insp = reinterpret_cast<IInspectable*>(buffer);
	ComPtr<IBufferByteAccess> bufferByteAccess;
	if (FAILED(insp.As(&bufferByteAccess))) {
		LOG_ERROR(L"Couldn't get IBufferByteAccess from IBuffer");
		return nullptr;
	}
	byte* bytes = nullptr;
	// Buffer() reports failure through its HRESULT, so don't trust bytes unless it succeeded.
	if (FAILED(bufferByteAccess->Buffer(&bytes))) {
		LOG_ERROR(L"Couldn't get the byte array from IBufferByteAccess");
		return nullptr;
	}
	return bytes;
}

OcSpeech* __stdcall ocSpeech_initialize() {
auto instance = new OcSpeech;
instance->synth = ref new SpeechSynthesizer();
Expand Down
2 changes: 2 additions & 0 deletions nvdaHelper/localWin10/sconscript
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ localWin10Lib = env.SharedLibrary(
target="nvdaHelperLocalWin10",
source=[
env['projectResFile'],
'utils.cpp',
'oneCoreSpeech.cpp',
'uwpOcr.cpp',
],
LIBS=["oleaut32", localLib[2]],
)
Expand Down
37 changes: 37 additions & 0 deletions nvdaHelper/localWin10/utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
Code for utilities for use in nvdaHelperLocalWin10 modules.
This file is a part of the NVDA project.
URL: http://www.nvaccess.org/
Copyright 2017 Tyler Spivey, NV Access Limited.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2.0, as published by
the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
This license can be found at:
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
*/

#include <collection.h>
#include <wrl.h>
#include <robuffer.h>
#include <common/log.h>

using namespace Windows::Storage::Streams;
using namespace Microsoft::WRL;

/** Get direct (non-copying) access to the bytes backing an IBuffer.
 * @param buffer The WinRT buffer whose storage is wanted.
 * @return A pointer to the buffer's own storage (not a copy),
 *   or nullptr if buffer is null, it does not expose IBufferByteAccess
 *   or its byte array could not be retrieved. Each failure is logged.
 */
byte* getBytes(IBuffer^ buffer) {
	if (buffer == nullptr) {
		// Querying a null object below would dereference a null pointer.
		LOG_ERROR(L"Can't get bytes from a null IBuffer");
		return nullptr;
	}
	// We want direct access to the buffer rather than copying it.
	// To do this, we need to get to the IBufferByteAccess interface.
	// See http://cm-bloggers.blogspot.com/2012/09/accessing-image-pixel-data-in-ccx.html
	ComPtr<IInspectable> insp = reinterpret_cast<IInspectable*>(buffer);
	ComPtr<IBufferByteAccess> bufferByteAccess;
	if (FAILED(insp.As(&bufferByteAccess))) {
		LOG_ERROR(L"Couldn't get IBufferByteAccess from IBuffer");
		return nullptr;
	}
	byte* bytes = nullptr;
	// Buffer() reports failure through its HRESULT, so don't trust bytes unless it succeeded.
	if (FAILED(bufferByteAccess->Buffer(&bytes))) {
		LOG_ERROR(L"Couldn't get the byte array from IBufferByteAccess");
		return nullptr;
	}
	return bytes;
}
23 changes: 23 additions & 0 deletions nvdaHelper/localWin10/utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
Header for utilities for use in nvdaHelperLocalWin10 modules.
This file is a part of the NVDA project.
URL: http://www.nvaccess.org/
Copyright 2017 Tyler Spivey, NV Access Limited.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2.0, as published by
the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
This license can be found at:
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
*/

#include <robuffer.h>

/** Get access to the raw byte array backing an IBuffer object.
* This is necessary when interoperating with non-WinRT components;
* e.g. returning bytes from an IBuffer to a C caller.
* This byte array is mutable; it is *not* a copy.
* @param buffer The buffer whose backing bytes are wanted.
* @return A pointer to the buffer's bytes,
*   or nullptr (after logging an error) if they could not be accessed.
*   Callers should check for nullptr before using the pointer.
*/
byte* getBytes(Windows::Storage::Streams::IBuffer^ buffer);
102 changes: 102 additions & 0 deletions nvdaHelper/localWin10/uwpOcr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
Code for C dll bridge to UWP OCR.
This file is a part of the NVDA project.
URL: http://www.nvaccess.org/
Copyright 2017 NV Access Limited.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2.0, as published by
the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
This license can be found at:
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
*/

#include <collection.h>
#include <ppltasks.h>
#include <wrl.h>
#include <robuffer.h>
#include <windows.h>
#include <cstring>
#include <common/log.h>
#include "utils.h"
#include "uwpOcr.h"

using namespace std;
using namespace Platform;
using namespace concurrency;
using namespace Windows::Storage::Streams;
using namespace Microsoft::WRL;
using namespace Windows::Media::Ocr;
using namespace Windows::Foundation::Collections;
using namespace Windows::Globalization;
using namespace Windows::Graphics::Imaging;
using namespace Windows::Data::Json;

/** Create an OCR instance for a given language.
 * @param language The language tag to recognize; e.g. one of the tags returned by uwpOcr_getLanguages.
 * @param callback The function which uwpOcr_recognize calls with the outcome of a recognition.
 * @return A new instance which must be released with uwpOcr_terminate,
 *   or nullptr if language is null or not a valid language tag,
 *   or no OCR engine could be created for it.
 */
UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback) {
	if (!language)
		return nullptr;
	try {
		// Constructing a Language throws if the tag is not well formed.
		// This is an exported C function, so a C++/CX exception must not escape to the caller.
		auto engine = OcrEngine::TryCreateFromLanguage(ref new Language(ref new String(language)));
		if (!engine)
			return nullptr;
		auto instance = new UwpOcr;
		instance->engine = engine;
		instance->callback = callback;
		return instance;
	} catch (Platform::Exception^ e) {
		LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
		return nullptr;
	}
}

/** Release an instance created by uwpOcr_initialize.
 * The instance must not be used after this call. Passing nullptr does nothing.
 * NOTE(review): uwpOcr_recognize is asynchronous; confirm that callers only terminate
 * an instance once no recognition started on it can still be using it.
 */
void __stdcall uwpOcr_terminate(UwpOcr* instance) {
	if (instance == nullptr) {
		return;
	}
	delete instance;
}

/** Start recognizing the text in an image.
 * The outcome is always reported through the instance's callback:
 * on success it receives JSON text consisting of an array of lines,
 * each of which is an array of word objects with "x", "y", "width", "height" and "text" members;
 * on any failure it receives nullptr.
 * Failures detected before recognition starts (null or empty image, an image too large to
 * address, or a WinRT error while preparing the bitmap) are reported by calling the callback
 * before this function returns. Otherwise, the callback is called from a task continuation
 * once recognition finishes (NOTE(review): presumably on another thread; confirm).
 * @param instance An instance created by uwpOcr_initialize; must not be null.
 * @param image width * height pixels, interpreted as BGRA with the alpha channel ignored.
 * @param width The width of the image in pixels.
 * @param height The height of the image in pixels.
 */
void __stdcall uwpOcr_recognize(UwpOcr* instance, const RGBQUAD* image, unsigned int width, unsigned int height) {
	// Copy the callback now and let the continuations capture the copy.
	// The callback is allowed to terminate (free) the instance,
	// so the continuations must never dereference instance themselves.
	const uwpOcr_Callback callback = instance->callback;
	try {
		// Work in 64 bits so the size calculation can't silently wrap.
		const unsigned long long numPixels = static_cast<unsigned long long>(width) * height;
		if (!image || numPixels == 0 || numPixels > 0xFFFFFFFFull / sizeof(RGBQUAD)) {
			LOG_ERROR(L"Invalid image for OCR: " << width << L"x" << height);
			callback(nullptr);
			return;
		}
		const unsigned int numBytes = static_cast<unsigned int>(numPixels * sizeof(RGBQUAD));
		auto buf = ref new Buffer(numBytes);
		buf->Length = numBytes;
		BYTE* bytes = getBytes(buf);
		if (!bytes) {
			// getBytes has already logged the reason.
			callback(nullptr);
			return;
		}
		memcpy(bytes, image, numBytes);
		auto sbmp = SoftwareBitmap::CreateCopyFromBuffer(buf, BitmapPixelFormat::Bgra8, width, height, BitmapAlphaMode::Ignore);
		task<OcrResult^> ocrTask = create_task(instance->engine->RecognizeAsync(sbmp));
		// sbmp is captured as in the original code, presumably to keep the bitmap alive until recognition completes.
		ocrTask.then([callback, sbmp] (OcrResult^ result) {
			auto lines = result->Lines;
			auto jLines = ref new JsonArray();
			// Size is an unsigned int; a narrower counter would wrap and never reach it.
			for (unsigned int l = 0; l < lines->Size; ++l) {
				auto words = lines->GetAt(l)->Words;
				auto jWords = ref new JsonArray();
				for (unsigned int w = 0; w < words->Size; ++w) {
					auto word = words->GetAt(w);
					auto jWord = ref new JsonObject();
					auto rect = word->BoundingRect;
					jWord->Insert("x", JsonValue::CreateNumberValue(rect.X));
					jWord->Insert("y", JsonValue::CreateNumberValue(rect.Y));
					jWord->Insert("width", JsonValue::CreateNumberValue(rect.Width));
					jWord->Insert("height", JsonValue::CreateNumberValue(rect.Height));
					jWord->Insert("text", JsonValue::CreateStringValue(word->Text));
					jWords->Append(jWord);
				}
				jLines->Append(jWords);
			}
			callback(jLines->Stringify()->Data());
		}).then([callback] (task<void> previous) {
			// Catch any unhandled exceptions that occurred during these tasks.
			try {
				previous.get();
			} catch (Platform::Exception^ e) {
				LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
				callback(nullptr);
			} catch (...) {
				// Don't leave an unobserved exception in the task or a caller waiting forever.
				LOG_ERROR(L"Unknown error during OCR");
				callback(nullptr);
			}
		});
	} catch (Platform::Exception^ e) {
		// This is an exported C function, so a C++/CX exception must not escape to the caller.
		LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
		callback(nullptr);
	}
}

// Build a list of the language tags the OCR engine can recognize,
// each tag followed by a semi-colon; e.g. "de-de;en-us;".
// The result is returned as a BSTR because it must outlive this call
// and then be freed by the caller.
// malloc is not an option: the caller might use a different CRT,
// and memory allocated by one CRT can't safely be freed by another.
BSTR __stdcall uwpOcr_getLanguages() {
	auto available = OcrEngine::AvailableRecognizerLanguages;
	wstring joined;
	unsigned int index = 0;
	while (index < available->Size) {
		joined.append(available->GetAt(index)->LanguageTag->Data());
		joined.append(L";");
		++index;
	}
	return SysAllocString(joined.c_str());
}
32 changes: 32 additions & 0 deletions nvdaHelper/localWin10/uwpOcr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
Header for C dll bridge to UWP OCR.
This file is a part of the NVDA project.
URL: http://www.nvaccess.org/
Copyright 2017 NV Access Limited.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2.0, as published by
the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
This license can be found at:
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
*/

#pragma once
#define export __declspec(dllexport)

// Signature of the function which receives the outcome of uwpOcr_recognize.
// result is the recognition result as JSON text: an array of lines,
// each of which is an array of word objects with "x", "y", "width", "height" and "text" members.
// result is NULL if recognition failed.
typedef void (*uwpOcr_Callback)(const char16* result);
// The state of one OCR instance.
// Instances are created by uwpOcr_initialize and freed by uwpOcr_terminate.
typedef struct {
// The UWP OCR engine used to perform recognition.
Windows::Media::Ocr::OcrEngine^ engine;
// The function called with the outcome of each recognition.
uwpOcr_Callback callback;
} UwpOcr;

// The C interface exported from the dll.
extern "C" {
// Creates an OCR instance for a language tag, storing the callback to be used by uwpOcr_recognize.
// Returns NULL if an OCR engine could not be created for the language.
// The returned instance must be released with uwpOcr_terminate.
export UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback);
// Frees an instance returned by uwpOcr_initialize.
export void __stdcall uwpOcr_terminate(UwpOcr* instance);
// Starts asynchronous recognition of an image of width * height pixels,
// interpreted as BGRA with the alpha channel ignored.
// The outcome is delivered to the callback supplied to uwpOcr_initialize.
export void __stdcall uwpOcr_recognize(UwpOcr* instance, const RGBQUAD* image, unsigned int width, unsigned int height);
// Returns a BSTR of language codes terminated by semi-colons;
// e.g. "de-de;en-us;".
// The caller is responsible for freeing the returned BSTR.
export BSTR __stdcall uwpOcr_getLanguages();
}
20 changes: 20 additions & 0 deletions source/NVDAHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,3 +477,23 @@ def terminate():
VBuf_getTextInRange=None
localLib.nvdaHelperLocal_terminate()
localLib=None

LOCAL_WIN10_DLL_PATH = ur"lib\nvdaHelperLocalWin10.dll"
def getHelperLocalWin10Dll():
"""Get a ctypes WinDLL instance for the nvdaHelperLocalWin10 dll.
This is a C++/CX dll used to provide access to certain UWP functionality.
"""
return windll[LOCAL_WIN10_DLL_PATH]

def bstrReturn(address):
	"""Handle a BSTR returned from a ctypes function call.
	This includes freeing the memory.
	This is needed for nvdaHelperLocalWin10 functions which return a BSTR.
	@param address: The address of the BSTR; 0 or None means a NULL BSTR.
	@return: The text of the BSTR up to its first null character,
		or an empty string for a NULL BSTR.
	"""
	if not address:
		# A NULL BSTR is equivalent to an empty string.
		# There is nothing to read or free, and reading address 0 would fail.
		return u""
	try:
		# comtypes.BSTR.from_address seems to cause a crash for some reason. Not sure why.
		# Just access the string ourselves.
		# This will terminate at a null character, even though BSTR allows nulls.
		# We're only using this for normal, null-terminated strings anyway.
		return wstring_at(address)
	finally:
		# Free the BSTR even if reading it raised, so it can't leak.
		windll.oleaut32.SysFreeString(address)
3 changes: 2 additions & 1 deletion source/NVDAObjects/IAccessible/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,8 @@ def _get_IAccessibleRole(self):
if role==0:
try:
role=self.IAccessibleObject.accRole(self.IAccessibleChildID)
except COMError:
except COMError as e:
log.debugWarning("accRole failed: %s" % e)
role=0
return role

Expand Down
3 changes: 3 additions & 0 deletions source/config/configSpec.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@
hwIo = boolean(default=false)
audioDucking = boolean(default=false)
[uwpOcr]
language = string(default="")
[upgrade]
newLaptopKeyboardLayout = boolean(default=false)
Expand Down
Loading

0 comments on commit 5527f27

Please sign in to comment.