diff --git a/nvdaHelper/localWin10/oneCoreSpeech.cpp b/nvdaHelper/localWin10/oneCoreSpeech.cpp
index b622d952fb1..e79102cba6c 100644
--- a/nvdaHelper/localWin10/oneCoreSpeech.cpp
+++ b/nvdaHelper/localWin10/oneCoreSpeech.cpp
@@ -19,6 +19,7 @@ This license can be found at:
 #include <wrl.h>
 #include <robuffer.h>
 #include <common/log.h>
+#include "utils.h"
 #include "oneCoreSpeech.h"
 
 using namespace std;
@@ -30,21 +31,6 @@ using namespace Microsoft::WRL;
 using namespace Windows::Media;
 using namespace Windows::Foundation::Collections;
 
-byte* getBytes(IBuffer^ buffer) {
-	// We want direct access to the buffer rather than copying it.
-	// To do this, we need to get to the IBufferByteAccess interface.
-	// See http://cm-bloggers.blogspot.com/2012/09/accessing-image-pixel-data-in-ccx.html
-	ComPtr<IInspectable> insp = reinterpret_cast<IInspectable*>(buffer);
-	ComPtr<IBufferByteAccess> bufferByteAccess;
-	if (FAILED(insp.As(&bufferByteAccess))) {
-		LOG_ERROR(L"Couldn't get IBufferByteAccess from IBuffer");
-		return nullptr;
-	}
-	byte* bytes = nullptr;
-	bufferByteAccess->Buffer(&bytes);
-	return bytes;
-}
-
 OcSpeech* __stdcall ocSpeech_initialize() {
 	auto instance = new OcSpeech;
 	instance->synth = ref new SpeechSynthesizer();
diff --git a/nvdaHelper/localWin10/sconscript b/nvdaHelper/localWin10/sconscript
index 65352145966..f2e7a72ccef 100644
--- a/nvdaHelper/localWin10/sconscript
+++ b/nvdaHelper/localWin10/sconscript
@@ -59,7 +59,9 @@ localWin10Lib = env.SharedLibrary(
 	target="nvdaHelperLocalWin10",
 	source=[
 		env['projectResFile'],
+		'utils.cpp',
 		'oneCoreSpeech.cpp',
+		'uwpOcr.cpp',
 	],
 	LIBS=["oleaut32", localLib[2]],
 )
diff --git a/nvdaHelper/localWin10/utils.cpp b/nvdaHelper/localWin10/utils.cpp
new file mode 100644
index 00000000000..657588afdef
--- /dev/null
+++ b/nvdaHelper/localWin10/utils.cpp
@@ -0,0 +1,37 @@
+/*
+Code for utilities for use in nvdaHelperLocalWin10 modules.
+This file is a part of the NVDA project.
+URL: http://www.nvaccess.org/
+Copyright 2017 Tyler Spivey, NV Access Limited.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2.0, as published by
+ the Free Software Foundation.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+This license can be found at:
+http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+*/
+
+#include <windows.h>
+#include <robuffer.h>
+#include <wrl.h>
+#include <common/log.h>
+
+using namespace Windows::Storage::Streams;
+using namespace Microsoft::WRL;
+
+byte* getBytes(IBuffer^ buffer) {
+	// We want direct access to the buffer rather than copying it.
+	// To do this, we need to get to the IBufferByteAccess interface.
+	// See http://cm-bloggers.blogspot.com/2012/09/accessing-image-pixel-data-in-ccx.html
+	ComPtr<IInspectable> insp = reinterpret_cast<IInspectable*>(buffer);
+	ComPtr<IBufferByteAccess> bufferByteAccess;
+	if (FAILED(insp.As(&bufferByteAccess))) {
+		LOG_ERROR(L"Couldn't get IBufferByteAccess from IBuffer");
+		return nullptr;
+	}
+	byte* bytes = nullptr;
+	bufferByteAccess->Buffer(&bytes);
+	return bytes;
+}
diff --git a/nvdaHelper/localWin10/utils.h b/nvdaHelper/localWin10/utils.h
new file mode 100644
index 00000000000..693ffa5a6d0
--- /dev/null
+++ b/nvdaHelper/localWin10/utils.h
@@ -0,0 +1,23 @@
+/*
+Header for utilities for use in nvdaHelperLocalWin10 modules.
+This file is a part of the NVDA project.
+URL: http://www.nvaccess.org/
+Copyright 2017 Tyler Spivey, NV Access Limited.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2.0, as published by
+ the Free Software Foundation.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+This license can be found at:
+http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+*/
+
+#include <windows.h>
+
+/** Get access to the raw byte array backing an IBuffer object.
+ * This is necessary when interoperating with non-WinRT components;
+ * e.g. returning bytes from an IBuffer to a C caller.
+ * This byte array is mutable; it is *not* a copy.
+ */
+byte* getBytes(Windows::Storage::Streams::IBuffer^ buffer);
diff --git a/nvdaHelper/localWin10/uwpOcr.cpp b/nvdaHelper/localWin10/uwpOcr.cpp
new file mode 100644
index 00000000000..8b999b3d1b5
--- /dev/null
+++ b/nvdaHelper/localWin10/uwpOcr.cpp
@@ -0,0 +1,102 @@
+/*
+Code for C dll bridge to UWP OCR.
+This file is a part of the NVDA project.
+URL: http://www.nvaccess.org/
+Copyright 2017 NV Access Limited.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2.0, as published by
+ the Free Software Foundation.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+This license can be found at:
+http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+*/
+
+#include <collection.h>
+#include <ppltasks.h>
+#include <wrl.h>
+#include <robuffer.h>
+#include <windows.h>
+#include <string>
+#include <common/log.h>
+#include "utils.h"
+#include "uwpOcr.h"
+
+using namespace std;
+using namespace Platform;
+using namespace concurrency;
+using namespace Windows::Storage::Streams;
+using namespace Microsoft::WRL;
+using namespace Windows::Media::Ocr;
+using namespace Windows::Foundation::Collections;
+using namespace Windows::Globalization;
+using namespace Windows::Graphics::Imaging;
+using namespace Windows::Data::Json;
+
+UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback) {
+	auto engine = OcrEngine::TryCreateFromLanguage(ref new Language(ref new String(language)));
+	if (!engine)
+		return nullptr;
+	auto instance = new UwpOcr;
+	instance->engine = engine;
+	instance->callback = callback;
+	return instance;
+}
+
+void __stdcall uwpOcr_terminate(UwpOcr* instance) {
+	delete instance;
+}
+
+void __stdcall uwpOcr_recognize(UwpOcr* instance, const RGBQUAD* image, unsigned int width, unsigned int height) {
+	unsigned int numBytes = sizeof(RGBQUAD) * width * height;
+	auto buf = ref new Buffer(numBytes);
+	buf->Length = numBytes;
+	BYTE* bytes = getBytes(buf);
+	memcpy(bytes, image, numBytes);
+	auto sbmp = SoftwareBitmap::CreateCopyFromBuffer(buf, BitmapPixelFormat::Bgra8, width, height, BitmapAlphaMode::Ignore);
+	task<OcrResult^> ocrTask = create_task(instance->engine->RecognizeAsync(sbmp));
+	ocrTask.then([instance, sbmp] (OcrResult^ result) {
+		auto lines = result->Lines;
+		auto jLines = ref new JsonArray();
+		for (unsigned short l = 0; l < lines->Size; ++l) {
+			auto words = lines->GetAt(l)->Words;
+			auto jWords = ref new JsonArray();
+			for (unsigned short w = 0; w < words->Size; ++w) {
+				auto word = words->GetAt(w);
+				auto jWord = ref new JsonObject();
+				auto rect = word->BoundingRect;
+				jWord->Insert("x", JsonValue::CreateNumberValue(rect.X));
+				jWord->Insert("y", JsonValue::CreateNumberValue(rect.Y));
+				jWord->Insert("width", JsonValue::CreateNumberValue(rect.Width));
+				jWord->Insert("height", JsonValue::CreateNumberValue(rect.Height));
+				jWord->Insert("text", JsonValue::CreateStringValue(word->Text));
+				jWords->Append(jWord);
+			}
+			jLines->Append(jWords);
+		}
+		instance->callback(jLines->Stringify()->Data());
+	}).then([instance] (task<void> previous) {
+		// Catch any unhandled exceptions that occurred during these tasks.
+		try {
+			previous.get();
+		} catch (Platform::Exception^ e) {
+			LOG_ERROR(L"Error " << e->HResult << L": " << e->Message->Data());
+			instance->callback(NULL);
+		}
+	});
+}
+
+// We use BSTR because we need the string to stay around until the caller is done with it
+// but the caller then needs to free it.
+// We can't just use malloc because the caller might be using a different CRT
+// and calling malloc and free from different CRTs isn't safe.
+BSTR __stdcall uwpOcr_getLanguages() {
+	wstring langsStr;
+	auto langs = OcrEngine::AvailableRecognizerLanguages;
+	for (unsigned int i = 0; i < langs->Size; ++i) {
+		langsStr += langs->GetAt(i)->LanguageTag->Data();
+		langsStr += L";";
+	}
+	return SysAllocString(langsStr.c_str());
+}
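The callback wired up in uwpOcr_recognize hands Python a single JSON string: an array of lines, each line an array of word objects carrying x, y, width, height and text. A minimal sketch of consuming that payload on the Python side (the sample string here is hypothetical; contentRecog/uwpOcr.py later in this diff does the real parsing):

import json

# Hypothetical payload in the shape built by uwpOcr_recognize above.
payload = u'[[{"x": 106, "y": 91, "width": 11, "height": 9, "text": "Hello"},' \
	u' {"x": 120, "y": 91, "width": 11, "height": 9, "text": "world"}]]'

for line in json.loads(payload):
	# Words within a line are joined with spaces; each word keeps the
	# bounding rectangle reported by the OCR engine.
	print " ".join(word["text"] for word in line)
	for word in line:
		print "\t%s at (%d, %d), size %dx%d" % (
			word["text"], word["x"], word["y"], word["width"], word["height"])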
diff --git a/nvdaHelper/localWin10/uwpOcr.h b/nvdaHelper/localWin10/uwpOcr.h
new file mode 100644
index 00000000000..4a723de9f4f
--- /dev/null
+++ b/nvdaHelper/localWin10/uwpOcr.h
@@ -0,0 +1,32 @@
+/*
+Header for C dll bridge to UWP OCR.
+This file is a part of the NVDA project.
+URL: http://www.nvaccess.org/
+Copyright 2017 NV Access Limited.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2.0, as published by
+ the Free Software Foundation.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+This license can be found at:
+http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+*/
+
+#pragma once
+#define export __declspec(dllexport)
+
+typedef void (*uwpOcr_Callback)(const char16* result);
+typedef struct {
+	Windows::Media::Ocr::OcrEngine^ engine;
+	uwpOcr_Callback callback;
+} UwpOcr;
+
+extern "C" {
+export UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback);
+export void __stdcall uwpOcr_terminate(UwpOcr* instance);
+export void __stdcall uwpOcr_recognize(UwpOcr* instance, const RGBQUAD* image, unsigned int width, unsigned int height);
+// Returns a BSTR of language codes terminated by semi-colons;
+// e.g. "de-de;en-us;".
+export BSTR __stdcall uwpOcr_getLanguages();
+}
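uwpOcr.h is the entire surface that Python sees; everything crosses as plain C types. Two points are easy to trip over when driving it through ctypes: the CFUNCTYPE callback object must stay referenced for as long as recognition may still fire it, and uwpOcr_initialize returns NULL when the requested language isn't installed. A rough sketch of the intended call sequence, assuming the DLL has already been loaded via ctypes (contentRecog/uwpOcr.py below is the real wrapper):

import ctypes

# Mirrors the uwpOcr_Callback typedef above: void (*)(const char16* result).
uwpOcr_Callback = ctypes.CFUNCTYPE(None, ctypes.c_wchar_p)

def sketchRecognize(dll, pixels, width, height):
	results = []
	@uwpOcr_Callback
	def onResult(result):
		# result is the JSON string built by uwpOcr_recognize, or NULL on error.
		results.append(result)
	handle = dll.uwpOcr_initialize(u"en-US", onResult)  # "en-US" is just an example tag
	if not handle:
		raise RuntimeError("Requested OCR language not available")
	# onResult (and thus results) must stay referenced until the callback
	# has fired; dropping the last reference early would crash the callback.
	dll.uwpOcr_recognize(handle, pixels, width, height)
	# The caller waits for onResult, then calls dll.uwpOcr_terminate(handle).
	return handle, onResult, results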
diff --git a/source/NVDAHelper.py b/source/NVDAHelper.py
index 4f1a6105465..53e19bd9207 100755
--- a/source/NVDAHelper.py
+++ b/source/NVDAHelper.py
@@ -477,3 +477,23 @@ def terminate():
 	VBuf_getTextInRange=None
 	localLib.nvdaHelperLocal_terminate()
 	localLib=None
+
+LOCAL_WIN10_DLL_PATH = ur"lib\nvdaHelperLocalWin10.dll"
+def getHelperLocalWin10Dll():
+	"""Get a ctypes WinDLL instance for the nvdaHelperLocalWin10 dll.
+	This is a C++/CX dll used to provide access to certain UWP functionality.
+	"""
+	return windll[LOCAL_WIN10_DLL_PATH]
+
+def bstrReturn(address):
+	"""Handle a BSTR returned from a ctypes function call.
+	This includes freeing the memory.
+	This is needed for nvdaHelperLocalWin10 functions which return a BSTR.
+	"""
+	# comtypes.BSTR.from_address seems to cause a crash for some reason. Not sure why.
+	# Just access the string ourselves.
+	# This will terminate at a null character, even though BSTR allows nulls.
+	# We're only using this for normal, null-terminated strings anyway.
+	val = wstring_at(address)
+	windll.oleaut32.SysFreeString(address)
+	return val
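bstrReturn relies on a ctypes feature: restype may be a plain Python callable, in which case ctypes invokes it with the function's integer return value and uses its result as the call's return value. That lets the BSTR be copied and freed in one step. For instance, this is how the OCR language list is fetched later in this diff (from within a running NVDA):

import NVDAHelper

dll = NVDAHelper.getHelperLocalWin10Dll()
# ctypes passes the raw BSTR address to bstrReturn, which copies the
# string out and frees the BSTR before handing it back.
dll.uwpOcr_getLanguages.restype = NVDAHelper.bstrReturn
langs = dll.uwpOcr_getLanguages()  # e.g. u"de-de;en-us;"
print langs.split(";")[:-1]  # e.g. [u"de-de", u"en-us"]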
diff --git a/source/NVDAObjects/IAccessible/__init__.py b/source/NVDAObjects/IAccessible/__init__.py
index 25101656d6e..5a1bdaac1cc 100644
--- a/source/NVDAObjects/IAccessible/__init__.py
+++ b/source/NVDAObjects/IAccessible/__init__.py
@@ -774,7 +774,8 @@ def _get_IAccessibleRole(self):
 		if role==0:
 			try:
 				role=self.IAccessibleObject.accRole(self.IAccessibleChildID)
-			except COMError:
+			except COMError as e:
+				log.debugWarning("accRole failed: %s" % e)
 				role=0
 		return role
diff --git a/source/config/configSpec.py b/source/config/configSpec.py
index b03866e9a91..a4870703876 100644
--- a/source/config/configSpec.py
+++ b/source/config/configSpec.py
@@ -192,6 +192,9 @@
 	hwIo = boolean(default=false)
 	audioDucking = boolean(default=false)
 
+[uwpOcr]
+	language = string(default="")
+
 [upgrade]
 	newLaptopKeyboardLayout = boolean(default=false)
diff --git a/source/contentRecog/__init__.py b/source/contentRecog/__init__.py
new file mode 100644
index 00000000000..8802cd8e956
--- /dev/null
+++ b/source/contentRecog/__init__.py
@@ -0,0 +1,282 @@
+#contentRecog/__init__.py
+#A part of NonVisual Desktop Access (NVDA)
+#Copyright (C) 2017 NV Access Limited
+#This file is covered by the GNU General Public License.
+#See the file COPYING for more details.
+
+"""Framework for recognition of content; OCR, image recognition, etc.
+When authors don't provide sufficient information for a screen reader user to determine the content of something,
+various tools can be used to attempt to recognize the content from an image.
+Some examples are optical character recognition (OCR) to recognize text in an image
+and the Microsoft Cognitive Services Computer Vision and Google Cloud Vision APIs to describe images.
+Recognizers take an image and produce text.
+They are implemented using the L{ContentRecognizer} class.
+"""
+
+from collections import namedtuple
+import textInfos.offsets
+
+class ContentRecognizer(object):
+	"""Implementation of a content recognizer.
+	"""
+
+	def getResizeFactor(self, width, height):
+		"""Return the factor by which an image must be resized
+		before it is passed to this recognizer.
+		@param width: The width of the image in pixels.
+		@type width: int
+		@param height: The height of the image in pixels.
+		@type height: int
+		@return: The resize factor, C{1} for no resizing.
+		@rtype: int or float
+		"""
+		return 1
+
+	def recognize(self, pixels, imgInfo, onResult):
+		"""Asynchronously recognize content from an image.
+		This method should not block.
+		Only one recognition can be performed at a time.
+		@param pixels: The pixels of the image as a two dimensional array of RGBQUADs.
+			For example, to get the red value for the coordinate (1, 2):
+			pixels[2][1].rgbRed
+			This can be treated as raw bytes in BGRA8 format;
+			i.e. four bytes per pixel in the order blue, green, red, alpha.
+			However, the alpha channel should be ignored.
+		@type pixels: Two dimensional array (y then x) of L{winGDI.RGBQUAD}
+		@param imgInfo: Information about the image for recognition.
+		@type imgInfo: L{RecogImageInfo}
+		@param onResult: A callable which takes a L{RecognitionResult} (or an exception on failure) as its only argument.
+		@type onResult: callable
+		"""
+		raise NotImplementedError
+
+	def cancel(self):
+		"""Cancel the recognition in progress (if any).
+		"""
+		raise NotImplementedError
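To add a recognizer, one subclasses ContentRecognizer and implements recognize and cancel. As a toy illustration (hypothetical; not part of this diff), a recognizer returning a fixed string via SimpleTextResult, which is defined later in this module, could look like this; a real engine would run asynchronously and call onResult from a background thread:

from contentRecog import ContentRecognizer, SimpleTextResult

class StaticTextRecognizer(ContentRecognizer):
	"""Hypothetical recognizer which "recognizes" a canned string."""

	def recognize(self, pixels, imgInfo, onResult):
		# A real implementation would submit the BGRA pixel data to an
		# engine here and must not block; this toy calls back immediately.
		onResult(SimpleTextResult(u"Hello from a recognizer"))

	def cancel(self):
		# Nothing asynchronous is running in this toy example.
		pass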
+class RecogImageInfo(object):
+	"""Encapsulates information about a recognized image and
+	provides functionality to convert coordinates.
+	An image captured for recognition can begin at any point on the screen.
+	However, the image must be cropped when passed to the recognizer.
+	Also, some recognizers need the image to be resized prior to recognition.
+	This class calculates the width and height of the image for recognition;
+	see the L{recogWidth} and L{recogHeight} attributes.
+	It can also convert coordinates in the recognized image
+	to screen coordinates suitable to be returned to NVDA;
+	e.g. in order to route the mouse.
+	This is done using the L{convertXToScreen} and L{convertYToScreen} methods.
+	"""
+
+	def __init__(self, screenLeft, screenTop, screenWidth, screenHeight, resizeFactor):
+		"""
+		@param screenLeft: The x screen coordinate of the upper-left corner of the image.
+		@type screenLeft: int
+		@param screenTop: The y screen coordinate of the upper-left corner of the image.
+		@type screenTop: int
+		@param screenWidth: The width of the image on the screen.
+		@type screenWidth: int
+		@param screenHeight: The height of the image on the screen.
+		@type screenHeight: int
+		@param resizeFactor: The factor by which the image must be resized for recognition.
+		@type resizeFactor: int or float
+		@raise ValueError: If the supplied screen coordinates indicate that
+			the image is not visible; e.g. width or height of 0.
+		"""
+		if screenLeft < 0 or screenTop < 0 or screenWidth <= 0 or screenHeight <= 0:
+			raise ValueError("Image not visible (invalid screen coordinates)")
+		self.screenLeft = screenLeft
+		self.screenTop = screenTop
+		self.screenWidth = screenWidth
+		self.screenHeight = screenHeight
+		self.resizeFactor = resizeFactor
+		#: The width of the recognized image.
+		self.recogWidth = int(screenWidth * resizeFactor)
+		#: The height of the recognized image.
+		self.recogHeight = int(screenHeight * resizeFactor)
+
+	@classmethod
+	def createFromRecognizer(cls, screenLeft, screenTop, screenWidth, screenHeight, recognizer):
+		"""Convenience method to construct an instance using a L{ContentRecognizer}.
+		The resize factor is obtained by calling L{ContentRecognizer.getResizeFactor}.
+		"""
+		resize = recognizer.getResizeFactor(screenWidth, screenHeight)
+		return cls(screenLeft, screenTop, screenWidth, screenHeight, resize)
+
+	def convertXToScreen(self, x):
+		"""Convert an x coordinate in the recognized image to an x coordinate on the screen.
+		"""
+		return self.screenLeft + int(x / self.resizeFactor)
+
+	def convertYToScreen(self, y):
+		"""Convert a y coordinate in the recognized image to a y coordinate on the screen.
+		"""
+		return self.screenTop + int(y / self.resizeFactor)
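A quick worked example of the coordinate arithmetic: an image at screen position (10, 20) sized 100x50, with a resize factor of 2, is recognized at 200x100, and result coordinates are divided back down and offset when mapped to the screen:

from contentRecog import RecogImageInfo

info = RecogImageInfo(10, 20, 100, 50, 2)
print info.recogWidth, info.recogHeight  # 200 100
# A point at (60, 40) in the recognized image maps back to the screen:
print info.convertXToScreen(60)  # 10 + 60 / 2 = 40
print info.convertYToScreen(40)  # 20 + 40 / 2 = 40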
+class RecognitionResult(object):
+	"""Provides access to the result of recognition by a recognizer.
+	The result is textual, but to facilitate navigation by word, line, etc.
+	and to allow for retrieval of screen coordinates within the text,
+	L{TextInfo} objects are used.
+	Callers use the L{makeTextInfo} method to create a L{TextInfo}.
+	Most implementers should use one of the subclasses provided in this module.
+	"""
+
+	def makeTextInfo(self, obj, position):
+		"""Make a TextInfo within the recognition result text at the requested position.
+		@param obj: The object to return for the C{obj} property of the TextInfo.
+			The TextInfo itself doesn't use this, but NVDA requires it to set the review object, etc.
+		@param position: The requested position; one of the C{textInfos.POSITION_*} constants.
+		@return: The TextInfo at the requested position in the result.
+		@rtype: L{textInfos.TextInfo}
+		"""
+		raise NotImplementedError
+
+# Used internally by LinesWordsResult.
+# (Lwr is short for LinesWordsResult.)
+LwrWord = namedtuple("LwrWord", ("offset", "left", "top"))
+
+class LinesWordsResult(RecognitionResult):
+	"""A L{RecognitionResult} which can create TextInfos based on a simple lines/words data structure.
+	The data structure is a list of lines, wherein each line is a list of words,
+	wherein each word is a dict containing the keys x, y, width, height and text.
+	Several OCR engines produce output in a format which can be easily converted to this.
+	"""
+
+	def __init__(self, data, imageInfo):
+		"""Constructor.
+		@param data: The lines/words data structure. For example:
+			[
+				[
+					{"x": 106, "y": 91, "width": 11, "height": 9, "text": "Word1"},
+					{"x": 117, "y": 91, "width": 11, "height": 9, "text": "Word2"}
+				],
+				[
+					{"x": 106, "y": 105, "width": 11, "height": 9, "text": "Word3"},
+					{"x": 117, "y": 105, "width": 11, "height": 9, "text": "Word4"}
+				]
+			]
+		@type data: list of lists of dicts
+		@param imageInfo: Information about the recognized image.
+			This is used to convert coordinates in the recognized image
+			to screen coordinates.
+		@type imageInfo: L{RecogImageInfo}
+		"""
+		self.data = data
+		self.imageInfo = imageInfo
+		self._textList = []
+		self.textLen = 0
+		#: End offsets for each line.
+		self.lines = []
+		#: Start offsets and screen coordinates for each word.
+		self.words = []
+		self._parseData()
+		self.text = "".join(self._textList)
+
+	def _parseData(self):
+		for line in self.data:
+			firstWordOfLine = True
+			for word in line:
+				if firstWordOfLine:
+					firstWordOfLine = False
+				else:
+					# Separate with a space.
+					self._textList.append(" ")
+					self.textLen += 1
+				self.words.append(LwrWord(self.textLen,
+					self.imageInfo.convertXToScreen(word["x"]),
+					self.imageInfo.convertYToScreen(word["y"])))
+				text = word["text"]
+				self._textList.append(text)
+				self.textLen += len(text)
+			# End with new line.
+			self._textList.append("\n")
+			self.textLen += 1
+			self.lines.append(self.textLen)
+
+	def makeTextInfo(self, obj, position):
+		return LwrTextInfo(obj, position, self)
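The bookkeeping in _parseData flattens everything into one string, recording a start offset and screen point per word and an end offset per line. With the same two-line, four-word data used by the unit tests later in this diff:

from contentRecog import LinesWordsResult, RecogImageInfo

data = [
	[
		{"x": 100, "y": 200, "width": 10, "height": 20, "text": "word1"},
		{"x": 110, "y": 200, "width": 10, "height": 20, "text": "word2"},
	],
	[
		{"x": 100, "y": 220, "width": 10, "height": 20, "text": "word3"},
		{"x": 110, "y": 220, "width": 10, "height": 20, "text": "word4"},
	],
]
result = LinesWordsResult(data, RecogImageInfo(0, 0, 1000, 2000, 1))
print repr(result.text)  # 'word1 word2\nword3 word4\n'
print [word.offset for word in result.words]  # [0, 6, 12, 18]
print result.lines  # [12, 24]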
+class LwrTextInfo(textInfos.offsets.OffsetsTextInfo):
+	"""TextInfo used by L{LinesWordsResult}.
+	This should only be instantiated by L{LinesWordsResult}.
+	"""
+
+	def __init__(self, obj, position, result):
+		self.result = result
+		super(LwrTextInfo, self).__init__(obj, position)
+
+	def copy(self):
+		return self.__class__(self.obj, self.bookmark, self.result)
+
+	def _getTextRange(self, start, end):
+		return self.result.text[start:end]
+
+	def _getStoryLength(self):
+		return self.result.textLen
+
+	def _getLineOffsets(self, offset):
+		start = 0
+		for end in self.result.lines:
+			if end > offset:
+				return (start, end)
+			start = end
+		# offset is too big. Fail gracefully by returning the last line.
+		return (start, self.result.textLen)
+
+	def _getWordOffsets(self, offset):
+		start = 0
+		for word in self.result.words:
+			if word.offset > offset:
+				return (start, word.offset)
+			start = word.offset
+		# offset is in the last word (or offset is too big).
+		return (start, self.result.textLen)
+
+	def _getPointFromOffset(self, offset):
+		word = None
+		for nextWord in self.result.words:
+			if nextWord.offset > offset:
+				# Stop! We need the word before this.
+				break
+			word = nextWord
+		return textInfos.Point(word.left, word.top)
+
+class SimpleTextResult(RecognitionResult):
+	"""A L{RecognitionResult} which presents a simple text string.
+	This should only be used if the recognizer only returns text
+	and no coordinate information.
+	In this case, NVDA calculates words and lines itself based on the text;
+	e.g. a new line character breaks a line.
+	Routing the mouse, etc. cannot be supported because even though NVDA
+	has the coordinates for the entire block of content, it doesn't have
+	the coordinates for individual words or characters.
+	"""
+
+	def __init__(self, text):
+		self.text = text
+
+	def makeTextInfo(self, obj, position):
+		return SimpleResultTextInfo(obj, position, self)
+
+class SimpleResultTextInfo(textInfos.offsets.OffsetsTextInfo):
+	"""TextInfo used by L{SimpleTextResult}.
+	This should only be instantiated by L{SimpleTextResult}.
+	"""
+
+	def __init__(self, obj, position, result):
+		self.result = result
+		super(SimpleResultTextInfo, self).__init__(obj, position)
+
+	def copy(self):
+		return self.__class__(self.obj, self.bookmark, self.result)
+
+	def _getStoryText(self):
+		return self.result.text
+
+	def _getStoryLength(self):
+		return len(self.result.text)
diff --git a/source/contentRecog/recogUi.py b/source/contentRecog/recogUi.py
new file mode 100644
index 00000000000..17a48dfd68c
--- /dev/null
+++ b/source/contentRecog/recogUi.py
@@ -0,0 +1,153 @@
+#contentRecog/recogUi.py
+#A part of NonVisual Desktop Access (NVDA)
+#Copyright (C) 2017 NV Access Limited
+#This file is covered by the GNU General Public License.
+#See the file COPYING for more details.
+
+"""User interface for content recognition.
+This module provides functionality to capture an image from the screen
+for the current navigator object, pass it to a content recognizer for recognition
+and present the result to the user so they can read it with cursor keys, etc.
+NVDA scripts or GUI call the L{recognizeNavigatorObject} function with the recognizer they wish to use.
+"""
+
+import api
+import ui
+import screenBitmap
+import NVDAObjects.window
+import controlTypes
+import browseMode
+import cursorManager
+import eventHandler
+import textInfos
+from logHandler import log
+import queueHandler
+from . import RecogImageInfo
+
+class RecogResultNVDAObject(cursorManager.CursorManager, NVDAObjects.window.Window):
+	"""Fake NVDAObject used to present a recognition result in a cursor manager.
+	This allows the user to read the result with cursor keys, etc.
+	Pressing enter will activate (e.g. click) the text at the cursor.
+	Pressing escape dismisses the recognition result.
+	"""
+
+	role = controlTypes.ROLE_DOCUMENT
+	# Translators: The title of the document used to present the result of content recognition.
+	name = _("Result")
+	treeInterceptor = None
+
+	def __init__(self, result=None, obj=None):
+		self.parent = parent = api.getFocusObject()
+		self.result = result
+		self._selection = self.makeTextInfo(textInfos.POSITION_FIRST)
+		super(RecogResultNVDAObject, self).__init__(windowHandle=parent.windowHandle)
+
+	def makeTextInfo(self, position):
+		# Maintain our own fake selection/caret.
+ if position == textInfos.POSITION_SELECTION: + ti = self._selection.copy() + elif position == textInfos.POSITION_CARET: + ti = self._selection.copy() + ti.collapse() + else: + ti = self.result.makeTextInfo(self, position) + return self._patchTextInfo(ti) + + def _patchTextInfo(self, info): + # Patch TextInfos so that updateSelection/Caret updates our fake selection. + info.updateCaret = lambda: self._setSelection(info, True) + info.updateSelection = lambda: self._setSelection(info, False) + # Ensure any copies get patched too. + oldCopy = info.copy + info.copy = lambda: self._patchTextInfo(oldCopy()) + return info + + def _setSelection(self, textInfo, collapse): + self._selection = textInfo.copy() + if collapse: + self._selection.collapse() + + def setFocus(self): + ti = self.parent.treeInterceptor + if isinstance(ti, browseMode.BrowseModeDocumentTreeInterceptor): + # Normally, when entering browse mode from a descendant (e.g. dialog), + # we want the cursor to move to the focus (#3145). + # However, we don't want this for recognition results, as these aren't focusable. + ti._enteringFromOutside = True + # This might get called from a background thread and all NVDA events must run in the main thread. + eventHandler.queueEvent("gainFocus", self) + + def script_activatePosition(self, gesture): + try: + self._selection.activate() + except NotImplementedError: + log.debugWarning("Result TextInfo does not implement activate") + # Translators: Describes a command. + script_activatePosition.__doc__ = _("Activates the text at the cursor if possible") + + def script_exit(self, gesture): + eventHandler.executeEvent("gainFocus", self.parent) + # Translators: Describes a command. + script_exit.__doc__ = _("Dismiss the recognition result") + + # The find commands are tricky to support because they pop up dialogs. + # This moves the focus, so we lose our fake focus. + # See https://github.com/nvaccess/nvda/pull/7361#issuecomment-314698991 + def script_find(self, gesture): + # Translators: Reported when a user tries to use a find command when it isn't supported. + ui.message(_("Not supported in this document")) + + def script_findNext(self, gesture): + # Translators: Reported when a user tries to use a find command when it isn't supported. + ui.message(_("Not supported in this document")) + + def script_findPrevious(self, gesture): + # Translators: Reported when a user tries to use a find command when it isn't supported. + ui.message(_("Not supported in this document")) + + __gestures = { + "kb:enter": "activatePosition", + "kb:space": "activatePosition", + "kb:escape": "exit", + } + +#: Keeps track of the recognition in progress, if any. +_activeRecog = None +def recognizeNavigatorObject(recognizer): + """User interface function to recognize content in the navigator object. + This should be called from a script or in response to a GUI action. + @param recognizer: The content recognizer to use. + @type recognizer: L{contentRecog.ContentRecognizer} + """ + global _activeRecog + nav = api.getNavigatorObject() + left, top, width, height = nav.location + try: + imgInfo = RecogImageInfo.createFromRecognizer(left, top, width, height, recognizer) + except ValueError: + # Translators: Reported when content recognition (e.g. OCR) is attempted, + # but the content is not visible. + ui.message(_("Content is not visible")) + return + if _activeRecog: + _activeRecog.cancel() + # Translators: Reporting when content recognition (e.g. OCR) begins. 
+ ui.message(_("Recognizing")) + sb = screenBitmap.ScreenBitmap(imgInfo.recogWidth, imgInfo.recogHeight) + pixels = sb.captureImage(left, top, width, height) + _activeRecog = recognizer + recognizer.recognize(pixels, imgInfo, _recogOnResult) + +def _recogOnResult(result): + global _activeRecog + _activeRecog = None + # This might get called from a background thread, so any UI calls must be queued to the main thread. + if isinstance(result, Exception): + # Translators: Reported when recognition (e.g. OCR) fails. + log.error("Recognition failed: %s" % result) + queueHandler.queueFunction(queueHandler.eventQueue, + ui.message, _("Recognition failed")) + return + resObj = RecogResultNVDAObject(result=result) + # This method queues an event to the main thread. + resObj.setFocus() diff --git a/source/contentRecog/uwpOcr.py b/source/contentRecog/uwpOcr.py new file mode 100644 index 00000000000..119470fb774 --- /dev/null +++ b/source/contentRecog/uwpOcr.py @@ -0,0 +1,111 @@ +#contentRecog/uwpOcr.py +#A part of NonVisual Desktop Access (NVDA) +#Copyright (C) 2017 NV Access Limited +#This file is covered by the GNU General Public License. +#See the file COPYING for more details. + +"""Recognition of text using the UWP OCR engine included in Windows 10. +""" + +import ctypes +import json +import NVDAHelper +from . import ContentRecognizer, LinesWordsResult +import config +import languageHandler + +uwpOcr_Callback = ctypes.CFUNCTYPE(None, ctypes.c_wchar_p) + +def getLanguages(): + """Return the available recognition languages. + @return: A list of language codes suitable to be passed to L{UwpOcr}'s constructor. + These need to be normalized with L{languageHandler.normalizeLanguage} + for use as NVDA language codes. + @rtype: list of unicode + """ + dll = NVDAHelper.getHelperLocalWin10Dll() + dll.uwpOcr_getLanguages.restype = NVDAHelper.bstrReturn + langs = dll.uwpOcr_getLanguages() + return langs.split(";")[:-1] + +def getInitialLanguage(): + """Get the language to use the first time UWP OCR is used. + The NVDA interface language is used if a matching OCR language is available. + Otherwise, this falls back to the first available language. + """ + nvdaLang = languageHandler.getLanguage() + ocrLangs = getLanguages() + return _getInitialLanguage(nvdaLang, ocrLangs) + +def _getInitialLanguage(nvdaLang, ocrLangs): + # Try the full language code. + for lang in ocrLangs: + normLang = languageHandler.normalizeLanguage(lang) + if nvdaLang == normLang: + return lang + # Try the language code without country. + nvdaLangPrimary = nvdaLang.split("_", 1)[0] + for lang in ocrLangs: + # Don't need to normalize here because the primary code is + # the same when normalized. + if lang.startswith(nvdaLangPrimary): + return lang + # Fall back to the first OCR language. + if len(ocrLangs) >= 1: + return ocrLangs[0] + raise LookupError("No UWP OCR languages installed") + +def getConfigLanguage(): + """Get the user's configured OCR language. + If no language has been configured, choose an initial language + and update the configuration. + """ + lang = config.conf["uwpOcr"]["language"] + if lang: + return lang + initial = getInitialLanguage() + config.conf["uwpOcr"]["language"] = initial + return initial + +class UwpOcr(ContentRecognizer): + + def getResizeFactor(self, width, height): + # UWP OCR performs poorly with small images, so increase their size. 
+ if width < 100 or height < 100: + return 4 + return 1 + + def __init__(self, language=None): + """ + @param language: The language code of the desired recognition language, + C{None} to use the user's configured language. + """ + if language: + self.language = language + else: + self.language = getConfigLanguage() + self._dll = NVDAHelper.getHelperLocalWin10Dll() + + def recognize(self, pixels, imgInfo, onResult): + self._onResult = onResult + @uwpOcr_Callback + def callback(result): + # If self._onResult is None, recognition was cancelled. + if self._onResult: + if result: + data = json.loads(result) + self._onResult(LinesWordsResult(data, imgInfo)) + else: + self._onResult(RuntimeError("UWP OCR failed")) + self._dll.uwpOcr_terminate(self._handle) + self._callback = None + self._handle = None + self._callback = callback + self._handle = self._dll.uwpOcr_initialize(self.language, callback) + if not self._handle: + onResult(RuntimeError("UWP OCR initialization failed")) + return + self._dll.uwpOcr_recognize(self._handle, pixels, imgInfo.recogWidth, imgInfo.recogHeight) + + def cancel(self): + self._onResult = None diff --git a/source/globalCommands.py b/source/globalCommands.py index 0897e467ff2..3895c5337b5 100755 --- a/source/globalCommands.py +++ b/source/globalCommands.py @@ -40,6 +40,7 @@ import characterProcessing from baseObject import ScriptableObject import core +import winVersion #: Script category for text review commands. # Translators: The name of a category of NVDA commands. @@ -2023,6 +2024,17 @@ def script_interactWithMath(self, gesture): # Translators: Describes a command. script_interactWithMath.__doc__ = _("Begins interaction with math content") + def script_recognizeWithUwpOcr(self, gesture): + if not winVersion.isUwpOcrAvailable(): + # Translators: Reported when Windows 10 OCR is not available. + ui.message(_("Windows 10 OCR not available")) + return + from contentRecog import uwpOcr, recogUi + recog = uwpOcr.UwpOcr() + recogUi.recognizeNavigatorObject(recog) + # Translators: Describes a command. + script_recognizeWithUwpOcr.__doc__ = _("Recognize the content of the current navigator object with Windows 10 OCR") + __gestures = { # Basic "kb:NVDA+n": "showGui", @@ -2205,6 +2217,7 @@ def script_interactWithMath(self, gesture): "kb:NVDA+control+f3": "reloadPlugins", "kb(desktop):NVDA+control+f2": "test_navigatorDisplayModelText", "kb:NVDA+alt+m": "interactWithMath", + "kb:NVDA+r": "recognizeWithUwpOcr", } #: The single global commands instance. diff --git a/source/gui/__init__.py b/source/gui/__init__.py index fc26f579f0d..9b9d129f6b4 100644 --- a/source/gui/__init__.py +++ b/source/gui/__init__.py @@ -32,6 +32,7 @@ import winUser import api import guiHelper +import winVersion try: import updateCheck @@ -241,6 +242,9 @@ def onBrowseModeCommand(self,evt): def onDocumentFormattingCommand(self,evt): self._popupSettingsDialog(DocumentFormattingDialog) + def onUwpOcrCommand(self, evt): + self._popupSettingsDialog(UwpOcrDialog) + def onSpeechSymbolsCommand(self, evt): self._popupSettingsDialog(SpeechSymbolsDialog) @@ -355,6 +359,10 @@ def __init__(self, frame): # Translators: The label for the menu item to open Document Formatting settings dialog. item = menu_preferences.Append(wx.ID_ANY,_("Document &formatting..."),_("Change settings of document properties")) self.Bind(wx.EVT_MENU, frame.onDocumentFormattingCommand, item) + if winVersion.isUwpOcrAvailable(): + # Translators: The label for the menu item to open the Windows 10 OCR settings dialog. 
+ item = menu_preferences.Append(wx.ID_ANY, _("Windows 10 OCR...")) + self.Bind(wx.EVT_MENU, frame.onUwpOcrCommand, item) subMenu_speechDicts = wx.Menu() if not globalVars.appArgs.secure: # Translators: The label for the menu item to open Default speech dictionary dialog. diff --git a/source/gui/settingsDialogs.py b/source/gui/settingsDialogs.py index cbe86d1c3bd..7436c1ebe91 100644 --- a/source/gui/settingsDialogs.py +++ b/source/gui/settingsDialogs.py @@ -1316,6 +1316,35 @@ def onOk(self,evt): config.conf["documentFormatting"]["reportClickable"]=self.clickableCheckBox.Value super(DocumentFormattingDialog, self).onOk(evt) +class UwpOcrDialog(SettingsDialog): + # Translators: The title of the Windows 10 OCR dialog. + title = _("Windows 10 OCR") + + def makeSettings(self, settingsSizer): + sHelper = guiHelper.BoxSizerHelper(self, sizer=settingsSizer) + # Lazily import this. + from contentRecog import uwpOcr + self.languageCodes = uwpOcr.getLanguages() + languageChoices = [ + languageHandler.getLanguageDescription(languageHandler.normalizeLanguage(lang)) + for lang in self.languageCodes] + # Translators: Label for an option in the Windows 10 OCR dialog. + languageLabel = _("Recognition &language:") + self.languageChoice = sHelper.addLabeledControl(languageLabel, wx.Choice, choices=languageChoices) + try: + langIndex = self.languageCodes.index(config.conf["uwpOcr"]["language"]) + self.languageChoice.Selection = langIndex + except ValueError: + self.languageChoice.Selection = 0 + + def postInit(self): + self.languageChoice.SetFocus() + + def onOk(self, evt): + lang = self.languageCodes[self.languageChoice.Selection] + config.conf["uwpOcr"]["language"] = lang + super(UwpOcrDialog, self).onOk(evt) + class DictionaryEntryDialog(wx.Dialog): TYPE_LABELS = { # Translators: This is a label for an Entry Type radio button in add dictionary entry dialog. diff --git a/source/screenBitmap.py b/source/screenBitmap.py index 7dc66bcfbcd..2e0e09f9f4d 100644 --- a/source/screenBitmap.py +++ b/source/screenBitmap.py @@ -1,3 +1,12 @@ +#screenBitmap.py +#A part of NonVisual Desktop Access (NVDA) +#Copyright (C) 2011-2017 NV Access Limited +#This file is covered by the GNU General Public License. +#See the file COPYING for more details. + +"""Functionality to capture and work with bitmaps of the screen. +""" + import ctypes import winGDI @@ -44,8 +53,8 @@ def captureImage(self,x,y,w,h): #Copy the requested content from the screen in to our memory device context, stretching/shrinking its size to fit. 
gdi32.StretchBlt(self._memDC,0,0,self.width,self.height,self._screenDC,x,y,w,h,winGDI.SRCCOPY) #Fetch the pixels from our memory bitmap and store them in a buffer to be returned - buffer=(winGDI.RGBQUAD*w*h)() - gdi32.GetDIBits(self._memDC,self._memBitmap,0,self.height,buffer,ctypes.byref(self._bmInfo),winGDI.DIB_RGB_COLORS); + buffer=(winGDI.RGBQUAD*self.width*self.height)() + gdi32.GetDIBits(self._memDC,self._memBitmap,0,self.height,buffer,ctypes.byref(self._bmInfo),winGDI.DIB_RGB_COLORS) return buffer def rgbPixelBrightness(p): diff --git a/source/synthDrivers/oneCore.py b/source/synthDrivers/oneCore.py index c259eb9f17c..7749fe6f2ad 100644 --- a/source/synthDrivers/oneCore.py +++ b/source/synthDrivers/oneCore.py @@ -20,6 +20,7 @@ import speechXml import languageHandler import winVersion +import NVDAHelper SAMPLES_PER_SEC = 22050 BITS_PER_SAMPLE = 16 @@ -28,19 +29,6 @@ HUNDRED_NS_PER_SEC = 10000000 # 1000000000 ns per sec / 100 ns WAV_HEADER_LEN = 44 ocSpeech_Callback = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_int, ctypes.c_wchar_p) -DLL_FILE = ur"lib\nvdaHelperLocalWin10.dll" - -def bstrReturn(address): - """Handle a BSTR returned from a ctypes function call. - This includes freeing the memory. - """ - # comtypes.BSTR.from_address seems to cause a crash for some reason. Not sure why. - # Just access the string ourselves. - # This will terminate at a null character, even though BSTR allows nulls. - # We're only using this for normal, null-terminated strings anyway. - val = ctypes.wstring_at(address) - ctypes.windll.oleaut32.SysFreeString(address) - return val class _OcSsmlConverter(speechXml.SsmlConverter): @@ -118,12 +106,12 @@ def check(cls): def __init__(self): super(SynthDriver, self).__init__() - self._dll = ctypes.windll[DLL_FILE] + self._dll = NVDAHelper.getHelperLocalWin10Dll() self._dll.ocSpeech_getCurrentVoiceLanguage.restype = ctypes.c_wchar_p self._handle = self._dll.ocSpeech_initialize() self._callbackInst = ocSpeech_Callback(self._callback) self._dll.ocSpeech_setCallback(self._handle, self._callbackInst) - self._dll.ocSpeech_getVoices.restype = bstrReturn + self._dll.ocSpeech_getVoices.restype = NVDAHelper.bstrReturn self._dll.ocSpeech_getCurrentVoiceId.restype = ctypes.c_wchar_p self._player = nvwave.WavePlayer(1, SAMPLES_PER_SEC, BITS_PER_SAMPLE, outputDevice=config.conf["speech"]["outputDevice"]) # Initialize state. diff --git a/source/winVersion.py b/source/winVersion.py index 94e14bccb7b..a2a39b59c1e 100644 --- a/source/winVersion.py +++ b/source/winVersion.py @@ -1,10 +1,11 @@ #winVersion.py #A part of NonVisual Desktop Access (NVDA) -#Copyright (C) 2006-2013 NV Access Limited +#Copyright (C) 2006-2017 NV Access Limited #This file is covered by the GNU General Public License. #See the file COPYING for more details. import sys +import os import winUser winVersion=sys.getwindowsversion() @@ -29,3 +30,7 @@ def canRunVc2010Builds(): # so all versions should be fine.) 
return winVersion.service_pack_major >= 1 return True + +UWP_OCR_DATA_PATH = os.path.expandvars(r"$windir\OCR") +def isUwpOcrAvailable(): + return os.path.isdir(UWP_OCR_DATA_PATH) diff --git a/tests/unit/contentRecog/__init__.py b/tests/unit/contentRecog/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/unit/contentRecog/test_contentRecog.py b/tests/unit/contentRecog/test_contentRecog.py new file mode 100644 index 00000000000..b7212e791be --- /dev/null +++ b/tests/unit/contentRecog/test_contentRecog.py @@ -0,0 +1,163 @@ +#tests/unit/contentRecog/test_contentRecog.py +#A part of NonVisual Desktop Access (NVDA) +#This file is covered by the GNU General Public License. +#See the file COPYING for more details. +#Copyright (C) 2017 NV Access Limited + +"""Unit tests for the contentRecog module. +""" + +import unittest +import contentRecog +import textInfos + +class TestRecogImageInfo(unittest.TestCase): + + def test_noOffsetNoResize(self): + info = contentRecog.RecogImageInfo(0, 0, 1000, 2000, 1) + self.assertEqual(info.recogWidth, 1000) + self.assertEqual(info.recogHeight, 2000) + self.assertEqual(info.convertXToScreen(100), 100) + self.assertEqual(info.convertYToScreen(200), 200) + + def test_withOffsetNoResize(self): + info = contentRecog.RecogImageInfo(10, 20, 1000, 2000, 1) + self.assertEqual(info.recogWidth, 1000) + self.assertEqual(info.recogHeight, 2000) + self.assertEqual(info.convertXToScreen(100), 110) + self.assertEqual(info.convertYToScreen(200), 220) + + def test_noOffsetWithResize(self): + info = contentRecog.RecogImageInfo(0, 0, 1000, 2000, 2) + self.assertEqual(info.recogWidth, 2000) + self.assertEqual(info.recogHeight, 4000) + self.assertEqual(info.convertXToScreen(200), 100) + self.assertEqual(info.convertYToScreen(400), 200) + + def test_withOffsetWithResize(self): + info = contentRecog.RecogImageInfo(10, 20, 1000, 2000, 2) + self.assertEqual(info.recogWidth, 2000) + self.assertEqual(info.recogHeight, 4000) + self.assertEqual(info.convertXToScreen(200), 110) + self.assertEqual(info.convertYToScreen(400), 220) + +class FakeNVDAObject(object): + pass + +class TestLinesWordsResult(unittest.TestCase): + """Tests that contentRecog.LinesWordsResult and contentRecog.LwrTextInfo + correctly parse and process the JSON from a recognizer. 
+ """ + DATA = [ + [ + {"x": 100, "y": 200, "width": 10, "height": 20, "text": "word1"}, + {"x": 110, "y": 200, "width": 10, "height": 20, "text": "word2"} + ], + [ + {"x": 100, "y": 220, "width": 10, "height": 20, "text": "word3"}, + {"x": 110, "y": 220, "width": 10, "height": 20, "text": "word4"} + ] + ] + TOP = 0 + BOTTOM = 23 + WORD1_OFFSETS = (0, 6) + WORD1_SECOND = 1 + WORD1_LAST = 5 + WORD1_POINT = (100, 200) + WORD2_START = 6 + WORD2_OFFSETS = (6, 12) + WORD2_POINT = (110, 200) + WORD3_OFFSETS = (12, 18) + WORD3_START = 12 + WORD3_POINT = (100, 220) + WORD4_OFFSETS = (18, 24) + WORD4_POINT = (110, 220) + LINE1_OFFSETS = (0, 12) + LINE1_SECOND = 1 + LINE1_LAST = 11 + LINE2_OFFSETS = (12, 24) + LINE2_START = 12 + + def setUp(self): + info = contentRecog.RecogImageInfo(0, 0, 1000, 2000, 1) + self.result = contentRecog.LinesWordsResult(self.DATA, info) + self.fakeObj = FakeNVDAObject() + self.textInfo = self.result.makeTextInfo(self.fakeObj, textInfos.POSITION_FIRST) + + def test_text(self): + self.assertEqual(self.result.text, "word1 word2\nword3 word4\n") + + def test_textLen(self): + self.assertEqual(self.result.textLen, len(self.result.text)) + + def test_wordOffsetsAtTop(self): + actual = self.textInfo._getWordOffsets(self.TOP) + self.assertEqual(actual, self.WORD1_OFFSETS) + + def test_wordOffsetsAtWord1SecondChar(self): + actual = self.textInfo._getWordOffsets(self.WORD1_SECOND) + self.assertEqual(actual, self.WORD1_OFFSETS) + + def test_wordOffsetsAtWord1LastChar(self): + actual = self.textInfo._getWordOffsets(self.WORD1_LAST) + self.assertEqual(actual, self.WORD1_OFFSETS) + + def test_wordOffsetsAtWord2Start(self): + actual = self.textInfo._getWordOffsets(self.WORD2_START) + self.assertEqual(actual, self.WORD2_OFFSETS) + + def test_wordOffsetsAtLine2Start(self): + actual = self.textInfo._getWordOffsets(self.LINE2_START) + self.assertEqual(actual, self.WORD3_OFFSETS) + + def test_wordOffsetsAtBottom(self): + actual = self.textInfo._getWordOffsets(self.BOTTOM) + self.assertEqual(actual, self.WORD4_OFFSETS) + + def test_lineOffsetsAtTop(self): + actual = self.textInfo._getLineOffsets(self.TOP) + self.assertEqual(actual, self.LINE1_OFFSETS) + + def test_lineOffsetsAtLine1SecondChar(self): + actual = self.textInfo._getLineOffsets(self.LINE1_SECOND) + self.assertEqual(actual, self.LINE1_OFFSETS) + + def test_lineOffsetsAtLine1LastChar(self): + actual = self.textInfo._getLineOffsets(self.LINE1_LAST) + self.assertEqual(actual, self.LINE1_OFFSETS) + + def test_lineOffsetsAtLine2Start(self): + actual = self.textInfo._getLineOffsets(self.LINE2_START) + self.assertEqual(actual, self.LINE2_OFFSETS) + + def test_lineOffsetsAtBottom(self): + actual = self.textInfo._getLineOffsets(self.BOTTOM) + self.assertEqual(actual, self.LINE2_OFFSETS) + + def test_pointFromOffsetAtTop(self): + actual = self.textInfo._getPointFromOffset(self.TOP) + self.assertEqual((actual.x, actual.y), self.WORD1_POINT) + + def test_pointFromOffsetAtWord1SecondChar(self): + actual = self.textInfo._getPointFromOffset(self.WORD1_SECOND) + self.assertEqual((actual.x, actual.y), self.WORD1_POINT) + + def test_pointFromOffsetAtWord1LastChar(self): + actual = self.textInfo._getPointFromOffset(self.WORD1_LAST) + self.assertEqual((actual.x, actual.y), self.WORD1_POINT) + + def test_pointFromOffsetAtWord2Start(self): + actual = self.textInfo._getPointFromOffset(self.WORD2_START) + self.assertEqual((actual.x, actual.y), self.WORD2_POINT) + + def test_pointFromOffsetAtLine2Start(self): + actual = 
self.textInfo._getPointFromOffset(self.LINE2_START) + self.assertEqual((actual.x, actual.y), self.WORD3_POINT) + + def test_pointFromOffsetAtBottom(self): + actual = self.textInfo._getPointFromOffset(self.BOTTOM) + self.assertEqual((actual.x, actual.y), self.WORD4_POINT) + + def test_copyTextInfo(self): + copy = self.textInfo.copy() + self.assertEqual(copy, self.textInfo) diff --git a/tests/unit/contentRecog/test_uwpOcr.py b/tests/unit/contentRecog/test_uwpOcr.py new file mode 100644 index 00000000000..1f5a8123ffd --- /dev/null +++ b/tests/unit/contentRecog/test_uwpOcr.py @@ -0,0 +1,38 @@ +#tests/unit/contentRecog/test_uwpOcr.py +#A part of NonVisual Desktop Access (NVDA) +#This file is covered by the GNU General Public License. +#See the file COPYING for more details. +#Copyright (C) 2017 NV Access Limited + +"""Unit tests for the contentRecog.uwpOcr module. +""" + +import unittest +from contentRecog import uwpOcr + +class TestGetInitialLanguage(unittest.TestCase): + LANGS = ["de-DE", "en-US"] + + def test_fullLangFullMatch(self): + actual = uwpOcr._getInitialLanguage("en_US", self.LANGS) + self.assertEqual(actual, "en-US") + + def test_primaryLangPrimaryMatch(self): + actual = uwpOcr._getInitialLanguage("en", self.LANGS) + self.assertEqual(actual, "en-US") + + def test_fullLangPrimaryMatch(self): + actual = uwpOcr._getInitialLanguage("en_AU", self.LANGS) + self.assertEqual(actual, "en-US") + + def test_fullLangNoMatch(self): + """Test fall back to first available language if no match. + """ + actual = uwpOcr._getInitialLanguage("it_IT", self.LANGS) + self.assertEqual(actual, "de-DE") + + def test_primaryLangNoMatch(self): + """Test fall back to first available language if no match. + """ + actual = uwpOcr._getInitialLanguage("it", self.LANGS) + self.assertEqual(actual, "de-DE") diff --git a/user_docs/en/userGuide.t2t b/user_docs/en/userGuide.t2t index e6228e5941b..282a4b11bbd 100644 --- a/user_docs/en/userGuide.t2t +++ b/user_docs/en/userGuide.t2t @@ -652,6 +652,31 @@ Dot 8 translates any braille input and presses the enter key. Pressing dot 7 + dot 8 translates any braille input, but without adding a space or pressing enter. %kc:endInclude ++ Content Recognition + +When authors don't provide sufficient information for a screen reader user to determine the content of something, various tools can be used to attempt to recognize the content from an image. +NVDA supports the optical character recognition (OCR) functionality built into Windows 10 to recognize text from images. +Additional content recognizers can be provided in NVDA add-ons. + +When you use a content recognition command, NVDA recognizes content from the current [navigator object #ObjectNavigation]. +By default, the navigator object follows the system focus or browse mode cursor, so you can usually just move the focus or browse mode cursor where desired. +For example, if you move the browse mode cursor to a graphic, recognition will recognize content from the graphic by default. +However, you may wish to use object navigation directly to, for example, recognize the content of an entire application window. + +Once recognition is complete, the result will be presented in a document similar to browse mode, allowing you to read the information with cursor keys, etc. +Pressing enter or space will activate (normally click) the text at the cursor if possible. +Pressing escape dismisses the recognition result. + +++ Windows 10 OCR ++[Win10Ocr] +Windows 10 includes OCR for many languages. 
+NVDA can use this to recognize text from images or inaccessible applications. + +You can set the language to use for text recognition in the [Windows 10 OCR #Win10OcrSettings] settings dialog. +Additional languages can be installed by opening the Start menu, choosing Settings, selecting Time & Language -> Region & Language and then choosing Add a language. + +%kc:beginInclude +To recognize the text in the current navigator object using Windows 10 OCR, press NVDA+r. +%kc:endInclude + + Application Specific Features + NVDA provides its own extra features for some applications to make certain tasks easier or to provide access to functionality which is not otherwise accessible to screen reader users. @@ -1385,6 +1410,12 @@ The tone will increase in pitch every space, and for a tab, it will increase in - Both Speech and Tones: This option reads indentation using both of the above methods. - ++++ Windows 10 OCR Settings +++[Win10OcrSettings] +This dialog can be accessed from the NVDA Preferences menu and allows you to configure [Windows 10 OCR #Win10Ocr]. + +==== Recognition language ==== +This combo box allows you to choose the language to be used for text recognition. + +++ Speech dictionaries +++ The speech dictionaries menu (found in the Preferences menu) contains dialogs that allow you to manage the way NVDA pronounces particular words or phrases. There are currently three different types of speech dictionaries.