From 9a086fd6e33afa856249824932de686cbfdfc0cd Mon Sep 17 00:00:00 2001 From: GiviMAD Date: Tue, 8 Feb 2022 20:52:02 +0100 Subject: [PATCH] [watsonstt] initial contribution (#12161) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [watsonstt] initial contribution Signed-off-by: Miguel Álvarez Díez --- CODEOWNERS | 1 + bom/openhab-addons/pom.xml | 5 + bundles/org.openhab.voice.watsonstt/NOTICE | 20 ++ bundles/org.openhab.voice.watsonstt/README.md | 65 ++++ bundles/org.openhab.voice.watsonstt/pom.xml | 70 ++++ .../src/main/feature/feature.xml | 9 + .../internal/WatsonSTTConfiguration.java | 63 ++++ .../internal/WatsonSTTConstants.java | 43 +++ .../watsonstt/internal/WatsonSTTService.java | 310 ++++++++++++++++++ .../main/resources/OH-INF/config/config.xml | 68 ++++ .../OH-INF/i18n/watsonstt.properties | 26 ++ bundles/pom.xml | 1 + 12 files changed, 681 insertions(+) create mode 100644 bundles/org.openhab.voice.watsonstt/NOTICE create mode 100644 bundles/org.openhab.voice.watsonstt/README.md create mode 100644 bundles/org.openhab.voice.watsonstt/pom.xml create mode 100644 bundles/org.openhab.voice.watsonstt/src/main/feature/feature.xml create mode 100644 bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTConfiguration.java create mode 100644 bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTConstants.java create mode 100644 bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTService.java create mode 100644 bundles/org.openhab.voice.watsonstt/src/main/resources/OH-INF/config/config.xml create mode 100644 bundles/org.openhab.voice.watsonstt/src/main/resources/OH-INF/i18n/watsonstt.properties diff --git a/CODEOWNERS b/CODEOWNERS index fc7819d27..50a78212a 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -383,6 +383,7 @@ /bundles/org.openhab.voice.pollytts/ @hillmanr 
/bundles/org.openhab.voice.porcupineks/ @GiviMAD /bundles/org.openhab.voice.voicerss/ @JochenHiller @lolodomo +/bundles/org.openhab.voice.watsonstt/ @GiviMAD /itests/org.openhab.binding.astro.tests/ @gerrieg /itests/org.openhab.binding.avmfritz.tests/ @cweitkamp /itests/org.openhab.binding.feed.tests/ @svilenvul diff --git a/bom/openhab-addons/pom.xml b/bom/openhab-addons/pom.xml index 7c2e899f5..930b8d50a 100644 --- a/bom/openhab-addons/pom.xml +++ b/bom/openhab-addons/pom.xml @@ -1906,6 +1906,11 @@ org.openhab.voice.voicerss ${project.version} + + org.openhab.addons.bundles + org.openhab.voice.watsonstt + ${project.version} + + diff --git a/bundles/org.openhab.voice.watsonstt/NOTICE b/bundles/org.openhab.voice.watsonstt/NOTICE new file mode 100644 index 000000000..70d73908c --- /dev/null +++ b/bundles/org.openhab.voice.watsonstt/NOTICE @@ -0,0 +1,20 @@ +This content is produced and maintained by the openHAB project. + +* Project home: https://www.openhab.org + +== Declared Project Licenses + +This program and the accompanying materials are made available under the terms +of the Eclipse Public License 2.0 which is available at +https://www.eclipse.org/legal/epl-2.0/. + +== Source Code + +https://github.com/openhab/openhab-addons + +== Third-party Content + +com.ibm.watson: speech-to-text +* License: Apache 2.0 License +* Project: https://github.com/watson-developer-cloud/java-sdk +* Source: https://github.com/watson-developer-cloud/java-sdk/tree/master/speech-to-text diff --git a/bundles/org.openhab.voice.watsonstt/README.md b/bundles/org.openhab.voice.watsonstt/README.md new file mode 100644 index 000000000..adcfcf970 --- /dev/null +++ b/bundles/org.openhab.voice.watsonstt/README.md @@ -0,0 +1,65 @@ +# IBM Watson Speech-to-Text + +Watson STT Service uses the non-free IBM Watson Speech-to-Text API to transcribe audio data to text. +Be aware that using this service may incur costs on your IBM account. 
+You can find pricing information on [this page](https://www.ibm.com/cloud/watson-speech-to-text/pricing). + +## Obtaining Credentials + +Before you can use this add-on, you need to create a Speech-to-Text instance in the IBM Cloud service. + +* Go to the following [link](https://cloud.ibm.com/catalog/services/speech-to-text) and create the instance in your desired region. +* After the instance is created, you should be able to view its URL and API key. + +## Configuration + +### Authentication Configuration + +Use your favorite configuration UI to edit **Settings / Other Services - IBM Watson Speech-to-Text** and set: + +* **Api Key** - API key of the Speech-to-Text instance created on IBM Cloud. +* **Instance Url** - URL of the Speech-to-Text instance created on IBM Cloud. + +### Speech to Text Configuration + +Use your favorite configuration UI to edit **Settings / Other Services - IBM Watson Speech-to-Text**: + +* **Background Audio Suppression** - Use this parameter to suppress side conversations or background noise. +* **Speech Detector Sensitivity** - Use this parameter to suppress word insertions from music, coughing, and other non-speech events. +* **Inactivity Timeout** - The time in seconds after which, if only silence (no speech) is detected in the audio, the connection is closed. +* **Opt Out Logging** - By default, all IBM Watson™ services log requests and their results. Logging is done only to improve the services for future users. The logged data is not shared or made public. Enable this option to opt out of that logging. +* **No Results Message** - Message reported when the transcription yields no results. +* **Smart Formatting** - If true, the service converts dates, times, series of digits and numbers, phone numbers, currency values, and internet addresses into a more readable form. (Not available for all locales) +* **Redaction** - If true, the service redacts, or masks, numeric data from final transcripts. 
(Not available for all locales) + +### Configuration via a text file + +In case you would like to set up the service via a text file, create a new file in `$OPENHAB_ROOT/conf/services` named `watsonstt.cfg`. + +Its contents should look similar to: + +``` +org.openhab.voice.watsonstt:apiKey=****** +org.openhab.voice.watsonstt:instanceUrl=https://api.***.speech-to-text.watson.cloud.ibm.com/instances/***** +org.openhab.voice.watsonstt:backgroundAudioSuppression=0.5 +org.openhab.voice.watsonstt:speechDetectorSensitivity=0.5 +org.openhab.voice.watsonstt:inactivityTimeout=2 +org.openhab.voice.watsonstt:optOutLogging=false +org.openhab.voice.watsonstt:smartFormatting=false +org.openhab.voice.watsonstt:redaction=false +org.openhab.voice.watsonstt:noResultsMessage="Sorry, I didn't understand you" +``` + +### Default Speech-to-Text Configuration + +You can set up your preferred default Speech-to-Text in the UI: + +* Go to **Settings**. +* Edit **System Services - Voice**. +* Set **IBM Watson** as **Speech-to-Text**. 
+ +In case you would like to setup these settings via a text file, you can edit the file `runtime.cfg` in `$OPENHAB_ROOT/conf/services` and set the following entries: + +``` +org.openhab.voice:defaultSTT=watsonstt +``` diff --git a/bundles/org.openhab.voice.watsonstt/pom.xml b/bundles/org.openhab.voice.watsonstt/pom.xml new file mode 100644 index 000000000..30ad6db8b --- /dev/null +++ b/bundles/org.openhab.voice.watsonstt/pom.xml @@ -0,0 +1,70 @@ + + + + 4.0.0 + + + org.openhab.addons.bundles + org.openhab.addons.reactor.bundles + 3.3.0-SNAPSHOT + + + org.openhab.voice.watsonstt + + openHAB Add-ons :: Bundles :: Voice :: IBM Watson Speech to Text + + !android.*,!dalvik.*,!kotlin.*,sun.security.*;resolution:=optional,org.openjsse.*;resolution:=optional,org.conscrypt.*;resolution:=optional,org.bouncycastle.*;resolution:=optional,okhttp3.logging.*;resolution:=optional,com.google.gson.*;resolution:=optional,io.reactivex;resolution:=optional,okio.*;resolution:=optional,org.apache.commons.*;resolution:=optional,* + + + + com.ibm.watson + speech-to-text + 9.3.1 + compile + + + + com.ibm.cloud + sdk-core + 9.15.0 + compile + + + com.ibm.watson + common + 9.3.1 + compile + + + com.squareup.okhttp3 + okhttp + 4.9.1 + compile + + + com.squareup.okhttp3 + okhttp-urlconnection + 4.9.1 + compile + + + org.jetbrains.kotlin + kotlin-stdlib + 1.4.10 + compile + + + com.squareup.okio + okio + 2.8.0 + compile + + + com.google.code.gson + gson + 2.8.9 + compile + + + diff --git a/bundles/org.openhab.voice.watsonstt/src/main/feature/feature.xml b/bundles/org.openhab.voice.watsonstt/src/main/feature/feature.xml new file mode 100644 index 000000000..8672ad7a0 --- /dev/null +++ b/bundles/org.openhab.voice.watsonstt/src/main/feature/feature.xml @@ -0,0 +1,9 @@ + + + mvn:org.openhab.core.features.karaf/org.openhab.core.features.karaf.openhab-core/${ohc.version}/xml/features + + + openhab-runtime-base + mvn:org.openhab.addons.bundles/org.openhab.voice.watsonstt/${project.version} + + diff 
--git a/bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTConfiguration.java b/bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTConfiguration.java new file mode 100644 index 000000000..b7785541e --- /dev/null +++ b/bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTConfiguration.java @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2010-2022 Contributors to the openHAB project + * + * See the NOTICE file(s) distributed with this work for additional + * information. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0 + * + * SPDX-License-Identifier: EPL-2.0 + */ +package org.openhab.voice.watsonstt.internal; + +import org.eclipse.jdt.annotation.NonNullByDefault; + +/** + * The {@link WatsonSTTConfiguration} class contains fields mapping the service configuration parameters. + * + * @author Miguel Álvarez - Initial contribution + */ +@NonNullByDefault +public class WatsonSTTConfiguration { + + /** + * API key of the Speech-to-Text instance created on IBM Cloud. + */ + public String apiKey = ""; + /** + * URL of the Speech-to-Text instance created on IBM Cloud. + */ + public String instanceUrl = ""; + /** + * Use this parameter to suppress side conversations or background noise. + */ + public float backgroundAudioSuppression = 0f; + /** + * Use this parameter to suppress word insertions from music, coughing, and other non-speech events. + */ + public float speechDetectorSensitivity = 0.5f; + /** + * If true, the service converts dates, times, series of digits and numbers, phone numbers, currency values, and + * internet addresses into a more readable form. + */ + public boolean smartFormatting = false; + /** + * If true, the service redacts, or masks, numeric data from final transcripts. 
+ */ + public boolean redaction = false; + /** + * The time in seconds after which, if only silence (no speech) is detected in the audio, the connection is closed. + */ + public int inactivityTimeout = 3; + /** + * Message reported when the transcription yields no results + */ + public String noResultsMessage = "No results"; + /** + * By default, all IBM Watson™ services log requests and their results. Logging is done only to improve the services + * for future users. The logged data is not shared or made public. Set to true to opt out of that logging. + */ + public boolean optOutLogging = true; +} diff --git a/bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTConstants.java b/bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTConstants.java new file mode 100644 index 000000000..4754dd940 --- /dev/null +++ b/bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTConstants.java @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2010-2022 Contributors to the openHAB project + * + * See the NOTICE file(s) distributed with this work for additional + * information. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0 + * + * SPDX-License-Identifier: EPL-2.0 + */ +package org.openhab.voice.watsonstt.internal; + +import org.eclipse.jdt.annotation.NonNullByDefault; + +/** + * The {@link WatsonSTTConstants} class defines common constants, which are + * used across the whole add-on. 
+ * + * @author Miguel Álvarez - Initial contribution + */ +@NonNullByDefault +public class WatsonSTTConstants { + /** + * Service name, used to build the service label + */ + public static final String SERVICE_NAME = "IBM Watson"; + /** + * Service id + */ + public static final String SERVICE_ID = "watsonstt"; + + /** + * Service category + */ + public static final String SERVICE_CATEGORY = "voice"; + + /** + * Service pid, composed from the category and the id + */ + public static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID; +} diff --git a/bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTService.java b/bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTService.java new file mode 100644 index 000000000..a3dc80d0d --- /dev/null +++ b/bundles/org.openhab.voice.watsonstt/src/main/java/org/openhab/voice/watsonstt/internal/WatsonSTTService.java @@ -0,0 +1,310 @@ +/** + * Copyright (c) 2010-2022 Contributors to the openHAB project + * + * See the NOTICE file(s) distributed with this work for additional + * information. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0 + * + * SPDX-License-Identifier: EPL-2.0 + */ +package org.openhab.voice.watsonstt.internal; + +import static org.openhab.voice.watsonstt.internal.WatsonSTTConstants.*; + +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +import javax.net.ssl.SSLPeerUnverifiedException; + +import org.eclipse.jdt.annotation.NonNullByDefault; +import org.eclipse.jdt.annotation.Nullable; +import org.openhab.core.audio.AudioFormat; +import org.openhab.core.audio.AudioStream; +import org.openhab.core.common.ThreadPoolManager; +import org.openhab.core.config.core.ConfigurableService; +import org.openhab.core.config.core.Configuration; +import org.openhab.core.voice.RecognitionStartEvent; +import org.openhab.core.voice.RecognitionStopEvent; +import org.openhab.core.voice.STTException; +import org.openhab.core.voice.STTListener; +import org.openhab.core.voice.STTService; +import org.openhab.core.voice.STTServiceHandle; +import org.openhab.core.voice.SpeechRecognitionErrorEvent; +import org.openhab.core.voice.SpeechRecognitionEvent; +import org.osgi.framework.Constants; +import org.osgi.service.component.annotations.Activate; +import org.osgi.service.component.annotations.Component; +import org.osgi.service.component.annotations.Modified; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.ibm.cloud.sdk.core.http.HttpMediaType; +import com.ibm.cloud.sdk.core.security.IamAuthenticator; +import com.ibm.watson.speech_to_text.v1.SpeechToText; +import com.ibm.watson.speech_to_text.v1.model.RecognizeWithWebsocketsOptions; +import com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionAlternative; 
+import com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResult;
+import com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults;
+import com.ibm.watson.speech_to_text.v1.websocket.RecognizeCallback;
+
+import okhttp3.WebSocket;
+
+/**
+ * The {@link WatsonSTTService} allows to use Watson as Speech-to-Text engine
+ *
+ * @author Miguel Álvarez - Initial contribution
+ */
+@NonNullByDefault
+@Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
+@ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
+        + " Speech-to-Text", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
+public class WatsonSTTService implements STTService {
+    private final Logger logger = LoggerFactory.getLogger(WatsonSTTService.class);
+    private final ScheduledExecutorService executor = ThreadPoolManager.getScheduledPool("OH-voice-watsonstt");
+    /** Model names; the part before the underscore is the language tag of the model. */
+    private final List<String> models = List.of("ar-AR_BroadbandModel", "de-DE_BroadbandModel", "en-AU_BroadbandModel",
+            "en-GB_BroadbandModel", "en-US_BroadbandModel", "es-AR_BroadbandModel", "es-CL_BroadbandModel",
+            "es-CO_BroadbandModel", "es-ES_BroadbandModel", "es-MX_BroadbandModel", "es-PE_BroadbandModel",
+            "fr-CA_BroadbandModel", "fr-FR_BroadbandModel", "it-IT_BroadbandModel", "ja-JP_BroadbandModel",
+            "ko-KR_BroadbandModel", "nl-NL_BroadbandModel", "pt-BR_BroadbandModel", "zh-CN_BroadbandModel");
+    /** Locales derived from the language tags of {@link #models}. */
+    private final Set<Locale> supportedLocales = models.stream().map(name -> name.split("_")[0])
+            .map(Locale::forLanguageTag).collect(Collectors.toSet());
+    private WatsonSTTConfiguration config = new WatsonSTTConfiguration();
+
+    /**
+     * Loads the service configuration when the component is activated.
+     *
+     * @param config the raw configuration properties
+     */
+    @Activate
+    protected void activate(Map<String, Object> config) {
+        this.config = new Configuration(config).as(WatsonSTTConfiguration.class);
+    }
+
+    /**
+     * Reloads the service configuration when it is modified.
+     *
+     * @param config the raw configuration properties
+     */
+    @Modified
+    protected void modified(Map<String, Object> config) {
+        this.config = new Configuration(config).as(WatsonSTTConfiguration.class);
+    }
+
+    @Override
+    public String getId() {
+        return SERVICE_ID;
+    }
+
+    @Override
+    public String getLabel(@Nullable Locale locale) {
+        return SERVICE_NAME;
+    }
+
+    @Override
+    public Set<Locale> getSupportedLocales() {
+        return supportedLocales;
+    }
+
+    @Override
+    public Set<AudioFormat> getSupportedFormats() {
+        return Set.of(AudioFormat.WAV, AudioFormat.OGG, new AudioFormat("OGG", "OPUS", null, null, null, null),
+                AudioFormat.MP3);
+    }
+
+    /**
+     * Starts a transcription of the given audio stream over a WebSocket connection to the configured
+     * Speech-to-Text instance. The connection is opened from a task submitted to the service executor;
+     * results and errors are reported to the listener as events.
+     *
+     * @param sttListener listener receiving the recognition events
+     * @param audioStream audio to transcribe
+     * @param locale locale whose language tag selects the "_BroadbandModel" model
+     * @param set not used by this implementation
+     * @return a handle whose {@code abort()} closes the socket (if already open) and cancels the task
+     * @throws STTException if the api key or instance url is blank, or if no content type can be resolved
+     *             for the audio stream
+     */
+    @Override
+    public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale, Set<String> set)
+            throws STTException {
+        if (config.apiKey.isBlank() || config.instanceUrl.isBlank()) {
+            throw new STTException("service is not correctly configured");
+        }
+        String contentType = getContentType(audioStream);
+        if (contentType == null) {
+            throw new STTException("Unsupported format, unable to resolve audio content type");
+        }
+        logger.debug("Content-Type: {}", contentType);
+        var speechToText = new SpeechToText(new IamAuthenticator.Builder().apikey(config.apiKey).build());
+        speechToText.setServiceUrl(config.instanceUrl);
+        if (config.optOutLogging) {
+            speechToText.setDefaultHeaders(Map.of("X-Watson-Learning-Opt-Out", "1"));
+        }
+        RecognizeWithWebsocketsOptions wsOptions = new RecognizeWithWebsocketsOptions.Builder().audio(audioStream)
+                .contentType(contentType).redaction(config.redaction).smartFormatting(config.smartFormatting)
+                .model(locale.toLanguageTag() + "_BroadbandModel").interimResults(true)
+                .backgroundAudioSuppression(config.backgroundAudioSuppression)
+                .speechDetectorSensitivity(config.speechDetectorSensitivity).inactivityTimeout(config.inactivityTimeout)
+                .build();
+        final AtomicReference<@Nullable WebSocket> socketRef = new AtomicReference<>();
+        var task = executor.submit(() -> {
+            int retries = 2;
+            String lastRetryError = null;
+            while (retries > 0) {
+                try {
+                    socketRef.set(speechToText.recognizeUsingWebSocket(wsOptions,
+                            new TranscriptionListener(sttListener, config)));
+                    return;
+                } catch (RuntimeException e) {
+                    var cause = e.getCause();
+                    if (cause instanceof SSLPeerUnverifiedException) {
+                        // only this failure is retried
+                        lastRetryError = cause.getMessage();
+                        logger.debug("Retrying on error: {}", lastRetryError);
+                        retries--;
+                    } else {
+                        var errorMessage = e.getMessage();
+                        logger.warn("Aborting on error: {}", errorMessage);
+                        sttListener.sttEventReceived(
+                                new SpeechRecognitionErrorEvent(errorMessage != null ? errorMessage : "Unknown error"));
+                        return;
+                    }
+                }
+            }
+            // All retries failed: tell the listener instead of ending silently, otherwise the caller
+            // never learns that no transcription is going to arrive.
+            logger.warn("Aborting after retries on error: {}", lastRetryError);
+            sttListener.sttEventReceived(
+                    new SpeechRecognitionErrorEvent(lastRetryError != null ? lastRetryError : "Unknown error"));
+        });
+        return new STTServiceHandle() {
+            @Override
+            public void abort() {
+                var socket = socketRef.get();
+                if (socket != null) {
+                    socket.close(1000, null);
+                    socket.cancel();
+                    try {
+                        Thread.sleep(100);
+                    } catch (InterruptedException e) {
+                        // keep the interruption visible to the caller
+                        Thread.currentThread().interrupt();
+                    }
+                }
+                task.cancel(true);
+            }
+        };
+    }
+
+    /**
+     * Resolves the content type sent to the service from the format of the audio stream.
+     *
+     * @param audioStream stream whose format is inspected
+     * @return the content type, or null if the container/codec combination is not handled or a signed
+     *         16 bit PCM stream has no frequency
+     * @throws STTException if the format has no container or no codec
+     */
+    private @Nullable String getContentType(AudioStream audioStream) throws STTException {
+        AudioFormat format = audioStream.getFormat();
+        String container = format.getContainer();
+        String codec = format.getCodec();
+        if (container == null || codec == null) {
+            throw new STTException("Missing audio stream info");
+        }
+        Long frequency = format.getFrequency();
+        Integer bitDepth = format.getBitDepth();
+        switch (container) {
+            case AudioFormat.CONTAINER_WAVE:
+                if (AudioFormat.CODEC_PCM_SIGNED.equals(codec)) {
+                    if (bitDepth == null || bitDepth != 16) {
+                        return "audio/wav";
+                    }
+                    // rate is a required parameter for this type
+                    if (frequency == null) {
+                        return null;
+                    }
+                    StringBuilder contentTypeL16 = new StringBuilder(HttpMediaType.AUDIO_PCM).append(";rate=")
+                            .append(frequency);
+                    // those are optional
+                    Integer channels = format.getChannels();
+                    if (channels != null) {
+                        contentTypeL16.append(";channels=").append(channels);
+                    }
+                    Boolean bigEndian = format.isBigEndian();
+                    if (bigEndian != null) {
+                        contentTypeL16.append(";")
+                                .append(bigEndian ? "endianness=big-endian" : "endianness=little-endian");
+                    }
+                    return contentTypeL16.toString();
+                }
+                // do not fall through into the OGG case for other WAVE codecs
+                break;
+            case AudioFormat.CONTAINER_OGG:
+                switch (codec) {
+                    case AudioFormat.CODEC_VORBIS:
+                        return "audio/ogg;codecs=vorbis";
+                    case "OPUS":
+                        return "audio/ogg;codecs=opus";
+                }
+                break;
+            case AudioFormat.CONTAINER_NONE:
+                if (AudioFormat.CODEC_MP3.equals(codec)) {
+                    return "audio/mp3";
+                }
+                break;
+        }
+        return null;
+    }
+
+    /**
+     * Callback that accumulates the final transcription results and forwards the recognition events
+     * to the {@link STTListener}.
+     */
+    private static class TranscriptionListener implements RecognizeCallback {
+        private final Logger logger = LoggerFactory.getLogger(TranscriptionListener.class);
+        private final StringBuilder transcriptBuilder = new StringBuilder();
+        private final STTListener sttListener;
+        private final WatsonSTTConfiguration config;
+        private float confidenceSum = 0f;
+        private int responseCount = 0;
+        private boolean disconnected = false;
+
+        public TranscriptionListener(STTListener sttListener, WatsonSTTConfiguration config) {
+            this.sttListener = sttListener;
+            this.config = config;
+        }
+
+        /**
+         * Appends the first alternative of every final result to the transcript and adds its confidence
+         * to the running sum; non-final results are ignored.
+         */
+        @Override
+        public void onTranscription(@Nullable SpeechRecognitionResults speechRecognitionResults) {
+            logger.debug("onTranscription");
+            if (speechRecognitionResults == null) {
+                return;
+            }
+            speechRecognitionResults.getResults().stream().filter(SpeechRecognitionResult::isXFinal).forEach(result -> {
+                SpeechRecognitionAlternative alternative = result.getAlternatives().stream().findFirst().orElse(null);
+                if (alternative == null) {
+                    return;
+                }
+                logger.debug("onTranscription Final");
+                Double confidence = alternative.getConfidence();
+                transcriptBuilder.append(alternative.getTranscript());
+                confidenceSum += confidence != null ? confidence.floatValue() : 0f;
+                responseCount++;
+            });
+        }
+
+        @Override
+        public void onConnected() {
+            logger.debug("onConnected");
+        }
+
+        /**
+         * Reports the error to the listener, except for "Socket closed" errors received after the
+         * disconnection, which are only logged.
+         */
+        @Override
+        public void onError(@Nullable Exception e) {
+            var errorMessage = e != null ? e.getMessage() : null;
+            if (errorMessage != null && disconnected && errorMessage.contains("Socket closed")) {
+                logger.debug("Error ignored: {}", errorMessage);
+                return;
+            }
+            logger.warn("TranscriptionError: {}", errorMessage);
+            sttListener.sttEventReceived(
+                    new SpeechRecognitionErrorEvent(errorMessage != null ? errorMessage : "Unknown error"));
+        }
+
+        /**
+         * Emits the stop event followed by either the accumulated transcript with its average confidence
+         * or, when the transcript is blank, an error event carrying the configured no-results message.
+         */
+        @Override
+        public void onDisconnected() {
+            logger.debug("onDisconnected");
+            disconnected = true;
+            sttListener.sttEventReceived(new RecognitionStopEvent());
+            // avoid dividing by zero when no final result was received
+            float averageConfidence = responseCount > 0 ? confidenceSum / (float) responseCount : 0f;
+            String transcript = transcriptBuilder.toString();
+            if (!transcript.isBlank()) {
+                sttListener.sttEventReceived(new SpeechRecognitionEvent(transcript, averageConfidence));
+            } else {
+                if (!config.noResultsMessage.isBlank()) {
+                    sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.noResultsMessage));
+                } else {
+                    sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("No results"));
+                }
+            }
+        }
+
+        @Override
+        public void onInactivityTimeout(@Nullable RuntimeException e) {
+            if (e != null) {
+                logger.debug("InactivityTimeout: {}", e.getMessage());
+            }
+        }
+
+        @Override
+        public void onListening() {
+            logger.debug("onListening");
+            sttListener.sttEventReceived(new RecognitionStartEvent());
+        }
+
+        @Override
+        public void onTranscriptionComplete() {
+            logger.debug("onTranscriptionComplete");
+        }
+    }
+}
diff --git a/bundles/org.openhab.voice.watsonstt/src/main/resources/OH-INF/config/config.xml b/bundles/org.openhab.voice.watsonstt/src/main/resources/OH-INF/config/config.xml
new file mode 100644
index 000000000..4dbd2a281
--- /dev/null
+++ b/bundles/org.openhab.voice.watsonstt/src/main/resources/OH-INF/config/config.xml
@@ -0,0 +1,68 @@
+ + + + + + + Information for connection to your Watson Speech-to-Text instance. + + + + Parameters for Watson Speech-to-Text API. + + + + Api key for Speech-to-Text instance created on IBM Cloud. 
+ + + + Url for Speech-to-Text instance created on IBM Cloud. + + + + Use the parameter to suppress side conversations or background noise. + 0 + + + + Use the parameter to suppress word insertions from music, coughing, and other non-speech events. + 0.5 + + + + The time in seconds after which, if only silence (no speech) is detected in the audio, the connection is + closed. + 3 + + + + Message to be told when no transcription is done. + No results + + + + By default, all IBM Watson™ services log requests and their results. Logging is done only to improve the + services for future users. The logged data is not shared or made public. + true + + + + If true, the service converts dates, times, series of digits and numbers, phone numbers, currency + values, and internet addresses into more readable. (Not available for all locales) + false + true + + + + If true, the service redacts, or masks, numeric data from final transcripts. (Not available for all + locales) + false + true + + + + diff --git a/bundles/org.openhab.voice.watsonstt/src/main/resources/OH-INF/i18n/watsonstt.properties b/bundles/org.openhab.voice.watsonstt/src/main/resources/OH-INF/i18n/watsonstt.properties new file mode 100644 index 000000000..eebbd4792 --- /dev/null +++ b/bundles/org.openhab.voice.watsonstt/src/main/resources/OH-INF/i18n/watsonstt.properties @@ -0,0 +1,26 @@ +voice.config.watsonstt.apiKey.label = Api Key +voice.config.watsonstt.apiKey.description = Api key for Speech-to-Text instance created on IBM Cloud. +voice.config.watsonstt.backgroundAudioSuppression.label = Background Audio Suppression +voice.config.watsonstt.backgroundAudioSuppression.description = Use the parameter to suppress side conversations or background noise. +voice.config.watsonstt.group.authentication.label = Authentication +voice.config.watsonstt.group.authentication.description = Information for connection to your Watson Speech-to-Text instance. 
+voice.config.watsonstt.group.stt.label = STT Configuration +voice.config.watsonstt.group.stt.description = Parameters for Watson Speech-to-Text API. +voice.config.watsonstt.inactivityTimeout.label = Inactivity Timeout +voice.config.watsonstt.inactivityTimeout.description = The time in seconds after which, if only silence (no speech) is detected in the audio, the connection is closed. +voice.config.watsonstt.instanceUrl.label = Instance Url +voice.config.watsonstt.instanceUrl.description = Url for Speech-to-Text instance created on IBM Cloud. +voice.config.watsonstt.noResultsMessage.label = No Results Message +voice.config.watsonstt.noResultsMessage.description = Message to be told when no transcription is done. +voice.config.watsonstt.optOutLogging.label = Opt Out Logging +voice.config.watsonstt.optOutLogging.description = By default, all IBM Watson™ services log requests and their results. Logging is done only to improve the services for future users. The logged data is not shared or made public. +voice.config.watsonstt.redaction.label = Redaction +voice.config.watsonstt.redaction.description = If true, the service redacts, or masks, numeric data from final transcripts. (Not available for all locales) +voice.config.watsonstt.smartFormatting.label = Smart Formatting +voice.config.watsonstt.smartFormatting.description = If true, the service converts dates, times, series of digits and numbers, phone numbers, currency values, and internet addresses into more readable. (Not available for all locales) +voice.config.watsonstt.speechDetectorSensitivity.label = Speech Detector Sensitivity +voice.config.watsonstt.speechDetectorSensitivity.description = Use the parameter to suppress word insertions from music, coughing, and other non-speech events. 
+ +# service + +service.voice.watsonstt.label = IBM Watson Speech-to-Text diff --git a/bundles/pom.xml b/bundles/pom.xml index 97ce006f0..e1ab1accb 100644 --- a/bundles/pom.xml +++ b/bundles/pom.xml @@ -401,6 +401,7 @@ org.openhab.voice.pollytts org.openhab.voice.porcupineks org.openhab.voice.voicerss + org.openhab.voice.watsonstt