From 7587e0c50409aa56002e13177204c7ffa26e6b35 Mon Sep 17 00:00:00 2001 From: Gwendal Roulleau Date: Wed, 12 Jul 2023 21:59:48 +0200 Subject: [PATCH] [mimictts] Add LRU cache (#14564) * [mimictts] Add LRU cache And simplifies code with new core capabilities (no more need to create temporary files implementing FixedLengthAudioStream) --------- Signed-off-by: Gwendal Roulleau --- bundles/org.openhab.voice.mimictts/README.md | 5 +- .../internal/AutoDeleteFileAudioStream.java | 84 ------------------- .../internal/InputStreamAudioStream.java | 11 +-- .../mimic/internal/MimicConfiguration.java | 1 - .../voice/mimic/internal/MimicTTSService.java | 66 +++++---------- .../main/resources/OH-INF/config/config.xml | 6 -- .../resources/OH-INF/i18n/mimictts.properties | 2 - 7 files changed, 28 insertions(+), 147 deletions(-) delete mode 100644 bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/AutoDeleteFileAudioStream.java diff --git a/bundles/org.openhab.voice.mimictts/README.md b/bundles/org.openhab.voice.mimictts/README.md index b5d572a25..93e9e84b3 100644 --- a/bundles/org.openhab.voice.mimictts/README.md +++ b/bundles/org.openhab.voice.mimictts/README.md @@ -17,7 +17,6 @@ It supports a subset of SSML, and if you want to use it, be sure to start your t Using your favorite configuration UI to edit **Settings / Other Services - Mimic Text-to-Speech** and set: * **url** - Mimic URL. Default to `http://localhost:59125` -* **workaroundServletSink** - A boolean activating a workaround for audiosink using the openHAB servlet. It stores audio file temporarily on disk, allowing the servlet to get a cloned stream as needed. Default false. * **speakingRate** - Controls how fast the voice speaks the text. A value of 1 is the speed of the training dataset. Less than 1 is faster, and more than 1 is slower. * **audioVolatility** - The amount of noise added to the generated audio (0-1). Can help mask audio artifacts from the voice model. 
Multi-speaker models tend to sound better with a lower amount of noise than single speaker models. * **phonemeVolatility** - The amount of noise used to generate phoneme durations (0-1). Allows for variable speaking cadance, with a value closer to 1 being more variable. Multi-speaker models tend to sound better with a lower amount of phoneme variability than single speaker models. @@ -48,3 +47,7 @@ In case you would like to setup these settings via a text file, you can edit the org.openhab.voice:defaultTTS=mimictts org.openhab.voice:defaultVoice=mimictts:fr_FR_siwis_low ``` + +## Caching + +The mimic TTS service uses the openHAB TTS cache to cache audio files produced from the most recent queries in order to reduce traffic, improve performance and reduce the number of requests. diff --git a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/AutoDeleteFileAudioStream.java b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/AutoDeleteFileAudioStream.java deleted file mode 100644 index 465a2b2bd..000000000 --- a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/AutoDeleteFileAudioStream.java +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Copyright (c) 2010-2023 Contributors to the openHAB project - * - * See the NOTICE file(s) distributed with this work for additional - * information. 
- * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0 - * - * SPDX-License-Identifier: EPL-2.0 - */ -package org.openhab.voice.mimic.internal; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; - -import org.eclipse.jdt.annotation.NonNullByDefault; -import org.openhab.core.audio.AudioException; -import org.openhab.core.audio.AudioFormat; -import org.openhab.core.audio.FileAudioStream; - -/** - * A FileAudioStream that autodelete after it and its clone are closed - * Useful to not congest temporary directory - * - * @author Gwendal Roulleau - Initial contribution - */ -@NonNullByDefault -public class AutoDeleteFileAudioStream extends FileAudioStream { - - private final File file; - private final AudioFormat audioFormat; - private final List clonedAudioStreams = new ArrayList<>(1); - private boolean isOpen = true; - - public AutoDeleteFileAudioStream(File file, AudioFormat format) throws AudioException { - super(file, format); - this.file = file; - this.audioFormat = format; - } - - @Override - public void close() throws IOException { - super.close(); - this.isOpen = false; - deleteIfPossible(); - } - - protected void deleteIfPossible() { - boolean aClonedStreamIsOpen = clonedAudioStreams.stream().anyMatch(as -> as.isOpen); - if (!isOpen && !aClonedStreamIsOpen) { - file.delete(); - } - } - - @Override - public InputStream getClonedStream() throws AudioException { - ClonedFileInputStream clonedInputStream = new ClonedFileInputStream(this, file, audioFormat); - clonedAudioStreams.add(clonedInputStream); - return clonedInputStream; - } - - private static class ClonedFileInputStream extends FileAudioStream { - protected boolean isOpen = true; - private final AutoDeleteFileAudioStream parent; - - public ClonedFileInputStream(AutoDeleteFileAudioStream 
parent, File file, AudioFormat audioFormat) - throws AudioException { - super(file, audioFormat); - this.parent = parent; - } - - @Override - public void close() throws IOException { - super.close(); - this.isOpen = false; - parent.deleteIfPossible(); - } - } -} diff --git a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java index 144ba4334..6d88cafea 100644 --- a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java +++ b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java @@ -18,9 +18,9 @@ import java.io.OutputStream; import org.eclipse.jdt.annotation.NonNullByDefault; import org.eclipse.jdt.annotation.Nullable; -import org.openhab.core.audio.AudioException; import org.openhab.core.audio.AudioFormat; -import org.openhab.core.audio.FixedLengthAudioStream; +import org.openhab.core.audio.AudioStream; +import org.openhab.core.audio.SizeableAudioStream; /** * An AudioStream with an {@link InputStream} inside @@ -28,7 +28,7 @@ import org.openhab.core.audio.FixedLengthAudioStream; * @author Gwendal Roulleau - Initial contribution */ @NonNullByDefault -public class InputStreamAudioStream extends FixedLengthAudioStream { +public class InputStreamAudioStream extends AudioStream implements SizeableAudioStream { public InputStream innerInputStream; public AudioFormat audioFormat; @@ -115,9 +115,4 @@ public class InputStreamAudioStream extends FixedLengthAudioStream { public long length() { return length; } - - @Override - public InputStream getClonedStream() throws AudioException { - throw new AudioException("Operation not supported"); - } } diff --git a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicConfiguration.java 
b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicConfiguration.java index e35064e39..e8c56c146 100644 --- a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicConfiguration.java +++ b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicConfiguration.java @@ -25,5 +25,4 @@ public class MimicConfiguration { public Double speakingRate = 1.0; public Double audioVolatility = 0.667; public Double phonemeVolatility = 0.8; - public Boolean workaroundServletSink = false; } diff --git a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java index 39364035c..5b26dbd99 100644 --- a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java +++ b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java @@ -12,20 +12,19 @@ */ package org.openhab.voice.mimic.internal; -import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; +import java.math.BigInteger; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; -import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -38,13 +37,13 @@ import org.eclipse.jetty.client.util.InputStreamResponseListener; import org.eclipse.jetty.client.util.StringContentProvider; import org.eclipse.jetty.http.HttpHeader; import 
org.eclipse.jetty.http.HttpStatus; -import org.openhab.core.OpenHAB; -import org.openhab.core.audio.AudioException; import org.openhab.core.audio.AudioFormat; import org.openhab.core.audio.AudioStream; import org.openhab.core.config.core.ConfigurableService; import org.openhab.core.io.net.http.HttpClientFactory; import org.openhab.core.io.net.http.HttpRequestBuilder; +import org.openhab.core.voice.AbstractCachedTTSService; +import org.openhab.core.voice.TTSCache; import org.openhab.core.voice.TTSException; import org.openhab.core.voice.TTSService; import org.openhab.core.voice.Voice; @@ -67,11 +66,11 @@ import com.google.gson.JsonSyntaxException; * @author Gwendal Roulleau - Initial contribution */ @Component(configurationPid = MimicTTSService.SERVICE_PID, property = Constants.SERVICE_PID + "=" - + MimicTTSService.SERVICE_PID) + + MimicTTSService.SERVICE_PID, service = TTSService.class) @ConfigurableService(category = MimicTTSService.SERVICE_CATEGORY, label = MimicTTSService.SERVICE_NAME + " Text-to-Speech", description_uri = MimicTTSService.SERVICE_CATEGORY + ":" + MimicTTSService.SERVICE_ID) @NonNullByDefault -public class MimicTTSService implements TTSService { +public class MimicTTSService extends AbstractCachedTTSService { private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class); @@ -84,7 +83,6 @@ public class MimicTTSService implements TTSService { * Configuration parameters */ private static final String PARAM_URL = "url"; - private static final String PARAM_WORKAROUNDSERVLETSINK = "workaroundServletSink"; private static final String PARAM_SPEAKINGRATE = "speakingRate"; private static final String PARAM_AUDIOVOLATITLITY = "audioVolatility"; private static final String PARAM_PHONEMEVOLATITLITY = "phonemeVolatility"; @@ -108,7 +106,9 @@ public class MimicTTSService implements TTSService { private final HttpClient httpClient; @Activate - public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, Map config) { + public 
MimicTTSService(final @Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache, + Map config) { + super(ttsCache); updateConfig(config); this.httpClient = httpClientFactory.getCommonHttpClient(); } @@ -130,12 +130,6 @@ public class MimicTTSService implements TTSService { config.url = param.toString(); } - // workaround - param = newConfig.get(PARAM_WORKAROUNDSERVLETSINK); - if (param != null) { - config.workaroundServletSink = Boolean.parseBoolean(param.toString()); - } - // audio volatility try { param = newConfig.get(PARAM_AUDIOVOLATITLITY); @@ -227,8 +221,7 @@ public class MimicTTSService implements TTSService { * @throws TTSException in case the service is unavailable or a parameter is invalid. */ @Override - public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException { - + public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException { if (!availableVoices.contains(voice)) { // let a chance for the service to update : refreshVoices(); @@ -294,24 +287,7 @@ public class MimicTTSService implements TTSService { } InputStream inputStreamFromMimic = inputStreamResponseListener.getInputStream(); - try { - if (!config.workaroundServletSink) { - return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length); - } else { - // Some audio sinks use the openHAB servlet to get audio. This servlet require the - // getClonedStream() - // method - // So we cache the file on disk, thus implementing the method thanks to FileAudioStream. 
- return createTemporaryFile(inputStreamFromMimic, AUDIO_FORMAT); - } - } catch (TTSException e) { - try { - inputStreamFromMimic.close(); - } catch (IOException e1) { - } - throw e; - } - + return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length); } else { String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code " + response.getStatus() + " for reason " + response.getReason(); @@ -325,16 +301,16 @@ public class MimicTTSService implements TTSService { } } - private AudioStream createTemporaryFile(InputStream inputStream, AudioFormat audioFormat) throws TTSException { - File mimicDirectory = new File(OpenHAB.getUserDataFolder(), "mimic"); - mimicDirectory.mkdir(); + @Override + public String getCacheKey(String text, Voice voice, AudioFormat requestedFormat) { + MessageDigest md; try { - File tempFile = File.createTempFile(UUID.randomUUID().toString(), ".wav", mimicDirectory); - tempFile.deleteOnExit(); - Files.copy(inputStream, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - return new AutoDeleteFileAudioStream(tempFile, audioFormat); - } catch (AudioException | IOException e) { - throw new TTSException("Cannot create temporary audio file", e); + md = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + return "nomd5algorithm"; } + byte[] binaryKey = ((text + voice.getUID() + requestedFormat.toString() + config.speakingRate + + config.audioVolatility + config.phonemeVolatility).getBytes()); + return String.format("%032x", new BigInteger(1, md.digest(binaryKey))); } } diff --git a/bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/config/config.xml b/bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/config/config.xml index 2107070d7..ab8619a39 100644 --- a/bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/config/config.xml +++ b/bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/config/config.xml @@ -11,12 +11,6 @@ Mimic 3 URL. 
http://localhost:59125 - - - Enable this workaround to store temporarily the file on disk. Needed if you play on audiosink based on - the openHAB audio servlet. - false - Controls how fast the voice speaks the text. A value of 1 is the speed of the training dataset. Less diff --git a/bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/i18n/mimictts.properties b/bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/i18n/mimictts.properties index 2472d5b89..f34268e20 100644 --- a/bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/i18n/mimictts.properties +++ b/bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/i18n/mimictts.properties @@ -4,8 +4,6 @@ voice.config.mimictts.phonemeVolatility.label = Phoneme Volatility voice.config.mimictts.phonemeVolatility.description = The amount of noise used to generate phoneme durations (0-1). Allows for variable speaking cadance, with a value closer to 1 being more variable. Multi-speaker models tend to sound better with a lower amount of phoneme variability than single speaker models. voice.config.mimictts.speakingRate.label = Speaking Rate voice.config.mimictts.speakingRate.description = Controls how fast the voice speaks the text. A value of 1 is the speed of the training dataset. Less than 1 is faster, and more than 1 is slower. -voice.config.mimictts.workaroundServletSink.label= Workaround For Servlet-Based Audiosink -voice.config.mimictts.workaroundServletSink.description= Enable this workaround to store temporarily the file on disk. Needed if you play on audiosink based on the openHAB audio servlet. voice.config.mimictts.url.label = URL voice.config.mimictts.url.description = Mimic 3 URL.