diff --git a/nongnu/packages/machine-learning.scm b/nongnu/packages/machine-learning.scm index 8aef14ae..6b8f6a5d 100644 --- a/nongnu/packages/machine-learning.scm +++ b/nongnu/packages/machine-learning.scm @@ -83,3 +83,57 @@ detection framework with a focus on performance and simplicity.") (string-append (package-description python-openwakeword) "\n\ Note: This minimal variant doesn't provide the additional Voice Activity Detection.")))) + +(define-public python-realtimestt + (package + (name "python-realtimestt") + (version "0.3.104") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/KoljaB/RealTimeSTT") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "1455z2m86qdldap211rp1z9zdwjvmxhdv49cg7bci37190yh1r3h")) + (patches + (search-patches "python-realtimestt-remove-pvporcupine.patch")))) + (build-system pyproject-build-system) + (arguments + (list + ;; XXX: Tests require sibling package realtimetts + #:tests? #f + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'switch-default-to-tflite + (lambda _ + (substitute* "RealtimeSTT/audio_recorder.py" + (("\"onnx\"") + "\"tflite\"")))) + (add-after 'unpack 'relax-requirements + (lambda _ + (substitute* "requirements.txt" + (("^(scipy|websockets)==.*" all target) + (string-append target "\n")))))))) + (propagated-inputs + (list python-faster-whisper + python-halo + python-openwakeword-minimal + python-pyaudio + python-scipy + python-soundfile + python-pytorch + python-torchaudio + python-webrtcvad-wheels + python-websocket-client + python-websockets)) + (native-inputs + (list ;; python-pyqt python-pytest ; required for tests. + python-setuptools python-wheel)) + (home-page "https://github.com/KoljaB/RealTimeSTT") + (synopsis "Voice Activity Detection and Transcription System") + (description + "This package provides a fast Voice Activity Detection and Transcription System. 
+Note: This package defaults to the tensorflow-lite backend.") + (license license:expat))) diff --git a/nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch b/nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch new file mode 100644 index 00000000..eb94a7cb --- /dev/null +++ b/nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch @@ -0,0 +1,170 @@ +From 54d3cd6077a1bb545ea4dbc75120d614412d7673 Mon Sep 17 00:00:00 2001 +Message-ID: <54d3cd6077a1bb545ea4dbc75120d614412d7673.1756745212.git.ngraves@ngraves.fr> +From: Nicolas Graves +Date: Mon, 1 Sep 2025 18:46:22 +0200 +Subject: [PATCH] Remove nonfree pvporcupine backend. + +--- + RealtimeSTT/audio_recorder.py | 52 ++++------------------------ + RealtimeSTT/audio_recorder_client.py | 2 +- + RealtimeSTT_server/stt_server.py | 2 +- + requirements.txt | 3 +- + 4 files changed, 10 insertions(+), 49 deletions(-) + +diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py +index 59461b5..cd04c6d 100644 +--- a/RealtimeSTT/audio_recorder.py ++++ b/RealtimeSTT/audio_recorder.py +@@ -7,7 +7,7 @@ The class employs the faster_whisper library to transcribe the recorded audio + into text using machine learning models, which can be run either on a GPU or + CPU. Voice activity detection (VAD) is built in, meaning the software can + automatically start or stop recording based on the presence or absence of +-speech. It integrates wake word detection through the pvporcupine library, ++speech. It integrates wake word detection through the openwakeword library, + allowing the software to initiate recording when a specific word or phrase + is spoken. The system provides real-time feedback and can be further + customized. 
+@@ -40,7 +40,6 @@ import faster_whisper + import openwakeword + import collections + import numpy as np +-import pvporcupine + import traceback + import threading + import webrtcvad +@@ -467,12 +466,7 @@ class AudioToTextRecorder: + - on_turn_detection_stop (callable, default=None): Callback function to + be called when the system stops listening for a turn of speech. + - wakeword_backend (str, default=""): Specifies the backend library to +- use for wake word detection. Supported options include 'pvporcupine' +- for using the Porcupine wake word engine or 'oww' for using the +- OpenWakeWord engine. +- - wakeword_backend (str, default="pvporcupine"): Specifies the backend +- library to use for wake word detection. Supported options include +- 'pvporcupine' for using the Porcupine wake word engine or 'oww' for ++ use for wake word detection. Supported options include 'oww' for + using the OpenWakeWord engine. + - openwakeword_model_paths (str, default=None): Comma-separated paths + to model files for the openwakeword library. These paths point to +@@ -819,7 +813,7 @@ class AudioToTextRecorder: + "transcription model initialized successfully") + + # Setup wake word detection +- if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords', 'pvp', 'pvporcupine'}: ++ if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}: + self.wakeword_backend = wakeword_backend + + self.wake_words_list = [ +@@ -831,29 +825,7 @@ class AudioToTextRecorder: + for _ in range(len(self.wake_words_list)) + ] + +- if wake_words and self.wakeword_backend in {'pvp', 'pvporcupine'}: +- +- try: +- self.porcupine = pvporcupine.create( +- keywords=self.wake_words_list, +- sensitivities=self.wake_words_sensitivities +- ) +- self.buffer_size = self.porcupine.frame_length +- self.sample_rate = self.porcupine.sample_rate +- +- except Exception as e: +- logger.exception( +- "Error initializing porcupine " +- f"wake word detection engine: {e}. 
" +- f"Wakewords: {self.wake_words_list}." +- ) +- raise +- +- logger.debug( +- "Porcupine wake word detection engine initialized successfully" +- ) +- +- elif wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}: ++ if wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}: + + openwakeword.utils.download_models() + +@@ -896,7 +868,7 @@ class AudioToTextRecorder: + ) + + else: +- logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: pvporcupine, openwakeword.") ++ logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: openwakeword.") + + + # Setup voice activity detection model WebRTC +@@ -1592,17 +1564,7 @@ class AudioToTextRecorder: + """ + Processes audio data to detect wake words. + """ +- if self.wakeword_backend in {'pvp', 'pvporcupine'}: +- pcm = struct.unpack_from( +- "h" * self.buffer_size, +- data +- ) +- porcupine_index = self.porcupine.process(pcm) +- if self.debug_mode: +- logger.info(f"wake words porcupine_index: {porcupine_index}") +- return porcupine_index +- +- elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}: ++ if self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}: + pcm = np.frombuffer(data, dtype=np.int16) + prediction = self.owwModel.predict(pcm) + max_score = -1 +@@ -2840,4 +2802,4 @@ class AudioToTextRecorder: + traceback (Traceback or None): The traceback corresponding to the + exception, if any. 
+ """ +- self.shutdown() +\ No newline at end of file ++ self.shutdown() +diff --git a/RealtimeSTT/audio_recorder_client.py b/RealtimeSTT/audio_recorder_client.py +index 89478c8..19ea49e 100644 +--- a/RealtimeSTT/audio_recorder_client.py ++++ b/RealtimeSTT/audio_recorder_client.py +@@ -137,7 +137,7 @@ class AudioToTextRecorderClient: + on_turn_detection_stop=None, + + # Wake word parameters +- wakeword_backend: str = "pvporcupine", ++ wakeword_backend: str = "openwakeword", + openwakeword_model_paths: str = None, + openwakeword_inference_framework: str = "onnx", + wake_words: str = "", +diff --git a/RealtimeSTT_server/stt_server.py b/RealtimeSTT_server/stt_server.py +index da3e55e..09e8cde 100644 +--- a/RealtimeSTT_server/stt_server.py ++++ b/RealtimeSTT_server/stt_server.py +@@ -499,7 +499,7 @@ def parse_arguments(): + help='The delay in seconds before the wake word detection is activated after the system starts listening. This prevents false positives during the start of a session. Default is 0 seconds.') + + parser.add_argument('--wakeword_backend', type=str, default='none', +- help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "pvporcupine".') ++ help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "openwakeword".') + + parser.add_argument('--openwakeword_model_paths', type=str, nargs='*', + help='A list of file paths to OpenWakeWord models. 
This is useful if you are using OpenWakeWord for wake word detection and need to specify custom models.') +diff --git a/requirements.txt b/requirements.txt +index d880b96..c0554e0 100644 +--- a/requirements.txt ++++ b/requirements.txt +@@ -1,6 +1,5 @@ + PyAudio==0.2.14 + faster-whisper==1.1.1 +-pvporcupine==1.9.5 + webrtcvad-wheels==2.0.14 + halo==0.0.31 + torch +@@ -9,4 +8,4 @@ scipy==1.15.2 + openwakeword>=0.4.0 + websockets==15.0.1 + websocket-client==1.8.0 +-soundfile==0.13.1 +\ No newline at end of file ++soundfile==0.13.1 +-- +2.50.1 +