nongnu: Add python-realtimestt.

* nongnu/packages/machine-learning.scm (python-realtimestt): New
variable.
* nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch:
New file.
This commit is contained in:
Nicolas Graves 2025-09-08 08:41:06 +02:00 committed by Nicolas Graves
parent c4267aa23e
commit 7dbe934f10
2 changed files with 224 additions and 0 deletions

View file

@ -83,3 +83,57 @@ detection framework with a focus on performance and simplicity.")
(string-append (package-description python-openwakeword) "\n\
Note: This minimal variant doesn't provide the additional Voice Activity
Detection."))))
;; RealtimeSTT: voice activity detection and speech transcription library.
;; The nonfree pvporcupine wake word backend is stripped by a source patch,
;; which leaves openwakeword as the only supported wake word engine.
(define-public python-realtimestt
  (package
    (name "python-realtimestt")
    (version "0.3.104")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
              (url "https://github.com/KoljaB/RealTimeSTT")
              (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1455z2m86qdldap211rp1z9zdwjvmxhdv49cg7bci37190yh1r3h"))
       ;; Remove the import, the code paths and the requirement related to
       ;; the nonfree pvporcupine backend.
       (patches
        (search-patches "python-realtimestt-remove-pvporcupine.patch"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      ;; XXX: Tests require sibling package realtimetts
      #:tests? #f
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'switch-default-to-tflite
            (lambda _
              ;; Make "tflite" the default openwakeword inference framework.
              ;; The client module declares its own "onnx" default for
              ;; openwakeword_inference_framework, so it has to be rewritten
              ;; along with the recorder to keep both defaults consistent.
              (substitute* '("RealtimeSTT/audio_recorder.py"
                             "RealtimeSTT/audio_recorder_client.py")
                (("\"onnx\"")
                 "\"tflite\""))))
          (add-after 'unpack 'relax-requirements
            (lambda _
              ;; Keep scipy and websockets as requirements but drop their
              ;; exact version pins; the replacement re-adds the line
              ;; terminator after the bare requirement name.
              (substitute* "requirements.txt"
                (("^(scipy|websockets)==.*" all target)
                 (string-append target "\n"))))))))
    (propagated-inputs
     (list python-faster-whisper
           python-halo
           python-openwakeword-minimal
           python-pyaudio
           python-scipy
           python-soundfile
           python-pytorch
           python-torchaudio
           python-webrtcvad-wheels
           python-websocket-client
           python-websockets))
    (native-inputs
     ;; python-pyqt and python-pytest would additionally be required to run
     ;; the tests, which are currently disabled.
     (list python-setuptools python-wheel))
    (home-page "https://github.com/KoljaB/RealTimeSTT")
    (synopsis "Voice Activity Detection and Transcription System")
    (description
     "This package provides a fast Voice Activity Detection and Transcription
System.  Note: This package defaults to the tensorflow-lite backend.")
    (license license:expat)))

View file

@ -0,0 +1,170 @@
From 54d3cd6077a1bb545ea4dbc75120d614412d7673 Mon Sep 17 00:00:00 2001
Message-ID: <54d3cd6077a1bb545ea4dbc75120d614412d7673.1756745212.git.ngraves@ngraves.fr>
From: Nicolas Graves <ngraves@ngraves.fr>
Date: Mon, 1 Sep 2025 18:46:22 +0200
Subject: [PATCH] Remove nonfree pvporcupine backend.
---
RealtimeSTT/audio_recorder.py | 52 ++++------------------------
RealtimeSTT/audio_recorder_client.py | 2 +-
RealtimeSTT_server/stt_server.py | 2 +-
requirements.txt | 3 +-
4 files changed, 10 insertions(+), 49 deletions(-)
diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py
index 59461b5..cd04c6d 100644
--- a/RealtimeSTT/audio_recorder.py
+++ b/RealtimeSTT/audio_recorder.py
@@ -7,7 +7,7 @@ The class employs the faster_whisper library to transcribe the recorded audio
into text using machine learning models, which can be run either on a GPU or
CPU. Voice activity detection (VAD) is built in, meaning the software can
automatically start or stop recording based on the presence or absence of
-speech. It integrates wake word detection through the pvporcupine library,
+speech. It integrates wake word detection through the openwakeword library,
allowing the software to initiate recording when a specific word or phrase
is spoken. The system provides real-time feedback and can be further
customized.
@@ -40,7 +40,6 @@ import faster_whisper
import openwakeword
import collections
import numpy as np
-import pvporcupine
import traceback
import threading
import webrtcvad
@@ -467,12 +466,7 @@ class AudioToTextRecorder:
- on_turn_detection_stop (callable, default=None): Callback function to
be called when the system stops listening for a turn of speech.
- wakeword_backend (str, default=""): Specifies the backend library to
- use for wake word detection. Supported options include 'pvporcupine'
- for using the Porcupine wake word engine or 'oww' for using the
- OpenWakeWord engine.
- - wakeword_backend (str, default="pvporcupine"): Specifies the backend
- library to use for wake word detection. Supported options include
- 'pvporcupine' for using the Porcupine wake word engine or 'oww' for
+ use for wake word detection. Supported options include 'oww' for
using the OpenWakeWord engine.
- openwakeword_model_paths (str, default=None): Comma-separated paths
to model files for the openwakeword library. These paths point to
@@ -819,7 +813,7 @@ class AudioToTextRecorder:
"transcription model initialized successfully")
# Setup wake word detection
- if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords', 'pvp', 'pvporcupine'}:
+ if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
self.wakeword_backend = wakeword_backend
self.wake_words_list = [
@@ -831,29 +825,7 @@ class AudioToTextRecorder:
for _ in range(len(self.wake_words_list))
]
- if wake_words and self.wakeword_backend in {'pvp', 'pvporcupine'}:
-
- try:
- self.porcupine = pvporcupine.create(
- keywords=self.wake_words_list,
- sensitivities=self.wake_words_sensitivities
- )
- self.buffer_size = self.porcupine.frame_length
- self.sample_rate = self.porcupine.sample_rate
-
- except Exception as e:
- logger.exception(
- "Error initializing porcupine "
- f"wake word detection engine: {e}. "
- f"Wakewords: {self.wake_words_list}."
- )
- raise
-
- logger.debug(
- "Porcupine wake word detection engine initialized successfully"
- )
-
- elif wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
+ if wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
openwakeword.utils.download_models()
@@ -896,7 +868,7 @@ class AudioToTextRecorder:
)
else:
- logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: pvporcupine, openwakeword.")
+ logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: openwakeword.")
# Setup voice activity detection model WebRTC
@@ -1592,17 +1564,7 @@ class AudioToTextRecorder:
"""
Processes audio data to detect wake words.
"""
- if self.wakeword_backend in {'pvp', 'pvporcupine'}:
- pcm = struct.unpack_from(
- "h" * self.buffer_size,
- data
- )
- porcupine_index = self.porcupine.process(pcm)
- if self.debug_mode:
- logger.info(f"wake words porcupine_index: {porcupine_index}")
- return porcupine_index
-
- elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
+ if self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
pcm = np.frombuffer(data, dtype=np.int16)
prediction = self.owwModel.predict(pcm)
max_score = -1
@@ -2840,4 +2802,4 @@ class AudioToTextRecorder:
traceback (Traceback or None): The traceback corresponding to the
exception, if any.
"""
- self.shutdown()
\ No newline at end of file
+ self.shutdown()
diff --git a/RealtimeSTT/audio_recorder_client.py b/RealtimeSTT/audio_recorder_client.py
index 89478c8..19ea49e 100644
--- a/RealtimeSTT/audio_recorder_client.py
+++ b/RealtimeSTT/audio_recorder_client.py
@@ -137,7 +137,7 @@ class AudioToTextRecorderClient:
on_turn_detection_stop=None,
# Wake word parameters
- wakeword_backend: str = "pvporcupine",
+ wakeword_backend: str = "openwakeword",
openwakeword_model_paths: str = None,
openwakeword_inference_framework: str = "onnx",
wake_words: str = "",
diff --git a/RealtimeSTT_server/stt_server.py b/RealtimeSTT_server/stt_server.py
index da3e55e..09e8cde 100644
--- a/RealtimeSTT_server/stt_server.py
+++ b/RealtimeSTT_server/stt_server.py
@@ -499,7 +499,7 @@ def parse_arguments():
help='The delay in seconds before the wake word detection is activated after the system starts listening. This prevents false positives during the start of a session. Default is 0 seconds.')
parser.add_argument('--wakeword_backend', type=str, default='none',
- help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "pvporcupine".')
+ help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "openwakeword".')
parser.add_argument('--openwakeword_model_paths', type=str, nargs='*',
help='A list of file paths to OpenWakeWord models. This is useful if you are using OpenWakeWord for wake word detection and need to specify custom models.')
diff --git a/requirements.txt b/requirements.txt
index d880b96..c0554e0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
PyAudio==0.2.14
faster-whisper==1.1.1
-pvporcupine==1.9.5
webrtcvad-wheels==2.0.14
halo==0.0.31
torch
@@ -9,4 +8,4 @@ scipy==1.15.2
openwakeword>=0.4.0
websockets==15.0.1
websocket-client==1.8.0
-soundfile==0.13.1
\ No newline at end of file
+soundfile==0.13.1
--
2.50.1