mirror of
https://gitlab.com/nonguix/nonguix.git
synced 2025-10-02 02:14:59 +00:00
* nongnu/packages/machine-learning.scm (python-realtimestt): New variable.
* nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch: Record it.
170 lines
7.9 KiB
Diff
170 lines
7.9 KiB
Diff
From 54d3cd6077a1bb545ea4dbc75120d614412d7673 Mon Sep 17 00:00:00 2001
|
|
Message-ID: <54d3cd6077a1bb545ea4dbc75120d614412d7673.1756745212.git.ngraves@ngraves.fr>
|
|
From: Nicolas Graves <ngraves@ngraves.fr>
|
|
Date: Mon, 1 Sep 2025 18:46:22 +0200
|
|
Subject: [PATCH] Remove nonfree pvporcupine backend.
|
|
|
|
---
|
|
RealtimeSTT/audio_recorder.py | 52 ++++------------------------
|
|
RealtimeSTT/audio_recorder_client.py | 2 +-
|
|
RealtimeSTT_server/stt_server.py | 2 +-
|
|
requirements.txt | 3 +-
|
|
4 files changed, 10 insertions(+), 49 deletions(-)
|
|
|
|
diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py
|
|
index 59461b5..cd04c6d 100644
|
|
--- a/RealtimeSTT/audio_recorder.py
|
|
+++ b/RealtimeSTT/audio_recorder.py
|
|
@@ -7,7 +7,7 @@ The class employs the faster_whisper library to transcribe the recorded audio
|
|
into text using machine learning models, which can be run either on a GPU or
|
|
CPU. Voice activity detection (VAD) is built in, meaning the software can
|
|
automatically start or stop recording based on the presence or absence of
|
|
-speech. It integrates wake word detection through the pvporcupine library,
|
|
+speech. It integrates wake word detection through the openwakeword library,
|
|
allowing the software to initiate recording when a specific word or phrase
|
|
is spoken. The system provides real-time feedback and can be further
|
|
customized.
|
|
@@ -40,7 +40,6 @@ import faster_whisper
|
|
import openwakeword
|
|
import collections
|
|
import numpy as np
|
|
-import pvporcupine
|
|
import traceback
|
|
import threading
|
|
import webrtcvad
|
|
@@ -467,12 +466,7 @@ class AudioToTextRecorder:
|
|
- on_turn_detection_stop (callable, default=None): Callback function to
|
|
be called when the system stops listening for a turn of speech.
|
|
- wakeword_backend (str, default=""): Specifies the backend library to
|
|
- use for wake word detection. Supported options include 'pvporcupine'
|
|
- for using the Porcupine wake word engine or 'oww' for using the
|
|
- OpenWakeWord engine.
|
|
- - wakeword_backend (str, default="pvporcupine"): Specifies the backend
|
|
- library to use for wake word detection. Supported options include
|
|
- 'pvporcupine' for using the Porcupine wake word engine or 'oww' for
|
|
+ use for wake word detection. Supported options include 'oww' for
|
|
using the OpenWakeWord engine.
|
|
- openwakeword_model_paths (str, default=None): Comma-separated paths
|
|
to model files for the openwakeword library. These paths point to
|
|
@@ -819,7 +813,7 @@ class AudioToTextRecorder:
|
|
"transcription model initialized successfully")
|
|
|
|
# Setup wake word detection
|
|
- if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords', 'pvp', 'pvporcupine'}:
|
|
+ if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
|
self.wakeword_backend = wakeword_backend
|
|
|
|
self.wake_words_list = [
|
|
@@ -831,29 +825,7 @@ class AudioToTextRecorder:
|
|
for _ in range(len(self.wake_words_list))
|
|
]
|
|
|
|
- if wake_words and self.wakeword_backend in {'pvp', 'pvporcupine'}:
|
|
-
|
|
- try:
|
|
- self.porcupine = pvporcupine.create(
|
|
- keywords=self.wake_words_list,
|
|
- sensitivities=self.wake_words_sensitivities
|
|
- )
|
|
- self.buffer_size = self.porcupine.frame_length
|
|
- self.sample_rate = self.porcupine.sample_rate
|
|
-
|
|
- except Exception as e:
|
|
- logger.exception(
|
|
- "Error initializing porcupine "
|
|
- f"wake word detection engine: {e}. "
|
|
- f"Wakewords: {self.wake_words_list}."
|
|
- )
|
|
- raise
|
|
-
|
|
- logger.debug(
|
|
- "Porcupine wake word detection engine initialized successfully"
|
|
- )
|
|
-
|
|
- elif wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
|
+ if wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
|
|
|
openwakeword.utils.download_models()
|
|
|
|
@@ -896,7 +868,7 @@ class AudioToTextRecorder:
|
|
)
|
|
|
|
else:
|
|
- logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: pvporcupine, openwakeword.")
|
|
+ logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: openwakeword.")
|
|
|
|
|
|
# Setup voice activity detection model WebRTC
|
|
@@ -1592,17 +1564,7 @@ class AudioToTextRecorder:
|
|
"""
|
|
Processes audio data to detect wake words.
|
|
"""
|
|
- if self.wakeword_backend in {'pvp', 'pvporcupine'}:
|
|
- pcm = struct.unpack_from(
|
|
- "h" * self.buffer_size,
|
|
- data
|
|
- )
|
|
- porcupine_index = self.porcupine.process(pcm)
|
|
- if self.debug_mode:
|
|
- logger.info(f"wake words porcupine_index: {porcupine_index}")
|
|
- return porcupine_index
|
|
-
|
|
- elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
|
+ if self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
|
pcm = np.frombuffer(data, dtype=np.int16)
|
|
prediction = self.owwModel.predict(pcm)
|
|
max_score = -1
|
|
@@ -2840,4 +2802,4 @@ class AudioToTextRecorder:
|
|
traceback (Traceback or None): The traceback corresponding to the
|
|
exception, if any.
|
|
"""
|
|
- self.shutdown()
|
|
\ No newline at end of file
|
|
+ self.shutdown()
|
|
diff --git a/RealtimeSTT/audio_recorder_client.py b/RealtimeSTT/audio_recorder_client.py
|
|
index 89478c8..19ea49e 100644
|
|
--- a/RealtimeSTT/audio_recorder_client.py
|
|
+++ b/RealtimeSTT/audio_recorder_client.py
|
|
@@ -137,7 +137,7 @@ class AudioToTextRecorderClient:
|
|
on_turn_detection_stop=None,
|
|
|
|
# Wake word parameters
|
|
- wakeword_backend: str = "pvporcupine",
|
|
+ wakeword_backend: str = "openwakeword",
|
|
openwakeword_model_paths: str = None,
|
|
openwakeword_inference_framework: str = "onnx",
|
|
wake_words: str = "",
|
|
diff --git a/RealtimeSTT_server/stt_server.py b/RealtimeSTT_server/stt_server.py
|
|
index da3e55e..09e8cde 100644
|
|
--- a/RealtimeSTT_server/stt_server.py
|
|
+++ b/RealtimeSTT_server/stt_server.py
|
|
@@ -499,7 +499,7 @@ def parse_arguments():
|
|
help='The delay in seconds before the wake word detection is activated after the system starts listening. This prevents false positives during the start of a session. Default is 0 seconds.')
|
|
|
|
parser.add_argument('--wakeword_backend', type=str, default='none',
|
|
- help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "pvporcupine".')
|
|
+ help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "openwakeword".')
|
|
|
|
parser.add_argument('--openwakeword_model_paths', type=str, nargs='*',
|
|
help='A list of file paths to OpenWakeWord models. This is useful if you are using OpenWakeWord for wake word detection and need to specify custom models.')
|
|
diff --git a/requirements.txt b/requirements.txt
|
|
index d880b96..c0554e0 100644
|
|
--- a/requirements.txt
|
|
+++ b/requirements.txt
|
|
@@ -1,6 +1,5 @@
|
|
PyAudio==0.2.14
|
|
faster-whisper==1.1.1
|
|
-pvporcupine==1.9.5
|
|
webrtcvad-wheels==2.0.14
|
|
halo==0.0.31
|
|
torch
|
|
@@ -9,4 +8,4 @@ scipy==1.15.2
|
|
openwakeword>=0.4.0
|
|
websockets==15.0.1
|
|
websocket-client==1.8.0
|
|
-soundfile==0.13.1
|
|
\ No newline at end of file
|
|
+soundfile==0.13.1
|
|
--
|
|
2.50.1
|
|
|