mirror of
https://gitlab.com/nonguix/nonguix.git
synced 2025-10-02 02:14:59 +00:00
nongnu: Add python-realtimestt.
* nongnu/packages/machine-learning.scm (python-realtimestt): New variable. * nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch: Record it.
This commit is contained in:
parent
c4267aa23e
commit
7dbe934f10
2 changed files with 224 additions and 0 deletions
|
@ -83,3 +83,57 @@ detection framework with a focus on performance and simplicity.")
|
|||
(string-append (package-description python-openwakeword) "\n\
|
||||
Note: This minimal variant doesn't provide the additional Voice Activity
|
||||
Detection."))))
|
||||
|
||||
;; Package definition for RealtimeSTT, built from the upstream git tag with
;; the nonfree pvporcupine wake word backend patched out.
(define-public python-realtimestt
  (package
    (name "python-realtimestt")
    (version "0.3.104")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
              (url "https://github.com/KoljaB/RealTimeSTT")
              (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1455z2m86qdldap211rp1z9zdwjvmxhdv49cg7bci37190yh1r3h"))
       ;; The patch removes the nonfree pvporcupine backend and its entry
       ;; in requirements.txt.
       (patches
        (search-patches "python-realtimestt-remove-pvporcupine.patch"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      ;; XXX: Tests require sibling package realtimetts
      #:tests? #f
      #:phases
      #~(modify-phases %standard-phases
          ;; Rewrite every "onnx" string literal in audio_recorder.py to
          ;; "tflite", so that the recorder defaults to the tflite
          ;; inference framework.
          ;; NOTE(review): RealtimeSTT/audio_recorder_client.py also has an
          ;; "onnx" default (openwakeword_inference_framework) that this
          ;; phase leaves alone -- confirm that this is intended.
          (add-after 'unpack 'switch-default-to-tflite
            (lambda _
              (substitute* "RealtimeSTT/audio_recorder.py"
                (("\"onnx\"")
                 "\"tflite\""))))
          ;; Drop the exact "==" version pins on scipy and websockets,
          ;; keeping only the bare requirement names.
          (add-after 'unpack 'relax-requirements
            (lambda _
              (substitute* "requirements.txt"
                (("^(scipy|websockets)==.*" all target)
                 (string-append target "\n"))))))))
    (propagated-inputs
     (list python-faster-whisper
           python-halo
           python-openwakeword-minimal
           python-pyaudio
           python-scipy
           python-soundfile
           python-pytorch
           python-torchaudio
           python-webrtcvad-wheels
           python-websocket-client
           python-websockets))
    (native-inputs
     ;; python-pyqt and python-pytest would also be required if the tests
     ;; were enabled.
     (list python-setuptools python-wheel))
    (home-page "https://github.com/KoljaB/RealTimeSTT")
    (synopsis "Voice Activity Detection and Transcription System")
    (description
     "This package provides a fast Voice Activity Detection and Transcription System.
Note: This package defaults to the tensorflow-lite backend.")
    (license license:expat)))
|
||||
|
|
|
@ -0,0 +1,170 @@
|
|||
From 54d3cd6077a1bb545ea4dbc75120d614412d7673 Mon Sep 17 00:00:00 2001
|
||||
Message-ID: <54d3cd6077a1bb545ea4dbc75120d614412d7673.1756745212.git.ngraves@ngraves.fr>
|
||||
From: Nicolas Graves <ngraves@ngraves.fr>
|
||||
Date: Mon, 1 Sep 2025 18:46:22 +0200
|
||||
Subject: [PATCH] Remove nonfree pvporcupine backend.
|
||||
|
||||
---
|
||||
RealtimeSTT/audio_recorder.py | 52 ++++------------------------
|
||||
RealtimeSTT/audio_recorder_client.py | 2 +-
|
||||
RealtimeSTT_server/stt_server.py | 2 +-
|
||||
requirements.txt | 3 +-
|
||||
4 files changed, 10 insertions(+), 49 deletions(-)
|
||||
|
||||
diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py
|
||||
index 59461b5..cd04c6d 100644
|
||||
--- a/RealtimeSTT/audio_recorder.py
|
||||
+++ b/RealtimeSTT/audio_recorder.py
|
||||
@@ -7,7 +7,7 @@ The class employs the faster_whisper library to transcribe the recorded audio
|
||||
into text using machine learning models, which can be run either on a GPU or
|
||||
CPU. Voice activity detection (VAD) is built in, meaning the software can
|
||||
automatically start or stop recording based on the presence or absence of
|
||||
-speech. It integrates wake word detection through the pvporcupine library,
|
||||
+speech. It integrates wake word detection through the openwakeword library,
|
||||
allowing the software to initiate recording when a specific word or phrase
|
||||
is spoken. The system provides real-time feedback and can be further
|
||||
customized.
|
||||
@@ -40,7 +40,6 @@ import faster_whisper
|
||||
import openwakeword
|
||||
import collections
|
||||
import numpy as np
|
||||
-import pvporcupine
|
||||
import traceback
|
||||
import threading
|
||||
import webrtcvad
|
||||
@@ -467,12 +466,7 @@ class AudioToTextRecorder:
|
||||
- on_turn_detection_stop (callable, default=None): Callback function to
|
||||
be called when the system stops listening for a turn of speech.
|
||||
- wakeword_backend (str, default=""): Specifies the backend library to
|
||||
- use for wake word detection. Supported options include 'pvporcupine'
|
||||
- for using the Porcupine wake word engine or 'oww' for using the
|
||||
- OpenWakeWord engine.
|
||||
- - wakeword_backend (str, default="pvporcupine"): Specifies the backend
|
||||
- library to use for wake word detection. Supported options include
|
||||
- 'pvporcupine' for using the Porcupine wake word engine or 'oww' for
|
||||
+ use for wake word detection. Supported options include 'oww' for
|
||||
using the OpenWakeWord engine.
|
||||
- openwakeword_model_paths (str, default=None): Comma-separated paths
|
||||
to model files for the openwakeword library. These paths point to
|
||||
@@ -819,7 +813,7 @@ class AudioToTextRecorder:
|
||||
"transcription model initialized successfully")
|
||||
|
||||
# Setup wake word detection
|
||||
- if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords', 'pvp', 'pvporcupine'}:
|
||||
+ if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
||||
self.wakeword_backend = wakeword_backend
|
||||
|
||||
self.wake_words_list = [
|
||||
@@ -831,29 +825,7 @@ class AudioToTextRecorder:
|
||||
for _ in range(len(self.wake_words_list))
|
||||
]
|
||||
|
||||
- if wake_words and self.wakeword_backend in {'pvp', 'pvporcupine'}:
|
||||
-
|
||||
- try:
|
||||
- self.porcupine = pvporcupine.create(
|
||||
- keywords=self.wake_words_list,
|
||||
- sensitivities=self.wake_words_sensitivities
|
||||
- )
|
||||
- self.buffer_size = self.porcupine.frame_length
|
||||
- self.sample_rate = self.porcupine.sample_rate
|
||||
-
|
||||
- except Exception as e:
|
||||
- logger.exception(
|
||||
- "Error initializing porcupine "
|
||||
- f"wake word detection engine: {e}. "
|
||||
- f"Wakewords: {self.wake_words_list}."
|
||||
- )
|
||||
- raise
|
||||
-
|
||||
- logger.debug(
|
||||
- "Porcupine wake word detection engine initialized successfully"
|
||||
- )
|
||||
-
|
||||
- elif wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
||||
+ if wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
||||
|
||||
openwakeword.utils.download_models()
|
||||
|
||||
@@ -896,7 +868,7 @@ class AudioToTextRecorder:
|
||||
)
|
||||
|
||||
else:
|
||||
- logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: pvporcupine, openwakeword.")
|
||||
+ logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: openwakeword.")
|
||||
|
||||
|
||||
# Setup voice activity detection model WebRTC
|
||||
@@ -1592,17 +1564,7 @@ class AudioToTextRecorder:
|
||||
"""
|
||||
Processes audio data to detect wake words.
|
||||
"""
|
||||
- if self.wakeword_backend in {'pvp', 'pvporcupine'}:
|
||||
- pcm = struct.unpack_from(
|
||||
- "h" * self.buffer_size,
|
||||
- data
|
||||
- )
|
||||
- porcupine_index = self.porcupine.process(pcm)
|
||||
- if self.debug_mode:
|
||||
- logger.info(f"wake words porcupine_index: {porcupine_index}")
|
||||
- return porcupine_index
|
||||
-
|
||||
- elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
||||
+ if self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
|
||||
pcm = np.frombuffer(data, dtype=np.int16)
|
||||
prediction = self.owwModel.predict(pcm)
|
||||
max_score = -1
|
||||
@@ -2840,4 +2802,4 @@ class AudioToTextRecorder:
|
||||
traceback (Traceback or None): The traceback corresponding to the
|
||||
exception, if any.
|
||||
"""
|
||||
- self.shutdown()
|
||||
\ No newline at end of file
|
||||
+ self.shutdown()
|
||||
diff --git a/RealtimeSTT/audio_recorder_client.py b/RealtimeSTT/audio_recorder_client.py
|
||||
index 89478c8..19ea49e 100644
|
||||
--- a/RealtimeSTT/audio_recorder_client.py
|
||||
+++ b/RealtimeSTT/audio_recorder_client.py
|
||||
@@ -137,7 +137,7 @@ class AudioToTextRecorderClient:
|
||||
on_turn_detection_stop=None,
|
||||
|
||||
# Wake word parameters
|
||||
- wakeword_backend: str = "pvporcupine",
|
||||
+ wakeword_backend: str = "openwakeword",
|
||||
openwakeword_model_paths: str = None,
|
||||
openwakeword_inference_framework: str = "onnx",
|
||||
wake_words: str = "",
|
||||
diff --git a/RealtimeSTT_server/stt_server.py b/RealtimeSTT_server/stt_server.py
|
||||
index da3e55e..09e8cde 100644
|
||||
--- a/RealtimeSTT_server/stt_server.py
|
||||
+++ b/RealtimeSTT_server/stt_server.py
|
||||
@@ -499,7 +499,7 @@ def parse_arguments():
|
||||
help='The delay in seconds before the wake word detection is activated after the system starts listening. This prevents false positives during the start of a session. Default is 0 seconds.')
|
||||
|
||||
parser.add_argument('--wakeword_backend', type=str, default='none',
|
||||
- help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "pvporcupine".')
|
||||
+ help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "openwakeword".')
|
||||
|
||||
parser.add_argument('--openwakeword_model_paths', type=str, nargs='*',
|
||||
help='A list of file paths to OpenWakeWord models. This is useful if you are using OpenWakeWord for wake word detection and need to specify custom models.')
|
||||
diff --git a/requirements.txt b/requirements.txt
|
||||
index d880b96..c0554e0 100644
|
||||
--- a/requirements.txt
|
||||
+++ b/requirements.txt
|
||||
@@ -1,6 +1,5 @@
|
||||
PyAudio==0.2.14
|
||||
faster-whisper==1.1.1
|
||||
-pvporcupine==1.9.5
|
||||
webrtcvad-wheels==2.0.14
|
||||
halo==0.0.31
|
||||
torch
|
||||
@@ -9,4 +8,4 @@ scipy==1.15.2
|
||||
openwakeword>=0.4.0
|
||||
websockets==15.0.1
|
||||
websocket-client==1.8.0
|
||||
-soundfile==0.13.1
|
||||
\ No newline at end of file
|
||||
+soundfile==0.13.1
|
||||
--
|
||||
2.50.1
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue