nongnu: Add python-realtimestt.

* nongnu/packages/machine-learning.scm (python-realtimestt): New variable.
* nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch: New file.
2025-10-02 02:14:59 +00:00 · 2025-09-08 08:41:06 +02:00 · 2025-09-08 08:41:06 +02:00 · 7dbe934f10
commit 7dbe934f10
parent c4267aa23e
2 changed files with 224 additions and 0 deletions
--- a/nongnu/packages/machine-learning.scm
+++ b/nongnu/packages/machine-learning.scm
@@ -83,3 +83,57 @@ detection framework with a focus on performance and simplicity.")
     (string-append (package-description python-openwakeword) "\n\
 Note: This minimal variant doesn't provide the additional Voice Activity
 Detection."))))
+
+(define-public python-realtimestt
+  (package
+    (name "python-realtimestt")
+    (version "0.3.104")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+              (url "https://github.com/KoljaB/RealTimeSTT")
+              (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "1455z2m86qdldap211rp1z9zdwjvmxhdv49cg7bci37190yh1r3h"))
+       (patches
+        (search-patches "python-realtimestt-remove-pvporcupine.patch"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      ;; XXX: Tests require the sibling package realtimetts.
+      #:tests? #f
+      #:phases
+      #~(modify-phases %standard-phases
+          ;; Replace every "onnx" literal so that tflite becomes the default.
+          ;; NOTE(review): audio_recorder_client.py still defaults to "onnx".
+          (add-after 'unpack 'switch-default-to-tflite
+            (lambda _
+              (substitute* "RealtimeSTT/audio_recorder.py"
+                (("\"onnx\"") "\"tflite\""))))
+          (add-after 'unpack 'relax-requirements
+            (lambda _
+              (substitute* "requirements.txt"
+                (("^(scipy|websockets)==.*" all target)
+                 (string-append target "\n"))))))))
+    (propagated-inputs
+     (list python-faster-whisper
+           python-halo
+           python-openwakeword-minimal
+           python-pyaudio
+           python-scipy
+           python-soundfile
+           python-pytorch
+           python-torchaudio
+           python-webrtcvad-wheels
+           python-websocket-client
+           python-websockets))
+    ;; python-pyqt and python-pytest would additionally be needed for tests.
+    (native-inputs (list python-setuptools python-wheel))
+    (home-page "https://github.com/KoljaB/RealTimeSTT")
+    (synopsis "Voice Activity Detection and Transcription System")
+    (description
+     "This package provides a fast Voice Activity Detection and Transcription
+System.  Note: this package defaults to the tensorflow-lite backend.")
+    (license license:expat)))
--- a/nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch
+++ b/nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch
@@ -0,0 +1,170 @@
+From 54d3cd6077a1bb545ea4dbc75120d614412d7673 Mon Sep 17 00:00:00 2001
+Message-ID: <54d3cd6077a1bb545ea4dbc75120d614412d7673.1756745212.git.ngraves@ngraves.fr>
+From: Nicolas Graves <ngraves@ngraves.fr>
+Date: Mon, 1 Sep 2025 18:46:22 +0200
+Subject: [PATCH] Remove nonfree pvporcupine backend.
+
+---
+ RealtimeSTT/audio_recorder.py        | 52 ++++------------------------
+ RealtimeSTT/audio_recorder_client.py |  2 +-
+ RealtimeSTT_server/stt_server.py     |  2 +-
+ requirements.txt                     |  3 +-
+ 4 files changed, 10 insertions(+), 49 deletions(-)
+
+diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py
+index 59461b5..cd04c6d 100644
+--- a/RealtimeSTT/audio_recorder.py
+++ b/RealtimeSTT/audio_recorder.py
+@@ -7,7 +7,7 @@ The class employs the faster_whisper library to transcribe the recorded audio
+ into text using machine learning models, which can be run either on a GPU or
+ CPU. Voice activity detection (VAD) is built in, meaning the software can
+ automatically start or stop recording based on the presence or absence of
+-speech. It integrates wake word detection through the pvporcupine library,
++speech. It integrates wake word detection through the openwakeword library,
+ allowing the software to initiate recording when a specific word or phrase
+ is spoken. The system provides real-time feedback and can be further
+ customized.
+@@ -40,7 +40,6 @@ import faster_whisper
+ import openwakeword
+ import collections
+ import numpy as np
+-import pvporcupine
+ import traceback
+ import threading
+ import webrtcvad
+@@ -467,12 +466,7 @@ class AudioToTextRecorder:
+         - on_turn_detection_stop (callable, default=None): Callback function to
+             be called when the system stops listening for a turn of speech.
+         - wakeword_backend (str, default=""): Specifies the backend library to
+-            use for wake word detection. Supported options include 'pvporcupine'
+-            for using the Porcupine wake word engine or 'oww' for using the
+-            OpenWakeWord engine.
+-        - wakeword_backend (str, default="pvporcupine"): Specifies the backend
+-            library to use for wake word detection. Supported options include
+-            'pvporcupine' for using the Porcupine wake word engine or 'oww' for
++            use for wake word detection. Supported options include 'oww' for
+             using the OpenWakeWord engine.
+         - openwakeword_model_paths (str, default=None): Comma-separated paths
+             to model files for the openwakeword library. These paths point to
+@@ -819,7 +813,7 @@ class AudioToTextRecorder:
+                           "transcription model initialized successfully")
+ 
+         # Setup wake word detection
+-        if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords', 'pvp', 'pvporcupine'}:
++        if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
+             self.wakeword_backend = wakeword_backend
+ 
+             self.wake_words_list = [
+@@ -831,29 +825,7 @@ class AudioToTextRecorder:
+                 for _ in range(len(self.wake_words_list))
+             ]
+ 
+-            if wake_words and self.wakeword_backend in {'pvp', 'pvporcupine'}:
+-
+-                try:
+-                    self.porcupine = pvporcupine.create(
+-                        keywords=self.wake_words_list,
+-                        sensitivities=self.wake_words_sensitivities
+-                    )
+-                    self.buffer_size = self.porcupine.frame_length
+-                    self.sample_rate = self.porcupine.sample_rate
+-
+-                except Exception as e:
+-                    logger.exception(
+-                        "Error initializing porcupine "
+-                        f"wake word detection engine: {e}. "
+-                        f"Wakewords: {self.wake_words_list}."
+-                    )
+-                    raise
+-
+-                logger.debug(
+-                    "Porcupine wake word detection engine initialized successfully"
+-                )
+-
+-            elif wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
++            if wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
+                     
+                 openwakeword.utils.download_models()
+ 
+@@ -896,7 +868,7 @@ class AudioToTextRecorder:
+                 )
+             
+             else:
+-                logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: pvporcupine, openwakeword.")
++                logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: openwakeword.")
+ 
+ 
+         # Setup voice activity detection model WebRTC
+@@ -1592,17 +1564,7 @@ class AudioToTextRecorder:
+         """
+         Processes audio data to detect wake words.
+         """
+-        if self.wakeword_backend in {'pvp', 'pvporcupine'}:
+-            pcm = struct.unpack_from(
+-                "h" * self.buffer_size,
+-                data
+-            )
+-            porcupine_index = self.porcupine.process(pcm)
+-            if self.debug_mode:
+-                logger.info(f"wake words porcupine_index: {porcupine_index}")
+-            return porcupine_index
+-
+-        elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
++        if self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
+             pcm = np.frombuffer(data, dtype=np.int16)
+             prediction = self.owwModel.predict(pcm)
+             max_score = -1
+@@ -2840,4 +2802,4 @@ class AudioToTextRecorder:
+             traceback (Traceback or None): The traceback corresponding to the
+               exception, if any.
+         """
+-        self.shutdown()
+\ No newline at end of file
++        self.shutdown()
+diff --git a/RealtimeSTT/audio_recorder_client.py b/RealtimeSTT/audio_recorder_client.py
+index 89478c8..19ea49e 100644
+--- a/RealtimeSTT/audio_recorder_client.py
+++ b/RealtimeSTT/audio_recorder_client.py
+@@ -137,7 +137,7 @@ class AudioToTextRecorderClient:
+                  on_turn_detection_stop=None,
+ 
+                  # Wake word parameters
+-                 wakeword_backend: str = "pvporcupine",
++                 wakeword_backend: str = "openwakeword",
+                  openwakeword_model_paths: str = None,
+                  openwakeword_inference_framework: str = "onnx",
+                  wake_words: str = "",
+diff --git a/RealtimeSTT_server/stt_server.py b/RealtimeSTT_server/stt_server.py
+index da3e55e..09e8cde 100644
+--- a/RealtimeSTT_server/stt_server.py
+++ b/RealtimeSTT_server/stt_server.py
+@@ -499,7 +499,7 @@ def parse_arguments():
+                         help='The delay in seconds before the wake word detection is activated after the system starts listening. This prevents false positives during the start of a session. Default is 0 seconds.')
+ 
+     parser.add_argument('--wakeword_backend', type=str, default='none',
+-                        help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "pvporcupine".')
++                        help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "openwakeword".')
+ 
+     parser.add_argument('--openwakeword_model_paths', type=str, nargs='*',
+                         help='A list of file paths to OpenWakeWord models. This is useful if you are using OpenWakeWord for wake word detection and need to specify custom models.')
+diff --git a/requirements.txt b/requirements.txt
+index d880b96..c0554e0 100644
+--- a/requirements.txt
+++ b/requirements.txt
+@@ -1,6 +1,5 @@
+ PyAudio==0.2.14
+ faster-whisper==1.1.1
+-pvporcupine==1.9.5
+ webrtcvad-wheels==2.0.14
+ halo==0.0.31
+ torch
+@@ -9,4 +8,4 @@ scipy==1.15.2
+ openwakeword>=0.4.0
+ websockets==15.0.1
+ websocket-client==1.8.0
+-soundfile==0.13.1
+\ No newline at end of file
++soundfile==0.13.1
+-- 
+2.50.1
+