nonguix-mirrors/nongnu/packages/patches/python-realtimestt-remove-pvporcupine.patch

From 54d3cd6077a1bb545ea4dbc75120d614412d7673 Mon Sep 17 00:00:00 2001
Message-ID: <54d3cd6077a1bb545ea4dbc75120d614412d7673.1756745212.git.ngraves@ngraves.fr>
From: Nicolas Graves <ngraves@ngraves.fr>
Date: Mon, 1 Sep 2025 18:46:22 +0200
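Subject: [PATCH] Remove nonfree pvporcupine backend.

pvporcupine is nonfree.  Remove its import, its initialization and
processing branches in audio_recorder.py, and its pin in
requirements.txt, leaving openwakeword as the only wake word backend.
Update the docstrings, default values and help texts accordingly.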

---
 RealtimeSTT/audio_recorder.py        | 52 ++++------------------------
 RealtimeSTT/audio_recorder_client.py |  2 +-
 RealtimeSTT_server/stt_server.py     |  2 +-
 requirements.txt                     |  3 +-
 4 files changed, 10 insertions(+), 49 deletions(-)

diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py
index 59461b5..cd04c6d 100644
--- a/RealtimeSTT/audio_recorder.py
+++ b/RealtimeSTT/audio_recorder.py
@@ -7,7 +7,7 @@ The class employs the faster_whisper library to transcribe the recorded audio
 into text using machine learning models, which can be run either on a GPU or
 CPU. Voice activity detection (VAD) is built in, meaning the software can
 automatically start or stop recording based on the presence or absence of
-speech. It integrates wake word detection through the pvporcupine library,
+speech. It integrates wake word detection through the openwakeword library,
 allowing the software to initiate recording when a specific word or phrase
 is spoken. The system provides real-time feedback and can be further
 customized.
@@ -40,7 +40,6 @@ import faster_whisper
 import openwakeword
 import collections
 import numpy as np
-import pvporcupine
 import traceback
 import threading
 import webrtcvad
@@ -467,12 +466,7 @@ class AudioToTextRecorder:
         - on_turn_detection_stop (callable, default=None): Callback function to
             be called when the system stops listening for a turn of speech.
         - wakeword_backend (str, default=""): Specifies the backend library to
-            use for wake word detection. Supported options include 'pvporcupine'
-            for using the Porcupine wake word engine or 'oww' for using the
-            OpenWakeWord engine.
-        - wakeword_backend (str, default="pvporcupine"): Specifies the backend
-            library to use for wake word detection. Supported options include
-            'pvporcupine' for using the Porcupine wake word engine or 'oww' for
+            use for wake word detection. Supported options include 'oww' for
             using the OpenWakeWord engine.
         - openwakeword_model_paths (str, default=None): Comma-separated paths
             to model files for the openwakeword library. These paths point to
@@ -819,7 +813,7 @@ class AudioToTextRecorder:
                           "transcription model initialized successfully")

         # Setup wake word detection
-        if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords', 'pvp', 'pvporcupine'}:
+        if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
             self.wakeword_backend = wakeword_backend

             self.wake_words_list = [
@@ -831,29 +825,7 @@ class AudioToTextRecorder:
                 for _ in range(len(self.wake_words_list))
             ]

-            if wake_words and self.wakeword_backend in {'pvp', 'pvporcupine'}:
-
-                try:
-                    self.porcupine = pvporcupine.create(
-                        keywords=self.wake_words_list,
-                        sensitivities=self.wake_words_sensitivities
-                    )
-                    self.buffer_size = self.porcupine.frame_length
-                    self.sample_rate = self.porcupine.sample_rate
-
-                except Exception as e:
-                    logger.exception(
-                        "Error initializing porcupine "
-                        f"wake word detection engine: {e}. "
-                        f"Wakewords: {self.wake_words_list}."
-                    )
-                    raise
-
-                logger.debug(
-                    "Porcupine wake word detection engine initialized successfully"
-                )
-
-            elif wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
+            if wake_words and self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:

                 openwakeword.utils.download_models()

@@ -896,7 +868,7 @@ class AudioToTextRecorder:
                 )

             else:
-                logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: pvporcupine, openwakeword.")
+                logger.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported or wake_words not specified. Please specify one of: oww, openwakeword, openwakewords.")


         # Setup voice activity detection model WebRTC
@@ -1592,17 +1564,7 @@ class AudioToTextRecorder:
         """
         Processes audio data to detect wake words.
         """
-        if self.wakeword_backend in {'pvp', 'pvporcupine'}:
-            pcm = struct.unpack_from(
-                "h" * self.buffer_size,
-                data
-            )
-            porcupine_index = self.porcupine.process(pcm)
-            if self.debug_mode:
-                logger.info(f"wake words porcupine_index: {porcupine_index}")
-            return porcupine_index
-
-        elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
+        if self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
             pcm = np.frombuffer(data, dtype=np.int16)
             prediction = self.owwModel.predict(pcm)
             max_score = -1
@@ -2840,4 +2802,4 @@ class AudioToTextRecorder:
             traceback (Traceback or None): The traceback corresponding to the
               exception, if any.
         """
-        self.shutdown()
\ No newline at end of file
+        self.shutdown()
diff --git a/RealtimeSTT/audio_recorder_client.py b/RealtimeSTT/audio_recorder_client.py
index 89478c8..19ea49e 100644
--- a/RealtimeSTT/audio_recorder_client.py
+++ b/RealtimeSTT/audio_recorder_client.py
@@ -137,7 +137,7 @@ class AudioToTextRecorderClient:
                  on_turn_detection_stop=None,

                  # Wake word parameters
-                 wakeword_backend: str = "pvporcupine",
+                 wakeword_backend: str = "openwakeword",
                  openwakeword_model_paths: str = None,
                  openwakeword_inference_framework: str = "onnx",
                  wake_words: str = "",
diff --git a/RealtimeSTT_server/stt_server.py b/RealtimeSTT_server/stt_server.py
index da3e55e..09e8cde 100644
--- a/RealtimeSTT_server/stt_server.py
+++ b/RealtimeSTT_server/stt_server.py
@@ -499,7 +499,7 @@ def parse_arguments():
                         help='The delay in seconds before the wake word detection is activated after the system starts listening. This prevents false positives during the start of a session. Default is 0 seconds.')

     parser.add_argument('--wakeword_backend', type=str, default='none',
-                        help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "pvporcupine".')
+                        help='The backend used for wake word detection. You can specify different backends such as "default" or any custom implementations depending on your setup. Default is "none".')

     parser.add_argument('--openwakeword_model_paths', type=str, nargs='*',
                         help='A list of file paths to OpenWakeWord models. This is useful if you are using OpenWakeWord for wake word detection and need to specify custom models.')
diff --git a/requirements.txt b/requirements.txt
index d880b96..c0554e0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
 PyAudio==0.2.14
 faster-whisper==1.1.1
-pvporcupine==1.9.5
 webrtcvad-wheels==2.0.14
 halo==0.0.31
 torch
@@ -9,4 +8,4 @@ scipy==1.15.2
 openwakeword>=0.4.0
 websockets==15.0.1
 websocket-client==1.8.0
-soundfile==0.13.1
\ No newline at end of file
+soundfile==0.13.1
--
2.50.1