Python實現語音啟動電腦應用程序

更新時間：2025年03月08日 09:48:15 作者：Atlas Shepherd

這篇文章主要為大家詳細介紹了如何使用Python實現語音啟動電腦應用程序功能，文中的示例代碼講解詳細，感興趣的小伙伴可以跟隨小編一起學習一下

實現(xiàn)思路

Vosk模型進行輸入語音轉換

txt字典導(dǎo)航程序路徑

pyttsx3引擎進行語音打印輸出

關鍵詞=程序路徑

完整代碼

import json
import os
import queue
import subprocess
import sys
import threading
import time

import pyttsx3
import sounddevice as sd
from vosk import Model, KaldiRecognizer
 
# Initialise the pyttsx3 text-to-speech engine used for all spoken feedback.
engine = pyttsx3.init()
engine.setProperty('rate', 150)    # speaking rate
engine.setProperty('volume', 1.0)  # volume

# Location of the Vosk model. Without it nothing else can work, so report
# the problem (on screen and aloud) and stop right away.
model_path = r"D:\daku\yuyinshibie\vosk-model-small-cn-0.22"
if not os.path.exists(model_path):
    print(f"模型路徑不存在: {model_path}")
    engine.say(f"模型路徑不存在: {model_path}")
    engine.runAndWait()
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist
    # (e.g. `python -S` or some frozen executables).
    sys.exit(1)
 
def load_app_dict(file_path):
    """Load the voice-command dictionary from *file_path*.

    Each useful line has the form ``command=program path``. Several aliases
    may share one path when separated by a comma, either ASCII ``,`` or
    full-width ``，`` (e.g. ``微信,weixin=C:\\...\\WeChat.exe``).

    Lines without ``=``, and entries with an empty alias or an empty path,
    are skipped. Only the first ``=`` separates command from path, so the
    path itself may contain ``=``.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        dict mapping every alias to its program path. Empty when the file
        does not exist (the problem is printed and spoken in that case).
    """
    app_dict = {}
    if not os.path.exists(file_path):
        print(f"字典文件不存在: {file_path}")
        engine.say(f"字典文件不存在: {file_path}")
        engine.runAndWait()
        return app_dict

    # 'utf-8-sig' reads plain UTF-8 unchanged but also drops a leading BOM,
    # which would otherwise become part of the first alias.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            keys, sep, value = line.strip().partition('=')
            value = value.strip()
            if not sep or not value:
                continue
            # Normalise ASCII commas so both comma styles separate aliases.
            for key in keys.replace(',', '，').split('，'):
                key = key.strip()
                if key:
                    app_dict[key] = value
    return app_dict
 
def launch_application(app_name, app_dict):
    """Announce and start the program registered under *app_name*.

    Args:
        app_name: Command word to look up (exact match against the keys).
        app_dict: Mapping of command word -> program path.

    The outcome is always reported through say(): a "starting" message
    before launch, a "not found" message for unknown names, or a failure
    message when the operating system cannot start the program.
    """
    if app_name not in app_dict:
        say(f"找不到與 '{app_name}' 對應的應用程序。")
        return

    app_path = app_dict[app_name]
    say(f"正在啟動 {app_name}...")
    try:
        subprocess.Popen(app_path)
    except OSError as err:
        # A stale or mistyped path in the dictionary must not take the
        # whole listener down; report it and keep running.
        print(f"無法啟動 {app_name}: {err}")
        say(f"無法啟動 {app_name}。")
        return
    time.sleep(2)  # wait 2 seconds before listening continues
 
def say(text):
    """Speak *text* aloud, pausing microphone capture while speaking.

    If the module-level input stream exists it is stopped before speaking
    and started again afterwards, even when the speech engine raises, so a
    TTS failure cannot leave the microphone silently switched off.

    Args:
        text: The sentence to speak.
    """
    # The module-level names are only read here (attributes are set on the
    # stream object, the names are never rebound), so no `global` is needed.
    paused = stream is not None
    if paused:
        with stream_lock:
            # NOTE(review): kept from the original; stop()/start() looks like
            # what actually pauses capture -- confirm that assigning
            # `callback` on a sounddevice stream has any effect.
            stream.callback = None
            stream.stop()           # pause the audio stream
    try:
        engine.say(text)
        engine.runAndWait()
    finally:
        if paused:
            with stream_lock:
                stream.start()      # resume the audio stream
                stream.callback = callback_func
 
# Speech recognition: load the Vosk model and build a recognizer for
# 16000 Hz audio (the same samplerate the input stream is opened with).
model = Model(model_path)
rec = KaldiRecognizer(model, 16000)

# Audio blocks travel from the capture callback to the main loop through
# this queue.
q = queue.Queue()

# Most recent partial / final transcripts; the main loop compares against
# them to skip consecutive repeats.
last_partial_result = last_full_command = ""

# The input stream and its callback are assigned by the main block; both
# stay None until then. stream_lock is held by say() while it stops and
# restarts the stream.
stream = callback_func = None
stream_lock = threading.Lock()
 
def callback(indata, frames, time, status):
    """Hand one captured audio block to the recognizer loop.

    Passed as the ``callback`` of the ``sd.RawInputStream`` opened in the
    main block.

    Args:
        indata: Buffer with the captured audio; copied to ``bytes`` before
            being queued.
        frames: Unused.
        time: Unused. It shadows the ``time`` module inside this function
            only, which is harmless because the module is not needed here.
        status: Reported on stderr when truthy.
    """
    if status:
        # Requires the module-level `import sys`; without it this line
        # raised NameError the first time a status flag was reported.
        print(status, file=sys.stderr)
    q.put(bytes(indata))
 
# Main program: listen on the microphone, echo what was recognised and
# launch the application named after the trigger word.
if __name__ == "__main__":
    dict_file = r"D:\daku\yuyinshibie\zidian.txt"  # command dictionary path
    app_dict = load_app_dict(dict_file)

    # Trigger word in Traditional and Simplified form.
    # NOTE(review): the configured "cn" model is expected to emit Simplified
    # text, which the Traditional-only check could never match -- confirm.
    open_keywords = ("打開", "打开")

    try:
        # Open the audio stream up front so say() can pause and resume it.
        callback_func = callback
        stream = sd.RawInputStream(samplerate=16000, blocksize=8000, dtype='int16',
                                   channels=1, callback=callback)
        stream.start()

        say("請說：")
        while True:
            data = q.get()
            if rec.AcceptWaveform(data):
                # A complete utterance was recognised.
                result = json.loads(rec.Result())
                command = result.get('text', '').strip()
                # Skip empty results and an immediate repeat of the last one.
                if command and command != last_full_command:
                    print(f"你說的是: {command}")
                    say(f"你說的是: {command}")
                    if any(word in command for word in open_keywords):
                        # Whatever remains after removing the trigger word
                        # is the application name to look up.
                        app_to_open = command
                        for word in open_keywords:
                            app_to_open = app_to_open.replace(word, "")
                        launch_application(app_to_open.strip(), app_dict)
                    last_full_command = command
            else:
                # Utterance still in progress: PartialResult() always returns
                # a JSON document, so parse it once and test its content
                # instead of testing the raw string for truthiness.
                partial_result = json.loads(rec.PartialResult()).get('partial', '')
                # NOTE(review): say() stops the stream while it speaks, so
                # announcing partials may cut off the rest of the utterance.
                if (partial_result != last_partial_result
                        and any(word in partial_result for word in open_keywords)):
                    print(f"部分結果: {partial_result}")
                    say(f"部分結果: {partial_result}")
                    last_partial_result = partial_result
    except KeyboardInterrupt:
        say("\n退出程序。")
    finally:
        # Release the audio device however the loop ended.
        if stream is not None:
            stream.stop()
            stream.close()