Files

89 lines
2.5 KiB
Python

import gradio as gr
from transformers import pipeline
import edge_tts
import asyncio
# Speech-to-text pipeline (Whisper base checkpoint), created once at import.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
)

# Text-generation pipeline (Qwen2.5 0.5B Instruct), created once at import.
generator = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-0.5B-Instruct",
)

# Module-level state shared by the UI callbacks:
#   voice_txt      - latest transcript produced from the uploaded audio
#   current_answer - latest answer produced by the text generator
voice_txt = ""
current_answer = ""
async def text_to_voice(text, voice="ko-KR-InJoonNeural", output_path="answer.mp3"):
    """Synthesize ``text`` to speech with edge-tts and save it to a file.

    Args:
        text: The text to be spoken.
        voice: Name of the edge-tts voice to use. Defaults to the Korean
            voice this function previously had hard-coded.
        output_path: Path the audio is written to. Defaults to
            ``"answer.mp3"``, the path this function previously had
            hard-coded.
    """
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_path)
def make_voice():
    """Turn the most recent answer into speech and return the audio path.

    Returns:
        ``"answer.mp3"`` once synthesis has finished, or ``None`` when no
        answer has been generated yet.
    """
    global current_answer
    if current_answer:
        # text_to_voice is a coroutine; run it to completion synchronously.
        asyncio.run(text_to_voice(current_answer))
        return "answer.mp3"
    return None
def change_txt(file):
    """Transcribe an audio file and remember the transcript.

    The transcript is stored in the module-level ``voice_txt`` so that
    ``question_answer`` can use it as context.

    Args:
        file: Path of the audio file to transcribe. May be ``None`` or empty
            when the button is pressed before any audio was provided.

    Returns:
        The transcript text, or a short guidance message when no audio was
        provided.
    """
    global voice_txt
    if not file:
        # Without this guard the ASR pipeline would be handed an empty input
        # and fail, which the UI surfaces only as a generic error.
        # Clear the stored transcript so it matches what the UI displays.
        voice_txt = ""
        return "음성을 먼저 녹음하거나 업로드하세요."
    result = asr(file, return_timestamps=True)
    # Strip surrounding whitespace so a blank transcript is treated as
    # "no transcript" by the emptiness check in question_answer.
    voice_txt = result['text'].strip()
    return voice_txt
def question_answer(question):
    """Answer ``question`` using the stored transcript as context.

    Builds a prompt from the module-level ``voice_txt`` transcript and the
    question, runs the text-generation pipeline on it, and stores the result
    in the module-level ``current_answer`` before returning it.

    Args:
        question: The user's question text.

    Returns:
        The generated answer, or a short guidance message when the question
        is empty or no transcript is available yet.
    """
    global voice_txt, current_answer
    # Normalize first: a missing or whitespace-only question is not worth a
    # generation pass.
    question = (question or "").strip()
    if not question:
        return "질문을 입력하세요."
    if not voice_txt:
        return "음성을 텍스트로 변환한 후 질문하세요."
    # Built from explicit "\n" pieces so the prompt text does not depend on
    # how this function is indented.
    prompt = (
        "\n"
        "다음 음성 내용을 참고해서 질문에 답변하세요\n"
        "음성내용 :\n"
        f"{voice_txt}\n"
        "질문 :\n"
        f"{question}\n"
        "답변:\n"
    )
    result = generator(
        prompt,
        max_new_tokens=50,
        return_full_text=False,
        do_sample=False,
        pad_token_id=generator.tokenizer.eos_token_id,
    )
    current_answer = result[0]['generated_text'].strip()
    return current_answer
# UI definition: three rows (transcription, question/answer, audio playback).
with gr.Blocks(title="AI 음성 챗봇") as demo:
    gr.Markdown("## AI 음성 비서")

    # Row 1: audio input on the left, transcript on the right.
    with gr.Row():
        with gr.Column(scale=1):
            file = gr.Audio(type="filepath")
            txt_btn = gr.Button("텍스트 변환")
        with gr.Column(scale=1):
            out = gr.Textbox(label="텍스트 변환", lines=3)

    # Row 2: question entry on the left, answer and speech button on the right.
    with gr.Row():
        with gr.Column(scale=1):
            question = gr.Textbox(label="질문하기")
            question_btn = gr.Button("질문하기")
        with gr.Column(scale=1):
            answer = gr.Textbox(label="answer", placeholder="답변")
            voice_btn = gr.Button("답변 음성 전환")

    # Row 3: player for the synthesized answer.
    with gr.Row():
        audio_output = gr.Audio(label="AI 음성 답변", autoplay=True)

    # Wire each button to its callback.
    txt_btn.click(fn=change_txt, inputs=file, outputs=out)
    question_btn.click(fn=question_answer, inputs=question, outputs=answer)
    voice_btn.click(fn=make_voice, outputs=audio_output)

demo.launch()