# Source code for language_service.tts

"""
This is "tts.py"
"""


"""
import asyncio, aiohttp, logging, uuid
from google.cloud import texttospeech
from google.oauth2 import service_account

from common.constants import Constants
from common.utils import clean_text
from language_service.utils import get_language_by_code
from django_core.config import Config
"""

#logger = logging.getLogger(__name__)


#credentials = service_account.Credentials.from_service_account_file(Config.GOOGLE_APPLICATION_CREDENTIALS)


async def synthesize_speech_azure(text_to_synthesize, language_code, aiohttp_session):
    """
    Synthesise speech using Azure TTS model.
    `Azure TTS Docs <https://learn.microsoft.com/en-us/azure/ai-services/speech-service/>`_

    The input is treated as plain text: it is XML-escaped before being
    embedded in the SSML request body, so characters such as ``&`` and ``<``
    cannot produce a malformed document.

    :param text_to_synthesize: Plain text to be spoken.
    :param language_code: Locale code such as ``"en-KE"``, ``"sw-KE"`` or
        ``"en-NG"``. Any other value falls back to the ``en-GB-SoniaNeural``
        voice, while still being sent as the ``xml:lang`` attribute.
    :param aiohttp_session: Session object whose ``post(...)`` result is used
        as an async context manager (presumably an ``aiohttp.ClientSession``).
    :return: The raw response body on HTTP 200 (requested output format is
        ``ogg-48khz-16bit-mono-opus``), otherwise ``None``.
    """
    # Function-scope imports keep this block self-contained regardless of the
    # state of the module-level import section.
    import logging
    from xml.sax.saxutils import escape

    url = f"https://{Config.AZURE_SERVICE_REGION}.tts.speech.microsoft.com/cognitiveservices/v1"
    headers = {
        "Ocp-Apim-Subscription-Key": Config.AZURE_SUBSCRIPTION_KEY,
        "Content-Type": "application/ssml+xml",
        "X-Microsoft-OutputFormat": "ogg-48khz-16bit-mono-opus",
    }

    # Locale -> voice name; unknown locales use the British English voice.
    voices_by_locale = {
        "en-KE": "en-KE-AsiliaNeural",
        "sw-KE": "sw-KE-ZuriNeural",
        "en-NG": "en-NG-EzinneNeural",
    }
    azure_voice = voices_by_locale.get(language_code, "en-GB-SoniaNeural")

    # Escape everything interpolated into the XML. The attributes below are
    # single-quoted, so the apostrophe must be escaped for them as well.
    ssml_text = escape(str(text_to_synthesize))
    ssml_lang = escape(str(language_code), {"'": "&apos;"})

    body = f"""
    <speak version='1.0' xml:lang='{ssml_lang}'>
        <voice xml:lang='{ssml_lang}' xml:gender='Female' name='{azure_voice}'>
            {ssml_text}
        </voice>
    </speak>
    """

    async with aiohttp_session.post(url, data=body, headers=headers) as response:
        if response.status == 200:
            return await response.read()

        # Surface the failure instead of silently returning nothing.
        logging.getLogger(__name__).warning(
            "Azure TTS request failed with HTTP status %s", response.status
        )
        return None



async def synthesize_speech(
    input_text: str,
    input_language: str,
    id_string: str = None,
    aiohttp_session=None,
    sample_rate_hertz=48000,
    audio_encoding_format=None,
) -> str:
    """
    Synthesise speech with Google Cloud Text-to-Speech and write it to a file.

    The text is passed through ``clean_text`` and synthesised with a female
    voice. The audio is written to ``response_<id_string>.<extension>``, a
    relative path, so it is created in the current working directory.

    :param input_text: Text to be spoken.
    :param input_language: Language code; only the part before the first
        ``-`` is used to look up the language via ``get_language_by_code``.
        When the lookup yields nothing, ``en-IN`` is used.
    :param id_string: Identifier embedded in the output file name. A random
        UUID is generated when it is empty or ``None``.
    :param aiohttp_session: Unused by this function; kept for interface
        compatibility.
    :param sample_rate_hertz: Sample rate passed to the audio config. Falsy
        values fall back to 48000.
    :param audio_encoding_format: When its lower-cased string form equals
        ``Constants.MP3`` the output is MP3; any other value (including the
        default ``None``) produces OGG/Opus.
    :return: The name of the written audio file, or ``None`` when synthesis
        or writing fails (the error is logged).
    """
    id_string = id_string if id_string else uuid.uuid4()
    input_text = clean_text(input_text)
    synthesis_input = texttospeech.SynthesisInput(text=input_text)
    language_code = "en-IN"
    # split() returns the whole string when there is no "-", so no separate
    # membership check is needed.
    input_language = input_language.split("-")[0]

    if audio_encoding_format and str(audio_encoding_format).lower() == Constants.MP3:
        audio_encoding_format = texttospeech.AudioEncoding.MP3
        file_name = f"response_{id_string}.{Constants.MP3}"
    else:
        audio_encoding_format = texttospeech.AudioEncoding.OGG_OPUS
        file_name = f"response_{id_string}.{Constants.OGG}"

    sample_rate_hertz = sample_rate_hertz if sample_rate_hertz else 48000

    try:
        language = get_language_by_code(input_language)
        if language:
            # Keep the default when the record carries no BCP-47 code, rather
            # than sending language_code=None to the API.
            language_code = language.get("bcp_code") or language_code

        # use Google TTS for speech synthesis
        voice = texttospeech.VoiceSelectionParams(
            language_code=language_code,
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        )
        audio_config = texttospeech.AudioConfig(
            audio_encoding=audio_encoding_format, sample_rate_hertz=sample_rate_hertz
        )
        text_to_speech_client = texttospeech.TextToSpeechClient(credentials=credentials)

        try:
            # The client call is blocking, so run it in a worker thread to
            # keep the event loop responsive.
            response = await asyncio.to_thread(
                text_to_speech_client.synthesize_speech,
                input=synthesis_input,
                voice=voice,
                audio_config=audio_config,
            )
            audio_content = response.audio_content

        except Exception as e:
            logger.error("Error while synthesizing speech: %s", str(e))
            return None

        with open(file_name, "wb") as out:
            out.write(audio_content)
            logger.info("Successfully wrote voice response to file")

    except Exception as e:
        logger.error(e, exc_info=True)
        return None

    return file_name