WebMAUS: Forced-alignment | SPEAC

This Python script performs forced-alignment using WebMAUS. It takes pairs of sound files + text files with the same filename (item1.wav + item1.txt) as input and matches the orthographic words in the txt file to the sounds, syllables, and words in the sound file. The output are TextGrid files with word, syllable, and phoneme level annotations.
If you want to get started using this script, please see our how-to-do-forced-alignment for hands-on instructions.
You can also download the script as a .py file.
################################################################################
### Hans Rutger Bosker, Radboud University
### HansRutger.Bosker@donders.ru.nl
### Date: 11 November 2024, run in Python 3.11.5 on Windows 11
### License: CC BY 4.0
################################################################################

######################################
# IMPORTING LIBRARIES
######################################

import requests # to send HTTP requests 
import json # for parsing JSON data
import os # for changing the working directory and removing files
import xml.etree.ElementTree as ET # to parse the XML response from webmaus



######################################
# SET PARAMETERS
######################################

# Set the working directory to the location of the script file
script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(script_dir)

# WebMAUSBasic parameters
webmaus_url = f"https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/runMAUSBasic"
webmaus_language = 'eng-US'

# runPho2Syl parameters
runpho2syl_url = f"https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/runPho2Syl"
runpho2syl_input_tier = "MAU"
runpho2syl_wsync = "yes"
runpho2syl_outsym = "sampa"
runpho2syl_output_format = "tg"  # Output format: TextGrid

# Define audio file extension (e.g., wav or mp3)
audio_extension = "wav"



######################################
# FINDING FILE PAIRS AND PROCESSING
######################################

# Collect all text and audio files in the directory
all_files = os.listdir(script_dir)

# Create sets for easy matching
text_files = {os.path.splitext(f)[0] for f in all_files if f.endswith('.txt')}
audio_files = {os.path.splitext(f)[0] for f in all_files if f.endswith(f'.{audio_extension}')}

# Find common items between text files and audio files
items_to_process = text_files.intersection(audio_files)

# Main loop for processing the data
for item in items_to_process:
    input_text_path = f"{item}.txt"
    input_audio_path = f"{item}.{audio_extension}"

    # Add further processing logic for each pair
    print(f"Processing: {input_text_path} and {input_audio_path}")

    # Run WebMAUSBasic on the mp3 and txt pairs
    with open(input_audio_path, 'rb') as audio_file, open(input_text_path, 'r') as text_file:
        # Construct the payload
        files = {
            'SIGNAL': audio_file,
            'TEXT': text_file
        }
        data = {
            'LANGUAGE': webmaus_language,
            'OUTFORMAT': 'bpf'
        }

        # Send the request to the WebMAUSBasic API
        response = requests.post(webmaus_url, files=files, data=data)

        if response.status_code == 200:
            # Parse the XML response
            root = ET.fromstring(response.content)
            download_link = root.find(".//downloadLink").text

            if download_link:
                # Download the BPF file using the download link
                output_filename_webmaus = f"{item}_webmaus.par"
                download_response = requests.get(download_link)

                # Check if the download was successful
                if download_response.status_code == 200:
                    with open(output_filename_webmaus, 'wb') as f:
                        f.write(download_response.content)
                    #print(f"WebMAUS TextGrid file downloaded as {output_filename_webmaus}")
                    
                    # Step 2: Use the .par BPF file as input to runPho2Syl
                    with open(output_filename_webmaus, 'rb') as bpf_file:
                        files = {
                            'i': bpf_file
                        }
                        data = {
                            'wsync': runpho2syl_wsync,
                            'lng': webmaus_language,
                            'tier': runpho2syl_input_tier,
                            'outsym': runpho2syl_outsym,
                            'oform': runpho2syl_output_format
                        }
                        response_pho2syl = requests.post(runpho2syl_url, files=files, data=data)

                        if response_pho2syl.status_code == 200:
                            # Parse the XML response to find download link for runPho2Syl
                            root_pho2syl = ET.fromstring(response_pho2syl.content)
                            download_link_pho2syl = root_pho2syl.find(".//downloadLink").text

                            if download_link_pho2syl:
                                # Download the runPho2Syl output file
                                output_filename_pho2syl = f"{item}.TextGrid"
                                download_response_pho2syl = requests.get(download_link_pho2syl)
                                
                                if download_response_pho2syl.status_code == 200:
                                    with open(output_filename_pho2syl, 'wb') as f:
                                        f.write(download_response_pho2syl.content)
                                    print(f"TextGrid file saved as {output_filename_pho2syl}")
                                else:
                                    print("Failed to download the runPho2Syl output file.")
                            else:
                                print("Download link not found in the runPho2Syl response.")
                        else:
                            print(f"Failed to process the runPho2Syl request. Status code: {response_pho2syl.status_code}")
                        
                    # Remove the intermediate output files containing only partial output
                    os.remove(output_filename_webmaus)
                else:
                    print("Failed to download the WebMAUS BPF file.")
            else:
                print("Download link not found in the response.")
        else:
            print(f"Failed to process the WebMAUS request. Status code: {response.status_code}")

################################################################################
# End of script
################################################################################