Can anyone share the code that shows how the ‘audio’ column is generated?
I have put my sample code below, but I think the ‘audio’ column is wrong.
import os
import librosa
import pandas as pd
# Build a CSV manifest with the columns path, sentence, audio, age and gender
# from paired <name>.wav / <name>.txt files that live in a single directory.

# Define the directory containing audio and text files
data_directory = '/content/drive/MyDrive/1'

# Sampling rate passed to librosa.load for every clip.
target_sampling_rate = 48000

# List audio files and matching text files.
# sorted() keeps the row order reproducible; os.listdir returns entries in
# arbitrary order.
audio_files = sorted(
    os.path.join(data_directory, file)
    for file in os.listdir(data_directory)
    if file.lower().endswith('.wav')
)
# Swap only the extension: str.replace('.wav', '.txt') would also rewrite any
# '.wav' that happens to appear earlier in the path.
text_files = [os.path.splitext(file)[0] + '.txt' for file in audio_files]

# Initialize lists to store data for the CSV
path_column = []
sentence_column = []
audio_column = []
age_column = []
gender_column = []

# Constants for age and gender
age_value = 35
gender_value = 'male'

# Process audio and text files
for audio_file, text_file in zip(audio_files, text_files):
    # Read the text from the matching text file. A clip without a transcript
    # is skipped (with a message) so one missing file does not abort the run
    # and the columns stay aligned.
    try:
        with open(text_file, 'r', encoding='utf-8') as text:
            # strip() drops the trailing newline most editors add, which would
            # otherwise end up inside the CSV field.
            sentence = text.read().strip()
    except FileNotFoundError:
        print(f'Skipping "{audio_file}": transcript "{text_file}" not found.')
        continue

    # Decode the waveform at the target sampling rate.
    samples, sampling_rate = librosa.load(audio_file, sr=target_sampling_rate)

    # One 'audio' entry bundles the file path, the decoded samples and the
    # sampling rate.
    # NOTE(review): this path/array/sampling_rate layout is meant to mirror the
    # decoded audio column of Hugging Face `datasets` - confirm against the
    # dataset you are trying to match.
    # tolist() matters for the CSV: a NumPy array is written as its abbreviated
    # text form ("[0. 0. ... 0.]"), which silently discards the samples, while
    # a plain list is written out in full.
    audio = {
        'path': audio_file,
        'array': samples.tolist(),
        'sampling_rate': sampling_rate,
    }

    # Append data to the respective columns
    path_column.append(audio_file)
    sentence_column.append(sentence)
    audio_column.append(audio)
    age_column.append(age_value)
    gender_column.append(gender_value)

# Create a DataFrame to hold the data
data = {
    'path': path_column,
    'sentence': sentence_column,
    'audio': audio_column,
    'age': age_column,
    'gender': gender_column,
}
df = pd.DataFrame(data)

# Save the DataFrame to a CSV file next to the audio files.
# NOTE: every sample is written as text, so the file grows quickly; CSV is
# only practical here for a small number of short clips.
output_csv = os.path.join(data_directory, 'output.csv')
df.to_csv(output_csv, index=False)
print(f'CSV file "{output_csv}" has been created with the specified columns.')