mirror of
https://github.com/jooray/rss2podcast.git
synced 2025-05-22 23:42:00 +00:00
189 lines
6.8 KiB
Python
189 lines
6.8 KiB
Python
import os
|
|
import re
|
|
import tempfile
|
|
import subprocess
|
|
from datetime import datetime
|
|
from content_processing import prepare_for_speech
|
|
from markdown_to_speech import convert_markdown_to_speech
|
|
from sqlalchemy.orm import sessionmaker
|
|
from utils import slugify, get_engine, Episode
|
|
from colorama import init, Fore, Style
|
|
|
|
# Initialize colorama
|
|
init(autoreset=True)
|
|
|
|
def process_episode(episode, config):
    """Render one episode to an MP3 file and record the result in the database.

    The title and content are optionally rewritten with the configured
    preprocess regexps, the content is passed through ``prepare_for_speech``,
    both are converted to speech, optionally sped up, concatenated with the
    configured prefix/postfix audio, encoded to MP3 and finally the episode's
    processing metadata is stored via ``update_episode_in_db``.

    Args:
        episode: Episode object. ``title`` and ``content`` are read; the
            fields ``processing_status``, ``processed_date``,
            ``mp3_file_path``, ``duration`` and ``file_size`` are written.
        config: Mapping with the required keys ``tts_options``,
            ``audio_output_directory``, ``mp3_conversion`` and ``database``,
            and the optional keys ``preprocess_regexps``, ``audio_speedup``,
            ``prefix_audio_files`` and ``postfix_audio_files``.

    Any exception raised by a processing step propagates to the caller; the
    intermediate WAV files are removed in either case.
    """
    title = episode.title
    content = episode.content

    # Apply preprocess_regexps to content and title if available
    if 'preprocess_regexps' in config:
        content = apply_preprocess_regexps(content, config['preprocess_regexps'])
        title = apply_preprocess_regexps(title, config['preprocess_regexps'])

    print(f"{Fore.GREEN}Processing episode:{Style.RESET_ALL} {title}")

    content = prepare_for_speech(content, config)

    audio_basename = slugify(title)
    temp_dir = tempfile.gettempdir()
    title_audio_wav = os.path.join(temp_dir, f"title_audio_{audio_basename}.wav")
    content_audio_wav = os.path.join(temp_dir, f"content_audio_{audio_basename}.wav")
    final_audio_wav = os.path.join(temp_dir, f"final_audio_{audio_basename}.wav")

    try:
        # Convert title and content to speech
        convert_markdown_to_speech(title, title_audio_wav, **config['tts_options'])
        convert_markdown_to_speech(content, content_audio_wav, **config['tts_options'])

        # Apply audio speedup if configured. speedup_wav_file returns the
        # path of the new file, so the variables are rebound and the cleanup
        # below always targets the files that currently exist.
        audio_speedup = config.get('audio_speedup')
        if audio_speedup and audio_speedup != 1:
            content_audio_wav = speedup_wav_file(content_audio_wav, audio_speedup)
            title_audio_wav = speedup_wav_file(title_audio_wav, audio_speedup)

        # Combine audio files
        combine_audio_files(
            config.get('prefix_audio_files', []),
            title_audio_wav,
            content_audio_wav,
            config.get('postfix_audio_files', []),
            final_audio_wav
        )

        # Convert to MP3
        mp3_filename = f"{audio_basename}.mp3"
        os.makedirs(config['audio_output_directory'], exist_ok=True)
        mp3_file_path = os.path.join(config['audio_output_directory'], mp3_filename)
        convert_to_mp3(final_audio_wav, mp3_file_path, config['mp3_conversion'])
        duration, file_size = get_mp3_duration_and_size(mp3_file_path)

        # Update episode metadata
        episode.processing_status = 'processed'
        episode.processed_date = datetime.utcnow().isoformat()
        episode.mp3_file_path = mp3_filename  # Store the filename instead of full path
        episode.duration = duration
        episode.file_size = file_size
        update_episode_in_db(episode, config['database'])
    finally:
        # Clean up temporary files even when a step above failed, so aborted
        # runs do not leave WAV files behind in the temp directory.
        for temp_path in (content_audio_wav, title_audio_wav, final_audio_wav):
            if os.path.exists(temp_path):
                os.remove(temp_path)
|
|
|
|
def speedup_wav_file(wav_file, audio_speedup):
    """Write a tempo-adjusted copy of ``wav_file`` and delete the original.

    Args:
        wav_file: Path of the WAV file to process.
        audio_speedup: Value inserted into ffmpeg's ``atempo`` audio filter.

    Returns:
        Path of the new file, placed in the system temp directory and named
        ``speedup_<original basename>``.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
            (the original file is kept in that case).
    """
    sped_up_name = f"speedup_{os.path.basename(wav_file)}"
    output_wav_file = os.path.join(tempfile.gettempdir(), sped_up_name)

    command = [
        'ffmpeg', '-y', '-i', wav_file,
        '-filter:a', f"atempo={audio_speedup}",
        output_wav_file,
    ]
    subprocess.run(command, check=True)

    # The caller continues with the returned path, so the source is dropped.
    os.remove(wav_file)
    return output_wav_file
|
|
|
|
def combine_audio_files(prefix_files, title_audio_file, content_audio_file, postfix_files, output_file):
    """Concatenate prefix, title, content and postfix audio into ``output_file``.

    Layout of the result:

    * 0 prefix files: nothing before the content.
    * 1 prefix file: prefix, content.
    * 2 prefix files: first prefix, title, second prefix, content.

    The postfix files follow the same scheme after the content (with two
    postfix files the title is placed between them).

    Args:
        prefix_files: Sequence of 0, 1 or 2 audio file paths (``None`` is
            treated as empty).
        title_audio_file: Path of the spoken title audio.
        content_audio_file: Path of the spoken content audio.
        postfix_files: Sequence of 0, 1 or 2 audio file paths (``None`` is
            treated as empty).
        output_file: Path of the concatenated file written by ffmpeg.

    Raises:
        ValueError: If more than two prefix or more than two postfix files
            are given.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    prefix_files = list(prefix_files or [])
    postfix_files = list(postfix_files or [])

    # Validate before touching the file system.
    if len(prefix_files) > 2:
        raise ValueError("Prefix files should be either 0, 1, or 2 files.")
    if len(postfix_files) > 2:
        raise ValueError("Postfix files should be either 0, 1, or 2 files.")

    audio_files = []

    # Two prefix files wrap the title; otherwise the (0 or 1) files are used as is.
    if len(prefix_files) == 2:
        audio_files.extend([prefix_files[0], title_audio_file, prefix_files[1]])
    else:
        audio_files.extend(prefix_files)

    audio_files.append(content_audio_file)

    # Same scheme for the postfix files.
    if len(postfix_files) == 2:
        audio_files.extend([postfix_files[0], title_audio_file, postfix_files[1]])
    else:
        audio_files.extend(postfix_files)

    # Create a temporary file listing the audio files for the concat demuxer.
    concat_file = tempfile.NamedTemporaryFile(
        delete=False, mode='w', suffix='.txt', encoding='utf-8'
    )
    try:
        with concat_file:
            for audio_file in audio_files:
                concat_file.write(_concat_list_line(audio_file))

        # Use ffmpeg to concatenate audio files
        subprocess.run([
            'ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i',
            concat_file.name, '-c', 'copy', output_file
        ], check=True)
    finally:
        # Remove the list file even when ffmpeg fails.
        os.remove(concat_file.name)


def _concat_list_line(path):
    """Return the ``file`` directive line for ``path`` in an ffmpeg concat list."""
    # ffmpeg resolves relative entries against the list file's directory (the
    # temp dir here), so make the path absolute from the current directory.
    absolute_path = os.path.abspath(path)
    # Inside single quotes a literal quote is written as '\'' .
    escaped_path = absolute_path.replace("'", "'\\''")
    return f"file '{escaped_path}'\n"
|
|
|
|
def convert_to_mp3(wav_file, mp3_file, mp3_config):
    """Encode ``wav_file`` to ``mp3_file`` with ffmpeg.

    Args:
        wav_file: Path of the input audio file.
        mp3_file: Path of the output file; its parent directory is created
            when missing.
        mp3_config: Mapping with the optional keys ``codec`` (default
            ``'libmp3lame'``) and ``bitrate`` (default ``'192k'``).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(mp3_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    subprocess.run([
        'ffmpeg', '-y', '-i', wav_file, '-codec:a',
        mp3_config.get('codec', 'libmp3lame'),
        '-b:a', mp3_config.get('bitrate', '192k'), mp3_file
    ], check=True)
|
|
|
|
def get_mp3_duration_and_size(mp3_file_path):
    """Return ``(duration, size)`` for an audio file.

    The duration is read with ffprobe and truncated to an ``int``; the size
    comes from ``os.path.getsize``.

    Args:
        mp3_file_path: Path of the file to inspect.

    Returns:
        A ``(duration, file_size)`` tuple, or ``(None, None)`` when the path
        is not a regular file or when probing fails for any reason (the
        error is printed, not raised).
    """
    if not os.path.isfile(mp3_file_path):
        return None, None

    # Ask ffprobe for nothing but the bare duration value of the container.
    probe_command = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        mp3_file_path,
    ]

    try:
        raw_output = subprocess.check_output(probe_command)
        seconds = float(raw_output.strip())
        size_in_bytes = os.path.getsize(mp3_file_path)
        whole_seconds = int(seconds)
    except Exception as e:
        print(f"Error processing {mp3_file_path}: {e}")
        return None, None

    return whole_seconds, size_in_bytes
|
|
|
|
def update_episode_in_db(episode, db_url):
    """
    Copy the episode's processing metadata onto its stored database row.

    The row is looked up by ``podcast_id`` and ``article_guid``. When it
    exists, ``processing_status``, ``mp3_file_path``, ``processed_date``,
    ``duration`` and ``file_size`` are copied from ``episode`` and committed;
    otherwise a "not found" message is printed. Any exception is rolled back
    and printed rather than raised, and the session is always closed.
    """
    session_factory = sessionmaker(bind=get_engine(db_url))
    session = session_factory()

    # Fields copied from the in-memory episode to the stored row, in order.
    copied_fields = (
        'processing_status',
        'mp3_file_path',
        'processed_date',
        'duration',
        'file_size',
    )

    try:
        stored = session.query(Episode).filter(
            Episode.podcast_id == episode.podcast_id,
            Episode.article_guid == episode.article_guid
        ).first()

        if not stored:
            print(f"Episode '{episode.title}' not found in the database.")
            return

        for field_name in copied_fields:
            setattr(stored, field_name, getattr(episode, field_name))
        session.commit()
        print(f"Episode '{episode.title}' updated in the database.")
    except Exception as e:
        session.rollback()
        print(f"Error updating episode in DB: {e}")
    finally:
        session.close()
|
|
|
|
def apply_preprocess_regexps(text, regexps):
    """Apply a sequence of regexp substitutions to ``text``, in order.

    Args:
        text: String to rewrite.
        regexps: Iterable of mappings, each with a ``'regexp'`` pattern and a
            ``'replacement'`` value, both handed to ``re.sub``.

    Returns:
        The text after every substitution has been applied; each rule sees
        the output of the previous one.
    """
    processed = text
    for rule in regexps:
        pattern, substitute = rule['regexp'], rule['replacement']
        processed = re.sub(pattern, substitute, processed)
    return processed
|