最近在研究如何将图片和文本批量合成为带字幕的口播视频,
实现主要基于 Python 的 MoviePy 库。入口脚本如下:
from generator import audio, pics, subs, video


def main():
    """Run the picture + text -> subtitled narration video pipeline.

    The stages run strictly in order; each one is keyed by the same input
    names (``texts_input`` / ``pics_input``).
    """
    texts_input = 'example'
    pics_input = 'example'

    # Pre-process the picture resolution.
    pics.adjust(pics_input)

    # Text to speech.
    audio.text_to_audio(texts_input)

    # Speech (plus pictures) to video.
    video.audio_to_video(texts_input, pics_input)

    # Generate the subtitles.
    subs.download_subs(texts_input)

    # Produce the video with subtitles attached.
    video.attach_subs(texts_input)


if __name__ == "__main__":
    main()
以下是核心的"图片 + 文本"转视频逻辑:
import json
from mutagen.mp3 import MP3
from moviepy import editor
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import generator.pics as pics


def audio_to_video(text_input, pics_input):
    """Combine a narration MP3 and a directory of pictures into an MP4.

    Spaces in both names are replaced with underscores to build the paths:
    the audio is read from ``./output/audios/<text_input>.mp3``, the pictures
    from ``./output/pics/<pics_input>`` and the result is written to
    ``./output/videos/<text_input>.mp4``.

    The picture frame rate is chosen so that the whole picture sequence
    lasts as long as the audio track.

    Args:
        text_input: Name of the text/audio item.
        pics_input: Name of the picture set.

    Raises:
        ValueError: If there are no pictures, or the audio has no duration.
    """
    name = text_input.replace(' ', '_')
    audio_filepath = './output/audios/' + name + '.mp3'
    video_filepath = './output/videos/' + name + '.mp4'
    pics_dir = './output/pics/' + pics_input.replace(' ', '_')

    list_of_images = pics.preprocess_pics(pics_dir)
    image_count = len(list_of_images)
    if image_count == 0:
        raise ValueError('no pictures found in ' + pics_dir)

    audio_length = MP3(audio_filepath).info.length
    if audio_length <= 0:
        # Guard the division below; a zero-length track cannot drive a video.
        raise ValueError('audio has no duration: ' + audio_filepath)

    # Spread the pictures evenly over the narration.
    fps = image_count / audio_length

    # Build the video.
    # NOTE(review): the clip is built from the directory, while the count
    # comes from preprocess_pics(); this assumes both see the same files.
    video_clip = editor.ImageSequenceClip(pics_dir, fps=fps)
    try:
        audio_clip = editor.AudioFileClip(audio_filepath)
        try:
            final_video = video_clip.set_audio(audio_clip)
            final_video.write_videofile(video_filepath, codec="libx264", fps=10)
        finally:
            # Release the audio reader even if rendering fails.
            audio_clip.close()
    finally:
        video_clip.close()
以下是我实现的示例项目,欢迎参考,也欢迎 Star:
https://github.com/chengxs1994/text-img-generator-vedio