faster-whisper: CPU and GPU give different recognition results; GPU output is all '!' and long videos hit CUDA out of memory
model: large-v3

Question:
- When I use the CPU to transcribe, it recognizes the speech correctly, but when I use the GPU to transcribe, the output is all '!', and the detected language is 'en' with probability nan.
- When I use the GPU, the model is loaded into GPU memory, but when I look at the GPU usage in Task Manager, I see that it drops to 0% from time to time. Is this correct?

[screenshots of the detected-language output, the GPU usage, and the CPU/GPU recognition results omitted]

This is my code. GPU version:
from faster_whisper import WhisperModel
from changevideospeed import SearchVideoFiles
import pysubs2

def GetSubtitleResult(results: list, audio_path: str) -> tuple:
    # Build a pysubs2 subtitle file from whisper-style segment dicts.
    subs: pysubs2.SSAFile = pysubs2.load_from_whisper(results)
    file_name = audio_path[:audio_path.rfind('.')]
    return (subs, file_name)

def SaveSRT(results: list, audio_path: str) -> None:
    subs, file_name = GetSubtitleResult(results, audio_path)
    subs.save(file_name + '.srt')

def SaveASS(results: list, audio_path: str) -> None:
    subs, file_name = GetSubtitleResult(results, audio_path)
    subs.save(file_name + '.ass')

def LoadModel(model_size: str, download_root: str = 'models/') -> WhisperModel:
    model: WhisperModel = WhisperModel(download_root=download_root,
                                       model_size_or_path=model_size,
                                       device='cuda',  # cpu_threads=12,
                                       compute_type='float16',
                                       local_files_only=True)
    return model

def Transcribe(model: WhisperModel, audio_path: str, language: str = None) -> list:
    segments, info = model.transcribe(audio=audio_path, language=language)
    segments = list(segments)  # transcription is lazy; this forces it to run
    print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
    results = []
    for s in segments:
        segment_dict = {'start': s.start, 'end': s.end, 'text': s.text}
        results.append(segment_dict)
    return results

if __name__ == '__main__':
    audio_dir = 'audio'
    file_paths = SearchVideoFiles(audio_dir)
    model_size = 'large-v3'
    model: WhisperModel = LoadModel(model_size)
    for path in file_paths:
        results: list = Transcribe(model, path)
        SaveSRT(results, path)
CPU version: just change the LoadModel function:
def LoadModel(model_size: str, download_root: str = 'models/') -> WhisperModel:
    model: WhisperModel = WhisperModel(download_root=download_root,
                                       model_size_or_path=model_size,
                                       device='cpu', cpu_threads=12,
                                       # compute_type='float16',
                                       local_files_only=True)
    return model
If anyone can help me, thank you very much!
About this issue
- Original URL
- State: closed
- Created 5 months ago
- Comments: 18 (8 by maintainers)
This happens because a different compute type is used on each device. Use the same one on both, e.g. int8_float32. Also, the large-v3 model tends to hallucinate a lot; try large-v2. Or try this PR [it's not on pypi yet]; with it, the model should recover from such a hallucination loop.
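For reference, a minimal sketch of the suggested fix, reusing the question's LoadModel helper: pass the device in as a parameter (an addition for illustration) and pin compute_type to the same value on both devices so the CPU and GPU runs use identical precision. As a side benefit, int8 weights take less GPU memory than float16 weights, which may also help with the CUDA out-of-memory errors on long videos.

from faster_whisper import WhisperModel

def LoadModel(model_size: str, device: str, download_root: str = 'models/') -> WhisperModel:
    # Sketch: same compute_type on both devices so results are comparable.
    # 'models/' and 'large-v3' mirror the question's code; the device
    # parameter is added here for illustration only.
    return WhisperModel(download_root=download_root,
                        model_size_or_path=model_size,
                        device=device,                # 'cpu' or 'cuda'
                        compute_type='int8_float32',  # the value suggested above
                        local_files_only=True)

cpu_model = LoadModel('large-v3', 'cpu')
gpu_model = LoadModel('large-v3', 'cuda')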