import site
from pathlib import Path
from typing import Annotated, Optional, Union

import numpy
from halo import Halo
from pydantic import Field
from typer import Option

# Note: the import paths below are assumptions about the surrounding project's layout
from Broken import BROKEN, BrokenEnum, BrokenPath, BrokenPlatform, install, log, shell
from Broken.Externals import ExternalModelsBase, ExternalTorchBase, Spoken


class BrokenWhisper(ExternalModelsBase, ExternalTorchBase):
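    """Transcribes audio to time-aligned text using OpenAI Whisper models via faster-whisper"""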
class Model(str, BrokenEnum):
Tiny = "tiny"
TinyEN = "tiny.en"
Base = "base"
BaseEN = "base.en"
Small = "small"
SmallEN = "small.en"
SmallDistEN = "distil-small.en"
Medium = "medium"
MediumEN = "medium.en"
MediumDistEN = "distil-medium.en"
LargeV1 = "large-v1"
LargeV2 = "large-v2"
LargeV3 = "large-v3"
Large = "large"
LargeDist2 = "distil-large-v2"
LargeDist3 = "distil-large-v3"

    model: Annotated[Model, Option("--model", "-m",
        help="[bold green](🟢 Basic)[/] Whisper model to use for transcription [green](tiny, base, small, medium, large, plus .en and distil variants)[/]")] = \
        Field(Model.LargeV2)
    lowvram: Annotated[bool, Option("--lowvram", "-l",
        help="[bold green](🟢 Basic)[/] Use INT8 instead of FP16 compute for low-VRAM GPUs")] = \
        Field(False)

def _load_model(self):
self.load_torch()
install(packages="faster_whisper")
# Copy PyPI libcudnn to avoid setting LD_LIBRARY_PATH
if BrokenPlatform.OnLinux:
for target in ("libcudnn_ops_infer.so.8", "libcudnn_cnn_infer.so.8"):
if (libcudnn := Path(f"/usr/lib/{target}")).exists():
continue
for site_packages in site.getsitepackages():
if (pycudnn := Path(site_packages)/f"nvidia/cudnn/lib/{target}").exists():
log.warning(f"Running FasterWhisper might fail, as ({libcudnn}) doesn't exist")
log.warning(f"• Luckily, we can copy it from {pycudnn}")
shell("sudo", "cp", pycudnn, libcudnn, confirm=True)
break
else:
raise RuntimeError(f"{target} not found in any site-packages")
        # Finally load the model (import deferred until after install() above)
        from faster_whisper import WhisperModel
        log.info(f"Loading OpenAI Whisper model ({self.model.value})")
        self._model = WhisperModel(
            model_size_or_path=self.model.value,
            download_root=str(BROKEN.DIRECTORIES.CACHE/"Whisper"),
            compute_type=("int8" if self.lowvram else "float16"),
        )

def transcribe(self,
audio: Union[str, Path, numpy.ndarray],
*,
reference: Optional[str]=None
) -> Spoken:
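        """
        Transcribe audio to time-aligned text.

        Args:
            audio: Path to an audio file, or the raw waveform as a numpy array
            reference: Optional initial prompt to bias the model's vocabulary and style

        Returns:
            A Spoken object with sentences and words indexed by (start, end) times
        """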
        if isinstance(audio, (str, Path)):
            if not (audio := BrokenPath.get(audio)).exists():
                raise RuntimeError(f"Audio file doesn't exist: {audio}")
            audio = str(audio)
self.load_model()
spoken = Spoken()
        with Halo(f"Transcribing audio with Whisper model ({self.model.value}).."):
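            # transcribe() returns a (segments, info) tuple; [0] takes the lazy segments generator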
for segment in self._model.transcribe(
audio=audio,
word_timestamps=True,
initial_prompt=reference
)[0]:
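                # The +0.001 nudge presumably keeps intervals non-empty when start == end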
spoken.sentences[(segment.start) : (segment.end + 0.001)] = segment.text.strip()
for word in segment.words:
spoken.words[(word.start) : (word.end + 0.001)] = word.word.strip()
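        # Drop the model reference to free memory once transcription is done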
del self._model
return spoken
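
# Minimal usage sketch, assuming the surrounding project is importable and that
# "speech.ogg" (an illustrative placeholder) exists on disk:
#
#   whisper = BrokenWhisper(model=BrokenWhisper.Model.Small, lowvram=True)
#   spoken = whisper.transcribe("speech.ogg")
#
# The returned Spoken object holds sentences and words keyed by (start, end) times.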