Source code for tensorbay.label.label_sentence

#!/usr/bin/env python3
#
# Copyright 2021 Graviti. Licensed under MIT License.
#

"""The implementation of the TensorBay sentence label."""

from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar, Union

from tensorbay.label.basic import SubcatalogBase, _LabelBase
from tensorbay.label.supports import AttributesMixin
from tensorbay.utility import AttrsMixin, ReprMixin, attr, camel, common_loads


class SentenceSubcatalog(SubcatalogBase, AttributesMixin):
    """Subcatalog describing the transcribed-sentence labels of audio data.

    Arguments:
        is_sample: A boolean value indicating whether the time format is sample related.
        sample_rate: The number of samples of audio carried per second.
        lexicon: A list consisting of all the text and phone entries.

    Attributes:
        description: The description of the entire sentence subcatalog.
        is_sample: A boolean value indicating whether the time format is sample related.
        sample_rate: The number of samples of audio carried per second.
        lexicon: A list consisting of all the text and phone entries.
        attributes: All the possible attributes in the corresponding dataset
            stored in a :class:`~tensorbay.utility.name.NameList`
            with the attribute names as keys
            and the :class:`~tensorbay.label.attribute.AttributeInfo` as values.

    Raises:
        TypeError: When ``is_sample`` is True but ``sample_rate`` is missing (``None`` or 0).

    Examples:
        *Initialization Method 1:* Init from ``SentenceSubcatalog.__init__()``.

        >>> SentenceSubcatalog(True, 16000, [["mean", "m", "iy", "n"]])
        SentenceSubcatalog(
          (is_sample): True,
          (sample_rate): 16000,
          (lexicon): [...]
        )

        *Initialization Method 2:* Init from ``SentenceSubcatalog.loads()`` method.

        >>> contents = {
        ...     "isSample": True,
        ...     "sampleRate": 16000,
        ...     "lexicon": [["mean", "m", "iy", "n"]],
        ...     "attributes": [{"name": "gender", "enum": ["male", "female"]}],
        ... }
        >>> SentenceSubcatalog.loads(contents)
        SentenceSubcatalog(
          (is_sample): True,
          (sample_rate): 16000,
          (attributes): NameList [...],
          (lexicon): [...]
        )

    """

    is_sample: bool = attr(key=camel, default=False)
    sample_rate: int = attr(is_dynamic=True, key=camel)
    lexicon: List[List[str]] = attr(is_dynamic=True)

    def __init__(
        self,
        is_sample: bool = False,
        sample_rate: Optional[int] = None,
        lexicon: Optional[List[List[str]]] = None,
    ) -> None:
        SubcatalogBase.__init__(self)

        # Sample-based timing is meaningless without a sample rate.
        if is_sample:
            if not sample_rate:
                raise TypeError(
                    f"Require 'sample_rate' to init {self.__class__.__name__} when is_sample is True"
                )

        self.is_sample = is_sample

        # Optional fields are dynamic: falsy values are simply left unset on the instance.
        for field_name, field_value in (("sample_rate", sample_rate), ("lexicon", lexicon)):
            if field_value:
                setattr(self, field_name, field_value)

    def dumps(self) -> Dict[str, Any]:
        """Serialize this SentenceSubcatalog into a dict.

        Returns:
            A dict containing all information of this SentenceSubcatalog.

        Examples:
            >>> sentence_subcatalog = SentenceSubcatalog(True, 16000, [["mean", "m", "iy", "n"]])
            >>> sentence_subcatalog.dumps()
            {'isSample': True, 'sampleRate': 16000, 'lexicon': [['mean', 'm', 'iy', 'n']]}

        """
        dumped = self._dumps()
        return dumped

    def append_lexicon(self, lexemes: List[str]) -> None:
        """Append one lexicon entry, creating the lexicon if it does not exist yet.

        Arguments:
            lexemes: A list consisting of text and phone.

        Examples:
            >>> sentence_subcatalog = SentenceSubcatalog(True, 16000, [["mean", "m", "iy", "n"]])
            >>> sentence_subcatalog.append_lexicon(["example"])
            >>> sentence_subcatalog.lexicon
            [['mean', 'm', 'iy', 'n'], ['example']]

        """
        # "lexicon" is only present once it has been given a value.
        if not hasattr(self, "lexicon"):
            self.lexicon = [lexemes]
            return

        self.lexicon.append(lexemes)
class Word(ReprMixin, AttrsMixin):
    """A single word of a phonetic transcription sentence.

    :class:`Word` holds the content of the word together with its optional
    begin and end time in the audio.

    Arguments:
        text: The content of the word.
        begin: The begin time of the word in the audio.
        end: The end time of the word in the audio.

    Attributes:
        text: The content of the word.
        begin: The begin time of the word in the audio.
        end: The end time of the word in the audio.

    Examples:
        >>> Word(text="example", begin=1, end=2)
        Word(
          (text): 'example',
          (begin): 1,
          (end): 2
        )

    """

    _T = TypeVar("_T", bound="Word")

    _repr_attrs = ("text", "begin", "end")

    text: str = attr()
    begin: float = attr(is_dynamic=True)
    end: float = attr(is_dynamic=True)

    def __init__(
        self,
        text: str,
        begin: Optional[float] = None,
        end: Optional[float] = None,
    ):
        self.text = text

        # Timestamps are optional: only the ones actually supplied are set,
        # so a time of 0 is kept while None leaves the attribute absent.
        for field_name, moment in (("begin", begin), ("end", end)):
            if moment is not None:
                setattr(self, field_name, moment)

    @classmethod
    def loads(cls: Type[_T], contents: Dict[str, Union[str, float]]) -> _T:
        """Build a Word from a dict containing the information of the word.

        Arguments:
            contents: A dict containing the information of the word.

        Returns:
            The loaded :class:`Word` object.

        Examples:
            >>> contents = {"text": "Hello, World", "begin": 1, "end": 2}
            >>> Word.loads(contents)
            Word(
              (text): 'Hello, World',
              (begin): 1,
              (end): 2
            )

        """
        word = common_loads(cls, contents)
        return word

    def dumps(self) -> Dict[str, Union[str, float]]:
        """Serialize the current word into a dict.

        Returns:
            A dict containing all the information of the word.

        Examples:
            >>> word = Word(text="example", begin=1, end=2)
            >>> word.dumps()
            {'text': 'example', 'begin': 1, 'end': 2}

        """
        dumped = self._dumps()
        return dumped
class LabeledSentence(_LabelBase):
    """This class defines the concept of phonetic transcription label.

    :class:`LabeledSentence` is the transcribed sentence type of label,
    which is often used for tasks such as automatic speech recognition.

    Arguments:
        sentence: An iterable of the words of the sentence.
        spell: An iterable of spell, only exists in Chinese language.
        phone: An iterable of phone.
        attributes: The attributes of the label.

    Attributes:
        sentence: The transcribed sentence.
        spell: The spell within the sentence, only exists in Chinese language.
        phone: The phone of the sentence label.
        attributes: The attributes of the label.

    Examples:
        >>> sentence = [Word(text="qi1shi2", begin=1, end=2)]
        >>> spell = [Word(text="qi1", begin=1, end=2)]
        >>> phone = [Word(text="q", begin=1, end=2)]
        >>> LabeledSentence(
        ...     sentence,
        ...     spell,
        ...     phone,
        ...     attributes={"key": "value"},
        ... )
        LabeledSentence(
          (sentence): [
            Word(
              (text): 'qi1shi2',
              (begin): 1,
              (end): 2
            )
          ],
          (spell): [
            Word(
              (text): 'qi1',
              (begin): 1,
              (end): 2
            )
          ],
          (phone): [
            Word(
              (text): 'q',
              (begin): 1,
              (end): 2
            )
          ],
          (attributes): {
            'key': 'value'
          }
        )

    """

    _T = TypeVar("_T", bound="LabeledSentence")

    _repr_attrs = ("sentence", "spell", "phone", "attributes")
    _repr_maxlevel = 3

    sentence: List[Word] = attr(is_dynamic=True)
    spell: List[Word] = attr(is_dynamic=True)
    phone: List[Word] = attr(is_dynamic=True)

    def __init__(
        self,
        sentence: Optional[Iterable[Word]] = None,
        spell: Optional[Iterable[Word]] = None,
        phone: Optional[Iterable[Word]] = None,
        *,
        attributes: Optional[Dict[str, Any]] = None,
    ):
        super().__init__(attributes=attributes)

        for field_name, words in (("sentence", sentence), ("spell", spell), ("phone", phone)):
            # Generators and other lazy iterables are always truthy, so the
            # emptiness check has to happen on the materialized list; otherwise
            # an empty generator would store [] while an empty list stores nothing.
            materialized = list(words) if words else []
            if materialized:
                setattr(self, field_name, materialized)

    @staticmethod
    def _load_word(contents: Iterable[Dict[str, Any]]) -> List[Word]:
        # Convert every word dict into a Word object.
        return [Word.loads(word) for word in contents]

    @classmethod
    def loads(cls: Type[_T], contents: Dict[str, Any]) -> _T:
        """Loads a LabeledSentence from a dict containing the information of the label.

        Arguments:
            contents: A dict containing the information of the sentence label.

        Returns:
            The loaded :class:`LabeledSentence` object.

        Examples:
            >>> contents = {
            ...     "sentence": [{"text": "qi1shi2", "begin": 1, "end": 2}],
            ...     "spell": [{"text": "qi1", "begin": 1, "end": 2}],
            ...     "phone": [{"text": "q", "begin": 1, "end": 2}],
            ...     "attributes": {"key": "value"},
            ... }
            >>> LabeledSentence.loads(contents)
            LabeledSentence(
              (sentence): [
                Word(
                  (text): 'qi1shi2',
                  (begin): 1,
                  (end): 2
                )
              ],
              (spell): [
                Word(
                  (text): 'qi1',
                  (begin): 1,
                  (end): 2
                )
              ],
              (phone): [
                Word(
                  (text): 'q',
                  (begin): 1,
                  (end): 2
                )
              ],
              (attributes): {
                'key': 'value'
              }
            )

        """
        return common_loads(cls, contents)

    def dumps(self) -> Dict[str, Any]:
        """Dumps the current label into a dict.

        Returns:
            A dict containing all the information of the sentence label.

        Examples:
            >>> sentence = [Word(text="qi1shi2", begin=1, end=2)]
            >>> spell = [Word(text="qi1", begin=1, end=2)]
            >>> phone = [Word(text="q", begin=1, end=2)]
            >>> labeledsentence = LabeledSentence(
            ...     sentence,
            ...     spell,
            ...     phone,
            ...     attributes={"key": "value"},
            ... )
            >>> labeledsentence.dumps()
            {
                'attributes': {'key': 'value'},
                'sentence': [{'text': 'qi1shi2', 'begin': 1, 'end': 2}],
                'spell': [{'text': 'qi1', 'begin': 1, 'end': 2}],
                'phone': [{'text': 'q', 'begin': 1, 'end': 2}]
            }

        """
        return self._dumps()