| config: !!python/object/new:yacs.config.CfgNode |
| dictitems: |
| CONSTANTS_PATH: constants/ |
| DATA: !!python/object/new:yacs.config.CfgNode |
| dictitems: |
| MAX_LENGTH: 270 |
| TEST_PATH: data/kssa_data_1/dev+asr_nobom.txt |
| TRAIN_PATH: data/kssa_data_1/train+asr_nobom.txt |
| VAL_PATH: data/kssa_data_1/dev+asr_nobom.txt |
| state: |
| __deprecated_keys__: !!set {} |
| __immutable__: true |
| __new_allowed__: false |
| __renamed_keys__: {} |
| INFERENCE: !!python/object/new:yacs.config.CfgNode |
| dictitems: |
| ASR_MODEL_NAME: sashat/whisper-medium-ClassicalAr |
| BATCH_SIZE: 16 |
| BUFFER_SIZE: 25 |
| DEVICE: cpu |
| FORCED_IDS: null |
| MAX_LENGTH: 270 |
| MODEL_PATH: /home/rufael/Projects/forced_alignment/Diac/outputs/kssa/transformer-text+asr/tashkeela+clartts+kssa/tensorboard/version_13/checkpoints/best_model.ckpt |
| OUTPUT_PATH: outputs/kssa/transformer-text+asr/tashkeela+clartts+kssa/results.txt |
| USE_ASR: true |
| WINDOW_SIZE: 50 |
| state: |
| __deprecated_keys__: !!set {} |
| __immutable__: true |
| __new_allowed__: false |
| __renamed_keys__: {} |
| MODEL: !!python/object/new:yacs.config.CfgNode |
| dictitems: |
| ASR_VOCAB_SIZE: 91 |
| DFF: 128 |
| DROPOUT_RATE: 0.2 |
| D_MODEL: 256 |
| LOAD_TEXT_BRANCH_ONLY: false |
| MAXLEN: 272 |
| NUM_BLOCKS: 2 |
| NUM_HEADS: 4 |
| OUTPUT_SIZE: 19 |
| PRETRAINED_PATH: /home/rufael/Projects/forced_alignment/Diac/outputs/old/results/transformer-text+asr/tashkeela+clartts/tensorboard/version_0/checkpoints/best_model.ckpt |
| TYPE: Transformer |
| USE_ASR: true |
| VOCAB_SIZE: 77 |
| WITH_CONN: false |
| state: |
| __deprecated_keys__: !!set {} |
| __immutable__: true |
| __new_allowed__: false |
| __renamed_keys__: {} |
| TRAIN: !!python/object/new:yacs.config.CfgNode |
| dictitems: |
| ACCUMULATE_GRAD_BATCHES: 1 |
| BATCH_SIZE: 128 |
| DEVICE: cuda |
| EARLY_STOPPING_PATIENCE: 50 |
| EVAL_FREQ: 1 |
| GRAD_CLIP_NORM: 0.5 |
| LEARNING_RATE: 0.001 |
| LR_SCHEDULER_FACTOR: 0.5 |
| LR_SCHEDULER_PATIENCE: 5 |
| NUM_EPOCHS: 1000 |
| SAVE_DIR: outputs/kssa/transformer-text+asr/tashkeela+clartts+kssa |
| SAVE_FREQ: 30 |
| VAL_CHECK_INTERVAL: 1.0 |
| state: |
| __deprecated_keys__: !!set {} |
| __immutable__: true |
| __new_allowed__: false |
| __renamed_keys__: {} |
| state: |
| __deprecated_keys__: !!set {} |
| __immutable__: true |
| __new_allowed__: false |
| __renamed_keys__: {} |
| tokenizer: !!python/object:diac.tokenizer.ArabicDiacritizationTokenizer |
| constants: !!python/object:diac.tokenizer.TokenizerConstants |
| arabic_letters_list: "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062A\ |
| \u062B\u062C\u062D\u062E\u062F\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\ |
| \u0638\u0639\u063A\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064A" |
| characters_mapping: |
| ? ' |
| |
| ' |
| : 4 |
| ' ': 5 |
| '!': 6 |
| '"': 7 |
| '''': 8 |
| (: 9 |
| ): 10 |
| '*': 11 |
| ',': 12 |
| '-': 13 |
| .: 14 |
| /: 15 |
| '0': 16 |
| '1': 17 |
| '2': 18 |
| '3': 19 |
| '4': 20 |
| '5': 21 |
| '6': 22 |
| '7': 23 |
| '8': 24 |
| '9': 25 |
| ':': 26 |
| ;: 27 |
| <EOS>: 2 |
| <PAD>: 0 |
| <SOS>: 1 |
| <UNK>: 3 |
| '[': 28 |
| ']': 29 |
| '`': 30 |
| '{': 31 |
| '}': 32 |
| '~': 33 |
| "\xAB": 34 |
| "\xBB": 35 |
| "\u060C": 36 |
| "\u061B": 37 |
| "\u061F": 38 |
| "\u0621": 39 |
| "\u0622": 40 |
| "\u0623": 41 |
| "\u0624": 42 |
| "\u0625": 43 |
| "\u0626": 44 |
| "\u0627": 45 |
| "\u0628": 46 |
| "\u0629": 47 |
| "\u062A": 48 |
| "\u062B": 49 |
| "\u062C": 50 |
| "\u062D": 51 |
| "\u062E": 52 |
| "\u062F": 53 |
| "\u0630": 54 |
| "\u0631": 55 |
| "\u0632": 56 |
| "\u0633": 57 |
| "\u0634": 58 |
| "\u0635": 59 |
| "\u0636": 60 |
| "\u0637": 61 |
| "\u0638": 62 |
| "\u0639": 63 |
| "\u063A": 64 |
| "\u0641": 65 |
| "\u0642": 66 |
| "\u0643": 67 |
| "\u0644": 68 |
| "\u0645": 69 |
| "\u0646": 70 |
| "\u0647": 71 |
| "\u0648": 72 |
| "\u0649": 73 |
| "\u064A": 74 |
| "\u200F": 75 |
| "\u2013": 76 |
| classes_mapping: |
| ? '' |
| : 0 |
| <EOS>: 17 |
| <N/A>: 18 |
| <PAD>: 15 |
| <SOS>: 16 |
| "\u064B": 2 |
| "\u064C": 4 |
| "\u064D": 6 |
| "\u064E": 1 |
| "\u064F": 3 |
| "\u0650": 5 |
| "\u0651": 8 |
| "\u0651\u064B": 10 |
| "\u0651\u064C": 12 |
| "\u0651\u064D": 14 |
| "\u0651\u064E": 9 |
| "\u0651\u064F": 11 |
| "\u0651\u0650": 13 |
| "\u0652": 7 |
| diacritics_list: |
| - "\u064E" |
| - "\u064B" |
| - "\u0650" |
| - "\u064D" |
| - "\u064F" |
| - "\u064C" |
| - "\u0652" |
| - "\u0651" |
| expanded_vocabulary: |
| ? ' |
| |
| ' |
| : 4 |
| ' ': 5 |
| '!': 6 |
| '"': 7 |
| '''': 8 |
| (: 9 |
| ): 10 |
| '*': 11 |
| ',': 12 |
| '-': 13 |
| .: 14 |
| /: 15 |
| '0': 16 |
| '1': 17 |
| '2': 18 |
| '3': 19 |
| '4': 20 |
| '5': 21 |
| '6': 22 |
| '7': 23 |
| '8': 24 |
| '9': 25 |
| ':': 26 |
| ;: 27 |
| <EOS>: 2 |
| <PAD>: 0 |
| <SOS>: 1 |
| <UNK>: 3 |
| '[': 28 |
| ']': 29 |
| '`': 30 |
| '{': 31 |
| '}': 32 |
| '~': 33 |
| "\xAB": 34 |
| "\xBB": 35 |
| "\u060C": 36 |
| "\u061B": 37 |
| "\u061F": 38 |
| "\u0621": 39 |
| "\u0622": 40 |
| "\u0623": 41 |
| "\u0624": 42 |
| "\u0625": 43 |
| "\u0626": 44 |
| "\u0627": 45 |
| "\u0628": 46 |
| "\u0629": 47 |
| "\u062A": 48 |
| "\u062B": 49 |
| "\u062C": 50 |
| "\u062D": 51 |
| "\u062E": 52 |
| "\u062F": 53 |
| "\u0630": 54 |
| "\u0631": 55 |
| "\u0632": 56 |
| "\u0633": 57 |
| "\u0634": 58 |
| "\u0635": 59 |
| "\u0636": 60 |
| "\u0637": 61 |
| "\u0638": 62 |
| "\u0639": 63 |
| "\u063A": 64 |
| "\u0641": 65 |
| "\u0642": 66 |
| "\u0643": 67 |
| "\u0644": 68 |
| "\u0645": 69 |
| "\u0646": 70 |
| "\u0647": 71 |
| "\u0648": 72 |
| "\u0649": 73 |
| "\u064A": 74 |
| "\u064B": 78 |
| "\u064C": 80 |
| "\u064D": 82 |
| "\u064E": 77 |
| "\u064F": 79 |
| "\u0650": 81 |
| "\u0651": 84 |
| "\u0651\u064B": 86 |
| "\u0651\u064C": 88 |
| "\u0651\u064D": 90 |
| "\u0651\u064E": 85 |
| "\u0651\u064F": 87 |
| "\u0651\u0650": 89 |
| "\u0652": 83 |
| "\u200F": 75 |
| "\u2013": 76 |
| rev_classes_mapping: |
| 0: '' |
| 1: "\u064E" |
| 2: "\u064B" |
| 3: "\u064F" |
| 4: "\u064C" |
| 5: "\u0650" |
| 6: "\u064D" |
| 7: "\u0652" |
| 8: "\u0651" |
| 9: "\u0651\u064E" |
| 10: "\u0651\u064B" |
| 11: "\u0651\u064F" |
| 12: "\u0651\u064C" |
| 13: "\u0651\u0650" |
| 14: "\u0651\u064D" |
| 15: <PAD> |
| 16: <SOS> |
| 17: <EOS> |
| 18: <N/A> |
| constants_path: constants/ |
| with_extra_train: false |
|
|