Automatic Speech Recognition
Transformers
PyTorch
Chinese
whisper
whisper-event
Generated from Trainer
Eval Results (legacy)
Instructions to use thomas0104/large_v2_nan_tw_so_short_30s with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use thomas0104/large_v2_nan_tw_so_short_30s with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition", model="thomas0104/large_v2_nan_tw_so_short_30s")
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForMultimodalLM

processor = AutoProcessor.from_pretrained("thomas0104/large_v2_nan_tw_so_short_30s")
model = AutoModelForMultimodalLM.from_pretrained("thomas0104/large_v2_nan_tw_so_short_30s")
```
- Notebooks
- Google Colab
- Kaggle
| import string, re, opencc | |
# Fullwidth -> halfwidth translation table, for use with ``str.translate``.
#
# The previous table listed ASCII digits, letters and most punctuation on BOTH
# sides, so those entries mapped each character to itself and fullwidth text
# passed through untouched. The table is now derived from the Unicode
# "Fullwidth Forms" range, and the CJK punctuation mappings that were already
# effective are kept as they were.
全型2半型 = {
    # Fullwidth forms U+FF01..U+FF5E mirror printable ASCII U+0021..U+007E
    # at a constant offset of 0xFEE0.
    **{code: code - 0xFEE0 for code in range(0xFF01, 0xFF5F)},
    # Ideographic (fullwidth) space -> ASCII space.
    0x3000: 0x20,
    # CJK punctuation that has no counterpart in the fullwidth block.
    **str.maketrans(
        '゛、ー。〈〉‘',
        '",-.<>`',
    ),
}
def 把怪字修進unicode(xStr):
    """Return ``xStr`` with every U+F5C3 (a private-use code point) replaced by '𪜶'.

    All other characters are returned unchanged.
    """
    # A plain literal substitution: no regular expression features are needed.
    return xStr.replace('\uf5c3', '𪜶')
def ryNormText(s):
    """Normalize a transcript string.

    Steps, in order:

    1. Remove every literal ``<<<None>>>`` marker.
    2. Delete ASCII punctuation (``string.punctuation``), the Chinese
       punctuation listed below (the list is not exhaustive), newlines and
       ASCII spaces.
    3. Replace the private-use code point U+F5C3 via ``把怪字修進unicode``.
    4. Convert the text with OpenCC's ``s2tw`` configuration.

    Args:
        s: Text to normalize.

    Returns:
        The normalized string.
    """
    ascii_punct = string.punctuation  # English punctuation
    cjk_punct = '。,﹐、!?::;『』「」…,\n'  # Chinese punctuation; not yet complete
    # Fullwidth comma, exclamation mark, question mark, colon and semicolon,
    # written as escapes so they cannot be confused with their ASCII twins.
    fullwidth_punct = '\uff0c\uff01\uff1f\uff1a\uff1b'

    # The marker must go first: once its '<' and '>' are stripped as
    # punctuation it could no longer be recognized.
    s = s.replace('<<<None>>>', '')

    # The earlier implementation turned punctuation into spaces, collapsed the
    # spaces and then deleted them, so the net effect was plain deletion. A
    # deletion table does that in one pass. It also avoids interpolating
    # unescaped characters into a regex character class, where the ``\]`` in
    # ``string.punctuation`` was read as an escaped bracket and backslashes
    # were therefore never stripped.
    delete_table = str.maketrans('', '', ascii_punct + cjk_punct + fullwidth_punct + ' ')
    s = s.translate(delete_table)

    s = 把怪字修進unicode(s)

    # Convert with the 's2tw' OpenCC configuration.
    s = opencc.OpenCC('s2tw').convert(s)
    return s
| import unicodedata | |
| import re | |
def separ_char_word(inputString= '我是呂仁園 Renyuan Lyu'):
    """Insert a space after every CJK character so each one stands alone.

    A character counts as CJK when its Unicode name starts with ``'CJK'``.
    Other characters (for example Latin letters) are copied through unchanged,
    so words written with them stay intact. Runs of ASCII spaces in the result
    are collapsed to a single space.

    With the default argument the result is ``'我 是 呂 仁 園 Renyuan Lyu'``.

    Characters that have no Unicode name are reported with ``print``. For such
    input a space is added after the character and one space is added at the
    very start of the result.

    Args:
        inputString: Text to process. U+F5C3 is first replaced via
            ``把怪字修進unicode``.

    Returns:
        The spaced-out string.
    """
    inputString = 把怪字修進unicode(inputString)

    pieces = []          # collected fragments, joined once at the end
    saw_unnamed = False  # True once any character without a Unicode name was seen
    for x in inputString:
        pieces.append(x)
        try:
            un = unicodedata.name(x)
        except ValueError as ex:
            # unicodedata.name raises ValueError for characters without a name.
            # The loop/exception variable names are kept because the f-string
            # '=' specifier prints them as part of the message.
            saw_unnamed = True
            pieces.append(' ')
            print(f'ryErr:(def 中英分開:){ex= }\t【{x= }】\t{inputString= }')
        else:
            if un.startswith('CJK'):
                pieces.append(' ')

    y = ''.join(pieces)
    if saw_unnamed:
        # Keeps the existing output format: the result starts with a space
        # whenever an unnamed character was encountered.
        y = ' ' + y

    # Keep only one space wherever several ended up next to each other.
    return re.sub('[ ]+', ' ', y)
| # Example: | |
| # q = separ_char_word('大家好 da jia hao 我是呂仁園 I am Renyuan Lyu') | |
| # print(q) | |