"""Time and grade a spoken digit-reading recording using Azure Speech.

The script recognizes a WAV file, finds the time span between the first and
last recognized digit, checks the recognized digits against a reference digit
string, and maps the elapsed time to a band ('a', 'b' or 'c') for a grade.
"""

import argparse
import re
import time
from json import loads as _parse_json  # aliased: `json` is a parameter name below

import azure.cognitiveservices.speech as speechsdk
import cn2an

# Reference digit string that the recognized digits are compared against.
pattern = '53948358944853994358398458549394835354988459349358'

# Per-grade thresholds in seconds, indexed by ``grade - 1``.
# A time <= the first value is band 'a'; a time > the second value is band 'c';
# anything in between is band 'b'.
standard = [(25, 44), (20, 33), (17, 27), (15, 24), (14, 22), (12, 20)]

# Divisor applied to the service's Offset/Duration values to obtain seconds
# (the Speech service reports them in 100-nanosecond ticks).
unit = 10000000

# SECURITY NOTE(review): the subscription key is hard-coded in source. It is
# kept here so behavior is unchanged, but it should be moved to configuration
# or an environment variable and rotated.
speech_config = speechsdk.SpeechConfig(
    subscription="934d6f1c2e8c4817b01f98cb93451fba",
    region='eastasia',
)
speech_config.speech_recognition_language = "zh-CN"
speech_config.set_property(
    speechsdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs,
    "50000",
)
# Word-level timestamps are required by read_time(), which reads the
# per-word Offset/Duration fields of the detailed result.
speech_config.request_word_level_timestamps()

# Punctuation removed before digit positions are computed.
_PUNCTUATION_RE = re.compile(r"[,,。:?]")
_DIGIT_RE = re.compile(r'\d')


def find_last_digit_position(input_string):
    """Return the index of the last digit in *input_string*, or -1 if none.

    The index is measured after the punctuation characters matched by
    ``_PUNCTUATION_RE`` have been removed from the string.
    """
    stripped = _PUNCTUATION_RE.sub("", input_string)
    position = -1
    for match in _DIGIT_RE.finditer(stripped):
        position = match.start()
    return position


def find_first_digit_position(input_string):
    """Return the index of the first digit in *input_string*, or -1 if none.

    The index is measured after the punctuation characters matched by
    ``_PUNCTUATION_RE`` have been removed from the string.
    """
    stripped = _PUNCTUATION_RE.sub("", input_string)
    match = _DIGIT_RE.search(stripped)
    return match.start() if match else -1


def rec(speech_recognizer):
    """Run continuous recognition to completion and collect the results.

    Blocks (polling every 0.5 s) until the recognizer fires either its
    ``session_stopped`` or ``canceled`` event.

    Args:
        speech_recognizer: a recognizer exposing the ``recognized``,
            ``session_stopped`` and ``canceled`` event signals and the
            ``start_continuous_recognition`` / ``stop_continuous_recognition``
            methods.

    Returns:
        A ``(recognized_text, json_result)`` pair of parallel lists holding
        ``evt.result.text`` and ``evt.result.json`` for each recognized event.
    """
    done = False
    recognized_text = []
    json_result = []

    def stop_cb(evt):
        # Stop recognition and release the polling loop below.
        nonlocal done
        speech_recognizer.stop_continuous_recognition()
        done = True

    def handle_recognized(evt):
        recognized_text.append(evt.result.text)
        json_result.append(evt.result.json)

    speech_recognizer.recognized.connect(handle_recognized)
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)
    return recognized_text, json_result


def read_time(text, json):
    """Return the (start, end) time in seconds spanned by recognized digits.

    For every non-empty segment that contains a digit, the first and last
    digit positions are used as indexes into the segment's
    ``NBest[0]['Words']`` list; the earliest word start and the latest word
    end across all segments are returned.

    NOTE(review): this assumes a digit's character position in the
    punctuation-stripped text equals its index in ``Words`` - confirm against
    real service output.

    Args:
        text: list of recognized text strings, one per segment.
        json: list of JSON result strings, parallel to *text*.

    Returns:
        ``(start, end)``. If no segment contains a digit the initial
        sentinels ``(9999, 0)`` are returned unchanged.
    """
    start = 9999
    end = 0
    for segment_text, segment_json in zip(text, json):
        if not segment_text:
            continue
        first = find_first_digit_position(segment_text)
        if first < 0:
            # No digit in this segment; using -1 as an index would silently
            # pick the last word instead, so skip the segment.
            continue
        last = find_last_digit_position(segment_text)

        # Parse as JSON rather than eval(): the payload is data, and JSON
        # literals such as true/false/null are not valid Python expressions.
        words = _parse_json(segment_json)['NBest'][0]['Words']
        segment_start = words[first]['Offset'] / unit
        segment_end = words[last]['Offset'] / unit + words[last]['Duration'] / unit

        if segment_start < start:
            start = segment_start
        if segment_end > end:
            end = segment_end
    return start, end


def check_digit(text, min_matches=47):
    """Check recognized digits against the reference ``pattern``.

    Args:
        text: string of recognized digits.
        min_matches: minimum number of positions that must equal the
            reference for the reading to be accepted (default 47).

    Returns:
        1 if *text* has the same length as ``pattern`` and at least
        *min_matches* positions agree, otherwise -1.
    """
    if len(text) != len(pattern):
        return -1
    matches = sum(actual == expected for actual, expected in zip(text, pattern))
    return 1 if matches >= min_matches else -1


def calc_read_time(filename, grade):
    """Recognize *filename*, time the digit reading and grade it.

    Args:
        filename: path of the audio file passed to the Speech SDK.
        grade: 1-based grade used to select thresholds from ``standard``.

    Returns:
        ``(score, band, digit_text)`` when the recognized digits pass
        ``check_digit``: *score* is the reading time in seconds rounded to
        3 decimals, *band* is 'a', 'b' or 'c'. Returns ``0`` when the digits
        do not pass the check.

    Raises:
        IndexError: if the digits pass the check but *grade* is outside
            ``1..len(standard)``.
    """
    audio_config = speechsdk.audio.AudioConfig(filename=filename)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config
    )

    texts, json_results = rec(speech_recognizer)
    start, end = read_time(texts, json_results)
    score = round(end - start, 3)

    # Convert Chinese numerals to Arabic digits, then keep digits only.
    digit_text = cn2an.transform(''.join(texts))
    digit_text = re.sub(r"\D", "", digit_text)

    if check_digit(digit_text) != 1:
        return 0

    # Reject grade <= 0 explicitly: negative indexing would otherwise wrap
    # around and silently apply another grade's thresholds.
    if not 1 <= grade <= len(standard):
        raise IndexError(f'grade must be between 1 and {len(standard)}, got {grade}')

    fast_limit, slow_limit = standard[grade - 1]
    if score <= fast_limit:
        c = 'a'
    elif score > slow_limit:
        c = 'c'
    else:
        c = 'b'
    print(f'{score}-{c}')
    return score, c, digit_text


def main():
    """Parse ``filename`` and ``grade`` from the command line and run."""
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', type=str)
    parser.add_argument('grade', type=int)
    args = parser.parse_args()
    calc_read_time(args.filename, args.grade)


if __name__ == '__main__':
    main()