david
committed on
Commit
·
b6e4de3
1
Parent(s):
e1e0093
debug cut index
Browse files
- transcribe/strategy.py +10 -5
transcribe/strategy.py
CHANGED
|
@@ -48,7 +48,7 @@ class TranscriptToken:
|
|
| 48 |
return PAUSEE_END_PATTERN.search(self.text) is not None
|
| 49 |
|
| 50 |
def buffer_index(self) -> int:
|
| 51 |
-
return int(self.t1 / 100 * SAMPLE_RATE)
|
| 52 |
|
| 53 |
@dataclass
|
| 54 |
class TranscriptChunk:
|
|
@@ -108,6 +108,9 @@ class TranscriptChunk:
|
|
| 108 |
return any(seg.is_punctuation() for seg in self.items)
|
| 109 |
|
| 110 |
def get_buffer_index(self) -> int:
|
|
|
|
|
|
|
|
|
|
| 111 |
return self.items[-1].buffer_index()
|
| 112 |
|
| 113 |
def is_end_sentence(self) ->bool:
|
|
@@ -182,10 +185,13 @@ class TranscriptBuffer:
|
|
| 182 |
self.update_pending_text(stable_string)
|
| 183 |
if is_end_sentence:
|
| 184 |
self.commit_paragraph(end_of_sentence=True)
|
|
|
|
| 185 |
# if len() >=20
|
|
|
|
| 186 |
else:
|
| 187 |
self.commit_line()
|
| 188 |
-
|
|
|
|
| 189 |
|
| 190 |
|
| 191 |
@property
|
|
@@ -278,11 +284,10 @@ class TranscriptStabilityAnalyzer:
|
|
| 278 |
logger.debug(f"Current cut index: {frame_cut_index}, Stable string: {stable_str}, Remaining_str:{remaining_str}")
|
| 279 |
|
| 280 |
prev_seg_id = self._transcript_buffer.get_seg_id()
|
| 281 |
-
self._transcript_buffer.update_and_commit(stable_str, remaining_str, is_end_sentence)
|
| 282 |
-
|
| 283 |
logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
|
| 284 |
|
| 285 |
-
if
|
| 286 |
# 表示生成了一个新段落 换行
|
| 287 |
yield TranscriptResult(
|
| 288 |
seg_id=prev_seg_id,
|
|
|
|
| 48 |
return PAUSEE_END_PATTERN.search(self.text) is not None
|
| 49 |
|
| 50 |
def buffer_index(self) -> int:
|
| 51 |
+
return max(int(self.t1 / 100 * SAMPLE_RATE) - 300, 0)
|
| 52 |
|
| 53 |
@dataclass
|
| 54 |
class TranscriptChunk:
|
|
|
|
| 108 |
return any(seg.is_punctuation() for seg in self.items)
|
| 109 |
|
| 110 |
def get_buffer_index(self) -> int:
|
| 111 |
+
logger.debug("==== Current cut item ====")
|
| 112 |
+
logger.debug(f"{self.items[-1]}")
|
| 113 |
+
logger.debug("==========================")
|
| 114 |
return self.items[-1].buffer_index()
|
| 115 |
|
| 116 |
def is_end_sentence(self) ->bool:
|
|
|
|
| 185 |
self.update_pending_text(stable_string)
|
| 186 |
if is_end_sentence:
|
| 187 |
self.commit_paragraph(end_of_sentence=True)
|
| 188 |
+
self.update_pending_text(remaining_string)
|
| 189 |
# if len() >=20
|
| 190 |
+
return True
|
| 191 |
else:
|
| 192 |
self.commit_line()
|
| 193 |
+
self.update_pending_text(remaining_string)
|
| 194 |
+
return False
|
| 195 |
|
| 196 |
|
| 197 |
@property
|
|
|
|
| 284 |
logger.debug(f"Current cut index: {frame_cut_index}, Stable string: {stable_str}, Remaining_str:{remaining_str}")
|
| 285 |
|
| 286 |
prev_seg_id = self._transcript_buffer.get_seg_id()
|
| 287 |
+
commit_paragraph = self._transcript_buffer.update_and_commit(stable_str, remaining_str, is_end_sentence)
|
|
|
|
| 288 |
logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
|
| 289 |
|
| 290 |
+
if commit_paragraph:
|
| 291 |
# 表示生成了一个新段落 换行
|
| 292 |
yield TranscriptResult(
|
| 293 |
seg_id=prev_seg_id,
|