david committed on
Commit
b6e4de3
·
1 Parent(s): e1e0093

debug cut index

Browse files
Files changed (1) hide show
  1. transcribe/strategy.py +10 -5
transcribe/strategy.py CHANGED
@@ -48,7 +48,7 @@ class TranscriptToken:
48
  return PAUSEE_END_PATTERN.search(self.text) is not None
49
 
50
  def buffer_index(self) -> int:
51
- return int(self.t1 / 100 * SAMPLE_RATE)
52
 
53
  @dataclass
54
  class TranscriptChunk:
@@ -108,6 +108,9 @@ class TranscriptChunk:
108
  return any(seg.is_punctuation() for seg in self.items)
109
 
110
  def get_buffer_index(self) -> int:
 
 
 
111
  return self.items[-1].buffer_index()
112
 
113
  def is_end_sentence(self) ->bool:
@@ -182,10 +185,13 @@ class TranscriptBuffer:
182
  self.update_pending_text(stable_string)
183
  if is_end_sentence:
184
  self.commit_paragraph(end_of_sentence=True)
 
185
  # if len() >=20
 
186
  else:
187
  self.commit_line()
188
- self.update_pending_text(remaining_string)
 
189
 
190
 
191
  @property
@@ -278,11 +284,10 @@ class TranscriptStabilityAnalyzer:
278
  logger.debug(f"Current cut index: {frame_cut_index}, Stable string: {stable_str}, Remaining_str:{remaining_str}")
279
 
280
  prev_seg_id = self._transcript_buffer.get_seg_id()
281
- self._transcript_buffer.update_and_commit(stable_str, remaining_str, is_end_sentence)
282
-
283
  logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
284
 
285
- if self._transcript_buffer.get_seg_id() > prev_seg_id:
286
  # 表示生成了一个新段落 换行
287
  yield TranscriptResult(
288
  seg_id=prev_seg_id,
 
48
  return PAUSEE_END_PATTERN.search(self.text) is not None
49
 
50
  def buffer_index(self) -> int:
51
+ return max(int(self.t1 / 100 * SAMPLE_RATE) - 300, 0)
52
 
53
  @dataclass
54
  class TranscriptChunk:
 
108
  return any(seg.is_punctuation() for seg in self.items)
109
 
110
  def get_buffer_index(self) -> int:
111
+ logger.debug("==== Current cut item ====")
112
+ logger.debug(f"{self.items[-1]}")
113
+ logger.debug("==========================")
114
  return self.items[-1].buffer_index()
115
 
116
  def is_end_sentence(self) ->bool:
 
185
  self.update_pending_text(stable_string)
186
  if is_end_sentence:
187
  self.commit_paragraph(end_of_sentence=True)
188
+ self.update_pending_text(remaining_string)
189
  # if len() >=20
190
+ return True
191
  else:
192
  self.commit_line()
193
+ self.update_pending_text(remaining_string)
194
+ return False
195
 
196
 
197
  @property
 
284
  logger.debug(f"Current cut index: {frame_cut_index}, Stable string: {stable_str}, Remaining_str:{remaining_str}")
285
 
286
  prev_seg_id = self._transcript_buffer.get_seg_id()
287
+ commit_paragraph = self._transcript_buffer.update_and_commit(stable_str, remaining_str, is_end_sentence)
 
288
  logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
289
 
290
+ if commit_paragraph:
291
  # 表示生成了一个新段落 换行
292
  yield TranscriptResult(
293
  seg_id=prev_seg_id,