daihui.zhang committed on
Commit
0a036e5
·
1 Parent(s): 2c5a26c

drop only-punctuation audio chunks in analysis

Browse files
Files changed (1) hide show
  1. transcribe/strategy.py +9 -1
transcribe/strategy.py CHANGED
@@ -76,10 +76,15 @@ class TranscriptChunk:
76
 
77
  # 每个切分点向后移一个索引,表示“分隔符归前段”
78
  cut_points = [0] + sorted(i + 1 for i in indexes) + [len(self.items)]
79
- return [
80
  TranscriptChunk(items=self.items[start:end], separator=self.separator)
81
  for start, end in zip(cut_points, cut_points[1:])
82
  ]
 
 
 
 
 
83
 
84
 
85
  def get_split_first_rest(self, mode: SplitMode):
@@ -109,6 +114,9 @@ class TranscriptChunk:
109
  logger.debug(f"Compare: {self.join()} vs {chunk.join()} : {score}")
110
  return score
111
 
 
 
 
112
  def has_punctuation(self) -> bool:
113
  return any(seg.is_punctuation() for seg in self.items)
114
 
 
76
 
77
  # 每个切分点向后移一个索引,表示“分隔符归前段”
78
  cut_points = [0] + sorted(i + 1 for i in indexes) + [len(self.items)]
79
+ chunks = [
80
  TranscriptChunk(items=self.items[start:end], separator=self.separator)
81
  for start, end in zip(cut_points, cut_points[1:])
82
  ]
83
+ return [
84
+ ck
85
+ for ck in chunks
86
+ if not ck.only_punctuation()
87
+ ]
88
 
89
 
90
  def get_split_first_rest(self, mode: SplitMode):
 
114
  logger.debug(f"Compare: {self.join()} vs {chunk.join()} : {score}")
115
  return score
116
 
117
+ def only_punctuation(self)->bool:
118
+ return all(seg.is_punctuation() for seg in self.items)
119
+
120
  def has_punctuation(self) -> bool:
121
  return any(seg.is_punctuation() for seg in self.items)
122