Sontran0108 committed on
Commit
9abf359
·
1 Parent(s): fec7816

Update compute_changes in handler.py to diff at character level instead of token level

Browse files
Files changed (1) hide show
  1. handler.py +9 -8
handler.py CHANGED
@@ -44,26 +44,27 @@ class EndpointHandler:
44
  return decoded
45
 
46
  def compute_changes(self, original, enhanced):
47
- # Your existing compute_changes logic
48
  changes = []
49
- matcher = SequenceMatcher(None, original.split(), enhanced.split())
 
50
  for tag, i1, i2, j1, j2 in matcher.get_opcodes():
51
  if tag in ("replace", "insert", "delete"):
52
- original_phrase = " ".join(original.split()[i1:i2])
53
- new_phrase = " ".join(enhanced.split()[j1:j2])
54
  changes.append({
55
  "original_phrase": original_phrase,
56
  "new_phrase": new_phrase,
57
  "char_start": i1,
58
  "char_end": i2,
59
- "token_start": i1,
60
- "token_end": i2,
61
  "explanation": f"{tag} change",
62
- "error_type": "",
63
- "tip": ""
64
  })
65
  return changes
66
 
 
67
  def __call__(self, inputs):
68
  # This method is the main entry point for the Hugging Face Endpoint.
69
 
 
44
  return decoded
45
 
46
  def compute_changes(self, original, enhanced):
 
47
  changes = []
48
+ matcher = SequenceMatcher(None, original, enhanced) # char-level, not token-level
49
+
50
  for tag, i1, i2, j1, j2 in matcher.get_opcodes():
51
  if tag in ("replace", "insert", "delete"):
52
+ original_phrase = original[i1:i2]
53
+ new_phrase = enhanced[j1:j2]
54
  changes.append({
55
  "original_phrase": original_phrase,
56
  "new_phrase": new_phrase,
57
  "char_start": i1,
58
  "char_end": i2,
59
+ "token_start": None, # not token-based anymore
60
+ "token_end": None,
61
  "explanation": f"{tag} change",
62
+ "error_type": "whitespace" if original_phrase.isspace() or new_phrase.isspace() else "",
63
+ "tip": "Avoid extra spaces between words." if original_phrase.isspace() or new_phrase.isspace() else ""
64
  })
65
  return changes
66
 
67
+
68
  def __call__(self, inputs):
69
  # This method is the main entry point for the Hugging Face Endpoint.
70