How to Build Smarter Multilingual Text Wrapping with BudouX Through Parsing, HTML Rendering, Model Introspection, and Toy Training
n = len(y)
w = [1/n]*n
feat_set = sorted({f for fx in X for f in fx})
fmap = [set(fx) for fx in X]
model_rounds = []
for r in range(rounds):
best_feat, best_err, best_pol = None, 1.0, 1
for f in feat_set:
err_pos = sum(w[i] for i in range(n) if (f in fmap[i]) != (y[i]==1))
err_neg = 1 – err_pos
if err_pos < best_err: best_feat, best_err, best_pol = f, err_pos, +1
if err_neg < best_err: best_feat, best_err, best_pol = f, err_neg, -1
if best_err >= 0.5 – 1e-9: break
eps = max(best_err, 1e-6)
alpha = 0.5 * ( (1-eps)/eps ) ** 0.5
new_w = []
for i in range(n):
pred = best_pol if best_feat in fmap[i] else -best_pol
new_w.append(w[i] * (0.5 if pred == y[i] else 2.0))
s = sum(new_w); w = [x/s for x in new_w]
model_rounds.append((best_feat, best_pol, alpha))
return model_rounds
# Train the toy boosted-stump model and report how well it fits its own
# training data. String delimiters are plain ASCII quotes: the typographic
# quotes from the article extraction are not valid Python string delimiters.
print("Training (this is a toy trainer — be patient ~10s)…")
t0 = time.perf_counter()
rounds = adaboost(X, y, rounds=60)
print(f"Done in {time.perf_counter()-t0:.1f}s, {len(rounds)} stumps kept.")

# Each kept stump is unpacked as (feature, polarity, weight). A stump votes
# +weight when "feature present" agrees with a positive polarity (or
# "feature absent" with a negative one) and -weight otherwise; the sign of
# the summed votes is the predicted label (+1 / -1).
correct = 0
for fx, label in zip(X, y):
    # Build the membership set once per sample so each stump lookup is O(1)
    # instead of a linear scan over the sample's feature list.
    feats = set(fx)
    score = sum(a if (f in feats) == (p == 1) else -a for f, p, a in rounds)
    pred = 1 if score > 0 else -1
    correct += (pred == label)
print(f"Training accuracy of toy model: {correct/len(X)*100:.1f}%")
print("👉 For a production model, use `scripts/train.py` from the BudouX repo with the matching feature extractor — this section is illustrative.")
# Side-by-side demo: the same Japanese paragraph rendered in two narrow
# (max-width 180px) columns, once as raw text and once after passing it
# through ja_parser.translate_html_string.
header("8️⃣ Real-world demo — narrow column comparison")

# Adjacent string literals are concatenated into one paragraph.
paragraph = (
    "BudouXはGoogleが開発したオープンソースの改行ライブラリです。"
    "機械学習モデルを使って、文章を意味のあるフレーズに分割し、"
    "読みやすい位置でのみ改行が起こるようにします。"
    "依存関係がなく軽量なため、ウェブサイトやモバイルアプリに"
    "簡単に組み込むことができます。"
)

# HTML attribute values and CSS font names must use straight quotes; the
# typographic quotes from the article extraction are not recognised as
# attribute delimiters, so the style attributes would not be applied.
display(HTML(f"""
<div style="display:flex; gap:24px; font-family:'Hiragino Sans','Yu Gothic',sans-serif; font-size:15px;">
<div style="flex:1; border:2px solid #c33; padding:12px; max-width:180px;">
<b style="color:#c33;">Without BudouX</b>
<p style="line-height:1.7;">{paragraph}</p>
</div>
<div style="flex:1; border:2px solid #2a8; padding:12px; max-width:180px;">
<b style="color:#2a8;">With BudouX</b>
<p style="line-height:1.7;">{ja_parser.translate_html_string(paragraph)}</p>
</div>
</div>
<p style="font-size:12px;color:#666;">Resize the browser/Colab pane to see the difference more clearly — BudouX never breaks a phrase mid-word.</p>
"""))
print("\n🌸 Tutorial complete. Try plugging BudouX output into your own UI.")


