# Pastebin service (beta) -- header text that precedes the script; kept as a comment so the file parses.
import re
import xml.etree.ElementTree as ET
import uno
from com.sun.star.awt import MessageBoxButtons as MBButtons
from com.sun.star.awt.MessageBoxType import MESSAGEBOX
# Convert English (ASCII) digits to Persian digits.
def en_to_fa_numbers(text):
    """Return *text* with every ASCII digit 0-9 replaced by its Persian digit.

    All other characters are passed through unchanged.
    """
    digit_table = str.maketrans("0123456789", "۰۱۲۳۴۵۶۷۸۹")
    converted = text.translate(digit_table)
    return converted
# Load the wrong -> correct word list.
def load_replacements(path):
    """Parse the block-list XML file at *path* into a ``{wrong: correct}`` dict.

    Only ``block`` elements that are direct children of the root are read.
    For each one, the ``abbreviated-name`` attribute is the key and the
    ``name`` attribute is the value (both in the block-list namespace).
    Entries where either attribute is missing or empty are skipped. If the
    same key appears more than once, the last occurrence wins.

    Errors raised by ``ET.parse`` (unreadable file, malformed XML) propagate
    to the caller.
    """
    wrong_attr = "{http://openoffice.org/2001/block-list}abbreviated-name"
    correct_attr = "{http://openoffice.org/2001/block-list}name"
    prefix_map = {"bl": "http://openoffice.org/2001/block-list"}

    document_root = ET.parse(path).getroot()
    mapping = {}
    for entry in document_root.findall("bl:block", prefix_map):
        misspelled = entry.get(wrong_attr)
        corrected = entry.get(correct_attr)
        if not (misspelled and corrected):
            continue
        mapping[misspelled] = corrected
    return mapping
# Replacement bank used by the text-fixing macro, loaded once at import time.
# NOTE(review): the path is absolute and points into one specific user's home
# directory; on any other account or machine the load raises and the whole
# module fails to import -- confirm whether this should be made configurable.
_REPLACEMENTS_XML = "/home/afshin/.config/libreoffice/4/user/Scripts/python/DocumentList.xml"
REPLACEMENTS = load_replacements(_REPLACEMENTS_XML)
# Look-alike Arabic/Persian letters, written as escapes because the glyph pairs
# are indistinguishable in many fonts.
_ARABIC_KAF = "\u0643"
_PERSIAN_KEHEH = "\u06a9"
_ARABIC_YEH = "\u064a"
_PERSIAN_YEH = "\u06cc"
_HAMZA_ABOVE = "\u0654"  # combining mark appended to a final "ه"

# Digit sets and translation tables, built once instead of on every call.
_EN_DIGITS = "0123456789"
_AR_DIGITS = "٠١٢٣٤٥٦٧٨٩"
_EN_TO_FA_DIGITS = str.maketrans(_EN_DIGITS, "۰۱۲۳۴۵۶۷۸۹")
_AR_TO_FA_DIGITS = str.maketrans(_AR_DIGITS, "۰۱۲۳۴۵۶۷۸۹")


def _apply_persian_fixes(text, replacements):
    """Apply every normalization rule to *text* and count what was changed.

    Args:
        text: The plain text to normalize.
        replacements: Mapping of misspelled word -> corrected word.

    Returns:
        A ``(fixed_text, counts)`` tuple. ``counts`` maps each rule's report
        label to the number of corrections that rule made, in rule order.
    """
    counts = {
        "می": 0, "ك→ک": 0, "ي→ی": 0, "،؛؟": 0, "گیومه": 0,
        "اعداد EN→FA": 0, "اعداد عربی→FA": 0, "ه ی → هٔ": 0,
        "؟؟؟": 0, "فاصله قبل از علائم": 0, "غلطهای املایی (بانک)": 0,
    }

    # 1. Verbal prefix "می" / "نمی" + whitespace -> prefix + ZWNJ.
    #    The lookbehind anchors the prefix to the start of a word; without it
    #    any word merely ending in "می" (e.g. "قدیمی") was glued to the next
    #    word. "ن?" keeps the negative form "نمی" covered.
    text, n = re.subn(
        r"(?<![\u0600-\u06FF])(ن?می)\s+([\u0600-\u06FF]+)",
        lambda m: m.group(1) + "\u200c" + m.group(2),
        text,
    )
    counts["می"] += n

    # 2. Arabic kaf -> Persian keheh.
    n = text.count(_ARABIC_KAF)
    if n:
        counts["ك→ک"] += n
        text = text.replace(_ARABIC_KAF, _PERSIAN_KEHEH)

    # 3. Arabic yeh -> Persian yeh.
    n = text.count(_ARABIC_YEH)
    if n:
        counts["ي→ی"] += n
        text = text.replace(_ARABIC_YEH, _PERSIAN_YEH)

    # 4. Latin punctuation -> Persian punctuation (applies to the whole text).
    for en_punct, fa_punct in ((",", "،"), (";", "؛"), ("?", "؟")):
        n = text.count(en_punct)
        if n:
            counts["،؛؟"] += n
            text = text.replace(en_punct, fa_punct)

    # 5. Straight double quotes -> guillemets, alternating open/close.
    if '"' in text:
        pieces = []
        opening = True
        converted = 0
        for ch in text:
            if ch == '"':
                pieces.append("«" if opening else "»")
                opening = not opening
                converted += 1
            else:
                pieces.append(ch)
        text = "".join(pieces)
        counts["گیومه"] += converted

    # 6. ASCII digits -> Persian digits.
    counts["اعداد EN→FA"] += sum(text.count(d) for d in _EN_DIGITS)
    text = text.translate(_EN_TO_FA_DIGITS)

    # 7. Arabic-Indic digits -> Persian digits.
    counts["اعداد عربی→FA"] += sum(text.count(d) for d in _AR_DIGITS)
    text = text.translate(_AR_TO_FA_DIGITS)

    # 8. Final "ه" + (whitespace or ZWNJ) + standalone "ی" -> "ه" + hamza above.
    #    Matching only from the "ه" (no greedy \S* prefix) means every
    #    occurrence inside a ZWNJ-joined run is handled, not just the last.
    text, n = re.subn(
        r"(ه)[\s\u200c]ی\b",
        lambda m: m.group(1) + _HAMZA_ABOVE,
        text,
    )
    counts["ه ی → هٔ"] += n

    # 9. Runs of question marks -> a single one.
    text, n = re.subn(r"؟{2,}", "؟", text)
    counts["؟؟؟"] += n

    # 10. Remove whitespace before punctuation.
    text, n = re.subn(r"\s+([،؛؟.])", r"\1", text)
    counts["فاصله قبل از علائم"] += n

    # 11. Whole-word spelling corrections from the replacement bank.
    for wrong, correct in replacements.items():
        # Cheap substring test: the pattern cannot match unless the literal
        # word is present, so skip the regex for absent entries.
        if wrong not in text:
            continue
        pattern = r"\b" + re.escape(wrong) + r"\b"
        # A function replacement inserts `correct` literally; a plain string
        # would be parsed as a template and mis-handle backslashes.
        text, n = re.subn(pattern, lambda m, fixed=correct: fixed, text)
        counts["غلطهای املایی (بانک)"] += n

    return text, counts


def fix_text_full(event=None):
    """Normalize the Persian text of the current Writer document.

    Reads the document's whole body text as one string, runs it through all
    normalization rules, writes the result back if it differs, and shows a
    message box with per-rule correction counts when anything was counted.

    Args:
        event: Unused; accepted so the macro can be bound to a UI event.

    Returns:
        None. Returns early, doing nothing, when there is no current
        component or it is not a text document.
    """
    ctx = uno.getComponentContext()
    desktop = ctx.ServiceManager.createInstanceWithContext(
        "com.sun.star.frame.Desktop", ctx
    )
    doc = desktop.getCurrentComponent()
    # No active component at all, or something other than a Writer document.
    if doc is None or not doc.supportsService("com.sun.star.text.TextDocument"):
        return

    text_obj = doc.getText()
    full_text = text_obj.getString()
    fixed_text, report_counts = _apply_persian_fixes(full_text, REPLACEMENTS)

    # Only rewrite the document when something actually changed.
    # NOTE(review): setString replaces the entire body text in one call, which
    # presumably discards character formatting and embedded objects -- confirm,
    # and consider editing per paragraph/portion instead.
    if fixed_text != full_text:
        text_obj.setString(fixed_text)

    # Report.
    total = sum(report_counts.values())
    if total > 0:
        lines = [
            f"{label}: {en_to_fa_numbers(str(count))}"
            for label, count in report_counts.items()
            if count > 0
        ]
        report = f"مجموع اصلاحات: {en_to_fa_numbers(str(total))}\n" + "\n".join(lines)
        parent_win = doc.CurrentController.Frame.ContainerWindow
        mb = parent_win.getToolkit().createMessageBox(
            parent_win, MESSAGEBOX, MBButtons.BUTTONS_OK,
            "گزارش اصلاح متن", report
        )
        mb.execute()