Iranian Ubuntu Community

سرویس Pastebin (نسخه آزمایشی)
		
			import re

import xml.etree.ElementTree as ET

import uno

from com.sun.star.awt import MessageBoxButtons as MBButtons

from com.sun.star.awt.MessageBoxType import MESSAGEBOX

def en_to_fa_numbers(text):
    """Return *text* with every ASCII digit replaced by its Persian digit.

    Characters other than ``0``-``9`` are left untouched.
    """
    # Code-point -> replacement table, built pairwise from the two digit rows.
    digit_table = {
        ord(latin): persian
        for latin, persian in zip("0123456789", "۰۱۲۳۴۵۶۷۸۹")
    }
    return text.translate(digit_table)

def load_replacements(path):
    """Read a block-list XML file and return its misspelling -> correction map.

    Only ``block`` elements that are direct children of the root and live in
    the ``http://openoffice.org/2001/block-list`` namespace are considered.
    For each one the ``abbreviated-name`` attribute is the wrong spelling and
    the ``name`` attribute is the corrected spelling; an entry is kept only
    when both attributes are present and non-empty.

    Args:
        path: Location of the XML file, passed straight to ``ET.parse``.

    Returns:
        A dict mapping each wrong spelling to its correction. When the same
        wrong spelling appears more than once, the last entry wins.
    """
    namespace = "http://openoffice.org/2001/block-list"
    # Fully-qualified ("Clark notation") names, so no prefix map is needed.
    block_tag = "{" + namespace + "}block"
    wrong_attr = "{" + namespace + "}abbreviated-name"
    correct_attr = "{" + namespace + "}name"

    mapping = {}
    for entry in ET.parse(path).getroot().findall(block_tag):
        misspelled = entry.get(wrong_attr)
        corrected = entry.get(correct_attr)
        if not (misspelled and corrected):
            continue
        mapping[misspelled] = corrected
    return mapping

# Block-list XML file that supplies the misspelling -> correction table.
# NOTE(review): this is an absolute, user-specific path and the file is parsed
# as soon as this module is loaded.
_DOCUMENT_LIST_PATH = "/home/afshin/.config/libreoffice/4/user/Scripts/python/DocumentList.xml"

REPLACEMENTS = load_replacements(_DOCUMENT_LIST_PATH)

def fix_text_full(event=None):
    """Normalise Persian typography in the current Writer document and report it.

    The whole body text is read as one plain string, passed through a fixed
    sequence of corrections (Arabic -> Persian letters and digits, Latin ->
    Persian punctuation and quotation marks, zero-width non-joiner fixes,
    spacing around punctuation, and the word list in ``REPLACEMENTS``), and
    written back only if something changed. A message box then lists how many
    corrections of each kind were made.

    Args:
        event: Unused. Presumably present so the macro can be bound to a
            LibreOffice event or toolbar button that passes an argument --
            confirm.

    Returns:
        None. Does nothing when there is no current component or when the
        current component is not a text document.

    NOTE(review): the result is written back with ``setString`` as a single
    plain string, which presumably discards character formatting inside the
    document -- confirm that this is acceptable for the intended use.
    """
    ctx = uno.getComponentContext()
    smgr = ctx.ServiceManager
    desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
    doc = desktop.getCurrentComponent()

    # getCurrentComponent() yields None when no document is active.
    if doc is None or not doc.supportsService("com.sun.star.text.TextDocument"):
        return

    text_obj = doc.getText()
    full_text = text_obj.getString()

    # One counter per kind of correction; insertion order is the report order.
    report_counts = {
        "می‌": 0, "ك→ک": 0, "ي→ی": 0, "،؛؟": 0, "گیومه": 0,
        "اعداد EN→FA": 0, "اعداد عربی→FA": 0, "ه ی → هٔ": 0,
        "؟؟؟": 0, "فاصله قبل از علائم": 0, "غلط‌های املایی (بانک)": 0,
        "نیم‌فاصله پسوندها": 0, "فاصله‌های اضافی": 0,
        "فاصله قبل/بعد علائم": 0,
    }

    ZWNJ = "\u200c"  # zero-width non-joiner

    def counted_sub(label, pattern, repl, s):
        """Apply ``re.subn`` and add the number of substitutions to *label*."""
        s, hits = re.subn(pattern, repl, s)
        report_counts[label] += hits
        return s

    def fix_all(text):
        """Run every correction over *text* in order and return the result."""
        # 1. Arabic kaf -> Persian kaf. Letter normalisation runs first so
        #    that the Persian-letter patterns below also see text typed with
        #    Arabic code points.
        n = text.count("ك")
        if n:
            report_counts["ك→ک"] += n
            text = text.replace("ك", "ک")

        # 2. Arabic yeh -> Persian yeh.
        n = text.count("ي")
        if n:
            report_counts["ي→ی"] += n
            text = text.replace("ي", "ی")

        # 3. Detached verb prefix: "می روم" / "نمی روم" -> ZWNJ-joined.
        #    The leading \b restricts this to a free-standing «می»/«نمی»;
        #    without it every word that merely ends in «می» (e.g. «علمی»)
        #    would be glued to the following word.
        text = counted_sub(
            "می‌",
            r"\b(ن?می)\s+([\u0600-\u06FF]+)",
            lambda m: m.group(1) + ZWNJ + m.group(2),
            text,
        )

        # 4. Latin punctuation -> Persian punctuation.
        for en_punct, fa_punct in ((",", "،"), (";", "؛"), ("?", "؟")):
            n = text.count(en_punct)
            if n:
                report_counts["،؛؟"] += n
                text = text.replace(en_punct, fa_punct)

        # 5. Straight double quotes -> guillemets, alternating open/close.
        if '"' in text:
            result, open_q, cnt = [], True, 0
            for ch in text:
                if ch == '"':
                    result.append("«" if open_q else "»")
                    open_q = not open_q
                    cnt += 1
                else:
                    result.append(ch)
            text = "".join(result)
            report_counts["گیومه"] += cnt

        # 6. ASCII digits -> Persian digits.
        report_counts["اعداد EN→FA"] += sum(text.count(d) for d in "0123456789")
        text = en_to_fa_numbers(text)

        # 7. Arabic-Indic digits -> Persian digits.
        report_counts["اعداد عربی→FA"] += sum(text.count(d) for d in "٠١٢٣٤٥٦٧٨٩")
        text = text.translate(str.maketrans("٠١٢٣٤٥٦٧٨٩", "۰۱۲۳۴۵۶۷۸۹"))

        # 8. Word ending in «ه» followed by a detached «ی» -> «ه» + hamza above.
        text = counted_sub(
            "ه ی → هٔ",
            r"(\S*ه)[\s\u200c]ی\b",
            lambda m: m.group(1) + "ٔ",
            text,
        )

        # 9. Runs of question marks -> a single one.
        text = counted_sub("؟؟؟", r"؟{2,}", "؟", text)

        # 10. No whitespace before closing/terminal punctuation. Opening
        #     brackets are deliberately not in this class: the space before
        #     "(" or "⟨" separates the bracket from the preceding word.
        text = counted_sub("فاصله قبل از علائم", r"\s+([،؛؟.\)⟩])", r"\1", text)

        # 11. Word-list corrections from REPLACEMENTS.
        for wrong, correct in REPLACEMENTS.items():
            # Cheap containment test avoids compiling and scanning a regex
            # for entries that cannot match.
            if wrong not in text:
                continue
            pat = r"\b" + re.escape(wrong) + r"\b"
            # A callable replacement inserts `correct` literally; passing the
            # string itself would make `re` interpret any backslash in it.
            text = counted_sub(
                "غلط‌های املایی (بانک)",
                pat,
                lambda m, fixed=correct: fixed,
                text,
            )

        # 12. Join a detached common suffix to the preceding word with a ZWNJ
        #     ("کتاب ها" -> "کتاب‌ها"). Only suffixes written as a separate
        #     token are handled: a word that merely ends in the same letters
        #     (e.g. «سلام», «دختر») cannot be told apart from word + suffix
        #     without a dictionary, so attached forms are left alone.
        suffixes = r"(?:تر(?:ین)?|ها|ام|ات|اش|ایم|اید|اند)"
        text = counted_sub(
            "نیم‌فاصله پسوندها",
            rf"(?<=[\u0621-\u064A\u067E-\u06D3])[ \t]+(?={suffixes}(?![\w\u200c]))",
            ZWNJ,
            text,
        )

        # 13. Collapse runs of spaces; trim the ends.
        text = counted_sub("فاصله‌های اضافی", r"[ ]{2,}", " ", text)
        if text.startswith(" "):
            text = text.lstrip()
            report_counts["فاصله‌های اضافی"] += 1
        if text.endswith(" "):
            text = text.rstrip()
            report_counts["فاصله‌های اضافی"] += 1

        # 14. No whitespace just inside paired marks: (), «», [], {}, ⟨⟩.
        inner_space_rules = (
            (r"\(\s+", "("), (r"\s+\)", ")"),
            (r"«\s+", "«"), (r"\s+»", "»"),
            (r"\[\s+", "["), (r"\s+\]", "]"),
            (r"\{\s+", "{"), (r"\s+\}", "}"),
            (r"⟨\s+", "⟨"), (r"\s+⟩", "⟩"),
        )
        for pattern, mark in inner_space_rules:
            text = counted_sub("فاصله قبل/بعد علائم", pattern, mark, text)

        return text

    fixed_text = fix_all(full_text)

    # Leave the document untouched when no correction applied.
    if fixed_text != full_text:
        text_obj.setString(fixed_text)

    # Final report: total first, then one line per non-zero counter.
    total = sum(report_counts.values())
    if not total:
        return

    lines = [
        f"{k}: {en_to_fa_numbers(str(v))}"
        for k, v in report_counts.items()
        if v > 0
    ]
    report = f"مجموع اصلاحات: {en_to_fa_numbers(str(total))}\n" + "\n".join(lines)

    parent_win = doc.CurrentController.Frame.ContainerWindow
    mb = parent_win.getToolkit().createMessageBox(
        parent_win, MESSAGEBOX, MBButtons.BUTTONS_OK,
        "گزارش اصلاح متن", report
    )
    mb.execute()