[sword-devel] Displaying the chapter and verse in other numbers

Michael Johnson Michael at eBible.org
Sun Jul 21 11:57:58 MST 2019


On 7/20/19 5:41 PM, Troy A. Griffitts wrote:
>
> Michael, I would be interested to hear your logic for internationalizing a numeric parser.  I always assumed it wasn't as simple as providing alternate symbol codes for 0-9.  Think Roman Numerals (SWORD already supports Roman Numerals).  If it was as simple as swapping out, say, 47 with an alternate symbol for '4' and an alternate symbol for '7', it wouldn't be much of a problem to solve, but I suspect other languages do things like: XLVII.
>
Digit swapping works for most languages, but not for Ethiopic, Roman, and Coptic. There may be others that I haven't encountered, yet. LGPL C# code that I wrote follows for going from Hindu-Arabic (0-9, like you are most used to) to a variety of number systems:

        // The following linguistically diverse digit sets might not display correctly unless
        // you have appropriate Unicode fonts installed.
        //
        // The plain positional sets below hold ten characters, indexed by the digit value 0-9;
        // ReplaceDigits applies them by digit-for-digit substitution. The Coptic, Ethiopic, and
        // Roman constants are routed by ReplaceDigits to dedicated conversion routines instead.
        public const string ArabicDigits = "٠١٢٣٤٥٦٧٨٩";
        public const string BengaliDigits = "০১২৩৪৫৬৭৮৯";
        public const string ChineseSimplifiedDigits = "〇一二三四五六七八九";
        public const string ChineseTraditionalDigits = "零壹貳參肆伍陸柒捌玖";
        public const string ChineseHuaMaDigits = "〇〡〢〣〤〥〦〧〨〩";
        // Coptic tables: each value is stored as a PAIR of chars (letter followed by a combining
        // overbar), so the entry for digit d (1-9) starts at index 2 * (d - 1). There is no entry for 0.
        public const string CopticUnits = "\u2c81\u0305\u2C83\u0305\u2C85\u0305\u2C87\u0305\u2C89\u0305\u2C8B\u0305\u2C8D\u0305\u2C8F\u0305\u2C91\u0305";  // First 9 coptic letters with single overbar
        public const string CopticTens = "\u2C93\u0305\u2C95\u0305\u2C97\u0305\u2C99\u0305\u2C9B\u0305\u2C9D\u0305\u2C9F\u0305\u2CA1\u0305\u03E5\u0305";   // 10th thru 18th coptic letters with single overbar
        // NOTE(review): this string contains ten letter+overbar pairs, one more than the nine the
        // trailing comment describes; confirm that the ninth pair (the one used for 900) is the intended letter.
        public const string CopticHundreds = "\u2CA3\u0305\u2CA5\u0305\u2CA7\u0305\u2CA9\u0305\u2CAB\u0305\u2CAD\u0305\u2CAF\u0305\u2CB1\u0305\u2CB3\u0305\u2CB5\u0305";  // 19th thru 27th coptic letters with single overbar
        public const string CopticThousands = "\u2c81\u033F\u2C83\u033F\u2C85\u033F\u2C87\u033F\u2C89\u033F\u2C8B\u033F\u2C8D\u033F\u2C8F\u033F\u2C91\u033F";    //    Thousands are the same as units, but double overbar.
        public const string CopticTenThousands = "\u2C93\u033F\u2C95\u033F\u2C97\u033F\u2C99\u033F\u2C9B\u033F\u2C9D\u033F\u2C9F\u033F\u2CA1\u033F\u03E5\u033F";    // Pattern continues: one bar added per period (10^3)
        public const string DevangariDigits = "०१२३४५६७८९";   // Devanagari digits
        // Ethiopic tables: index 0 is a space placeholder so that indexes 1-9 line up with the
        // digit values; the conversion routine only reads indexes 1-9.
        public const string EthiopicDigits = " ፩፪፫፬፭፮፯፰፱";
        public const string EthiopicTens = " ፲፳፴፵፶፷፸፹፺";
        public const string EthiopicHundred = "፻";
        public const string EthiopicTenThousand = "፼";
        public const string GugaratiDigits = "૦૧૨૩૪૫૬૭૮૯";   // Gujarati digits
        public const string GurmukhiDigits = "੦੧੨੩੪੫੬੭੮੯";
        public const string KannadaDigits = "೦೧೨೩೪೫೬೭೮೯";
        public const string KhmerDigits = "០១២៣៤៥៦៧៨៩";
        public const string LaoDigits = "໐໑໒໓໔໕໖໗໘໙";
        public const string LimbuDigits = "᥆᥇᥈᥉᥊᥋᥌᥍᥎᥏";
        public const string MalayalamDigits = "൦൧൨൩൪൫൬൭൮൯";
        public const string MongolianDigits = "᠐᠑᠒᠓᠔᠕᠖᠗᠘᠙";
        public const string BurmeseDigits = "၀၁၂၃၄၅၆၇၈၉";
        public const string OriyaDigits = "୦୧୨୩୪୫୬୭୮୯";
        public const string PersianDigits = "۰۱۲۳۴۵۶۷۸۹";   // Same as Urdu digits
        public const string TamilDigits = "௦௧௨௩௪௫௬௭௮௯";
        public const string TeluguDigits = "౦౧౨౩౪౫౬౭౮౯";
        public const string ThaiDigits = "๐๑๒๓๔๕๖๗๘๙";
        public const string TibetanDigits = "༠༡༢༣༤༥༦༧༨༩";
        public const string UrduDigits = "۰۱۲۳۴۵۶۷۸۹";
        // Index 0 is a space placeholder. ReplaceDigits compares against this constant to select
        // RomanNumerals, which builds its output from its own lookup table.
        public const string RomanDigits = " ⅠⅡⅢⅣⅤⅥⅦⅧⅨ";
        // The digit set currently selected by SetDigitLocale; String.Empty means "no localization".
        protected static string CurrentDigits = String.Empty;

        public static string NumberSample()
        {
            if (!String.IsNullOrEmpty(CurrentDigits))
                return CurrentDigits;
            else
                return "0123456789";
        }

        /// <summary>
        /// true iff we are changing digits to an alternate writing system
        /// </summary>
        public static bool LocalizingDigits
        {
            get { return CurrentDigits != String.Empty; }
        }

        /// <summary>
        /// Set the locale for localizing digits for display in Bibles for verse numbers, etc.
        /// </summary>
        /// <param name="digitPlace">string with one of the exact names of supported digit sets</param>
        /// <returns>the set string if successful, or "Default" otherwise</returns>
        public static string SetDigitLocale(string digitPlace)
        {
            switch (digitPlace)
            {
                case "Arabic":
                    CurrentDigits = ArabicDigits;
                    break;
                case "Bengali":
                    CurrentDigits = BengaliDigits;
                    break;
                case "Burmese (Myanmar)":
                    CurrentDigits = BurmeseDigits;
                    break;
                case "Chinese (Simplified)":
                    CurrentDigits = ChineseSimplifiedDigits;
                    break;
                case "Chinese (Traditional)":
                    CurrentDigits = ChineseTraditionalDigits;
                    break;
                case "Chinese (hua ma)":
                    CurrentDigits = ChineseHuaMaDigits;
                    break;
                case "Coptic":
                    CurrentDigits = CopticUnits;
                    break;
                case "Devangari":
                    CurrentDigits = DevangariDigits;
                    break;
                case "Ethiopic (Ge'ez)":
                    CurrentDigits = EthiopicDigits;
                    break;
                case "Gujarati":
                    CurrentDigits = GugaratiDigits;
                    break;
                case "Gurmukhi":
                    CurrentDigits = GurmukhiDigits;
                    break;
                case "Kannada":
                    CurrentDigits = KannadaDigits;
                    break;
                case "Khmer":
                    CurrentDigits = KhmerDigits;
                    break;
                case "Lao":
                    CurrentDigits = LaoDigits;
                    break;
                case "Limbu":
                    CurrentDigits = LimbuDigits;
                    break;
                case "Malayalam":
                    CurrentDigits = MalayalamDigits;
                    break;
                case "Mongolian":
                    CurrentDigits = MongolianDigits;
                    break;
                case "Oriya":
                    CurrentDigits = OriyaDigits;
                    break;
                case "Roman":
                    CurrentDigits = RomanDigits;
                    break;
                case "Tamil":
                    CurrentDigits = TamilDigits;
                    break;
                case "Telugu":
                    CurrentDigits = TeluguDigits;
                    break;
                case "Thai":
                    CurrentDigits = ThaiDigits;
                    break;
                case "Tibetan":
                    CurrentDigits = TibetanDigits;
                    break;
                case "Persian":
                case "Urdu":
                    CurrentDigits = UrduDigits;
                    break;
                case "Hindu-Arabic":
                case "Default":
                default:
                    CurrentDigits = String.Empty;
                    digitPlace = "Default";
                    break;
            }
            return digitPlace;
        }

        /// <summary>
        /// Replaces all numbers with appropriate numbers in the current writing system
        /// </summary>
        /// <param name="s">string that might include numbers</param>
        /// <returns>string with numbers localized</returns>
        public static string LocalizeDigits(string s)
        {
            return ReplaceDigits(s, CurrentDigits);
        }

        /// <summary>
        /// Some writing systems just have exact equivalents for 0 through 9 and the same place values.
        /// Those are easy, with a simple digit-for-digit substitution. Others require some logic beyond
        /// that.
        /// </summary>
        /// <param name="s">String that may have digits to localize</param>
        /// <param name="newDigits">one of the supported digit strings</param>
        /// <returns></returns>
        public static string ReplaceDigits(string s, string newDigits)
        {   // TODO: implement logic for the different Chinese numeral systems, which require more than simple digit substitution, and which have many dialect and usage options.
            if ((newDigits == null) || (newDigits.Length < 10))
            {   // Nothing to do; no conversion specified
                return s;
            }
            if (newDigits == EthiopicDigits)
            {   // Gotta count differently
                return EthiopicNumerals(s);
            }
            else if (newDigits == RomanDigits)
            {   // Seriously old school
                return RomanNumerals(s);
            }
            else if (newDigits == CopticUnits)
            {   // Older than Roman Numerals, but not the same as Heirogliphic numbers or ancient Egyptian numbers
                return CopticNumerals(s);
            }
            else
            {   // Simple digit substitution with normal place values
                StringBuilder sb = new StringBuilder();
                int n;
                foreach (char c in s)
                {
                    n = ((int)c) - ((int)'0');
                    if ((n >= 0) && (n <= 9))
                    {
                        sb.Append(newDigits[n]);
                    }
                    else
                    {
                        sb.Append(c);
                    }
                }
                return sb.ToString();
            }
        }

        /// <summary>
        /// If the input ch is a localized digit in the string localDigits, return a digit in the range '0'-'9',
        /// otherwise return the input character.
        /// </summary>
        /// <param name="ch">Possible localized digit</param>
        /// <param name="localDigits">String of 0-9 in local digits.</param>
        /// <returns>Standardized digit or input character</returns>
        public static char StandardDigit(char ch)
        {
            char result = ch;
            int i = CurrentDigits.IndexOf(ch);
            if (i >= 0)
                result = (char)(i + (int)'0');
            return result;
        }

        /// <summary>
        /// Coptic numbers have no 0, but have different symbols for units, tens, hundreds, thousands, etc.
        /// </summary>
        /// <param name="s">Digits to convert less than or equal to 999</param>
        /// <returns>String with coptic numerals</returns>
        public static string CopticNumerals(string s)
        {
            StringBuilder sb = new StringBuilder();
                        int i, n;
            int place = 0;
            for (i = s.Length - 1; i >= 0; i--)
            {
                if (Char.IsDigit(s[i]))
                {
                    if (s[i] == '0')
                    {
                        place++;
                    }
                    else
                    {
                        n = 2 * (((int)s[i]) - ((int)'1'));
                        if (place == 0)
                        {
                            sb.Insert(0, CopticUnits[n + 1]); // Letter
                            sb.Insert(0, CopticUnits[n]);   // Combining overbar
                            place++;
                        }
                        else if (place == 1)
                        {
                            sb.Insert(0, CopticTens[n + 1]);
                            sb.Insert(0, CopticTens[n]);
                            place++;
                        }
                        else if (place == 2)
                        {
                            sb.Insert(0, CopticHundreds[n + 1]);
                            sb.Insert(0, CopticHundreds[n]);
                            place++;
                        }
                        else if (place == 3)
                        {
                            sb.Insert(0, CopticThousands[n + 1]);
                            sb.Insert(0, CopticThousands[n]);
                            place++;
                        }
                        else if (place == 4)
                        {
                            sb.Insert(0, CopticTenThousands[n + 1]);
                            sb.Insert(0, CopticTenThousands[n]);
                            place++;
                        }
                        else if (place >= 5)
                        {
                            return s;   // Give up and fail gracefully for numbers bigger than we designed for.
                        }
                    }
                }
                else
                {   // Non-digit: just copy it.
                    place = 0;
                    sb.Insert(0, s[i]);
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Writing big numbers without a 0 is different.
        /// </summary>
        /// <param name="s">String with numbers to localize to Ethiopic</param>
        /// <returns>String with numbers localized to Ethiopic Ge'ez.</returns>
        public static string EthiopicNumerals(string s)
        {
            StringBuilder sb = new StringBuilder();
            int i, n;
            int place = 0;
            for (i = s.Length - 1; i >= 0; i--)
            {
                if (Char.IsDigit(s[i]))
                {
                    n = ((int)s[i]) - ((int)'0');
                    if (place == 0)
                    {
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicDigits[n]);
                        }
                        place++;
                    }
                    else if (place == 1)
                    {
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicTens[n]);
                        }
                        place++;
                    }
                    else if (place == 2)
                    {
                        sb.Insert(0, EthiopicHundred);
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicDigits[n]);
                        }
                        place++;
                    }
                    else if (place == 3)
                    {
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicTens[n]);
                        }
                        place++;
                    }
                    else if (place == 4)
                    {
                        sb.Insert(0, EthiopicTenThousand);
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicDigits[n]);
                        }
                        place++;
                    }
                    else if (place == 5)
                    {
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicDigits[n]);
                        }
                        place = 0;
                    }
                }
                else
                {
                    place = 0;
                    sb.Insert(0, s[i]);
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Roman numerals kind of break down after 3,000 (MMM) in terms of common use,
        /// which seems to be limited to chapters and years in this decade. This function
        /// uses Unicode Roman numerals, but an alternate routine could easily be created
        /// that uses plain letters I, V, X, L, C, and M or i, v, x, l, c, and m by replacing
        /// or providing choices for the strings in RomanUnits in this method.
        /// </summary>
        /// <param name="s">string that may have numbers to "Romanize"</param>
        /// <returns>string with numbers as Roman numerals</returns>
        public static string RomanNumerals(string s)
        {
            string[,] RomanUnits = {{"","Ⅰ","Ⅱ","Ⅲ","Ⅳ","Ⅴ","Ⅵ","Ⅶ","Ⅷ","Ⅸ"},
            {"", "Ⅹ","ⅩⅩ","ⅩⅩⅩ","ⅩⅬ","Ⅼ","ⅬⅩ","ⅬⅩⅩ","ⅬⅩⅩⅩ","ⅩⅭ"},
            {"", "Ⅽ", "ⅭⅭ", "ⅭⅭⅭ", "ⅭⅮ", "Ⅾ", "ⅮⅭ", "ⅮⅭⅭ", "ⅮⅭⅭⅭ", "ⅩⅯ"},
            { "", "Ⅿ", "ⅯⅯ", "ⅯⅯⅯ", "ⅯV̅", "V̅", "V̅Ⅿ", "V̅ⅯⅯ", "V̅ⅯⅯⅯ", "ⅯX̅̅"}};
            StringBuilder sb = new StringBuilder();
            int i, n;
            int place = 0;
            for (i = s.Length - 1; i >= 0; i--)
            {
                if (Char.IsDigit(s[i]))
                {
                    n = ((int)s[i]) - ((int)'0');
                    sb.Insert(0, RomanUnits[place, n]);
                    place++;
                    if (place > 3)
                        place = 0;
                }
                else
                {
                    place = 0;
                    sb.Insert(0, s[i]);
                }
            }
            sb.Replace("ⅩⅡ", "Ⅻ");
            sb.Replace("ⅩⅠ", "Ⅺ");
            return sb.ToString();
        }


> On 7/20/19 1:49 PM, Michael Johnson wrote:
>> It is an important question. I have a way to handle it in other formats using Haiola. I suspect that proper handling it in SWORD will take a design change. In Haiola, the source chapter and verse numbers are always as in English. I can select display chapter and verse numbers from many options.
>>
>> Aloha,
>> Michael
>> http://mpj.us
>>
>>
>> On Jul 20, 2019, at 10:39, Cyrille <lafricain79 at gmail.com <mailto:lafricain79 at gmail.com>> wrote:
>>
>>> Hello,
>>> Does nobody else have a suggestion for this question? Should I open a bug report?
>>> Is it not an important question?
>>>
>>> Best regards, Br Cyrille
>>>
>>> Le 17/07/2019 à 23:14, David Haslam a écrit :
>>>> Several other non-Roman scripts have their own digit characters corresponding to our 0-9.
>>>>
>>>> IMHO the possibilities for using non-Roman digits ought to be facilitated in the back-end.
>>>>
>>>> Even so, each front-end would then require a new UI control to select which script should be used to display the chapter and verse numbers.
>>>>
>>>> An alternate idea would be to specify the non-Roman digits as a ten character UTF-8 string in a module .conf file.
>>>>
>>>> Adapting the back-end to use this module specific configuration key might be much simpler. 
>>>>
>>>> Front-ends would still require adapting for the UI features that require chapter and verse numbers to be input or displayed or adjusted by controls. 
>>>>
>>>> Best regards,
>>>>
>>>> David 
>>>>
>>>> Sent from ProtonMail Mobile
>>>>
>>>>
>>>> On Wed, Jul 17, 2019 at 22:00, Cyrille <lafricain79 at gmail.com <mailto:lafricain79 at gmail.com>> wrote:
>>>>> Hello,
>>>>> I'm still working on a modern NT-Ps-Pr translation in Burmese. My friends from Myanmar send me the text. But they don't use Arabic numerals; they have their own numerals.
>>>>> It is very important for them to write in their own numerals (if I had to use theirs, I would be lost ;) ).
>>>>> Is it possible to add this possibility to the frontend, or this should be in sword directly?
>>>>> Need I to open a new issue on the bug tracker?
>>>>>
>>>>> Example of text, in bold the chapter and verses:
>>>>>
>>>>> ၃ ၁။ ထိုနေ့ရက်တို့၌ ယောဟန်ဘတ္တိဇံသည် ရောက်လာ၍ ဂျူဒေးယပြည်၊ တောကန္တာရတွင် ဟော
>>>>>
>>>>> *၂။* ပြောသည်မှာ၊- နောင်တရကြလော့၊ အကြောင်းမူကား ကောင်းကင်နိုင်ငံတော်သည် ရောက်လုနီးပြီဟူ၍တည်း။-
>>>>>
>>>>> *၃။* ပရောဖက်အီဇာယဟောထားခဲ့သည့်အတိုင်း၊ ထာ၀ရ ဘုရားသခင်ကြွလာတော်မူမည့်လမ်းကိုပြင်ဆင်ကြ လော့၊ ကိုယ်တော်၏ လမ်းများကို ဖြောင့်တန်းစေကြ လော့ဟူ၍ တောကန္တာရ၌ ကြွေးကြော်သောသူ၏အသံသည်ကား ဤသူပင်ဖြစ်သတည်း။-
>>>>>
>>>>> *၄။* ယောဟန်သည် ကုလားအုတ်မွေးဖြင့် ရက်လုပ်သောအဝတ်ကိုဝတ်ဆင်ကာ ခါး၌ သားရေခါးစည်းကြိုးကိုစည်းထား၏။ သူ၏အစာသည်ကား ကျိုင်းကောင်နှင့် တောပျားရည်တို့သာဖြစ်၏။-
>>>>>
>>>>> *၅။* ထိုအခါ ဂျေရုဆလင်မြို့မှစ၍ ဂျူဒေးယနယ်တစ်နယ် လုံးနှင့် ဂျော်ဒန်မြစ်တစ်လျှောက်ရှိဒေသမှ လူအပေါင်း တို့သည် သူ့ထံသို့ လာကြ၏။-
>>>>>
>>>>> ၆။ ထိုသူတို့သည် မိမိတို့၏ အပြစ်များကို ထုတ်ဖော်ဝန်ခံကြလျက် ဂျော်ဒန်မြစ်တွင် သူ့အားဖြင့် ဆေးကြောခြင်းကို ခံယူကြ၏။
>>>>>
>>>>>
>>>>>
>>>>
>>>>
>>>>
>>>> _______________________________________________
>>>> sword-devel mailing list: sword-devel at crosswire.org
>>>> http://www.crosswire.org/mailman/listinfo/sword-devel
>>>> Instructions to unsubscribe/change your settings at above page
>>>
>>> _______________________________________________
>>> sword-devel mailing list: sword-devel at crosswire.org <mailto:sword-devel at crosswire.org>
>>> http://www.crosswire.org/mailman/listinfo/sword-devel
>>> Instructions to unsubscribe/change your settings at above page
>>
>> _______________________________________________
>> sword-devel mailing list: sword-devel at crosswire.org
>> http://www.crosswire.org/mailman/listinfo/sword-devel
>> Instructions to unsubscribe/change your settings at above page
>
> _______________________________________________
> sword-devel mailing list: sword-devel at crosswire.org
> http://www.crosswire.org/mailman/listinfo/sword-devel
> Instructions to unsubscribe/change your settings at above page


-- 
signature

Aloha,
*/Michael Johnson/**
PO BOX 881143 • PUKALANI HI 96788-1143*• USA
mljohnson.org <http://mljohnson.org> • Phone: +1 808-333-6921 • Skype: kahunapule



More information about the sword-devel mailing list