[sword-devel] [sword-svn] r2862 - in trunk: include src/modules/filters

Greg Hellings greg.hellings at gmail.com
Mon Jul 8 06:16:23 MST 2013


Chris,

This commit broke building with gcc on Fedora 18.

[  0%] Building CXX object CMakeFiles/sword.dir/src/mgr/swmgr.cpp.o
/usr/bin/c++   -DGLOBCONFPATH=\"/sword.conf\" -D_FTPLIB_NO_COMPAT -D_ICU_
-Dsword_EXPORTS -g3 -Wall -O0  -fPIC -I/home/greg/Source/sword/include
 -o CMakeFiles/sword.dir/src/mgr/swmgr.cpp.o -c
/home/greg/Source/sword/src/mgr/swmgr.cpp
In file included from /home/greg/Source/sword/src/mgr/swmgr.cpp:81:0:
/home/greg/Source/sword/include/utf8arabicpoints.h:39:15: error: extra
qualification ‘sword::UTF8ArabicPoints::’ on member ‘next_mark’
[-fpermissive]

--Greg


On Mon, Jul 8, 2013 at 4:08 AM, <chrislit at crosswire.org> wrote:

> Author: chrislit
> Date: 2013-07-08 02:08:04 -0700 (Mon, 08 Jul 2013)
> New Revision: 2862
>
> Modified:
>    trunk/include/utf8arabicpoints.h
>    trunk/src/modules/filters/utf8arabicpoints.cpp
> Log:
> applied submitted UTF8ArabicPoints code (needs testing because I replaced
> deprecated bcopy with memmove)
>
> Modified: trunk/include/utf8arabicpoints.h
> ===================================================================
> --- trunk/include/utf8arabicpoints.h    2013-07-08 09:01:53 UTC (rev 2861)
> +++ trunk/include/utf8arabicpoints.h    2013-07-08 09:08:04 UTC (rev 2862)
> @@ -35,6 +35,8 @@
>         UTF8ArabicPoints();
>         virtual ~UTF8ArabicPoints();
>         virtual char processText(SWBuf &text, const SWKey *key = 0, const
> SWModule *module = 0);
> +private:
> +       static char* UTF8ArabicPoints::next_mark(const char* from, int*
> mark_size);
>  };
>
>  SWORD_NAMESPACE_END
>
> Modified: trunk/src/modules/filters/utf8arabicpoints.cpp
> ===================================================================
> --- trunk/src/modules/filters/utf8arabicpoints.cpp      2013-07-08
> 09:01:53 UTC (rev 2861)
> +++ trunk/src/modules/filters/utf8arabicpoints.cpp      2013-07-08
> 09:08:04 UTC (rev 2862)
> @@ -40,24 +40,140 @@
>
>  UTF8ArabicPoints::~UTF8ArabicPoints(){};
>
> +char* UTF8ArabicPoints::next_mark(const char* from, int* mark_size)
> +{
> +       // Arabic vowel points currently targeted for elimination:
> +       // Table entries excerpted from
> +       // http://www.utf8-chartable.de/unicode-utf8-table.pl.
> +       // Code   UTF-8     Description
> +       // point
> +       // -----  --------- -----------
> +       // U+064B d9 8b     ARABIC FATHATAN
> +       // U+064C d9 8c     ARABIC DAMMATAN
> +       // U+064D d9 8d     ARABIC KASRATAN
> +       // U+064E d9 8e     ARABIC FATHA
> +       // U+064F d9 8f     ARABIC DAMMA
> +       // U+0650 d9 90     ARABIC KASRA
> +       // U+0651 d9 91     ARABIC SHADDA
> +       // U+0652 d9 92     ARABIC SUKUN
> +       // U+0653 d9 93     ARABIC MADDAH ABOVE
> +       // U+0654 d9 94     ARABIC HAMZA ABOVE
> +       // U+0655 d9 95     ARABIC HAMZA BELOW
> +       //
> +       // U+FC5E ef b1 9e  ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED
> FORM
> +       // U+FC5F ef b1 9f  ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED
> FORM
> +       // U+FC60 ef b1 a0  ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM
> +       // U+FC61 ef b1 a1  ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM
> +       // U+FC62 ef b1 a2  ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM
> +       // U+FC63 ef b1 a3  ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF
> ISOLATED FORM
> +       //
> +       // U+FE70 ef b9 b0  ARABIC FATHATAN ISOLATED FORM
> +       // U+FE71 ef b9 b1  ARABIC TATWEEL WITH FATHATAN ABOVE
> +       // U+FE72 ef b9 b2  ARABIC DAMMATAN ISOLATED FORM
> +       // U+FE73 ef b9 b3  ARABIC TAIL FRAGMENT
> +       // U+FE74 ef b9 b4  ARABIC KASRATAN ISOLATED FORM
> +       // U+FE75 ef b9 b5  (unassigned code point)
> +       // U+FE76 ef b9 b6  ARABIC FATHA ISOLATED FORM
> +       // U+FE77 ef b9 b7  ARABIC FATHA MEDIAL FORM
> +       // U+FE78 ef b9 b8  ARABIC DAMMA ISOLATED FORM
> +       // U+FE79 ef b9 b9  ARABIC DAMMA MEDIAL FORM
> +       // U+FE7A ef b9 ba  ARABIC KASRA ISOLATED FORM
> +       // U+FE7B ef b9 bb  ARABIC KASRA MEDIAL FORM
> +       // U+FE7C ef b9 bc  ARABIC SHADDA ISOLATED FORM
> +       // U+FE7D ef b9 bd  ARABIC SHADDA MEDIAL FORM
> +       // U+FE7E ef b9 be  ARABIC SUKUN ISOLATED FORM
> +       // U+FE7F ef b9 bf  ARABIC SUKUN MEDIAL FORM
>
> -char UTF8ArabicPoints::processText(SWBuf &text, const SWKey *key, const
> SWModule *module) {
> -       if (!option) {
> -               //The UTF-8 range 0xFC 0xE5 to 0xFC 0x63 consist of Arabic
> vowel marks so block those out.
> -               // Also ranges 0xFE70 til OxFE7F and 0x064b-0x0655
> -               SWBuf orig = text;
> -               const unsigned char* from = (unsigned char*)orig.c_str();
> -               for (text = ""; *from; from++) {
> -                       if ((*from == 0xD9) && (*(from + 1) >= 0x8B &&
> *(from + 1) <= 0x95)) {
> -                               from++;
> +       unsigned char* byte = (unsigned char*) from;
> +       for (; *byte; ++byte) {
> +               if (byte[0] == 0xD9) {
> +                       if (byte[1] >= 0x8B && byte[1] <= 0x95) {
> +                         *mark_size = 2;
> +                         break;
>                         }
> -                       else if ((*from == 0xEF) &&
> -                                (((*(from + 1) == 0xB1) && (*(from + 2)
> >= 0x9E && *(from + 2) <= 0xA3)) ||
> -                                 ((*(from + 1) == 0xB9) && (*(from + 2)
> >= 0x4B && *(from + 2) <= 0x55)))) {
> -                               from += 2;
> -                       }
> +            continue;
>                 }
> +               if (byte[0] == 0xEF) {
> +                       if (byte[1] == 0xB1) {
> +                if (byte[2] >= 0x9E && byte[2] <= 0xA3) {
> +                    *mark_size = 3;
> +                    break;
> +                }
> +                continue;
> +            }
> +                       if (byte[1] == 0xB9) {
> +                if (byte[2] >= 0xB0 && byte[2] <= 0xBF) {
> +                    *mark_size = 3;
> +                    break;
> +                }
> +                continue;
> +            }
> +               }
>         }
> +       return (char*)byte;
> +}
> +
> +
> +char UTF8ArabicPoints::processText(SWBuf &text, const SWKey *, const
> SWModule *) {
> +    // A non-zero/true option setting means that setOptionValue("On")
> +    // was called which apparently means that Arabic Vowel Marks are
> ENABLED,
> +       // so the filter's actions are DISABLED.
> +       if (option)
> +               return 0;
> +
> +       // Eliminate Arabic vowel marks from the text.
> +       // The recognized marks are determined by the "next_mark" function.
> +
> +       // If next_mark were polymorphic (a virtual function or a function
> +       // pointer), this function could be generically used in any filter
> that
> +       // only removed (vs. replaced) areas of text based on the arbitrary
> +       // match criteria encapsulated in the specific next_mark
> +       // implementation.
> +       int mark_size = 0;
> +       char* mark_pos = next_mark(text.c_str(), &mark_size);
> +
> +       // Here and at the end of the loop,
> +       // test BOTH mark_pos AND *mark_pos for safety and to give
> next_mark
> +       // the option of returning either NULL or a pointer to the null
> +       // terminator when done.
> +       if (!mark_pos || !*mark_pos)
> +               return 0; // no marks found.
> +
> +       // Purposely granting write access into SWBuf internal buffer via
> +       // "end_of_output" avoids a needless temporary SWBuf copy.
> +       // Everything before the first mark is already in its final
> position
> +       // and can be safely ignored. So start appending at the current
> mark.
> +       char* end_of_output = mark_pos;
> +
> +       // For consistency, input starts at (vs. after) the first mark as
> well
> +       // -- not a problem since the mark itself gets skipped, anyway.
> +       const char* start_of_input = mark_pos;
> +       do {
> +               // At this point, "mark_pos" and "mark_pos+mark_size"
> delimit
> +               // the text to drop.
> +               // "start_of_input" is either mark_pos or any text between
> the
> +               // end of any previous mark and the current mark_pos.
> +               // This text is now ready to be moved into the output.
> +               int ready_size = mark_pos - start_of_input;
> +               if (ready_size > 0) {
> +                       // Append the input text before the current mark
> to the
> +                       // output.
> +                       // Must use memmove vs. strncpy because the final
> +                       // end_of_output may overtake the original
> +                       // start_of_input.
> +                       memmove(end_of_output, start_of_input, ready_size);
> +                       // Keep appending to end_of_output.
> +                       end_of_output += ready_size;
> +               }
> +               // Ensure the mark never gets copied.
> +               start_of_input = mark_pos + mark_size;
> +               // Find the next mark.
> +               mark_pos = next_mark(start_of_input, &mark_size);
> +
> +       } while (mark_pos && *mark_pos); // No more marks.
> +
> +       // Copy any trailing input text AND always the terminating null.
> +       memmove(end_of_output, start_of_input, strlen(start_of_input)+1);
>         return 0;
>  }
>
>
>
> _______________________________________________
> sword-cvs mailing list
> sword-cvs at crosswire.org
> http://www.crosswire.org/mailman/listinfo/sword-cvs
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.crosswire.org/pipermail/sword-devel/attachments/20130708/c4751c0e/attachment-0001.html>


More information about the sword-devel mailing list