{*******************************************************} { The Delphi Unicode Controls Project } { } { http://home.ccci.org/wolbrink } { } { Copyright (c) 2002, Troy Wolbrink (wolbrink@ccci.org) } { } { Thanks to Francisco Leong for providing the Pascal } { conversion of ConvertUTF7.c (by David B. Goldsmith) } { } {*******************************************************} unit ConvertUTF7; interface function WideStringToUTF7(const W: WideString): AnsiString; function UTF7ToWideString(const S: AnsiString; AllowInvalid: Boolean = False): WideString; implementation uses SysUtils; resourcestring SBufferOverflow = 'Buffer overflow'; SInvalidUTF7 = 'Invalid UTF7'; function ConvertUCS2toUTF7(var sourceStart: PWideChar; sourceEnd: PWideChar; var targetStart: PAnsiChar; targetEnd: PAnsiChar; optional: Boolean; verbose: Boolean): Integer; forward; function WideStringToUTF7(const W: WideString): AnsiString; var SourceStart, SourceEnd: PWideChar; TargetStart, TargetEnd: PAnsiChar; begin if W = '' then Result := '' else begin SetLength(Result, Length(W) * 7); // Assume worst case SourceStart := PWideChar(@W[1]); SourceEnd := PWideChar(@W[Length(W)]) + 1; TargetStart := PAnsiChar(@Result[1]); TargetEnd := PAnsiChar(@Result[Length(Result)]) + 1; if ConvertUCS2toUTF7(SourceStart, SourceEnd, TargetStart, TargetEnd, True, False) <> 0 then raise Exception.Create(SBufferOverflow); SetLength(Result, TargetStart - PAnsiChar(@Result[1])); end; end; function ConvertUTF7toUCS2(var sourceStart: PAnsiChar; sourceEnd: PAnsiChar; var targetStart: PWideChar; targetEnd: PWideChar): Integer; forward; function UTF7ToWideString(const S: AnsiString; AllowInvalid: Boolean = False): WideString; var SourceStart, SourceEnd: PAnsiChar; TargetStart, TargetEnd: PWideChar; begin if (S = '') then Result := '' else begin SetLength(Result, Length(S)); // Assume Worst case SourceStart := PAnsiChar(@S[1]); SourceEnd := PAnsiChar(@S[Length(S)]) + 1; TargetStart := PWideChar(@Result[1]); TargetEnd := PWideChar(@Result[Length(Result)]) + 1; case ConvertUTF7toUCS2(SourceStart, SourceEnd, TargetStart, TargetEnd) of 1: if not AllowInvalid then raise Exception.Create(SInvalidUTF7); 2: raise Exception.Create(SBufferOverflow); end; SetLength(Result, TargetStart - PWideChar(@Result[1])); end; end; { ======================================================= } { Translated by: CtoP version 1.2b } { } { From: Knowledge Software Ltd } { 32 Cove Rd, Farnborough, Hants, GU14 0EN, England } { } { Specialists in language translators and code generators } { } { cvtutf7.c 18:01:48 6 Jul 2001 } { ======================================================= } { ======================================================================= } { } { File: ConvertUTF7.c } { Author: David B. Goldsmith } { Copyright (C) 1994, 1996 Taligent, Inc. All rights reserved. } { } { This code is copyrighted. Under the copyright laws, this code may not } { be copied, in whole or part, without prior written consent of Taligent. } { } { Taligent grants the right to use this code as long as this ENTIRE } { copyright notice is reproduced in the code. The code is provided } { AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR } { IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF } { MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT } { WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING, } { WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS } { INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY } { LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN } { IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. } { BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF } { LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE } { LIMITATION MAY NOT APPLY TO YOU. } { } { RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the } { government is subject to restrictions as set forth in subparagraph } { (c)(l)(ii) of the Rights in Technical Data and Computer Software } { clause at DFARS 252.227-7013 and FAR 52.227-19. } { } { This code may be protected by one or more U.S. and International } { Patents. } { } { TRADEMARKS: Taligent and the Taligent Design Mark are registered } { trademarks of Taligent, Inc. } { } { ======================================================================= } type UCS2 = Word; const _base64: AnsiString = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; _direct: AnsiString = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789''(),-./:?'; _optional: AnsiString = '!"#$%&*;<=>@[]^_`{|}'; _spaces: AnsiString = #9#13#10#32; var base64: PAnsiChar; invbase64: array[0..127] of SmallInt; direct: PAnsiChar; optional: PAnsiChar; spaces: PAnsiChar; mustshiftsafe: array[0..127] of AnsiChar; mustshiftopt: array[0..127] of AnsiChar; var needtables: Boolean = True; procedure tabinit; var i: Integer; limit: Integer; begin i := 0; while (i < 128) do begin mustshiftopt[i] := #1; mustshiftsafe[i] := #1; invbase64[i] := -1; Inc(i); end { For }; limit := Length(_Direct); i := 0; while (i < limit) do begin mustshiftopt[Integer(direct[i])] := #0; mustshiftsafe[Integer(direct[i])] := #0; Inc(i); end { For }; limit := Length(_Spaces); i := 0; while (i < limit) do begin mustshiftopt[Integer(spaces[i])] := #0; mustshiftsafe[Integer(spaces[i])] := #0; Inc(i); end { For }; limit := Length(_Optional); i := 0; while (i < limit) do begin mustshiftopt[Integer(optional[i])] := #0; Inc(i); end { For }; limit := Length(_Base64); i := 0; while (i < limit) do begin invbase64[Integer(base64[i])] := i; Inc(i); end { For }; needtables := False; end; { tabinit } function WRITE_N_BITS(x: UCS2; n: Integer; var BITbuffer: Cardinal; var bufferbits: Integer): Integer; begin BITbuffer := BITbuffer or (x and (not (-1 shl n))) shl (32 - n - bufferbits); bufferbits := bufferbits + n; Result := bufferbits; end; { WRITE_N_BITS } function READ_N_BITS(n: Integer; var BITbuffer: Cardinal; var bufferbits: Integer): UCS2; var buffertemp: Cardinal; begin buffertemp := BITbuffer shr (32 - n); BITbuffer := BITbuffer shl n; bufferbits := bufferbits - n; Result := UCS2(buffertemp); end; { READ_N_BITS } function ConvertUCS2toUTF7(var sourceStart: PWideChar; sourceEnd: PWideChar; var targetStart: PAnsiChar; targetEnd: PAnsiChar; optional: Boolean; verbose: Boolean): Integer; var r: UCS2; target: PAnsiChar; source: PWideChar; BITbuffer: Cardinal; bufferbits: Integer; shifted: Boolean; needshift: Boolean; done: Boolean; mustshift: PAnsiChar; begin Result := 0; BITbuffer := 0; bufferbits := 0; shifted := False; source := sourceStart; target := targetStart; r := 0; if needtables then tabinit; if optional then mustshift := @mustshiftopt[0] else mustshift := @mustshiftsafe[0]; repeat done := source >= sourceEnd; if not Done then begin r := Word(source^); Inc(Source); end { If }; needshift := (not done) and ((r > $7F) or (mustshift[r] <> #0)); if needshift and (not shifted) then begin if (Target >= TargetEnd) then begin Result := 2; break; end { If }; target^ := '+'; Inc(target); { Special case handling of the SHIFT_IN character } if (r = UCS2('+')) then begin if (target >= targetEnd) then begin Result := 2; break; end; target^ := '-'; Inc(target); end else shifted := True; end { If }; if shifted then begin { Either write the character to the bit buffer, or pad } { the bit buffer out to a full base64 character. } { } if needshift then WRITE_N_BITS(r, 16, BITbuffer, bufferbits) else WRITE_N_BITS(0, (6 - (bufferbits mod 6)) mod 6, BITbuffer, bufferbits); { Flush out as many full base64 characters as possible } { from the bit buffer. } { } while (target < targetEnd) and (bufferbits >= 6) do begin Target^ := base64[READ_N_BITS(6, BITbuffer, bufferbits)]; Inc(Target); end { While }; if (bufferbits >= 6) then begin if (target >= targetEnd) then begin Result := 2; break; end { If }; end { If }; if (not needshift) then begin { Write the explicit shift out character if } { 1) The caller has requested we always do it, or } { 2) The directly encoded character is in the } { base64 set, or } { 3) The directly encoded character is SHIFT_OUT. } { } if verbose or ((not done) and ((invbase64[r] >= 0) or (r = Integer('-')))) then begin if (target >= targetEnd) then begin Result := 2; Break; end { If }; Target^ := '-'; Inc(Target); end { If }; shifted := False; end { If }; { The character can be directly encoded as ASCII. } end { If }; if (not needshift) and (not done) then begin if (target >= targetEnd) then begin Result := 2; break; end { If }; Target^ := AnsiChar(r); Inc(Target); end { If }; until (done); sourceStart := source; targetStart := target; end; { ConvertUCS2toUTF7 } function ConvertUTF7toUCS2(var sourceStart: PAnsiChar; sourceEnd: PAnsiChar; var targetStart: PWideChar; targetEnd: PWideChar): Integer; var target: PWideChar { Register }; source: PAnsiChar { Register }; BITbuffer: Cardinal { & "Address Of" Used }; bufferbits: Integer { & "Address Of" Used }; shifted: Boolean { Used In Boolean Context }; first: Boolean { Used In Boolean Context }; wroteone: Boolean; base64EOF: Boolean; base64value: Integer; done: Boolean; c: UCS2; prevc: UCS2; junk: UCS2 { Used In Boolean Context }; begin Result := 0; BITbuffer := 0; bufferbits := 0; shifted := False; first := False; wroteone := False; source := sourceStart; target := targetStart; c := 0; if needtables then tabinit; repeat { read an ASCII character c } done := Source >= SourceEnd; if (not done) then begin c := Word(Source^); Inc(Source); end { If }; if shifted then begin { We're done with a base64 string if we hit EOF, it's not a valid } { ASCII character, or it's not in the base64 set. } { } base64value := invbase64[c]; base64EOF := (done or (c > $7F)) or (base64value < 0); if base64EOF then begin shifted := False; { If the character causing us to drop out was SHIFT_IN or } { SHIFT_OUT, it may be a special escape for SHIFT_IN. The } { test for SHIFT_IN is not necessary, but allows an alternate } { form of UTF-7 where SHIFT_IN is escaped by SHIFT_IN. This } { only works for some values of SHIFT_IN. } { } if ((not done) and ((c = Integer('+')) or (c = Integer('-')))) then begin { get another character c } prevc := c; Done := Source >= SourceEnd; if (not Done) then begin c := Word(Source^); Inc(Source); { If no base64 characters were encountered, and the } { character terminating the shift sequence was } { SHIFT_OUT, then it's a special escape for SHIFT_IN. } { } end; if first and (prevc = Integer('-')) then begin { write SHIFT_IN unicode } if (target >= targetEnd) then begin Result := 2; break; end { If }; Target^ := WideChar('+'); Inc(Target); end else begin if (not wroteone) then begin Result := 1; end { If }; end { Else }; ; end { If } else begin if (not wroteone) then begin Result := 1; end { If }; end { Else }; end { If } else begin { Add another 6 bits of base64 to the bit buffer. } WRITE_N_BITS(base64value, 6, BITbuffer, bufferbits); first := False; end { Else }; { Extract as many full 16 bit characters as possible from the } { bit buffer. } { } while (bufferbits >= 16) and (target < targetEnd) do begin { write a unicode } Target^ := WideChar(READ_N_BITS(16, BITbuffer, bufferbits)); Inc(Target); wroteone := True; end { While }; if (bufferbits >= 16) then begin if (target >= targetEnd) then begin Result := 2; Break; end; end { If }; if (base64EOF) then begin junk := READ_N_BITS(bufferbits, BITbuffer, bufferbits); if (junk <> 0) then begin Result := 1; end { If }; end { If }; end { If }; if (not shifted) and (not done) then begin if (c = Integer('+')) then begin shifted := True; first := True; wroteone := False; end { If } else begin { It must be a directly encoded character. } if (c > $7F) then begin Result := 1; end { If }; if (target >= targetEnd) then begin Result := 2; break; end { If }; Target^ := WideChar(c); Inc(Target); end { Else }; end { If }; until (done); sourceStart := source; targetStart := target; end; { ConvertUTF7toUCS2 } initialization { cvtutf7 } base64 := PAnsiChar(_base64); direct := PAnsiChar(_direct); optional := PAnsiChar(_optional); spaces := PAnsiChar(_spaces); end.