Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 : #ifndef TOOLS_INETMIME_HXX
20 : #define TOOLS_INETMIME_HXX
21 :
22 : #include <boost/ptr_container/ptr_vector.hpp>
23 :
24 : #include "tools/toolsdllapi.h"
25 : #include <rtl/alloc.h>
26 : #include <rtl/character.hxx>
27 : #include <rtl/string.hxx>
28 : #include <rtl/strbuf.hxx>
29 : #include <rtl/ustring.hxx>
30 : #include <rtl/tencinfo.h>
31 : #include <tools/debug.hxx>
32 : #include <tools/errcode.hxx>
33 : #include <tools/string.hxx>
34 :
35 : class DateTime;
36 : class INetContentTypeParameterList;
37 : class INetMIMECharsetList_Impl;
38 : class INetMIMEOutputSink;
39 :
40 : class TOOLS_DLLPUBLIC INetMIME // collection of static helpers: character classification, header scanning, charset and UTF conversion
41 : {
42 : public:
43 : enum { SOFT_LINE_LENGTH_LIMIT = 76,
44 : HARD_LINE_LENGTH_LIMIT = 998 };
45 :
46 : /** The various types of message header field bodies, with respect to
47 : encoding and decoding them.
48 :
49 : @descr At the moment, five different types of header fields suffice
50 : to describe how to encode and decode any known message header field
51 : body, but need for more types may arise in the future as new header
52 : fields are introduced.
53 :
54 : @descr The following is an exhaustive list of all the header fields
55 : currently known to our implementation. For every header field, it
56 : includes a 'canonic' (with regard to capitalization) name, a grammar
57 : rule for the body (using RFC 822 and RFC 2234 conventions), a list of
58 : relevant sources of information, and the HeaderFieldType value to use
59 : with that header field. The list is based on RFC 2076 and draft-
60 : palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
61 : ietf/jp-ietf-home.html#anchor1003783>).
62 :
63 : Approved: address ;RFC 1036; HEADER_FIELD_ADDRESS
64 : bcc: #address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
65 : cc: 1#address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
66 : Comments: *text ;RFCs 822, RFC 2047; HEADER_FIELD_TEXT
67 : Content-Base: absoluteURI ;RFC 2110; HEADER_FIELD_TEXT
68 : Content-Description: *text ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
69 : Content-Disposition: disposition-type *(";" disposition-parm)
70 : ;RFC 1806; HEADER_FIELD_STRUCTURED
71 : Content-ID: msg-id ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
72 : Content-Location: absoluteURI / relativeURI ;RFC 2110;
73 : HEADER_FIELD_TEXT
74 : Content-Transfer-Encoding: mechanism ;RFC 2045, RFC 2047;
75 : HEADER_FIELD_STRUCTURED
76 : Content-Type: type "/" subtype *(";" parameter) ;RFC 2045, RFC 2047;
77 : HEADER_FIELD_STRUCTURED
78 : Control: *text ;RFC 1036; HEADER_FIELD_TEXT
79 : Date: date-time ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
80 : Distribution: 1#atom ;RFC 1036; HEADER_FIELD_STRUCTURED
81 : Encrypted: 1#2word ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
82 : Expires: date-time ;RFC 1036; HEADER_FIELD_STRUCTURED
83 : Followup-To: 1#(atom *("." atom)) ;RFC 1036; HEADER_FIELD_STRUCTURED
84 : From: mailbox / 1#mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
85 : In-Reply-To: *(phrase / msg-id) ;RFC 822, RFC 2047;
86 : HEADER_FIELD_ADDRESS
87 : Keywords: #phrase ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
88 : MIME-Version: 1*DIGIT "." 1*DIGIT ;RFC 2045, RFC 2047;
89 : HEADER_FIELD_STRUCTURED
90 : Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
91 : Newsgroups: 1#(atom *("." atom)) ;RFC 1036, RFC 2047;
92 : HEADER_FIELD_STRUCTURED
93 : Organization: *text ;RFC 1036; HEADER_FIELD_TEXT
94 : Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
95 : ["id" msg-id] ["for" addr-spec] ";" date-time ;RFC 822, RFC 1123,
96 : RFC 2047; HEADER_FIELD_STRUCTURED
97 : References: *(phrase / msg-id) ;RFC 822, RFC 2047;
98 : HEADER_FIELD_ADDRESS
99 : Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
100 : Resent-Date: date-time ;RFC 822, RFC 1123, RFC 2047;
101 : HEADER_FIELD_STRUCTURED
102 : Resent-From: mailbox / 1#mailbox ;RFC 822, RFC 2047;
103 : HEADER_FIELD_ADDRESS
104 : Resent-Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
105 : Resent-Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
106 : Resent-Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
107 : Resent-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
108 : Resent-bcc: #address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
109 : Resent-cc: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
110 : Return-path: route-addr / ("<" ">") ;RFC 822, RFC 1123, RFC 2047;
111 : HEADER_FIELD_STRUCTURED
112 : Return-Receipt-To: address ;Not Internet standard;
113 : HEADER_FIELD_ADDRESS
114 : Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
115 : Subject: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
116 : Summary: *text ;RFC 1036; HEADER_FIELD_TEXT
117 : To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
118 : X-CHAOS-Marked: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
119 : X-CHAOS-Read: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
120 : X-CHAOS-Recipients: #*("<" atom word ">") ;local;
121 : HEADER_FIELD_STRUCTURED
122 : X-CHAOS-Size: 1*DIGIT ;local; HEADER_FIELD_STRUCTURED
123 : X-Mailer: *text ;Not Internet standard; HEADER_FIELD_TEXT
124 : X-Mozilla-Status: 4HEXDIG ;Mozilla; HEADER_FIELD_STRUCTURED
125 : X-Newsreader: *text ;Not Internet standard; HEADER_FIELD_TEXT
126 : X-Priority: "1" / "2" / "3" / "4" / "5" ;Not Internet standard;
127 : HEADER_FIELD_STRUCTURED
128 : Xref: sub-domain
129 : 1*((atom / string) *("." (atom / string)) ":" msg-number)
130 : ;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
131 : */
132 : enum HeaderFieldType
133 : {
134 : HEADER_FIELD_TEXT,
135 : HEADER_FIELD_STRUCTURED,
136 : HEADER_FIELD_PHRASE,
137 : HEADER_FIELD_MESSAGE_ID,
138 : HEADER_FIELD_ADDRESS
139 : };
140 :
141 : /** Check for US-ASCII character.
142 :
143 : @param nChar Some UCS-4 character.
144 :
145 : @return True if nChar is a US-ASCII character (0x00--0x7F).
146 : */
147 : static inline bool isUSASCII(sal_uInt32 nChar);
148 :
149 : /** Check for ISO 8859-1 character.
150 :
151 : @param nChar Some UCS-4 character.
152 :
153 : @return True if nChar is an ISO 8859-1 character (0x00--0xFF).
154 : */
155 : static inline bool isISO88591(sal_uInt32 nChar);
156 :
157 : /** Check for US-ASCII control character.
158 :
159 : @param nChar Some UCS-4 character.
160 :
161 : @return True if nChar is a US-ASCII control character (US-ASCII
162 : 0x00--0x1F or 0x7F).
163 : */
164 : static inline bool isControl(sal_uInt32 nChar);
165 :
166 : /** Check for US-ASCII white space character.
167 :
168 : @param nChar Some UCS-4 character.
169 :
170 : @return True if nChar is a US-ASCII white space character (US-ASCII
171 : 0x09 or 0x20).
172 : */
173 : static inline bool isWhiteSpace(sal_uInt32 nChar);
174 :
175 : /** Check for US-ASCII visible character.
176 :
177 : @param nChar Some UCS-4 character.
178 :
179 : @return True if nChar is a US-ASCII visible character (US-ASCII
180 : 0x21--0x7E).
181 : */
182 : static inline bool isVisible(sal_uInt32 nChar);
183 :
184 : /** Check for US-ASCII digit character.
185 :
186 : @param nChar Some UCS-4 character.
187 :
188 : @return True if nChar is a US-ASCII (decimal) digit character (US-
189 : ASCII '0'--'9').
190 : */
191 : static inline bool isDigit(sal_uInt32 nChar);
192 :
193 : /** Check for US-ASCII canonic hexadecimal digit character.
194 :
195 : @param nChar Some UCS-4 character.
196 :
197 : @return True if nChar is a US-ASCII canonic (i.e., upper case)
198 : hexadecimal digit character (US-ASCII '0'--'9' or 'A'--'F').
199 : */
200 : static inline bool isCanonicHexDigit(sal_uInt32 nChar);
201 :
202 : /** Check for US-ASCII hexadecimal digit character.
203 :
204 : @param nChar Some UCS-4 character.
205 :
206 : @return True if nChar is a US-ASCII hexadecimal digit character (US-
207 : ASCII '0'--'9', 'A'--'F', 'a'--'f').
208 : */
209 : static inline bool isHexDigit(sal_uInt32 nChar);
210 :
211 : /** Check for US-ASCII upper case character.
212 :
213 : @param nChar Some UCS-4 character.
214 :
215 : @return True if nChar is a US-ASCII upper case alphabetic character
216 : (US-ASCII 'A'--'Z').
217 : */
218 : static inline bool isUpperCase(sal_uInt32 nChar);
219 :
220 : /** Check for US-ASCII lower case character.
221 :
222 : @param nChar Some UCS-4 character.
223 :
224 : @return True if nChar is a US-ASCII lower case alphabetic character
225 : (US-ASCII 'a'--'z').
226 : */
227 : static inline bool isLowerCase(sal_uInt32 nChar);
228 :
229 : /** Check for US-ASCII alphabetic character.
230 :
231 : @param nChar Some UCS-4 character.
232 :
233 : @return True if nChar is a US-ASCII alphabetic character (US-ASCII
234 : 'A'--'Z' or 'a'--'z').
235 : */
236 : static inline bool isAlpha(sal_uInt32 nChar);
237 :
238 : /** Check for US-ASCII alphanumeric character.
239 :
240 : @param nChar Some UCS-4 character.
241 :
242 : @return True if nChar is a US-ASCII alphanumeric character (US-ASCII
243 : '0'--'9', 'A'--'Z' or 'a'--'z').
244 : */
245 : static inline bool isAlphanumeric(sal_uInt32 nChar);
246 :
247 : /** Check for US-ASCII Base 64 digit character.
248 :
249 : @param nChar Some UCS-4 character.
250 :
251 : @return True if nChar is a US-ASCII Base 64 digit character (US-ASCII
252 : 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
253 : */
254 : static inline bool isBase64Digit(sal_uInt32 nChar);
255 :
256 : /** Check whether some character is valid within an RFC 822 <atom>.
257 :
258 : @param nChar Some UCS-4 character.
259 :
260 : @return True if nChar is valid within an RFC 822 <atom> (US-ASCII
261 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
262 : '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
263 : */
264 : static bool isAtomChar(sal_uInt32 nChar);
265 :
266 : /** Check whether some character is valid within an RFC 2045 <token>.
267 :
268 : @param nChar Some UCS-4 character.
269 :
270 : @return True if nChar is valid within an RFC 2045 <token> (US-ASCII
271 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
272 : '-', '.', '^', '_', '`', '{', '|', '}', or '~').
273 : */
274 : static bool isTokenChar(sal_uInt32 nChar);
275 :
276 : /** Check whether some character is valid within an RFC 2047 <token>.
277 :
278 : @param nChar Some UCS-4 character.
279 :
280 : @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
281 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
282 : '-', '^', '_', '`', '{', '|', '}', or '~').
283 : */
284 : static bool isEncodedWordTokenChar(sal_uInt32 nChar);
285 :
286 : /** Check whether some character is valid within an RFC 2060 <atom>.
287 :
288 : @param nChar Some UCS-4 character.
289 :
290 : @return True if nChar is valid within an RFC 2060 <atom> (US-ASCII
291 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
292 : '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
293 : '|', '}', or '~').
294 : */
295 : static bool isIMAPAtomChar(sal_uInt32 nChar);
296 :
297 : /** Translate a US-ASCII character to upper case.
298 :
299 : @param nChar Some UCS-4 character.
300 :
301 : @return If nChar is a US-ASCII lower case character (US-ASCII
302 : 'a'--'z'), return the corresponding US-ASCII upper case character (US-
303 : ASCII 'A'--'Z'); otherwise, return nChar unchanged.
304 : */
305 : static inline sal_uInt32 toUpperCase(sal_uInt32 nChar);
306 :
307 : /** Translate a US-ASCII character to lower case.
308 :
309 : @param nChar Some UCS-4 character.
310 :
311 : @return If nChar is a US-ASCII upper case character (US-ASCII
312 : 'A'--'Z'), return the corresponding US-ASCII lower case character (US-
313 : ASCII 'a'--'z'); otherwise, return nChar unchanged.
314 : */
315 : static inline sal_uInt32 toLowerCase(sal_uInt32 nChar);
316 :
317 : /** Get the digit weight of a US-ASCII character.
318 :
319 : @param nChar Some UCS-4 character.
320 :
321 : @return If nChar is a US-ASCII (decimal) digit character (US-ASCII
322 : '0'--'9'), return the corresponding weight (0--9); otherwise,
323 : return -1.
324 : */
325 : static inline int getWeight(sal_uInt32 nChar);
326 :
327 : /** Get the hexadecimal digit weight of a US-ASCII character.
328 :
329 : @param nChar Some UCS-4 character.
330 :
331 : @return If nChar is a US-ASCII hexadecimal digit character (US-ASCII
332 : '0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
333 : (0--15); otherwise, return -1.
334 : */
335 : static inline int getHexWeight(sal_uInt32 nChar);
336 :
337 : /** Get the Base 64 digit weight of a US-ASCII character.
338 :
339 : @param nChar Some UCS-4 character.
340 :
341 : @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
342 : 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
343 : corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
344 : character (US-ASCII '='), return -1; otherwise, return -2.
345 : */
346 : static inline int getBase64Weight(sal_uInt32 nChar);
347 :
348 : /** Get a hexadecimal digit encoded as US-ASCII.
349 :
350 : @param nWeight Must be in the range 0--15, inclusive.
351 :
352 : @return The canonic (i.e., upper case) hexadecimal digit
353 : corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
354 : */
355 : static sal_uInt32 getHexDigit(int nWeight);
356 :
357 : static inline bool isHighSurrogate(sal_uInt32 nUTF16); // true for 0xD800--0xDBFF
358 :
359 : static inline bool isLowSurrogate(sal_uInt32 nUTF16); // true for 0xDC00--0xDFFF
360 :
361 : static inline sal_uInt32 toUTF32(sal_Unicode cHighSurrogate,
362 : sal_Unicode cLowSurrogate); // both arguments are asserted to be surrogates of the matching kind
363 :
364 : /** Check two US-ASCII strings for equality, ignoring case.
365 :
366 : @param pBegin1 Points to the start of the first string, must not be
367 : null.
368 :
369 : @param pEnd1 Points past the end of the first string, must be >=
370 : pBegin1.
371 :
372 : @param pString2 Points to the start of the null terminated second
373 : string, must not be null.
374 :
375 : @return True if the two strings are equal, ignoring the case of US-
376 : ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
377 : */
378 : static bool equalIgnoreCase(const sal_Char * pBegin1,
379 : const sal_Char * pEnd1,
380 : const sal_Char * pString2);
381 :
382 : /** Check two US-ASCII strings for equality, ignoring case.
383 :
384 : @param pBegin1 Points to the start of the first string, must not be
385 : null.
386 :
387 : @param pEnd1 Points past the end of the first string, must be >=
388 : pBegin1.
389 :
390 : @param pString2 Points to the start of the null terminated second
391 : string, must not be null.
392 :
393 : @return True if the two strings are equal, ignoring the case of US-
394 : ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
395 : */
396 : static bool equalIgnoreCase(const sal_Unicode * pBegin1,
397 : const sal_Unicode * pEnd1,
398 : const sal_Char * pString2);
399 :
400 : static inline bool startsWithLineBreak(const sal_Char * pBegin,
401 : const sal_Char * pEnd); // true iff the range starts with CR LF
402 :
403 : static inline bool startsWithLineBreak(const sal_Unicode * pBegin,
404 : const sal_Unicode * pEnd); // true iff the range starts with CR LF
405 :
406 : static inline bool startsWithLineFolding(const sal_Char * pBegin,
407 : const sal_Char * pEnd); // true iff the range starts with CR LF followed by SP or HT
408 :
409 : static inline bool startsWithLineFolding(const sal_Unicode * pBegin,
410 : const sal_Unicode * pEnd); // true iff the range starts with CR LF followed by SP or HT
411 :
412 : static bool startsWithLinearWhiteSpace(const sal_Char * pBegin,
413 : const sal_Char * pEnd); // true iff the range starts with SP, HT or a line folding
414 :
415 : static const sal_Unicode * skipLinearWhiteSpace(const sal_Unicode *
416 : pBegin,
417 : const sal_Unicode * pEnd);
418 :
419 : static const sal_Unicode * skipComment(const sal_Unicode * pBegin,
420 : const sal_Unicode * pEnd);
421 :
422 : static const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
423 : pBegin,
424 : const sal_Unicode *
425 : pEnd);
426 :
427 : static inline bool needsQuotedStringEscape(sal_uInt32 nChar); // true for double quote and backslash
428 :
429 : static const sal_Char * skipQuotedString(const sal_Char * pBegin,
430 : const sal_Char * pEnd);
431 :
432 : static const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
433 : const sal_Unicode * pEnd);
434 :
435 : static bool scanUnsigned(const sal_Unicode *& rBegin,
436 : const sal_Unicode * pEnd, bool bLeadingZeroes,
437 : sal_uInt32 & rValue);
438 :
439 : static const sal_Unicode * scanQuotedBlock(const sal_Unicode * pBegin,
440 : const sal_Unicode * pEnd,
441 : sal_uInt32 nOpening,
442 : sal_uInt32 nClosing,
443 : sal_Size & rLength,
444 : bool & rModify);
445 :
446 : static sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
447 : sal_Unicode const * pEnd,
448 : INetContentTypeParameterList *
449 : pParameters);
450 :
451 : static inline rtl_TextEncoding translateToMIME(rtl_TextEncoding
452 : eEncoding); // MS-1252 becomes ISO 8859-1 on WNT builds; identity elsewhere
453 :
454 : static inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
455 : eEncoding); // ISO 8859-1 becomes MS-1252 on WNT builds; identity elsewhere
456 :
457 : static const sal_Char * getCharsetName(rtl_TextEncoding eEncoding);
458 :
459 : static rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
460 : const sal_Char * pEnd);
461 :
462 : static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding); // wraps rtl_isOctetTextEncoding()
463 :
464 : static INetMIMECharsetList_Impl *
465 : createPreferredCharsetList(rtl_TextEncoding eEncoding);
466 :
467 : static sal_Unicode * convertToUnicode(const sal_Char * pBegin,
468 : const sal_Char * pEnd,
469 : rtl_TextEncoding eEncoding,
470 : sal_Size & rSize);
471 :
472 : static sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
473 : const sal_Unicode * pEnd,
474 : rtl_TextEncoding eEncoding,
475 : sal_Size & rSize);
476 :
477 : /** Get the number of octets required to encode a UCS-4 character using
478 : UTF-8 encoding.
479 :
480 : @param nChar Some UCS-4 character.
481 :
482 : @return The number of octets required (in the range 1--6, inclusive).
483 : */
484 : static inline int getUTF8OctetCount(sal_uInt32 nChar);
485 :
486 : static inline void writeEscapeSequence(INetMIMEOutputSink & rSink,
487 : sal_uInt32 nChar);
488 :
489 : static void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
490 :
491 : static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
492 : HeaderFieldType eType,
493 : const OUString& rBody,
494 : rtl_TextEncoding ePreferredEncoding,
495 : bool bInitialSpace = true);
496 :
497 : static bool translateUTF8Char(const sal_Char *& rBegin,
498 : const sal_Char * pEnd,
499 : rtl_TextEncoding eEncoding,
500 : sal_uInt32 & rCharacter);
501 :
502 : static OUString decodeHeaderFieldBody(HeaderFieldType eType,
503 : const OString& rBody);
504 :
505 : // #i70651#: Prevent warnings on Mac OS X.
506 : #ifdef MACOSX
507 : #pragma GCC system_header
508 : #endif
509 :
510 : /** Get the UTF-32 character at the head of a UTF-16 encoded string.
511 :
512 : @param rBegin Points to the start of the UTF-16 encoded string, must
513 : not be null. On exit, it points past the first UTF-32 character's
514 : encoding.
515 :
516 : @param pEnd Points past the end of the UTF-16 encoded string, must be
517 : strictly greater than rBegin.
518 :
519 : @return The UCS-4 character at the head of the UTF-16 encoded string.
520 : If the string does not start with the UTF-16 encoding of a UCS-4
521 : character, the first UTF-16 value is returned.
522 : */
523 : static inline sal_uInt32 getUTF32Character(const sal_Unicode *& rBegin,
524 : const sal_Unicode * pEnd);
525 :
526 : /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
527 :
528 : @param pBuffer Points to a buffer, must not be null.
529 :
530 : @param nUTF32 A UTF-32 character, must be in the range 0..0x10FFFF.
531 :
532 : @return A pointer past the UTF-16 characters put into the buffer
533 : (i.e., pBuffer + 1 or pBuffer + 2).
534 : */
535 : static inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
536 : sal_uInt32 nUTF32);
537 : };
538 :
539 : // static -- US-ASCII test; simply forwards to rtl::isAscii().
540 9495774 : inline bool INetMIME::isUSASCII(sal_uInt32 nChar)
541 : {
542 9495774 : return rtl::isAscii(nChar);
543 : }
544 :
545 : // static -- true iff nChar lies in the ISO 8859-1 range 0x00--0xFF.
546 : inline bool INetMIME::isISO88591(sal_uInt32 nChar)
547 : {
548 : return nChar <= 0xFF;
549 : }
550 :
551 : // static -- true iff nChar is a US-ASCII control character.
552 : inline bool INetMIME::isControl(sal_uInt32 nChar)
553 : {
554 : return nChar <= 0x1F || nChar == 0x7F; // 0x00--0x1F plus DEL (0x7F)
555 : }
556 :
557 : // static -- true iff nChar is horizontal tab or space; CR and LF are not accepted here.
558 0 : inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar)
559 : {
560 0 : return nChar == '\t' || nChar == ' ';
561 : }
562 :
563 : // static -- true iff nChar is a visible US-ASCII character, '!' (0x21) through '~' (0x7E).
564 222 : inline bool INetMIME::isVisible(sal_uInt32 nChar)
565 : {
566 222 : return nChar >= '!' && nChar <= '~'; // space (0x20) and DEL (0x7F) fall outside the range
567 : }
568 :
569 : // static -- decimal digit test; simply forwards to rtl::isAsciiDigit().
570 0 : inline bool INetMIME::isDigit(sal_uInt32 nChar)
571 : {
572 0 : return rtl::isAsciiDigit(nChar);
573 : }
574 :
575 : // static -- canonic hexadecimal digit test; simply forwards to rtl::isAsciiCanonicHexDigit().
576 : inline bool INetMIME::isCanonicHexDigit(sal_uInt32 nChar)
577 : {
578 : return rtl::isAsciiCanonicHexDigit(nChar);
579 : }
580 :
581 : // static -- hexadecimal digit test; simply forwards to rtl::isAsciiHexDigit().
582 : inline bool INetMIME::isHexDigit(sal_uInt32 nChar)
583 : {
584 : return rtl::isAsciiHexDigit(nChar);
585 : }
586 :
587 : // static -- upper case letter test; simply forwards to rtl::isAsciiUpperCase().
588 59800 : inline bool INetMIME::isUpperCase(sal_uInt32 nChar)
589 : {
590 59800 : return rtl::isAsciiUpperCase(nChar);
591 : }
592 :
593 : // static -- lower case letter test; simply forwards to rtl::isAsciiLowerCase().
594 : inline bool INetMIME::isLowerCase(sal_uInt32 nChar)
595 : {
596 : return rtl::isAsciiLowerCase(nChar);
597 : }
598 :
599 : // static -- alphabetic character test; simply forwards to rtl::isAsciiAlpha().
600 160 : inline bool INetMIME::isAlpha(sal_uInt32 nChar)
601 : {
602 160 : return rtl::isAsciiAlpha(nChar);
603 : }
604 :
605 : // static -- alphanumeric character test; simply forwards to rtl::isAsciiAlphanumeric().
606 : inline bool INetMIME::isAlphanumeric(sal_uInt32 nChar)
607 : {
608 : return rtl::isAsciiAlphanumeric(nChar);
609 : }
610 :
611 : // static -- true iff nChar is one of the 64 Base 64 digits; the padding character '=' is not accepted.
612 : inline bool INetMIME::isBase64Digit(sal_uInt32 nChar)
613 : {
614 : return rtl::isAsciiUpperCase(nChar) || rtl::isAsciiLowerCase(nChar) || rtl::isAsciiDigit(nChar)
615 : || nChar == '+' || nChar == '/';
616 : }
617 :
618 : // static -- maps 'a'--'z' to 'A'--'Z'; every other value is returned unchanged.
619 204 : inline sal_uInt32 INetMIME::toUpperCase(sal_uInt32 nChar)
620 : {
621 204 : return rtl::isAsciiLowerCase(nChar) ? nChar - ('a' - 'A') : nChar; // 'a' - 'A' is the distance between the two cases
622 : }
623 :
624 : // static -- maps 'A'--'Z' to 'a'--'z'; every other value is returned unchanged.
625 3769643 : inline sal_uInt32 INetMIME::toLowerCase(sal_uInt32 nChar)
626 : {
627 3769643 : return rtl::isAsciiUpperCase(nChar) ? nChar + ('a' - 'A') : nChar; // 'a' - 'A' is the distance between the two cases
628 : }
629 :
630 : // static -- returns 0--9 for the digits '0'--'9' and -1 for anything else.
631 52 : inline int INetMIME::getWeight(sal_uInt32 nChar)
632 : {
633 52 : return rtl::isAsciiDigit(nChar) ? int(nChar - '0') : -1;
634 : }
635 :
636 : // static -- returns 0--15 for a hexadecimal digit of either case and -1 for anything else.
637 4574 : inline int INetMIME::getHexWeight(sal_uInt32 nChar)
638 : {
639 7208 : return rtl::isAsciiDigit(nChar) ? int(nChar - '0') : // '0'--'9' -> 0--9
640 1940 : nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) : // 'A'--'F' -> 10--15
641 9148 : nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1; // 'a'--'f' -> 10--15, else not a hex digit
642 : }
643 :
644 : // static -- returns the Base 64 value 0--63 of nChar, -1 for the padding character '=', -2 for anything else.
645 12 : inline int INetMIME::getBase64Weight(sal_uInt32 nChar)
646 : {
647 21 : return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') : // 'A'--'Z' -> 0--25
648 3 : rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) : // 'a'--'z' -> 26--51
649 3 : rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) : // '0'--'9' -> 52--61
650 : nChar == '+' ? 62 :
651 : nChar == '/' ? 63 :
652 27 : nChar == '=' ? -1 : -2; // padding is reported separately from invalid input
653 : }
654 :
655 : // static -- true iff nUTF16 is a UTF-16 high (leading) surrogate, 0xD800--0xDBFF.
656 746 : inline bool INetMIME::isHighSurrogate(sal_uInt32 nUTF16)
657 : {
658 746 : return nUTF16 >= 0xD800 && nUTF16 <= 0xDBFF;
659 : }
660 :
661 : // static -- true iff nUTF16 is a UTF-16 low (trailing) surrogate, 0xDC00--0xDFFF.
662 0 : inline bool INetMIME::isLowSurrogate(sal_uInt32 nUTF16)
663 : {
664 0 : return nUTF16 >= 0xDC00 && nUTF16 <= 0xDFFF;
665 : }
666 :
667 : // static -- Decode a UTF-16 surrogate pair. cHighSurrogate must be in 0xD800--0xDBFF and cLowSurrogate in 0xDC00--0xDFFF (asserted); returns the encoded code point, 0x10000--0x10FFFF.
668 : inline sal_uInt32 INetMIME::toUTF32(sal_Unicode cHighSurrogate,
669 : sal_Unicode cLowSurrogate)
670 : {
671 : DBG_ASSERT(isHighSurrogate(cHighSurrogate)
672 : && isLowSurrogate(cLowSurrogate),
673 : "INetMIME::toUTF32(): Bad chars");
674 : return (((sal_uInt32(cHighSurrogate) & 0x3FF) << 10) // upper 10 payload bits
675 : | (sal_uInt32(cLowSurrogate) & 0x3FF)) + 0x10000; // lower 10 bits; re-add the offset UTF-16 subtracts before splitting
676 : }
677 :
678 : // static -- true iff at least two octets remain in [pBegin, pEnd) and they are CR followed by LF.
679 : inline bool INetMIME::startsWithLineBreak(const sal_Char * pBegin,
680 : const sal_Char * pEnd)
681 : {
682 : DBG_ASSERT(pBegin && pBegin <= pEnd,
683 : "INetMIME::startsWithLineBreak(): Bad sequence");
684 :
685 : return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
686 : // CR, LF
687 : }
688 :
689 : // static -- UTF-16 overload: true iff at least two units remain in [pBegin, pEnd) and they are CR followed by LF.
690 0 : inline bool INetMIME::startsWithLineBreak(const sal_Unicode * pBegin,
691 : const sal_Unicode * pEnd)
692 : {
693 : DBG_ASSERT(pBegin && pBegin <= pEnd,
694 : "INetMIME::startsWithLineBreak(): Bad sequence");
695 :
696 0 : return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
697 : // CR, LF
698 : }
699 :
700 : // static -- true iff at least three octets remain in [pBegin, pEnd) and they are CR, LF and a white space character.
701 : inline bool INetMIME::startsWithLineFolding(const sal_Char * pBegin,
702 : const sal_Char * pEnd)
703 : {
704 : DBG_ASSERT(pBegin && pBegin <= pEnd,
705 : "INetMIME::startsWithLineFolding(): Bad sequence");
706 :
707 : return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
708 : && isWhiteSpace(pBegin[2]); // CR, LF, then SP or HT
709 : }
710 :
711 : // static -- UTF-16 overload: true iff at least three units remain in [pBegin, pEnd) and they are CR, LF and a white space character.
712 0 : inline bool INetMIME::startsWithLineFolding(const sal_Unicode * pBegin,
713 : const sal_Unicode * pEnd)
714 : {
715 : DBG_ASSERT(pBegin && pBegin <= pEnd,
716 : "INetMIME::startsWithLineFolding(): Bad sequence");
717 :
718 0 : return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
719 0 : && isWhiteSpace(pBegin[2]); // CR, LF, then SP or HT
720 : }
721 :
722 : // static -- true iff [pBegin, pEnd) is non-empty and starts with SP, HT, or a line folding (CR LF followed by SP or HT).
723 : inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char * pBegin,
724 : const sal_Char * pEnd)
725 : {
726 : DBG_ASSERT(pBegin && pBegin <= pEnd,
727 : "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
728 :
729 : return pBegin != pEnd // the emptiness check guards the dereference on the next line
730 : && (isWhiteSpace(*pBegin) || startsWithLineFolding(pBegin, pEnd));
731 : }
732 :
733 : // static -- true iff nChar is a double quote or a backslash.
734 0 : inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar)
735 : {
736 0 : return nChar == '"' || nChar == '\\';
737 : }
738 :
739 : // static -- when WNT is defined, RTL_TEXTENCODING_MS_1252 is replaced by RTL_TEXTENCODING_ISO_8859_1; otherwise eEncoding is returned unchanged.
740 0 : inline rtl_TextEncoding INetMIME::translateToMIME(rtl_TextEncoding eEncoding)
741 : {
742 : #if defined WNT
743 : return eEncoding == RTL_TEXTENCODING_MS_1252 ?
744 : RTL_TEXTENCODING_ISO_8859_1 : eEncoding;
745 : #else // WNT
746 0 : return eEncoding;
747 : #endif // WNT
748 : }
749 :
750 : // static -- inverse of translateToMIME(): when WNT is defined, RTL_TEXTENCODING_ISO_8859_1 is replaced by RTL_TEXTENCODING_MS_1252; otherwise eEncoding is returned unchanged.
751 3 : inline rtl_TextEncoding INetMIME::translateFromMIME(rtl_TextEncoding
752 : eEncoding)
753 : {
754 : #if defined WNT
755 : return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
756 : RTL_TEXTENCODING_MS_1252 : eEncoding;
757 : #else
758 3 : return eEncoding;
759 : #endif
760 : }
761 :
762 : // static -- true iff rtl_isOctetTextEncoding() reports eEncoding as an octet text encoding.
763 3 : inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
764 : {
765 3 : return ( rtl_isOctetTextEncoding(eEncoding) == sal_True ); // compares against sal_True exactly, not just non-zero
766 : }
767 :
768 : // static -- Number of octets (1--6) needed to encode nChar in UTF-8; nChar must be below 0x80000000 (asserted).
769 0 : inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar)
770 : {
771 : DBG_ASSERT(nChar < 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
772 :
773 : return nChar < 0x80 ? 1 : // up to 7 payload bits
774 : nChar < 0x800 ? 2 : // up to 11 payload bits
775 : nChar < 0x10000 ? 3 : // up to 16 payload bits; 0x10000 itself already needs 4 octets
776 : nChar < 0x200000 ? 4 : // up to 21 payload bits; 0x200000 itself already needs 5 octets
777 0 : nChar < 0x4000000 ? 5 : 6; // up to 26 payload bits, otherwise up to 31
778 : }
779 :
780 : // static -- Read one code point from UTF-16 text and advance rBegin past it (by two units for a surrogate pair, by one otherwise).
781 11192750 : inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
782 : const sal_Unicode * pEnd)
783 : {
784 : DBG_ASSERT(rBegin && rBegin < pEnd,
785 : "INetMIME::getUTF32Character(): Bad sequence");
786 11192750 : if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF
787 0 : && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF) // two units available: high surrogate followed by low surrogate
788 : {
789 0 : sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10; // upper 10 bits; consumes the high surrogate
790 0 : return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000; // lower 10 bits plus the 0x10000 offset; consumes the low surrogate
791 : }
792 : else
793 11192750 : return *rBegin++; // no complete pair: return the single UTF-16 unit as is
794 : }
795 :
796 : // static -- Write nUTF32 (asserted to be <= 0x10FFFF) to pBuffer as one or two UTF-16 units and return the position after them.
797 0 : inline sal_Unicode * INetMIME::putUTF32Character(sal_Unicode * pBuffer,
798 : sal_uInt32 nUTF32)
799 : {
800 : DBG_ASSERT(nUTF32 <= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
801 0 : if (nUTF32 < 0x10000)
802 0 : *pBuffer++ = sal_Unicode(nUTF32); // fits into a single UTF-16 unit
803 : else
804 : {
805 0 : nUTF32 -= 0x10000; // surrogate pairs encode the offset from 0x10000
806 0 : *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10)); // high surrogate: upper 10 bits
807 0 : *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF)); // low surrogate: lower 10 bits
808 : }
809 0 : return pBuffer;
810 : }
811 :
812 : class INetMIMEOutputSink
813 : {
814 : public:
815 : static sal_uInt32 const NO_LINE_LENGTH_LIMIT = SAL_MAX_UINT32;
816 :
817 : private:
818 : sal_uInt32 m_nColumn;
819 : sal_uInt32 m_nLineLengthLimit;
820 :
821 : protected:
822 : /** Write a sequence of octets.
823 :
824 : @param pBegin Points to the start of the sequence, must not be null.
825 :
826 : @param pEnd Points past the end of the sequence, must be >= pBegin.
827 : */
828 : virtual void writeSequence(const sal_Char * pBegin,
829 : const sal_Char * pEnd) = 0;
830 :
831 : /** Write a null terminated sequence of octets (without the terminating
832 : null).
833 :
834 : @param pSequence A null terminated sequence of octets, must not be
835 : null.
836 :
837 : @return The length of pSequence (without the terminating null).
838 : */
839 : virtual sal_Size writeSequence(const sal_Char * pSequence);
840 :
841 : /** Write a sequence of octets.
842 :
843 : @descr The supplied sequence of UCS-4 characters is interpreted as a
844 : sequence of octets. It is an error if any of the elements of the
845 : sequence has a numerical value greater than 255.
846 :
847 : @param pBegin Points to the start of the sequence, must not be null.
848 :
849 : @param pEnd Points past the end of the sequence, must be >= pBegin.
850 : */
851 : virtual void writeSequence(const sal_uInt32 * pBegin,
852 : const sal_uInt32 * pEnd);
853 :
854 : /** Write a sequence of octets.
855 :
856 : @descr The supplied sequence of Unicode characters is interpreted as
857 : a sequence of octets. It is an error if any of the elements of the
858 : sequence has a numerical value greater than 255.
859 :
860 : @param pBegin Points to the start of the sequence, must not be null.
861 :
862 : @param pEnd Points past the end of the sequence, must be >= pBegin.
863 : */
864 : virtual void writeSequence(const sal_Unicode * pBegin,
865 : const sal_Unicode * pEnd);
866 :
867 : public:
868 0 : INetMIMEOutputSink(sal_uInt32 nTheColumn = 0,
869 : sal_uInt32 nTheLineLengthLimit
870 : = INetMIME::SOFT_LINE_LENGTH_LIMIT):
871 0 : m_nColumn(nTheColumn), m_nLineLengthLimit(nTheLineLengthLimit) {}
872 :
873 0 : virtual ~INetMIMEOutputSink() {}
874 :
875 : /** Get the current column.
876 :
877 : @return The current column (starting from zero).
878 : */
879 0 : sal_uInt32 getColumn() const { return m_nColumn; }
880 :
881 0 : sal_uInt32 getLineLengthLimit() const { return m_nLineLengthLimit; }
882 :
883 : void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)
884 : { m_nLineLengthLimit = nTheLineLengthLimit; }
885 :
886 : virtual ErrCode getError() const;
887 :
888 : /** Write a sequence of octets.
889 :
890 : @param pBegin Points to the start of the sequence, must not be null.
891 :
892 : @param pEnd Points past the end of the sequence, must be >= pBegin.
893 : */
894 : inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
895 :
896 : /** Write a sequence of octets.
897 :
898 : @param pBegin Points to the start of the sequence, must not be null.
899 :
900 : @param nLength The length of the sequence.
901 : */
902 : void write(const sal_Char * pBegin, sal_Size nLength)
903 : { write(pBegin, pBegin + nLength); }
904 :
905 : /** Write a sequence of octets.
906 :
907 : @descr The supplied sequence of UCS-4 characters is interpreted as a
908 : sequence of octets. It is an error if any of the elements of the
909 : sequence has a numerical value greater than 255.
910 :
911 : @param pBegin Points to the start of the sequence, must not be null.
912 :
913 : @param pEnd Points past the end of the sequence, must be >= pBegin.
914 : */
915 : inline void write(const sal_uInt32 * pBegin, const sal_uInt32 * pEnd);
916 :
917 : /** Write a sequence of octets.
918 :
919 : @descr The supplied sequence of Unicode characters is interpreted as
920 : a sequence of octets. It is an error if any of the elements of the
921 : sequence has a numerical value greater than 255.
922 :
923 : @param pBegin Points to the start of the sequence, must not be null.
924 :
925 : @param pEnd Points past the end of the sequence, must be >= pBegin.
926 : */
927 : inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
928 :
929 : /** Write a sequence of octets.
930 :
931 : @param rOctets A OString, interpreted as a sequence of octets.
932 :
933 : @param nBegin The offset of the first character to write.
934 :
935 : @param nEnd The offset past the last character to write.
936 : */
937 : void write(const OString& rOctets, xub_StrLen nBegin,
938 : xub_StrLen nEnd)
939 : {
940 : writeSequence(rOctets.getStr() + nBegin, rOctets.getStr() + nEnd);
941 : m_nColumn += nEnd - nBegin;
942 : }
943 :
944 : /** Write a single octet.
945 :
946 : @param nOctet Some octet.
947 :
948 : @return This instance.
949 : */
950 : inline INetMIMEOutputSink & operator <<(sal_Char nOctet);
951 :
952 : /** Write a null terminated sequence of octets (without the terminating
953 : null).
954 :
955 : @param pOctets A null terminated sequence of octets, must not be
956 : null.
957 :
958 : @return This instance.
959 : */
960 : inline INetMIMEOutputSink & operator <<(const sal_Char * pOctets);
961 :
962 : /** Write a sequence of octets.
963 :
964 : @param rOctets A OString, interpreted as a sequence of octets.
965 :
966 : @return This instance.
967 : */
968 : INetMIMEOutputSink & operator <<(const OString& rOctets)
969 : {
970 : writeSequence(rOctets.getStr(), rOctets.getStr() + rOctets.getLength());
971 : m_nColumn += rOctets.getLength();
972 : return *this;
973 : }
974 :
975 : /** Call a manipulator function.
976 :
977 : @param pManipulator A manipulator function.
978 :
979 : @return Whatever the manipulator function returns.
980 : */
981 : INetMIMEOutputSink &
982 0 : operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))
983 0 : { return pManipulator(*this); }
984 :
/** Emit a line end, i.e. the two octets CR and LF.
 */
void writeLineEnd();
988 :
989 : /** A manipulator function that writes a line end (CR LF).
990 :
991 : @param rSink Some sink.
992 :
993 : @return The sink rSink.
994 : */
995 : static inline INetMIMEOutputSink & endl(INetMIMEOutputSink & rSink);
996 : };
997 :
998 : inline void INetMIMEOutputSink::write(const sal_Char * pBegin,
999 : const sal_Char * pEnd)
1000 : {
1001 : writeSequence(pBegin, pEnd);
1002 : m_nColumn += pEnd - pBegin;
1003 : }
1004 :
1005 : inline void INetMIMEOutputSink::write(const sal_uInt32 * pBegin,
1006 : const sal_uInt32 * pEnd)
1007 : {
1008 : writeSequence(pBegin, pEnd);
1009 : m_nColumn += pEnd - pBegin;
1010 : }
1011 :
1012 0 : inline void INetMIMEOutputSink::write(const sal_Unicode * pBegin,
1013 : const sal_Unicode * pEnd)
1014 : {
1015 0 : writeSequence(pBegin, pEnd);
1016 0 : m_nColumn += pEnd - pBegin;
1017 0 : }
1018 :
1019 0 : inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(sal_Char nOctet)
1020 : {
1021 0 : writeSequence(&nOctet, &nOctet + 1);
1022 0 : ++m_nColumn;
1023 0 : return *this;
1024 : }
1025 :
1026 0 : inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const sal_Char *
1027 : pOctets)
1028 : {
1029 0 : m_nColumn += writeSequence(pOctets);
1030 0 : return *this;
1031 : }
1032 :
1033 : // static
1034 0 : inline INetMIMEOutputSink & INetMIMEOutputSink::endl(INetMIMEOutputSink &
1035 : rSink)
1036 : {
1037 0 : rSink.writeLineEnd();
1038 0 : return rSink;
1039 : }
1040 :
1041 : // static
1042 0 : inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink & rSink,
1043 : sal_uInt32 nChar)
1044 : {
1045 : DBG_ASSERT(nChar <= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
1046 0 : rSink << '=' << sal_uInt8(getHexDigit(nChar >> 4))
1047 0 : << sal_uInt8(getHexDigit(nChar & 15));
1048 0 : }
1049 :
1050 0 : class INetMIMEStringOutputSink: public INetMIMEOutputSink
1051 : {
1052 : OStringBuffer m_aBuffer;
1053 :
1054 : using INetMIMEOutputSink::writeSequence;
1055 :
1056 : virtual void writeSequence(const sal_Char * pBegin,
1057 : const sal_Char * pEnd);
1058 :
1059 : public:
1060 0 : inline INetMIMEStringOutputSink(sal_uInt32 nColumn = 0,
1061 : sal_uInt32 nLineLengthLimit
1062 : = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1063 0 : INetMIMEOutputSink(nColumn, nLineLengthLimit) {}
1064 :
1065 : virtual ErrCode getError() const;
1066 :
1067 0 : OString takeBuffer()
1068 : {
1069 0 : return m_aBuffer.makeStringAndClear();
1070 : }
1071 : };
1072 :
1073 : class INetMIMEEncodedWordOutputSink
1074 : {
1075 : public:
1076 : enum Context { CONTEXT_TEXT = 1,
1077 : CONTEXT_COMMENT = 2,
1078 : CONTEXT_PHRASE = 4 };
1079 :
1080 : enum Space { SPACE_NO, SPACE_ENCODED, SPACE_ALWAYS };
1081 :
1082 : private:
1083 : enum { BUFFER_SIZE = 256 };
1084 :
1085 : enum Coding { CODING_NONE, CODING_QUOTED, CODING_ENCODED,
1086 : CODING_ENCODED_TERMINATED };
1087 :
1088 : enum EncodedWordState { STATE_INITIAL, STATE_FIRST_EQUALS,
1089 : STATE_FIRST_QUESTION, STATE_CHARSET,
1090 : STATE_SECOND_QUESTION, STATE_ENCODING,
1091 : STATE_THIRD_QUESTION, STATE_ENCODED_TEXT,
1092 : STATE_FOURTH_QUESTION, STATE_SECOND_EQUALS,
1093 : STATE_BAD };
1094 :
1095 : INetMIMEOutputSink & m_rSink;
1096 : Context m_eContext;
1097 : Space m_eInitialSpace;
1098 : sal_uInt32 m_nExtraSpaces;
1099 : INetMIMECharsetList_Impl * m_pEncodingList;
1100 : sal_Unicode * m_pBuffer;
1101 : sal_uInt32 m_nBufferSize;
1102 : sal_Unicode * m_pBufferEnd;
1103 : Coding m_ePrevCoding;
1104 : rtl_TextEncoding m_ePrevMIMEEncoding;
1105 : Coding m_eCoding;
1106 : sal_uInt32 m_nQuotedEscaped;
1107 : EncodedWordState m_eEncodedWordState;
1108 :
1109 : inline bool needsEncodedWordEscape(sal_uInt32 nChar) const;
1110 :
1111 : void finish(bool bWriteTrailer);
1112 :
1113 : public:
1114 : inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,
1115 : Context eTheContext,
1116 : Space eTheInitialSpace,
1117 : rtl_TextEncoding ePreferredEncoding);
1118 :
1119 : ~INetMIMEEncodedWordOutputSink();
1120 :
1121 : INetMIMEEncodedWordOutputSink & operator <<(sal_uInt32 nChar);
1122 :
1123 : inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1124 :
1125 : inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1126 :
1127 : inline bool flush();
1128 : };
1129 :
1130 0 : inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink(
1131 : INetMIMEOutputSink & rTheSink, Context eTheContext,
1132 : Space eTheInitialSpace, rtl_TextEncoding ePreferredEncoding):
1133 : m_rSink(rTheSink),
1134 : m_eContext(eTheContext),
1135 : m_eInitialSpace(eTheInitialSpace),
1136 : m_nExtraSpaces(0),
1137 0 : m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding)),
1138 : m_ePrevCoding(CODING_NONE),
1139 : m_eCoding(CODING_NONE),
1140 : m_nQuotedEscaped(0),
1141 0 : m_eEncodedWordState(STATE_INITIAL)
1142 : {
1143 0 : m_nBufferSize = BUFFER_SIZE;
1144 : m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory(
1145 : m_nBufferSize
1146 0 : * sizeof (sal_Unicode)));
1147 0 : m_pBufferEnd = m_pBuffer;
1148 0 : }
1149 :
1150 : inline void INetMIMEEncodedWordOutputSink::write(const sal_Char * pBegin,
1151 : const sal_Char * pEnd)
1152 : {
1153 : DBG_ASSERT(pBegin && pBegin <= pEnd,
1154 : "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1155 :
1156 : while (pBegin != pEnd)
1157 : operator <<(*pBegin++);
1158 : }
1159 :
1160 0 : inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin,
1161 : const sal_Unicode * pEnd)
1162 : {
1163 : DBG_ASSERT(pBegin && pBegin <= pEnd,
1164 : "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1165 :
1166 0 : while (pBegin != pEnd)
1167 0 : operator <<(*pBegin++);
1168 0 : }
1169 :
1170 0 : inline bool INetMIMEEncodedWordOutputSink::flush()
1171 : {
1172 0 : finish(true);
1173 0 : return m_ePrevCoding != CODING_NONE;
1174 : }
1175 :
1176 130 : struct INetContentTypeParameter
1177 : {
1178 : /** The name of the attribute, in US-ASCII encoding and converted to lower
1179 : case. If a parameter value is split as described in RFC 2231, there
1180 : will only be one item for the complete parameter, with the attribute
1181 : name lacking any section suffix.
1182 : */
1183 : const OString m_sAttribute;
1184 :
1185 : /** The optional character set specification (see RFC 2231), in US-ASCII
1186 : encoding and converted to lower case.
1187 : */
1188 : const OString m_sCharset;
1189 :
1190 : /** The optional language specification (see RFC 2231), in US-ASCII
1191 : encoding and converted to lower case.
1192 : */
1193 : const OString m_sLanguage;
1194 :
1195 : /** The attribute value. If the value is a quoted-string, it is
1196 : 'unpacked.' If a character set is specified, and the value can be
1197 : converted to Unicode, this is done. Also, if no character set is
1198 : specified, it is first tried to convert the value from UTF-8 encoding
1199 : to Unicode, and if that doesn't work (because the value is not in
1200 : UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1201 : (which will always work). But if a character set is specified and the
1202 : value cannot be converted from that character set to Unicode, special
1203 : action is taken to produce a value that can possibly be transformed
1204 : back into its original form: Any 8-bit character from a non-encoded
1205 : part of the original value is directly converted to Unicode
1206 : (effectively handling it as if it was ISO-8859-1 encoded), and any
1207 : 8-bit character from an encoded part of the original value is mapped
1208 : to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1209 : within Unicode's Private Use Area (effectively adding 0xF800 to the
1210 : character's numeric value).
1211 : */
1212 : const OUString m_sValue;
1213 :
1214 : /** This is true if the value is successfully converted to Unicode, and
1215 : false if the value is a special mixture of ISO-LATIN-1 characters and
1216 : characters from Unicode's Private Use Area.
1217 : */
1218 : const bool m_bConverted;
1219 :
1220 130 : INetContentTypeParameter(const OString& rTheAttribute,
1221 : const OString& rTheCharset, const OString& rTheLanguage,
1222 : const OUString& rTheValue, bool bTheConverted)
1223 : : m_sAttribute(rTheAttribute)
1224 : , m_sCharset(rTheCharset)
1225 : , m_sLanguage(rTheLanguage)
1226 : , m_sValue(rTheValue)
1227 130 : , m_bConverted(bTheConverted)
1228 : {
1229 130 : }
1230 : };
1231 :
1232 2916 : class TOOLS_DLLPUBLIC INetContentTypeParameterList
1233 : {
1234 : public:
1235 :
1236 : void Clear();
1237 :
1238 : void Insert(INetContentTypeParameter * pParameter, sal_uIntPtr nIndex)
1239 : {
1240 : maEntries.insert(maEntries.begin()+nIndex,pParameter);
1241 : }
1242 :
1243 130 : void Append(INetContentTypeParameter *pParameter)
1244 : {
1245 130 : maEntries.push_back(pParameter);
1246 130 : }
1247 :
1248 : inline const INetContentTypeParameter * GetObject(sal_uIntPtr nIndex) const
1249 : {
1250 : return &(maEntries[nIndex]);
1251 : }
1252 :
1253 : const INetContentTypeParameter * find(const OString& rAttribute) const;
1254 :
1255 : private:
1256 :
1257 : boost::ptr_vector<INetContentTypeParameter> maEntries;
1258 : };
1259 :
1260 : #endif
1261 :
1262 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|