Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 : #ifndef TOOLS_INETMIME_HXX
20 : #define TOOLS_INETMIME_HXX
21 :
22 : #include <boost/ptr_container/ptr_vector.hpp>
23 :
24 : #include "tools/toolsdllapi.h"
25 : #include <rtl/alloc.h>
26 : #include <rtl/string.hxx>
27 : #include <rtl/strbuf.hxx>
28 : #include <rtl/tencinfo.h>
29 : #include <tools/debug.hxx>
30 : #include <tools/errcode.hxx>
31 : #include <tools/string.hxx>
32 :
33 : class DateTime;
34 : class INetContentTypeParameterList;
35 : class INetMIMECharsetList_Impl;
36 : class INetMIMEOutputSink;
37 :
38 : class TOOLS_DLLPUBLIC INetMIME
39 : {
40 : public:
41 : enum { SOFT_LINE_LENGTH_LIMIT = 76,
42 : HARD_LINE_LENGTH_LIMIT = 998 };
43 :
44 : /** The various types of message header field bodies, with respect to
45 : encoding and decoding them.
46 :
47 : @descr At the moment, five different types of header fields suffice
48 : to describe how to encode and decode any known message header field
49 : body, but need for more types may arise in the future as new header
50 : fields are introduced.
51 :
52 : @descr The following is an exhaustive list of all the header fields
53 : currently known to our implementation. For every header field, it
54 : includes a 'canonic' (with regard to capitalization) name, a grammar
55 : rule for the body (using RFC 822 and RFC 2234 conventions), a list of
56 : relevant sources of information, and the HeaderFieldType value to use
57 : with that header field. The list is based on RFC 2076 and draft-
58 : palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
59 : ietf/jp-ietf-home.html#anchor1003783>).
60 :
61 : Approved: address ;RFC 1036; HEADER_FIELD_ADDRESS
62 : bcc: #address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
63 : cc: 1#address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
64 : Comments: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
65 : Content-Base: absoluteURI ;RFC 2110; HEADER_FIELD_TEXT
66 : Content-Description: *text ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
67 : Content-Disposition: disposition-type *(";" disposition-parm)
68 : ;RFC 1806; HEADER_FIELD_STRUCTURED
69 : Content-ID: msg-id ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
70 : Content-Location: absoluteURI / relativeURI ;RFC 2110;
71 : HEADER_FIELD_TEXT
72 : Content-Transfer-Encoding: mechanism ;RFC 2045, RFC 2047;
73 : HEADER_FIELD_STRUCTURED
74 : Content-Type: type "/" subtype *(";" parameter) ;RFC 2045, RFC 2047;
75 : HEADER_FIELD_STRUCTURED
76 : Control: *text ;RFC 1036; HEADER_FIELD_TEXT
77 : Date: date-time ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
78 : Distribution: 1#atom ;RFC 1036; HEADER_FIELD_STRUCTURED
79 : Encrypted: 1#2word ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
80 : Expires: date-time ;RFC 1036; HEADER_FIELD_STRUCTURED
81 : Followup-To: 1#(atom *("." atom)) ;RFC 1036; HEADER_FIELD_STRUCTURED
82 : From: mailbox / 1#mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
83 : In-Reply-To: *(phrase / msg-id) ;RFC 822, RFC 2047;
84 : HEADER_FIELD_ADDRESS
85 : Keywords: #phrase ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
86 : MIME-Version: 1*DIGIT "." 1*DIGIT ;RFC 2045, RFC 2047;
87 : HEADER_FIELD_STRUCTURED
88 : Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
89 : Newsgroups: 1#(atom *("." atom)) ;RFC 1036, RFC 2047;
90 : HEADER_FIELD_STRUCTURED
91 : Organization: *text ;RFC 1036; HEADER_FIELD_TEXT
92 : Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
93 : ["id" msg-id] ["for" addr-spec] ";" date-time ;RFC 822, RFC 1123,
94 : RFC 2047; HEADER_FIELD_STRUCTURED
95 : References: *(phrase / msg-id) ;RFC 822, RFC 2047;
96 : HEADER_FIELD_ADDRESS
97 : Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
98 : Resent-Date: date-time ;RFC 822, RFC 1123, RFC 2047;
99 : HEADER_FIELD_STRUCTURED
100 : Resent-From: mailbox / 1#mailbox ;RFC 822, RFC 2047;
101 : HEADER_FIELD_ADDRESS
102 : Resent-Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
103 : Resent-Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
104 : Resent-Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
105 : Resent-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
106 : Resent-bcc: #address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
107 : Resent-cc: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
108 : Return-path: route-addr / ("<" ">") ;RFC 822, RFC 1123, RFC 2047;
109 : HEADER_FIELD_STRUCTURED
110 : Return-Receipt-To: address ;Not Internet standard;
111 : HEADER_FIELD_ADDRESS
112 : Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
113 : Subject: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
114 : Summary: *text ;RFC 1036; HEADER_FIELD_TEXT
115 : To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
116 : X-CHAOS-Marked: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
117 : X-CHAOS-Read: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
118 : X-CHAOS-Recipients: #*("<" atom word ">") ;local;
119 : HEADER_FIELD_STRUCTURED
120 : X-CHAOS-Size: 1*DIGIT ;local; HEADER_FIELD_STRUCTURED
121 : X-Mailer: *text ;Not Internet standard; HEADER_FIELD_TEXT
122 : X-Mozilla-Status: 4HEXDIG ;Mozilla; HEADER_FIELD_STRUCTURED
123 : X-Newsreader: *text ;Not Internet standard; HEADER_FIELD_TEXT
124 : X-Priority: "1" / "2" / "3" / "4" / "5" ;Not Internet standard;
125 : HEADER_FIELD_STRUCTURED
126 : Xref: sub-domain
127 : 1*((atom / string) *("." (atom / string)) ":" msg-number)
128 : ;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
129 : */
130 : enum HeaderFieldType
131 : {
132 : HEADER_FIELD_TEXT,
133 : HEADER_FIELD_STRUCTURED,
134 : HEADER_FIELD_PHRASE,
135 : HEADER_FIELD_MESSAGE_ID,
136 : HEADER_FIELD_ADDRESS
137 : };
138 :
139 : /** Check for US-ASCII character.
140 :
141 : @param nChar Some UCS-4 character.
142 :
143 : @return True if nChar is a US-ASCII character (0x00--0x7F).
144 : */
145 : static inline bool isUSASCII(sal_uInt32 nChar);
146 :
147 : /** Check for ISO 8859-1 character.
148 :
149 : @param nChar Some UCS-4 character.
150 :
151 : @return True if nChar is an ISO 8859-1 character (0x00--0xFF).
152 : */
153 : static inline bool isISO88591(sal_uInt32 nChar);
154 :
155 : /** Check for US-ASCII control character.
156 :
157 : @param nChar Some UCS-4 character.
158 :
159 : @return True if nChar is a US-ASCII control character (US-ASCII
160 : 0x00--0x1F or 0x7F).
161 : */
162 : static inline bool isControl(sal_uInt32 nChar);
163 :
164 : /** Check for US-ASCII white space character.
165 :
166 : @param nChar Some UCS-4 character.
167 :
168 : @return True if nChar is a US-ASCII white space character (US-ASCII
169 : 0x09 or 0x20).
170 : */
171 : static inline bool isWhiteSpace(sal_uInt32 nChar);
172 :
173 : /** Check for US-ASCII visible character.
174 :
175 : @param nChar Some UCS-4 character.
176 :
177 : @return True if nChar is a US-ASCII visible character (US-ASCII
178 : 0x21--0x7E).
179 : */
180 : static inline bool isVisible(sal_uInt32 nChar);
181 :
182 : /** Check for US-ASCII digit character.
183 :
184 : @param nChar Some UCS-4 character.
185 :
186 : @return True if nChar is a US-ASCII (decimal) digit character (US-
187 : ASCII '0'--'9').
188 : */
189 : static inline bool isDigit(sal_uInt32 nChar);
190 :
191 : /** Check for US-ASCII canonic hexadecimal digit character.
192 :
193 : @param nChar Some UCS-4 character.
194 :
195 : @return True if nChar is a US-ASCII canonic (i.e., upper case)
196 : hexadecimal digit character (US-ASCII '0'--'9' or 'A'--'F').
197 : */
198 : static inline bool isCanonicHexDigit(sal_uInt32 nChar);
199 :
200 : /** Check for US-ASCII hexadecimal digit character.
201 :
202 : @param nChar Some UCS-4 character.
203 :
204 : @return True if nChar is a US-ASCII hexadecimal digit character (US-
205 : ASCII '0'--'9', 'A'--'F', 'a'--'f').
206 : */
207 : static inline bool isHexDigit(sal_uInt32 nChar);
208 :
209 : /** Check for US-ASCII upper case character.
210 :
211 : @param nChar Some UCS-4 character.
212 :
213 : @return True if nChar is a US-ASCII upper case alphabetic character
214 : (US-ASCII 'A'--'Z').
215 : */
216 : static inline bool isUpperCase(sal_uInt32 nChar);
217 :
218 : /** Check for US-ASCII lower case character.
219 :
220 : @param nChar Some UCS-4 character.
221 :
222 : @return True if nChar is a US-ASCII lower case alphabetic character
223 : (US-ASCII 'a'--'z').
224 : */
225 : static inline bool isLowerCase(sal_uInt32 nChar);
226 :
227 : /** Check for US-ASCII alphabetic character.
228 :
229 : @param nChar Some UCS-4 character.
230 :
231 : @return True if nChar is a US-ASCII alphabetic character (US-ASCII
232 : 'A'--'Z' or 'a'--'z').
233 : */
234 : static inline bool isAlpha(sal_uInt32 nChar);
235 :
236 : /** Check for US-ASCII alphanumeric character.
237 :
238 : @param nChar Some UCS-4 character.
239 :
240 : @return True if nChar is a US-ASCII alphanumeric character (US-ASCII
241 : '0'--'9', 'A'--'Z' or 'a'--'z').
242 : */
243 : static inline bool isAlphanumeric(sal_uInt32 nChar);
244 :
245 : /** Check for US-ASCII Base 64 digit character.
246 :
247 : @param nChar Some UCS-4 character.
248 :
249 : @return True if nChar is a US-ASCII Base 64 digit character (US-ASCII
250 : 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
251 : */
252 : static inline bool isBase64Digit(sal_uInt32 nChar);
253 :
254 : /** Check whether some character is valid within an RFC 822 <atom>.
255 :
256 : @param nChar Some UCS-4 character.
257 :
258 : @return True if nChar is valid within an RFC 822 <atom> (US-ASCII
259 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
260 : '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
261 : */
262 : static bool isAtomChar(sal_uInt32 nChar);
263 :
264 : /** Check whether some character is valid within an RFC 2045 <token>.
265 :
266 : @param nChar Some UCS-4 character.
267 :
268 : @return True if nChar is valid within an RFC 2045 <token> (US-ASCII
269 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
270 : '-', '.', '^', '_', '`', '{', '|', '}', or '~').
271 : */
272 : static bool isTokenChar(sal_uInt32 nChar);
273 :
274 : /** Check whether some character is valid within an RFC 2047 <token>.
275 :
276 : @param nChar Some UCS-4 character.
277 :
278 : @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
279 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
280 : '-', '^', '_', '`', '{', '|', '}', or '~').
281 : */
282 : static bool isEncodedWordTokenChar(sal_uInt32 nChar);
283 :
284 : /** Check whether some character is valid within an RFC 2060 <atom>.
285 :
286 : @param nChar Some UCS-4 character.
287 :
288 : @return True if nChar is valid within an RFC 2060 <atom> (US-ASCII
289 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
290 : '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
291 : '|', '}', or '~').
292 : */
293 : static bool isIMAPAtomChar(sal_uInt32 nChar);
294 :
295 : /** Translate a US-ASCII character to upper case.
296 :
297 : @param nChar Some UCS-4 character.
298 :
299 : @return If nChar is a US-ASCII lower case character (US-ASCII
300 : 'a'--'z'), return the corresponding US-ASCII upper case character (US-
301 : ASCII 'A'--'Z'); otherwise, return nChar unchanged.
302 : */
303 : static inline sal_uInt32 toUpperCase(sal_uInt32 nChar);
304 :
305 : /** Translate a US-ASCII character to lower case.
306 :
307 : @param nChar Some UCS-4 character.
308 :
309 : @return If nChar is a US-ASCII upper case character (US-ASCII
310 : 'A'--'Z'), return the corresponding US-ASCII lower case character (US-
311 : ASCII 'a'--'z'); otherwise, return nChar unchanged.
312 : */
313 : static inline sal_uInt32 toLowerCase(sal_uInt32 nChar);
314 :
315 : /** Get the digit weight of a US-ASCII character.
316 :
317 : @param nChar Some UCS-4 character.
318 :
319 : @return If nChar is a US-ASCII (decimal) digit character (US-ASCII
320 : '0'--'9'), return the corresponding weight (0--9); otherwise,
321 : return -1.
322 : */
323 : static inline int getWeight(sal_uInt32 nChar);
324 :
325 : /** Get the hexadecimal digit weight of a US-ASCII character.
326 :
327 : @param nChar Some UCS-4 character.
328 :
329 : @return If nChar is a US-ASCII hexadecimal digit character (US-ASCII
330 : '0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
331 : (0--15); otherwise, return -1.
332 : */
333 : static inline int getHexWeight(sal_uInt32 nChar);
334 :
335 : /** Get the Base 64 digit weight of a US-ASCII character.
336 :
337 : @param nChar Some UCS-4 character.
338 :
339 : @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
340 : 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
341 : corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
342 : character (US-ASCII '='), return -1; otherwise, return -2.
343 : */
344 : static inline int getBase64Weight(sal_uInt32 nChar);
345 :
346 : /** Get a hexadecimal digit encoded as US-ASCII.
347 :
348 : @param nWeight Must be in the range 0--15, inclusive.
349 :
350 : @return The canonic (i.e., upper case) hexadecimal digit
351 : corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
352 : */
353 : static sal_uInt32 getHexDigit(int nWeight);
354 :
355 : static inline bool isHighSurrogate(sal_uInt32 nUTF16);
356 :
357 : static inline bool isLowSurrogate(sal_uInt32 nUTF16);
358 :
359 : static inline sal_uInt32 toUTF32(sal_Unicode cHighSurrogate,
360 : sal_Unicode cLowSurrogate);
361 :
362 : /** Check two US-ASCII strings for equality, ignoring case.
363 :
364 : @param pBegin1 Points to the start of the first string, must not be
365 : null.
366 :
367 : @param pEnd1 Points past the end of the first string, must be >=
368 : pBegin1.
369 :
370 : @param pString2 Points to the start of the null terminated second
371 : string, must not be null.
372 :
373 : @return True if the two strings are equal, ignoring the case of US-
374 : ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
375 : */
376 : static bool equalIgnoreCase(const sal_Char * pBegin1,
377 : const sal_Char * pEnd1,
378 : const sal_Char * pString2);
379 :
380 : /** Check two US-ASCII strings for equality, ignoring case.
381 :
382 : @param pBegin1 Points to the start of the first string, must not be
383 : null.
384 :
385 : @param pEnd1 Points past the end of the first string, must be >=
386 : pBegin1.
387 :
388 : @param pString2 Points to the start of the null terminated second
389 : string, must not be null.
390 :
391 : @return True if the two strings are equal, ignoring the case of US-
392 : ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
393 : */
394 : static bool equalIgnoreCase(const sal_Unicode * pBegin1,
395 : const sal_Unicode * pEnd1,
396 : const sal_Char * pString2);
397 :
398 : static inline bool startsWithLineBreak(const sal_Char * pBegin,
399 : const sal_Char * pEnd);
400 :
401 : static inline bool startsWithLineBreak(const sal_Unicode * pBegin,
402 : const sal_Unicode * pEnd);
403 :
404 : static inline bool startsWithLineFolding(const sal_Char * pBegin,
405 : const sal_Char * pEnd);
406 :
407 : static inline bool startsWithLineFolding(const sal_Unicode * pBegin,
408 : const sal_Unicode * pEnd);
409 :
410 : static bool startsWithLinearWhiteSpace(const sal_Char * pBegin,
411 : const sal_Char * pEnd);
412 :
413 : static const sal_Unicode * skipLinearWhiteSpace(const sal_Unicode *
414 : pBegin,
415 : const sal_Unicode * pEnd);
416 :
417 : static const sal_Unicode * skipComment(const sal_Unicode * pBegin,
418 : const sal_Unicode * pEnd);
419 :
420 : static const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
421 : pBegin,
422 : const sal_Unicode *
423 : pEnd);
424 :
425 : static inline bool needsQuotedStringEscape(sal_uInt32 nChar);
426 :
427 : static const sal_Char * skipQuotedString(const sal_Char * pBegin,
428 : const sal_Char * pEnd);
429 :
430 : static const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
431 : const sal_Unicode * pEnd);
432 :
433 : static bool scanUnsigned(const sal_Unicode *& rBegin,
434 : const sal_Unicode * pEnd, bool bLeadingZeroes,
435 : sal_uInt32 & rValue);
436 :
437 : static const sal_Unicode * scanQuotedBlock(const sal_Unicode * pBegin,
438 : const sal_Unicode * pEnd,
439 : sal_uInt32 nOpening,
440 : sal_uInt32 nClosing,
441 : sal_Size & rLength,
442 : bool & rModify);
443 :
444 : static sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
445 : sal_Unicode const * pEnd,
446 : INetContentTypeParameterList *
447 : pParameters);
448 :
449 : static inline rtl_TextEncoding translateToMIME(rtl_TextEncoding
450 : eEncoding);
451 :
452 : static inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
453 : eEncoding);
454 :
455 : static const sal_Char * getCharsetName(rtl_TextEncoding eEncoding);
456 :
457 : static rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
458 : const sal_Char * pEnd);
459 :
460 : static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);
461 :
462 : static INetMIMECharsetList_Impl *
463 : createPreferredCharsetList(rtl_TextEncoding eEncoding);
464 :
465 : static sal_Unicode * convertToUnicode(const sal_Char * pBegin,
466 : const sal_Char * pEnd,
467 : rtl_TextEncoding eEncoding,
468 : sal_Size & rSize);
469 :
470 : static sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
471 : const sal_Unicode * pEnd,
472 : rtl_TextEncoding eEncoding,
473 : sal_Size & rSize);
474 :
475 : /** Get the number of octets required to encode a UCS-4 character using
476 : UTF-8 encoding.
477 :
478 : @param nChar Some UCS-4 character.
479 :
480 : @return The number of octets required (in the range 1--6, inclusive).
481 : */
482 : static inline int getUTF8OctetCount(sal_uInt32 nChar);
483 :
484 : static inline void writeEscapeSequence(INetMIMEOutputSink & rSink,
485 : sal_uInt32 nChar);
486 :
487 : static void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
488 :
489 : static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
490 : HeaderFieldType eType,
491 : const rtl::OUString& rBody,
492 : rtl_TextEncoding ePreferredEncoding,
493 : bool bInitialSpace = true);
494 :
495 : static bool translateUTF8Char(const sal_Char *& rBegin,
496 : const sal_Char * pEnd,
497 : rtl_TextEncoding eEncoding,
498 : sal_uInt32 & rCharacter);
499 :
500 : static rtl::OUString decodeHeaderFieldBody(HeaderFieldType eType,
501 : const rtl::OString& rBody);
502 :
503 : // #i70651#: Prevent warnings on Mac OS X.
504 : #ifdef MACOSX
505 : #pragma GCC system_header
506 : #endif
507 :
508 : /** Get the UTF-32 character at the head of a UTF-16 encoded string.
509 :
510 : @param rBegin Points to the start of the UTF-16 encoded string, must
511 : not be null. On exit, it points past the first UTF-32 character's
512 : encoding.
513 :
514 : @param pEnd Points past the end of the UTF-16 encoded string, must be
515 : strictly greater than rBegin.
516 :
517 : @return The UCS-4 character at the head of the UTF-16 encoded string.
518 : If the string does not start with the UTF-16 encoding of a UCS-4
519 : character, the first UTF-16 value is returned.
520 : */
521 : static inline sal_uInt32 getUTF32Character(const sal_Unicode *& rBegin,
522 : const sal_Unicode * pEnd);
523 :
524 : /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
525 :
526 : @param pBuffer Points to a buffer, must not be null.
527 :
528 : @param nUTF32 A UTF-32 character, must be in the range 0..0x10FFFF.
529 :
530 : @return A pointer past the UTF-16 characters put into the buffer
531 : (i.e., pBuffer + 1 or pBuffer + 2).
532 : */
533 : static inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
534 : sal_uInt32 nUTF32);
535 : };
536 :
537 : // static -- true iff nChar lies in the US-ASCII range (0x00--0x7F).
538 0 : inline bool INetMIME::isUSASCII(sal_uInt32 nChar)
539 : {
540 0 : return nChar <= 0x7F;
541 : }
542 :
543 : // static -- true iff nChar lies in the ISO 8859-1 range (0x00--0xFF).
544 : inline bool INetMIME::isISO88591(sal_uInt32 nChar)
545 : {
546 : return nChar <= 0xFF;
547 : }
548 :
549 : // static -- true iff nChar is a US-ASCII control character (0x00--0x1F, or 0x7F).
550 : inline bool INetMIME::isControl(sal_uInt32 nChar)
551 : {
552 : return nChar <= 0x1F || nChar == 0x7F;
553 : }
554 :
555 : // static -- true iff nChar is a horizontal tab or a space.
556 : inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar)
557 : {
558 : return nChar == '\t' || nChar == ' ';
559 : }
560 :
561 : // static -- true iff nChar lies in the range '!'--'~', inclusive.
562 : inline bool INetMIME::isVisible(sal_uInt32 nChar)
563 : {
564 : return nChar >= '!' && nChar <= '~';
565 : }
566 :
567 : // static -- true iff nChar is a decimal digit ('0'--'9').
568 : inline bool INetMIME::isDigit(sal_uInt32 nChar)
569 : {
570 : return nChar >= '0' && nChar <= '9';
571 : }
572 :
573 : // static -- true iff nChar is a decimal digit or one of 'A'--'F' (upper case only).
574 : inline bool INetMIME::isCanonicHexDigit(sal_uInt32 nChar)
575 : {
576 : return isDigit(nChar) || (nChar >= 'A' && nChar <= 'F');
577 : }
578 :
579 : // static -- true iff nChar is a canonic (upper case) hex digit or one of 'a'--'f'.
580 : inline bool INetMIME::isHexDigit(sal_uInt32 nChar)
581 : {
582 : return isCanonicHexDigit(nChar) || (nChar >= 'a' && nChar <= 'f');
583 : }
584 :
585 : // static -- true iff nChar is one of 'A'--'Z'.
586 0 : inline bool INetMIME::isUpperCase(sal_uInt32 nChar)
587 : {
588 0 : return nChar >= 'A' && nChar <= 'Z';
589 : }
590 :
591 : // static -- true iff nChar is one of 'a'--'z'.
592 0 : inline bool INetMIME::isLowerCase(sal_uInt32 nChar)
593 : {
594 0 : return nChar >= 'a' && nChar <= 'z';
595 : }
596 :
597 : // static -- true iff nChar is an upper case or lower case US-ASCII letter.
598 0 : inline bool INetMIME::isAlpha(sal_uInt32 nChar)
599 : {
600 0 : return isUpperCase(nChar) || isLowerCase(nChar);
601 : }
602 :
603 : // static -- true iff nChar is a US-ASCII letter or a decimal digit.
604 : inline bool INetMIME::isAlphanumeric(sal_uInt32 nChar)
605 : {
606 : return isAlpha(nChar) || isDigit(nChar);
607 : }
608 :
609 : // static -- true iff nChar is a US-ASCII letter, a decimal digit, '+' or '/'.
610 : inline bool INetMIME::isBase64Digit(sal_uInt32 nChar)
611 : {
612 : return isUpperCase(nChar) || isLowerCase(nChar) || isDigit(nChar)
613 : || nChar == '+' || nChar == '/';
614 : }
615 :
616 : // static -- maps 'a'--'z' to 'A'--'Z'; any other value is returned unchanged.
617 : inline sal_uInt32 INetMIME::toUpperCase(sal_uInt32 nChar)
618 : {
619 : return isLowerCase(nChar) ? nChar - ('a' - 'A') : nChar;
620 : }
621 :
622 : // static -- maps 'A'--'Z' to 'a'--'z'; any other value is returned unchanged.
623 : inline sal_uInt32 INetMIME::toLowerCase(sal_uInt32 nChar)
624 : {
625 : return isUpperCase(nChar) ? nChar + ('a' - 'A') : nChar;
626 : }
627 :
628 : // static -- numeric value (0--9) of a decimal digit character, or -1 if nChar is not a digit.
629 : inline int INetMIME::getWeight(sal_uInt32 nChar)
630 : {
631 : return isDigit(nChar) ? int(nChar - '0') : -1;
632 : }
633 :
634 : // static -- numeric value (0--15) of a hex digit character in either case, or -1 if nChar is not one.
635 : inline int INetMIME::getHexWeight(sal_uInt32 nChar)
636 : {
637 : return isDigit(nChar) ? int(nChar - '0') :
638 : nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) :
639 : nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1;
640 : }
641 :
642 : // static -- Base 64 digit value (0--63) of nChar; -1 for the padding character '=', -2 for anything else.
643 : inline int INetMIME::getBase64Weight(sal_uInt32 nChar)
644 : {
645 : return isUpperCase(nChar) ? int(nChar - 'A') : // 'A'--'Z' -> 0--25
646 : isLowerCase(nChar) ? int(nChar - 'a' + 26) : // 'a'--'z' -> 26--51
647 : isDigit(nChar) ? int(nChar - '0' + 52) : // '0'--'9' -> 52--61
648 : nChar == '+' ? 62 :
649 : nChar == '/' ? 63 :
650 : nChar == '=' ? -1 : -2;
651 : }
652 :
653 : // static -- true iff nUTF16 lies in the high surrogate range 0xD800--0xDBFF.
654 0 : inline bool INetMIME::isHighSurrogate(sal_uInt32 nUTF16)
655 : {
656 0 : return nUTF16 >= 0xD800 && nUTF16 <= 0xDBFF;
657 : }
658 :
659 : // static -- true iff nUTF16 lies in the low surrogate range 0xDC00--0xDFFF.
660 0 : inline bool INetMIME::isLowSurrogate(sal_uInt32 nUTF16)
661 : {
662 0 : return nUTF16 >= 0xDC00 && nUTF16 <= 0xDFFF;
663 : }
664 :
665 : // static -- combines a UTF-16 surrogate pair into the UTF-32 code point it encodes (0x10000--0x10FFFF).
666 : inline sal_uInt32 INetMIME::toUTF32(sal_Unicode cHighSurrogate,
667 : sal_Unicode cLowSurrogate)
668 : {
669 : DBG_ASSERT(isHighSurrogate(cHighSurrogate)
670 : && isLowSurrogate(cLowSurrogate),
671 : "INetMIME::toUTF32(): Bad chars");
672 : return (((sal_uInt32(cHighSurrogate) & 0x3FF) << 10) // high surrogate carries the upper ten bits
673 : | (sal_uInt32(cLowSurrogate) & 0x3FF)) + 0x10000; // add the offset, as getUTF32Character() does
674 : }
675 :
676 : // static -- true iff [pBegin, pEnd) holds at least two octets and starts with CR LF.
677 : inline bool INetMIME::startsWithLineBreak(const sal_Char * pBegin,
678 : const sal_Char * pEnd)
679 : {
680 : DBG_ASSERT(pBegin && pBegin <= pEnd,
681 : "INetMIME::startsWithLineBreak(): Bad sequence");
682 :
683 : return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
684 : // CR, LF
685 : }
686 :
687 : // static -- true iff [pBegin, pEnd) holds at least two UTF-16 units and starts with CR LF.
688 : inline bool INetMIME::startsWithLineBreak(const sal_Unicode * pBegin,
689 : const sal_Unicode * pEnd)
690 : {
691 : DBG_ASSERT(pBegin && pBegin <= pEnd,
692 : "INetMIME::startsWithLineBreak(): Bad sequence");
693 :
694 : return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
695 : // CR, LF
696 : }
697 :
698 : // static -- true iff [pBegin, pEnd) starts with CR LF followed by a white space character (tab or space).
699 : inline bool INetMIME::startsWithLineFolding(const sal_Char * pBegin,
700 : const sal_Char * pEnd)
701 : {
702 : DBG_ASSERT(pBegin && pBegin <= pEnd,
703 : "INetMIME::startsWithLineFolding(): Bad sequence");
704 :
705 : return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
706 : && isWhiteSpace(pBegin[2]); // CR, LF, then tab or space
707 : }
708 :
709 : // static -- UTF-16 variant: true iff [pBegin, pEnd) starts with CR LF followed by a tab or a space.
710 : inline bool INetMIME::startsWithLineFolding(const sal_Unicode * pBegin,
711 : const sal_Unicode * pEnd)
712 : {
713 : DBG_ASSERT(pBegin && pBegin <= pEnd,
714 : "INetMIME::startsWithLineFolding(): Bad sequence");
715 :
716 : return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
717 : && isWhiteSpace(pBegin[2]); // CR, LF, then tab or space
718 : }
719 :
720 : // static -- true iff [pBegin, pEnd) is non-empty and starts with a white space character or a line folding.
721 : inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char * pBegin,
722 : const sal_Char * pEnd)
723 : {
724 : DBG_ASSERT(pBegin && pBegin <= pEnd,
725 : "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
726 :
727 : return pBegin != pEnd // the emptiness check guards the dereference on the next line
728 : && (isWhiteSpace(*pBegin) || startsWithLineFolding(pBegin, pEnd));
729 : }
730 :
731 : // static -- true iff nChar is a double quote or a backslash.
732 : inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar)
733 : {
734 : return nChar == '"' || nChar == '\\';
735 : }
736 :
737 : // static -- when WNT is defined, maps RTL_TEXTENCODING_MS_1252 to RTL_TEXTENCODING_ISO_8859_1; otherwise returns eEncoding unchanged.
738 : inline rtl_TextEncoding INetMIME::translateToMIME(rtl_TextEncoding eEncoding)
739 : {
740 : #if defined WNT
741 : return eEncoding == RTL_TEXTENCODING_MS_1252 ?
742 : RTL_TEXTENCODING_ISO_8859_1 : eEncoding;
743 : #else // WNT
744 : return eEncoding;
745 : #endif // WNT
746 : }
747 :
748 : // static -- when WNT is defined, maps RTL_TEXTENCODING_ISO_8859_1 to RTL_TEXTENCODING_MS_1252; otherwise returns eEncoding unchanged.
749 : inline rtl_TextEncoding INetMIME::translateFromMIME(rtl_TextEncoding
750 : eEncoding)
751 : {
752 : #if defined WNT
753 : return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
754 : RTL_TEXTENCODING_MS_1252 : eEncoding;
755 : #else
756 : return eEncoding;
757 : #endif
758 : }
759 :
760 : // static -- true iff rtl_isOctetTextEncoding() reports sal_True for eEncoding.
761 : inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
762 : {
763 : return ( rtl_isOctetTextEncoding(eEncoding) == sal_True );
764 : }
765 :
766 : // static -- number of octets (1--6) needed to encode nChar in UTF-8; nChar must be below 0x80000000.
767 : inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar)
768 : {
769 : DBG_ASSERT(nChar < 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
770 :
771 : return nChar < 0x80 ? 1 :
772 : nChar < 0x800 ? 2 :
773 : nChar < 0x10000 ? 3 : // upper bounds are exclusive: 0x10000 itself needs four octets
774 : nChar < 0x200000 ? 4 : // 0x200000 itself needs five octets
775 : nChar < 0x4000000 ? 5 : 6; // 0x4000000 itself needs six octets
776 : }
777 :
778 : // static -- reads one UTF-32 character from the head of [rBegin, pEnd) and advances rBegin past it.
779 : inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
780 : const sal_Unicode * pEnd)
781 : {
782 : DBG_ASSERT(rBegin && rBegin < pEnd,
783 : "INetMIME::getUTF32Character(): Bad sequence");
784 : if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF // high surrogate with a successor...
785 : && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF) // ...that is a low surrogate
786 : {
787 : sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10; // upper ten bits from the high surrogate
788 : return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000; // lower ten bits, plus the surrogate offset
789 : }
790 : else
791 : return *rBegin++; // no complete surrogate pair: hand back the single UTF-16 unit
792 : }
793 :
794 : // static -- writes nUTF32 to pBuffer as one or two UTF-16 units and returns the position past what was written.
795 : inline sal_Unicode * INetMIME::putUTF32Character(sal_Unicode * pBuffer,
796 : sal_uInt32 nUTF32)
797 : {
798 : DBG_ASSERT(nUTF32 <= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
799 : if (nUTF32 < 0x10000)
800 : *pBuffer++ = sal_Unicode(nUTF32); // fits into a single UTF-16 unit
801 : else
802 : {
803 : nUTF32 -= 0x10000; // remove the surrogate offset before splitting
804 : *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10)); // high surrogate: upper ten bits
805 : *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF)); // low surrogate: lower ten bits
806 : }
807 : return pBuffer;
808 : }
809 :
810 : class INetMIMEOutputSink
811 : {
812 : public:
813 : static sal_uInt32 const NO_LINE_LENGTH_LIMIT = SAL_MAX_UINT32;
814 :
815 : private:
816 : sal_uInt32 m_nColumn;
817 : sal_uInt32 m_nLineLengthLimit;
818 :
819 : protected:
820 : /** Write a sequence of octets.
821 :
822 : @param pBegin Points to the start of the sequence, must not be null.
823 :
824 : @param pEnd Points past the end of the sequence, must be >= pBegin.
825 : */
826 : virtual void writeSequence(const sal_Char * pBegin,
827 : const sal_Char * pEnd) = 0;
828 :
829 : /** Write a null terminated sequence of octets (without the terminating
830 : null).
831 :
832 : @param pSequence A null terminated sequence of octets, must not be
833 : null.
834 :
835 : @return The length of pSequence (without the terminating null).
836 : */
837 : virtual sal_Size writeSequence(const sal_Char * pSequence);
838 :
839 : /** Write a sequence of octets.
840 :
841 : @descr The supplied sequence of UCS-4 characters is interpreted as a
842 : sequence of octets. It is an error if any of the elements of the
843 : sequence has a numerical value greater than 255.
844 :
845 : @param pBegin Points to the start of the sequence, must not be null.
846 :
847 : @param pEnd Points past the end of the sequence, must be >= pBegin.
848 : */
849 : virtual void writeSequence(const sal_uInt32 * pBegin,
850 : const sal_uInt32 * pEnd);
851 :
852 : /** Write a sequence of octets.
853 :
854 : @descr The supplied sequence of Unicode characters is interpreted as
855 : a sequence of octets. It is an error if any of the elements of the
856 : sequence has a numerical value greater than 255.
857 :
858 : @param pBegin Points to the start of the sequence, must not be null.
859 :
860 : @param pEnd Points past the end of the sequence, must be >= pBegin.
861 : */
862 : virtual void writeSequence(const sal_Unicode * pBegin,
863 : const sal_Unicode * pEnd);
864 :
865 : public:
866 : INetMIMEOutputSink(sal_uInt32 nTheColumn = 0,
867 : sal_uInt32 nTheLineLengthLimit
868 : = INetMIME::SOFT_LINE_LENGTH_LIMIT):
869 : m_nColumn(nTheColumn), m_nLineLengthLimit(nTheLineLengthLimit) {}
870 :
871 0 : virtual ~INetMIMEOutputSink() {}
872 :
873 : /** Get the current column.
874 :
875 : @return The current column (starting from zero).
876 : */
877 : sal_uInt32 getColumn() const { return m_nColumn; }
878 :
879 : sal_uInt32 getLineLengthLimit() const { return m_nLineLengthLimit; }
880 :
881 : void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)
882 : { m_nLineLengthLimit = nTheLineLengthLimit; }
883 :
884 : virtual ErrCode getError() const;
885 :
886 : /** Write a sequence of octets.
887 :
888 : @param pBegin Points to the start of the sequence, must not be null.
889 :
890 : @param pEnd Points past the end of the sequence, must be >= pBegin.
891 : */
892 : inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
893 :
894 : /** Write a sequence of octets.
895 :
896 : @param pBegin Points to the start of the sequence, must not be null.
897 :
898 : @param nLength The length of the sequence.
899 : */
900 : void write(const sal_Char * pBegin, sal_Size nLength)
901 : { write(pBegin, pBegin + nLength); }
902 :
903 : /** Write a sequence of octets.
904 :
905 : @descr The supplied sequence of UCS-4 characters is interpreted as a
906 : sequence of octets. It is an error if any of the elements of the
907 : sequence has a numerical value greater than 255.
908 :
909 : @param pBegin Points to the start of the sequence, must not be null.
910 :
911 : @param pEnd Points past the end of the sequence, must be >= pBegin.
912 : */
913 : inline void write(const sal_uInt32 * pBegin, const sal_uInt32 * pEnd);
914 :
915 : /** Write a sequence of octets.
916 :
917 : @descr The supplied sequence of Unicode characters is interpreted as
918 : a sequence of octets. It is an error if any of the elements of the
919 : sequence has a numerical value greater than 255.
920 :
921 : @param pBegin Points to the start of the sequence, must not be null.
922 :
923 : @param pEnd Points past the end of the sequence, must be >= pBegin.
924 : */
925 : inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
926 :
927 : /** Write a sequence of octets.
928 :
929 : @param rOctets A rtl::OString, interpreted as a sequence of octets.
930 :
931 : @param nBegin The offset of the first character to write.
932 :
933 : @param nEnd The offset past the last character to write.
934 : */
935 : void write(const rtl::OString& rOctets, xub_StrLen nBegin,
936 : xub_StrLen nEnd)
937 : {
938 : writeSequence(rOctets.getStr() + nBegin, rOctets.getStr() + nEnd);
939 : m_nColumn += nEnd - nBegin;
940 : }
941 :
942 : /** Write a single octet.
943 :
944 : @param nOctet Some octet.
945 :
946 : @return This instance.
947 : */
948 : inline INetMIMEOutputSink & operator <<(sal_Char nOctet);
949 :
950 : /** Write a null terminated sequence of octets (without the terminating
951 : null).
952 :
953 : @param pOctets A null terminated sequence of octets, must not be
954 : null.
955 :
956 : @return This instance.
957 : */
958 : inline INetMIMEOutputSink & operator <<(const sal_Char * pOctets);
959 :
960 : /** Write a sequence of octets.
961 :
962 : @param rOctets A rtl::OString, interpreted as a sequence of octets.
963 :
964 : @return This instance.
965 : */
966 : INetMIMEOutputSink & operator <<(const rtl::OString& rOctets)
967 : {
968 : writeSequence(rOctets.getStr(), rOctets.getStr() + rOctets.getLength());
969 : m_nColumn += rOctets.getLength();
970 : return *this;
971 : }
972 :
973 : /** Call a manipulator function.
974 :
975 : @param pManipulator A manipulator function.
976 :
977 : @return Whatever the manipulator function returns.
978 : */
979 : INetMIMEOutputSink &
980 : operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))
981 : { return pManipulator(*this); }
982 :
/** Write a line end, that is, CR followed by LF.

    @descr  The implementation is not part of this header.
 */
void writeLineEnd();
986 :
987 : /** A manipulator function that writes a line end (CR LF).
988 :
989 : @param rSink Some sink.
990 :
991 : @return The sink rSink.
992 : */
993 : static inline INetMIMEOutputSink & endl(INetMIMEOutputSink & rSink);
994 : };
995 :
996 : inline void INetMIMEOutputSink::write(const sal_Char * pBegin,
997 : const sal_Char * pEnd)
998 : {
999 : writeSequence(pBegin, pEnd);
1000 : m_nColumn += pEnd - pBegin;
1001 : }
1002 :
1003 : inline void INetMIMEOutputSink::write(const sal_uInt32 * pBegin,
1004 : const sal_uInt32 * pEnd)
1005 : {
1006 : writeSequence(pBegin, pEnd);
1007 : m_nColumn += pEnd - pBegin;
1008 : }
1009 :
1010 : inline void INetMIMEOutputSink::write(const sal_Unicode * pBegin,
1011 : const sal_Unicode * pEnd)
1012 : {
1013 : writeSequence(pBegin, pEnd);
1014 : m_nColumn += pEnd - pBegin;
1015 : }
1016 :
1017 : inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(sal_Char nOctet)
1018 : {
1019 : writeSequence(&nOctet, &nOctet + 1);
1020 : ++m_nColumn;
1021 : return *this;
1022 : }
1023 :
1024 : inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const sal_Char *
1025 : pOctets)
1026 : {
1027 : m_nColumn += writeSequence(pOctets);
1028 : return *this;
1029 : }
1030 :
1031 : // static
1032 : inline INetMIMEOutputSink & INetMIMEOutputSink::endl(INetMIMEOutputSink &
1033 : rSink)
1034 : {
1035 : rSink.writeLineEnd();
1036 : return rSink;
1037 : }
1038 :
1039 : // static
1040 : inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink & rSink,
1041 : sal_uInt32 nChar)
1042 : {
1043 : DBG_ASSERT(nChar <= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
1044 : rSink << '=' << sal_uInt8(getHexDigit(nChar >> 4))
1045 : << sal_uInt8(getHexDigit(nChar & 15));
1046 : }
1047 :
1048 : class INetMIMEStringOutputSink: public INetMIMEOutputSink
1049 : {
1050 : rtl::OStringBuffer m_aBuffer;
1051 :
1052 : using INetMIMEOutputSink::writeSequence;
1053 :
1054 : virtual void writeSequence(const sal_Char * pBegin,
1055 : const sal_Char * pEnd);
1056 :
1057 : public:
1058 : inline INetMIMEStringOutputSink(sal_uInt32 nColumn = 0,
1059 : sal_uInt32 nLineLengthLimit
1060 : = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1061 : INetMIMEOutputSink(nColumn, nLineLengthLimit) {}
1062 :
1063 : virtual ErrCode getError() const;
1064 :
1065 : rtl::OString takeBuffer()
1066 : {
1067 : return m_aBuffer.makeStringAndClear();
1068 : }
1069 : };
1070 :
1071 : class INetMIMEEncodedWordOutputSink
1072 : {
1073 : public:
1074 : enum Context { CONTEXT_TEXT = 1,
1075 : CONTEXT_COMMENT = 2,
1076 : CONTEXT_PHRASE = 4 };
1077 :
1078 : enum Space { SPACE_NO, SPACE_ENCODED, SPACE_ALWAYS };
1079 :
1080 : private:
1081 : enum { BUFFER_SIZE = 256 };
1082 :
1083 : enum Coding { CODING_NONE, CODING_QUOTED, CODING_ENCODED,
1084 : CODING_ENCODED_TERMINATED };
1085 :
1086 : enum EncodedWordState { STATE_INITIAL, STATE_FIRST_EQUALS,
1087 : STATE_FIRST_QUESTION, STATE_CHARSET,
1088 : STATE_SECOND_QUESTION, STATE_ENCODING,
1089 : STATE_THIRD_QUESTION, STATE_ENCODED_TEXT,
1090 : STATE_FOURTH_QUESTION, STATE_SECOND_EQUALS,
1091 : STATE_BAD };
1092 :
1093 : INetMIMEOutputSink & m_rSink;
1094 : Context m_eContext;
1095 : Space m_eInitialSpace;
1096 : sal_uInt32 m_nExtraSpaces;
1097 : INetMIMECharsetList_Impl * m_pEncodingList;
1098 : sal_Unicode * m_pBuffer;
1099 : sal_uInt32 m_nBufferSize;
1100 : sal_Unicode * m_pBufferEnd;
1101 : Coding m_ePrevCoding;
1102 : rtl_TextEncoding m_ePrevMIMEEncoding;
1103 : Coding m_eCoding;
1104 : sal_uInt32 m_nQuotedEscaped;
1105 : EncodedWordState m_eEncodedWordState;
1106 :
1107 : inline bool needsEncodedWordEscape(sal_uInt32 nChar) const;
1108 :
1109 : void finish(bool bWriteTrailer);
1110 :
1111 : public:
1112 : inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,
1113 : Context eTheContext,
1114 : Space eTheInitialSpace,
1115 : rtl_TextEncoding ePreferredEncoding);
1116 :
1117 : ~INetMIMEEncodedWordOutputSink();
1118 :
1119 : INetMIMEEncodedWordOutputSink & operator <<(sal_uInt32 nChar);
1120 :
1121 : inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1122 :
1123 : inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1124 :
1125 : inline bool flush();
1126 : };
1127 :
1128 : inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink(
1129 : INetMIMEOutputSink & rTheSink, Context eTheContext,
1130 : Space eTheInitialSpace, rtl_TextEncoding ePreferredEncoding):
1131 : m_rSink(rTheSink),
1132 : m_eContext(eTheContext),
1133 : m_eInitialSpace(eTheInitialSpace),
1134 : m_nExtraSpaces(0),
1135 : m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding)),
1136 : m_ePrevCoding(CODING_NONE),
1137 : m_eCoding(CODING_NONE),
1138 : m_nQuotedEscaped(0),
1139 : m_eEncodedWordState(STATE_INITIAL)
1140 : {
1141 : m_nBufferSize = BUFFER_SIZE;
1142 : m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory(
1143 : m_nBufferSize
1144 : * sizeof (sal_Unicode)));
1145 : m_pBufferEnd = m_pBuffer;
1146 : }
1147 :
1148 : inline void INetMIMEEncodedWordOutputSink::write(const sal_Char * pBegin,
1149 : const sal_Char * pEnd)
1150 : {
1151 : DBG_ASSERT(pBegin && pBegin <= pEnd,
1152 : "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1153 :
1154 : while (pBegin != pEnd)
1155 : operator <<(*pBegin++);
1156 : }
1157 :
1158 : inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin,
1159 : const sal_Unicode * pEnd)
1160 : {
1161 : DBG_ASSERT(pBegin && pBegin <= pEnd,
1162 : "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1163 :
1164 : while (pBegin != pEnd)
1165 : operator <<(*pBegin++);
1166 : }
1167 :
1168 : inline bool INetMIMEEncodedWordOutputSink::flush()
1169 : {
1170 : finish(true);
1171 : return m_ePrevCoding != CODING_NONE;
1172 : }
1173 :
1174 0 : struct INetContentTypeParameter
1175 : {
1176 : /** The name of the attribute, in US-ASCII encoding and converted to lower
1177 : case. If a parameter value is split as described in RFC 2231, there
1178 : will only be one item for the complete parameter, with the attribute
1179 : name lacking any section suffix.
1180 : */
1181 : const rtl::OString m_sAttribute;
1182 :
1183 : /** The optional character set specification (see RFC 2231), in US-ASCII
1184 : encoding and converted to lower case.
1185 : */
1186 : const rtl::OString m_sCharset;
1187 :
1188 : /** The optional language specification (see RFC 2231), in US-ASCII
1189 : encoding and converted to lower case.
1190 : */
1191 : const rtl::OString m_sLanguage;
1192 :
1193 : /** The attribute value. If the value is a quoted-string, it is
1194 : 'unpacked.' If a character set is specified, and the value can be
1195 : converted to Unicode, this is done. Also, if no character set is
1196 : specified, it is first tried to convert the value from UTF-8 encoding
1197 : to Unicode, and if that doesn't work (because the value is not in
1198 : UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1199 : (which will always work). But if a character set is specified and the
1200 : value cannot be converted from that character set to Unicode, special
1201 : action is taken to produce a value that can possibly be transformed
1202 : back into its original form: Any 8-bit character from a non-encoded
1203 : part of the original value is directly converted to Unicode
1204 : (effectively handling it as if it was ISO-8859-1 encoded), and any
1205 : 8-bit character from an encoded part of the original value is mapped
1206 : to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1207 : within Unicode's Private Use Area (effectively adding 0xF800 to the
1208 : character's numeric value).
1209 : */
1210 : const rtl::OUString m_sValue;
1211 :
1212 : /** This is true if the value is successfuly converted to Unicode, and
1213 : false if the value is a special mixture of ISO-LATIN-1 characters and
1214 : characters from Unicode's Private Use Area.
1215 : */
1216 : const bool m_bConverted;
1217 :
1218 : INetContentTypeParameter(const rtl::OString& rTheAttribute,
1219 : const rtl::OString& rTheCharset, const rtl::OString& rTheLanguage,
1220 : const rtl::OUString& rTheValue, bool bTheConverted)
1221 : : m_sAttribute(rTheAttribute)
1222 : , m_sCharset(rTheCharset)
1223 : , m_sLanguage(rTheLanguage)
1224 : , m_sValue(rTheValue)
1225 : , m_bConverted(bTheConverted)
1226 : {
1227 : }
1228 : };
1229 :
1230 0 : class TOOLS_DLLPUBLIC INetContentTypeParameterList
1231 : {
1232 : public:
1233 :
1234 : void Clear();
1235 :
1236 : void Insert(INetContentTypeParameter * pParameter, sal_uIntPtr nIndex)
1237 : {
1238 : maEntries.insert(maEntries.begin()+nIndex,pParameter);
1239 : }
1240 :
1241 : void Append(INetContentTypeParameter *pParameter)
1242 : {
1243 : maEntries.push_back(pParameter);
1244 : }
1245 :
1246 : inline const INetContentTypeParameter * GetObject(sal_uIntPtr nIndex) const
1247 : {
1248 : return &(maEntries[nIndex]);
1249 : }
1250 :
1251 : const INetContentTypeParameter * find(const rtl::OString& rAttribute) const;
1252 :
1253 : private:
1254 :
1255 : boost::ptr_vector<INetContentTypeParameter> maEntries;
1256 : };
1257 :
1258 : #endif
1259 :
1260 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|