Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 : #ifndef INCLUDED_TOOLS_INETMIME_HXX
20 : #define INCLUDED_TOOLS_INETMIME_HXX
21 :
22 : #include <boost/ptr_container/ptr_vector.hpp>
23 :
24 : #include <tools/toolsdllapi.h>
25 : #include <rtl/alloc.h>
26 : #include <rtl/character.hxx>
27 : #include <rtl/string.hxx>
28 : #include <rtl/strbuf.hxx>
29 : #include <rtl/ustring.hxx>
30 : #include <rtl/tencinfo.h>
31 : #include <tools/debug.hxx>
32 : #include <tools/errcode.hxx>
33 :
34 : class DateTime;
35 : class INetContentTypeParameterList;
36 : class INetMIMECharsetList_Impl;
37 : class INetMIMEOutputSink;
38 :
39 : class TOOLS_DLLPUBLIC INetMIME
40 : {
41 : public:
42 : enum { SOFT_LINE_LENGTH_LIMIT = 76,
43 : HARD_LINE_LENGTH_LIMIT = 998 };
44 :
45 : /** The various types of message header field bodies, with respect to
46 : encoding and decoding them.
47 :
48 : @descr At the moment, five different types of header fields suffice
49 : to describe how to encoded and decode any known message header field
50 : body, but need for more types may arise in the future as new header
51 : fields are introduced.
52 :
53 : @descr The following is an exhaustive list of all the header fields
54 : currently known to our implementation. For every header field, it
55 : includes a 'canonic' (with regard to capitalization) name, a grammar
56 : rule for the body (using RFC 822 and RFC 2234 conventions), a list of
57 : relevant sources of information, and the HeaderFieldType value to use
58 : with that header field. The list is based on RFC 2076 and draft-
59 : palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
60 : ietf/jp-ietf-home.html#anchor1003783>).
61 :
62 : Approved: address ;RFC 1036; HEADER_FIELD_ADDRESS
63 : bcc: #address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
64 : cc: 1#address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
65 : Comments: *text ;RFCs 822, RFC 2047; HEADER_FIELD_TEXT
66 : Content-Base: absoluteURI ;RFC 2110; HEADER_FIELD_TEXT
67 : Content-Description: *text ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
68 : Content-Disposition: disposition-type *(";" disposition-parm)
69 : ;RFC 1806; HEADER_FIELD_STRUCTURED
70 : Content-ID: msg-id ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
71 : Content-Location: absoluteURI / relativeURI ;RFC 2110;
72 : HEADER_FIELD_TEXT
73 : Content-Transfer-Encoding: mechanism ;RFC 2045, RFC 2047;
74 : HEADER_FIELD_STRUCTURED
75 : Content-Type: type "/" subtype *(";" parameter) ;RFC 2045, RFC 2047;
76 : HEADER_FIELD_STRUCTURED
77 : Control: *text ;RFC 1036; HEADER_FIELD_TEXT
78 : Date: date-time ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
79 : Distribution: 1#atom ;RFC 1036; HEADER_FIELD_STRUCTURED
80 : Encrypted: 1#2word ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
81 : Expires: date-time ;RFC 1036; HEADER_FIELD_STRUCTURED
82 : Followup-To: 1#(atom *("." atom)) ;RFC 1036; HEADER_FIELD_STRUCTURED
83 : From: mailbox / 1#mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
84 : In-Reply-To: *(phrase / msg-id) ;RFC 822, RFC 2047;
85 : HEADER_FIELD_ADDRESS
86 : Keywords: #phrase ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
87 : MIME-Version: 1*DIGIT "." 1*DIGIT ;RFC 2045, RFC 2047;
88 : HEADER_FIELD_STRUCTURED
89 : Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
90 : Newsgroups: 1#(atom *("." atom)) ;RFC 1036, RFC 2047;
91 : HEADER_FIELD_STRUCTURED
92 : Organization: *text ;RFC 1036; HEADER_FIELD_TEXT
93 : Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
94 : ["id" msg-id] ["for" addr-spec] ";" date-time ;RFC 822, RFC 1123,
95 : RFC 2047; HEADER_FIELD_STRUCTURED
96 : References: *(phrase / msg-id) ;RFC 822, RFC 2047;
97 : HEADER_FIELD_ADDRESS
98 : Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
99 : Resent-Date: date-time ;RFC 822, RFC 1123, RFC 2047;
100 : HEADER_FIELD_STRUCTURED
101 : Resent-From: mailbox / 1#mailbox ;RFC 822, RFC 2047;
102 : HEADER_FIELD_ADDRESS
103 : Resent-Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
104 : Resent-Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
105 : Resent-Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
106 : Resent-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
107 : Resent-bcc: #address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
108 : Resent-cc: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
109 : Return-path: route-addr / ("<" ">") ;RFC 822, RFC 1123, RFC 2047;
110 : HEADER_FIELD_STRUCTURED
111 : Return-Receipt-To: address ;Not Internet standard;
112 : HEADER_FIELD_ADDRES
113 : Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
114 : Subject: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
115 : Summary: *text ;RFC 1036; HEADER_FIELD_TEXT
116 : To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
117 : X-CHAOS-Marked: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
118 : X-CHAOS-Read: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
119 : X-CHAOS-Recipients: #*("<" atom word ">") ;local;
120 : HEADER_FIELD_STRUCTURED
121 : X-CHAOS-Size: 1*DIGIT ;local; HEADER_FIELD_STRUCTURED
122 : X-Mailer: *text ;Not Internet standard; HEADER_FIELD_TEXT
123 : X-Mozilla-Status: 4HEXDIG ;Mozilla; HEADER_FIELD_STRUCTURED
124 : X-Newsreader: *text ;Not Internet standard; HEADER_FIELD_TEXT
125 : X-Priority: "1" / "2" / "3" / "4" / "5" ;Not Internet standard;
126 : HEADER_FIELD_STRUCTURED
127 : Xref: sub-domain
128 : 1*((atom / string) *("." (atom / string)) ":" msg-number)
129 : ;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
130 : */
131 : enum HeaderFieldType
132 : {
133 : HEADER_FIELD_TEXT,
134 : HEADER_FIELD_STRUCTURED,
135 : HEADER_FIELD_PHRASE,
136 : HEADER_FIELD_MESSAGE_ID,
137 : HEADER_FIELD_ADDRESS
138 : };
139 :
140 : /** Check for ISO 8859-1 character.
141 :
142 : @param nChar Some UCS-4 character.
143 :
144 : @return True if nChar is a ISO 8859-1 character (0x00--0xFF).
145 : */
146 : static inline bool isISO88591(sal_uInt32 nChar);
147 :
148 : /** Check for US-ASCII control character.
149 :
150 : @param nChar Some UCS-4 character.
151 :
152 : @return True if nChar is a US-ASCII control character (US-ASCII
153 : 0x00--0x1F or 0x7F).
154 : */
155 : static inline bool isControl(sal_uInt32 nChar);
156 :
157 : /** Check for US-ASCII white space character.
158 :
159 : @param nChar Some UCS-4 character.
160 :
161 : @return True if nChar is a US-ASCII white space character (US-ASCII
162 : 0x09 or 0x20).
163 : */
164 : static inline bool isWhiteSpace(sal_uInt32 nChar);
165 :
166 : /** Check for US-ASCII visible character.
167 :
168 : @param nChar Some UCS-4 character.
169 :
170 : @return True if nChar is a US-ASCII visible character (US-ASCII
171 : 0x21--0x7E).
172 : */
173 : static inline bool isVisible(sal_uInt32 nChar);
174 :
175 : /** Check for US-ASCII Base 64 digit character.
176 :
177 : @param nChar Some UCS-4 character.
178 :
179 : @return True if nChar is a US-ASCII Base 64 digit character (US-ASCII
180 : 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
181 : */
182 : static inline bool isBase64Digit(sal_uInt32 nChar);
183 :
184 : /** Check whether some character is valid within an RFC 822 <atom>.
185 :
186 : @param nChar Some UCS-4 character.
187 :
188 : @return True if nChar is valid within an RFC 822 <atom> (US-ASCII
189 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
190 : '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
191 : */
192 : static bool isAtomChar(sal_uInt32 nChar);
193 :
194 : /** Check whether some character is valid within an RFC 2045 <token>.
195 :
196 : @param nChar Some UCS-4 character.
197 :
198 : @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
199 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
200 : '-', '.', '^', '_', '`', '{', '|', '}', or '~').
201 : */
202 : static bool isTokenChar(sal_uInt32 nChar);
203 :
204 : /** Check whether some character is valid within an RFC 2047 <token>.
205 :
206 : @param nChar Some UCS-4 character.
207 :
208 : @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
209 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
210 : '-', '^', '_', '`', '{', '|', '}', or '~').
211 : */
212 : static bool isEncodedWordTokenChar(sal_uInt32 nChar);
213 :
214 : /** Check whether some character is valid within an RFC 2060 <atom>.
215 :
216 : @param nChar Some UCS-4 character.
217 :
218 : @return True if nChar is valid within an RFC 2060 <atom> (US-ASCII
219 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
220 : '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
221 : '|', '}', or '~').
222 : */
223 : static bool isIMAPAtomChar(sal_uInt32 nChar);
224 :
225 : /** Get the digit weight of a US-ASCII character.
226 :
227 : @param nChar Some UCS-4 character.
228 :
229 : @return If nChar is a US-ASCII (decimal) digit character (US-ASCII
230 : '0'--'9'), return the corresponding weight (0--9); otherwise,
231 : return -1.
232 : */
233 : static inline int getWeight(sal_uInt32 nChar);
234 :
235 : /** Get the hexadecimal digit weight of a US-ASCII character.
236 :
237 : @param nChar Some UCS-4 character.
238 :
239 : @return If nChar is a US-ASCII hexadecimal digit character (US-ASCII
240 : '0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
241 : (0--15); otherwise, return -1.
242 : */
243 : static inline int getHexWeight(sal_uInt32 nChar);
244 :
245 : /** Get the Base 64 digit weight of a US-ASCII character.
246 :
247 : @param nChar Some UCS-4 character.
248 :
249 : @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
250 : 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
251 : corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
252 : character (US-ASCII '='), return -1; otherwise, return -2.
253 : */
254 : static inline int getBase64Weight(sal_uInt32 nChar);
255 :
256 : /** Get a hexadecimal digit encoded as US-ASCII.
257 :
258 : @param nWeight Must be in the range 0--15, inclusive.
259 :
260 : @return The canonic (i.e., upper case) hexadecimal digit
261 : corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
262 : */
263 : static sal_uInt32 getHexDigit(int nWeight);
264 :
265 : static inline bool isHighSurrogate(sal_uInt32 nUTF16);
266 :
267 : static inline bool isLowSurrogate(sal_uInt32 nUTF16);
268 :
269 : static inline sal_uInt32 toUTF32(sal_Unicode cHighSurrogate,
270 : sal_Unicode cLowSurrogate);
271 :
272 : /** Check two US-ASCII strings for equality, ignoring case.
273 :
274 : @param pBegin1 Points to the start of the first string, must not be
275 : null.
276 :
277 : @param pEnd1 Points past the end of the first string, must be >=
278 : pBegin1.
279 :
280 : @param pString2 Points to the start of the null terminated second
281 : string, must not be null.
282 :
283 : @return True if the two strings are equal, ignoring the case of US-
284 : ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
285 : */
286 : static bool equalIgnoreCase(const sal_Char * pBegin1,
287 : const sal_Char * pEnd1,
288 : const sal_Char * pString2);
289 :
290 : /** Check two US-ASCII strings for equality, ignoring case.
291 :
292 : @param pBegin1 Points to the start of the first string, must not be
293 : null.
294 :
295 : @param pEnd1 Points past the end of the first string, must be >=
296 : pBegin1.
297 :
298 : @param pString2 Points to the start of the null terminated second
299 : string, must not be null.
300 :
301 : @return True if the two strings are equal, ignoring the case of US-
302 : ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
303 : */
304 : static bool equalIgnoreCase(const sal_Unicode * pBegin1,
305 : const sal_Unicode * pEnd1,
306 : const sal_Char * pString2);
307 :
308 : static inline bool startsWithLineBreak(const sal_Char * pBegin,
309 : const sal_Char * pEnd);
310 :
311 : static inline bool startsWithLineBreak(const sal_Unicode * pBegin,
312 : const sal_Unicode * pEnd);
313 :
314 : static inline bool startsWithLineFolding(const sal_Char * pBegin,
315 : const sal_Char * pEnd);
316 :
317 : static inline bool startsWithLineFolding(const sal_Unicode * pBegin,
318 : const sal_Unicode * pEnd);
319 :
320 : static bool startsWithLinearWhiteSpace(const sal_Char * pBegin,
321 : const sal_Char * pEnd);
322 :
323 : static const sal_Unicode * skipLinearWhiteSpace(const sal_Unicode *
324 : pBegin,
325 : const sal_Unicode * pEnd);
326 :
327 : static const sal_Unicode * skipComment(const sal_Unicode * pBegin,
328 : const sal_Unicode * pEnd);
329 :
330 : static const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
331 : pBegin,
332 : const sal_Unicode *
333 : pEnd);
334 :
335 : static inline bool needsQuotedStringEscape(sal_uInt32 nChar);
336 :
337 : static const sal_Char * skipQuotedString(const sal_Char * pBegin,
338 : const sal_Char * pEnd);
339 :
340 : static const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
341 : const sal_Unicode * pEnd);
342 :
343 : static bool scanUnsigned(const sal_Unicode *& rBegin,
344 : const sal_Unicode * pEnd, bool bLeadingZeroes,
345 : sal_uInt32 & rValue);
346 :
347 : static const sal_Unicode * scanQuotedBlock(const sal_Unicode * pBegin,
348 : const sal_Unicode * pEnd,
349 : sal_uInt32 nOpening,
350 : sal_uInt32 nClosing,
351 : sal_Size & rLength,
352 : bool & rModify);
353 :
354 : static sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
355 : sal_Unicode const * pEnd,
356 : INetContentTypeParameterList *
357 : pParameters);
358 :
359 : static inline rtl_TextEncoding translateToMIME(rtl_TextEncoding
360 : eEncoding);
361 :
362 : static inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
363 : eEncoding);
364 :
365 : static const sal_Char * getCharsetName(rtl_TextEncoding eEncoding);
366 :
367 : static rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
368 : const sal_Char * pEnd);
369 :
370 : static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);
371 :
372 : static INetMIMECharsetList_Impl *
373 : createPreferredCharsetList(rtl_TextEncoding eEncoding);
374 :
375 : static sal_Unicode * convertToUnicode(const sal_Char * pBegin,
376 : const sal_Char * pEnd,
377 : rtl_TextEncoding eEncoding,
378 : sal_Size & rSize);
379 :
380 : static sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
381 : const sal_Unicode * pEnd,
382 : rtl_TextEncoding eEncoding,
383 : sal_Size & rSize);
384 :
385 : /** Get the number of octets required to encode an UCS-4 character using
386 : UTF-8 encoding.
387 :
388 : @param nChar Some UCS-4 character.
389 :
390 : @return The number of octets required (in the range 1--6, inclusive).
391 : */
392 : static inline int getUTF8OctetCount(sal_uInt32 nChar);
393 :
394 : static inline void writeEscapeSequence(INetMIMEOutputSink & rSink,
395 : sal_uInt32 nChar);
396 :
397 : static void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
398 :
399 : static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
400 : HeaderFieldType eType,
401 : const OUString& rBody,
402 : rtl_TextEncoding ePreferredEncoding,
403 : bool bInitialSpace = true);
404 :
405 : static bool translateUTF8Char(const sal_Char *& rBegin,
406 : const sal_Char * pEnd,
407 : rtl_TextEncoding eEncoding,
408 : sal_uInt32 & rCharacter);
409 :
410 : static OUString decodeHeaderFieldBody(HeaderFieldType eType,
411 : const OString& rBody);
412 :
413 : /** Get the UTF-32 character at the head of a UTF-16 encoded string.
414 :
415 : @param rBegin Points to the start of the UTF-16 encoded string, must
416 : not be null. On exit, it points past the first UTF-32 character's
417 : encoding.
418 :
419 : @param pEnd Points past the end of the UTF-16 encoded string, must be
420 : strictly greater than rBegin.
421 :
422 : @return The UCS-4 character at the head of the UTF-16 encoded string.
423 : If the string does not start with the UTF-16 encoding of a UCS-32
424 : character, the first UTF-16 value is returned.
425 : */
426 : static inline sal_uInt32 getUTF32Character(const sal_Unicode *& rBegin,
427 : const sal_Unicode * pEnd);
428 :
429 : /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
430 :
431 : @param pBuffer Points to a buffer, must not be null.
432 :
433 : @param nUTF32 An UTF-32 character, must be in the range 0..0x10FFFF.
434 :
435 : @return A pointer past the UTF-16 characters put into the buffer
436 : (i.e., pBuffer + 1 or pBuffer + 2).
437 : */
438 : static inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
439 : sal_uInt32 nUTF32);
440 : };
441 :
442 : // static
443 : inline bool INetMIME::isISO88591(sal_uInt32 nChar)
444 : {
445 : return nChar <= 0xFF;
446 : }
447 :
448 : // static
449 : inline bool INetMIME::isControl(sal_uInt32 nChar)
450 : {
451 : return nChar <= 0x1F || nChar == 0x7F;
452 : }
453 :
454 : // static
455 0 : inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar)
456 : {
457 0 : return nChar == '\t' || nChar == ' ';
458 : }
459 :
460 : // static
461 0 : inline bool INetMIME::isVisible(sal_uInt32 nChar)
462 : {
463 0 : return nChar >= '!' && nChar <= '~';
464 : }
465 :
466 : // static
467 : inline bool INetMIME::isBase64Digit(sal_uInt32 nChar)
468 : {
469 : return rtl::isAsciiUpperCase(nChar) || rtl::isAsciiLowerCase(nChar) || rtl::isAsciiDigit(nChar)
470 : || nChar == '+' || nChar == '/';
471 : }
472 :
473 : // static
474 0 : inline int INetMIME::getWeight(sal_uInt32 nChar)
475 : {
476 0 : return rtl::isAsciiDigit(nChar) ? int(nChar - '0') : -1;
477 : }
478 :
479 : // static
480 0 : inline int INetMIME::getHexWeight(sal_uInt32 nChar)
481 : {
482 0 : return rtl::isAsciiDigit(nChar) ? int(nChar - '0') :
483 0 : nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) :
484 0 : nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1;
485 : }
486 :
487 : // static
488 0 : inline int INetMIME::getBase64Weight(sal_uInt32 nChar)
489 : {
490 0 : return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
491 0 : rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
492 0 : rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
493 : nChar == '+' ? 62 :
494 : nChar == '/' ? 63 :
495 0 : nChar == '=' ? -1 : -2;
496 : }
497 :
498 : // static
499 0 : inline bool INetMIME::isHighSurrogate(sal_uInt32 nUTF16)
500 : {
501 0 : return nUTF16 >= 0xD800 && nUTF16 <= 0xDBFF;
502 : }
503 :
504 : // static
505 0 : inline bool INetMIME::isLowSurrogate(sal_uInt32 nUTF16)
506 : {
507 0 : return nUTF16 >= 0xDC00 && nUTF16 <= 0xDFFF;
508 : }
509 :
510 : // static
511 : inline sal_uInt32 INetMIME::toUTF32(sal_Unicode cHighSurrogate,
512 : sal_Unicode cLowSurrogate)
513 : {
514 : DBG_ASSERT(isHighSurrogate(cHighSurrogate)
515 : && isLowSurrogate(cLowSurrogate),
516 : "INetMIME::toUTF32(): Bad chars");
517 : return ((sal_uInt32(cHighSurrogate) & 0x3FF) << 10)
518 : | (sal_uInt32(cLowSurrogate) & 0x3FF);
519 : }
520 :
521 : // static
522 : inline bool INetMIME::startsWithLineBreak(const sal_Char * pBegin,
523 : const sal_Char * pEnd)
524 : {
525 : DBG_ASSERT(pBegin && pBegin <= pEnd,
526 : "INetMIME::startsWithLineBreak(): Bad sequence");
527 :
528 : return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
529 : // CR, LF
530 : }
531 :
532 : // static
533 0 : inline bool INetMIME::startsWithLineBreak(const sal_Unicode * pBegin,
534 : const sal_Unicode * pEnd)
535 : {
536 : DBG_ASSERT(pBegin && pBegin <= pEnd,
537 : "INetMIME::startsWithLineBreak(): Bad sequence");
538 :
539 0 : return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
540 : // CR, LF
541 : }
542 :
543 : // static
544 : inline bool INetMIME::startsWithLineFolding(const sal_Char * pBegin,
545 : const sal_Char * pEnd)
546 : {
547 : DBG_ASSERT(pBegin && pBegin <= pEnd,
548 : "INetMIME::startsWithLineFolding(): Bad sequence");
549 :
550 : return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
551 : && isWhiteSpace(pBegin[2]); // CR, LF
552 : }
553 :
554 : // static
555 0 : inline bool INetMIME::startsWithLineFolding(const sal_Unicode * pBegin,
556 : const sal_Unicode * pEnd)
557 : {
558 : DBG_ASSERT(pBegin && pBegin <= pEnd,
559 : "INetMIME::startsWithLineFolding(): Bad sequence");
560 :
561 0 : return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
562 0 : && isWhiteSpace(pBegin[2]); // CR, LF
563 : }
564 :
565 : // static
566 : inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char * pBegin,
567 : const sal_Char * pEnd)
568 : {
569 : DBG_ASSERT(pBegin && pBegin <= pEnd,
570 : "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
571 :
572 : return pBegin != pEnd
573 : && (isWhiteSpace(*pBegin) || startsWithLineFolding(pBegin, pEnd));
574 : }
575 :
576 : // static
577 0 : inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar)
578 : {
579 0 : return nChar == '"' || nChar == '\\';
580 : }
581 :
582 : // static
583 0 : inline rtl_TextEncoding INetMIME::translateToMIME(rtl_TextEncoding eEncoding)
584 : {
585 : #if defined WNT
586 : return eEncoding == RTL_TEXTENCODING_MS_1252 ?
587 : RTL_TEXTENCODING_ISO_8859_1 : eEncoding;
588 : #else // WNT
589 0 : return eEncoding;
590 : #endif // WNT
591 : }
592 :
593 : // static
594 0 : inline rtl_TextEncoding INetMIME::translateFromMIME(rtl_TextEncoding
595 : eEncoding)
596 : {
597 : #if defined WNT
598 : return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
599 : RTL_TEXTENCODING_MS_1252 : eEncoding;
600 : #else
601 0 : return eEncoding;
602 : #endif
603 : }
604 :
605 : // static
606 0 : inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
607 : {
608 0 : return ( rtl_isOctetTextEncoding(eEncoding) == sal_True );
609 : }
610 :
611 : // static
612 0 : inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar)
613 : {
614 : DBG_ASSERT(nChar < 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
615 :
616 : return nChar < 0x80 ? 1 :
617 : nChar < 0x800 ? 2 :
618 : nChar <= 0x10000 ? 3 :
619 : nChar <= 0x200000 ? 4 :
620 0 : nChar <= 0x4000000 ? 5 : 6;
621 : }
622 :
623 : // static
624 0 : inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
625 : const sal_Unicode * pEnd)
626 : {
627 : DBG_ASSERT(rBegin && rBegin < pEnd,
628 : "INetMIME::getUTF32Character(): Bad sequence");
629 0 : if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF
630 0 : && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF)
631 : {
632 0 : sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10;
633 0 : return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000;
634 : }
635 : else
636 0 : return *rBegin++;
637 : }
638 :
639 : // static
640 0 : inline sal_Unicode * INetMIME::putUTF32Character(sal_Unicode * pBuffer,
641 : sal_uInt32 nUTF32)
642 : {
643 : DBG_ASSERT(nUTF32 <= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
644 0 : if (nUTF32 < 0x10000)
645 0 : *pBuffer++ = sal_Unicode(nUTF32);
646 : else
647 : {
648 0 : nUTF32 -= 0x10000;
649 0 : *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
650 0 : *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
651 : }
652 0 : return pBuffer;
653 : }
654 :
655 : class INetMIMEOutputSink
656 : {
657 : public:
658 : static sal_uInt32 const NO_LINE_LENGTH_LIMIT = SAL_MAX_UINT32;
659 :
660 : private:
661 : sal_uInt32 m_nColumn;
662 : sal_uInt32 m_nLineLengthLimit;
663 :
664 : protected:
665 : /** Write a sequence of octets.
666 :
667 : @param pBegin Points to the start of the sequence, must not be null.
668 :
669 : @param pEnd Points past the end of the sequence, must be >= pBegin.
670 : */
671 : virtual void writeSequence(const sal_Char * pBegin,
672 : const sal_Char * pEnd) = 0;
673 :
674 : /** Write a null terminated sequence of octets (without the terminating
675 : null).
676 :
677 : @param pOctets A null terminated sequence of octets, must not be
678 : null.
679 :
680 : @return The length of pOctets (without the terminating null).
681 : */
682 : virtual sal_Size writeSequence(const sal_Char * pSequence);
683 :
684 : /** Write a sequence of octets.
685 :
686 : @descr The supplied sequence of UCS-4 characters is interpreted as a
687 : sequence of octets. It is an error if any of the elements of the
688 : sequence has a numerical value greater than 255.
689 :
690 : @param pBegin Points to the start of the sequence, must not be null.
691 :
692 : @param pEnd Points past the end of the sequence, must be >= pBegin.
693 : */
694 : virtual void writeSequence(const sal_uInt32 * pBegin,
695 : const sal_uInt32 * pEnd);
696 :
697 : /** Write a sequence of octets.
698 :
699 : @descr The supplied sequence of Unicode characters is interpreted as
700 : a sequence of octets. It is an error if any of the elements of the
701 : sequence has a numerical value greater than 255.
702 :
703 : @param pBegin Points to the start of the sequence, must not be null.
704 :
705 : @param pEnd Points past the end of the sequence, must be >= pBegin.
706 : */
707 : virtual void writeSequence(const sal_Unicode * pBegin,
708 : const sal_Unicode * pEnd);
709 :
710 : public:
711 0 : INetMIMEOutputSink(sal_uInt32 nTheColumn = 0,
712 : sal_uInt32 nTheLineLengthLimit
713 : = INetMIME::SOFT_LINE_LENGTH_LIMIT):
714 0 : m_nColumn(nTheColumn), m_nLineLengthLimit(nTheLineLengthLimit) {}
715 :
716 0 : virtual ~INetMIMEOutputSink() {}
717 :
718 : /** Get the current column.
719 :
720 : @return The current column (starting from zero).
721 : */
722 0 : sal_uInt32 getColumn() const { return m_nColumn; }
723 :
724 0 : sal_uInt32 getLineLengthLimit() const { return m_nLineLengthLimit; }
725 :
726 : void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)
727 : { m_nLineLengthLimit = nTheLineLengthLimit; }
728 :
729 : virtual ErrCode getError() const;
730 :
731 : /** Write a sequence of octets.
732 :
733 : @param pBegin Points to the start of the sequence, must not be null.
734 :
735 : @param pEnd Points past the end of the sequence, must be >= pBegin.
736 : */
737 : inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
738 :
739 : /** Write a sequence of octets.
740 :
741 : @param pBegin Points to the start of the sequence, must not be null.
742 :
743 : @param nLength The length of the sequence.
744 : */
745 : void write(const sal_Char * pBegin, sal_Size nLength)
746 : { write(pBegin, pBegin + nLength); }
747 :
748 : /** Write a sequence of octets.
749 :
750 : @descr The supplied sequence of UCS-4 characters is interpreted as a
751 : sequence of octets. It is an error if any of the elements of the
752 : sequence has a numerical value greater than 255.
753 :
754 : @param pBegin Points to the start of the sequence, must not be null.
755 :
756 : @param pEnd Points past the end of the sequence, must be >= pBegin.
757 : */
758 : inline void write(const sal_uInt32 * pBegin, const sal_uInt32 * pEnd);
759 :
760 : /** Write a sequence of octets.
761 :
762 : @descr The supplied sequence of Unicode characters is interpreted as
763 : a sequence of octets. It is an error if any of the elements of the
764 : sequence has a numerical value greater than 255.
765 :
766 : @param pBegin Points to the start of the sequence, must not be null.
767 :
768 : @param pEnd Points past the end of the sequence, must be >= pBegin.
769 : */
770 : inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
771 :
772 : /** Write a sequence of octets.
773 :
774 : @param rOctets A OString, interpreted as a sequence of octets.
775 :
776 : @param nBegin The offset of the first character to write.
777 :
778 : @param nEnd The offset past the last character to write.
779 : */
780 : void write(const OString& rOctets, sal_Int32 nBegin, sal_Int32 nEnd)
781 : {
782 : writeSequence(rOctets.getStr() + nBegin, rOctets.getStr() + nEnd);
783 : m_nColumn += nEnd - nBegin;
784 : }
785 :
786 : /** Write a single octet.
787 :
788 : @param nOctet Some octet.
789 :
790 : @return This instance.
791 : */
792 : inline INetMIMEOutputSink & operator <<(sal_Char nOctet);
793 :
794 : /** Write a null terminated sequence of octets (without the terminating
795 : null).
796 :
797 : @param pOctets A null terminated sequence of octets, must not be
798 : null.
799 :
800 : @return This instance.
801 : */
802 : inline INetMIMEOutputSink & operator <<(const sal_Char * pOctets);
803 :
804 : /** Write a sequence of octets.
805 :
806 : @param rOctets A OString, interpreted as a sequence of octets.
807 :
808 : @return This instance.
809 : */
810 : INetMIMEOutputSink & operator <<(const OString& rOctets)
811 : {
812 : writeSequence(rOctets.getStr(), rOctets.getStr() + rOctets.getLength());
813 : m_nColumn += rOctets.getLength();
814 : return *this;
815 : }
816 :
817 : /** Call a manipulator function.
818 :
819 : @param pManipulator A manipulator function.
820 :
821 : @return Whatever the manipulator function returns.
822 : */
823 : INetMIMEOutputSink &
824 0 : operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))
825 0 : { return pManipulator(*this); }
826 :
827 : /** Write a line end (CR LF).
828 : */
829 : void writeLineEnd();
830 :
831 : /** A manipulator function that writes a line end (CR LF).
832 :
833 : @param rSink Some sink.
834 :
835 : @return The sink rSink.
836 : */
837 : static inline INetMIMEOutputSink & endl(INetMIMEOutputSink & rSink);
838 : };
839 :
840 : inline void INetMIMEOutputSink::write(const sal_Char * pBegin,
841 : const sal_Char * pEnd)
842 : {
843 : writeSequence(pBegin, pEnd);
844 : m_nColumn += pEnd - pBegin;
845 : }
846 :
847 : inline void INetMIMEOutputSink::write(const sal_uInt32 * pBegin,
848 : const sal_uInt32 * pEnd)
849 : {
850 : writeSequence(pBegin, pEnd);
851 : m_nColumn += pEnd - pBegin;
852 : }
853 :
854 0 : inline void INetMIMEOutputSink::write(const sal_Unicode * pBegin,
855 : const sal_Unicode * pEnd)
856 : {
857 0 : writeSequence(pBegin, pEnd);
858 0 : m_nColumn += pEnd - pBegin;
859 0 : }
860 :
861 0 : inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(sal_Char nOctet)
862 : {
863 0 : writeSequence(&nOctet, &nOctet + 1);
864 0 : ++m_nColumn;
865 0 : return *this;
866 : }
867 :
868 0 : inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const sal_Char *
869 : pOctets)
870 : {
871 0 : m_nColumn += writeSequence(pOctets);
872 0 : return *this;
873 : }
874 :
875 : // static
876 0 : inline INetMIMEOutputSink & INetMIMEOutputSink::endl(INetMIMEOutputSink &
877 : rSink)
878 : {
879 0 : rSink.writeLineEnd();
880 0 : return rSink;
881 : }
882 :
883 : // static
884 0 : inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink & rSink,
885 : sal_uInt32 nChar)
886 : {
887 : DBG_ASSERT(nChar <= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
888 0 : rSink << '=' << sal_uInt8(getHexDigit(nChar >> 4))
889 0 : << sal_uInt8(getHexDigit(nChar & 15));
890 0 : }
891 :
892 0 : class INetMIMEStringOutputSink: public INetMIMEOutputSink
893 : {
894 : OStringBuffer m_aBuffer;
895 :
896 : using INetMIMEOutputSink::writeSequence;
897 :
898 : virtual void writeSequence(const sal_Char * pBegin,
899 : const sal_Char * pEnd) SAL_OVERRIDE;
900 :
901 : public:
902 0 : inline INetMIMEStringOutputSink(sal_uInt32 nColumn = 0,
903 : sal_uInt32 nLineLengthLimit
904 : = INetMIME::SOFT_LINE_LENGTH_LIMIT):
905 0 : INetMIMEOutputSink(nColumn, nLineLengthLimit) {}
906 :
907 : virtual ErrCode getError() const SAL_OVERRIDE;
908 :
909 0 : OString takeBuffer()
910 : {
911 0 : return m_aBuffer.makeStringAndClear();
912 : }
913 : };
914 :
915 : class INetMIMEEncodedWordOutputSink
916 : {
917 : public:
918 : enum Context { CONTEXT_TEXT = 1,
919 : CONTEXT_COMMENT = 2,
920 : CONTEXT_PHRASE = 4 };
921 :
922 : enum Space { SPACE_NO, SPACE_ENCODED, SPACE_ALWAYS };
923 :
924 : private:
925 : enum { BUFFER_SIZE = 256 };
926 :
927 : enum Coding { CODING_NONE, CODING_QUOTED, CODING_ENCODED,
928 : CODING_ENCODED_TERMINATED };
929 :
930 : enum EncodedWordState { STATE_INITIAL, STATE_FIRST_EQUALS,
931 : STATE_FIRST_QUESTION, STATE_CHARSET,
932 : STATE_SECOND_QUESTION, STATE_ENCODING,
933 : STATE_THIRD_QUESTION, STATE_ENCODED_TEXT,
934 : STATE_FOURTH_QUESTION, STATE_SECOND_EQUALS,
935 : STATE_BAD };
936 :
937 : INetMIMEOutputSink & m_rSink;
938 : Context m_eContext;
939 : Space m_eInitialSpace;
940 : sal_uInt32 m_nExtraSpaces;
941 : INetMIMECharsetList_Impl * m_pEncodingList;
942 : sal_Unicode * m_pBuffer;
943 : sal_uInt32 m_nBufferSize;
944 : sal_Unicode * m_pBufferEnd;
945 : Coding m_ePrevCoding;
946 : rtl_TextEncoding m_ePrevMIMEEncoding;
947 : Coding m_eCoding;
948 : sal_uInt32 m_nQuotedEscaped;
949 : EncodedWordState m_eEncodedWordState;
950 :
951 : inline bool needsEncodedWordEscape(sal_uInt32 nChar) const;
952 :
953 : void finish(bool bWriteTrailer);
954 :
955 : public:
956 : inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,
957 : Context eTheContext,
958 : Space eTheInitialSpace,
959 : rtl_TextEncoding ePreferredEncoding);
960 :
961 : ~INetMIMEEncodedWordOutputSink();
962 :
963 : INetMIMEEncodedWordOutputSink & operator <<(sal_uInt32 nChar);
964 :
965 : inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
966 :
967 : inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
968 :
969 : inline bool flush();
970 : };
971 :
972 0 : inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink(
973 : INetMIMEOutputSink & rTheSink, Context eTheContext,
974 : Space eTheInitialSpace, rtl_TextEncoding ePreferredEncoding):
975 : m_rSink(rTheSink),
976 : m_eContext(eTheContext),
977 : m_eInitialSpace(eTheInitialSpace),
978 : m_nExtraSpaces(0),
979 0 : m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding)),
980 : m_ePrevCoding(CODING_NONE),
981 : m_ePrevMIMEEncoding(RTL_TEXTENCODING_DONTKNOW),
982 : m_eCoding(CODING_NONE),
983 : m_nQuotedEscaped(0),
984 0 : m_eEncodedWordState(STATE_INITIAL)
985 : {
986 0 : m_nBufferSize = BUFFER_SIZE;
987 : m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory(
988 : m_nBufferSize
989 0 : * sizeof (sal_Unicode)));
990 0 : m_pBufferEnd = m_pBuffer;
991 0 : }
992 :
993 : inline void INetMIMEEncodedWordOutputSink::write(const sal_Char * pBegin,
994 : const sal_Char * pEnd)
995 : {
996 : DBG_ASSERT(pBegin && pBegin <= pEnd,
997 : "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
998 :
999 : while (pBegin != pEnd)
1000 : operator <<(*pBegin++);
1001 : }
1002 :
1003 0 : inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin,
1004 : const sal_Unicode * pEnd)
1005 : {
1006 : DBG_ASSERT(pBegin && pBegin <= pEnd,
1007 : "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1008 :
1009 0 : while (pBegin != pEnd)
1010 0 : operator <<(*pBegin++);
1011 0 : }
1012 :
1013 0 : inline bool INetMIMEEncodedWordOutputSink::flush()
1014 : {
1015 0 : finish(true);
1016 0 : return m_ePrevCoding != CODING_NONE;
1017 : }
1018 :
1019 0 : struct INetContentTypeParameter
1020 : {
1021 : /** The name of the attribute, in US-ASCII encoding and converted to lower
1022 : case. If a parameter value is split as described in RFC 2231, there
1023 : will only be one item for the complete parameter, with the attribute
1024 : name lacking any section suffix.
1025 : */
1026 : const OString m_sAttribute;
1027 :
1028 : /** The optional character set specification (see RFC 2231), in US-ASCII
1029 : encoding and converted to lower case.
1030 : */
1031 : const OString m_sCharset;
1032 :
1033 : /** The optional language specification (see RFC 2231), in US-ASCII
1034 : encoding and converted to lower case.
1035 : */
1036 : const OString m_sLanguage;
1037 :
1038 : /** The attribute value. If the value is a quoted-string, it is
1039 : 'unpacked.' If a character set is specified, and the value can be
1040 : converted to Unicode, this is done. Also, if no character set is
1041 : specified, it is first tried to convert the value from UTF-8 encoding
1042 : to Unicode, and if that doesn't work (because the value is not in
1043 : UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1044 : (which will always work). But if a character set is specified and the
1045 : value cannot be converted from that character set to Unicode, special
1046 : action is taken to produce a value that can possibly be transformed
1047 : back into its original form: Any 8-bit character from a non-encoded
1048 : part of the original value is directly converted to Unicode
1049 : (effectively handling it as if it was ISO-8859-1 encoded), and any
1050 : 8-bit character from an encoded part of the original value is mapped
1051 : to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1052 : within Unicode's Private Use Area (effectively adding 0xF800 to the
1053 : character's numeric value).
1054 : */
1055 : const OUString m_sValue;
1056 :
1057 : /** This is true if the value is successfully converted to Unicode, and
1058 : false if the value is a special mixture of ISO-LATIN-1 characters and
1059 : characters from Unicode's Private Use Area.
1060 : */
1061 : const bool m_bConverted;
1062 :
1063 0 : INetContentTypeParameter(const OString& rTheAttribute,
1064 : const OString& rTheCharset, const OString& rTheLanguage,
1065 : const OUString& rTheValue, bool bTheConverted)
1066 : : m_sAttribute(rTheAttribute)
1067 : , m_sCharset(rTheCharset)
1068 : , m_sLanguage(rTheLanguage)
1069 : , m_sValue(rTheValue)
1070 0 : , m_bConverted(bTheConverted)
1071 : {
1072 0 : }
1073 : };
1074 :
1075 0 : class TOOLS_DLLPUBLIC INetContentTypeParameterList
1076 : {
1077 : public:
1078 :
1079 : void Clear();
1080 :
1081 : void Insert(INetContentTypeParameter * pParameter, sal_uIntPtr nIndex)
1082 : {
1083 : maEntries.insert(maEntries.begin()+nIndex,pParameter);
1084 : }
1085 :
1086 0 : void Append(INetContentTypeParameter *pParameter)
1087 : {
1088 0 : maEntries.push_back(pParameter);
1089 0 : }
1090 :
1091 : inline const INetContentTypeParameter * GetObject(sal_uIntPtr nIndex) const
1092 : {
1093 : return &(maEntries[nIndex]);
1094 : }
1095 :
1096 : const INetContentTypeParameter * find(const OString& rAttribute) const;
1097 :
1098 : private:
1099 :
1100 : boost::ptr_vector<INetContentTypeParameter> maEntries;
1101 : };
1102 :
1103 : #endif
1104 :
1105 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|