Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 : #ifndef INCLUDED_TOOLS_INETMIME_HXX
20 : #define INCLUDED_TOOLS_INETMIME_HXX
21 :
22 : #include <boost/ptr_container/ptr_vector.hpp>
23 :
24 : #include <tools/toolsdllapi.h>
25 : #include <rtl/alloc.h>
26 : #include <rtl/character.hxx>
27 : #include <rtl/string.hxx>
28 : #include <rtl/strbuf.hxx>
29 : #include <rtl/ustring.hxx>
30 : #include <rtl/tencinfo.h>
31 : #include <tools/debug.hxx>
32 : #include <tools/errcode.hxx>
33 :
34 : class DateTime;
35 : class INetContentTypeParameterList;
36 : class INetMIMECharsetList_Impl;
37 : class INetMIMEOutputSink;
38 :
/** Collection of static helper functions for classifying characters and for
    scanning, converting, encoding and decoding MIME message header data.
    Every member declared here is static; the class carries no state. */
39 : class TOOLS_DLLPUBLIC INetMIME
40 : {
41 : public:
42 : enum { SOFT_LINE_LENGTH_LIMIT = 76,
43 : HARD_LINE_LENGTH_LIMIT = 998 };
44 :
45 : /** The various types of message header field bodies, with respect to
46 : encoding and decoding them.
47 :
48 : @descr At the moment, five different types of header fields suffice
49 : to describe how to encode and decode any known message header field
50 : body, but need for more types may arise in the future as new header
51 : fields are introduced.
52 :
53 : @descr The following is an exhaustive list of all the header fields
54 : currently known to our implementation. For every header field, it
55 : includes a 'canonic' (with regard to capitalization) name, a grammar
56 : rule for the body (using RFC 822 and RFC 2234 conventions), a list of
57 : relevant sources of information, and the HeaderFieldType value to use
58 : with that header field. The list is based on RFC 2076 and draft-
59 : palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
60 : ietf/jp-ietf-home.html#anchor1003783>).
61 :
62 : Approved: address ;RFC 1036; HEADER_FIELD_ADDRESS
63 : bcc: #address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
64 : cc: 1#address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
65 : Comments: *text ;RFCs 822, RFC 2047; HEADER_FIELD_TEXT
66 : Content-Base: absoluteURI ;RFC 2110; HEADER_FIELD_TEXT
67 : Content-Description: *text ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
68 : Content-Disposition: disposition-type *(";" disposition-parm)
69 : ;RFC 1806; HEADER_FIELD_STRUCTURED
70 : Content-ID: msg-id ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
71 : Content-Location: absoluteURI / relativeURI ;RFC 2110;
72 : HEADER_FIELD_TEXT
73 : Content-Transfer-Encoding: mechanism ;RFC 2045, RFC 2047;
74 : HEADER_FIELD_STRUCTURED
75 : Content-Type: type "/" subtype *(";" parameter) ;RFC 2045, RFC 2047;
76 : HEADER_FIELD_STRUCTURED
77 : Control: *text ;RFC 1036; HEADER_FIELD_TEXT
78 : Date: date-time ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
79 : Distribution: 1#atom ;RFC 1036; HEADER_FIELD_STRUCTURED
80 : Encrypted: 1#2word ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
81 : Expires: date-time ;RFC 1036; HEADER_FIELD_STRUCTURED
82 : Followup-To: 1#(atom *("." atom)) ;RFC 1036; HEADER_FIELD_STRUCTURED
83 : From: mailbox / 1#mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
84 : In-Reply-To: *(phrase / msg-id) ;RFC 822, RFC 2047;
85 : HEADER_FIELD_ADDRESS
86 : Keywords: #phrase ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
87 : MIME-Version: 1*DIGIT "." 1*DIGIT ;RFC 2045, RFC 2047;
88 : HEADER_FIELD_STRUCTURED
89 : Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
90 : Newsgroups: 1#(atom *("." atom)) ;RFC 1036, RFC 2047;
91 : HEADER_FIELD_STRUCTURED
92 : Organization: *text ;RFC 1036; HEADER_FIELD_TEXT
93 : Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
94 : ["id" msg-id] ["for" addr-spec] ";" date-time ;RFC 822, RFC 1123,
95 : RFC 2047; HEADER_FIELD_STRUCTURED
96 : References: *(phrase / msg-id) ;RFC 822, RFC 2047;
97 : HEADER_FIELD_ADDRESS
98 : Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
99 : Resent-Date: date-time ;RFC 822, RFC 1123, RFC 2047;
100 : HEADER_FIELD_STRUCTURED
101 : Resent-From: mailbox / 1#mailbox ;RFC 822, RFC 2047;
102 : HEADER_FIELD_ADDRESS
103 : Resent-Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
104 : Resent-Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
105 : Resent-Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
106 : Resent-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
107 : Resent-bcc: #address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
108 : Resent-cc: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
109 : Return-path: route-addr / ("<" ">") ;RFC 822, RFC 1123, RFC 2047;
110 : HEADER_FIELD_STRUCTURED
111 : Return-Receipt-To: address ;Not Internet standard;
112 : HEADER_FIELD_ADDRESS
113 : Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
114 : Subject: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
115 : Summary: *text ;RFC 1036; HEADER_FIELD_TEXT
116 : To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
117 : X-CHAOS-Marked: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
118 : X-CHAOS-Read: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
119 : X-CHAOS-Recipients: #*("<" atom word ">") ;local;
120 : HEADER_FIELD_STRUCTURED
121 : X-CHAOS-Size: 1*DIGIT ;local; HEADER_FIELD_STRUCTURED
122 : X-Mailer: *text ;Not Internet standard; HEADER_FIELD_TEXT
123 : X-Mozilla-Status: 4HEXDIG ;Mozilla; HEADER_FIELD_STRUCTURED
124 : X-Newsreader: *text ;Not Internet standard; HEADER_FIELD_TEXT
125 : X-Priority: "1" / "2" / "3" / "4" / "5" ;Not Internet standard;
126 : HEADER_FIELD_STRUCTURED
127 : Xref: sub-domain
128 : 1*((atom / string) *("." (atom / string)) ":" msg-number)
129 : ;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
130 : */
131 : enum HeaderFieldType
132 : {
133 : HEADER_FIELD_TEXT,
134 : HEADER_FIELD_STRUCTURED,
135 : HEADER_FIELD_PHRASE,
136 : HEADER_FIELD_MESSAGE_ID,
137 : HEADER_FIELD_ADDRESS
138 : };
139 :
140 : /** Check for ISO 8859-1 character.
141 :
142 : @param nChar Some UCS-4 character.
143 :
144 : @return True if nChar is an ISO 8859-1 character (0x00--0xFF).
145 : */
146 : static inline bool isISO88591(sal_uInt32 nChar);
147 :
148 : /** Check for US-ASCII control character.
149 :
150 : @param nChar Some UCS-4 character.
151 :
152 : @return True if nChar is a US-ASCII control character (US-ASCII
153 : 0x00--0x1F or 0x7F).
154 : */
155 : static inline bool isControl(sal_uInt32 nChar);
156 :
157 : /** Check for US-ASCII white space character.
158 :
159 : @param nChar Some UCS-4 character.
160 :
161 : @return True if nChar is a US-ASCII white space character (US-ASCII
162 : 0x09 or 0x20).
163 : */
164 : static inline bool isWhiteSpace(sal_uInt32 nChar);
165 :
166 : /** Check for US-ASCII visible character.
167 :
168 : @param nChar Some UCS-4 character.
169 :
170 : @return True if nChar is a US-ASCII visible character (US-ASCII
171 : 0x21--0x7E).
172 : */
173 : static inline bool isVisible(sal_uInt32 nChar);
174 :
175 : /** Check for US-ASCII Base 64 digit character.
176 :
177 : @param nChar Some UCS-4 character.
178 :
179 : @return True if nChar is a US-ASCII Base 64 digit character (US-ASCII
180 : 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
181 : */
182 : static inline bool isBase64Digit(sal_uInt32 nChar);
183 :
184 : /** Check whether some character is valid within an RFC 822 <atom>.
185 :
186 : @param nChar Some UCS-4 character.
187 :
188 : @return True if nChar is valid within an RFC 822 <atom> (US-ASCII
189 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
190 : '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
191 : */
192 : static bool isAtomChar(sal_uInt32 nChar);
193 :
194 : /** Check whether some character is valid within an RFC 2045 <token>.
195 :
196 : @param nChar Some UCS-4 character.
197 :
198 : @return True if nChar is valid within an RFC 2045 <token> (US-ASCII
199 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
200 : '-', '.', '^', '_', '`', '{', '|', '}', or '~').
201 : */
202 : static bool isTokenChar(sal_uInt32 nChar);
203 :
204 : /** Check whether some character is valid within an RFC 2047 <token>.
205 :
206 : @param nChar Some UCS-4 character.
207 :
208 : @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
209 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
210 : '-', '^', '_', '`', '{', '|', '}', or '~').
211 : */
212 : static bool isEncodedWordTokenChar(sal_uInt32 nChar);
213 :
214 : /** Check whether some character is valid within an RFC 2060 <atom>.
215 :
216 : @param nChar Some UCS-4 character.
217 :
218 : @return True if nChar is valid within an RFC 2060 <atom> (US-ASCII
219 : 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
220 : '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
221 : '|', '}', or '~').
222 : */
223 : static bool isIMAPAtomChar(sal_uInt32 nChar);
224 :
225 : /** Get the digit weight of a US-ASCII character.
226 :
227 : @param nChar Some UCS-4 character.
228 :
229 : @return If nChar is a US-ASCII (decimal) digit character (US-ASCII
230 : '0'--'9'), return the corresponding weight (0--9); otherwise,
231 : return -1.
232 : */
233 : static inline int getWeight(sal_uInt32 nChar);
234 :
235 : /** Get the hexadecimal digit weight of a US-ASCII character.
236 :
237 : @param nChar Some UCS-4 character.
238 :
239 : @return If nChar is a US-ASCII hexadecimal digit character (US-ASCII
240 : '0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
241 : (0--15); otherwise, return -1.
242 : */
243 : static inline int getHexWeight(sal_uInt32 nChar);
244 :
245 : /** Get the Base 64 digit weight of a US-ASCII character.
246 :
247 : @param nChar Some UCS-4 character.
248 :
249 : @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
250 : 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
251 : corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
252 : character (US-ASCII '='), return -1; otherwise, return -2.
253 : */
254 : static inline int getBase64Weight(sal_uInt32 nChar);
255 :
256 : /** Get a hexadecimal digit encoded as US-ASCII.
257 :
258 : @param nWeight Must be in the range 0--15, inclusive.
259 :
260 : @return The canonic (i.e., upper case) hexadecimal digit
261 : corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
262 : */
263 : static sal_uInt32 getHexDigit(int nWeight);
264 :
265 : /** Check two US-ASCII strings for equality, ignoring case.
266 :
267 : @param pBegin1 Points to the start of the first string, must not be
268 : null.
269 :
270 : @param pEnd1 Points past the end of the first string, must be >=
271 : pBegin1.
272 :
273 : @param pString2 Points to the start of the null terminated second
274 : string, must not be null.
275 :
276 : @return True if the two strings are equal, ignoring the case of US-
277 : ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
278 : */
279 : static bool equalIgnoreCase(const sal_Char * pBegin1,
280 : const sal_Char * pEnd1,
281 : const sal_Char * pString2);
282 :
283 : /** Check two US-ASCII strings for equality, ignoring case.
284 :
285 : @param pBegin1 Points to the start of the first string, must not be
286 : null.
287 :
288 : @param pEnd1 Points past the end of the first string, must be >=
289 : pBegin1.
290 :
291 : @param pString2 Points to the start of the null terminated second
292 : string, must not be null.
293 :
294 : @return True if the two strings are equal, ignoring the case of US-
295 : ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
296 : */
297 : static bool equalIgnoreCase(const sal_Unicode * pBegin1,
298 : const sal_Unicode * pEnd1,
299 : const sal_Unicode * pString2);
300 :
// True if the range [pBegin, pEnd) holds at least two characters and starts
// with CR LF (0x0D 0x0A).
301 : static inline bool startsWithLineBreak(const sal_Char * pBegin,
302 : const sal_Char * pEnd);
303 :
// Same check as above for a range of sal_Unicode characters.
304 : static inline bool startsWithLineBreak(const sal_Unicode * pBegin,
305 : const sal_Unicode * pEnd);
306 :
// True if the range [pBegin, pEnd) holds at least three characters and starts
// with CR LF followed by a white space character (see isWhiteSpace).
307 : static inline bool startsWithLineFolding(const sal_Char * pBegin,
308 : const sal_Char * pEnd);
309 :
// Same check as above for a range of sal_Unicode characters.
310 : static inline bool startsWithLineFolding(const sal_Unicode * pBegin,
311 : const sal_Unicode * pEnd);
312 :
// True if the range [pBegin, pEnd) is non-empty and starts either with a
// white space character or with a line folding.
313 : static bool startsWithLinearWhiteSpace(const sal_Char * pBegin,
314 : const sal_Char * pEnd);
315 :
316 : static const sal_Unicode * skipLinearWhiteSpace(const sal_Unicode *
317 : pBegin,
318 : const sal_Unicode * pEnd);
319 :
320 : static const sal_Unicode * skipComment(const sal_Unicode * pBegin,
321 : const sal_Unicode * pEnd);
322 :
323 : static const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
324 : pBegin,
325 : const sal_Unicode *
326 : pEnd);
327 :
// True if nChar is '"' or '\'.
328 : static inline bool needsQuotedStringEscape(sal_uInt32 nChar);
329 :
330 : static const sal_Char * skipQuotedString(const sal_Char * pBegin,
331 : const sal_Char * pEnd);
332 :
333 : static const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
334 : const sal_Unicode * pEnd);
335 :
336 : static bool scanUnsigned(const sal_Unicode *& rBegin,
337 : const sal_Unicode * pEnd, bool bLeadingZeroes,
338 : sal_uInt32 & rValue);
339 :
340 : static const sal_Unicode * scanQuotedBlock(const sal_Unicode * pBegin,
341 : const sal_Unicode * pEnd,
342 : sal_uInt32 nOpening,
343 : sal_uInt32 nClosing,
344 : sal_Size & rLength,
345 : bool & rModify);
346 :
347 : static sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
348 : sal_Unicode const * pEnd,
349 : INetContentTypeParameterList *
350 : pParameters);
351 :
352 : /** Parse the body of an RFC 2045 Content-Type header field.
353 :
354 : @param pBegin The range (that must be valid) from non-null pBegin,
355 : inclusive, to non-null pEnd, exclusive, forms the body of the
356 : Content-Type header field. It must be of the form
357 :
358 : token "/" token *(";" token "=" (token / quoted-string))
359 :
360 : with intervening linear white space and comments (cf. RFCs 822, 2045).
361 : The RFC 2231 extensions are supported. The encoding of the input range
362 : should be US-ASCII, but any Unicode values in the range U+0080..U+FFFF
363 : are interpreted 'as appropriate.'
364 :
365 : @param pType If not null, returns the type (the first of the above
366 : tokens), in US-ASCII encoding and converted to lower case.
367 :
368 : @param pSubType If not null, returns the sub-type (the second of the
369 : above tokens), in US-ASCII encoding and converted to lower case.
370 :
371 : @param pParameters If not null, returns the parameters as a list of
372 : INetContentTypeParameters (the attributes are in US-ASCII encoding and
373 : converted to lower case, the values are in Unicode encoding). If
374 : null, only the syntax of the parameters is checked, but they are not
375 : returned.
376 :
377 : @return Null if the syntax of the field body is incorrect (i.e., does
378 : not start with type and sub-type tokens). Otherwise, a pointer past the
379 : longest valid input prefix. If null is returned, none of the output
380 : parameters will be modified.
381 : */
382 : static sal_Unicode const * scanContentType(
383 : sal_Unicode const *pBegin, sal_Unicode const * pEnd,
384 : OUString * pType = 0, OUString * pSubType = 0,
385 : INetContentTypeParameterList * pParameters = 0);
386 :
// When WNT is defined, maps RTL_TEXTENCODING_MS_1252 to
// RTL_TEXTENCODING_ISO_8859_1; every other value (and every value when WNT
// is not defined) is returned unchanged.
387 : static inline rtl_TextEncoding translateToMIME(rtl_TextEncoding
388 : eEncoding);
389 :
// When WNT is defined, maps RTL_TEXTENCODING_ISO_8859_1 to
// RTL_TEXTENCODING_MS_1252; every other value (and every value when WNT
// is not defined) is returned unchanged.
390 : static inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
391 : eEncoding);
392 :
393 : static const sal_Char * getCharsetName(rtl_TextEncoding eEncoding);
394 :
395 : static rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
396 : const sal_Char * pEnd);
397 :
// True if rtl_isOctetTextEncoding() yields sal_True for eEncoding.
398 : static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);
399 :
400 : static INetMIMECharsetList_Impl *
401 : createPreferredCharsetList(rtl_TextEncoding eEncoding);
402 :
403 : static sal_Unicode * convertToUnicode(const sal_Char * pBegin,
404 : const sal_Char * pEnd,
405 : rtl_TextEncoding eEncoding,
406 : sal_Size & rSize);
407 :
408 : static sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
409 : const sal_Unicode * pEnd,
410 : rtl_TextEncoding eEncoding,
411 : sal_Size & rSize);
412 :
413 : /** Get the number of octets required to encode a UCS-4 character using
414 : UTF-8 encoding.
415 :
416 : @param nChar Some UCS-4 character.
417 :
418 : @return The number of octets required (in the range 1--6, inclusive).
419 : */
420 : static inline int getUTF8OctetCount(sal_uInt32 nChar);
421 :
// Writes '=' followed by the two hexadecimal digits (as produced by
// getHexDigit) of nChar to rSink; nChar must be <= 0xFF.
422 : static inline void writeEscapeSequence(INetMIMEOutputSink & rSink,
423 : sal_uInt32 nChar);
424 :
425 : static void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
426 :
427 : static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
428 : HeaderFieldType eType,
429 : const OUString& rBody,
430 : rtl_TextEncoding ePreferredEncoding,
431 : bool bInitialSpace = true);
432 :
433 : static bool translateUTF8Char(const sal_Char *& rBegin,
434 : const sal_Char * pEnd,
435 : rtl_TextEncoding eEncoding,
436 : sal_uInt32 & rCharacter);
437 :
438 : static OUString decodeHeaderFieldBody(HeaderFieldType eType,
439 : const OString& rBody);
440 :
441 : /** Get the UTF-32 character at the head of a UTF-16 encoded string.
442 :
443 : @param rBegin Points to the start of the UTF-16 encoded string, must
444 : not be null. On exit, it points past the first UTF-32 character's
445 : encoding.
446 :
447 : @param pEnd Points past the end of the UTF-16 encoded string, must be
448 : strictly greater than rBegin.
449 :
450 : @return The UCS-4 character at the head of the UTF-16 encoded string.
451 : If the string does not start with the UTF-16 encoding of a UCS-4
452 : character, the first UTF-16 value is returned.
453 : */
454 : static inline sal_uInt32 getUTF32Character(const sal_Unicode *& rBegin,
455 : const sal_Unicode * pEnd);
456 :
457 : /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
458 :
459 : @param pBuffer Points to a buffer, must not be null.
460 :
461 : @param nUTF32 A UTF-32 character, must be in the range 0..0x10FFFF.
462 :
463 : @return A pointer past the UTF-16 characters put into the buffer
464 : (i.e., pBuffer + 1 or pBuffer + 2).
465 : */
466 : static inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
467 : sal_uInt32 nUTF32);
468 : };
469 :
470 : // static
471 : inline bool INetMIME::isISO88591(sal_uInt32 nChar)
472 : {
473 : return nChar <= 0xFF;
474 : }
475 :
476 : // static
477 : inline bool INetMIME::isControl(sal_uInt32 nChar)
478 : {
479 : return nChar <= 0x1F || nChar == 0x7F;
480 : }
481 :
482 : // static
483 0 : inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar)
484 : {
485 0 : return nChar == '\t' || nChar == ' ';
486 : }
487 :
488 : // static
489 1268 : inline bool INetMIME::isVisible(sal_uInt32 nChar)
490 : {
491 1268 : return nChar >= '!' && nChar <= '~';
492 : }
493 :
494 : // static
495 : inline bool INetMIME::isBase64Digit(sal_uInt32 nChar)
496 : {
497 : return rtl::isAsciiUpperCase(nChar) || rtl::isAsciiLowerCase(nChar) || rtl::isAsciiDigit(nChar)
498 : || nChar == '+' || nChar == '/';
499 : }
500 :
501 : // static
502 124 : inline int INetMIME::getWeight(sal_uInt32 nChar)
503 : {
504 124 : return rtl::isAsciiDigit(nChar) ? int(nChar - '0') : -1;
505 : }
506 :
507 : // static
508 71470 : inline int INetMIME::getHexWeight(sal_uInt32 nChar)
509 : {
510 107884 : return rtl::isAsciiDigit(nChar) ? int(nChar - '0') :
511 35056 : nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) :
512 142940 : nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1;
513 : }
514 :
515 : // static
516 12 : inline int INetMIME::getBase64Weight(sal_uInt32 nChar)
517 : {
518 21 : return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
519 3 : rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
520 3 : rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
521 : nChar == '+' ? 62 :
522 : nChar == '/' ? 63 :
523 27 : nChar == '=' ? -1 : -2;
524 : }
525 :
526 : // static
527 : inline bool INetMIME::startsWithLineBreak(const sal_Char * pBegin,
528 : const sal_Char * pEnd)
529 : {
530 : DBG_ASSERT(pBegin && pBegin <= pEnd,
531 : "INetMIME::startsWithLineBreak(): Bad sequence");
532 :
533 : return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
534 : // CR, LF
535 : }
536 :
537 : // static
538 0 : inline bool INetMIME::startsWithLineBreak(const sal_Unicode * pBegin,
539 : const sal_Unicode * pEnd)
540 : {
541 : DBG_ASSERT(pBegin && pBegin <= pEnd,
542 : "INetMIME::startsWithLineBreak(): Bad sequence");
543 :
544 0 : return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
545 : // CR, LF
546 : }
547 :
548 : // static
549 : inline bool INetMIME::startsWithLineFolding(const sal_Char * pBegin,
550 : const sal_Char * pEnd)
551 : {
552 : DBG_ASSERT(pBegin && pBegin <= pEnd,
553 : "INetMIME::startsWithLineFolding(): Bad sequence");
554 :
555 : return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
556 : && isWhiteSpace(pBegin[2]); // CR, LF
557 : }
558 :
559 : // static
560 0 : inline bool INetMIME::startsWithLineFolding(const sal_Unicode * pBegin,
561 : const sal_Unicode * pEnd)
562 : {
563 : DBG_ASSERT(pBegin && pBegin <= pEnd,
564 : "INetMIME::startsWithLineFolding(): Bad sequence");
565 :
566 0 : return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
567 0 : && isWhiteSpace(pBegin[2]); // CR, LF
568 : }
569 :
570 : // static
571 : inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char * pBegin,
572 : const sal_Char * pEnd)
573 : {
574 : DBG_ASSERT(pBegin && pBegin <= pEnd,
575 : "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
576 :
577 : return pBegin != pEnd
578 : && (isWhiteSpace(*pBegin) || startsWithLineFolding(pBegin, pEnd));
579 : }
580 :
581 : // static
582 0 : inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar)
583 : {
584 0 : return nChar == '"' || nChar == '\\';
585 : }
586 :
587 : // static
588 0 : inline rtl_TextEncoding INetMIME::translateToMIME(rtl_TextEncoding eEncoding)
589 : {
590 : #if defined WNT
591 : return eEncoding == RTL_TEXTENCODING_MS_1252 ?
592 : RTL_TEXTENCODING_ISO_8859_1 : eEncoding;
593 : #else // WNT
594 0 : return eEncoding;
595 : #endif // WNT
596 : }
597 :
598 : // static
599 3 : inline rtl_TextEncoding INetMIME::translateFromMIME(rtl_TextEncoding
600 : eEncoding)
601 : {
602 : #if defined WNT
603 : return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
604 : RTL_TEXTENCODING_MS_1252 : eEncoding;
605 : #else
606 3 : return eEncoding;
607 : #endif
608 : }
609 :
610 : // static
611 3 : inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
612 : {
613 3 : return ( rtl_isOctetTextEncoding(eEncoding) == sal_True );
614 : }
615 :
616 : // static
617 0 : inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar)
618 : {
619 : DBG_ASSERT(nChar < 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
620 :
621 : return nChar < 0x80 ? 1 :
622 : nChar < 0x800 ? 2 :
623 : nChar <= 0x10000 ? 3 :
624 : nChar <= 0x200000 ? 4 :
625 0 : nChar <= 0x4000000 ? 5 : 6;
626 : }
627 :
628 : // static
629 41697964 : inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
630 : const sal_Unicode * pEnd)
631 : {
632 : DBG_ASSERT(rBegin && rBegin < pEnd,
633 : "INetMIME::getUTF32Character(): Bad sequence");
634 41697964 : if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF
635 0 : && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF)
636 : {
637 0 : sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10;
638 0 : return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000;
639 : }
640 : else
641 41697964 : return *rBegin++;
642 : }
643 :
644 : // static
645 0 : inline sal_Unicode * INetMIME::putUTF32Character(sal_Unicode * pBuffer,
646 : sal_uInt32 nUTF32)
647 : {
648 : DBG_ASSERT(nUTF32 <= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
649 0 : if (nUTF32 < 0x10000)
650 0 : *pBuffer++ = sal_Unicode(nUTF32);
651 : else
652 : {
653 0 : nUTF32 -= 0x10000;
654 0 : *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
655 0 : *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
656 : }
657 0 : return pBuffer;
658 : }
659 :
/** Abstract sink for sequences of octets that keeps track of the current
    output column and stores a line length limit. Derived classes supply the
    actual output by implementing the pure virtual
    writeSequence(const sal_Char *, const sal_Char *). */
660 : class INetMIMEOutputSink
661 : {
662 : public:
663 : static sal_uInt32 const NO_LINE_LENGTH_LIMIT = SAL_MAX_UINT32;
664 :
665 : private:
// Column counter, advanced by the write functions and operators below.
666 : sal_uInt32 m_nColumn;
// Stored limit; only read and written through the accessors in this class.
667 : sal_uInt32 m_nLineLengthLimit;
668 :
669 : protected:
670 : /** Write a sequence of octets.
671 :
672 : @param pBegin Points to the start of the sequence, must not be null.
673 :
674 : @param pEnd Points past the end of the sequence, must be >= pBegin.
675 : */
676 : virtual void writeSequence(const sal_Char * pBegin,
677 : const sal_Char * pEnd) = 0;
678 :
679 : /** Write a null terminated sequence of octets (without the terminating
680 : null).
681 :
682 : @param pSequence A null terminated sequence of octets, must not be
683 : null.
684 :
685 : @return The length of pSequence (without the terminating null).
686 : */
687 : sal_Size writeSequence(const sal_Char * pSequence);
688 :
689 : /** Write a sequence of octets.
690 :
691 : @descr The supplied sequence of Unicode characters is interpreted as
692 : a sequence of octets. It is an error if any of the elements of the
693 : sequence has a numerical value greater than 255.
694 :
695 : @param pBegin Points to the start of the sequence, must not be null.
696 :
697 : @param pEnd Points past the end of the sequence, must be >= pBegin.
698 : */
699 : void writeSequence(const sal_Unicode * pBegin,
700 : const sal_Unicode * pEnd);
701 :
702 : public:
// Construct a sink with the given initial column and line length limit
// (defaults: column 0, INetMIME::SOFT_LINE_LENGTH_LIMIT).
703 0 : INetMIMEOutputSink(sal_uInt32 nTheColumn = 0,
704 : sal_uInt32 nTheLineLengthLimit
705 : = INetMIME::SOFT_LINE_LENGTH_LIMIT):
706 0 : m_nColumn(nTheColumn), m_nLineLengthLimit(nTheLineLengthLimit) {}
707 :
708 0 : virtual ~INetMIMEOutputSink() {}
709 :
710 : /** Get the current column.
711 :
712 : @return The current column (starting from zero).
713 : */
714 0 : sal_uInt32 getColumn() const { return m_nColumn; }
715 :
// Get the stored line length limit.
716 0 : sal_uInt32 getLineLengthLimit() const { return m_nLineLengthLimit; }
717 :
// Replace the stored line length limit.
718 : void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)
719 : { m_nLineLengthLimit = nTheLineLengthLimit; }
720 :
// Report the sink's error state; defined out of line, overridable.
721 : virtual ErrCode getError() const;
722 :
723 : /** Write a sequence of octets.
724 :
725 : @param pBegin Points to the start of the sequence, must not be null.
726 :
727 : @param pEnd Points past the end of the sequence, must be >= pBegin.
728 : */
729 : inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
730 :
731 : /** Write a sequence of octets.
732 :
733 : @param pBegin Points to the start of the sequence, must not be null.
734 :
735 : @param nLength The length of the sequence.
736 : */
737 : void write(const sal_Char * pBegin, sal_Size nLength)
738 : { write(pBegin, pBegin + nLength); }
739 :
740 : /** Write a sequence of octets.
741 :
742 : @descr The supplied sequence of Unicode characters is interpreted as
743 : a sequence of octets. It is an error if any of the elements of the
744 : sequence has a numerical value greater than 255.
745 :
746 : @param pBegin Points to the start of the sequence, must not be null.
747 :
748 : @param pEnd Points past the end of the sequence, must be >= pBegin.
749 : */
750 : inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
751 :
752 : /** Write a sequence of octets.
753 :
754 : @param rOctets An OString, interpreted as a sequence of octets.
755 :
756 : @param nBegin The offset of the first character to write.
757 :
758 : @param nEnd The offset past the last character to write.
759 : */
760 : void write(const OString& rOctets, sal_Int32 nBegin, sal_Int32 nEnd)
761 : {
762 : writeSequence(rOctets.getStr() + nBegin, rOctets.getStr() + nEnd);
// The column advances by the number of octets handed to writeSequence.
763 : m_nColumn += nEnd - nBegin;
764 : }
765 :
766 : /** Write a single octet.
767 :
768 : @param nOctet Some octet.
769 :
770 : @return This instance.
771 : */
772 : inline INetMIMEOutputSink & operator <<(sal_Char nOctet);
773 :
774 : /** Write a null terminated sequence of octets (without the terminating
775 : null).
776 :
777 : @param pOctets A null terminated sequence of octets, must not be
778 : null.
779 :
780 : @return This instance.
781 : */
782 : inline INetMIMEOutputSink & operator <<(const sal_Char * pOctets);
783 :
784 : /** Write a sequence of octets.
785 :
786 : @param rOctets An OString, interpreted as a sequence of octets.
787 :
788 : @return This instance.
789 : */
790 : INetMIMEOutputSink & operator <<(const OString& rOctets)
791 : {
792 : writeSequence(rOctets.getStr(), rOctets.getStr() + rOctets.getLength());
793 : m_nColumn += rOctets.getLength();
794 : return *this;
795 : }
796 :
797 : /** Call a manipulator function.
798 :
799 : @param pManipulator A manipulator function.
800 :
801 : @return Whatever the manipulator function returns.
802 : */
803 : INetMIMEOutputSink &
804 0 : operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))
805 0 : { return pManipulator(*this); }
806 :
807 : /** Write a line end (CR LF).
808 : */
809 : void writeLineEnd();
810 :
811 : /** A manipulator function that writes a line end (CR LF).
812 :
813 : @param rSink Some sink.
814 :
815 : @return The sink rSink.
816 : */
817 : static inline INetMIMEOutputSink & endl(INetMIMEOutputSink & rSink);
818 : };
819 :
820 : inline void INetMIMEOutputSink::write(const sal_Char * pBegin,
821 : const sal_Char * pEnd)
822 : {
823 : writeSequence(pBegin, pEnd);
824 : m_nColumn += pEnd - pBegin;
825 : }
826 :
827 0 : inline void INetMIMEOutputSink::write(const sal_Unicode * pBegin,
828 : const sal_Unicode * pEnd)
829 : {
830 0 : writeSequence(pBegin, pEnd);
831 0 : m_nColumn += pEnd - pBegin;
832 0 : }
833 :
834 0 : inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(sal_Char nOctet)
835 : {
836 0 : writeSequence(&nOctet, &nOctet + 1);
837 0 : ++m_nColumn;
838 0 : return *this;
839 : }
840 :
841 0 : inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const sal_Char *
842 : pOctets)
843 : {
844 0 : m_nColumn += writeSequence(pOctets);
845 0 : return *this;
846 : }
847 :
848 : // static
849 0 : inline INetMIMEOutputSink & INetMIMEOutputSink::endl(INetMIMEOutputSink &
850 : rSink)
851 : {
852 0 : rSink.writeLineEnd();
853 0 : return rSink;
854 : }
855 :
856 : // static
857 0 : inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink & rSink,
858 : sal_uInt32 nChar)
859 : {
860 : DBG_ASSERT(nChar <= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
861 0 : rSink << '=' << sal_uInt8(getHexDigit(nChar >> 4))
862 0 : << sal_uInt8(getHexDigit(nChar & 15));
863 0 : }
864 :
/** An INetMIMEOutputSink that owns an OStringBuffer whose content can be
    fetched with takeBuffer(). NOTE(review): writeSequence() is defined
    elsewhere; presumably it appends to m_aBuffer -- confirm there. */
865 0 : class INetMIMEStringOutputSink: public INetMIMEOutputSink
866 : {
// Buffer handed out (and cleared) by takeBuffer().
867 : OStringBuffer m_aBuffer;
868 :
// Keep the base class writeSequence overloads visible next to the override.
869 : using INetMIMEOutputSink::writeSequence;
870 :
871 : virtual void writeSequence(const sal_Char * pBegin,
872 : const sal_Char * pEnd) SAL_OVERRIDE;
873 :
874 : public:
// Forward the initial column and the line length limit to the base class.
875 0 : inline INetMIMEStringOutputSink(sal_uInt32 nColumn = 0,
876 : sal_uInt32 nLineLengthLimit
877 : = INetMIME::SOFT_LINE_LENGTH_LIMIT):
878 0 : INetMIMEOutputSink(nColumn, nLineLengthLimit) {}
879 :
880 : virtual ErrCode getError() const SAL_OVERRIDE;
881 :
// Return the buffered content as an OString and clear the buffer.
882 0 : OString takeBuffer()
883 : {
884 0 : return m_aBuffer.makeStringAndClear();
885 : }
886 : };
887 :
/** Helper that writes to a wrapped INetMIMEOutputSink (m_rSink).
    NOTE(review): judging by the member names this buffers characters and
    emits them as encoded words where needed; the implementation is not
    visible here -- confirm against the .cxx file. */
888 : class INetMIMEEncodedWordOutputSink
889 : {
890 : public:
// The enumerators are distinct bits (1, 2, 4).
891 : enum Context { CONTEXT_TEXT = 1,
892 : CONTEXT_COMMENT = 2,
893 : CONTEXT_PHRASE = 4 };
894 :
895 : enum Space { SPACE_NO, SPACE_ENCODED, SPACE_ALWAYS };
896 :
897 : private:
898 : enum { BUFFER_SIZE = 256 };
899 :
900 : enum Coding { CODING_NONE, CODING_QUOTED, CODING_ENCODED,
901 : CODING_ENCODED_TERMINATED };
902 :
// NOTE(review): the names follow the "=?charset?encoding?text?=" layout;
// presumably a scanner state for that syntax -- confirm in the .cxx file.
903 : enum EncodedWordState { STATE_INITIAL, STATE_FIRST_EQUALS,
904 : STATE_FIRST_QUESTION, STATE_CHARSET,
905 : STATE_SECOND_QUESTION, STATE_ENCODING,
906 : STATE_THIRD_QUESTION, STATE_ENCODED_TEXT,
907 : STATE_FOURTH_QUESTION, STATE_SECOND_EQUALS,
908 : STATE_BAD };
909 :
// The wrapped sink; held by reference, so it must outlive this object.
910 : INetMIMEOutputSink & m_rSink;
911 : Context m_eContext;
912 : Space m_eInitialSpace;
913 : sal_uInt32 m_nExtraSpaces;
// Initialised from INetMIME::createPreferredCharsetList() in the constructor.
914 : INetMIMECharsetList_Impl * m_pEncodingList;
915 : sal_Unicode * m_pBuffer;
916 : sal_uInt32 m_nBufferSize;
917 : sal_Unicode * m_pBufferEnd;
918 : Coding m_ePrevCoding;
919 : rtl_TextEncoding m_ePrevMIMEEncoding;
920 : Coding m_eCoding;
921 : sal_uInt32 m_nQuotedEscaped;
922 : EncodedWordState m_eEncodedWordState;
923 :
924 : inline bool needsEncodedWordEscape(sal_uInt32 nChar) const;
925 :
926 : void finish(bool bWriteTrailer);
927 :
928 : public:
929 : inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,
930 : Context eTheContext,
931 : Space eTheInitialSpace,
932 : rtl_TextEncoding ePreferredEncoding);
933 :
934 : ~INetMIMEEncodedWordOutputSink();
935 :
936 : INetMIMEEncodedWordOutputSink & WriteUInt32(sal_uInt32 nChar);
937 :
938 : inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
939 :
940 : inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
941 :
942 : inline bool flush();
943 : };
944 :
945 0 : inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink( // Stores the arguments, resets all state members and allocates the work buffer.
946 : INetMIMEOutputSink & rTheSink, Context eTheContext,
947 : Space eTheInitialSpace, rtl_TextEncoding ePreferredEncoding):
948 : m_rSink(rTheSink),
949 : m_eContext(eTheContext),
950 : m_eInitialSpace(eTheInitialSpace),
951 : m_nExtraSpaces(0),
952 0 : m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding)), // ePreferredEncoding is only used here
953 : m_ePrevCoding(CODING_NONE),
954 : m_ePrevMIMEEncoding(RTL_TEXTENCODING_DONTKNOW),
955 : m_eCoding(CODING_NONE),
956 : m_nQuotedEscaped(0),
957 0 : m_eEncodedWordState(STATE_INITIAL)
958 : {
959 0 : m_nBufferSize = BUFFER_SIZE; // capacity in sal_Unicode units
960 : m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory( // NOTE(review): the returned pointer is not checked for null here
961 : m_nBufferSize
962 0 : * sizeof (sal_Unicode))); // size in bytes = units * sizeof (sal_Unicode)
963 0 : m_pBufferEnd = m_pBuffer; // begin == end, so the buffer starts out empty
964 0 : }
965 :
966 : inline void INetMIMEEncodedWordOutputSink::write(const sal_Char * pBegin, // Passes every sal_Char in [pBegin, pEnd) to WriteUInt32(), in order.
967 : const sal_Char * pEnd)
968 : {
969 : DBG_ASSERT(pBegin && pBegin <= pEnd,
970 : "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
971 :
972 : while (pBegin != pEnd)
973 : WriteUInt32(*pBegin++); // NOTE(review): if sal_Char is a signed type, values >= 0x80 are sign-extended when converted to sal_uInt32 -- confirm only US-ASCII is passed
974 : }
975 :
976 0 : inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin, // Passes every sal_Unicode unit in [pBegin, pEnd) to WriteUInt32(), in order.
977 : const sal_Unicode * pEnd)
978 : {
979 : DBG_ASSERT(pBegin && pBegin <= pEnd,
980 : "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
981 :
982 0 : while (pBegin != pEnd) // an empty range (pBegin == pEnd) writes nothing
983 0 : WriteUInt32(*pBegin++); // each unit is passed on individually
984 0 : }
985 :
986 0 : inline bool INetMIMEEncodedWordOutputSink::flush() // Calls finish(true) and reports the coding state left behind.
987 : {
988 0 : finish(true); // bWriteTrailer = true
989 0 : return m_ePrevCoding != CODING_NONE; // true when m_ePrevCoding is anything other than CODING_NONE after finish()
990 : }
991 :
992 45 : struct INetContentTypeParameter // One content-type parameter; every member is const and is set once by the constructor.
993 : {
994 : /** The name of the attribute, in US-ASCII encoding and converted to lower
995 : case. If a parameter value is split as described in RFC 2231, there
996 : will only be one item for the complete parameter, with the attribute
997 : name lacking any section suffix.
998 : */
999 : const OString m_sAttribute;
1000 :
1001 : /** The optional character set specification (see RFC 2231), in US-ASCII
1002 : encoding and converted to lower case.
1003 : */
1004 : const OString m_sCharset;
1005 :
1006 : /** The optional language specification (see RFC 2231), in US-ASCII
1007 : encoding and converted to lower case.
1008 : */
1009 : const OString m_sLanguage;
1010 :
1011 : /** The attribute value. If the value is a quoted-string, it is
1012 : 'unpacked.' If a character set is specified, and the value can be
1013 : converted to Unicode, this is done. Also, if no character set is
1014 : specified, conversion of the value from UTF-8 encoding to Unicode is
1015 : tried first, and if that doesn't work (because the value is not in
1016 : UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1017 : (which will always work). But if a character set is specified and the
1018 : value cannot be converted from that character set to Unicode, special
1019 : action is taken to produce a value that can possibly be transformed
1020 : back into its original form: Any 8-bit character from a non-encoded
1021 : part of the original value is directly converted to Unicode
1022 : (effectively handling it as if it were ISO-8859-1 encoded), and any
1023 : 8-bit character from an encoded part of the original value is mapped
1024 : to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1025 : within Unicode's Private Use Area (effectively adding 0xF800 to the
1026 : character's numeric value).
1027 : */
1028 : const OUString m_sValue;
1029 :
1030 : /** This is true if the value is successfully converted to Unicode, and
1031 : false if the value is a special mixture of ISO-LATIN-1 characters and
1032 : characters from Unicode's Private Use Area.
1033 : */
1034 : const bool m_bConverted;
1035 :
1036 15 : INetContentTypeParameter(const OString& rTheAttribute, // Copies each argument into the member of the matching name.
1037 : const OString& rTheCharset, const OString& rTheLanguage,
1038 : const OUString& rTheValue, bool bTheConverted)
1039 : : m_sAttribute(rTheAttribute)
1040 : , m_sCharset(rTheCharset)
1041 : , m_sLanguage(rTheLanguage)
1042 : , m_sValue(rTheValue)
1043 15 : , m_bConverted(bTheConverted)
1044 : {
1045 15 : }
1046 : };
1047 :
1048 113 : class TOOLS_DLLPUBLIC INetContentTypeParameterList // List of INetContentTypeParameter objects kept in a boost::ptr_vector.
1049 : {
1050 : public:
1051 :
1052 : void Clear(); // defined elsewhere
1053 :
1054 : void Insert(INetContentTypeParameter * pParameter, sal_uIntPtr nIndex) // Inserts pParameter in front of position nIndex.
1055 : {
1056 : maEntries.insert(maEntries.begin()+nIndex,pParameter); // no range check on nIndex here; the ptr_vector takes ownership of pParameter
1057 : }
1058 :
1059 15 : void Append(INetContentTypeParameter *pParameter) // Adds pParameter at the end of the list.
1060 : {
1061 15 : maEntries.push_back(pParameter); // the ptr_vector takes ownership of pParameter
1062 15 : }
1063 :
1064 : inline const INetContentTypeParameter * GetObject(sal_uIntPtr nIndex) const // Returns the address of the entry at nIndex.
1065 : {
1066 : return &(maEntries[nIndex]); // no explicit range check on nIndex here; the pointer refers to an element still held by maEntries
1067 : }
1068 :
1069 : const INetContentTypeParameter * find(const OString& rAttribute) const; // defined elsewhere
1070 :
1071 : private:
1072 :
1073 : boost::ptr_vector<INetContentTypeParameter> maEntries; // holds the entries added through Insert() and Append()
1074 : };
1075 :
1076 : #endif
1077 :
1078 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|