Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #ifndef INCLUDED_SAX_INC_XML2UTF_HXX
21 : #define INCLUDED_SAX_INC_XML2UTF_HXX
22 :
23 : #include <sal/types.h>
24 :
25 : namespace sax_expatwrap {
26 :
27 : class Text2UnicodeConverter
28 : {
29 :
30 : public:
31 : Text2UnicodeConverter( const OString & sEncoding );
32 : ~Text2UnicodeConverter();
33 :
34 : ::com::sun::star::uno::Sequence < sal_Unicode > convert( const ::com::sun::star::uno::Sequence<sal_Int8> & );
35 0 : bool canContinue() { return m_bCanContinue; }
36 :
37 : private:
38 : void init( rtl_TextEncoding encoding );
39 :
40 : rtl_TextToUnicodeConverter m_convText2Unicode;
41 : rtl_TextToUnicodeContext m_contextText2Unicode;
42 : bool m_bCanContinue;
43 : bool m_bInitialized;
44 : rtl_TextEncoding m_rtlEncoding;
45 : ::com::sun::star::uno::Sequence<sal_Int8> m_seqSource;
46 : };
47 :
48 : /*----------------------------------------
49 : *
50 : * Unicode2TextConverter
51 : *
52 : **-----------------------------------------*/
53 : class Unicode2TextConverter
54 : {
55 : public:
56 : Unicode2TextConverter( rtl_TextEncoding encoding );
57 : ~Unicode2TextConverter();
58 :
59 : inline ::com::sun::star::uno::Sequence<sal_Int8> convert( const OUString &s )
60 : {
61 : return convert( s.getStr() , s.getLength() );
62 : }
63 : ::com::sun::star::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength );
64 0 : bool canContinue() { return m_bCanContinue; }
65 :
66 : private:
67 : void init( rtl_TextEncoding encoding );
68 :
69 : rtl_UnicodeToTextConverter m_convUnicode2Text;
70 : rtl_UnicodeToTextContext m_contextUnicode2Text;
71 : bool m_bCanContinue;
72 : bool m_bInitialized;
73 : rtl_TextEncoding m_rtlEncoding;
74 : ::com::sun::star::uno::Sequence<sal_Unicode> m_seqSource;
75 : };
76 :
77 :
78 :
79 : /*----------------------------------------
80 : *
81 : * XMLFile2UTFConverter
82 : *
83 : **-----------------------------------------*/
84 42949 : class XMLFile2UTFConverter
85 : {
86 : public:
87 43516 : XMLFile2UTFConverter( ):
88 : m_bStarted( false ),
89 : m_pText2Unicode( 0 ),
90 43516 : m_pUnicode2Text( 0 )
91 43516 : {}
92 :
93 : ~XMLFile2UTFConverter();
94 :
95 42949 : void setInputStream( ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > &r ) { m_in = r; }
96 0 : void setEncoding( const OString &s ) { m_sEncoding = s; }
97 :
98 :
99 :
100 : // @param nMaxToRead The number of chars, that should be read. Note that this is no exact number. There
101 : // may be returned less or more bytes than ordered.
102 : sal_Int32 readAndConvert( ::com::sun::star::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead )
103 : throw ( ::com::sun::star::io::IOException,
104 : ::com::sun::star::io::NotConnectedException ,
105 : ::com::sun::star::io::BufferSizeExceededException ,
106 : ::com::sun::star::uno::RuntimeException );
107 :
108 : private:
109 :
110 : // Called only on first Sequence of bytes. Tries to figure out file format and encoding information.
111 : // @return TRUE, when encoding information could be retrieved
112 : // @return FALSE, when no encoding information was found in file
113 : bool scanForEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
114 :
115 : // Called only on first Sequence of bytes. Tries to figure out
116 : // if enough data is available to scan encoding
117 : // @return TRUE, when encoding is retrievable
118 : // @return FALSE, when more data is needed
119 : static bool isEncodingRecognizable( const ::com::sun::star::uno::Sequence< sal_Int8 > & seq );
120 :
121 : // When encoding attribute is within the text (in the first line), it is removed.
122 : static void removeEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
123 :
124 : // Initializes decoding depending on m_sEncoding setting
125 : void initializeDecoding();
126 : private:
127 : ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > m_in;
128 :
129 : bool m_bStarted;
130 : OString m_sEncoding;
131 :
132 : Text2UnicodeConverter *m_pText2Unicode;
133 : Unicode2TextConverter *m_pUnicode2Text;
134 : };
135 : }
136 :
137 : #endif // INCLUDED_SAX_INC_XML2UTF_HXX
138 :
139 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|