Branch data Line data Source code
1 : : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : : /*
3 : : * This file is part of the LibreOffice project.
4 : : *
5 : : * This Source Code Form is subject to the terms of the Mozilla Public
6 : : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : : *
9 : : * This file incorporates work covered by the following license notice:
10 : : *
11 : : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : : * contributor license agreements. See the NOTICE file distributed
13 : : * with this work for additional information regarding copyright
14 : : * ownership. The ASF licenses this file to you under the Apache
15 : : * License, Version 2.0 (the "License"); you may not use this file
16 : : * except in compliance with the License. You may obtain a copy of
17 : : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : : */
19 : :
20 : : #include <sax/saxdllapi.h>
21 : :
22 : :
23 : : /*
24 : : *
25 : : * Text2UnicodeConverter
26 : : *
27 : : **/
28 : : namespace sax_expatwrap {
29 : :
30 : : class SAX_DLLPUBLIC Text2UnicodeConverter // Converter from encoded byte sequences (sal_Int8) to sal_Unicode sequences.
31 : : {
32 : : // Only declarations are visible in this header; the member functions other than canContinue() are defined elsewhere.
33 : : public:
34 : : Text2UnicodeConverter( const ::rtl::OString & sEncoding ); // sEncoding: the source encoding given as a string (presumably an encoding name -- confirm in the implementation).
35 : : ~Text2UnicodeConverter();
36 : : // convert(): takes a sequence of bytes and returns a sequence of sal_Unicode values.
37 : : ::com::sun::star::uno::Sequence < sal_Unicode > convert( const ::com::sun::star::uno::Sequence<sal_Int8> & );
38 : 0 : sal_Bool canContinue() { return m_bCanContinue; } // Returns the m_bCanContinue flag; presumably reports whether the converter is usable -- verify against the implementation.
39 : :
40 : : private:
41 : : void init( rtl_TextEncoding encoding ); // Set-up helper that receives the encoding as an rtl_TextEncoding value.
42 : : // Data members (how each one is used is not visible in this header).
43 : : rtl_TextToUnicodeConverter m_convText2Unicode; // converter handle
44 : : rtl_TextToUnicodeContext m_contextText2Unicode; // context handle belonging to the converter
45 : : sal_Bool m_bCanContinue; // value returned by canContinue()
46 : : sal_Bool m_bInitialized; // presumably set once init() has run -- TODO confirm
47 : : rtl_TextEncoding m_rtlEncoding; // encoding as an rtl_TextEncoding value
48 : : ::com::sun::star::uno::Sequence<sal_Int8> m_seqSource; // byte buffer; presumably holds input bytes left over between convert() calls -- TODO confirm
49 : : };
50 : :
51 : : /*----------------------------------------
52 : : *
53 : : * Unicode2TextConverter
54 : : *
55 : : **-----------------------------------------*/
56 : : class SAX_DLLPUBLIC Unicode2TextConverter // Converter from sal_Unicode text to byte sequences (sal_Int8) in a target encoding.
57 : : {
58 : : public:
59 : : Unicode2TextConverter( rtl_TextEncoding encoding ); // encoding: the target encoding, passed directly as an rtl_TextEncoding value.
60 : : ~Unicode2TextConverter();
61 : : // Convenience overload: forwards the string's buffer and length to the pointer/length overload below.
62 : : inline ::com::sun::star::uno::Sequence<sal_Int8> convert( const ::rtl::OUString &s )
63 : : {
64 : : return convert( s.getStr() , s.getLength() );
65 : : }
66 : : ::com::sun::star::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength ); // Converts nLength sal_Unicode values starting at the given pointer; defined elsewhere.
67 : 0 : sal_Bool canContinue() { return m_bCanContinue; } // Returns the m_bCanContinue flag; presumably reports whether the converter is usable -- verify against the implementation.
68 : :
69 : : private:
70 : : void init( rtl_TextEncoding encoding ); // Set-up helper that receives the encoding as an rtl_TextEncoding value.
71 : : // Data members (how each one is used is not visible in this header).
72 : : rtl_UnicodeToTextConverter m_convUnicode2Text; // converter handle
73 : : rtl_UnicodeToTextContext m_contextUnicode2Text; // context handle belonging to the converter
74 : : sal_Bool m_bCanContinue; // value returned by canContinue()
75 : : sal_Bool m_bInitialized; // presumably set once init() has run -- TODO confirm
76 : : rtl_TextEncoding m_rtlEncoding; // encoding as an rtl_TextEncoding value
77 : : ::com::sun::star::uno::Sequence<sal_Unicode> m_seqSource; // sal_Unicode buffer; presumably holds input left over between convert() calls -- TODO confirm
78 : : };
79 : :
80 : :
81 : :
82 : : /*----------------------------------------
83 : : *
84 : : * XMLFile2UTFConverter
85 : : *
86 : : **-----------------------------------------*/
87 : 9303 : class SAX_DLLPUBLIC XMLFile2UTFConverter // Reads bytes from an XInputStream and converts them; uses the two converter classes via m_pText2Unicode / m_pUnicode2Text.
88 : : {
89 : : public:
90 : 9340 : XMLFile2UTFConverter( ): // Starts with m_bStarted false and both converter pointers null; m_in and m_sEncoding are default-constructed.
91 : : m_bStarted( sal_False ),
92 : : m_pText2Unicode( 0 ),
93 : 9340 : m_pUnicode2Text( 0 )
94 : 9340 : {}
95 : :
96 : : ~XMLFile2UTFConverter(); // Defined elsewhere. NOTE(review): presumably deletes the two converter pointers; copy operations are not declared here -- confirm copying is never done.
97 : :
98 : 9303 : void setInputStream( ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > &r ) { m_in = r; } // Stores the stream reference in m_in. NOTE(review): r is taken by non-const reference although it is only copied.
99 : 0 : void setEncoding( const ::rtl::OString &s ) { m_sEncoding = s; } // Stores the encoding string in m_sEncoding.
100 : :
101 : : // Reads from the input stream and converts the data.
102 : : // @param seq Byte sequence passed by non-const reference; presumably receives the result -- TODO confirm.
103 : : // @param nMaxToRead The number of characters that should be read. This is only an approximate number:
104 : : // fewer or more bytes than requested may be returned.
105 : : sal_Int32 readAndConvert( ::com::sun::star::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead )
106 : : throw ( ::com::sun::star::io::IOException,
107 : : ::com::sun::star::io::NotConnectedException ,
108 : : ::com::sun::star::io::BufferSizeExceededException ,
109 : : ::com::sun::star::uno::RuntimeException );
110 : :
111 : : private:
112 : :
113 : : // Called only on the first sequence of bytes. Tries to determine the file format and encoding information.
114 : : // @return TRUE if encoding information could be retrieved
115 : : // @return FALSE if no encoding information was found in the file
116 : : sal_Bool scanForEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
117 : :
118 : : // Called only on the first sequence of bytes. Tries to determine
119 : : // whether enough data is available to scan for the encoding.
120 : : // @return TRUE if the encoding can be retrieved
121 : : // @return FALSE if more data is needed
122 : : sal_Bool isEncodingRecognizable( const ::com::sun::star::uno::Sequence< sal_Int8 > & seq );
123 : :
124 : : // If an encoding attribute is present in the text (in the first line), it is removed from seq.
125 : : void removeEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
126 : :
127 : : // Initializes decoding according to the m_sEncoding setting.
128 : : void initializeDecoding();
129 : : private: // data members
130 : : ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > m_in; // input stream set by setInputStream()
131 : :
132 : : sal_Bool m_bStarted; // initialized to sal_False by the constructor; presumably set after the first chunk has been processed -- TODO confirm
133 : : ::rtl::OString m_sEncoding; // encoding string set by setEncoding()
134 : :
135 : : Text2UnicodeConverter *m_pText2Unicode; // null until created; NOTE(review): ownership is not visible in this header
136 : : Unicode2TextConverter *m_pUnicode2Text; // null until created; NOTE(review): ownership is not visible in this header
137 : : };
138 : : }
139 : :
140 : : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|