Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <sal/types.h>
21 :
22 : namespace sax_expatwrap {
23 :
24 : class Text2UnicodeConverter
25 : {
26 :
27 : public:
28 : Text2UnicodeConverter( const OString & sEncoding );
29 : ~Text2UnicodeConverter();
30 :
31 : ::com::sun::star::uno::Sequence < sal_Unicode > convert( const ::com::sun::star::uno::Sequence<sal_Int8> & );
32 0 : sal_Bool canContinue() { return m_bCanContinue; }
33 :
34 : private:
35 : void init( rtl_TextEncoding encoding );
36 :
37 : rtl_TextToUnicodeConverter m_convText2Unicode;
38 : rtl_TextToUnicodeContext m_contextText2Unicode;
39 : sal_Bool m_bCanContinue;
40 : sal_Bool m_bInitialized;
41 : rtl_TextEncoding m_rtlEncoding;
42 : ::com::sun::star::uno::Sequence<sal_Int8> m_seqSource;
43 : };
44 :
45 : /*----------------------------------------
46 : *
47 : * Unicode2TextConverter
48 : *
49 : **-----------------------------------------*/
50 : class Unicode2TextConverter
51 : {
52 : public:
53 : Unicode2TextConverter( rtl_TextEncoding encoding );
54 : ~Unicode2TextConverter();
55 :
56 : inline ::com::sun::star::uno::Sequence<sal_Int8> convert( const OUString &s )
57 : {
58 : return convert( s.getStr() , s.getLength() );
59 : }
60 : ::com::sun::star::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength );
61 0 : sal_Bool canContinue() { return m_bCanContinue; }
62 :
63 : private:
64 : void init( rtl_TextEncoding encoding );
65 :
66 : rtl_UnicodeToTextConverter m_convUnicode2Text;
67 : rtl_UnicodeToTextContext m_contextUnicode2Text;
68 : sal_Bool m_bCanContinue;
69 : sal_Bool m_bInitialized;
70 : rtl_TextEncoding m_rtlEncoding;
71 : ::com::sun::star::uno::Sequence<sal_Unicode> m_seqSource;
72 : };
73 :
74 :
75 :
76 : /*----------------------------------------
77 : *
78 : * XMLFile2UTFConverter
79 : *
80 : **-----------------------------------------*/
81 6029 : class XMLFile2UTFConverter
82 : {
83 : public:
84 6133 : XMLFile2UTFConverter( ):
85 : m_bStarted( sal_False ),
86 : m_pText2Unicode( 0 ),
87 6133 : m_pUnicode2Text( 0 )
88 6133 : {}
89 :
90 : ~XMLFile2UTFConverter();
91 :
92 6029 : void setInputStream( ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > &r ) { m_in = r; }
93 0 : void setEncoding( const OString &s ) { m_sEncoding = s; }
94 :
95 :
96 :
97 : // @param nMaxToRead The number of chars, that should be read. Note that this is no exact number. There
98 : // may be returned less or more bytes than ordered.
99 : sal_Int32 readAndConvert( ::com::sun::star::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead )
100 : throw ( ::com::sun::star::io::IOException,
101 : ::com::sun::star::io::NotConnectedException ,
102 : ::com::sun::star::io::BufferSizeExceededException ,
103 : ::com::sun::star::uno::RuntimeException );
104 :
105 : private:
106 :
107 : // Called only on first Sequence of bytes. Tries to figure out file format and encoding information.
108 : // @return TRUE, when encoding information could be retrieved
109 : // @return FALSE, when no encoding information was found in file
110 : sal_Bool scanForEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
111 :
112 : // Called only on first Sequence of bytes. Tries to figure out
113 : // if enough data is available to scan encoding
114 : // @return TRUE, when encoding is retrievable
115 : // @return FALSE, when more data is needed
116 : sal_Bool isEncodingRecognizable( const ::com::sun::star::uno::Sequence< sal_Int8 > & seq );
117 :
118 : // When encoding attribute is within the text (in the first line), it is removed.
119 : void removeEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
120 :
121 : // Initializes decoding depending on m_sEncoding setting
122 : void initializeDecoding();
123 : private:
124 : ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > m_in;
125 :
126 : sal_Bool m_bStarted;
127 : OString m_sEncoding;
128 :
129 : Text2UnicodeConverter *m_pText2Unicode;
130 : Unicode2TextConverter *m_pUnicode2Text;
131 : };
132 : }
133 :
134 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|