LCOV - code coverage report
Current view: top level - libreoffice/l10ntools/source/ulfconv - ulfconv.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 47 123 38.2 %
Date: 2012-12-27 Functions: 2 7 28.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <stdlib.h>
      21             : #include <stdio.h>
      22             : #include <fcntl.h>
      23             : #include <errno.h>
      24             : #include <string.h>
      25             : #include <unistd.h>
      26             : #include <ctype.h>
      27             : #include <sal/alloca.h>
      28             : #include <sal/macros.h>
      29             : 
      30             : #include <rtl/ustring.hxx>
      31             : 
      32             : #include <map>
      33             : #include <string>
      34             : 
      35             : /*****************************************************************************
      36             :  * typedefs
      37             :  *****************************************************************************/
      38             : 
      39             : typedef std::map< const std::string, rtl_TextEncoding > EncodingMap;
      40             : 
      41             : struct _pair {
      42             :     const char *key;
      43             :     rtl_TextEncoding value;
      44             : };
      45             : 
      46             : static int _pair_compare (const char *key, const _pair *pair);
      47             : static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member );
      48             : 
      49             : 
      50             : const _pair _ms_encoding_list[] = {
      51             :     { "0",       RTL_TEXTENCODING_UTF8        },
      52             :     { "1250",    RTL_TEXTENCODING_MS_1250     },
      53             :     { "1251",    RTL_TEXTENCODING_MS_1251     },
      54             :     { "1252",    RTL_TEXTENCODING_MS_1252     },
      55             :     { "1253",    RTL_TEXTENCODING_MS_1253     },
      56             :     { "1254",    RTL_TEXTENCODING_MS_1254     },
      57             :     { "1255",    RTL_TEXTENCODING_MS_1255     },
      58             :     { "1256",    RTL_TEXTENCODING_MS_1256     },
      59             :     { "1257",    RTL_TEXTENCODING_MS_1257     },
      60             :     { "1258",    RTL_TEXTENCODING_MS_1258     },
      61             :     { "874",     RTL_TEXTENCODING_MS_874      },
      62             :     { "932",     RTL_TEXTENCODING_MS_932      },
      63             :     { "936",     RTL_TEXTENCODING_MS_936      },
      64             :     { "949",     RTL_TEXTENCODING_MS_949      },
      65             :     { "950",     RTL_TEXTENCODING_MS_950      }
      66             : };
      67             : 
      68             : 
      69             : /*****************************************************************************
      70             :  * fgets that works with Unix line ends on Windows
      71             :  *****************************************************************************/
      72             : 
      73           0 : char * my_fgets(char *s, int n, FILE *fp)
      74             : {
      75             :     int i;
      76           0 :     for( i=0; i < n-1; i++ )
      77             :     {
      78           0 :         int c = getc(fp);
      79             : 
      80           0 :         if( c == EOF )
      81           0 :             break;
      82             : 
      83           0 :         s[i] = (char) c;
      84             : 
      85           0 :         if( s[i] == '\n' )
      86             :         {
      87           0 :             i++;
      88           0 :             break;
      89             :         }
      90             :     }
      91             : 
      92           0 :     if( i>0 )
      93             :     {
      94           0 :         s[i] = '\0';
      95           0 :         return s;
      96             :     }
      97             :     else
      98             :     {
      99           0 :         return NULL;
     100             :     }
     101             : }
     102             : 
     103             : /*****************************************************************************
     104             :  * compare function for binary search
     105             :  *****************************************************************************/
     106             : 
     107             : static int
     108           0 : _pair_compare (const char *key, const _pair *pair)
     109             : {
     110           0 :     int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
     111           0 :     return result;
     112             : }
     113             : 
     114             : /*****************************************************************************
     115             :  * binary search on encoding tables
     116             :  *****************************************************************************/
     117             : 
     118             : static const _pair*
     119           0 : _pair_search (const char *key, const _pair *base, unsigned int member )
     120             : {
     121           0 :     unsigned int lower = 0;
     122           0 :     unsigned int upper = member;
     123             :     unsigned int current;
     124             :     int comparison;
     125             : 
     126             :     /* check for validity of input */
     127           0 :     if ( (key == NULL) || (base == NULL) || (member == 0) )
     128           0 :         return NULL;
     129             : 
     130             :     /* binary search */
     131           0 :     while ( lower < upper )
     132             :     {
     133           0 :         current = (lower + upper) / 2;
     134           0 :         comparison = _pair_compare( key, base + current );
     135           0 :         if (comparison < 0)
     136           0 :             upper = current;
     137             :         else
     138           0 :         if (comparison > 0)
     139           0 :             lower = current + 1;
     140             :         else
     141           0 :             return base + current;
     142             :     }
     143             : 
     144           0 :     return NULL;
     145             : }
     146             : 
     147             : 
     148             : /************************************************************************
     149             :  * read_encoding_table
     150             :  ************************************************************************/
     151             : 
     152           0 : void read_encoding_table(char * file, EncodingMap& aEncodingMap)
     153             : {
     154           0 :     FILE * fp = fopen(file, "r");
     155           0 :     if ( ! fp  ) {
     156           0 :         fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno));
     157           0 :         exit(2);
     158             :     }
     159             : 
     160             :     char buffer[512];
     161           0 :     while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) {
     162             : 
     163             :         // strip comment lines
     164           0 :         if ( buffer[0] == '#' )
     165           0 :             continue;
     166             : 
     167             :         // find end of language string
     168             :         char * cp;
     169           0 :         for ( cp = buffer; ! isspace(*cp); cp++ )
     170             :             ;
     171           0 :         *cp = '\0';
     172             : 
     173             :         // find start of codepage string
     174           0 :         for ( ++cp; isspace(*cp); ++cp )
     175             :             ;
     176           0 :         char * codepage = cp;
     177             : 
     178             :         // find end of codepage string
     179           0 :         for ( ++cp; ! isspace(*cp); ++cp )
     180             :             ;
     181           0 :         *cp = '\0';
     182             : 
     183             :         // find the correct mapping for codepage
     184           0 :         const unsigned int members = SAL_N_ELEMENTS( _ms_encoding_list );
     185           0 :         const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members );
     186             : 
     187           0 :         if ( encoding != NULL ) {
     188           0 :             const std::string language(buffer);
     189           0 :             aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) );
     190             :         }
     191             :     }
     192             : 
     193           0 :     fclose(fp);
     194           0 : }
     195             : 
     196             : /************************************************************************
     197             :  * print_legacy_mixed
     198             :  ************************************************************************/
     199             : 
     200           0 : void print_legacy_mixed(
     201             :     FILE * ostream,
     202             :     const rtl::OUString& aString,
     203             :     const std::string& language,
     204             :     EncodingMap& aEncodingMap)
     205             : {
     206           0 :     EncodingMap::iterator iter = aEncodingMap.find(language);
     207             : 
     208           0 :     if ( iter != aEncodingMap.end() ) {
     209           0 :         fputs(OUStringToOString(aString, iter->second).getStr(), ostream);
     210             :     } else {
     211           0 :         fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str());
     212             :     }
     213           0 : }
     214             : 
     215             : /************************************************************************
     216             :  * print_java_style
     217             :  ************************************************************************/
     218             : 
     219        1159 : void print_java_style(FILE * ostream, const rtl::OUString& aString)
     220             : {
     221        1159 :     int imax = aString.getLength();
     222       36334 :     for (int i = 0; i < imax; i++) {
     223       35175 :         sal_Unicode uc = aString[i];
     224       35175 :         if ( uc < 128 ) {
     225       35169 :             fprintf(ostream, "%c", (char) uc);
     226             :         } else {
     227           6 :             fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF );
     228             :         }
     229             :     }
     230        1159 : }
     231             : 
     232             : /************************************************************************
     233             :  * main
     234             :  ************************************************************************/
     235             : 
     236          27 : int main( int argc, char * const argv[] )
     237             : {
     238          27 :     EncodingMap aEncodingMap;
     239             : 
     240          27 :     FILE *istream = stdin;
     241          27 :     FILE *ostream = stdout;
     242             : 
     243          27 :     char *outfile = NULL;
     244             : 
     245          27 :     int errflg = 0;
     246             :     int argi;
     247             : 
     248          54 :     for( argi=1; argi < argc; argi++ )
     249             :     {
     250          54 :         if( argv[argi][0] == '-' && argv[argi][2] == '\0' )
     251             :         {
     252          54 :             switch(argv[argi][1]) {
     253             :             case 'o':
     254          27 :                 if (argi+1 >= argc || argv[argi+1][0] == '-')
     255             :                 {
     256           0 :                     fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
     257           0 :                     errflg++;
     258           0 :                     break;
     259             :                 }
     260             : 
     261          27 :                 ++argi;
     262          27 :                 outfile = argv[argi];
     263          27 :                 break;
     264             :             case 't':
     265           0 :                 if (argi+1 >= argc || argv[argi+1][0] == '-')
     266             :                 {
     267           0 :                     fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
     268           0 :                     errflg++;
     269           0 :                     break;
     270             :                 }
     271             : 
     272           0 :                 read_encoding_table(argv[++argi], aEncodingMap);
     273           0 :                 break;
     274             :             default:
     275           0 :                 fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]);
     276           0 :                 errflg++;
     277             :             }
     278             :         }
     279             :         else
     280             :         {
     281          27 :             break;
     282             :         }
     283             :     }
     284             : 
     285          27 :     if (errflg) {
     286           0 :       fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n");
     287           0 :       exit(2);
     288             :     }
     289             : 
     290             :     /* assign input file to stdin */
     291          27 :     if ( argi < argc )
     292             :     {
     293          27 :         istream = fopen(argv[argi], "r");
     294          27 :         if ( istream  == NULL ) {
     295           0 :             fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno));
     296           0 :             exit(2);
     297             :         }
     298             :     }
     299             : 
     300             :     /* open output file if any */
     301          27 :     if ( outfile )
     302             :     {
     303          27 :         ostream = fopen(outfile, "w");
     304          27 :         if ( ostream == NULL ) {
     305           0 :             fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno));
     306           0 :             fclose(istream);
     307           0 :             exit(2);
     308             :         }
     309             :     }
     310             : 
     311             :     /* read line by line from stdin */
     312             :     char buffer[65536];
     313        4139 :     while ( NULL != fgets(buffer, sizeof(buffer), istream) ) {
     314             : 
     315             :         /* only handle lines containing " = " */
     316        4085 :         char * cp = strstr(buffer, " = \"");
     317        4085 :         if ( cp ) {
     318        1159 :             rtl::OUString aString;
     319             : 
     320             :             /* find end of lang string */
     321             :             int n;
     322        1159 :             for ( n=0; ! isspace(buffer[n]); n++ )
     323             :                 ;
     324             : 
     325        1159 :             std::string line = buffer;
     326        1159 :             std::string lang(line, 0, n);
     327             : 
     328        1159 :             cp += 4;
     329        1159 :             rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp,
     330        1159 :                 RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS );
     331             : 
     332        1159 :             fprintf(ostream, "%s = \"", lang.c_str());
     333             : 
     334        1159 :             if ( aEncodingMap.empty() ) {
     335        1159 :                 print_java_style(ostream, aString);
     336             :             } else {
     337           0 :                 print_legacy_mixed(ostream, aString, lang, aEncodingMap);
     338             :             }
     339             : 
     340        1159 :             fprintf(ostream, "\"\n");
     341             : 
     342             : 
     343             :         } else {
     344        2926 :             fputs(buffer, ostream);
     345             :         }
     346             :     }
     347             : 
     348          27 :     fclose(ostream);
     349          27 :     fclose(istream);
     350          27 : }
     351             : 
     352             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10