Reintroduce OUString::replaceAt taking an OUString to LIBO_INTERNAL_ONLY
[LibreOffice.git] / filter / source / xmlfilterdetect / filterdetect.cxx
blob9fb1f7b66a0342b590ab5ce3d8c799656774a6af
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "filterdetect.hxx"
21 #include <com/sun/star/io/XInputStream.hpp>
22 #include <com/sun/star/uno/XComponentContext.hpp>
23 #include <com/sun/star/container/XNameAccess.hpp>
24 #include <com/sun/star/beans/PropertyState.hpp>
25 #include <cppuhelper/supportsservice.hxx>
26 #include <comphelper/diagnose_ex.hxx>
27 #include <ucbhelper/content.hxx>
28 #include <unotools/ucbstreamhelper.hxx>
29 #include <svl/inettype.hxx>
30 #include <memory>
31 #include <o3tl/string_view.hxx>
33 using namespace com::sun::star::container;
34 using namespace com::sun::star::uno;
35 using namespace com::sun::star::beans;
37 namespace {
39 OUString supportedByType( std::u16string_view clipBoardFormat, std::u16string_view resultString, const OUString& checkType)
41 OUString sTypeName;
42 if ( o3tl::starts_with(clipBoardFormat, u"doctype:") )
44 std::u16string_view tryStr = clipBoardFormat.substr(8);
45 if (resultString.find(tryStr) != std::u16string_view::npos)
47 sTypeName = checkType;
50 return sTypeName;
53 bool IsMediaTypeXML( const OUString& mediaType )
55 if (!mediaType.isEmpty())
57 OUString sType, sSubType;
58 if (INetContentTypes::parse(mediaType, sType, sSubType)
59 && sType == "application")
61 // RFC 3023: application/xml; don't detect text/xml
62 if (sSubType == "xml")
63 return true;
64 // Registered media types: application/XXXX+xml
65 if (sSubType.endsWith("+xml"))
66 return true;
69 return false;
74 OUString SAL_CALL FilterDetect::detect( css::uno::Sequence< css::beans::PropertyValue >& aArguments )
76 OUString sTypeName;
77 OUString sUrl;
78 Sequence<PropertyValue > lProps ;
80 css::uno::Reference< css::io::XInputStream > xInStream;
81 const PropertyValue * pValue = aArguments.getConstArray();
82 sal_Int32 nLength;
83 OUString resultString;
85 nLength = aArguments.getLength();
86 sal_Int32 location=nLength;
87 for (sal_Int32 i = 0 ; i < nLength; i++)
89 if ( pValue[i].Name == "TypeName" )
91 location=i;
93 else if ( pValue[i].Name == "URL" )
95 pValue[i].Value >>= sUrl;
97 else if ( pValue[i].Name == "InputStream" )
99 pValue[i].Value >>= xInStream ;
104 if (!xInStream.is())
106 ::ucbhelper::Content aContent(
107 sUrl, Reference< css::ucb::XCommandEnvironment >(),
108 mxCtx);
109 xInStream = aContent.openStream();
110 if (!xInStream.is())
112 return sTypeName;
116 std::unique_ptr< SvStream > pInStream( ::utl::UcbStreamHelper::CreateStream( xInStream ) );
117 pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
118 sal_uInt64 const nUniPos = pInStream->Tell();
120 const sal_uInt16 nSize = 4000;
121 bool bTryUtf16 = false;
123 if ( nUniPos == 0 ) // No BOM detected, try to guess UTF-16 endianness
125 sal_uInt16 nHeader = 0;
126 pInStream->ReadUInt16( nHeader );
127 if ( nHeader == 0x003C )
128 bTryUtf16 = true;
129 else if ( nHeader == 0x3C00 )
131 bTryUtf16 = true;
132 pInStream->SetEndian( pInStream->GetEndian() == SvStreamEndian::LITTLE ? SvStreamEndian::BIG : SvStreamEndian::LITTLE );
134 pInStream->Seek( STREAM_SEEK_TO_BEGIN );
137 if ( nUniPos == 3 || ( nUniPos == 0 && !bTryUtf16 ) ) // UTF-8 or non-Unicode
139 OString const str(read_uInt8s_ToOString(*pInStream, nSize));
140 resultString = OUString(str.getStr(), str.getLength(),
141 RTL_TEXTENCODING_ASCII_US,
142 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT);
144 else if ( nUniPos == 2 || bTryUtf16 ) // UTF-16
145 resultString = read_uInt16s_ToOUString( *pInStream, nSize );
147 if ( !resultString.startsWith( "<?xml" ) )
149 // Check the content type; XML declaration is optional in XML files according to XML 1.0 ch.2.8
150 // (see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd)
151 OUString sMediaType;
154 ::ucbhelper::Content aContent(
155 sUrl, Reference< css::ucb::XCommandEnvironment >(),
156 mxCtx);
157 aContent.getPropertyValue(u"MediaType"_ustr) >>= sMediaType;
158 if (sMediaType.isEmpty())
160 aContent.getPropertyValue(u"Content-Type"_ustr) >>= sMediaType;
163 catch (...) {}
165 if (!IsMediaTypeXML(sMediaType))
167 // This is not an XML stream. It makes no sense to try to detect
168 // a non-XML file type here.
169 return OUString();
173 // test typedetect code
174 Reference <XNameAccess> xTypeCont(mxCtx->getServiceManager()->createInstanceWithContext(u"com.sun.star.document.TypeDetection"_ustr, mxCtx), UNO_QUERY);
175 Sequence < OUString > myTypes= xTypeCont->getElementNames();
176 nLength = myTypes.getLength();
178 sal_Int32 new_nlength=0;
179 sal_Int32 i = 0 ;
180 while ((i < nLength) && (sTypeName.isEmpty()))
182 Any elem = xTypeCont->getByName(myTypes[i]);
183 elem >>=lProps;
184 new_nlength = lProps.getLength();
185 sal_Int32 j =0;
186 while (j < new_nlength && (sTypeName.isEmpty()))
188 OUString tmpStr;
189 lProps[j].Value >>=tmpStr;
190 if ( lProps[j].Name == "ClipboardFormat" && !tmpStr.isEmpty() )
192 sTypeName = supportedByType(tmpStr,resultString, myTypes[i]);
194 j++;
196 i++;
199 catch (const Exception &)
201 TOOLS_WARN_EXCEPTION("filter.xmlfd", "An Exception occurred while opening File stream");
204 if (!sTypeName.isEmpty())
206 if (location == aArguments.getLength())
208 aArguments.realloc(nLength+1);
209 aArguments.getArray()[location].Name = "TypeName";
211 aArguments.getArray()[location].Value <<=sTypeName;
214 return sTypeName;
217 // XInitialization
218 void SAL_CALL FilterDetect::initialize( const Sequence< Any >& /*aArguments*/ )
222 // XServiceInfo
223 OUString SAL_CALL FilterDetect::getImplementationName( )
225 return u"com.sun.star.comp.filters.XMLFilterDetect"_ustr;
228 sal_Bool SAL_CALL FilterDetect::supportsService( const OUString& rServiceName )
230 return cppu::supportsService( this, rServiceName );
233 Sequence< OUString > SAL_CALL FilterDetect::getSupportedServiceNames( )
235 return { u"com.sun.star.document.ExtendedTypeDetection"_ustr };
238 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
239 filter_XMLFilterDetect_get_implementation(
240 css::uno::XComponentContext* context, css::uno::Sequence<css::uno::Any> const&)
242 return cppu::acquire(new FilterDetect(context));
246 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */