1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "filterdetect.hxx"
21 #include <com/sun/star/io/XInputStream.hpp>
22 #include <com/sun/star/uno/XComponentContext.hpp>
23 #include <com/sun/star/container/XNameAccess.hpp>
24 #include <com/sun/star/beans/PropertyState.hpp>
25 #include <cppuhelper/supportsservice.hxx>
26 #include <comphelper/diagnose_ex.hxx>
27 #include <ucbhelper/content.hxx>
28 #include <unotools/ucbstreamhelper.hxx>
29 #include <svl/inettype.hxx>
31 #include <o3tl/string_view.hxx>
33 using namespace com::sun::star::container
;
34 using namespace com::sun::star::uno
;
35 using namespace com::sun::star::beans
;
39 OUString
supportedByType( std::u16string_view clipBoardFormat
, std::u16string_view resultString
, const OUString
& checkType
)
42 if ( o3tl::starts_with(clipBoardFormat
, u
"doctype:") )
44 std::u16string_view tryStr
= clipBoardFormat
.substr(8);
45 if (resultString
.find(tryStr
) != std::u16string_view::npos
)
47 sTypeName
= checkType
;
53 bool IsMediaTypeXML( const OUString
& mediaType
)
55 if (!mediaType
.isEmpty())
57 OUString sType
, sSubType
;
58 if (INetContentTypes::parse(mediaType
, sType
, sSubType
)
59 && sType
== "application")
61 // RFC 3023: application/xml; don't detect text/xml
62 if (sSubType
== "xml")
64 // Registered media types: application/XXXX+xml
65 if (sSubType
.endsWith("+xml"))
74 OUString SAL_CALL
FilterDetect::detect( css::uno::Sequence
< css::beans::PropertyValue
>& aArguments
)
78 Sequence
<PropertyValue
> lProps
;
80 css::uno::Reference
< css::io::XInputStream
> xInStream
;
81 const PropertyValue
* pValue
= aArguments
.getConstArray();
83 OUString resultString
;
85 nLength
= aArguments
.getLength();
86 sal_Int32 location
=nLength
;
87 for (sal_Int32 i
= 0 ; i
< nLength
; i
++)
89 if ( pValue
[i
].Name
== "TypeName" )
93 else if ( pValue
[i
].Name
== "URL" )
95 pValue
[i
].Value
>>= sUrl
;
97 else if ( pValue
[i
].Name
== "InputStream" )
99 pValue
[i
].Value
>>= xInStream
;
106 ::ucbhelper::Content
aContent(
107 sUrl
, Reference
< css::ucb::XCommandEnvironment
>(),
109 xInStream
= aContent
.openStream();
116 std::unique_ptr
< SvStream
> pInStream( ::utl::UcbStreamHelper::CreateStream( xInStream
) );
117 pInStream
->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW
);
118 sal_uInt64
const nUniPos
= pInStream
->Tell();
120 const sal_uInt16 nSize
= 4000;
121 bool bTryUtf16
= false;
123 if ( nUniPos
== 0 ) // No BOM detected, try to guess UTF-16 endianness
125 sal_uInt16 nHeader
= 0;
126 pInStream
->ReadUInt16( nHeader
);
127 if ( nHeader
== 0x003C )
129 else if ( nHeader
== 0x3C00 )
132 pInStream
->SetEndian( pInStream
->GetEndian() == SvStreamEndian::LITTLE
? SvStreamEndian::BIG
: SvStreamEndian::LITTLE
);
134 pInStream
->Seek( STREAM_SEEK_TO_BEGIN
);
137 if ( nUniPos
== 3 || ( nUniPos
== 0 && !bTryUtf16
) ) // UTF-8 or non-Unicode
139 OString
const str(read_uInt8s_ToOString(*pInStream
, nSize
));
140 resultString
= OUString(str
.getStr(), str
.getLength(),
141 RTL_TEXTENCODING_ASCII_US
,
142 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT
|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
);
144 else if ( nUniPos
== 2 || bTryUtf16
) // UTF-16
145 resultString
= read_uInt16s_ToOUString( *pInStream
, nSize
);
147 if ( !resultString
.startsWith( "<?xml" ) )
149 // Check the content type; XML declaration is optional in XML files according to XML 1.0 ch.2.8
150 // (see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd)
154 ::ucbhelper::Content
aContent(
155 sUrl
, Reference
< css::ucb::XCommandEnvironment
>(),
157 aContent
.getPropertyValue(u
"MediaType"_ustr
) >>= sMediaType
;
158 if (sMediaType
.isEmpty())
160 aContent
.getPropertyValue(u
"Content-Type"_ustr
) >>= sMediaType
;
165 if (!IsMediaTypeXML(sMediaType
))
167 // This is not an XML stream. It makes no sense to try to detect
168 // a non-XML file type here.
173 // test typedetect code
174 Reference
<XNameAccess
> xTypeCont(mxCtx
->getServiceManager()->createInstanceWithContext(u
"com.sun.star.document.TypeDetection"_ustr
, mxCtx
), UNO_QUERY
);
175 Sequence
< OUString
> myTypes
= xTypeCont
->getElementNames();
176 nLength
= myTypes
.getLength();
178 sal_Int32 new_nlength
=0;
180 while ((i
< nLength
) && (sTypeName
.isEmpty()))
182 Any elem
= xTypeCont
->getByName(myTypes
[i
]);
184 new_nlength
= lProps
.getLength();
186 while (j
< new_nlength
&& (sTypeName
.isEmpty()))
189 lProps
[j
].Value
>>=tmpStr
;
190 if ( lProps
[j
].Name
== "ClipboardFormat" && !tmpStr
.isEmpty() )
192 sTypeName
= supportedByType(tmpStr
,resultString
, myTypes
[i
]);
199 catch (const Exception
&)
201 TOOLS_WARN_EXCEPTION("filter.xmlfd", "An Exception occurred while opening File stream");
204 if (!sTypeName
.isEmpty())
206 if (location
== aArguments
.getLength())
208 aArguments
.realloc(nLength
+1);
209 aArguments
.getArray()[location
].Name
= "TypeName";
211 aArguments
.getArray()[location
].Value
<<=sTypeName
;
218 void SAL_CALL
FilterDetect::initialize( const Sequence
< Any
>& /*aArguments*/ )
223 OUString SAL_CALL
FilterDetect::getImplementationName( )
225 return u
"com.sun.star.comp.filters.XMLFilterDetect"_ustr
;
228 sal_Bool SAL_CALL
FilterDetect::supportsService( const OUString
& rServiceName
)
230 return cppu::supportsService( this, rServiceName
);
233 Sequence
< OUString
> SAL_CALL
FilterDetect::getSupportedServiceNames( )
235 return { u
"com.sun.star.document.ExtendedTypeDetection"_ustr
};
238 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
239 filter_XMLFilterDetect_get_implementation(
240 css::uno::XComponentContext
* context
, css::uno::Sequence
<css::uno::Any
> const&)
242 return cppu::acquire(new FilterDetect(context
));
246 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */