1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "filterdetect.hxx"
21 #include <com/sun/star/io/XInputStream.hpp>
22 #include <com/sun/star/uno/XComponentContext.hpp>
23 #include <com/sun/star/container/XNameAccess.hpp>
24 #include <com/sun/star/beans/PropertyState.hpp>
25 #include <cppuhelper/supportsservice.hxx>
26 #include <tools/diagnose_ex.h>
27 #include <ucbhelper/content.hxx>
28 #include <unotools/ucbstreamhelper.hxx>
29 #include <svl/inettype.hxx>
32 using namespace com::sun::star::container
;
33 using namespace com::sun::star::uno
;
34 using namespace com::sun::star::beans
;
38 OUString
supportedByType( const OUString
& clipBoardFormat
, const OUString
& resultString
, const OUString
& checkType
)
41 if ( clipBoardFormat
.match("doctype:") )
43 OUString tryStr
= clipBoardFormat
.copy(8);
44 if (resultString
.indexOf(tryStr
) >= 0)
46 sTypeName
= checkType
;
52 bool IsMediaTypeXML( const OUString
& mediaType
)
54 if (!mediaType
.isEmpty())
56 OUString sType
, sSubType
;
57 if (INetContentTypes::parse(mediaType
, sType
, sSubType
)
58 && sType
== "application")
60 // RFC 3023: application/xml; don't detect text/xml
61 if (sSubType
== "xml")
63 // Registered media types: application/XXXX+xml
64 if (sSubType
.endsWith("+xml"))
73 OUString SAL_CALL
FilterDetect::detect( css::uno::Sequence
< css::beans::PropertyValue
>& aArguments
)
77 Sequence
<PropertyValue
> lProps
;
79 css::uno::Reference
< css::io::XInputStream
> xInStream
;
80 const PropertyValue
* pValue
= aArguments
.getConstArray();
82 OUString resultString
;
84 nLength
= aArguments
.getLength();
85 sal_Int32 location
=nLength
;
86 for (sal_Int32 i
= 0 ; i
< nLength
; i
++)
88 if ( pValue
[i
].Name
== "TypeName" )
92 else if ( pValue
[i
].Name
== "URL" )
94 pValue
[i
].Value
>>= sUrl
;
96 else if ( pValue
[i
].Name
== "InputStream" )
98 pValue
[i
].Value
>>= xInStream
;
105 ::ucbhelper::Content
aContent(
106 sUrl
, Reference
< css::ucb::XCommandEnvironment
>(),
108 xInStream
= aContent
.openStream();
115 std::unique_ptr
< SvStream
> pInStream( ::utl::UcbStreamHelper::CreateStream( xInStream
) );
116 pInStream
->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW
);
117 sal_uInt64
const nUniPos
= pInStream
->Tell();
119 const sal_uInt16 nSize
= 4000;
120 bool bTryUtf16
= false;
122 if ( nUniPos
== 0 ) // No BOM detected, try to guess UTF-16 endianness
124 sal_uInt16 nHeader
= 0;
125 pInStream
->ReadUInt16( nHeader
);
126 if ( nHeader
== 0x003C )
128 else if ( nHeader
== 0x3C00 )
131 pInStream
->SetEndian( pInStream
->GetEndian() == SvStreamEndian::LITTLE
? SvStreamEndian::BIG
: SvStreamEndian::LITTLE
);
133 pInStream
->Seek( STREAM_SEEK_TO_BEGIN
);
136 if ( nUniPos
== 3 || ( nUniPos
== 0 && !bTryUtf16
) ) // UTF-8 or non-Unicode
138 OString
const str(read_uInt8s_ToOString(*pInStream
, nSize
));
139 resultString
= OUString(str
.getStr(), str
.getLength(),
140 RTL_TEXTENCODING_ASCII_US
,
141 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT
|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
);
143 else if ( nUniPos
== 2 || bTryUtf16
) // UTF-16
144 resultString
= read_uInt16s_ToOUString( *pInStream
, nSize
);
146 if ( !resultString
.startsWith( "<?xml" ) )
148 // Check the content type; XML declaration is optional in XML files according to XML 1.0 ch.2.8
149 // (see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd)
153 ::ucbhelper::Content
aContent(
154 sUrl
, Reference
< css::ucb::XCommandEnvironment
>(),
156 aContent
.getPropertyValue("MediaType") >>= sMediaType
;
157 if (sMediaType
.isEmpty())
159 aContent
.getPropertyValue("Content-Type") >>= sMediaType
;
164 if (!IsMediaTypeXML(sMediaType
))
166 // This is not an XML stream. It makes no sense to try to detect
167 // a non-XML file type here.
172 // test typedetect code
173 Reference
<XNameAccess
> xTypeCont(mxCtx
->getServiceManager()->createInstanceWithContext("com.sun.star.document.TypeDetection", mxCtx
), UNO_QUERY
);
174 Sequence
< OUString
> myTypes
= xTypeCont
->getElementNames();
175 nLength
= myTypes
.getLength();
177 sal_Int32 new_nlength
=0;
179 while ((i
< nLength
) && (sTypeName
.isEmpty()))
181 Any elem
= xTypeCont
->getByName(myTypes
[i
]);
183 new_nlength
= lProps
.getLength();
185 while (j
< new_nlength
&& (sTypeName
.isEmpty()))
188 lProps
[j
].Value
>>=tmpStr
;
189 if ( lProps
[j
].Name
== "ClipboardFormat" && !tmpStr
.isEmpty() )
191 sTypeName
= supportedByType(tmpStr
,resultString
, myTypes
[i
]);
198 catch (const Exception
&)
200 TOOLS_WARN_EXCEPTION("filter.xmlfd", "An Exception occurred while opening File stream");
203 if (!sTypeName
.isEmpty())
205 if (location
== aArguments
.getLength())
207 aArguments
.realloc(nLength
+1);
208 aArguments
.getArray()[location
].Name
= "TypeName";
210 aArguments
.getArray()[location
].Value
<<=sTypeName
;
217 void SAL_CALL
FilterDetect::initialize( const Sequence
< Any
>& /*aArguments*/ )
222 OUString SAL_CALL
FilterDetect::getImplementationName( )
224 return "com.sun.star.comp.filters.XMLFilterDetect";
227 sal_Bool SAL_CALL
FilterDetect::supportsService( const OUString
& rServiceName
)
229 return cppu::supportsService( this, rServiceName
);
232 Sequence
< OUString
> SAL_CALL
FilterDetect::getSupportedServiceNames( )
234 return { "com.sun.star.document.ExtendedTypeDetection" };
237 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
238 filter_XMLFilterDetect_get_implementation(
239 css::uno::XComponentContext
* context
, css::uno::Sequence
<css::uno::Any
> const&)
241 return cppu::acquire(new FilterDetect(context
));
245 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */