1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "typedetection.hxx"
21 #include "constant.hxx"
23 #include <com/sun/star/document/XExtendedFilterDetection.hpp>
24 #include <com/sun/star/frame/Desktop.hpp>
25 #include <com/sun/star/util/URLTransformer.hpp>
26 #include <com/sun/star/util/XURLTransformer.hpp>
28 #include <com/sun/star/io/XInputStream.hpp>
29 #include <com/sun/star/io/XSeekable.hpp>
30 #include <com/sun/star/task/XInteractionHandler.hpp>
31 #include <tools/wldcrd.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <sal/log.hxx>
34 #include <framework/interaction.hxx>
35 #include <tools/urlobj.hxx>
36 #include <comphelper/fileurl.hxx>
37 #include <comphelper/processfactory.hxx>
38 #include <comphelper/sequence.hxx>
40 #define DEBUG_TYPE_DETECTION 0
42 #if DEBUG_TYPE_DETECTION
48 using namespace com::sun::star
;
// Constructor: stores the component context, creates a TerminateDetection
// listener bound to this instance, registers that listener at the Desktop
// and initializes the BaseContainer part with this service's implementation
// name / supported service names for FilterCache::E_TYPE items.
53 TypeDetection::TypeDetection(const css::uno::Reference
< css::uno::XComponentContext
>& rxContext
)
54 : m_xContext(rxContext
)
55 , m_xTerminateListener(new TerminateDetection(this))
// Register the terminate listener; the destructor removes it again.
58 css::frame::Desktop::create(m_xContext
)->addTerminateListener(m_xTerminateListener
.get());
59 BaseContainer::init(rxContext
,
60 TypeDetection::impl_getImplementationName() ,
61 TypeDetection::impl_getSupportedServiceNames(),
62 FilterCache::E_TYPE
);
// Destructor: unregisters the terminate listener that the constructor
// added at the Desktop.
66 TypeDetection::~TypeDetection()
68 css::frame::Desktop::create(m_xContext
)->removeTerminateListener(m_xTerminateListener
.get());
// Flat detection based on the URL only.
// Takes m_aLock, parses the URL strictly with a URLTransformer and asks the
// filter cache for all types matching the parsed URL. If nothing matched and
// the cache does not yet contain all types, the full type set is loaded and
// the lookup is repeated. The first entry of the result list is used as the
// detected type.
72 OUString SAL_CALL
TypeDetection::queryTypeByURL(const OUString
& sURL
)
77 ::osl::ResettableMutexGuard
aLock(m_aLock
);
81 css::uno::Reference
< css::util::XURLTransformer
> xParser( css::util::URLTransformer::create(m_xContext
) );
82 xParser
->parseStrict(aURL
);
84 // set std types as minimum requirement first!
85 // Only in case no type was found for given URL,
86 // use optional types too ...
87 auto & cache
= TheFilterCache::get();
88 FlatDetection lFlatTypes
;
89 cache
.detectFlatForURL(aURL
, lFlatTypes
);
// Retry with all types loaded, but only if the first attempt found nothing
// and the cache is not already completely filled with types.
92 (lFlatTypes
.size() < 1 ) &&
93 (!cache
.isFillState(FilterCache::E_CONTAINS_TYPES
))
96 cache
.load(FilterCache::E_CONTAINS_TYPES
);
97 cache
.detectFlatForURL(aURL
, lFlatTypes
);
100 // first item is guaranteed as "preferred" one!
101 if (lFlatTypes
.size() > 0)
103 const FlatDetectionInfo
& aMatch
= *(lFlatTypes
.begin());
104 sType
= aMatch
.sType
;
114 * Rank format types in order of complexity. More complex formats are
115 * ranked higher so that they get tested sooner over simpler formats.
117 * Guidelines to determine how complex a format is (subject to change):
119 * 1) compressed text (XML, HTML, etc)
121 * 3) non-compressed text
122 * 3.1) structured text
123 * 3.1.1) dialect of a structured text (e.g. docbook XML)
124 * 3.1.2) generic structured text (e.g. generic XML)
125 * 3.2) non-structured text
127 * In each category, rank them from strictly-structured to
128 * loosely-structured.
// Looks up rType in the static table below using an exact ASCII comparison
// (OUString::equalsAscii) in a linear scan over all entries.
// NOTE(review): the values returned for a hit and for a miss are not visible
// in this view; presumably entries listed earlier get a higher rank - confirm.
130 int getFlatTypeRank(const OUString
& rType
)
132 // List formats from more complex to less complex.
134 static const char* ranks
[] = {
136 // Compressed XML (ODF XML zip formats)
147 "writerglobal8_template",
149 "writerweb8_writer_template",
152 // Compressed XML (OOXML)
153 "writer_OOXML_Text_Template",
155 "writer_MS_Word_2007_Template",
156 "writer_MS_Word_2007",
157 "Office Open XML Spreadsheet Template",
158 "Office Open XML Spreadsheet",
159 "MS Excel 2007 XML Template",
161 "MS PowerPoint 2007 XML Template",
162 "MS PowerPoint 2007 XML AutoPlay",
163 "MS PowerPoint 2007 XML",
165 // Compressed XML (Uniform/Unified Office Format)
166 "Unified_Office_Format_text",
167 "Unified_Office_Format_spreadsheet",
168 "Unified_Office_Format_presentation",
170 // Compressed XML (StarOffice XML zip formats)
171 "calc_StarOffice_XML_Calc",
172 "calc_StarOffice_XML_Calc_Template",
173 "chart_StarOffice_XML_Chart",
174 "draw_StarOffice_XML_Draw",
175 "draw_StarOffice_XML_Draw_Template",
176 "impress_StarOffice_XML_Impress",
177 "impress_StarOffice_XML_Impress_Template",
178 "math_StarOffice_XML_Math",
179 "writer_StarOffice_XML_Writer",
180 "writer_StarOffice_XML_Writer_Template",
181 "writer_globaldocument_StarOffice_XML_Writer_GlobalDocument",
182 "writer_web_StarOffice_XML_Writer_Web_Template",
185 "pdf_Portable_Document_Format",
188 "writer_T602_Document",
189 "writer_WordPerfect_Document",
190 "writer_MS_Works_Document",
191 "writer_MS_Word_97_Vorlage",
193 "writer_MS_Word_95_Vorlage",
195 "writer_MS_WinWord_60",
196 "writer_MS_WinWord_5",
197 "MS Excel 2007 Binary",
198 "calc_MS_Excel_97_VorlageTemplate",
200 "calc_MS_Excel_95_VorlageTemplate",
202 "calc_MS_Excel_5095_VorlageTemplate",
203 "calc_MS_Excel_5095",
204 "calc_MS_Excel_40_VorlageTemplate",
206 "calc_Pocket_Excel_File",
207 "impress_MS_PowerPoint_97_Vorlage",
208 "impress_MS_PowerPoint_97_AutoPlay",
209 "impress_MS_PowerPoint_97",
216 // Binary (raster and vector image files)
217 "emf_MS_Windows_Metafile",
218 "wmf_MS_Windows_Metafile",
220 "svm_StarView_Metafile",
222 "tif_Tag_Image_File",
223 "tga_Truevision_TARGA",
224 "sgf_StarOffice_Writer_SGF",
225 "ras_Sun_Rasterfile",
226 "psd_Adobe_Photoshop",
227 "png_Portable_Network_Graphic",
230 "gif_Graphics_Interchange",
232 "pcx_Zsoft_Paintbrush",
235 "pcd_Photo_CD_Base4",
236 "pcd_Photo_CD_Base16",
237 "impress_CGM_Computer_Graphics_Metafile", // There is binary and ascii variants ?
238 "draw_WordPerfect_Graphics",
239 "draw_Visio_Document",
240 "draw_Publisher_Document",
241 "draw_Corel_Presentation_Exchange",
242 "draw_CorelDraw_Document",
243 "writer_LotusWordPro_Document",
244 "writer_MIZI_Hwp_97", // Hanword (Hancom Office)
246 // Non-compressed XML
247 "writer_ODT_FlatXML",
249 "impress_ODP_FlatXML",
251 "calc_ADO_rowset_XML",
252 "calc_MS_Excel_2003_XML",
253 "writer_MS_Word_2003_XML",
254 "writer_DocBook_File",
256 "svg_Scalable_Vector_Graphics",
257 "math_MathML_XML_Math",
259 // Non-compressed text
260 "dxf_AutoCAD_Interchange",
261 "eps_Encapsulated_PostScript",
262 "pbm_Portable_Bitmap", // There is 'raw' and 'ascii' variants.
263 "ppm_Portable_Pixelmap", // There is 'raw' and 'ascii' variants.
264 "pgm_Portable_Graymap", // There is 'raw' and 'ascii' variants.
267 "writer_Rich_Text_Format",
268 "writer_web_HTML_help",
271 "generic_Text", // Plain text (catch all)
273 // Anything ranked lower than generic_Text will never be used during
274 // type detection (since generic_Text catches all).
277 "writer_layout_dump_xml",
282 "StarBaseReportChart",
284 "math_MathType_3x", // MathType equation embedded in Word doc.
// Number of entries in the table; the scan below visits each one in order.
287 size_t n
= SAL_N_ELEMENTS(ranks
);
289 for (size_t i
= 0; i
< n
; ++i
)
291 if (rType
.equalsAscii(ranks
[i
]))
295 // Not ranked. Treat them equally. Unranked formats have higher priority
296 // than the ranked internal ones since they may be defined externally.
301 * Types with matching pattern first, then extension, then custom ranks by
302 * types, then types that are supported by the document service come next.
303 * Lastly, sort them alphabetically.
// Ordering predicate for FlatDetectionInfo entries (used with
// std::stable_sort in queryTypeByDescriptor). Criteria are evaluated in this
// order, the first one that differs decides:
//   1. bMatchByPattern    (entries with the flag set come first)
//   2. bMatchByExtension  (entries with the flag set come first)
//   3. getFlatTypeRank()  (higher rank comes first)
//   4. bPreselectedByDocumentService (entries with the flag set come first)
//   5. sType compared with operator> (i.e. greater type name first)
305 struct SortByPriority
307 bool operator() (const FlatDetectionInfo
& r1
, const FlatDetectionInfo
& r2
) const
309 if (r1
.bMatchByPattern
!= r2
.bMatchByPattern
)
310 return r1
.bMatchByPattern
;
312 if (r1
.bMatchByExtension
!= r2
.bMatchByExtension
)
313 return r1
.bMatchByExtension
;
315 int rank1
= getFlatTypeRank(r1
.sType
);
316 int rank2
= getFlatTypeRank(r2
.sType
);
// NOTE(review): the condition guarding this return is not visible here;
// presumably it is only taken when both ranks differ - confirm.
319 return rank1
> rank2
;
321 if (r1
.bPreselectedByDocumentService
!= r2
.bPreselectedByDocumentService
)
322 return r1
.bPreselectedByDocumentService
;
324 // All things being equal, sort them alphabetically.
// Note that the comparison below uses operator>, not operator<.
325 return r1
.sType
> r2
.sType
;
// Ordering predicate that compares two entries by type name only, using
// operator> on sType.
// NOTE(review): the enclosing struct header is not visible here; presumably
// this is the functor behind objSortByType - confirm.
332 bool operator() (const FlatDetectionInfo
& r1
, const FlatDetectionInfo
& r2
) const
334 return r1
.sType
> r2
.sType
;
// Equality predicate: two entries are considered equal when their sType
// strings are equal; all flags are ignored.
// NOTE(review): the enclosing struct header is not visible here; presumably
// this is the functor behind objEqualByType (used with std::unique) - confirm.
340 bool operator() (const FlatDetectionInfo
& r1
, const FlatDetectionInfo
& r2
) const
342 return r1
.sType
== r2
.sType
;
// Unary predicate for std::find_if: constructed with a type name, it matches
// every FlatDetectionInfo whose sType equals that name.
350 explicit FindByType(const OUString
& rType
) : maType(rType
) {}
351 bool operator() (const FlatDetectionInfo
& rInfo
) const
353 return rInfo
.sType
== maType
;
357 #if DEBUG_TYPE_DETECTION
// Debug helper (compiled only when DEBUG_TYPE_DETECTION is non-zero):
// writes the caption, the list size and, for each entry, its type name and
// the three match flags to cout.
358 void printFlatDetectionList(const char* caption
, const FlatDetection
& types
)
360 cout
<< "-- " << caption
<< " (size=" << types
.size() << ")" << endl
;
361 for (auto const& item
: types
)
363 cout
<< " type='" << item
.sType
<< "'; match by extension (" << item
.bMatchByExtension
364 << "); match by pattern (" << item
.bMatchByPattern
<< "); pre-selected by doc service ("
365 << item
.bPreselectedByDocumentService
<< ")" << endl
;
367 cout
<< "--" << endl
;
// Detects the type of the document described by lDescriptor.
// Visible flow:
//   - wrap the sequence into a utl::MediaDescriptor and read the URL from it
//   - parse the URL strictly via a URLTransformer
//   - if a filter name is preselected and valid, return the type that
//     impl_validateAndSetFilterOnDescriptor stored in the descriptor
//   - otherwise collect all candidate types, sort them by priority, drop
//     adjacent duplicates and run the flat/deep detection on them
//   - if nothing was detected and m_bCancel is not set, ask the user
//   - finally adapt type/filter in the descriptor and write it back to
//     lDescriptor (in/out parameter)
// bAllowDeep is passed through to impl_detectTypeFlatAndDeep.
373 OUString SAL_CALL
TypeDetection::queryTypeByDescriptor(css::uno::Sequence
< css::beans::PropertyValue
>& lDescriptor
,
374 sal_Bool bAllowDeep
)
376 // make the descriptor more usable :-)
377 utl::MediaDescriptor
stlDescriptor(lDescriptor
);
378 OUString sType
, sURL
;
382 // SAFE -> ----------------------------------
383 ::osl::ResettableMutexGuard
aLock(m_aLock
);
385 // parse given URL to split it into e.g. main and jump marks ...
386 sURL
= stlDescriptor
.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL(), OUString());
// Debug builds only: complain about the legacy "FileName" property.
388 #if OSL_DEBUG_LEVEL > 0
389 if (stlDescriptor
.find( "FileName" ) != stlDescriptor
.end())
390 OSL_FAIL("Detect using of deprecated and already unsupported MediaDescriptor property \"FileName\"!");
394 aURL
.Complete
= sURL
;
395 css::uno::Reference
< css::util::XURLTransformer
> xParser(css::util::URLTransformer::create(m_xContext
));
396 xParser
->parseStrict(aURL
);
398 OUString aSelectedFilter
= stlDescriptor
.getUnpackedValueOrDefault(
399 utl::MediaDescriptor::PROP_FILTERNAME(), OUString());
400 if (!aSelectedFilter
.isEmpty())
402 // Caller specified the filter type. Honor it. Just get the default
403 // type for that filter, and bail out.
404 if (impl_validateAndSetFilterOnDescriptor(stlDescriptor
, aSelectedFilter
))
405 return stlDescriptor
[utl::MediaDescriptor::PROP_TYPENAME()].get
<OUString
>();
408 FlatDetection lFlatTypes
;
409 impl_getAllFormatTypes(aURL
, stlDescriptor
, lFlatTypes
);
412 // <- SAFE ----------------------------------
414 // Properly prioritize all candidate types.
// std::unique only removes adjacent entries that objEqualByType treats as equal.
415 std::stable_sort(lFlatTypes
.begin(), lFlatTypes
.end(), objSortByPriority
);
416 auto last
= std::unique(lFlatTypes
.begin(), lFlatTypes
.end(), objEqualByType
);
417 lFlatTypes
.erase(last
, lFlatTypes
.end());
419 OUString sLastChance
;
421 // verify every flat detected (or preselected!) type
422 // by calling its registered deep detection service.
423 // But break this loop if a type match to the given descriptor
424 // by an URL pattern(!) or if deep detection isn't allowed from
425 // outside (bAllowDeep=sal_False) or break the whole detection by
426 // throwing an exception if creation of the might needed input
427 // stream failed by e.g. an IO exception ...
428 std::vector
<OUString
> lUsedDetectors
;
429 if (lFlatTypes
.size()>0)
430 sType
= impl_detectTypeFlatAndDeep(stlDescriptor
, lFlatTypes
, bAllowDeep
, lUsedDetectors
, sLastChance
);
432 // flat detection failed
433 // pure deep detection failed
434 // => ask might existing InteractionHandler
435 // means: ask user for its decision
436 if (sType
.isEmpty() && !m_bCancel
)
437 sType
= impl_askUserForTypeAndFilterIfAllowed(stlDescriptor
);
440 // no real detected type - but a might valid one.
441 // update descriptor and set last chance for return.
// NOTE(review): the statement that actually applies sLastChance is not
// visible in this view - confirm against the full source.
442 if (sType
.isEmpty() && !sLastChance
.isEmpty() && !m_bCancel
)
444 OSL_FAIL("set first flat detected type without a registered deep detection service as \"last chance\" ... nevertheless some other deep detections said \"NO\". I TRY IT!");
// RuntimeException has its own handler; every other UNO exception is logged
// as a warning in the "filter.config" area together with the URL.
448 catch(const css::uno::RuntimeException
&)
452 catch(const css::uno::Exception
& e
)
454 SAL_WARN("filter.config", "caught " << e
455 << " while querying type of " << sURL
);
459 // adapt media descriptor, so it contains the right values
460 // for type/filter name/document service/ etcpp.
461 impl_checkResultsAndAddBestFilter(stlDescriptor
, sType
); // Attention: sType is used as an IN/OUT param here and may be changed inside this method!
462 impl_validateAndSetTypeOnDescriptor(stlDescriptor
, sType
);
// Write the (possibly modified) descriptor back into the in/out sequence.
464 stlDescriptor
>> lDescriptor
;
// Chooses a filter for the detected type and stores type name and filter
// name in rDescriptor. The visible code tries, in this order:
//   a) a filter already preselected in the descriptor
//   b) a preselected document service: search all filters registered for
//      that service and the type, inspecting their IMPORT / PREFERED flags
//   c) the PreferredFilter property of the type itself
//   d) any filter registered for the type that has the IMPORT flag
// Cache accesses are guarded by m_aLock; UNO exceptions raised by the cache
// lookups are caught locally.
// NOTE(review): the second parameter (sType, used as in/out by the caller)
// and several control-flow lines are not visible in this view.
469 void TypeDetection::impl_checkResultsAndAddBestFilter(utl::MediaDescriptor
& rDescriptor
,
473 // Don't overwrite a possibly preselected filter!
474 OUString sFilter
= rDescriptor
.getUnpackedValueOrDefault(
475 utl::MediaDescriptor::PROP_FILTERNAME(),
477 if (!sFilter
.isEmpty())
480 auto & cache
= TheFilterCache::get();
483 // check a preselected document service too.
484 // Then we have to search a suitable filter within this module.
485 OUString sDocumentService
= rDescriptor
.getUnpackedValueOrDefault(
486 utl::MediaDescriptor::PROP_DOCUMENTSERVICE(),
488 if (!sDocumentService
.isEmpty())
492 OUString sRealType
= sType
;
495 ::osl::ResettableMutexGuard
aLock(m_aLock
);
497 // Attention: For executing next lines of code, We must be sure that
498 // all filters already loaded :-(
499 // That can disturb our "load on demand feature". But we have no other chance!
500 cache
.load(FilterCache::E_CONTAINS_FILTERS
);
// Query: all filters bound to this document service AND this type.
503 lIProps
[PROPNAME_DOCUMENTSERVICE
] <<= sDocumentService
;
504 lIProps
[PROPNAME_TYPE
] <<= sRealType
;
505 std::vector
<OUString
> lFilters
= cache
.getMatchingItemsByProps(FilterCache::E_FILTER
, lIProps
);
510 for (auto const& filter
: lFilters
)
516 CacheItem aFilter
= cache
.getItem(FilterCache::E_FILTER
, filter
);
517 sal_Int32 nFlags
= 0;
518 aFilter
[PROPNAME_FLAGS
] >>= nFlags
;
// Only import-capable filters are of interest; the PREFERED flag is
// checked separately for those.
520 if (static_cast<SfxFilterFlags
>(nFlags
) & SfxFilterFlags::IMPORT
)
522 if (static_cast<SfxFilterFlags
>(nFlags
) & SfxFilterFlags::PREFERED
)
525 catch(const css::uno::Exception
&) {}
530 if (!sFilter
.isEmpty())
532 rDescriptor
[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sRealType
;
533 rDescriptor
[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter
;
538 catch(const css::uno::Exception
&)
543 // We can use the preferred filter for the specified type.
544 // Such preferred filter points:
545 // - to the default filter of the preferred application
546 // - or to any other filter if no preferred filter was set.
547 // Note: It's an optimization only!
548 // It's not guaranteed, that such preferred filter exists.
553 ::osl::ResettableMutexGuard
aLock(m_aLock
);
555 CacheItem aType
= cache
.getItem(FilterCache::E_TYPE
, sType
);
556 aType
[PROPNAME_PREFERREDFILTER
] >>= sFilter
;
// Looking the filter up verifies that the preferred filter really exists.
557 CacheItem aFilter
= cache
.getItem(FilterCache::E_FILTER
, sFilter
);
562 // no exception => found valid type and filter => set it on the given descriptor
563 rDescriptor
[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sType
;
564 rDescriptor
[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter
;
567 catch(const css::uno::Exception
&)
571 // Search for any import(!) filter, which is registered for this type.
576 ::osl::ResettableMutexGuard
aLock(m_aLock
);
578 // Attention: For executing next lines of code, We must be sure that
579 // all filters already loaded :-(
580 // That can disturb our "load on demand feature". But we have no other chance!
581 cache
.load(FilterCache::E_CONTAINS_FILTERS
);
584 lIProps
[PROPNAME_TYPE
] <<= sType
;
585 std::vector
<OUString
> lFilters
= cache
.getMatchingItemsByProps(FilterCache::E_FILTER
, lIProps
);
590 for (auto const& filter
: lFilters
)
// NOTE(review): the lookup below uses sFilter, not the loop variable;
// presumably sFilter is assigned from 'filter' on a line not visible
// here - confirm.
598 CacheItem aFilter
= cache
.getItem(FilterCache::E_FILTER
, sFilter
);
599 sal_Int32 nFlags
= 0;
600 aFilter
[PROPNAME_FLAGS
] >>= nFlags
;
602 if (static_cast<SfxFilterFlags
>(nFlags
) & SfxFilterFlags::IMPORT
)
605 catch(const css::uno::Exception
&)
613 if (!sFilter
.isEmpty())
615 rDescriptor
[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sType
;
616 rDescriptor
[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter
;
620 catch(const css::uno::Exception
&)
// Checks a preselected type against the parsed URL and flags the matching
// entry of rFlatTypes.
//   sPreSelType - name of the preselected type
//   aParsedURL  - URL already split by the URL transformer
//   rFlatTypes  - candidate list whose matching entry gets its flags updated
//   bDocService - passed as true when called for a document-service
//                 preselection, false for a type preselection
// Steps: look the type up in the filter cache (an unknown type stops the
// check), skip URL checks for "private:stream", compare the lower-cased URL
// extension with the type's registered extensions and, if none matched,
// test the type's URL patterns with WildCard.
// NOTE(review): the returned bool and some guarding conditions are not
// visible in this view.
625 bool TypeDetection::impl_getPreselectionForType(
626 const OUString
& sPreSelType
, const util::URL
& aParsedURL
, FlatDetection
& rFlatTypes
, bool bDocService
)
628 // Can be used to suppress execution of some parts of this method
629 // if it's already clear that detected type is valid or not.
630 // It's necessary to use shared code at the end, which update
631 // all return parameters consistency!
632 bool bBreakDetection
= false;
634 // Further we must know if it matches by pattern
635 // Every flat detected type by pattern won't be detected deep!
636 bool bMatchByPattern
= false;
638 // And we must know if a preselection must be preferred, because
639 // it matches by its extension too.
640 bool bMatchByExtension
= false;
643 OUString
sType(sPreSelType
);
647 // SAFE -> --------------------------
648 ::osl::ResettableMutexGuard
aLock(m_aLock
);
649 aType
= TheFilterCache::get().getItem(FilterCache::E_TYPE
, sType
);
651 // <- SAFE --------------------------
// The type is not known to the cache: stop any further checking.
653 catch(const css::container::NoSuchElementException
&)
656 bBreakDetection
= true;
659 if (!bBreakDetection
)
661 // We can't check a preselected type for a given stream!
662 // So we must believe, that it can work ...
663 if ( aParsedURL
.Complete
== "private:stream" )
664 bBreakDetection
= true;
667 if (!bBreakDetection
)
669 // extract extension from URL .. to check it case-insensitive !
670 INetURLObject
aParser (aParsedURL
.Main
);
671 OUString sExtension
= aParser
.getExtension(INetURLObject::LAST_SEGMENT
,
673 INetURLObject::DecodeMechanism::WithCharset
);
674 sExtension
= sExtension
.toAsciiLowerCase();
676 // otherwise we must know, if it matches to the given URL really.
677 // especially if it matches by its extension or pattern registration.
678 std::vector
<OUString
> lExtensions(comphelper::sequenceToContainer
< std::vector
<OUString
> >(aType
[PROPNAME_EXTENSIONS
].get
<css::uno::Sequence
<OUString
> >() ));
679 std::vector
<OUString
> lURLPattern(comphelper::sequenceToContainer
< std::vector
<OUString
> >(aType
[PROPNAME_URLPATTERN
].get
<css::uno::Sequence
<OUString
> >() ));
// Both sides are lower-cased, so the extension comparison is
// ASCII-case-insensitive.
681 for (auto const& extension
: lExtensions
)
683 OUString
sCheckExtension(extension
.toAsciiLowerCase());
684 if (sCheckExtension
== sExtension
)
686 bBreakDetection
= true;
687 bMatchByExtension
= true;
// URL patterns are only tried when no extension matched.
692 if (!bBreakDetection
)
694 for (auto const& elem
: lURLPattern
)
696 WildCard
aCheck(elem
);
697 if (aCheck
.Matches(aParsedURL
.Main
))
699 bMatchByPattern
= true;
706 // if it's a valid type - set it on all return values!
707 if (!sType
.isEmpty())
709 FlatDetection::iterator it
= std::find_if(rFlatTypes
.begin(), rFlatTypes
.end(), FindByType(sType
));
710 if (it
!= rFlatTypes
.end())
712 if (bMatchByExtension
)
713 it
->bMatchByExtension
= true;
// NOTE(review): the conditions guarding the next two assignments are not
// visible here; presumably they test bMatchByPattern and bDocService - confirm.
715 it
->bMatchByPattern
= true;
717 it
->bPreselectedByDocumentService
= true;
// Flags all types in rFlatTypes that belong to the preselected document
// service: loads all filters into the cache (under m_aLock), collects the
// filters whose DocumentService property equals sPreSelDocumentService,
// resolves each filter to its type and runs impl_getPreselectionForType on
// it with bDocService = true.
727 void TypeDetection::impl_getPreselectionForDocumentService(
728 const OUString
& sPreSelDocumentService
, const util::URL
& aParsedURL
, FlatDetection
& rFlatTypes
)
730 // get all filters, which match to this doc service
731 std::vector
<OUString
> lFilters
;
734 // SAFE -> --------------------------
735 ::osl::ResettableMutexGuard
aLock(m_aLock
);
737 // Attention: For executing next lines of code, We must be sure that
738 // all filters already loaded :-(
739 // That can disturb our "load on demand feature". But we have no other chance!
740 auto & cache
= TheFilterCache::get();
741 cache
.load(FilterCache::E_CONTAINS_FILTERS
);
744 lIProps
[PROPNAME_DOCUMENTSERVICE
] <<= sPreSelDocumentService
;
745 lFilters
= cache
.getMatchingItemsByProps(FilterCache::E_FILTER
, lIProps
);
748 // <- SAFE --------------------------
750 catch (const css::container::NoSuchElementException
&)
755 // step over all filters, and check if its registered type
756 // match the given URL.
757 // But use temp. list of "preselected types" instead of incoming rFlatTypes list!
758 // The reason behind: we must filter the obtained results. And copying stl entries
759 // is an easier job than removing them .-)
760 for (auto const& filter
: lFilters
)
762 OUString aType
= impl_getTypeFromFilter(filter
);
// 'true' marks the call as coming from a document-service preselection.
766 impl_getPreselectionForType(aType
, aParsedURL
, rFlatTypes
, true);
// Returns the value of the Type property of the filter named rFilterName.
// The filter item is fetched from the filter cache while holding m_aLock;
// a NoSuchElementException from the cache is caught here.
// NOTE(review): what is returned for an unknown filter is not visible in
// this view.
770 OUString
TypeDetection::impl_getTypeFromFilter(const OUString
& rFilterName
)
775 osl::MutexGuard
aLock(m_aLock
);
776 aFilter
= TheFilterCache::get().getItem(FilterCache::E_FILTER
, rFilterName
);
778 catch (const container::NoSuchElementException
&)
784 aFilter
[PROPNAME_TYPE
] >>= aType
;
// Fills rFlatTypes with every candidate type for the detection:
//   1. the type of every filter known to the cache (all flags false)
//   2. the types that match aParsedURL; entries already present get their
//      match flags merged instead of being added twice
//   3. duplicates are removed (sort by type name, then std::unique)
//   4. a type preselected in rDescriptor and all types of a preselected
//      document service get their priority flags set
788 void TypeDetection::impl_getAllFormatTypes(
789 const util::URL
& aParsedURL
, utl::MediaDescriptor
const & rDescriptor
, FlatDetection
& rFlatTypes
)
793 // Get all filters that we have.
794 std::vector
<OUString
> aFilterNames
;
797 osl::MutexGuard
aLock(m_aLock
);
798 auto & cache
= TheFilterCache::get();
799 cache
.load(FilterCache::E_CONTAINS_FILTERS
);
800 aFilterNames
= cache
.getItemNames(FilterCache::E_FILTER
);
802 catch (const container::NoSuchElementException
&)
807 // Retrieve the default type for each of these filters, and store them.
808 for (auto const& filterName
: aFilterNames
)
810 OUString aType
= impl_getTypeFromFilter(filterName
);
815 FlatDetectionInfo aInfo
; // all flags set to false by default.
817 rFlatTypes
.push_back(aInfo
);
821 // Get all types that match the URL alone.
822 FlatDetection aFlatByURL
;
823 TheFilterCache::get().detectFlatForURL(aParsedURL
, aFlatByURL
);
824 for (auto const& elem
: aFlatByURL
)
826 FlatDetection::iterator itPos
= std::find_if(rFlatTypes
.begin(), rFlatTypes
.end(), FindByType(elem
.sType
));
827 if (itPos
== rFlatTypes
.end())
828 // Not in the list yet.
829 rFlatTypes
.push_back(elem
);
832 // Already in the list. Update the flags.
// Flags are only ever switched on here, never cleared.
833 FlatDetectionInfo
& rInfo
= *itPos
;
834 const FlatDetectionInfo
& rThisInfo
= elem
;
835 if (rThisInfo
.bMatchByExtension
)
836 rInfo
.bMatchByExtension
= true;
837 if (rThisInfo
.bMatchByPattern
)
838 rInfo
.bMatchByPattern
= true;
839 if (rThisInfo
.bPreselectedByDocumentService
)
840 rInfo
.bPreselectedByDocumentService
= true;
845 // Remove duplicates.
// Sorting by type name first makes equal types adjacent for std::unique.
846 std::stable_sort(rFlatTypes
.begin(), rFlatTypes
.end(), objSortByType
);
847 auto last
= std::unique(rFlatTypes
.begin(), rFlatTypes
.end(), objEqualByType
);
848 rFlatTypes
.erase(last
, rFlatTypes
.end());
850 // Mark pre-selected type (if any) to have it prioritized.
851 OUString sSelectedType
= rDescriptor
.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_TYPENAME(), OUString());
852 if (!sSelectedType
.isEmpty())
853 impl_getPreselectionForType(sSelectedType
, aParsedURL
, rFlatTypes
, false);
855 // Mark all types preferred by the current document service, to have it prioritized.
856 OUString sSelectedDoc
= rDescriptor
.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_DOCUMENTSERVICE(), OUString());
857 if (!sSelectedDoc
.isEmpty())
858 impl_getPreselectionForDocumentService(sSelectedDoc
, aParsedURL
, rFlatTypes
);
// Walks the prioritized candidate list lFlatTypes and tries to confirm a type.
//   rDescriptor    - media descriptor; the currently tested type is set on it
//   lFlatTypes     - candidates, already sorted by priority by the caller
//   rUsedDetectors - out: names of all deep detection services that were asked
//   rLastChance    - out: first candidate that has no detect service
// For each candidate the type is validated and set on the descriptor, its
// DetectService property is read from the cache (under m_aLock) and, if a
// service is registered, impl_askDetectService is called.
// NOTE(review): the bAllowDeep parameter and the return statements are not
// visible in this view.
862 OUString
TypeDetection::impl_detectTypeFlatAndDeep( utl::MediaDescriptor
& rDescriptor
,
863 const FlatDetection
& lFlatTypes
,
865 std::vector
<OUString
>& rUsedDetectors
,
866 OUString
& rLastChance
)
868 // reset it every time, so the outside code can distinguish between
869 // a set and a not set value.
871 rUsedDetectors
.clear();
873 // step over all possible types for this URL.
875 // a) no types => no detection
876 // b) deep detection not allowed => return first valid type of list (because it's the preferred or the first valid one)
877 // or(!) match by URLPattern => in such case a deep detection will be suppressed!
878 // c) type has no detect service => save the first type found without a detect service
879 // as "last chance"(!). It will be used outside of this method
880 // if no further type could be detected.
881 // It must be the first one, because it can be a preferred type.
882 // Our types list was sorted by such criteria!
883 // d) detect service return a valid result => return its decision
884 // e) detect service return an invalid result
885 // or any needed information could not be
886 // obtained from the cache => ignore it, and continue with search
888 for (auto const& flatTypeInfo
: lFlatTypes
)
892 OUString sFlatType
= flatTypeInfo
.sType
;
// Candidates that are not valid types in the cache fail this check.
894 if (!impl_validateAndSetTypeOnDescriptor(rDescriptor
, sFlatType
))
900 (flatTypeInfo
.bMatchByPattern
)
908 // SAFE -> ----------------------------------
909 ::osl::ResettableMutexGuard
aLock(m_aLock
);
910 CacheItem aType
= TheFilterCache::get().getItem(FilterCache::E_TYPE
, sFlatType
);
913 OUString sDetectService
;
914 aType
[PROPNAME_DETECTSERVICE
] >>= sDetectService
;
917 if (sDetectService
.isEmpty())
919 // flat detected types without any registered deep detection service and not
920 // preselected by the user can be used as LAST CHANCE in case no other type could
921 // be detected. Of course only the first type without deep detector can be used.
922 // Further ones has to be ignored.
923 if (rLastChance
.isEmpty())
924 rLastChance
= sFlatType
;
929 // don't forget to add every real asked deep detection service here.
930 // Such detectors will be ignored if may be "impl_detectTypeDeepOnly()"
931 // must be called later!
932 rUsedDetectors
.push_back(sDetectService
);
933 OUString sDeepType
= impl_askDetectService(sDetectService
, rDescriptor
);
936 if (!sDeepType
.isEmpty())
939 catch(const css::container::NoSuchElementException
&)
945 // <- SAFE ----------------------------------
// Fetches the input stream stored in rDescriptor (if any) and queries it
// for XSeekable, so the stream can be rewound before/after a detect service
// reads from it. RuntimeException and other UNO exceptions have separate
// handlers.
// NOTE(review): the actual seek call and the handler bodies are not visible
// in this view.
948 void TypeDetection::impl_seekStreamToZero(utl::MediaDescriptor
const & rDescriptor
)
950 // try to seek to 0 ...
951 // But because XSeekable is an optional interface ... try it only .-)
952 css::uno::Reference
< css::io::XInputStream
> xStream
= rDescriptor
.getUnpackedValueOrDefault(
953 utl::MediaDescriptor::PROP_INPUTSTREAM(),
954 css::uno::Reference
< css::io::XInputStream
>());
// UNO_QUERY yields an empty reference when the interface is not supported.
955 css::uno::Reference
< css::io::XSeekable
> xSeek(xStream
, css::uno::UNO_QUERY
);
962 catch(const css::uno::RuntimeException
&)
966 catch(const css::uno::Exception
&)
// Runs one deep detection service on the document.
//   sDetectService - UNO service name of the detector to instantiate
//   rDescriptor    - media descriptor; converted to a PropertyValue sequence
//                    for the detector and updated from it afterwards
// Steps: make sure the descriptor has an open stream (impl_openStream may
// throw), rewind it, instantiate the detector through the service manager,
// call detect(), rewind the stream again and validate the returned type via
// impl_validateAndSetTypeOnDescriptor.
// NOTE(review): the return statements are not visible in this view.
972 OUString
TypeDetection::impl_askDetectService(const OUString
& sDetectService
,
973 utl::MediaDescriptor
& rDescriptor
)
975 // Open the stream and add it to the media descriptor if this method is called for the first time.
976 // All following requests to this method will detect, that there already exists a stream .-)
977 // Attention: This method throws an exception if the stream could not be opened.
978 // It's important to break any further detection in such case.
979 // Catch it on the highest detection level only !!!
980 impl_openStream(rDescriptor
);
982 // seek to 0 is an optional feature to be more robust against
983 // "simple implemented detect services" .-)
984 impl_seekStreamToZero(rDescriptor
);
986 css::uno::Reference
< css::document::XExtendedFilterDetection
> xDetector
;
987 css::uno::Reference
< css::uno::XComponentContext
> xContext
;
// Copy the context reference while holding the lock.
990 ::osl::ResettableMutexGuard
aLock(m_aLock
);
991 xContext
= m_xContext
;
997 // Attention! If e.g. an office module was not installed sometimes we
998 // find a registered detect service, which is referred inside the
999 // configuration ... but not really installed. On the other side we use
1000 // third party components here, which can make trouble anyway. So we
1001 // should handle errors during creation of such services more
1004 xContext
->getServiceManager()->createInstanceWithContext(sDetectService
, xContext
),
1005 css::uno::UNO_QUERY_THROW
);
1011 if ( ! xDetector
.is())
1017 // start deep detection
1018 // Don't forget to convert stl descriptor to its uno representation.
1021 You have to use an explicit instance of this uno sequence ...
1022 Because its used as an in out parameter. And in case of a temp. used object
1023 we will run into memory corruptions!
1025 css::uno::Sequence
< css::beans::PropertyValue
> lDescriptor
;
1026 rDescriptor
>> lDescriptor
;
1027 sDeepType
= xDetector
->detect(lDescriptor
);
// Take over whatever the detector changed in the descriptor sequence.
1028 rDescriptor
<< lDescriptor
;
1032 // We should ignore errors here.
1033 // Thrown exceptions mostly will end in crash recovery ...
1034 // But might be we find another deep detection service which can detect the same
1035 // document without a problem .-)
1039 // seek to 0 is an optional feature to be more robust against
1040 // "simple implemented detect services" .-)
1041 impl_seekStreamToZero(rDescriptor
);
1043 // analyze the results
1044 // a) detect service returns "" => return "" too and remove TYPE/FILTER prop from descriptor
1045 // b) returned type is unknown => return "" too and remove TYPE/FILTER prop from descriptor
1046 // c) returned type is valid => check TYPE/FILTER props inside descriptor and return the type
1048 // this special helper checks for a valid type
1049 // and set right values on the descriptor!
1050 bool bValidType
= impl_validateAndSetTypeOnDescriptor(rDescriptor
, sDeepType
);
// Last resort of the detection: lets the user pick a filter through the
// InteractionHandler stored in rDescriptor.
// Visible checks: an interaction handler must be present; the URL, the input
// stream and the special URL "private:stream" are tested before the user is
// asked. A RequestFilterSelect is then handed to the handler; an aborted
// request is tested via isAbort(), otherwise the selected filter is validated
// and set on the descriptor and the type name is read back from it.
// NOTE(review): the statements executed for each of these checks (returns,
// setting of a cancel flag) are not visible in this view.
1058 OUString
TypeDetection::impl_askUserForTypeAndFilterIfAllowed(utl::MediaDescriptor
& rDescriptor
)
1060 css::uno::Reference
< css::task::XInteractionHandler
> xInteraction
=
1061 rDescriptor
.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INTERACTIONHANDLER(),
1062 css::uno::Reference
< css::task::XInteractionHandler
>());
1064 if (!xInteraction
.is())
1068 rDescriptor
.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL(),
1071 css::uno::Reference
< css::io::XInputStream
> xStream
=
1072 rDescriptor
.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INPUTSTREAM(),
1073 css::uno::Reference
< css::io::XInputStream
>());
1075 // Don't disturb the user for "non existing files - means empty URLs" or
1076 // if we were forced to detect a stream.
1077 // Reason behind: we must be sure to ask user for "unknown contents" only...
1078 // and not for "missing files". Especially if detection is done by a stream only
1079 // we can't check if the stream points to an "existing content"!
1081 (sURL
.isEmpty() ) || // "non existing file" ?
1082 (!xStream
.is() ) || // non existing file !
1083 (sURL
.equalsIgnoreAsciiCase("private:stream")) // not a good idea .-)
1089 // create a new request to ask user for its decision about the usable filter
1090 ::framework::RequestFilterSelect
aRequest(sURL
);
1091 xInteraction
->handle(aRequest
.GetRequest());
1093 // "Cancel" pressed? => return with error
1094 if (aRequest
.isAbort())
1097 // "OK" pressed => verify the selected filter, get its corresponding
1098 // type and return it. (BTW: We must update the media descriptor here ...)
1099 // The user selected explicitly a filter ... but normally we are interested on
1100 // a type here only. But we must be sure, that the selected filter is used
1101 // too and no ambiguous filter registration disturb us .-)
1103 OUString sFilter
= aRequest
.getFilter();
1104 if (!impl_validateAndSetFilterOnDescriptor(rDescriptor
, sFilter
))
// The validation above stored the type name in the descriptor; read it back.
1108 rDescriptor
[utl::MediaDescriptor::PROP_TYPENAME()] >>= sType
;
1111 catch(const css::uno::Exception
&)
// Makes sure rDescriptor carries an input stream for its URL.
// File URLs (comphelper::isFileUrl) are opened with addInputStreamOwnLock(),
// all other URLs with addInputStream(). A css::uno::Exception with the
// message "Could not open stream for <URL>" is thrown from here.
// If the caller did not request read-only, a ReadOnly property in the
// descriptor is erased again afterwards.
// NOTE(review): the condition guarding the throw is not visible here;
// presumably it is raised when bSuccess is false - confirm.
1118 void TypeDetection::impl_openStream(utl::MediaDescriptor
& rDescriptor
)
1120 bool bSuccess
= false;
1121 OUString sURL
= rDescriptor
.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_URL(), OUString() );
// Remember the caller's ReadOnly request before a stream is added.
1122 bool bRequestedReadOnly
= rDescriptor
.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_READONLY(), false );
1123 if ( comphelper::isFileUrl( sURL
) )
1125 // OOo uses own file locking mechanics in case of local file
1126 bSuccess
= rDescriptor
.addInputStreamOwnLock();
1129 bSuccess
= rDescriptor
.addInputStream();
1132 throw css::uno::Exception(
1133 "Could not open stream for <" + sURL
+ ">",
1134 static_cast<OWeakObject
*>(this));
1136 if ( !bRequestedReadOnly
)
1138 // The MediaDescriptor implementation adds ReadOnly argument if the file can not be opened for writing
1139 // this argument should be either removed or an additional argument should be added so that application
1140 // can separate the case when the user explicitly requests readonly document.
1141 // The current solution is to remove it here.
1142 rDescriptor
.erase( utl::MediaDescriptor::PROP_READONLY() );
// Removes the TypeName and FilterName properties from rDescriptor if they
// are present; other properties are left untouched.
1147 void TypeDetection::impl_removeTypeFilterFromDescriptor(utl::MediaDescriptor
& rDescriptor
)
1149 utl::MediaDescriptor::iterator pItType
= rDescriptor
.find(utl::MediaDescriptor::PROP_TYPENAME() );
1150 utl::MediaDescriptor::iterator pItFilter
= rDescriptor
.find(utl::MediaDescriptor::PROP_FILTERNAME());
1151 if (pItType
!= rDescriptor
.end())
1152 rDescriptor
.erase(pItType
);
1153 if (pItFilter
!= rDescriptor
.end())
1154 rDescriptor
.erase(pItFilter
);
// Checks (under m_aLock) whether sType is a known type in the filter cache.
// A known type is stored as TypeName in rDescriptor; otherwise the
// type/filter properties are removed from the descriptor.
// NOTE(review): the returned bool is not visible in this view; callers treat
// the result as "type is valid".
1158 bool TypeDetection::impl_validateAndSetTypeOnDescriptor( utl::MediaDescriptor
& rDescriptor
,
1159 const OUString
& sType
)
1162 ::osl::ResettableMutexGuard
aLock(m_aLock
);
1163 if (TheFilterCache::get().hasItem(FilterCache::E_TYPE
, sType
))
1165 rDescriptor
[utl::MediaDescriptor::PROP_TYPENAME()] <<= sType
;
1171 // remove all related information from the descriptor
1172 impl_removeTypeFilterFromDescriptor(rDescriptor
);
// Checks (under m_aLock) that sFilter exists in the filter cache and that
// the type named by its Type property exists as well. If both lookups
// succeed, TypeName and FilterName are set on rDescriptor. A
// NoSuchElementException from the cache is swallowed and the type/filter
// properties are removed from the descriptor instead.
// NOTE(review): the returned bool is not visible in this view; callers treat
// the result as "filter is valid".
1177 bool TypeDetection::impl_validateAndSetFilterOnDescriptor( utl::MediaDescriptor
& rDescriptor
,
1178 const OUString
& sFilter
)
1183 ::osl::ResettableMutexGuard
aLock(m_aLock
);
1185 auto & cache
= TheFilterCache::get();
1186 CacheItem aFilter
= cache
.getItem(FilterCache::E_FILTER
, sFilter
);
1188 aFilter
[PROPNAME_TYPE
] >>= sType
;
// Looking the type up verifies that the filter's type really exists.
1189 CacheItem aType
= cache
.getItem(FilterCache::E_TYPE
, sType
);
1194 // found valid type and filter => set it on the given descriptor
1195 rDescriptor
[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sType
;
1196 rDescriptor
[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter
;
1199 catch(const css::container::NoSuchElementException
&){}
1201 // remove all related information from the descriptor
1202 impl_removeTypeFilterFromDescriptor(rDescriptor
);
// Returns the UNO implementation name of this service.
1207 OUString
TypeDetection::impl_getImplementationName()
1209 return OUString( "com.sun.star.comp.filter.config.TypeDetection" );
// Returns the list of UNO service names implemented by this class
// (a single entry).
1213 css::uno::Sequence
< OUString
> TypeDetection::impl_getSupportedServiceNames()
1215 return { "com.sun.star.document.TypeDetection" };
// Factory function: creates a new TypeDetection using the component context
// obtained from the given service manager and returns it as XInterface.
// The static_cast to XTypeDetection selects the interface pointer that is
// handed to the UNO_QUERY reference constructor.
1219 css::uno::Reference
< css::uno::XInterface
> TypeDetection::impl_createInstance(const css::uno::Reference
< css::lang::XMultiServiceFactory
>& xSMGR
)
1221 TypeDetection
* pNew
= new TypeDetection( comphelper::getComponentContext(xSMGR
) );
1222 return css::uno::Reference
< css::uno::XInterface
>(static_cast< css::document::XTypeDetection
* >(pNew
), css::uno::UNO_QUERY
);
1225 } // namespace config
1226 } // namespace filter
1228 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */