Revert "tdf#110987: type detection, binary Office formats > templates"
[LibreOffice.git] / filter / source / config / cache / typedetection.cxx
blobfa23e96ba1266fac1a11751836a6afca1d9db68c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "typedetection.hxx"
21 #include "constant.hxx"
23 #include <com/sun/star/document/XExtendedFilterDetection.hpp>
24 #include <com/sun/star/frame/Desktop.hpp>
25 #include <com/sun/star/util/URLTransformer.hpp>
26 #include <com/sun/star/util/XURLTransformer.hpp>
28 #include <com/sun/star/io/XInputStream.hpp>
29 #include <com/sun/star/io/XSeekable.hpp>
30 #include <com/sun/star/task/XInteractionHandler.hpp>
31 #include <tools/wldcrd.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <sal/log.hxx>
34 #include <framework/interaction.hxx>
35 #include <tools/urlobj.hxx>
36 #include <comphelper/fileurl.hxx>
37 #include <comphelper/processfactory.hxx>
38 #include <comphelper/sequence.hxx>
40 #define DEBUG_TYPE_DETECTION 0
42 #if DEBUG_TYPE_DETECTION
43 #include <iostream>
44 using std::cout;
45 using std::endl;
46 #endif
48 using namespace com::sun::star;
50 namespace filter{
51 namespace config{
53 TypeDetection::TypeDetection(const css::uno::Reference< css::uno::XComponentContext >& rxContext)
54 : m_xContext(rxContext)
55 , m_xTerminateListener(new TerminateDetection(this))
56 , m_bCancel(false)
58 css::frame::Desktop::create(m_xContext)->addTerminateListener(m_xTerminateListener.get());
59 BaseContainer::init(rxContext ,
60 TypeDetection::impl_getImplementationName() ,
61 TypeDetection::impl_getSupportedServiceNames(),
62 FilterCache::E_TYPE );
66 TypeDetection::~TypeDetection()
68 css::frame::Desktop::create(m_xContext)->removeTerminateListener(m_xTerminateListener.get());
72 OUString SAL_CALL TypeDetection::queryTypeByURL(const OUString& sURL)
74 OUString sType;
76 // SAFE ->
77 ::osl::ResettableMutexGuard aLock(m_aLock);
79 css::util::URL aURL;
80 aURL.Complete = sURL;
81 css::uno::Reference< css::util::XURLTransformer > xParser( css::util::URLTransformer::create(m_xContext) );
82 xParser->parseStrict(aURL);
84 // set std types as minimum requirement first!
85 // Only in case no type was found for given URL,
86 // use optional types too ...
87 auto & cache = TheFilterCache::get();
88 FlatDetection lFlatTypes;
89 cache.detectFlatForURL(aURL, lFlatTypes);
91 if (
92 (lFlatTypes.size() < 1 ) &&
93 (!cache.isFillState(FilterCache::E_CONTAINS_TYPES))
96 cache.load(FilterCache::E_CONTAINS_TYPES);
97 cache.detectFlatForURL(aURL, lFlatTypes);
100 // first item is guaranteed as "preferred" one!
101 if (lFlatTypes.size() > 0)
103 const FlatDetectionInfo& aMatch = *(lFlatTypes.begin());
104 sType = aMatch.sType;
107 return sType;
108 // <- SAFE
111 namespace {
114 * Rank format types in order of complexity. More complex formats are
115 * ranked higher so that they get tested sooner over simpler formats.
117 * Guidelines to determine how complex a format is (subject to change):
119 * 1) compressed text (XML, HTML, etc)
120 * 2) binary
121 * 3) non-compressed text
122 * 3.1) structured text
123 * 3.1.1) dialect of a structured text (e.g. docbook XML)
124 * 3.1.2) generic structured text (e.g. generic XML)
125 * 3.2) non-structured text
127 * In each category, rank them from strictly-structured to
128 * loosely-structured.
130 int getFlatTypeRank(const OUString& rType)
132 // List formats from more complex to less complex.
133 // TODO: Add more.
134 static const char* ranks[] = {
136 // Compressed XML (ODF XML zip formats)
137 "writer8_template",
138 "writer8",
139 "calc8_template",
140 "calc8",
141 "impress8_template",
142 "impress8",
143 "draw8_template",
144 "draw8",
145 "chart8",
146 "math8",
147 "writerglobal8_template",
148 "writerglobal8",
149 "writerweb8_writer_template",
150 "StarBase",
152 // Compressed XML (OOXML)
153 "writer_OOXML_Text_Template",
154 "writer_OOXML",
155 "writer_MS_Word_2007_Template",
156 "writer_MS_Word_2007",
157 "Office Open XML Spreadsheet Template",
158 "Office Open XML Spreadsheet",
159 "MS Excel 2007 XML Template",
160 "MS Excel 2007 XML",
161 "MS PowerPoint 2007 XML Template",
162 "MS PowerPoint 2007 XML AutoPlay",
163 "MS PowerPoint 2007 XML",
165 // Compressed XML (Uniform/Unified Office Format)
166 "Unified_Office_Format_text",
167 "Unified_Office_Format_spreadsheet",
168 "Unified_Office_Format_presentation",
170 // Compressed XML (StarOffice XML zip formats)
171 "calc_StarOffice_XML_Calc",
172 "calc_StarOffice_XML_Calc_Template",
173 "chart_StarOffice_XML_Chart",
174 "draw_StarOffice_XML_Draw",
175 "draw_StarOffice_XML_Draw_Template",
176 "impress_StarOffice_XML_Impress",
177 "impress_StarOffice_XML_Impress_Template",
178 "math_StarOffice_XML_Math",
179 "writer_StarOffice_XML_Writer",
180 "writer_StarOffice_XML_Writer_Template",
181 "writer_globaldocument_StarOffice_XML_Writer_GlobalDocument",
182 "writer_web_StarOffice_XML_Writer_Web_Template",
184 // Compressed text
185 "pdf_Portable_Document_Format",
187 // Binary
188 "writer_T602_Document",
189 "writer_WordPerfect_Document",
190 "writer_MS_Works_Document",
191 "writer_MS_Word_97_Vorlage",
192 "writer_MS_Word_97",
193 "writer_MS_Word_95_Vorlage",
194 "writer_MS_Word_95",
195 "writer_MS_WinWord_60",
196 "writer_MS_WinWord_5",
197 "MS Excel 2007 Binary",
198 "calc_MS_Excel_97_VorlageTemplate",
199 "calc_MS_Excel_97",
200 "calc_MS_Excel_95_VorlageTemplate",
201 "calc_MS_Excel_95",
202 "calc_MS_Excel_5095_VorlageTemplate",
203 "calc_MS_Excel_5095",
204 "calc_MS_Excel_40_VorlageTemplate",
205 "calc_MS_Excel_40",
206 "calc_Pocket_Excel_File",
207 "impress_MS_PowerPoint_97_Vorlage",
208 "impress_MS_PowerPoint_97_AutoPlay",
209 "impress_MS_PowerPoint_97",
210 "calc_Lotus",
211 "calc_QPro",
212 "calc_SYLK",
213 "calc_DIF",
214 "calc_dBase",
216 // Binary (raster and vector image files)
217 "emf_MS_Windows_Metafile",
218 "wmf_MS_Windows_Metafile",
219 "met_OS2_Metafile",
220 "svm_StarView_Metafile",
221 "sgv_StarDraw_20",
222 "tif_Tag_Image_File",
223 "tga_Truevision_TARGA",
224 "sgf_StarOffice_Writer_SGF",
225 "ras_Sun_Rasterfile",
226 "psd_Adobe_Photoshop",
227 "png_Portable_Network_Graphic",
228 "jpg_JPEG",
229 "mov_MOV",
230 "gif_Graphics_Interchange",
231 "bmp_MS_Windows",
232 "pcx_Zsoft_Paintbrush",
233 "pct_Mac_Pict",
234 "pcd_Photo_CD_Base",
235 "pcd_Photo_CD_Base4",
236 "pcd_Photo_CD_Base16",
237 "impress_CGM_Computer_Graphics_Metafile", // There is binary and ascii variants ?
238 "draw_WordPerfect_Graphics",
239 "draw_Visio_Document",
240 "draw_Publisher_Document",
241 "draw_Corel_Presentation_Exchange",
242 "draw_CorelDraw_Document",
243 "writer_LotusWordPro_Document",
244 "writer_MIZI_Hwp_97", // Hanword (Hancom Office)
246 // Non-compressed XML
247 "writer_ODT_FlatXML",
248 "calc_ODS_FlatXML",
249 "impress_ODP_FlatXML",
250 "draw_ODG_FlatXML",
251 "calc_ADO_rowset_XML",
252 "calc_MS_Excel_2003_XML",
253 "writer_MS_Word_2003_XML",
254 "writer_DocBook_File",
255 "XHTML_File",
256 "svg_Scalable_Vector_Graphics",
257 "math_MathML_XML_Math",
259 // Non-compressed text
260 "dxf_AutoCAD_Interchange",
261 "eps_Encapsulated_PostScript",
262 "pbm_Portable_Bitmap", // There is 'raw' and 'ascii' variants.
263 "ppm_Portable_Pixelmap", // There is 'raw' and 'ascii' variants.
264 "pgm_Portable_Graymap", // There is 'raw' and 'ascii' variants.
265 "xpm_XPM",
266 "xbm_X_Consortium",
267 "writer_Rich_Text_Format",
268 "writer_web_HTML_help",
269 "generic_HTML",
271 "generic_Text", // Plain text (catch all)
273 // Anything ranked lower than generic_Text will never be used during
274 // type detection (since generic_Text catches all).
276 // Export only
277 "writer_layout_dump_xml",
278 "graphic_SWF",
279 "graphic_HTML",
281 // Internal use only
282 "StarBaseReportChart",
283 "StarBaseReport",
284 "math_MathType_3x", // MathType equation embedded in Word doc.
287 size_t n = SAL_N_ELEMENTS(ranks);
289 for (size_t i = 0; i < n; ++i)
291 if (rType.equalsAscii(ranks[i]))
292 return n - i - 1;
295 // Not ranked. Treat them equally. Unranked formats have higher priority
296 // than the ranked internal ones since they may be defined externally.
297 return n;
301 * Types with matching pattern first, then extension, then custom ranks by
302 * types, then types that are supported by the document service come next.
303 * Lastly, sort them alphabetically.
305 struct SortByPriority
307 bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const
309 if (r1.bMatchByPattern != r2.bMatchByPattern)
310 return r1.bMatchByPattern;
312 if (r1.bMatchByExtension != r2.bMatchByExtension)
313 return r1.bMatchByExtension;
315 int rank1 = getFlatTypeRank(r1.sType);
316 int rank2 = getFlatTypeRank(r2.sType);
318 if (rank1 != rank2)
319 return rank1 > rank2;
321 if (r1.bPreselectedByDocumentService != r2.bPreselectedByDocumentService)
322 return r1.bPreselectedByDocumentService;
324 // All things being equal, sort them alphabetically.
325 return r1.sType > r2.sType;
327 } objSortByPriority;
329 struct SortByType
332 bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const
334 return r1.sType > r2.sType;
336 } objSortByType;
338 struct EqualByType
340 bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const
342 return r1.sType == r2.sType;
344 } objEqualByType;
346 class FindByType
348 OUString maType;
349 public:
350 explicit FindByType(const OUString& rType) : maType(rType) {}
351 bool operator() (const FlatDetectionInfo& rInfo) const
353 return rInfo.sType == maType;
#if DEBUG_TYPE_DETECTION
/// Debug helper (compiled only when DEBUG_TYPE_DETECTION is non-zero):
/// writes the caption, the list size and one line per candidate with its
/// three match flags to std::cout.
void printFlatDetectionList(const char* caption, const FlatDetection& types)
{
    cout << "-- " << caption << " (size=" << types.size() << ")" << endl;
    for (auto const& rEntry : types)
    {
        cout << " type='" << rEntry.sType
             << "'; match by extension (" << rEntry.bMatchByExtension
             << "); match by pattern (" << rEntry.bMatchByPattern
             << "); pre-selected by doc service (" << rEntry.bPreselectedByDocumentService
             << ")" << endl;
    }
    cout << "--" << endl;
}
#endif
373 OUString SAL_CALL TypeDetection::queryTypeByDescriptor(css::uno::Sequence< css::beans::PropertyValue >& lDescriptor,
374 sal_Bool bAllowDeep )
376 // make the descriptor more usable :-)
377 utl::MediaDescriptor stlDescriptor(lDescriptor);
378 OUString sType, sURL;
382 // SAFE -> ----------------------------------
383 ::osl::ResettableMutexGuard aLock(m_aLock);
385 // parse given URL to split it into e.g. main and jump marks ...
386 sURL = stlDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL(), OUString());
388 #if OSL_DEBUG_LEVEL > 0
389 if (stlDescriptor.find( "FileName" ) != stlDescriptor.end())
390 OSL_FAIL("Detect using of deprecated and already unsupported MediaDescriptor property \"FileName\"!");
391 #endif
393 css::util::URL aURL;
394 aURL.Complete = sURL;
395 css::uno::Reference< css::util::XURLTransformer > xParser(css::util::URLTransformer::create(m_xContext));
396 xParser->parseStrict(aURL);
398 OUString aSelectedFilter = stlDescriptor.getUnpackedValueOrDefault(
399 utl::MediaDescriptor::PROP_FILTERNAME(), OUString());
400 if (!aSelectedFilter.isEmpty())
402 // Caller specified the filter type. Honor it. Just get the default
403 // type for that filter, and bail out.
404 if (impl_validateAndSetFilterOnDescriptor(stlDescriptor, aSelectedFilter))
405 return stlDescriptor[utl::MediaDescriptor::PROP_TYPENAME()].get<OUString>();
408 FlatDetection lFlatTypes;
409 impl_getAllFormatTypes(aURL, stlDescriptor, lFlatTypes);
411 aLock.clear();
412 // <- SAFE ----------------------------------
414 // Properly prioritize all candidate types.
415 std::stable_sort(lFlatTypes.begin(), lFlatTypes.end(), objSortByPriority);
416 auto last = std::unique(lFlatTypes.begin(), lFlatTypes.end(), objEqualByType);
417 lFlatTypes.erase(last, lFlatTypes.end());
419 OUString sLastChance;
421 // verify every flat detected (or preselected!) type
422 // by calling its registered deep detection service.
423 // But break this loop if a type match to the given descriptor
424 // by an URL pattern(!) or if deep detection isn't allowed from
425 // outside (bAllowDeep=sal_False) or break the whole detection by
426 // throwing an exception if creation of the might needed input
427 // stream failed by e.g. an IO exception ...
428 std::vector<OUString> lUsedDetectors;
429 if (lFlatTypes.size()>0)
430 sType = impl_detectTypeFlatAndDeep(stlDescriptor, lFlatTypes, bAllowDeep, lUsedDetectors, sLastChance);
432 // flat detection failed
433 // pure deep detection failed
434 // => ask might existing InteractionHandler
435 // means: ask user for its decision
436 if (sType.isEmpty() && !m_bCancel)
437 sType = impl_askUserForTypeAndFilterIfAllowed(stlDescriptor);
440 // no real detected type - but a might valid one.
441 // update descriptor and set last chance for return.
442 if (sType.isEmpty() && !sLastChance.isEmpty() && !m_bCancel)
444 OSL_FAIL("set first flat detected type without a registered deep detection service as \"last chance\" ... nevertheless some other deep detections said \"NO\". I TRY IT!");
445 sType = sLastChance;
448 catch(const css::uno::RuntimeException&)
450 throw;
452 catch(const css::uno::Exception& e)
454 SAL_WARN("filter.config", "caught " << e
455 << " while querying type of " << sURL);
456 sType.clear();
459 // adapt media descriptor, so it contains the right values
460 // for type/filter name/document service/ etcpp.
461 impl_checkResultsAndAddBestFilter(stlDescriptor, sType); // Attention: sType is used as IN/OUT param here and will might be changed inside this method !!!
462 impl_validateAndSetTypeOnDescriptor(stlDescriptor, sType);
464 stlDescriptor >> lDescriptor;
465 return sType;
469 void TypeDetection::impl_checkResultsAndAddBestFilter(utl::MediaDescriptor& rDescriptor,
470 OUString& sType )
472 // a)
473 // Don't overwrite a might preselected filter!
474 OUString sFilter = rDescriptor.getUnpackedValueOrDefault(
475 utl::MediaDescriptor::PROP_FILTERNAME(),
476 OUString());
477 if (!sFilter.isEmpty())
478 return;
480 auto & cache = TheFilterCache::get();
482 // b)
483 // check a preselected document service too.
484 // Then we have to search a suitable filter within this module.
485 OUString sDocumentService = rDescriptor.getUnpackedValueOrDefault(
486 utl::MediaDescriptor::PROP_DOCUMENTSERVICE(),
487 OUString());
488 if (!sDocumentService.isEmpty())
492 OUString sRealType = sType;
494 // SAFE ->
495 ::osl::ResettableMutexGuard aLock(m_aLock);
497 // Attention: For executing next lines of code, We must be sure that
498 // all filters already loaded :-(
499 // That can disturb our "load on demand feature". But we have no other chance!
500 cache.load(FilterCache::E_CONTAINS_FILTERS);
502 CacheItem lIProps;
503 lIProps[PROPNAME_DOCUMENTSERVICE] <<= sDocumentService;
504 lIProps[PROPNAME_TYPE ] <<= sRealType;
505 std::vector<OUString> lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps);
507 aLock.clear();
508 // <- SAFE
510 for (auto const& filter : lFilters)
512 // SAFE ->
513 aLock.reset();
516 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, filter);
517 sal_Int32 nFlags = 0;
518 aFilter[PROPNAME_FLAGS] >>= nFlags;
520 if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::IMPORT)
521 sFilter = filter;
522 if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::PREFERED)
523 break;
525 catch(const css::uno::Exception&) {}
526 aLock.clear();
527 // <- SAFE
530 if (!sFilter.isEmpty())
532 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sRealType;
533 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter;
534 sType = sRealType;
535 return;
538 catch(const css::uno::Exception&)
542 // c)
543 // We can use the preferred filter for the specified type.
544 // Such preferred filter points:
545 // - to the default filter of the preferred application
546 // - or to any other filter if no preferred filter was set.
547 // Note: It's an optimization only!
548 // It's not guaranteed, that such preferred filter exists.
549 sFilter.clear();
552 // SAFE ->
553 ::osl::ResettableMutexGuard aLock(m_aLock);
555 CacheItem aType = cache.getItem(FilterCache::E_TYPE, sType);
556 aType[PROPNAME_PREFERREDFILTER] >>= sFilter;
557 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, sFilter);
559 aLock.clear();
560 // <- SAFE
562 // no exception => found valid type and filter => set it on the given descriptor
563 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sType ;
564 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter;
565 return;
567 catch(const css::uno::Exception&)
570 // d)
571 // Search for any import(!) filter, which is registered for this type.
572 sFilter.clear();
575 // SAFE ->
576 ::osl::ResettableMutexGuard aLock(m_aLock);
578 // Attention: For executing next lines of code, We must be sure that
579 // all filters already loaded :-(
580 // That can disturb our "load on demand feature". But we have no other chance!
581 cache.load(FilterCache::E_CONTAINS_FILTERS);
583 CacheItem lIProps;
584 lIProps[PROPNAME_TYPE] <<= sType;
585 std::vector<OUString> lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps);
587 aLock.clear();
588 // <- SAFE
590 for (auto const& filter : lFilters)
592 sFilter = filter;
594 // SAFE ->
595 aLock.reset();
598 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, sFilter);
599 sal_Int32 nFlags = 0;
600 aFilter[PROPNAME_FLAGS] >>= nFlags;
602 if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::IMPORT)
603 break;
605 catch(const css::uno::Exception&)
606 { continue; }
607 aLock.clear();
608 // <- SAFE
610 sFilter.clear();
613 if (!sFilter.isEmpty())
615 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sType ;
616 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter;
617 return;
620 catch(const css::uno::Exception&)
625 bool TypeDetection::impl_getPreselectionForType(
626 const OUString& sPreSelType, const util::URL& aParsedURL, FlatDetection& rFlatTypes, bool bDocService)
628 // Can be used to suppress execution of some parts of this method
629 // if it's already clear that detected type is valid or not.
630 // It's necessary to use shared code at the end, which update
631 // all return parameters consistency!
632 bool bBreakDetection = false;
634 // Further we must know if it matches by pattern
635 // Every flat detected type by pattern won't be detected deep!
636 bool bMatchByPattern = false;
638 // And we must know if a preselection must be preferred, because
639 // it matches by its extension too.
640 bool bMatchByExtension = false;
642 // validate type
643 OUString sType(sPreSelType);
644 CacheItem aType;
647 // SAFE -> --------------------------
648 ::osl::ResettableMutexGuard aLock(m_aLock);
649 aType = TheFilterCache::get().getItem(FilterCache::E_TYPE, sType);
650 aLock.clear();
651 // <- SAFE --------------------------
653 catch(const css::container::NoSuchElementException&)
655 sType.clear();
656 bBreakDetection = true;
659 if (!bBreakDetection)
661 // We can't check a preselected type for a given stream!
662 // So we must believe, that it can work ...
663 if ( aParsedURL.Complete == "private:stream" )
664 bBreakDetection = true;
667 if (!bBreakDetection)
669 // extract extension from URL .. to check it case-insensitive !
670 INetURLObject aParser (aParsedURL.Main);
671 OUString sExtension = aParser.getExtension(INetURLObject::LAST_SEGMENT ,
672 true ,
673 INetURLObject::DecodeMechanism::WithCharset);
674 sExtension = sExtension.toAsciiLowerCase();
676 // otherwise we must know, if it matches to the given URL really.
677 // especially if it matches by its extension or pattern registration.
678 std::vector<OUString> lExtensions(comphelper::sequenceToContainer< std::vector<OUString> >(aType[PROPNAME_EXTENSIONS].get<css::uno::Sequence<OUString> >() ));
679 std::vector<OUString> lURLPattern(comphelper::sequenceToContainer< std::vector<OUString> >(aType[PROPNAME_URLPATTERN].get<css::uno::Sequence<OUString> >() ));
681 for (auto const& extension : lExtensions)
683 OUString sCheckExtension(extension.toAsciiLowerCase());
684 if (sCheckExtension == sExtension)
686 bBreakDetection = true;
687 bMatchByExtension = true;
688 break;
692 if (!bBreakDetection)
694 for (auto const& elem : lURLPattern)
696 WildCard aCheck(elem);
697 if (aCheck.Matches(aParsedURL.Main))
699 bMatchByPattern = true;
700 break;
706 // if it's a valid type - set it on all return values!
707 if (!sType.isEmpty())
709 FlatDetection::iterator it = std::find_if(rFlatTypes.begin(), rFlatTypes.end(), FindByType(sType));
710 if (it != rFlatTypes.end())
712 if (bMatchByExtension)
713 it->bMatchByExtension = true;
714 if (bMatchByPattern)
715 it->bMatchByPattern = true;
716 if (bDocService)
717 it->bPreselectedByDocumentService = true;
720 return true;
723 // not valid!
724 return false;
727 void TypeDetection::impl_getPreselectionForDocumentService(
728 const OUString& sPreSelDocumentService, const util::URL& aParsedURL, FlatDetection& rFlatTypes)
730 // get all filters, which match to this doc service
731 std::vector<OUString> lFilters;
734 // SAFE -> --------------------------
735 ::osl::ResettableMutexGuard aLock(m_aLock);
737 // Attention: For executing next lines of code, We must be sure that
738 // all filters already loaded :-(
739 // That can disturb our "load on demand feature". But we have no other chance!
740 auto & cache = TheFilterCache::get();
741 cache.load(FilterCache::E_CONTAINS_FILTERS);
743 CacheItem lIProps;
744 lIProps[PROPNAME_DOCUMENTSERVICE] <<= sPreSelDocumentService;
745 lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps);
747 aLock.clear();
748 // <- SAFE --------------------------
750 catch (const css::container::NoSuchElementException&)
752 lFilters.clear();
755 // step over all filters, and check if its registered type
756 // match the given URL.
757 // But use temp. list of "preselected types" instead of incoming rFlatTypes list!
758 // The reason behind: we must filter the obtained results. And copying stl entries
759 // is an easier job than removing them .-)
760 for (auto const& filter : lFilters)
762 OUString aType = impl_getTypeFromFilter(filter);
763 if (aType.isEmpty())
764 continue;
766 impl_getPreselectionForType(aType, aParsedURL, rFlatTypes, true);
770 OUString TypeDetection::impl_getTypeFromFilter(const OUString& rFilterName)
772 CacheItem aFilter;
775 osl::MutexGuard aLock(m_aLock);
776 aFilter = TheFilterCache::get().getItem(FilterCache::E_FILTER, rFilterName);
778 catch (const container::NoSuchElementException&)
780 return OUString();
783 OUString aType;
784 aFilter[PROPNAME_TYPE] >>= aType;
785 return aType;
788 void TypeDetection::impl_getAllFormatTypes(
789 const util::URL& aParsedURL, utl::MediaDescriptor const & rDescriptor, FlatDetection& rFlatTypes)
791 rFlatTypes.clear();
793 // Get all filters that we have.
794 std::vector<OUString> aFilterNames;
797 osl::MutexGuard aLock(m_aLock);
798 auto & cache = TheFilterCache::get();
799 cache.load(FilterCache::E_CONTAINS_FILTERS);
800 aFilterNames = cache.getItemNames(FilterCache::E_FILTER);
802 catch (const container::NoSuchElementException&)
804 return;
807 // Retrieve the default type for each of these filters, and store them.
808 for (auto const& filterName : aFilterNames)
810 OUString aType = impl_getTypeFromFilter(filterName);
812 if (aType.isEmpty())
813 continue;
815 FlatDetectionInfo aInfo; // all flags set to false by default.
816 aInfo.sType = aType;
817 rFlatTypes.push_back(aInfo);
821 // Get all types that match the URL alone.
822 FlatDetection aFlatByURL;
823 TheFilterCache::get().detectFlatForURL(aParsedURL, aFlatByURL);
824 for (auto const& elem : aFlatByURL)
826 FlatDetection::iterator itPos = std::find_if(rFlatTypes.begin(), rFlatTypes.end(), FindByType(elem.sType));
827 if (itPos == rFlatTypes.end())
828 // Not in the list yet.
829 rFlatTypes.push_back(elem);
830 else
832 // Already in the list. Update the flags.
833 FlatDetectionInfo& rInfo = *itPos;
834 const FlatDetectionInfo& rThisInfo = elem;
835 if (rThisInfo.bMatchByExtension)
836 rInfo.bMatchByExtension = true;
837 if (rThisInfo.bMatchByPattern)
838 rInfo.bMatchByPattern = true;
839 if (rThisInfo.bPreselectedByDocumentService)
840 rInfo.bPreselectedByDocumentService = true;
845 // Remove duplicates.
846 std::stable_sort(rFlatTypes.begin(), rFlatTypes.end(), objSortByType);
847 auto last = std::unique(rFlatTypes.begin(), rFlatTypes.end(), objEqualByType);
848 rFlatTypes.erase(last, rFlatTypes.end());
850 // Mark pre-selected type (if any) to have it prioritized.
851 OUString sSelectedType = rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_TYPENAME(), OUString());
852 if (!sSelectedType.isEmpty())
853 impl_getPreselectionForType(sSelectedType, aParsedURL, rFlatTypes, false);
855 // Mark all types preferred by the current document service, to have it prioritized.
856 OUString sSelectedDoc = rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_DOCUMENTSERVICE(), OUString());
857 if (!sSelectedDoc.isEmpty())
858 impl_getPreselectionForDocumentService(sSelectedDoc, aParsedURL, rFlatTypes);
862 OUString TypeDetection::impl_detectTypeFlatAndDeep( utl::MediaDescriptor& rDescriptor ,
863 const FlatDetection& lFlatTypes ,
864 bool bAllowDeep ,
865 std::vector<OUString>& rUsedDetectors,
866 OUString& rLastChance )
868 // reset it everytimes, so the outside code can distinguish between
869 // a set and a not set value.
870 rLastChance.clear();
871 rUsedDetectors.clear();
873 // step over all possible types for this URL.
874 // solutions:
875 // a) no types => no detection
876 // b) deep detection not allowed => return first valid type of list (because it's the preferred or the first valid one)
877 // or(!) match by URLPattern => in such case a deep detection will be suppressed!
878 // c) type has no detect service => safe the first occurred type without a detect service
879 // as "last chance"(!). It will be used outside of this method
880 // if no further type could be detected.
881 // It must be the first one, because it can be a preferred type.
882 // Our types list was sorted by such criteria!
883 // d) detect service return a valid result => return its decision
884 // e) detect service return an invalid result
885 // or any needed information could not be
886 // obtained from the cache => ignore it, and continue with search
888 for (auto const& flatTypeInfo : lFlatTypes)
890 if (m_bCancel)
891 break;
892 OUString sFlatType = flatTypeInfo.sType;
894 if (!impl_validateAndSetTypeOnDescriptor(rDescriptor, sFlatType))
895 continue;
897 // b)
898 if (
899 (!bAllowDeep ) ||
900 (flatTypeInfo.bMatchByPattern)
903 return sFlatType;
908 // SAFE -> ----------------------------------
909 ::osl::ResettableMutexGuard aLock(m_aLock);
910 CacheItem aType = TheFilterCache::get().getItem(FilterCache::E_TYPE, sFlatType);
911 aLock.clear();
913 OUString sDetectService;
914 aType[PROPNAME_DETECTSERVICE] >>= sDetectService;
916 // c)
917 if (sDetectService.isEmpty())
919 // flat detected types without any registered deep detection service and not
920 // preselected by the user can be used as LAST CHANCE in case no other type could
921 // be detected. Of course only the first type without deep detector can be used.
922 // Further ones has to be ignored.
923 if (rLastChance.isEmpty())
924 rLastChance = sFlatType;
926 continue;
929 // don't forget to add every real asked deep detection service here.
930 // Such detectors will be ignored if may be "impl_detectTypeDeepOnly()"
931 // must be called later!
932 rUsedDetectors.push_back(sDetectService);
933 OUString sDeepType = impl_askDetectService(sDetectService, rDescriptor);
935 // d)
936 if (!sDeepType.isEmpty())
937 return sDeepType;
939 catch(const css::container::NoSuchElementException&)
941 // e)
944 return OUString();
945 // <- SAFE ----------------------------------
948 void TypeDetection::impl_seekStreamToZero(utl::MediaDescriptor const & rDescriptor)
950 // try to seek to 0 ...
951 // But because XSeekable is an optional interface ... try it only .-)
952 css::uno::Reference< css::io::XInputStream > xStream = rDescriptor.getUnpackedValueOrDefault(
953 utl::MediaDescriptor::PROP_INPUTSTREAM(),
954 css::uno::Reference< css::io::XInputStream >());
955 css::uno::Reference< css::io::XSeekable > xSeek(xStream, css::uno::UNO_QUERY);
956 if (xSeek.is())
960 xSeek->seek(0);
962 catch(const css::uno::RuntimeException&)
964 throw;
966 catch(const css::uno::Exception&)
972 OUString TypeDetection::impl_askDetectService(const OUString& sDetectService,
973 utl::MediaDescriptor& rDescriptor )
975 // Open the stream and add it to the media descriptor if this method is called for the first time.
976 // All following requests to this method will detect, that there already exists a stream .-)
977 // Attention: This method throws an exception if the stream could not be opened.
978 // It's important to break any further detection in such case.
979 // Catch it on the highest detection level only !!!
980 impl_openStream(rDescriptor);
982 // seek to 0 is an optional feature to be more robust against
983 // "simple implemented detect services" .-)
984 impl_seekStreamToZero(rDescriptor);
986 css::uno::Reference< css::document::XExtendedFilterDetection > xDetector;
987 css::uno::Reference< css::uno::XComponentContext > xContext;
989 // SAFE ->
990 ::osl::ResettableMutexGuard aLock(m_aLock);
991 xContext = m_xContext;
992 aLock.clear();
993 // <- SAFE
997 // Attention! If e.g. an office module was not installed sometimes we
998 // find a registered detect service, which is referred inside the
999 // configuration ... but not really installed. On the other side we use
1000 // third party components here, which can make trouble anyway. So we
1001 // should handle errors during creation of such services more
1002 // gracefully .-)
1003 xDetector.set(
1004 xContext->getServiceManager()->createInstanceWithContext(sDetectService, xContext),
1005 css::uno::UNO_QUERY_THROW);
1007 catch (...)
1011 if ( ! xDetector.is())
1012 return OUString();
1014 OUString sDeepType;
1017 // start deep detection
1018 // Don't forget to convert stl descriptor to its uno representation.
1020 /* Attention!
1021 You have to use an explicit instance of this uno sequence ...
1022 Because its used as an in out parameter. And in case of a temp. used object
1023 we will run into memory corruptions!
1025 css::uno::Sequence< css::beans::PropertyValue > lDescriptor;
1026 rDescriptor >> lDescriptor;
1027 sDeepType = xDetector->detect(lDescriptor);
1028 rDescriptor << lDescriptor;
1030 catch (...)
1032 // We should ignore errors here.
1033 // Thrown exceptions mostly will end in crash recovery ...
1034 // But might be we find another deep detection service which can detect the same
1035 // document without a problem .-)
1036 sDeepType.clear();
1039 // seek to 0 is an optional feature to be more robust against
1040 // "simple implemented detect services" .-)
1041 impl_seekStreamToZero(rDescriptor);
1043 // analyze the results
1044 // a) detect service returns "" => return "" too and remove TYPE/FILTER prop from descriptor
1045 // b) returned type is unknown => return "" too and remove TYPE/FILTER prop from descriptor
1046 // c) returned type is valid => check TYPE/FILTER props inside descriptor and return the type
1048 // this special helper checks for a valid type
1049 // and set right values on the descriptor!
1050 bool bValidType = impl_validateAndSetTypeOnDescriptor(rDescriptor, sDeepType);
1051 if (bValidType)
1052 return sDeepType;
1054 return OUString();
1058 OUString TypeDetection::impl_askUserForTypeAndFilterIfAllowed(utl::MediaDescriptor& rDescriptor)
1060 css::uno::Reference< css::task::XInteractionHandler > xInteraction =
1061 rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INTERACTIONHANDLER(),
1062 css::uno::Reference< css::task::XInteractionHandler >());
1064 if (!xInteraction.is())
1065 return OUString();
1067 OUString sURL =
1068 rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL(),
1069 OUString());
1071 css::uno::Reference< css::io::XInputStream > xStream =
1072 rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INPUTSTREAM(),
1073 css::uno::Reference< css::io::XInputStream >());
1075 // Don't disturb the user for "non existing files - means empty URLs" or
1076 // if we was forced to detect a stream.
1077 // Reason behind: we must be sure to ask user for "unknown contents" only...
1078 // and not for "missing files". Especially if detection is done by a stream only
1079 // we can't check if the stream points to an "existing content"!
1080 if (
1081 (sURL.isEmpty() ) || // "non existing file" ?
1082 (!xStream.is() ) || // non existing file !
1083 (sURL.equalsIgnoreAsciiCase("private:stream")) // not a good idea .-)
1085 return OUString();
1089 // create a new request to ask user for its decision about the usable filter
1090 ::framework::RequestFilterSelect aRequest(sURL);
1091 xInteraction->handle(aRequest.GetRequest());
1093 // "Cancel" pressed? => return with error
1094 if (aRequest.isAbort())
1095 return OUString();
1097 // "OK" pressed => verify the selected filter, get its corresponding
1098 // type and return it. (BTW: We must update the media descriptor here ...)
1099 // The user selected explicitly a filter ... but normally we are interested on
1100 // a type here only. But we must be sure, that the selected filter is used
1101 // too and no ambiguous filter registration disturb us .-)
1103 OUString sFilter = aRequest.getFilter();
1104 if (!impl_validateAndSetFilterOnDescriptor(rDescriptor, sFilter))
1105 return OUString();
1107 OUString sType;
1108 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME()] >>= sType;
1109 return sType;
1111 catch(const css::uno::Exception&)
1114 return OUString();
1118 void TypeDetection::impl_openStream(utl::MediaDescriptor& rDescriptor)
1120 bool bSuccess = false;
1121 OUString sURL = rDescriptor.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_URL(), OUString() );
1122 bool bRequestedReadOnly = rDescriptor.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_READONLY(), false );
1123 if ( comphelper::isFileUrl( sURL ) )
1125 // OOo uses own file locking mechanics in case of local file
1126 bSuccess = rDescriptor.addInputStreamOwnLock();
1128 else
1129 bSuccess = rDescriptor.addInputStream();
1131 if ( !bSuccess )
1132 throw css::uno::Exception(
1133 "Could not open stream for <" + sURL + ">",
1134 static_cast<OWeakObject *>(this));
1136 if ( !bRequestedReadOnly )
1138 // The MediaDescriptor implementation adds ReadOnly argument if the file can not be opened for writing
1139 // this argument should be either removed or an additional argument should be added so that application
1140 // can separate the case when the user explicitly requests readonly document.
1141 // The current solution is to remove it here.
1142 rDescriptor.erase( utl::MediaDescriptor::PROP_READONLY() );
1147 void TypeDetection::impl_removeTypeFilterFromDescriptor(utl::MediaDescriptor& rDescriptor)
1149 utl::MediaDescriptor::iterator pItType = rDescriptor.find(utl::MediaDescriptor::PROP_TYPENAME() );
1150 utl::MediaDescriptor::iterator pItFilter = rDescriptor.find(utl::MediaDescriptor::PROP_FILTERNAME());
1151 if (pItType != rDescriptor.end())
1152 rDescriptor.erase(pItType);
1153 if (pItFilter != rDescriptor.end())
1154 rDescriptor.erase(pItFilter);
1158 bool TypeDetection::impl_validateAndSetTypeOnDescriptor( utl::MediaDescriptor& rDescriptor,
1159 const OUString& sType )
1161 // SAFE ->
1162 ::osl::ResettableMutexGuard aLock(m_aLock);
1163 if (TheFilterCache::get().hasItem(FilterCache::E_TYPE, sType))
1165 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME()] <<= sType;
1166 return true;
1168 aLock.clear();
1169 // <- SAFE
1171 // remove all related information from the descriptor
1172 impl_removeTypeFilterFromDescriptor(rDescriptor);
1173 return false;
1177 bool TypeDetection::impl_validateAndSetFilterOnDescriptor( utl::MediaDescriptor& rDescriptor,
1178 const OUString& sFilter )
1182 // SAFE ->
1183 ::osl::ResettableMutexGuard aLock(m_aLock);
1185 auto & cache = TheFilterCache::get();
1186 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, sFilter);
1187 OUString sType;
1188 aFilter[PROPNAME_TYPE] >>= sType;
1189 CacheItem aType = cache.getItem(FilterCache::E_TYPE, sType);
1191 aLock.clear();
1192 // <- SAFE
1194 // found valid type and filter => set it on the given descriptor
1195 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sType ;
1196 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter;
1197 return true;
1199 catch(const css::container::NoSuchElementException&){}
1201 // remove all related information from the descriptor
1202 impl_removeTypeFilterFromDescriptor(rDescriptor);
1203 return false;
1207 OUString TypeDetection::impl_getImplementationName()
1209 return OUString( "com.sun.star.comp.filter.config.TypeDetection" );
1213 css::uno::Sequence< OUString > TypeDetection::impl_getSupportedServiceNames()
1215 return { "com.sun.star.document.TypeDetection" };
1219 css::uno::Reference< css::uno::XInterface > TypeDetection::impl_createInstance(const css::uno::Reference< css::lang::XMultiServiceFactory >& xSMGR)
1221 TypeDetection* pNew = new TypeDetection( comphelper::getComponentContext(xSMGR) );
1222 return css::uno::Reference< css::uno::XInterface >(static_cast< css::document::XTypeDetection* >(pNew), css::uno::UNO_QUERY);
1225 } // namespace config
1226 } // namespace filter
1228 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */