Support conversion of linkshere
[dueringa_WikiWalker.git] / test / WikimediaJsonToArticleConverterTests.cpp
blobed6c2c89fb69ed7473ada5f9791c44da041ba6e5
1 #include <algorithm>
3 #include <UnitTest++/UnitTest++.h>
5 #include "Article.h"
6 #include "WalkerException.h"
7 #include "WikimediaJsonToArticleConverter.h"
9 SUITE(WikimediaJsonToArticleConverterTests)
11 using namespace WikiWalker;
12 using namespace WikiWalker::CollectionUtils;
14 TEST(JsonDataWithOneLinkedArticle)
16 std::string testdata =
17 R"({"batchcomplete":"","servedby":"mw1197","query":{"pages":[{"pageid":36669940,"ns":0,"title":"3PTT","links":[{"ns":0,"title":"Switch"}]}]}})";
19 ArticleCollection ac;
20 WikimediaJsonToArticleConverter conv;
21 auto cont = conv.convert(testdata, ac);
22 CHECK(WikimediaJsonToArticleConverter::ContinuationStatus::
23 ConversionCompleted == cont);
24 CHECK_EQUAL("", conv.continuationData()["plcontinue"]);
25 auto getArticle = CollectionUtils::get(ac, "3PTT");
26 CHECK(getArticle != nullptr);
27 CHECK_EQUAL(1, getArticle->countLinks());
28 CHECK_EQUAL(2, ac.size());
31 TEST(JsonDataWithInvalidArticle_Throws)
33 std::string testdata =
34 R"({"batchcomplete":"","servedby":"mw1208","query":{"pages":[{"ns":0,"title":"FoObAr","missing":""}]}})";
36 ArticleCollection ac;
37 WikimediaJsonToArticleConverter conv;
38 auto ret = conv.convert(testdata, ac);
39 CHECK(ret == WikiWalker::WikimediaJsonToArticleConverter::
40 ContinuationStatus::ConversionCompleted);
41 auto art = CollectionUtils::get(ac, "FoObAr");
42 CHECK(art != nullptr);
43 CHECK(art->marked());
46 TEST(JsonData_MoreLinks_HasContinueData)
48 std::string testdata =
49 R"({"continue":{"plcontinue":"34419161|0|Jharkhand","continue":"||"},"servedby":"mw1283","query":{"pages":[{"pageid":34419161,"ns":0,"title":"Satar, Deoghar","links":[{"ns":0,"title":"Deoghar district"}]}]}})";
51 ArticleCollection ac;
52 WikimediaJsonToArticleConverter conv;
53 auto cont = conv.convert(testdata, ac);
54 CHECK(cont == WikimediaJsonToArticleConverter::ContinuationStatus::
55 ConversionNeedsMoreData);
56 CHECK_EQUAL("34419161|0|Jharkhand", conv.continuationData()["plcontinue"]);
57 auto getArticle = CollectionUtils::get(ac, "Satar, Deoghar");
58 CHECK(getArticle != nullptr);
59 CHECK_EQUAL(1, getArticle->countLinks());
60 CHECK_EQUAL(2, ac.size());
63 TEST(JsonData_ContainsMultipleArticles)
65 std::string testdata =
66 R"#({"batchcomplete": true,"query": {"normalized": [{"fromencoded": false,"from": "Zanfina_Ismajli","to": "Zanfina Ismajli"},{"fromencoded": false,"from": "Kleite_(Tochter_des_Danaos)","to": "Kleite (Tochter des Danaos)"}],"pages": [{"pageid": 2834303,"ns": 0,"title": "Zanfina Ismajli","links": [{"ns": 0,"title": "10. Mai"},{"ns": 0,"title": "1985"}]},{"pageid": 8086803,"ns": 0,"title": "Kleite (Tochter des Danaos)","links": [{"ns": 0,"title": "Aigyptos"},{"ns": 0,"title": "Altgriechische Sprache"}]}]},"limits": {"links": 500}})#";
67 WikimediaJsonToArticleConverter conv;
68 ArticleCollection ac;
69 auto cont = conv.convert(testdata, ac);
70 CHECK(WikimediaJsonToArticleConverter::ContinuationStatus::
71 ConversionCompleted == cont);
73 CHECK_EQUAL(2, CollectionUtils::countAnalyzedArticles(ac));
74 auto ptr = CollectionUtils::get(ac, "Zanfina Ismajli");
75 CHECK(ptr != nullptr);
76 ptr = CollectionUtils::get(ac, "Kleite (Tochter des Danaos)");
77 CHECK(ptr != nullptr);
78 CHECK_EQUAL(6, ac.size());
81 TEST(JsonData_ConvertLinkshere)
83 std::string testdata =
84 R"#({"batchcomplete":true,"query":{"pages":[{"pageid":2,"ns":0,"title":"Eins","linkshere":[{"ns":0,"title":"Zw\u00f6lf"},{"ns":0,"title":"Dreizehn"},{"ns":0,"title":"Ens"}]}]},"limits":{"linkshere":500}})#";
85 WikimediaJsonToArticleConverter conv;
86 ArticleCollection ac;
87 auto cont = conv.convert(testdata, ac);
89 // exp: zwölf, dreizehn, ens --> eins
90 CHECK(WikimediaJsonToArticleConverter::ContinuationStatus::
91 ConversionCompleted == cont);
92 CHECK_EQUAL(3, CollectionUtils::countAnalyzedArticles(ac));
93 CHECK_EQUAL(4, ac.size());
95 auto ptr = CollectionUtils::get(ac, "Ens");
96 REQUIRE CHECK(ptr != nullptr);
97 CHECK(ptr->analyzed());
98 ptr = CollectionUtils::get(ac, "Dreizehn");
99 REQUIRE CHECK(ptr != nullptr);
100 CHECK(ptr->analyzed());
101 // better be save than sorry
102 ptr = CollectionUtils::get(ac, "Zw\u00f6lf");
103 REQUIRE CHECK(ptr != nullptr);
104 CHECK(ptr->analyzed());
105 ptr = CollectionUtils::get(ac, "Eins");
106 REQUIRE CHECK(ptr != nullptr);
107 CHECK(!ptr->analyzed());