2 // ShaarliHtmlClient.swift
5 // Created by Marcus Rohrmoser on 09.06.19.
6 // Copyright © 2019-2021 Marcus Rohrmoser mobile Software http://mro.name/me. All rights reserved.
8 // This program is free software: you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation, either version 3 of the License, or
11 // (at your option) any later version.
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU General Public License for more details.
18 // You should have received a copy of the GNU General Public License
19 // along with this program. If not, see <http://www.gnu.org/licenses/>.
// Tells whether `character` begins with a unicode scalar that `isEmoji(rune:)`
// classifies as emoji.
//
// - Parameter character: the character to inspect; `nil` yields `false`.
// - Returns: the verdict of `isEmoji(rune:)` for the first unicode scalar.
internal func isEmoji(character: Character?) -> Bool {
    // Bind the first scalar instead of force-unwrapping it.
    guard let rune = character?.unicodeScalars.first else { return false }
    return isEmoji(rune: rune)
}
29 // https://code.mro.name/mro/ShaarliGo/src/c65e142dda32bac7cec02deedc345b8f32a2cf8e/atom.go#L467
30 // https://stackoverflow.com/a/39425959
// Classifies a single unicode scalar by matching it against the code point
// ranges listed below.
// NOTE(review): the statement opening the match and the values returned for
// a hit / a miss are not visible here - presumably true / false; confirm.
31 internal func isEmoji(rune: UnicodeScalar) -> Bool {
34 0x2b50...0x2b50, // star
35 0x1F600...0x1F64F, // Emoticons
36 0x1F300...0x1F5FF, // Misc Symbols and Pictographs
37 0x1F680...0x1F6FF, // Transport and Map
38 0x1F1E6...0x1F1FF, // Regional country flags
39 0x2600...0x26FF, // Misc symbols
40 0x2700...0x27BF, // Dingbats
41 0xFE00...0xFE0F, // Variation Selectors
42 0x1F900...0x1F9FF, // Supplemental Symbols and Pictographs
// NOTE(review): the next range already contains the regional flags range
// 0x1F1E6...0x1F1FF listed above.
43 0x1f018...0x1f270, // Various asian characters
// NOTE(review): same range as 0xFE00...0xFE0F above, only spelled in lower case.
44 0xfe00...0xfe0f, // Variation selector
45 0x238c...0x2454, // Misc items
46 0x20d0...0x20ff: // Combining Diacritical Marks for Symbols
// Character set used by isTag() to trim a tag: the punctuation characters
// with '§', '†' and the characters of `tpf` removed, so those survive trimming.
// `tpf` is declared elsewhere in this file.
55 private let myPunct:CharacterSet = {
56 var cs = CharacterSet.punctuationCharacters
57 cs.remove(charactersIn:"§†\(tpf)")
61 // https://code.mro.name/mro/ShaarliGo/src/c65e142dda32bac7cec02deedc345b8f32a2cf8e/atom.go#L485
// Derives the tag denoted by `word`.
//
// - Parameter word: a single word; `nil` yields "".
// - Returns: the tag candidate with leading and trailing `myPunct` characters
//   trimmed. tagsNormalise() compares the result against "".
62 internal func isTag(word: Substring?) -> String {
63 guard let word = word else { return "" }
// The candidate depends on whether the word starts with the tag prefix `tpf`
// or, failing that, with an emoji.
// NOTE(review): the values of the individual branches are not visible here.
64 let tag = word.hasPrefix(tpf)
66 : isEmoji(character:word.first)
69 return tag.trimmingCharacters(in: myPunct)
// Collects the tags occurring in `string`.
//
// The text is scanned word by word (words being separated by whitespace or
// newlines) and the result of isTag() for each word is inserted into the set.
// NOTE(review): isTag() returns "" for a nil word; whether "" is removed from
// the set again is not visible here - confirm.
72 internal func tagsFrom(string: String) -> Set<String> {
73 let sca = Scanner(string:string)
74 var ret = Set<String>()
75 // https://news.ycombinator.com/item?id=8822835
76 // not https://medium.com/@sorenlind/three-ways-to-enumerate-the-words-in-a-string-using-swift-7da5504f0062
// `word` receives the scanned word; its declaration is not visible here.
78 while sca.scanUpToCharacters(from:CharacterSet.whitespacesAndNewlines, into:&word) {
79 ret.insert(isTag(word:word as Substring?))
// Normalises a tag label for comparison.
//
// - Parameter lbl: the label as written by the user.
// - Returns: `lbl` without surrounding whitespace and newlines, folded so
//   that differences in case and diacritics disappear.
internal func fold(lbl:String) -> String {
    return lbl
        .trimmingCharacters(in: .whitespacesAndNewlines)
        .folding(options: [.diacriticInsensitive, .caseInsensitive], locale: nil)
}
// Reconciles the declared tags with the tags used in the texts.
//
// - Parameters:
//   - ds: description text, scanned for tags.
//   - ex: extended text, scanned for tags.
//   - ta: the tags declared so far.
//   - known: only referenced by commented-out code in the lines visible here.
// - Returns: the trimmed description, the trimmed extended text with `miss`
//   appended on a new line, and a tag set.
//   NOTE(review): the `tags:` element of the returned tuple is not visible
//   here - presumably the local `tags`; confirm.
//
// Tags are compared by their folded form (see fold()).
90 func tagsNormalise(description ds: String, extended ex: String, tags ta: Set<String>, known:Set<String>) -> (description: String, extended: String, tags: Set<String>) {
// Registers `tag` in `di` under its folded form.
91 func foldr(_ di: inout [String:String], _ tag:String) { di[fold(lbl:tag)] = tag }
93 let tadi = ta.reduce(into:[:], foldr) // previously declared tags
96 let txdi = tagsFrom(string:ds).union(tagsFrom(string:ex)).reduce(into:[:], foldr) // tags actually used in the texts
// NOTE(review): `take` (and `txke` below) are declared in lines not visible
// here - presumably the key sets of `tadi` and `txdi`; confirm.
99 let nedi = txdi.filter { !take.contains($0.0) } // used, but undeclared: new
100 let tags = ta.union(nedi.values)
101 // let kndi = known.reduce(into:[:], foldr) // may be large
102 // should we replace values from tags with corresponding from kndi now?
// Declared tags that do not occur in the texts, sorted and folded into one string.
104 let miss = tadi.filter{ !txke.contains($0.0) }.values.sorted().reduce("") {
// True when isTag() yields "" for the bare tag.
105 let hashpre = "" == isTag(word:Substring($1))
108 let tg = "\(hashpre)\($1)"
113 func trim(_ s:String) -> String { return s.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines) }
115 description:trim(ds),
116 extended:trim("\(ex)\n\(miss)"),
// Splits a tag list as found in a form field into the individual tags.
//
// - Parameter s: tags separated by commas and/or blanks; `nil` yields the empty set.
// - Returns: the distinct, non-empty tags.
func tagsSplit(_ s:String?) -> Set<String> {
    guard let s = s else { return [] }
    // split omits empty subsequences, so repeated separators yield no empty tags.
    let parts = s.split(whereSeparator: { $0 == "," || $0 == " " })
    return Set(parts.map { String($0) })
}
// Placeholder URL handed to callbacks whenever no real URL is available.
125 let URLEmpty = URLComponents().url!
// URL schemes and HTTP method name.
127 let HTTP_HTTP = "http"
128 let HTTP_HTTPS = "https"
129 let HTTP_POST = "POST"
// Header name and value set on requests that upload form data.
131 let KEY_HEAD_CONTENT_TYPE = "Content-Type"
132 let VAL_HEAD_CONTENT_TYPE = "application/x-www-form-urlencoded"
// Field names of the link form, used as keys into the form dictionary.
134 let LF_URL = "lf_url"
135 let LF_TIT = "lf_title"
136 let LF_DSC = "lf_description"
137 let LF_TGS = "lf_tags"
138 let LF_PRI = "lf_private"
139 let LF_TIM = "lf_linkdate"
// Field values; get() treats a missing lf_private like VAL_OFF.
140 internal let VAL_ON = "on"
141 internal let VAL_OFF = "off"
// Names and values of query parameters.
144 private let KEY_PAR_DO = "do"
145 private let KEY_PAR_POST = "post"
146 private let KEY_PAR_SCRAPE = "scrape"
147 private let KEY_VAL_NO = "no"
148 private let KEY_PAR_DESC = "description"
149 private let CMD_DO_CFG = "configure"
// Login form: name of the form and of its credential fields.
151 internal let LOGIN_FORM = "loginform"
152 internal let KEY_FORM_LOGIN = "login"
153 internal let KEY_FORM_PASSWORD = "password"
// Regular expressions and literal text by which check() recognises a
// rejected login or a ban in a response body.
155 internal let PAT_WRONG_LOGIN = "^<script>alert\\((?:\".*?\"|'.*?')\\);"
156 private let PAT_BANNED = ">\\s*(\\S.*ou have been banned from logi.*\\S)\\s*<"
157 private let STR_BANNED = "I said: NO. You are banned for the moment. Go away."
// Name of the link form; KEY_FORM_TITLE is read from the configuration form by probe().
159 private let LINK_FORM = "linkform"
160 private let KEY_FORM_TITLE = "title"
// Configuration form: name of the form and the fields read by probe().
162 private let CFG_FORM = "configform"
163 private let KEY_FORM_PRIDE = "privateLinkByDefault"
164 private let KEY_FORM_CONT = "continent"
165 private let KEY_FORM_CITY = "city"
167 // Some (all?) shaarlis return post urls in linkform lf_url
168 // containing a blank – which isn't allowed for urls.
// Turns the url text found in a link form into a URL.
//
// Blanks are not allowed in urls, so each one is replaced by "+" before parsing.
//
// - Parameter s: the url text; may be `nil`.
// - Returns: the parsed URL, or `URLEmpty` when `s` is `nil` or cannot be parsed.
func issue61(_ s : String?) -> URL {
    guard let s = s,
          let url = URL(string: s.replacingOccurrences(of: " ", with: "+")) else {
        return URLEmpty
    }
    return url
}
173 // unreserved https://www.ietf.org/rfc/rfc3986.txt
// The characters formData() leaves unescaped when percent-encoding keys and
// values: the union of the characters of all strings listed in the array.
174 private let rfc3986_unreserved = [
175 "abcdefghijklmnopqrstuvwxyz",
176 "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
179 ].reduce(CharacterSet(), { $0.union(CharacterSet.init(charactersIn:$1)) })
181 // https://www.w3.org/TR/2009/REC-xforms-20091020/#serialize-urlencode
182 // curl encodes some more (all but alnum?), but I go with the standard.
184 // obsoletes: Not fully compliant https://useyourloaf.com/blog/how-to-percent-encode-a-url-string/
185 // and https://stackoverflow.com/a/50116064
// Serialises a form into a request body.
//
// - Parameter form: the fields of the form.
// - Returns: the fields rendered as `key=value` pairs joined by "&".
//   NOTE(review): how the resulting string is turned into Data is not visible here.
186 func formData(_ form:HtmlFormDict) -> Data {
// Percent-encodes everything but the rfc3986_unreserved characters; nil
// (also a failed encoding) becomes "".
187 func rfc3986(_ stst : String?) -> String {
188 return stst?.addingPercentEncoding(withAllowedCharacters:rfc3986_unreserved) ?? ""
// Append each pair, preceded by "&" unless it is the first one.
191 .reduce("") { "\($0)\($0 == "" ? "" : "&")\(rfc3986($1.key))=\(rfc3986($1.value))" }
// Maps an encoding label to a String.Encoding; anything not listed below
// yields .utf8.
// NOTE(review): check() passes HTTPURLResponse.textEncodingName, which
// presumably carries IANA charset names such as "iso-8859-1" - verify that the
// labels below are what actually arrives (any normalisation of `name` is not
// visible here).
195 func encoding(name:String?) -> String.Encoding {
197 case "latin1": return .isoLatin1
198 case "latin2": return .isoLatin2
199 case "cp1250": return .windowsCP1250
200 case "cp1251": return .windowsCP1251
201 case "cp1252": return .windowsCP1252
202 case "cp1253": return .windowsCP1253
203 case "cp1254": return .windowsCP1254
204 case "ascii": return .ascii
205 default: return .utf8
// Validates the outcome of a HTTP request and extracts the HTML forms of the response.
//
// - Parameters:
//   - data: the response body.
//   - rep: the response.
//   - err: the transport error, if any.
// - Returns: the forms found in the body plus an error message. On transport
//   errors, non-HTTP responses, status codes outside 200...299 and empty
//   bodies the forms are empty and the message tells why.
//   NOTE(review): the return for the successful case is not visible here -
//   callers test the message against "".
209 internal func check(_ data: Data?, _ rep: URLResponse?, _ err: Error?) -> (HtmlFormDictDict, String) {
210 let fail : HtmlFormDictDict = [:]
212 return (fail, err.localizedDescription)
214 guard let http = rep as? HTTPURLResponse else {
215 return (fail, String(format:NSLocalizedString("Not a HTTP response, but %@", comment:"ShaarliHtmlClient"), rep ?? "<nil>"))
217 guard (200...299).contains(http.statusCode) else {
218 let msg = HTTPURLResponse.localizedString(forStatusCode:http.statusCode)
219 // here we lose the knowledge of the http status code.
220 return (fail, String(format:NSLocalizedString("Expected response HTTP status '%d %@' but got '%d %@'", comment:"ShaarliHtmlClient"), 200, "Ok", http.statusCode, msg))
222 guard let data = data, data.count > 0 else {
223 return (fail, NSLocalizedString("Got no data. That's not enough.", comment:"ShaarliHtmlClient"))
225 // debugPrint("\(http.allHeaderFields["Date"])")
226 let enco = http.textEncodingName
227 let fo = findHtmlForms(data, enco)
229 // check several typical error scenarios why there may be no form:
// Decode the body with the encoding announced by the response (.utf8 for unknown labels).
230 guard let str = String(bytes: data, encoding: encoding(name:enco)), str.count > 0 else {
231 return (fo, NSLocalizedString("Got no data. That's not enough.", comment:"ShaarliHtmlClient"))
// The whole body is the ban message.
233 guard STR_BANNED != str else {
234 return (fo, STR_BANNED)
236 if let ra = str.range(of:PAT_WRONG_LOGIN, options:.regularExpression) {
// Drop the leading `<script>alert(` plus opening quote (15 characters) and
// the closing quote plus `);` (3 characters); the alert text remains.
237 let err = String(str[ra]).dropFirst(15).dropLast(3)
238 return (fo, String(err))
240 if let ra = str.range(of:PAT_BANNED, options:.regularExpression) {
// Drop the enclosing '>' and '<' matched by PAT_BANNED, then the whitespace around the text.
241 let err = String(str[ra]).dropFirst(1).dropLast(1).trimmingCharacters(in: .whitespacesAndNewlines)
// Reads the server's clock from the "Date" header of a HTTP response.
//
// - Parameter rep: the response.
// - Returns: nil when `rep` is not a HTTP response or has no non-empty "Date"
//   header; otherwise the header is parsed trying the rfc1123, rfc850 and
//   asctime layouts in that order.
248 private func serverTime(_ rep : URLResponse?) -> Date? {
249 guard let http = rep as? HTTPURLResponse else {return nil}
250 let str0 = http.allHeaderFields["Date"] as? String
251 guard let str = str0, str != "" else {return nil}
252 // https://blog.mro.name/2009/08/nsdateformatter-http-header/
253 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
// Fixed time zone (GMT) and POSIX locale for parsing the fixed layouts below.
254 let fmt = DateFormatter()
255 fmt.timeZone = TimeZone(secondsFromGMT:0)
256 fmt.locale = Locale(identifier: "en_US_POSIX")
257 fmt.dateFormat = "EEE',' dd MMM yyyy HH':'mm':'ss 'GMT'" // rfc1123
258 guard let ret = fmt.date(from:str) else {
259 fmt.dateFormat = "EEEE',' dd'-'MMM'-'yy HH':'mm':'ss z" // rfc850
260 guard let ret = fmt.date(from:str) else {
261 fmt.dateFormat = "EEE MMM d HH':'mm':'ss yyyy" // asctime
262 return fmt.date(from:str)
// Tells whether a post is older than the start of the current request.
//
// - Parameters:
//   - reqSrt: client clock time at which the request was started.
//   - srvNow: the server's current time, if the server reported it.
//   - shaarli: the time of the post as reported by the server, if any.
// - Returns: `false` when the post carries no time; otherwise whether the post
//   predates the request start.
internal func isOld(_ reqSrt : Date, _ srvNow : Date?, _ shaarli : Date? ) -> Bool {
    // not getting a time we assume the entry is recent and overwrite in case :-(
    guard let shaarli = shaarli else { return false }
    guard let srvNow = srvNow else {
        // webserver doesn't tell its current time:
        // post is older than request start (comparing client and server time)
        return shaarli < reqSrt
    }
    // post is older than request start (comparing server and server time):
    // the age of the post on the server's clock is at least the time elapsed
    // on the client's clock since the request was started.
    let postAge = srvNow.timeIntervalSince(shaarli)
    let elapsed = Date().timeIntervalSince(reqSrt)
    return postAge >= elapsed
}
// Builds the request for `endpoint` carrying `params` as query items.
// NOTE(review): both force-unwraps crash when the url cannot be decomposed
// into / recomposed from URLComponents.
279 private func createReq(endpoint: URL, params:[URLQueryItem]) -> URLRequest {
280 var uc = URLComponents(url:endpoint, resolvingAgainstBaseURL:true)!
// An empty parameter list is treated specially; the two alternatives are not visible here.
283 uc.queryItems = params.count == 0
286 return URLRequest(url:uc.url!)
289 // a credential-supplying delegate. Otherwise we would have to feed a credential with
290 // matching port and realm into the URLSessionConfiguration.urlCredentialStorage.
291 // That's IMO too picky for NSURLAuthenticationMethodHTTPBasic
// Task delegate supplying an optional, fixed credential.
// In the lines visible here every authentication challenge is answered with
// `.useCredential` and the stored credential, without inspecting the challenge.
292 internal class dlgt : NSObject, URLSessionTaskDelegate {
293 private let cre : URLCredential?
294 internal init(_ cre: URLCredential?) {
298 func urlSession(_ session: URLSession, task: URLSessionTask, didReceive challenge: URLAuthenticationChallenge, completionHandler: @escaping (URLSession.AuthChallengeDisposition, URLCredential?) -> Void) {
299 completionHandler(.useCredential, cre)
// Client that talks to a Shaarli endpoint through the HTML forms it serves
// (login form, link form, configuration form).
303 class ShaarliHtmlClient {
// Tells whether `err` denotes success. The body is not visible here;
// elsewhere in this file the empty string is passed as `error` on success.
305 static func isOk(_ err: String) -> Bool {
// NOTE(review): what `semver` is used for is not visible here.
311 init(_ semver : String) {
315 // prepare the login and be ready for payload - both retrieval and publication.
316 // todo https://youtu.be/vDe-4o8Uwl8?t=3090
// Requests the link form for `url` from `endpoint`, logging in first if required.
//
// - Parameters:
//   - ses: the session to run the requests in.
//   - endpoint: the Shaarli endpoint; its user and password components fill the login form.
//   - url: the url sent as the `post` query parameter.
//   - callback: invoked with four values: the url of the response that carried
//     the link form (URLEmpty on failure), the link form fields (empty on
//     failure), the server time of the first response and an error message
//     ("" on success).
317 internal func loginAndGet(_ ses: URLSession, _ endpoint: URL, _ url: URL, _ callback: @escaping (
319 _ lifo: HtmlFormDict,
321 _ error: String) -> ()
// First request: endpoint?post=<url>&scrape=no
323 let req0 = createReq(endpoint: endpoint, params: [URLQueryItem(name:KEY_PAR_POST, value:url.absoluteString), URLQueryItem(name:KEY_PAR_SCRAPE, value:KEY_VAL_NO)])
324 debugPrint("loginAndGet \(req0.httpMethod ?? HTTP_GET)) -> \(req0)")
325 // https://developer.apple.com/documentation/foundation/url_loading_system/fetching_website_data_into_memory
326 let tsk0 = ses.dataTask(with: req0) { data, response, erro in
// Server time of this first response; also reported by the callbacks of the login request below.
327 let seti = serverTime(response)
// Reports success, provided the link form carries a lf_url field.
329 func do_finish(_ lifobase:URL?, _ lifo:HtmlFormDict) {
330 guard nil != lifo[LF_URL] else {
331 callback(URLEmpty, [:], seti, String(format:NSLocalizedString("%@ not found.", comment: "ShaarliHtmlClient"), LF_URL))
334 // assume link form action == link form html base url
335 callback(lifobase ?? URLEmpty, lifo, seti, "")
338 let d = check(data, response, erro)
339 debugPrint("loginAndGet \(HTTP_GET) <- \(response?.url ?? URLEmpty) data:'\(d)'")
340 guard "" == d.1 else {
341 callback(URLEmpty, [:], seti, d.1)
// No link form in the response: look for the login form instead.
345 guard let lifo = d.0[LINK_FORM] else {
346 // actually that's what we normally expect: not logged in yet.
347 guard var lofo = d.0[LOGIN_FORM] else {
348 callback(URLEmpty, [:], seti, String(format:NSLocalizedString("%@ not found.", comment: "ShaarliHtmlClient"), LOGIN_FORM))
// Fill the login form from the user and password components of the endpoint url.
351 if let uc0 = URLComponents(url:endpoint, resolvingAgainstBaseURL:true) {
352 lofo[KEY_FORM_LOGIN] = uc0.user
353 lofo[KEY_FORM_PASSWORD] = uc0.password
355 callback(URLEmpty, [:], seti, String(format:NSLocalizedString("Cannot parse endpoint '%@'", comment: "ShaarliHtmlClient"), endpoint.absoluteString))
// The login form is posted to the url of the response that delivered it.
358 guard let u0 = response?.url else {
359 callback(URLEmpty, [:], seti, String(format:NSLocalizedString("Response not usable.", comment: "")))
362 var req1 = URLRequest(url:u0)
363 req1.setValue(VAL_HEAD_CONTENT_TYPE, forHTTPHeaderField:KEY_HEAD_CONTENT_TYPE)
364 req1.httpMethod = HTTP_POST
365 let formDat = formData(lofo)
366 debugPrint("loginAndGet \(req1.httpMethod ?? HTTP_POST) \(req1)")
367 let tsk1 = ses.uploadTask(with: req1, from: formDat) { data, response, erro in
368 let d = check(data, response, erro)
369 debugPrint("loginAndGet \(HTTP_POST) <- \(response?.url ?? URLEmpty) data:'\(d)'")
370 guard "" == d.1 else {
371 callback(URLEmpty, [:], seti, d.1)
// The response to the login has to carry the link form.
374 guard let lifo = d.0[LINK_FORM] else {
375 callback(URLEmpty, [:], seti, String(format:NSLocalizedString("%@ not found.", comment: "ShaarliHtmlClient"), LINK_FORM))
378 do_finish(response?.url, lifo)
381 // print("HTTP \(tsk1.originalRequest?.httpMethod) \(tsk1.originalRequest?.url)")
// The first response already carried the link form: no login required.
385 do_finish(response?.url, lifo)
389 // print("HTTP \(tsk0.originalRequest?.httpMethod) \(tsk0.originalRequest?.url)")
// Applies this client's settings to a session configuration.
//
// - Parameters:
//   - cfg: the configuration to adjust.
//   - to: timeout, used for both the request and the resource timeout.
392 private func cfg(_ cfg:URLSessionConfiguration, _ to: TimeInterval) -> URLSessionConfiguration {
393 cfg.allowsCellularAccess = true
394 cfg.httpMaximumConnectionsPerHost = 1
395 cfg.httpShouldSetCookies = true
396 cfg.httpShouldUsePipelining = true
397 cfg.timeoutIntervalForRequest = to
398 cfg.timeoutIntervalForResource = to
399 // cfg.waitsForConnectivity = true
// Always load from the origin, ignoring caches.
400 cfg.requestCachePolicy = .reloadIgnoringLocalAndRemoteCacheData
404 // We need the name of the server. Reliably. So we have to look at ?do=configure.
405 // That's where it's in a HTML form.
406 // so we pretend to ?post= in order to get past the login and then ?do=configure.
408 // The URLCredential are for an optional additional HTTP Basic Auth.
// Probes `endpoint`: logs in via loginAndGet() and then reads the
// configuration form from ?do=configure.
//
// - Parameters:
//   - endpoint: the Shaarli endpoint.
//   - cre: optional credential handed to the session delegate.
//   - to: timeout for the session.
//   - completion: invoked with five values: a url (URLEmpty on failure), the
//     title field of the configuration form, whether that form has a
//     privateLinkByDefault field, the time zone built from its continent and
//     city fields, and an error message ("" on success).
// NOTE(review): no invalidation of `ses` is visible here; a URLSession keeps
// its delegate until it is invalidated - confirm this happens in lines not shown.
409 func probe(_ endpoint: URL, _ cre: URLCredential?,_ to: TimeInterval, _ completion: @escaping (
414 _ error:String) -> Void
416 debugPrint("probe \(endpoint)")
417 let ses = URLSession(configuration:cfg(.ephemeral, to), delegate:dlgt(cre), delegateQueue: nil)
419 loginAndGet(ses, endpoint, URLEmpty) { lurl, lifo, seti, err in
421 guard ShaarliHtmlClient.isOk(err) else {
422 completion(URLEmpty, "", false, nil, err)
425 // do not call back yet, but rather call ?do=configure and report the title.
426 // do we need the possibly rewritten endpoint url?
427 let req = createReq(endpoint:endpoint, params:[URLQueryItem(name: KEY_PAR_DO, value: CMD_DO_CFG)])
428 let tsk = ses.dataTask(with: req) { data, response, err in
429 let res = check(data, response, err)
430 guard "" == res.1 else {
431 completion(URLEmpty, "", false, nil, res.1)
434 guard let cffo = res.0[CFG_FORM] else {
435 completion(URLEmpty, "", false, nil, String(format:NSLocalizedString("%@ not found.", comment: "ShaarliHtmlClient"), CFG_FORM))
// Time zone identifier "<continent>/<city>"; nil when the identifier is unknown.
438 let tizo = TimeZone(identifier:"\(cffo[KEY_FORM_CONT] ?? "")/\(cffo[KEY_FORM_CITY] ?? "")")
// `base` is declared in lines not visible here.
439 completion(base, cffo[KEY_FORM_TITLE] ?? "", cffo[KEY_FORM_PRIDE] != nil, tizo, "")
// Fetches the link form for `url` in a fresh ephemeral session (logging in
// via loginAndGet()) and reports its contents to `completion`.
// Of the values reported, the lines visible here show: the link url (lf_url,
// passed through issue61()), the tags (lf_tags, split by tagsSplit()) and the
// private flag (true when lf_private is present and differs from "off").
445 func get(_ endpoint: URL, _ cre: URLCredential?, _ to: TimeInterval, _ url: URL, _ completion: @escaping (
450 _ description: String,
458 let ses = URLSession(configuration:cfg(.ephemeral, to), delegate:dlgt(cre), delegateQueue:nil)
459 loginAndGet(ses, endpoint, url) { action, lifo, serverTime, err in
464 issue61(lifo[LF_URL]),
467 tagsSplit(lifo[LF_TGS]),
468 (lifo[LF_PRI] ?? VAL_OFF) != VAL_OFF,
476 // Requires a logged-in session as left over by get().
// Submits the link form to `action` within the session `ses`.
// The form fields for url, title, description, tags and private flag are
// overwritten with the given values before the form is posted; `completion`
// receives an error message.
// NOTE(review): both debugPrint calls below log complete request and response bodies.
477 func add(_ ses: URLSession,
481 _ description: String,
485 _ completion: @escaping (_ error: String) -> ()
488 lifo[LF_URL] = url.absoluteString
489 lifo[LF_TIT] = description
490 lifo[LF_DSC] = extended
491 lifo[LF_TGS] = tags.joined(separator: " ")
492 lifo[LF_PRI] = privat
// Keep only the save button among the form's buttons.
495 lifo["save_edit"] = "Save"
496 lifo["cancel_edit"] = nil
497 lifo["delete_link"] = nil
498 var req = createReq(endpoint:action, params:[])
499 req.setValue(VAL_HEAD_CONTENT_TYPE, forHTTPHeaderField:KEY_HEAD_CONTENT_TYPE)
500 req.httpMethod = HTTP_POST
501 let foda = formData(lifo)
502 debugPrint("-> \(req.httpMethod ?? "?") \(req.url ?? URLEmpty) data:\(String(data:foda, encoding:.utf8) ?? "-")")
503 let tsk = ses.uploadTask(with: req, from: foda) { data, response, err in
504 debugPrint("<- \(HTTP_POST) \(response?.url ?? URLEmpty) data:\(data == nil ? "-" : String(data:data!, encoding:.utf8) ?? ""))")
505 let res = check(data, response, err)
509 // print("HTTP", tsk.originalRequest?.httpMethod, tsk.originalRequest?.url)
// Parses a Shaarli time stamp of the layout "yyyyMMdd_HHmmss".
//
// - Parameters:
//   - tz: time zone of the Shaarli instance. NOTE(review): how it is applied
//     is not visible here - presumably assigned to the formatter; confirm.
//   - str: the time stamp; nil or "" yields nil.
// - Returns: the parsed date, or nil when `str` does not match the layout.
513 func timeShaarli(_ tz:TimeZone?, _ str:String?) -> Date? {
514 guard let str = str, str != "" else {return nil}
515 let fmt = DateFormatter()
// NOTE(review): with the locale line disabled the formatter uses the user's
// locale; Apple recommends en_US_POSIX for fixed-format dates - consider
// enabling it, as serverTime() does.
516 // fmt.locale = Locale(identifier: "en_US_POSIX")
517 fmt.dateFormat = "yyyyMMdd_HHmmss"
519 return fmt.date(from:str)
524 // https://oleb.net/2018/sequence-head-tail/#preserving-the-subsequence-type
526 var headAndTail: (head: Element, tail: SubSequence)? {
527 var first: Element? = nil
528 let tail = drop(while: { element in
536 guard let head = first else {