libtorrent/src/torrent_info.cpp

   1 /*
   2
   3 Copyright (c) 2003-2008, Arvid Norberg
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions
   8 are met:
   9
  10     * Redistributions of source code must retain the above copyright
  11       notice, this list of conditions and the following disclaimer.
  12     * Redistributions in binary form must reproduce the above copyright
  13       notice, this list of conditions and the following disclaimer in
  14       the documentation and/or other materials provided with the distribution.
  15     * Neither the name of the author nor the names of its
  16       contributors may be used to endorse or promote products derived
  17       from this software without specific prior written permission.
  18
  19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29 POSSIBILITY OF SUCH DAMAGE.
  30
  31 */
  32
  33 #include "libtorrent/pch.hpp"
  34
  35 #include <ctime>
  36 #include <iostream>
  37 #include <fstream>
  38 #include <iomanip>
  39 #include <iterator>
  40 #include <algorithm>
  41 #include <set>
  42
  43 #ifdef _MSC_VER
  44 #pragma warning(push, 1)
  45 #endif
  46
  47 #include <boost/lexical_cast.hpp>
  48 #include <boost/filesystem/path.hpp>
  49 #include <boost/filesystem.hpp>
  50 #include <boost/bind.hpp>
  51
  52 #ifdef _MSC_VER
  53 #pragma warning(pop)
  54 #endif
  55
  56 #include "libtorrent/torrent_info.hpp"
  57 #include "libtorrent/bencode.hpp"
  58 #include "libtorrent/hasher.hpp"
  59 #include "libtorrent/entry.hpp"
  60 #include "libtorrent/file.hpp"
  61
  62 namespace gr = boost::gregorian;
  63
  64 using namespace libtorrent;
  65
  66 namespace
  67 {
  68
  69         namespace fs = boost::filesystem;
  70
  71         void convert_to_utf8(std::string& str, unsigned char chr)
  72         {
  73                 str += 0xc0 | ((chr & 0xff) >> 6);
  74                 str += 0x80 | (chr & 0x3f);
  75         }
  76
  77         void verify_encoding(file_entry& target)
  78         {
  79                 std::string tmp_path;
  80                 std::string file_path = target.path.string();
  81                 bool valid_encoding = true;
  82                 for (std::string::iterator i = file_path.begin()
  83                         , end(file_path.end()); i != end; ++i)
  84                 {
  85                         // valid ascii-character
  86                         if ((*i & 0x80) == 0)
  87                         {
  88                                 tmp_path += *i;
  89                                 continue;
  90                         }
  91
  92                         if (std::distance(i, end) < 2)
  93                         {
  94                                 convert_to_utf8(tmp_path, *i);
  95                                 valid_encoding = false;
  96                                 continue;
  97                         }
  98
  99                         // valid 2-byte utf-8 character
 100                         if ((i[0] & 0xe0) == 0xc0
 101                                 && (i[1] & 0xc0) == 0x80)
 102                         {
 103                                 tmp_path += i[0];
 104                                 tmp_path += i[1];
 105                                 i += 1;
 106                                 continue;
 107                         }
 108
 109                         if (std::distance(i, end) < 3)
 110                         {
 111                                 convert_to_utf8(tmp_path, *i);
 112                                 valid_encoding = false;
 113                                 continue;
 114                         }
 115
 116                         // valid 3-byte utf-8 character
 117                         if ((i[0] & 0xf0) == 0xe0
 118                                 && (i[1] & 0xc0) == 0x80
 119                                 && (i[2] & 0xc0) == 0x80)
 120                         {
 121                                 tmp_path += i[0];
 122                                 tmp_path += i[1];
 123                                 tmp_path += i[2];
 124                                 i += 2;
 125                                 continue;
 126                         }
 127
 128                         if (std::distance(i, end) < 4)
 129                         {
 130                                 convert_to_utf8(tmp_path, *i);
 131                                 valid_encoding = false;
 132                                 continue;
 133                         }
 134
 135                         // valid 4-byte utf-8 character
 136                         if ((i[0] & 0xf0) == 0xe0
 137                                 && (i[1] & 0xc0) == 0x80
 138                                 && (i[2] & 0xc0) == 0x80
 139                                 && (i[3] & 0xc0) == 0x80)
 140                         {
 141                                 tmp_path += i[0];
 142                                 tmp_path += i[1];
 143                                 tmp_path += i[2];
 144                                 tmp_path += i[3];
 145                                 i += 3;
 146                                 continue;
 147                         }
 148
 149                         convert_to_utf8(tmp_path, *i);
 150                         valid_encoding = false;
 151                 }
 152                 // the encoding was not valid utf-8
 153                 // save the original encoding and replace the
 154                 // commonly used path with the correctly
 155                 // encoded string
 156                 if (!valid_encoding) target.path = tmp_path;
 157         }
 158
 159         bool extract_single_file(lazy_entry const& dict, file_entry& target
 160                 , std::string const& root_dir)
 161         {
 162                 lazy_entry const* length = dict.dict_find("length");
 163                 if (length == 0 || length->type() != lazy_entry::int_t)
 164                         return false;
 165                 target.size = length->int_value();
 166                 target.path = root_dir;
 167                 target.file_base = 0;
 168
 169                 // prefer the name.utf-8
 170                 // because if it exists, it is more
 171                 // likely to be correctly encoded
 172
 173                 lazy_entry const* p = dict.dict_find("path.utf-8");
 174                 if (p == 0 || p->type() != lazy_entry::list_t)
 175                         p = dict.dict_find("path");
 176                 if (p == 0 || p->type() != lazy_entry::list_t)
 177                         return false;
 178
 179                 for (int i = 0, end(p->list_size()); i < end; ++i)
 180                 {
 181                         if (p->list_at(i)->type() != lazy_entry::string_t)
 182                                 return false;
 183                         std::string path_element = p->list_at(i)->string_value();
 184                         if (path_element != "..")
 185                                 target.path /= path_element;
 186                 }
 187                 verify_encoding(target);
 188                 if (target.path.is_complete())
 189                         return false;
 190
 191                 // bitcomet pad file
 192
 193 #if BOOST_VERSION < 103600
 194                 if (target.path.leaf().substr(0, 18) == "_____padding_file_")
 195 #else
 196                 if (target.path.filename().substr(0, 18) == "_____padding_file_")
 197 #endif
 198                         target.pad_file = true;
 199
 200                 return true;
 201         }
 202
 203         bool extract_files(lazy_entry const& list, file_storage& target
 204                 , std::string const& root_dir)
 205         {
 206                 if (list.type() != lazy_entry::list_t) return false;
 207                 for (int i = 0, end(list.list_size()); i < end; ++i)
 208                 {
 209                         file_entry e;
 210                         if (!extract_single_file(*list.list_at(i), e, root_dir))
 211                                 return false;
 212                         target.add_file(e);
 213                 }
 214                 return true;
 215         }
 216 }
 217
 218 namespace libtorrent
 219 {
 220
 221         int load_file(fs::path const& filename, std::vector<char>& v)
 222         {
 223                 file f;
 224                 error_code ec;
 225                 if (!f.open(filename, file::read_only, ec)) return -1;
 226                 f.seek(0, file::end, ec);
 227                 if (ec) return -1;
 228                 size_type s = f.tell(ec);
 229                 if (ec) return -1;
 230                 if (s > 5000000) return -2;
 231                 v.resize(s);
 232                 if (s == 0) return 0;
 233                 f.seek(0, file::begin, ec);
 234                 if (ec) return -1;
 235                 size_type read = f.read(&v[0], s, ec);
 236                 if (read != s) return -3;
 237                 if (ec) return -3;
 238                 return 0;
 239         }
 240
 241 #ifndef TORRENT_NO_DEPRECATE
 242         // standard constructor that parses a torrent file
 243         torrent_info::torrent_info(entry const& torrent_file)
 244                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 245                 , m_multifile(false)
 246                 , m_private(false)
 247                 , m_info_section_size(0)
 248                 , m_piece_hashes(0)
 249         {
 250                 std::vector<char> tmp;
 251                 std::back_insert_iterator<std::vector<char> > out(tmp);
 252                 bencode(out, torrent_file);
 253
 254                 lazy_entry e;
 255                 lazy_bdecode(&tmp[0], &tmp[0] + tmp.size(), e);
 256                 std::string error;
 257 #ifndef BOOST_NO_EXCEPTIONS
 258                 if (!parse_torrent_file(e, error))
 259                         throw invalid_torrent_file();
 260 #else
 261                 parse_torrent_file(e, error);
 262 #endif
 263         }
 264 #endif
 265
 266         torrent_info::torrent_info(lazy_entry const& torrent_file)
 267                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 268                 , m_multifile(false)
 269                 , m_private(false)
 270                 , m_info_section_size(0)
 271                 , m_piece_hashes(0)
 272         {
 273                 std::string error;
 274 #ifndef BOOST_NO_EXCEPTIONS
 275                 if (!parse_torrent_file(torrent_file, error))
 276                         throw invalid_torrent_file();
 277 #else
 278                 parse_torrent_file(torrent_file, error);
 279 #endif
 280         }
 281
 282         torrent_info::torrent_info(char const* buffer, int size)
 283                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 284                 , m_multifile(false)
 285                 , m_private(false)
 286                 , m_info_section_size(0)
 287                 , m_piece_hashes(0)
 288         {
 289                 std::string error;
 290                 lazy_entry e;
 291                 lazy_bdecode(buffer, buffer + size, e);
 292 #ifndef BOOST_NO_EXCEPTIONS
 293                 if (!parse_torrent_file(e, error))
 294                         throw invalid_torrent_file();
 295 #else
 296                 parse_torrent_file(e, error);
 297 #endif
 298         }
 299
 300         // constructor used for creating new torrents
 301         // will not contain any hashes, comments, creation date
 302         // just the necessary to use it with piece manager
 303         // used for torrents with no metadata
 304         torrent_info::torrent_info(sha1_hash const& info_hash)
 305                 : m_info_hash(info_hash)
 306                 , m_creation_date(pt::second_clock::universal_time())
 307                 , m_multifile(false)
 308                 , m_private(false)
 309                 , m_info_section_size(0)
 310                 , m_piece_hashes(0)
 311         {}
 312
 313         torrent_info::torrent_info(fs::path const& filename)
 314                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 315                 , m_multifile(false)
 316                 , m_private(false)
 317         {
 318                 std::vector<char> buf;
 319                 int ret = load_file(filename, buf);
 320                 if (ret < 0) return;
 321
 322                 if (buf.empty())
 323 #ifndef BOOST_NO_EXCEPTIONS
 324                         throw invalid_torrent_file();
 325 #else
 326                         return;
 327 #endif
 328
 329                 lazy_entry e;
 330                 lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
 331                 std::string error;
 332 #ifndef BOOST_NO_EXCEPTIONS
 333                 if (!parse_torrent_file(e, error))
 334                         throw invalid_torrent_file();
 335 #else
 336                 parse_torrent_file(e, error);
 337 #endif
 338         }
 339
 340         torrent_info::~torrent_info()
 341         {}
 342
 343         void torrent_info::swap(torrent_info& ti)
 344         {
 345                 using std::swap;
 346                 m_urls.swap(ti.m_urls);
 347                 m_url_seeds.swap(ti.m_url_seeds);
 348                 m_files.swap(ti.m_files);
 349                 m_nodes.swap(ti.m_nodes);
 350                 swap(m_info_hash, ti.m_info_hash);
 351                 swap(m_creation_date, ti.m_creation_date);
 352                 m_comment.swap(ti.m_comment);
 353                 m_created_by.swap(ti.m_created_by);
 354                 swap(m_multifile, ti.m_multifile);
 355                 swap(m_private, ti.m_private);
 356                 swap(m_info_section, ti.m_info_section);
 357                 swap(m_info_section_size, ti.m_info_section_size);
 358                 swap(m_piece_hashes, ti.m_piece_hashes);
 359                 swap(m_info_dict, ti.m_info_dict);
 360         }
 361
 362         bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
 363         {
 364                 if (info.type() != lazy_entry::dict_t)
 365                 {
 366                         error = "'info' entry is not a dictionary";
 367                         return false;
 368                 }
 369
 370                 // hash the info-field to calculate info-hash
 371                 hasher h;
 372                 std::pair<char const*, int> section = info.data_section();
 373                 h.update(section.first, section.second);
 374                 m_info_hash = h.final();
 375
 376                 // copy the info section
 377                 m_info_section_size = section.second;
 378                 m_info_section.reset(new char[m_info_section_size]);
 379                 memcpy(m_info_section.get(), section.first, m_info_section_size);
 380                 TORRENT_ASSERT(section.first[0] == 'd');
 381                 TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');
 382
 383                 // extract piece length
 384                 int piece_length = info.dict_find_int_value("piece length", -1);
 385                 if (piece_length <= 0)
 386                 {
 387                         error = "invalid or missing 'piece length' entry in torrent file";
 388                         return false;
 389                 }
 390                 m_files.set_piece_length(piece_length);
 391
 392                 // extract file name (or the directory name if it's a multifile libtorrent)
 393                 std::string name = info.dict_find_string_value("name.utf-8");
 394                 if (name.empty()) name = info.dict_find_string_value("name");
 395                 if (name.empty())
 396                 {
 397                         error = "missing name in torrent file";
 398                         return false;
 399                 }
 400
 401                 fs::path tmp = name;
 402                 if (tmp.is_complete())
 403                 {
 404                         name = tmp.leaf();
 405                 }
 406 #if BOOST_VERSION < 103600
 407                 else if (tmp.has_branch_path())
 408 #else
 409                 else if (tmp.has_parent_path())
 410 #endif
 411                 {
 412                         fs::path p;
 413                         for (fs::path::iterator i = tmp.begin()
 414                                 , end(tmp.end()); i != end; ++i)
 415                         {
 416                                 if (*i == "." || *i == "..") continue;
 417                                 p /= *i;
 418                         }
 419                         name = p.string();
 420                 }
 421                 if (name == ".." || name == ".")
 422                 {
 423                         error = "invalid 'name' of torrent (possible exploit attempt)";
 424                         return false;
 425                 }
 426
 427                 // extract file list
 428                 lazy_entry const* i = info.dict_find_list("files");
 429                 if (i == 0)
 430                 {
 431                         // if there's no list of files, there has to be a length
 432                         // field.
 433                         file_entry e;
 434                         e.path = name;
 435                         e.offset = 0;
 436                         e.size = info.dict_find_int_value("length", -1);
 437                         // bitcomet pad file
 438 #if BOOST_VERSION < 103600
 439                         if (e.path.leaf().substr(0, 18) == "_____padding_file_")
 440 #else
 441                         if (e.path.filename().substr(0, 18) == "_____padding_file_")
 442 #endif
 443                                 e.pad_file = true;
 444                         if (e.size < 0)
 445                         {
 446                                 error = "invalid length of torrent";
 447                                 return false;
 448                         }
 449                         m_files.add_file(e);
 450                         m_multifile = false;
 451                 }
 452                 else
 453                 {
 454                         if (!extract_files(*i, m_files, name))
 455                         {
 456                                 error = "failed to parse files from torrent file";
 457                                 return false;
 458                         }
 459                         m_multifile = true;
 460                 }
 461                 m_files.set_name(name);
 462
 463                 // extract sha-1 hashes for all pieces
 464                 // we want this division to round upwards, that's why we have the
 465                 // extra addition
 466
 467                 m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
 468                         / m_files.piece_length()));
 469
 470                 lazy_entry const* pieces = info.dict_find("pieces");
 471                 if (pieces == 0 || pieces->type() != lazy_entry::string_t)
 472                 {
 473                         error = "invalid or missing 'pieces' entry in torrent file";
 474                         return false;
 475                 }
 476
 477                 if (pieces->string_length() != m_files.num_pieces() * 20)
 478                 {
 479                         error = "incorrect number of piece hashes in torrent file";
 480                         return false;
 481                 }
 482
 483                 m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
 484                 TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
 485                 TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);
 486
 487                 m_private = info.dict_find_int_value("private", 0);
 488                 return true;
 489         }
 490
 491         bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
 492         {
 493                 if (torrent_file.type() != lazy_entry::dict_t)
 494                 {
 495                         error = "torrent file is not a dictionary";
 496                         return false;
 497                 }
 498
 499                 // extract the url of the tracker
 500                 lazy_entry const* i = torrent_file.dict_find_list("announce-list");
 501                 if (i)
 502                 {
 503                         m_urls.reserve(i->list_size());
 504                         for (int j = 0, end(i->list_size()); j < end; ++j)
 505                         {
 506                                 lazy_entry const* tier = i->list_at(j);
 507                                 if (tier->type() != lazy_entry::list_t) continue;
 508                                 for (int k = 0, end(tier->list_size()); k < end; ++k)
 509                                 {
 510                                         announce_entry e(tier->list_string_value_at(k));
 511                                         if (e.url.empty()) continue;
 512                                         e.tier = j;
 513                                         m_urls.push_back(e);
 514                                 }
 515                         }
 516
 517                         // shuffle each tier
 518                         std::vector<announce_entry>::iterator start = m_urls.begin();
 519                         std::vector<announce_entry>::iterator stop;
 520                         int current_tier = m_urls.front().tier;
 521                         for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
 522                         {
 523                                 if (stop->tier != current_tier)
 524                                 {
 525                                         std::random_shuffle(start, stop);
 526                                         start = stop;
 527                                         current_tier = stop->tier;
 528                                 }
 529                         }
 530                         std::random_shuffle(start, stop);
 531                 }
 532
 533
 534                 if (m_urls.empty())
 535                 {
 536                         announce_entry e(torrent_file.dict_find_string_value("announce"));
 537                         if (!e.url.empty()) m_urls.push_back(e);
 538                 }
 539
 540                 lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
 541                 if (nodes)
 542                 {
 543                         for (int i = 0, end(nodes->list_size()); i < end; ++i)
 544                         {
 545                                 lazy_entry const* n = nodes->list_at(i);
 546                                 if (n->type() != lazy_entry::list_t
 547                                         || n->list_size() < 2
 548                                         || n->list_at(0)->type() != lazy_entry::string_t
 549                                         || n->list_at(1)->type() != lazy_entry::int_t)
 550                                         continue;
 551                                 m_nodes.push_back(std::make_pair(
 552                                         n->list_at(0)->string_value()
 553                                         , int(n->list_at(1)->int_value())));
 554                         }
 555                 }
 556
 557                 // extract creation date
 558                 size_type cd = torrent_file.dict_find_int_value("creation date", -1);
 559                 if (cd >= 0)
 560                 {
 561                         m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
 562                                 + pt::seconds(long(cd));
 563                 }
 564
 565                 // if there are any url-seeds, extract them
 566                 lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
 567                 if (url_seeds && url_seeds->type() == lazy_entry::string_t)
 568                 {
 569                         m_url_seeds.push_back(url_seeds->string_value());
 570                 }
 571                 else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
 572                 {
 573                         for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
 574                         {
 575                                 lazy_entry const* url = url_seeds->list_at(i);
 576                                 if (url->type() != lazy_entry::string_t) continue;
 577                                 m_url_seeds.push_back(url->string_value());
 578                         }
 579                 }
 580
 581                 m_comment = torrent_file.dict_find_string_value("comment.utf-8");
 582                 if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
 583
 584                 m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
 585                 if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
 586
 587                 lazy_entry const* info = torrent_file.dict_find_dict("info");
 588                 if (info == 0)
 589                 {
 590                         error = "missing or invalid 'info' section in torrent file";
 591                         return false;
 592                 }
 593                 return parse_info_section(*info, error);
 594         }
 595
 596         boost::optional<pt::ptime>
 597         torrent_info::creation_date() const
 598         {
 599                 if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
 600                 {
 601                         return boost::optional<pt::ptime>(m_creation_date);
 602                 }
 603                 return boost::optional<pt::ptime>();
 604         }
 605
 606         void torrent_info::add_tracker(std::string const& url, int tier)
 607         {
 608                 announce_entry e(url);
 609                 e.tier = tier;
 610                 m_urls.push_back(e);
 611
 612                 using boost::bind;
 613                 std::sort(m_urls.begin(), m_urls.end(), boost::bind<bool>(std::less<int>()
 614                         , bind(&announce_entry::tier, _1), bind(&announce_entry::tier, _2)));
 615         }
 616
 617 #ifndef TORRENT_NO_DEPRECATE
 618 // ------- start deprecation -------
 619
 620         void torrent_info::print(std::ostream& os) const
 621         {
 622                 os << "trackers:\n";
 623                 for (std::vector<announce_entry>::const_iterator i = trackers().begin();
 624                         i != trackers().end(); ++i)
 625                 {
 626                         os << i->tier << ": " << i->url << "\n";
 627                 }
 628                 if (!m_comment.empty())
 629                         os << "comment: " << m_comment << "\n";
 630 //              if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
 631 //                      os << "creation date: " << to_simple_string(m_creation_date) << "\n";
 632                 os << "private: " << (m_private?"yes":"no") << "\n";
 633                 os << "number of pieces: " << num_pieces() << "\n";
 634                 os << "piece length: " << piece_length() << "\n";
 635                 os << "files:\n";
 636                 for (file_storage::iterator i = m_files.begin(); i != m_files.end(); ++i)
 637                         os << "  " << std::setw(11) << i->size << "  " << i->path.string() << "\n";
 638         }
 639
 640 // ------- end deprecation -------
 641 #endif
 642
 643 }
 644