BioTorrents.de’s version of Gazelle
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

BencodeTorrent.php 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. <?php
  2. namespace OrpheusNET\BencodeTorrent;
  3. /**
  4. * BEncode service that allows us to encode PHP objects into BEncode and decode
  5. * BEncode into PHP objects for torrents. BEncode supports the following PHP objects:
  6. * - Associative Array
  7. * - Lists
  8. * - Strings
  9. * - Integers
  10. * with any other type throwing an exception. A list is defined for our purposes
  11. * as an array with only numeric keys in perfect order, otherwise we assume it's
  12. * an associative array and will encode it as a dictionary.
  13. *
  14. * Additionally, as this is for torrent files, we can make the following assumptions
  15. * and requirements:
  16. * 1. Top level data structure must be a dictionary
  17. * 2. Dictionary must contain an info key
  18. * If any of these are violated, then we raise an exception for this particular file.
  19. *
  20. * @see https://wiki.theory.org/index.php/BitTorrentSpecification
  21. *
  22. * For Gazelle, this also acts as a unification of the two original BEncode implementations
  23. * which were both used in separate areas of the codebase.
  24. */
  25. class BencodeTorrent extends Bencode {
  26. const FILELIST_DELIM = 0xF7;
  27. private static $utf8_filelist_delim = null;
  /**
   * Builds the torrent wrapper and makes sure the shared UTF-8 file list
   * delimiter is initialised before any Gazelle file list is generated.
   */
  public function __construct() {
      $this->setDelim();
  }
  /**
   * Lazily initialises the static UTF-8 file list delimiter.
   *
   * A class constant or property default cannot be the result of a function
   * call, so the value is computed the first time an instance is constructed
   * and then reused by every later instance.
   */
  private function setDelim() {
      if (self::$utf8_filelist_delim !== null) {
          return;
      }
      // FILELIST_DELIM is a single ISO-8859-1 byte. mb_convert_encoding() produces
      // the same UTF-8 bytes that utf8_encode() did, without relying on a function
      // that is deprecated as of PHP 8.2.
      self::$utf8_filelist_delim = mb_convert_encoding(
          chr(self::FILELIST_DELIM),
          'UTF-8',
          'ISO-8859-1'
      );
  }
  /**
   * Replaces the internal data array and then checks that it still looks like
   * a torrent (see validate()).
   *
   * @param array $data decoded torrent dictionary
   *
   * @throws \RuntimeException if the data fails torrent validation
   */
  public function setData($data) {
      // Storing is delegated to the parent; this subclass only adds the torrent checks.
      parent::setData($data);
      $this->validate();
  }
  /**
   * Decodes a BEncoded string through the parent decoder and then checks that
   * the result looks like a torrent (see validate()).
   *
   * @param string $data BEncoded torrent contents
   *
   * @throws \RuntimeException if the decoded data fails torrent validation
   */
  public function decodeString(string $data) {
      // Parsing is delegated to the parent; this subclass only adds the torrent checks.
      parent::decodeString($data);
      $this->validate();
  }
  /**
   * Decodes the file at the given path through the parent decoder and then
   * checks that the result looks like a torrent (see validate()).
   *
   * @param string $path location of the .torrent file to read
   *
   * @throws \RuntimeException if the decoded data fails torrent validation
   */
  public function decodeFile(string $path) {
      // Reading and parsing are delegated to the parent; this subclass only adds the torrent checks.
      parent::decodeFile($path);
      $this->validate();
  }
  /**
   * Validates that the internal data array describes a usable torrent:
   *  - the data must be an array,
   *  - it must contain a non-empty 'info' entry,
   *  - for multi-file torrents, no component of any file path may be an empty string.
   *
   * @throws \TypeError if the internal data is not an array
   * @throws \RuntimeException if the info key is missing/empty or a file has an empty path component
   */
  public function validate() {
      if (!is_array($this->data)) {
          throw new \TypeError('Data must be an array');
      }
      if (empty($this->data['info'])) {
          throw new \RuntimeException("Torrent dictionary doesn't have info key");
      }
      if (!isset($this->data['info']['files'])) {
          // Single-file torrent: there are no per-file paths to inspect.
          return;
      }

      // Predicate selecting the path components that are empty strings.
      $is_empty_component = function ($element) {
          return strlen($element) === 0;
      };

      foreach ($this->data['info']['files'] as $file) {
          // Prefer the UTF-8 variant of the path when the torrent provides one.
          $path_key = isset($file['path.utf-8']) ? 'path.utf-8' : 'path';
          if (!isset($file[$path_key])) {
              continue;
          }
          $empty_components = array_filter($file[$path_key], $is_empty_component);
          if (count($empty_components) > 0) {
              throw new \RuntimeException('Cannot have empty path for a file');
          }
      }
  }
  /**
   * Strips the keys we do not need from both the top-level data dictionary and
   * the info dictionary before the torrent is stored in the DB or served to a
   * user (the expectation being that at least setAnnounceUrl(...) is called
   * when a user asks for a valid torrent file).
   *
   * @return bool true if the info dictionary was altered, false otherwise
   */
  public function clean() : bool {
      $this->cleanDataDictionary();
      // Only the info dictionary result is reported back to the caller.
      $info_changed = $this->cleanInfoDictionary();
      return $info_changed;
  }
  /**
   * Removes every top-level key except 'encoding' and 'info' from the data
   * dictionary. The other keys are either not strictly necessary or will be
   * overwritten dynamically on any downloaded torrent (like announce or
   * comment), so dropping them keeps the encoded string stored in the
   * database as small as possible.
   */
  public function cleanDataDictionary() {
      $allowed_keys = ['encoding', 'info'];
      foreach ($this->data as $key => $value) {
          // Strict comparison: with loose comparison an integer key such as 0
          // compares equal to any non-numeric string on PHP 7 and would be kept.
          if (!in_array($key, $allowed_keys, true)) {
              unset($this->data[$key]);
          }
      }
  }
  /**
   * Removes keys from the info dictionary that are not standard or expected.
   * Because the info dictionary is what the info_hash is computed from, any
   * removal changes the hash and requires the user to re-download the torrent.
   *
   * Some keys that are not strictly necessary are still allowed (primarily the
   * two below), because it is better to keep the extra bits than to force a
   * re-download for something a user may not know their client is doing nor
   * how to stop it:
   *  - x_cross_seed is added by PyroCore (@see https://github.com/pyroscope/pyrocore)
   *  - unique is added by xseed (@see https://whatbox.ca/wiki/xseed)
   *
   * @return bool true if at least one key was removed (info_hash changed), false otherwise
   */
  public function cleanInfoDictionary() : bool {
      $allowed_keys = [
          'files', 'name', 'piece length', 'pieces', 'private', 'length',
          'name.utf8', 'name.utf-8', 'md5sum', 'sha1', 'source',
          'file-duration', 'file-media', 'profiles', 'x_cross_seed', 'unique',
      ];
      $cleaned = false;
      foreach ($this->data['info'] as $key => $value) {
          // Strict comparison: with loose comparison an integer key such as 0
          // compares equal to any non-numeric string on PHP 7 and would be kept.
          if (!in_array($key, $allowed_keys, true)) {
              unset($this->data['info'][$key]);
              $cleaned = true;
          }
      }
      return $cleaned;
  }
  /**
   * Tells whether the info dictionary carries a private flag equal to the
   * integer 1. A missing flag, or any other value, counts as not private.
   *
   * @return bool
   */
  public function isPrivate() : bool {
      // hasData() is inherited; presumably it guards against use before any data is loaded.
      $this->hasData();
      $flag = $this->data['info']['private'] ?? null;
      return $flag === 1;
  }
  /**
   * Sets the private flag in the info dictionary to 1 unless it is already
   * set. With the flag set, a client only publishes its presence in the swarm
   * via the tracker in the announce URL; without it the torrent is also
   * discoverable through means such as PEX or DHT, which hurts the security
   * and privacy of a private swarm.
   *
   * @return bool true if the flag had to be changed, false if the torrent was
   *              already private (lets the caller show an appropriate screen)
   */
  public function makePrivate() : bool {
      $this->hasData();
      $already_private = $this->isPrivate();
      if (!$already_private) {
          $this->data['info']['private'] = 1;
          // Keep the info dictionary keys in sorted order after adding one.
          ksort($this->data['info']);
      }
      return !$already_private;
  }
  /**
   * Sets the source flag in the info dictionary to $source. This can be used
   * to guarantee a unique info hash across sites so long as every site uses
   * the source flag. It is not an 'official' flag (no accepted BEP covers it),
   * but it has become the de facto standard, with more clients supporting it
   * natively.
   *
   * @param string $source value to store under info.source
   *
   * @return bool true if the source was set/changed, false if it already had
   *              that value (lets the caller show an appropriate screen)
   */
  public function setSource(string $source) : bool {
      $this->hasData();
      $current_source = $this->data['info']['source'] ?? null;
      if ($current_source === $source) {
          return false;
      }
      // Changing the source already forces a re-download, so the cross-seed
      // markers might as well be dropped at the same time.
      unset($this->data['info']['x_cross_seed'], $this->data['info']['unique']);
      $this->setValue(['info.source' => $source]);
      return true;
  }
  /**
   * Sets any number of keys in the data dictionary. A nested entry is
   * addressed by joining its keys with a period (ex: 'info.name' sets the name
   * field in the info dictionary), so the rest of the dictionary is left
   * untouched. Every array along each addressed path, and the top-level
   * dictionary, is key-sorted afterwards, and the result is validated.
   *
   * @param array $array map of dotted key path => value to store
   *
   * @throws \RuntimeException if the resulting data fails torrent validation
   */
  public function setValue(array $array) {
      foreach ($array as $dotted_key => $new_value) {
          if (is_array($new_value)) {
              ksort($new_value);
          }
          $segments = explode('.', $dotted_key);

          // First pass: walk by reference down to the addressed slot and assign it.
          $cursor = &$this->data;
          foreach ($segments as $segment) {
              $cursor = &$cursor[$segment];
          }
          $cursor = $new_value;

          // Second pass: walk the same path again, key-sorting every array on the way.
          $cursor = &$this->data;
          foreach ($segments as $segment) {
              $cursor = &$cursor[$segment];
              if (is_array($cursor)) {
                  ksort($cursor);
              }
          }
          // Release the reference so it cannot leak into the next iteration.
          unset($cursor);
      }
      ksort($this->data);
      $this->validate();
  }
  /**
   * Returns the SHA-1 hash, as a hexadecimal string, of the BEncoded info
   * dictionary. The hex form can be transmitted over the wire (such as within
   * URLs or in submitted forms). Gazelle primarily relies on it to ensure that
   * all uploaded torrents have unique info hashes and to let a user search for
   * a torrent by its info hash. The BitTorrent protocol uses it when
   * announcing/scraping so the tracker can identify the torrent.
   *
   * @return string hexadecimal SHA-1 digest
   */
  public function getInfoHash() : string {
      $this->hasData();
      $encoded_info = $this->encodeVal($this->data['info']);
      return sha1($encoded_info);
  }
  /**
   * Returns the info hash packed into a binary string: the hexadecimal digest
   * from getInfoHash() is converted with pack('H*'), two hex digits per byte.
   * Note that, despite the method name, the result is raw bytes and not hex text.
   *
   * @return string
   */
  public function getHexInfoHash(): string {
      $hex_digest = $this->getInfoHash();
      return pack('H*', $hex_digest);
  }
  /**
   * Returns the torrent name from the info dictionary, preferring the
   * 'name.utf-8' entry and falling back to 'name'.
   *
   * @return string
   */
  public function getName() {
      return $this->data['info']['name.utf-8'] ?? $this->data['info']['name'];
  }
  /**
   * Returns the total size in bytes of the files in the torrent. For a
   * single-file torrent this is the 'length' key of the info dictionary;
   * otherwise it is the sum of the 'length' of every entry in the 'files' list.
   *
   * @return int
   */
  public function getSize() : int {
      if (!isset($this->data['info']['files'])) {
          // Single-file torrent.
          return $this->data['info']['length'];
      }
      $total = 0;
      foreach ($this->data['info']['files'] as $file) {
          $total += $file['length'];
      }
      return $total;
  }
  /**
   * Describes the files contained in the torrent. The result is an array with
   * two keys:
   *  - 'total_size': the summed length of all files,
   *  - 'files': a list where each element is an array with the keys 'path' and 'size'.
   *
   * For a single-file torrent the one entry is built from the name
   * ('name.utf-8' preferred over 'name') and 'length' keys of the info
   * dictionary. For a multi-file torrent each element of the 'files' list
   * contributes its 'length' and its path ('path.utf-8' preferred over 'path'),
   * whose components are joined with '/'. The multi-file list is sorted by
   * path using a case-insensitive natural order comparison.
   *
   * @return array
   */
  public function getFileList() : array {
      if (!isset($this->data['info']['files'])) {
          // Single-file torrent
          $name = $this->data['info']['name.utf-8'] ?? $this->data['info']['name'];
          $size = $this->data['info']['length'];
          return ['total_size' => $size, 'files' => [['path' => $name, 'size' => $size]]];
      }

      $total = 0;
      $entries = [];
      foreach ($this->data['info']['files'] as $file) {
          $total += $file['length'];
          $path_key = isset($file['path.utf-8']) ? 'path.utf-8' : 'path';
          $entries[] = [
              'path' => implode('/', $file[$path_key]),
              'size' => $file['length'],
          ];
      }
      usort(
          $entries,
          function ($left, $right) {
              return strnatcasecmp($left['path'], $right['path']);
          }
      );
      return ['total_size' => $total, 'files' => $entries];
  }
  /**
   * Tells whether the info dictionary has a non-null 'files' entry, i.e.
   * whether the torrent uses the multi-file layout.
   *
   * @return bool
   */
  public function hasFiles(): bool {
      return ($this->data['info']['files'] ?? null) !== null;
  }
  /**
   * Tells whether the top-level data dictionary has a non-null
   * 'encrypted_files' entry.
   *
   * @return bool
   */
  public function hasEncryptedFiles(): bool {
      return ($this->data['encrypted_files'] ?? null) !== null;
  }
  /**
   * Returns an array of strings formatted to be inserted into a Gazelle
   * database in the table torrents.FileList, which is then used to display the
   * table of files to the user when viewing the group. Each string has the
   * format:
   *     .{extension} s{size}s {path} {delimiter}
   * The first two parts can be split off on ' ', and the delimiter marks where
   * the path ends.
   *
   * Newlines, carriage returns and tabs in a path are each replaced by a space
   * and the path is converted to UTF-8 before formatting.
   *
   * @return array list of formatted file strings, one per file
   */
  public function getGazelleFileList() : array {
      $files = [];
      foreach ($this->getFileList()['files'] as $file) {
          // strtr() ignores surplus characters when $from and $to differ in length,
          // so three replacement spaces are needed to cover "\n", "\r" and "\t".
          $path = $this->makeUTF8(strtr($file['path'], "\n\r\t", '   '));
          $ext_pos = strrpos($path, '.');
          // Deliberately a truthiness check rather than !== false: extension-less
          // files that start with a '.' (position 0) should not get an extension.
          $ext = ($ext_pos ? trim(substr($path, $ext_pos + 1)) : '');
          $files[] = sprintf("%s s%ds %s %s", ".$ext", $file['size'], $path, self::$utf8_filelist_delim);
      }
      return $files;
  }
  /**
   * Given a string, convert it to UTF-8 if it is not already valid UTF-8.
   *
   * Validity is checked first with an empty /u regex, which only matches when
   * the subject is well-formed UTF-8. Otherwise the encoding is detected among
   * UTF-8 and ISO-8859-1, falling back to ISO-8859-1 when detection fails.
   *
   * @param string $str input to convert to UTF-8
   *
   * @return string
   */
  private function makeUTF8(string $str) : string {
      if (preg_match('//u', $str)) {
          $encoding = 'UTF-8';
      }
      if (empty($encoding)) {
          // Strict detection: in non-strict mode mb_detect_encoding() may report
          // 'UTF-8' for a byte sequence the regex check above has just rejected,
          // which would hand back the invalid string unchanged.
          $encoding = mb_detect_encoding($str, 'UTF-8, ISO-8859-1', true);
      }
      // Legacy thing for Gazelle, leaving it in, but not going to bother testing
      // @codeCoverageIgnoreStart
      if (empty($encoding)) {
          $encoding = 'ISO-8859-1';
      }
      // @codeCoverageIgnoreEnd
      if ($encoding === 'UTF-8') {
          return $str;
      }
      // Conversion stays best-effort: warnings are suppressed as before.
      return @mb_convert_encoding($str, 'UTF-8', $encoding);
  }
  366. }