|
@@ -0,0 +1,391 @@
|
|
1
|
+<?php
|
|
2
|
+
|
|
3
|
+namespace OrpheusNET\BencodeTorrent;
|
|
4
|
+
|
|
5
|
/**
 * BEncode service that allows us to encode PHP values into BEncode and decode
 * BEncode into PHP values for torrents. BEncode supports the following PHP types:
 *  - Associative arrays (encoded as dictionaries)
 *  - Lists
 *  - Strings
 *  - Integers
 * with any other type throwing an exception. A list is defined for our purposes
 * as an array with only numeric keys in perfect order, otherwise we assume it's
 * an associative array and will encode it as a dictionary.
 *
 * Additionally, as this is for torrent files, we can make the following assumptions
 * and requirements:
 *  1. Top level data structure must be a dictionary
 *  2. Dictionary must contain an info key
 * If any of these are violated, then we raise an exception for this particular file.
 *
 * @see https://wiki.theory.org/index.php/BitTorrentSpecification
 *
 * For Gazelle, this also acts as a unification of the two original BEncode implementations
 * which were both used in separate areas of the codebase.
 */
class BencodeTorrent extends Bencode {
    /**
     * ISO-8859-1 byte value of the character appended to every entry produced by
     * getGazelleFileList(). It is converted to UTF-8 once, see setDelim().
     */
    const FILELIST_DELIM = 0xF7;

    /** @var string|null UTF-8 encoded form of FILELIST_DELIM, null until setDelim() has run */
    private static $utf8_filelist_delim = null;

    public function __construct() {
        $this->setDelim();
    }

    /**
     * Internal function that sets up the filelist_delim character for use. The UTF-8 form
     * has to be computed with function calls, which cannot appear in a class constant or
     * property initializer, so we wait till the class is instantiated for the first time
     * to set it. Subsequent instances reuse the cached static value.
     */
    private function setDelim() {
        if (self::$utf8_filelist_delim === null) {
            // Same result as the former utf8_encode(chr(...)) call, which is deprecated
            // as of PHP 8.2: treat the single byte as ISO-8859-1 and convert it to UTF-8.
            self::$utf8_filelist_delim = mb_convert_encoding(
                chr(self::FILELIST_DELIM),
                'UTF-8',
                'ISO-8859-1'
            );
        }
    }

    /**
     * Sets the internal data array and validates it as a torrent dictionary.
     *
     * @param array $data
     * @throws \TypeError if the stored data is not an array
     * @throws \RuntimeException if the data fails torrent validation (see validate())
     */
    public function setData($data) {
        parent::setData($data);
        $this->validate();
    }

    /**
     * Decodes a BEncoded string and validates the result as a torrent dictionary.
     *
     * @param string $data
     * @throws \TypeError if the decoded data is not an array
     * @throws \RuntimeException if the data fails torrent validation (see validate())
     */
    public function decodeString(string $data) {
        parent::decodeString($data);
        $this->validate();
    }

    /**
     * Given a path to a file, decodes its contents and validates the result as a
     * torrent dictionary.
     *
     * @param string $path
     * @throws \TypeError if the decoded data is not an array
     * @throws \RuntimeException if the data fails torrent validation (see validate())
     */
    public function decodeFile(string $path) {
        parent::decodeFile($path);
        $this->validate();
    }

    /**
     * Validates that the internal data array looks like a torrent:
     *  - it must be an array,
     *  - it must have a non-empty 'info' key,
     *  - for multi-file torrents, no file may have an empty component in its path list
     *    ('path.utf-8' is checked when present, otherwise 'path').
     *
     * @throws \TypeError if the internal data is not an array
     * @throws \RuntimeException if the info key is missing/empty or a path component is empty
     */
    public function validate() {
        if (!is_array($this->data)) {
            throw new \TypeError('Data must be an array');
        }
        if (empty($this->data['info'])) {
            throw new \RuntimeException("Torrent dictionary doesn't have info key");
        }
        if (!isset($this->data['info']['files'])) {
            // Single-file torrent, there are no per-file paths to check
            return;
        }

        foreach ($this->data['info']['files'] as $file) {
            $path_key = isset($file['path.utf-8']) ? 'path.utf-8' : 'path';
            if (!isset($file[$path_key])) {
                continue;
            }
            $empty_components = array_filter(
                $file[$path_key],
                static function ($element) {
                    return strlen($element) === 0;
                }
            );
            if (count($empty_components) > 0) {
                throw new \RuntimeException('Cannot have empty path for a file');
            }
        }
    }

    /**
     * Utility function to clean out keys in the data and info dictionaries that we don't need in
     * our torrent file when we go to store it in the DB or serve it up to the user (with the
     * expectation that we'll be calling at least setAnnounceUrl(...) when a user asks for a valid
     * torrent file).
     *
     * @return bool flag to indicate if we altered the info dictionary
     */
    public function clean(): bool {
        $this->cleanDataDictionary();
        return $this->cleanInfoDictionary();
    }

    /**
     * Clean out keys within the data dictionary that are not strictly necessary or will be
     * overwritten dynamically on any downloaded torrent (like announce or comment), so that we
     * store the smallest encoded string within the database and cut down on potential waste.
     * Only the 'encoding' and 'info' keys are kept.
     */
    public function cleanDataDictionary() {
        $allowed_keys = ['encoding', 'info'];
        foreach (array_keys($this->data) as $key) {
            // Strict comparison: numeric-looking keys are stored by PHP as integers, and a
            // loose comparison on PHP 7 treats the integer 0 as equal to any non-numeric string.
            if (!in_array($key, $allowed_keys, true)) {
                unset($this->data[$key]);
            }
        }
    }

    /**
     * Cleans out keys within the info dictionary (and would affect the generated info_hash)
     * that are not standard or expected. We do allow some keys that are not strictly necessary
     * (primarily the two below), but that's because it's better to just have the extra bits in
     * the dictionary than having to force a user to re-download the torrent file for something
     * that they might have no idea their client is doing nor how to stop it. Returns TRUE if
     * we had to change something in the info dictionary that would affect the info_hash (thus
     * requiring a re-download), else returns FALSE.
     *
     * x_cross_seed is added by PyroCore (@see https://github.com/pyroscope/pyrocore)
     * unique is added by xseed (@see https://whatbox.ca/wiki/xseed)
     *
     * @return bool true if at least one key was removed from the info dictionary
     */
    public function cleanInfoDictionary(): bool {
        $allowed_keys = [
            'files', 'name', 'piece length', 'pieces', 'private', 'length',
            'name.utf8', 'name.utf-8', 'md5sum', 'sha1', 'source',
            'file-duration', 'file-media', 'profiles', 'x_cross_seed', 'unique',
        ];

        $cleaned = false;
        foreach (array_keys($this->data['info']) as $key) {
            // Strict for the same reason as in cleanDataDictionary()
            if (!in_array($key, $allowed_keys, true)) {
                unset($this->data['info'][$key]);
                $cleaned = true;
            }
        }

        return $cleaned;
    }

    /**
     * Returns a bool on whether the private flag is set to the integer 1 within the
     * info dictionary. Any other value (including the string '1') counts as not private.
     *
     * @return bool
     */
    public function isPrivate(): bool {
        // hasData() is inherited from Bencode; presumably it rejects an unloaded
        // torrent - confirm against the parent class.
        $this->hasData();
        return isset($this->data['info']['private']) && $this->data['info']['private'] === 1;
    }

    /**
     * Sets the private flag (if not already set) in the info dictionary. Setting this to 1 makes
     * it so a client will only publish its presence in the swarm via the tracker in the announce
     * URL, else it'll be discoverable via other means such as PEX peer exchange or DHT, which is
     * a negative for security and privacy of a private swarm. Returns a bool on whether or not
     * the flag was changed so that an appropriate screen can be shown to the user.
     *
     * @return bool true if the flag had to be set, false if the torrent was already private
     */
    public function makePrivate(): bool {
        $this->hasData();
        if ($this->isPrivate()) {
            return false;
        }
        $this->data['info']['private'] = 1;
        // Keep the info dictionary keys in sorted order after inserting the new one
        ksort($this->data['info']);
        return true;
    }

    /**
     * Set the source flag in the info dictionary equal to $source. This can be used to ensure a
     * unique info hash across sites so long as all sites use the source flag. This isn't an
     * 'official' flag (no accepted BEP on it), but it has become the de facto standard with more
     * clients supporting it natively. Returns a boolean on whether or not the source was changed
     * so that an appropriate screen can be shown to the user.
     *
     * When the source is changed, the 'x_cross_seed' and 'unique' info keys are removed too.
     *
     * @param string $source
     *
     * @return bool true if the source was set/changed, false if no change
     * @throws \RuntimeException if the resulting data fails validation (via setValue())
     */
    public function setSource(string $source): bool {
        $this->hasData();
        if (isset($this->data['info']['source']) && $this->data['info']['source'] === $source) {
            return false;
        }
        // Since we've set the source and will require a re-download, we might as well clean
        // these out as well
        unset($this->data['info']['x_cross_seed'], $this->data['info']['unique']);
        $this->setValue(['info.source' => $source]);
        return true;
    }

    /**
     * Function to allow you to set any number of keys and values in the data dictionary. You can
     * set the value in a nested dictionary by concatenating the keys into a string with a period
     * separator (ex: info.name will set the name field in the info dictionary) so that the rest
     * of the dictionary is unaffected. Missing intermediate dictionaries are created.
     *
     * Because the period is always treated as a separator, keys that themselves contain a
     * period (such as 'name.utf-8') cannot be addressed through this method.
     *
     * Every dictionary on the path to a changed value, and the top level dictionary, is
     * re-sorted by key afterwards, and the result is validated.
     *
     * @param array $array map of (dotted) key => value to set
     * @throws \TypeError|\RuntimeException if the resulting data fails validation
     */
    public function setValue(array $array) {
        foreach ($array as $key => $value) {
            if (is_array($value)) {
                ksort($value);
            }
            $segments = explode('.', $key);

            // First pass: walk (and create where necessary) down to the target and assign it
            $node = &$this->data;
            foreach ($segments as $segment) {
                $node = &$node[$segment];
            }
            $node = $value;
            unset($node);

            // Second pass: re-sort every dictionary along the path we just touched
            $node = &$this->data;
            foreach ($segments as $segment) {
                $node = &$node[$segment];
                if (is_array($node)) {
                    ksort($node);
                }
            }
            // Drop the reference so nothing below can write through it by accident
            unset($node);
        }
        ksort($this->data);
        $this->validate();
    }

    /**
     * Get the SHA1 hash of the BEncoded info dictionary as a 40 character hexadecimal string,
     * which is safe to transmit over the wire (such as within URLs or in submitted forms).
     * Gazelle primarily relies on this so that it can ensure that all torrents uploaded have
     * unique info hashes and so a user could search for a torrent based on its info hash. The
     * BitTorrent protocol uses this when announcing/scraping a torrent so that the tracker can
     * identify the torrent the client is talking about.
     *
     * @return string lowercase hexadecimal SHA1 digest
     */
    public function getInfoHash(): string {
        $this->hasData();
        return sha1($this->encodeVal($this->data['info']));
    }

    /**
     * Get the info hash in packed form. Despite the method name, the return value is NOT a
     * hexadecimal string: the hex digest from getInfoHash() is packed into its raw binary
     * representation (20 bytes for a SHA1 digest).
     *
     * @return string raw binary SHA1 digest of the BEncoded info dictionary
     */
    public function getHexInfoHash(): string {
        return pack('H*', $this->getInfoHash());
    }

    /**
     * Get the name of the torrent, preferring the 'name.utf-8' key of the info dictionary
     * over the plain 'name' key when both exist.
     *
     * @return string
     */
    public function getName() {
        if (isset($this->data['info']['name.utf-8'])) {
            return $this->data['info']['name.utf-8'];
        }
        return $this->data['info']['name'];
    }

    /**
     * Get the total size in bytes of the files in the torrent. For a single file torrent, it'll
     * just be the 'length' key in the 'info' dictionary, else we iterate through the 'files' list
     * and add up the 'length' of each element.
     *
     * @return int
     */
    public function getSize(): int {
        if (!isset($this->data['info']['files'])) {
            return $this->data['info']['length'];
        }

        $total = 0;
        foreach ($this->data['info']['files'] as $file) {
            $total += $file['length'];
        }
        return $total;
    }

    /**
     * Get the files that are in the torrent along with their combined size. The returned array
     * has two keys:
     *  - 'total_size': sum of the sizes of all files
     *  - 'files': list of arrays, each with the keys 'path' and 'size'
     *
     * For single file torrents, we just take the name and length keys from the info dictionary.
     * For multiple file torrents, we iterate through the 'files' list where each element has
     * 'length' and 'path' (a list of all components of the path, which we join together with
     * '/'; 'path.utf-8' is preferred when present). Multi-file lists are sorted by path using a
     * case-insensitive natural order.
     *
     * @return array
     */
    public function getFileList(): array {
        if (!isset($this->data['info']['files'])) {
            // Single-file torrent
            $size = $this->data['info']['length'];
            return [
                'total_size' => $size,
                'files' => [
                    ['path' => $this->getName(), 'size' => $size],
                ],
            ];
        }

        $files = [];
        $total = 0;
        foreach ($this->data['info']['files'] as $file) {
            $total += $file['length'];
            $path_key = isset($file['path.utf-8']) ? 'path.utf-8' : 'path';
            $files[] = ['path' => implode('/', $file[$path_key]), 'size' => $file['length']];
        }
        usort(
            $files,
            static function ($a, $b) {
                return strnatcasecmp($a['path'], $b['path']);
            }
        );

        return ['total_size' => $total, 'files' => $files];
    }

    /**
     * Whether this is a multi-file torrent, i.e. the info dictionary has a 'files' key.
     *
     * @return bool
     */
    public function hasFiles(): bool {
        return isset($this->data['info']['files']);
    }

    /**
     * Whether the top level dictionary has an 'encrypted_files' key.
     *
     * @return bool
     */
    public function hasEncryptedFiles(): bool {
        return isset($this->data['encrypted_files']);
    }

    /**
     * Returns an array of strings formatted to be inserted into a Gazelle database into the table
     * torrents.FileList which is then used for displaying the table of files to the user when
     * viewing the group. Format of each string is:
     *      .{extension} s{size}s {path} {delimiter}
     * We use the delimiter so that we can split the first three apart via ' ' and that then we
     * use the delimiter to find where the path ends.
     *
     * Newlines, carriage returns and tabs in a path are each replaced by a single space, and the
     * path is converted to UTF-8 if it isn't already.
     *
     * @return array list of formatted strings, one per file
     */
    public function getGazelleFileList(): array {
        $entries = [];
        foreach ($this->getFileList()['files'] as $file) {
            // Replace each control character with a space. str_replace is used rather than
            // strtr($path, "\n\r\t", ' '): strtr ignores the surplus characters when its two
            // arguments differ in length, which would leave "\r" and "\t" untouched.
            $path = $this->makeUTF8(str_replace(["\n", "\r", "\t"], ' ', $file['path']));

            // Should not be $ext_pos !== false. Extension-less files that start with a .
            // should not get extensions
            // NOTE(review): the dot is searched in the whole path, not just the file name, so a
            // dot in a directory name is picked up for files without their own extension -
            // confirm whether consumers of FileList rely on that before changing it.
            $ext_pos = strrpos($path, '.');
            $ext = ($ext_pos ? trim(substr($path, $ext_pos + 1)) : '');

            $entries[] = sprintf("%s s%ds %s %s", ".$ext", $file['size'], $path, self::$utf8_filelist_delim);
        }
        return $entries;
    }

    /**
     * Given a string, convert it to UTF-8 format, if it's not already in UTF-8.
     *
     * @param string $str input to convert to utf-8 format
     *
     * @return string
     */
    private function makeUTF8(string $str): string {
        // An empty pattern with the /u modifier only matches when the subject is valid UTF-8
        if (preg_match('//u', $str)) {
            return $str;
        }

        $encoding = mb_detect_encoding($str, 'UTF-8, ISO-8859-1');
        // Legacy thing for Gazelle, leaving it in, but not going to bother testing
        // @codeCoverageIgnoreStart
        if (empty($encoding)) {
            $encoding = 'ISO-8859-1';
        }
        // @codeCoverageIgnoreEnd

        if ($encoding === 'UTF-8') {
            return $str;
        }
        return mb_convert_encoding($str, 'UTF-8', $encoding);
    }
}
|