Browse Source

Make OPS bencode libs available

pjc 5 years ago
parent
commit
2ec772e042
2 changed files with 566 additions and 0 deletions
  1. 175
    0
      _packages/opsnet-bencode/Bencode.php
  2. 391
    0
      _packages/opsnet-bencode/BencodeTorrent.php

+ 175
- 0
_packages/opsnet-bencode/Bencode.php View File

1
+<?php
2
+
3
+namespace OrpheusNET\BencodeTorrent;
4
+
5
class Bencode {
    /** @var mixed Decoded bencode structure (array, int or string); null until something is loaded */
    protected $data = null;

    /**
     * Sets the internal data structure directly, bypassing the decoder.
     *
     * This base implementation stores the value as given and performs no validation.
     *
     * @param mixed $data
     */
    public function setData($data) {
        $this->data = $data;
    }

    /**
     * Decodes a BEncoded string and stores the result as the internal data.
     *
     * @param string $data
     * @throws \RuntimeException if the string is not well-formed bencode
     */
    public function decodeString(string $data) {
        $this->data = $this->decode($data);
    }

    /**
     * Reads the file at the given path and decodes its contents.
     *
     * @param string $path
     * @throws \RuntimeException if the file cannot be read or is not well-formed bencode
     */
    public function decodeFile(string $path) {
        // The second argument of file_get_contents() is $use_include_path, not a flag
        // bitmask, so nothing is passed there; the read is always binary-safe.
        $contents = file_get_contents($path);
        if ($contents === false) {
            throw new \RuntimeException('Could not read bencode file: '.$path);
        }
        $this->data = $this->decode($contents);
    }

    /**
     * Decodes a BEncoded string to the following values:
     * - Dictionary (starts with d, ends with e) -> array keyed by string
     * - List (starts with l, ends with e) -> sequential array
     * - Integer (starts with i, ends with e) -> int
     * - String (starts with number denoting number of bytes followed by : and then the string)
     *
     * Every read is bounds-checked so that truncated or malformed input raises an
     * exception instead of looping forever or silently yielding partial values.
     *
     * Note: PHP turns decimal-integer-looking array keys (e.g. "10") into int keys, so a
     * dictionary whose keys are exactly "0", "1", ... cannot be told apart from a list
     * once decoded.
     *
     * @see https://wiki.theory.org/index.php/BitTorrentSpecification
     *
     * @param string $data
     * @param int    $pos  offset to start decoding at; advanced past the decoded value.
     *                     A call starting at offset 0 is treated as the top-level call and
     *                     must consume the whole string.
     * @return mixed
     * @throws \RuntimeException on malformed input
     */
    protected function decode(string $data, int &$pos = 0) {
        $start_decode = $pos === 0;
        $length = strlen($data);
        $type = $this->byteAt($data, $pos);
        if ($type === 'd') {
            $pos++;
            $return = [];
            while ($this->byteAt($data, $pos) !== 'e') {
                $key = $this->decode($data, $pos);
                if (!is_string($key)) {
                    throw new \RuntimeException('Invalid key type, must be string: '.gettype($key));
                }
                $return[$key] = $this->decode($data, $pos);
            }
            // Bencode dictionaries are ordered by raw key bytes. SORT_STRING keeps
            // numeric-looking keys ("10" before "9") in that order, whereas the default
            // flag would compare them as numbers.
            ksort($return, SORT_STRING);
            $pos++;
        }
        elseif ($type === 'l') {
            $pos++;
            $return = [];
            while ($this->byteAt($data, $pos) !== 'e') {
                $return[] = $this->decode($data, $pos);
            }
            $pos++;
        }
        elseif ($type === 'i') {
            $pos++;
            $end = strpos($data, 'e', $pos);
            if ($end === false) {
                throw new \RuntimeException('Unterminated integer in bencode string');
            }
            $digits = $end - $pos;
            $return = substr($data, $pos, $digits);
            if ($return === '-0') {
                throw new \RuntimeException('Cannot have integer value -0');
            }
            $multiplier = 1;
            if ($return !== '' && $return[0] === '-') {
                $multiplier = -1;
                $return = substr($return, 1);
            }
            // ctype_digit('') is false, so an empty integer ("ie" or "i-e") is rejected here
            if (!ctype_digit($return)) {
                $msg = 'Cannot have non-digit values in integer number: '.$return;
                throw new \RuntimeException($msg);
            }
            $return = $multiplier * ((int) $return);
            $pos += $digits + 1;
        }
        else {
            $colon = strpos($data, ':', $pos);
            if ($colon === false) {
                throw new \RuntimeException('Missing length delimiter for string in bencode string');
            }
            $digits = $colon - $pos;
            $len_str = substr($data, $pos, $digits);
            if (!ctype_digit($len_str)) {
                throw new \RuntimeException('Invalid string length in bencode string: '.$len_str);
            }
            $len = (int) $len_str;
            $pos += ($digits + 1);
            if ($len > $length - $pos) {
                throw new \RuntimeException('String length exceeds remaining bencode data');
            }
            $return = substr($data, $pos, $len);
            $pos += $len;
        }
        if ($start_decode && $pos !== $length) {
            throw new \RuntimeException('Could not fully decode bencode string');
        }
        return $return;
    }

    /**
     * Returns the single byte at $pos, refusing to read past the end of the input.
     *
     * @param string $data
     * @param int    $pos
     * @return string
     * @throws \RuntimeException if $pos is outside of $data
     */
    private function byteAt(string $data, int $pos) : string {
        if ($pos < 0 || $pos >= strlen($data)) {
            throw new \RuntimeException('Unexpected end of bencode string');
        }
        return $data[$pos];
    }

    /**
     * Get the internal data structure
     * @return mixed
     */
    public function getData() {
        return $this->data;
    }

    /**
     * Guard used before any operation that needs data to be loaded.
     *
     * @throws \RuntimeException if no data has been set or decoded yet
     */
    protected function hasData() {
        if ($this->data === null) {
            throw new \RuntimeException('Must decode proper bencode string first');
        }
    }

    /**
     * Returns the internal data as a BEncoded string.
     *
     * @return string
     * @throws \RuntimeException if no data has been set or decoded yet
     */
    public function getEncode() : string {
        $this->hasData();
        return $this->encodeVal($this->data);
    }

    /**
     * Recursively BEncodes a value.
     *
     * An array whose keys are exactly 0, 1, 2, ... in that order (including the empty
     * array) is written as a list; any other array is written as a dictionary in its
     * current key order. Integers are written as i<n>e. Everything else is cast to a
     * string and written as a byte string.
     *
     * @param mixed $data
     * @return string
     */
    protected function encodeVal($data) : string {
        if (is_array($data)) {
            $check = -1;
            $list = true;
            foreach ($data as $key => $value) {
                if ($key !== ++$check) {
                    $list = false;
                    break;
                }
            }

            if ($list) {
                $return = 'l';
                foreach ($data as $value) {
                    $return .= $this->encodeVal($value);
                }
            }
            else {
                $return = 'd';
                foreach ($data as $key => $value) {
                    // PHP stores numeric-looking keys as ints; bencode keys are always strings
                    $return .= $this->encodeVal(strval($key));
                    $return .= $this->encodeVal($value);
                }
            }
            return $return.'e';
        }
        if (is_int($data)) {
            return 'i'.$data.'e';
        }
        // Explicit cast so non-string scalars and null are measured as the string that is
        // actually emitted (and strlen() never receives a non-string argument).
        $data = (string) $data;
        return strlen($data) . ':' . $data;
    }
}

+ 391
- 0
_packages/opsnet-bencode/BencodeTorrent.php View File

1
+<?php
2
+
3
+namespace OrpheusNET\BencodeTorrent;
4
+
5
/**
 * BEncode service that allows us to encode PHP values into BEncode and decode
 * BEncode into PHP values for torrents. BEncode supports the following PHP values:
 *      - Associated Array
 *      - Lists
 *      - Strings
 *      - Integers
 * A list is defined for our purposes as an array with only numeric keys in perfect
 * order, otherwise we assume it's an associated array and will encode as a dictionary.
 *
 * Additionally, as this is for torrent files, we can make the following assumptions
 * and requirements:
 *  1. Top level data structure must be a dictionary
 *  2. Dictionary must contain an info key
 * If any of these are violated, then we raise an exception for this particular file.
 *
 * @see https://wiki.theory.org/index.php/BitTorrentSpecification
 *
 * For Gazelle, this also acts as a unification of the two original BEncode implementations
 * which were both used in separate areas of the codebase.
 */
class BencodeTorrent extends Bencode {
    /** ISO-8859-1 code point (0xF7, the division sign) that terminates each file list entry */
    const FILELIST_DELIM = 0xF7;
    /** @var string|null UTF-8 encoding of FILELIST_DELIM, built once on first construction */
    private static $utf8_filelist_delim = null;

    public function __construct() {
        $this->setDelim();
    }

    /**
     * Internal function that sets up the filelist_delim character for use. We cannot call
     * functions to initialise a class constant or static property, so we wait till the class
     * is instantiated for the first time to set it.
     */
    private function setDelim() {
        if (self::$utf8_filelist_delim === null) {
            // utf8_encode() is deprecated as of PHP 8.2; this is the equivalent
            // ISO-8859-1 -> UTF-8 conversion.
            self::$utf8_filelist_delim = mb_convert_encoding(
                chr(self::FILELIST_DELIM),
                'UTF-8',
                'ISO-8859-1'
            );
        }
    }

    /**
     * Sets the internal data array and validates it as a torrent
     * @param array $data
     * @throws \TypeError if the data is not an array
     * @throws \RuntimeException if the data is not a valid torrent dictionary
     */
    public function setData($data) {
        parent::setData($data);
        $this->validate();
    }

    /**
     * Decodes a BEncoded string and validates the result as a torrent
     * @param string $data
     * @throws \TypeError if the decoded value is not an array
     * @throws \RuntimeException if decoding or validation fails
     */
    public function decodeString(string $data) {
        parent::decodeString($data);
        $this->validate();
    }

    /**
     * Given a path to a file, decode the contents of it and validate the result as a torrent
     *
     * @param string $path
     * @throws \TypeError if the decoded value is not an array
     * @throws \RuntimeException if decoding or validation fails
     */
    public function decodeFile(string $path) {
        parent::decodeFile($path);
        $this->validate();
    }

    /**
     * Validates that the internal data array looks like a torrent: it must be an array with a
     * non-empty 'info' dictionary, and in a multi-file torrent no file may have an empty
     * path component.
     *
     * @throws \TypeError if the data is not an array
     * @throws \RuntimeException if the torrent structure is invalid
     */
    public function validate() {
        if (!is_array($this->data)) {
            throw new \TypeError('Data must be an array');
        }
        if (empty($this->data['info'])) {
            throw new \RuntimeException("Torrent dictionary doesn't have info key");
        }
        if (!is_array($this->data['info'])) {
            throw new \RuntimeException('Torrent info key must be a dictionary');
        }
        if (!isset($this->data['info']['files'])) {
            return;
        }
        if (!is_array($this->data['info']['files'])) {
            throw new \RuntimeException('Torrent files key must be a list');
        }
        foreach ($this->data['info']['files'] as $file) {
            $path_key = isset($file['path.utf-8']) ? 'path.utf-8' : 'path';
            if (!isset($file[$path_key])) {
                continue;
            }
            // The path is later joined with implode(), so it has to be a list of components
            if (!is_array($file[$path_key])) {
                throw new \RuntimeException('File path must be a list of path components');
            }
            foreach ($file[$path_key] as $element) {
                $is_text = is_scalar($element) || $element === null;
                if ($is_text && strlen((string) $element) === 0) {
                    throw new \RuntimeException('Cannot have empty path for a file');
                }
            }
        }
    }

    /**
     * Utility function to clean out keys in the data and info dictionaries that we don't need in
     * our torrent file when we go to store it in the DB or serve it up to the user (with the
     * expectation that we'll be calling at least setAnnounceUrl(...) when a user asks for a valid
     * torrent file).
     *
     * @return bool flag to indicate if we altered the info dictionary
     */
    public function clean() : bool {
        $this->cleanDataDictionary();
        return $this->cleanInfoDictionary();
    }

    /**
     * Clean out keys within the data dictionary that are not strictly necessary or will be
     * overwritten dynamically on any downloaded torrent (like announce or comment), so that we
     * store the smallest encoded string within the database and cut down on potential waste.
     */
    public function cleanDataDictionary() {
        $allowed_keys = ['encoding', 'info'];
        foreach (array_keys($this->data) as $key) {
            // Strict comparison on the string form: a loose in_array() lets an integer key
            // such as 0 match any non-numeric string on PHP < 8.
            if (!in_array((string) $key, $allowed_keys, true)) {
                unset($this->data[$key]);
            }
        }
    }

    /**
     * Cleans out keys within the info dictionary (and would affect the generated info_hash)
     * that are not standard or expected. We do allow some keys that are not strictly necessary
     * (primarily the two below), but that's because it's better to just have the extra bits in
     * the dictionary than having to force a user to re-download the torrent file for something
     * that they might have no idea their client is doing nor how to stop it. Returns TRUE if
     * we had to change something in the info dictionary that would affect the info_hash (thus
     * requiring a re-download), else return FALSE.
     *
     * x_cross_seed is added by PyroCor (@see https://github.com/pyroscope/pyrocore)
     * unique is added by xseed (@see https://whatbox.ca/wiki/xseed)
     *
     * @return bool
     */
    public function cleanInfoDictionary() : bool {
        $cleaned = false;
        $allowed_keys = [
            'files', 'name', 'piece length', 'pieces', 'private', 'length',
            'name.utf8', 'name.utf-8', 'md5sum', 'sha1', 'source',
            'file-duration', 'file-media', 'profiles', 'x_cross_seed', 'unique',
        ];
        foreach (array_keys($this->data['info']) as $key) {
            // Strict comparison on the string form, see cleanDataDictionary()
            if (!in_array((string) $key, $allowed_keys, true)) {
                unset($this->data['info'][$key]);
                $cleaned = true;
            }
        }

        return $cleaned;
    }

    /**
     * Returns a bool on whether the private flag is set to the integer 1 within the info
     * dictionary.
     *
     * @return bool
     */
    public function isPrivate() : bool {
        $this->hasData();
        return isset($this->data['info']['private']) && $this->data['info']['private'] === 1;
    }

    /**
     * Sets the private flag (if not already set) in the info dictionary. Setting this to 1 makes
     * it so a client will only publish its presence in the swarm via the tracker in the announce
     * URL, else it'll be discoverable via other means such as PEX peer exchange or dht, which is
     * a negative for security and privacy of a private swarm. Returns a bool on whether or not
     * the flag was changed so that an appropriate screen can be shown to the user.
     *
     * @return bool
     */
    public function makePrivate() : bool {
        $this->hasData();
        if ($this->isPrivate()) {
            return false;
        }
        $this->data['info']['private'] = 1;
        // Keep the info dictionary in key order after inserting the new key
        ksort($this->data['info']);
        return true;
    }

    /**
     * Set the source flag in the info dictionary equal to $source. This can be used to ensure a
     * unique info hash across sites so long as all sites use the source flag. This isn't an
     * 'official' flag (no accepted BEP on it), but it has become the defacto standard with more
     * clients supporting it natively. Returns a boolean on whether or not the source was changed
     * so that an appropriate screen can be shown to the user.
     *
     * @param string $source
     *
     * @return bool true if the source was set/changed, false if no change
     */
    public function setSource(string $source) : bool {
        $this->hasData();
        if (isset($this->data['info']['source']) && $this->data['info']['source'] === $source) {
            return false;
        }
        // Since we've set the source and will require a re-download, we might as well clean
        // these out as well
        unset($this->data['info']['x_cross_seed'], $this->data['info']['unique']);
        $this->setValue(['info.source' => $source]);
        return true;
    }

    /**
     * Function to allow you set any number of keys and values in the data dictionary. You can
     * set the value in a dictionary by concatenating the keys into a string with a period
     * separator (ex: info.name will set name field in the info dictionary) so that the rest
     * of the dictionary is unaffected. Every array along each path is re-sorted by key, and
     * the result is validated once all values are set.
     *
     * @param array $array map of (dotted) key path => value
     * @throws \RuntimeException if the resulting data no longer validates as a torrent
     */
    public function setValue(array $array) {
        foreach ($array as $key => $value) {
            if (is_array($value)) {
                ksort($value);
            }
            $keys = explode('.', (string) $key);

            // Walk down to the target slot by reference (creating levels as needed) and assign
            $data = &$this->data;
            foreach ($keys as $segment) {
                $data = &$data[$segment];
            }
            $data = $value;

            // Walk the same path again, re-sorting every array we pass through
            $data = &$this->data;
            foreach ($keys as $segment) {
                $data = &$data[$segment];
                if (is_array($data)) {
                    ksort($data);
                }
            }
            // Drop the reference so nothing written later can alias into $this->data
            unset($data);
        }
        ksort($this->data);
        $this->validate();
    }

    /**
     * Get the SHA1 hash (as a 40 character hexadecimal string) of the BEncoded info
     * dictionary. Gazelle primarily relies on this so that it can ensure that all torrents
     * uploaded have unique info hashes and so a user could search for a torrent based on its
     * info hash. The BitTorrent protocol uses this when announcing/scraping a torrent so that
     * the tracker can identify the torrent the client is talking about.
     *
     * @return string
     */
    public function getInfoHash() : string {
        $this->hasData();
        return sha1($this->encodeVal($this->data['info']));
    }

    /**
     * Get the info hash packed into its raw binary form (the hex digits of getInfoHash()
     * converted to bytes). Despite the method name, the return value is NOT a hexadecimal
     * string.
     *
     * @return string
     */
    public function getHexInfoHash(): string {
        return pack('H*', $this->getInfoHash());
    }

    /**
     * Get the name of the torrent, preferring 'name.utf-8' over 'name' in the info dictionary.
     *
     * @return string
     */
    public function getName() {
        if (isset($this->data['info']['name.utf-8'])) {
            return $this->data['info']['name.utf-8'];
        }
        return $this->data['info']['name'];
    }

    /**
     * Get the total size in bytes of the files in the torrent. For a single file torrent, it'll
     * just be the 'length' key in the 'info' dictionary, else we iterate through the 'files' list
     * and add up the 'length' of each element.
     *
     * @return int
     */
    public function getSize() : int {
        if (!isset($this->data['info']['files'])) {
            return $this->data['info']['length'];
        }
        $cur_size = 0;
        foreach ($this->data['info']['files'] as $file) {
            $cur_size += $file['length'];
        }
        return $cur_size;
    }

    /**
     * Get the files that are in the torrent. The result is an array with two keys:
     * 'total_size' (sum of all file lengths) and 'files', a list where each element is an
     * array with the keys 'path' and 'size'. For single file torrents, we just take the name
     * and length keys from the info dictionary. For multiple file torrents, we iterate through
     * the 'files' list where each element has 'length' and 'path' (which is a list of all
     * components of the path, which we join together with '/'), and sort the result by path
     * using a case-insensitive natural order.
     *
     * @return array
     */
    public function getFileList() : array {
        $files = [];
        if (!isset($this->data['info']['files'])) {
            // Single-file torrent
            $name = (isset($this->data['info']['name.utf-8']) ?
                $this->data['info']['name.utf-8'] :
                $this->data['info']['name']);
            $size = $this->data['info']['length'];
            $files[] = ['path' => $name, 'size' => $size];
        }
        else {
            $size = 0;
            foreach ($this->data['info']['files'] as $file) {
                $size += $file['length'];
                $path_key = isset($file['path.utf-8']) ? 'path.utf-8' : 'path';
                $files[] = ['path' => implode('/', $file[$path_key]), 'size' => $file['length']];
            }
            usort(
                $files,
                static function ($a, $b) {
                    return strnatcasecmp($a['path'], $b['path']);
                }
            );
        }
        return ['total_size' => $size, 'files' => $files];
    }

    /**
     * Whether the info dictionary has a 'files' key (i.e. this is a multi-file torrent).
     *
     * @return bool
     */
    public function hasFiles(): bool {
        return isset($this->data['info']['files']);
    }

    /**
     * Whether the top level dictionary has an 'encrypted_files' key.
     *
     * @return bool
     */
    public function hasEncryptedFiles(): bool {
        return isset($this->data['encrypted_files']);
    }

    /**
     * Returns an array of strings formatted to be inserted into a Gazelle database into the table
     * torrents.FileList which is then used for displaying the table of files to the user when
     * viewing the group. Format of each string is:
     * .{extension} s{size}s {name} {delimiter}
     * We use the delimiter so that we can split the first three apart via ' ' and that then we
     * use the delimiter to find where the name ends.
     *
     * @return array
     */
    public function getGazelleFileList() : array {
        $files = [];
        foreach ($this->getFileList()['files'] as $file) {
            $size = $file['size'];
            // Line breaks and tabs would corrupt the space separated format, so blank them out
            $path = $this->makeUTF8(strtr($file['path'], "\n\r\t", '   '));
            $ext_pos = strrpos($path, '.');
            // Should not be $ext_pos !== false. Extension-less files that start with a .
            // should not get extensions
            $ext = ($ext_pos ? trim(substr($path, $ext_pos + 1)) : '');
            $files[] =  sprintf("%s s%ds %s %s", ".$ext", $size, $path, self::$utf8_filelist_delim);
        }
        return $files;
    }

    /**
     * Given a string, convert it to UTF-8 format, if it's not already in UTF-8.
     *
     * @param string $str input to convert to utf-8 format
     *
     * @return string
     */
    private function makeUTF8(string $str) : string {
        // An empty pattern with the /u modifier only matches when the subject is valid UTF-8
        if (preg_match('//u', $str)) {
            return $str;
        }
        $encoding = mb_detect_encoding($str, 'UTF-8, ISO-8859-1');
        // Legacy thing for Gazelle, leaving it in, but not going to bother testing
        // @codeCoverageIgnoreStart
        if (empty($encoding)) {
            $encoding = 'ISO-8859-1';
        }
        // @codeCoverageIgnoreEnd
        if ($encoding === 'UTF-8') {
            return $str;
        }
        return @mb_convert_encoding($str, 'UTF-8', $encoding);
    }
}

Loading…
Cancel
Save