BioTorrents.de’s version of Gazelle
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ntseq.js 31KB


  1. var Nt = function() {
  2. 'use strict';
  /**
   * Build a plain Array holding `length` copies of `val`.
   *
   * @param {number} length Desired length; coerced to a 32-bit integer.
   *   Zero or negative lengths produce an empty array.
   * @param {*} [val=0] Fill value. A value that compares below zero is
   *   replaced by 0 (kept from the original implementation).
   * @returns {Array} A new array of `length` entries.
   */
  function makeArray(length, val) {
    if (val === undefined) { val = 0; }
    if (val < 0) { val = 0; }
    length |= 0;
    // A negative length used to be treated as a huge unsigned number by the
    // bit-shifting loops, which tried to build an array of ~2^31 entries.
    if (length <= 0) {
      return [];
    }
    var filled = new Array(length);
    for (var i = 0; i < length; i++) {
      filled[i] = val;
    }
    return filled;
  }
  /* Lookup table: __bitCount[b] is the number of set bits in the byte b. */
  var __bitCount = (function() {
    var table = new Uint8Array(256);
    for (var value = 0; value < 256; value++) {
      var ones = 0;
      for (var rest = value; rest !== 0; rest >>>= 1) {
        ones += rest & 1;
      }
      table[value] = ones;
    }
    return table;
  })();
  /*
   * Symbol -> 4-bit code. Each of A, T, G, C owns one bit; every other
   * symbol is the bitwise OR of the bases it is built from below.
   */
  var __nucleotideTo4Bit = Object.create(null);
  __nucleotideTo4Bit['A'] = 0x8;
  __nucleotideTo4Bit['T'] = 0x4;
  __nucleotideTo4Bit['G'] = 0x2;
  __nucleotideTo4Bit['C'] = 0x1;

  /*
   * setNucleotide(symbol, base1, base2, ...): registers `symbol` with the
   * OR of the codes of the listed bases (0 when no base is given).
   */
  function setNucleotide() {
    var symbol = arguments[0];
    __nucleotideTo4Bit[symbol] = 0;
    for (var k = 1; k < arguments.length; k++) {
      __nucleotideTo4Bit[symbol] |= __nucleotideTo4Bit[arguments[k]];
    }
  }

  /* Registered in this order so Object.keys() keeps its original ordering. */
  [
    ['-'],
    ['W', 'A', 'T'],
    ['S', 'G', 'C'],
    ['M', 'A', 'C'],
    ['K', 'G', 'T'],
    ['R', 'A', 'G'],
    ['Y', 'C', 'T'],
    ['B', 'C', 'G', 'T'],
    ['D', 'A', 'G', 'T'],
    ['H', 'A', 'C', 'T'],
    ['V', 'A', 'C', 'G'],
    ['N', 'A', 'T', 'G', 'C']
  ].forEach(function(definition) {
    setNucleotide.apply(null, definition);
  });
  /* Inverse of __nucleotideTo4Bit: 4-bit code -> symbol. */
  var __4BitToNucleotide = (function() {
    var table = makeArray(16);
    Object.keys(__nucleotideTo4Bit).forEach(function(symbol) {
      table[__nucleotideTo4Bit[symbol]] = symbol;
    });
    return table;
  })();
  /* Every known symbol, in registration order. */
  var __nucleotideList = Object.keys(__nucleotideTo4Bit);
  /* Symbol -> complementary symbol. S, W, N and '-' map to themselves. */
  var __complementNucleotide = (function() {
    var table = Object.create(null);
    var pairs = ['AT', 'GC', 'BV', 'HD', 'MK', 'RY'];
    var p;
    // Forward direction first, then the reverse, so the key order stays
    // A, G, B, H, M, R, T, C, V, D, K, Y.
    for (p = 0; p < pairs.length; p++) {
      table[pairs[p].charAt(0)] = pairs[p].charAt(1);
    }
    for (p = 0; p < pairs.length; p++) {
      table[pairs[p].charAt(1)] = pairs[p].charAt(0);
    }
    ['S', 'W', 'N', '-'].forEach(function(same) {
      table[same] = same;
    });
    return table;
  })();
  /* 4-bit code -> 4-bit code of the complementary symbol. */
  var __complement4Bit = (function() {
    var table = new Uint8Array(16);
    for (var code = 0; code < table.length; code++) {
      var symbol = __4BitToNucleotide[code];
      table[code] = __nucleotideTo4Bit[__complementNucleotide[symbol]];
    }
    return table;
  })();
  /* Symbol -> 4-bit code; unknown symbols give 0. */
  function nucleotideToBin(s) {
    var code = __nucleotideTo4Bit[s];
    return code | 0;
  }
  /* 4-bit code -> symbol; anything without an entry gives '-'. */
  function binToNucleotide(b) {
    var symbol = __4BitToNucleotide[b];
    return symbol ? symbol : '-';
  }
  /* Symbol -> complementary symbol; unknown symbols give '-'. */
  function complementNucleotide(s) {
    var partner = __complementNucleotide[s];
    return partner ? partner : '-';
  }
  /* 4-bit code -> complementary 4-bit code; out-of-table input gives 0. */
  function complementBin(b) {
    var partner = __complement4Bit[b];
    return partner | 0;
  }
  /*
   * Double up to form bytes: one byte stores two symbols, the first in the
   * low nibble and the second in the high nibble.
   */
  var __byteComplement;        // byte -> byte for the complemented pair, symbols swapped
  var __nucleotidesToByte;     // two-symbol string -> byte
  var __byteToNucleotides;     // byte -> two-symbol string
  var __byteNucleotideContent; // byte -> [first symbol, second symbol]
  void function() {
    var pairToByte = Object.create(null);
    var complementOfByte = new Uint8Array(256);
    var byteToPair = makeArray(256);
    var byteToSymbols = makeArray(256);
    var symbols = Object.keys(__nucleotideTo4Bit);
    symbols.forEach(function(first) {
      symbols.forEach(function(second) {
        var packed = __nucleotideTo4Bit[first] | (__nucleotideTo4Bit[second] << 4);
        pairToByte[first + second] = packed;
        complementOfByte[packed] =
          __nucleotideTo4Bit[complementNucleotide(second)] |
          (__nucleotideTo4Bit[complementNucleotide(first)] << 4);
        byteToPair[packed] = first + second;
        byteToSymbols[packed] = [first, second];
      });
    });
    __nucleotidesToByte = pairToByte;
    __byteComplement = complementOfByte;
    __byteToNucleotides = byteToPair;
    __byteNucleotideContent = byteToSymbols;
  }();
  /* Two-symbol string -> packed byte; unknown pairs give 0. */
  function nucleotidesToByte(ss) {
    var packed = __nucleotidesToByte[ss];
    return packed | 0;
  }
  /* amino acids */
  /*
   * Codon -> one-letter result. Codons are generated in A, T, G, C order for
   * each of the three positions, and `residues` lists the matching letters
   * in the same order (16 per first base). TAA, TAG and TGA map to '*', '&'
   * and '$' respectively.
   */
  var __codonToAminoAcid = (function() {
    var bases = 'ATGC';
    var residues =
      'KNKNIIMIRSRSTTTT' + // A..
      '*Y&YLFLF$CWCSSSS' + // T..
      'EDEDVVVVGGGGAAAA' + // G..
      'QHQHLLLLRRRRPPPP';  // C..
    var table = Object.create(null);
    var next = 0;
    for (var first = 0; first < 4; first++) {
      for (var second = 0; second < 4; second++) {
        for (var third = 0; third < 4; third++) {
          var codon = bases.charAt(first) + bases.charAt(second) + bases.charAt(third);
          table[codon] = residues.charAt(next++);
        }
      }
    }
    return table;
  })();
  /*
   * 12-bit codon code -> amino acid letter. The first base sits in bits 0-3,
   * the second in bits 4-7 and the third in bits 8-11; any code without a
   * codon entry holds '?'.
   */
  var __12BitToAminoAcid = (function() {
    var table = makeArray(4096, '?');
    Object.keys(__codonToAminoAcid).forEach(function(codon) {
      var index =
        nucleotideToBin(codon[0]) |
        (nucleotideToBin(codon[1]) << 4) |
        (nucleotideToBin(codon[2]) << 8);
      table[index] = __codonToAminoAcid[codon];
    });
    return table;
  })();
  /**
   * A nucleotide sequence stored two symbols per byte behind a 4-byte
   * header.
   *
   * @constructor
   * @param {string} [type='DNA'] Either 'DNA' or 'RNA'.
   * @throws {Error} When `type` is anything other than 'DNA' or 'RNA'.
   */
  function Seq(type) {
    if (type === undefined) {
      type = 'DNA';
    }
    // Compare explicitly: a lookup in an object literal also accepts
    // inherited names such as 'toString' or 'constructor'.
    if (type !== 'DNA' && type !== 'RNA') {
      throw new Error('Sequence type ' + type + ' not supported');
    }
    this.__type = type;
    this.__isRNA = (type === 'RNA');
    this.__endPadding = 0;
    // An empty sequence: header only, so its length is 0 rather than
    // undefined.
    this.__buffer = new ArrayBuffer(4);
    this.__length = 0;
    // Lazily computed caches.
    this.__complement = null;
    this.__content = null;
    this.__fractionalContent = null;
    this.__contentATGC = null;
    this.__fractionalContentATGC = null;
  }
  /**
   * Load a sequence from text, replacing whatever this Seq held before.
   *
   * The text is upper-cased, whitespace is removed, every U becomes T, and
   * any character outside the supported symbol set becomes '-'.
   *
   * @param {string} strData Sequence text.
   * @returns {Seq} This sequence, for chaining.
   */
  Seq.prototype.read = function(strData) {
    var nucleotideString = strData.toUpperCase()
      .replace(/\s/g, '')
      // Global replace: a plain string pattern only converts the first U,
      // leaving the others to be turned into '-' by the next step.
      .replace(/U/g, 'T')
      .replace(/[^ATGCBVHDMKRYSWN\-]/g, '-');
    var length = nucleotideString.length | 0;
    var max = length >>> 1;                     // number of complete pairs
    var odd = length & 1;                       // 1 when a lone symbol is left
    var endPadding = (4 - (max + odd) % 4) % 4; // pads the data to whole 32-bit words
    var buffer = new ArrayBuffer(max + odd + endPadding + 4);
    var dataArray = new Uint8Array(buffer, 4);
    for (var i = 0; i < max; i++) {
      dataArray[i] = nucleotidesToByte(
        nucleotideString.charAt(i << 1) + nucleotideString.charAt((i << 1) + 1)
      );
    }
    if (odd) {
      // The last symbol fills only the low nibble of its byte.
      dataArray[max] = __nucleotideTo4Bit[nucleotideString.charAt(length - 1)];
    }
    (new Uint32Array(buffer, 0, 1))[0] = length;
    this.__buffer = buffer;
    this.__length = length;
    this.__endPadding = endPadding;
    // Every cached view described the previous contents.
    this.__complement = null;
    this.__content = null;
    this.__fractionalContent = null;
    this.__contentATGC = null;
    this.__fractionalContentATGC = null;
    return this;
  };
  /**
   * Load a sequence from FASTA text. Leading lines that start with '>' are
   * skipped; everything after them is joined and passed to `read`.
   *
   * @param {string} strFASTA FASTA-formatted text.
   * @returns {Seq} This sequence, for chaining.
   */
  Seq.prototype.readFASTA = function(strFASTA) {
    // Accept CRLF, LF (optionally followed by CR, as before) and lone CR.
    // Splitting on LF only made a CR-delimited file look like one long
    // header line, so the whole sequence was discarded.
    var lines = strFASTA.split(/\r\n|\n\r?|\r/);
    var firstSequenceLine = 0;
    while (firstSequenceLine < lines.length &&
        lines[firstSequenceLine].charAt(0) === '>') {
      firstSequenceLine++;
    }
    return this.read(lines.slice(firstSequenceLine).join(''));
  };
  /**
   * Adopt an existing packed buffer as this sequence's storage. The buffer
   * is used as-is (not copied); its first four bytes are read as the
   * unsigned 32-bit sequence length.
   *
   * @param {ArrayBuffer} buffer Packed sequence data, at least 4 bytes long.
   * @returns {Seq} This sequence, for chaining.
   */
  Seq.prototype.readBuffer = function(buffer) {
    var length = (new Uint32Array(buffer, 0, 1))[0];
    var max = length >>> 1;
    var odd = length & 1;
    this.__buffer = buffer;
    this.__length = length;
    this.__endPadding = (4 - (max + odd) % 4) % 4;
    // Drop every cache, including the ATGC fraction, which the original
    // never cleared (it reset __fractionalContent twice instead).
    this.__complement = null;
    this.__content = null;
    this.__fractionalContent = null;
    this.__contentATGC = null;
    this.__fractionalContentATGC = null;
    return this;
  };
  /*
   * Builds the packed buffer of the reverse complement: data bytes are
   * emitted in reverse order, each mapped through the byte complement table
   * (which also swaps the two symbols inside the byte).
   * Returns a new ArrayBuffer with the same length header.
   */
  Seq.prototype.__byteComplement = function() {
    var complementOf = __byteComplement;
    var source = this.__buffer;
    var byteLength = source.byteLength;
    var aligned;
    if (this.__length & 1) {
      // Odd length: move the whole payload up by one nibble first, so the
      // unused half-byte ends up at the front once the bytes are reversed.
      aligned = new ArrayBuffer(byteLength);
      var shifted = new Uint32Array(aligned, 4);
      var words = new Uint32Array(source, 4);
      for (var w = ((byteLength - 4) >>> 2) - 1; w >= 0; w--) {
        shifted[w] = (words[w] << 4) | ((words[w - 1]) >>> 28);
      }
    } else {
      aligned = source;
    }
    var forward = new Uint8Array(aligned, 4);
    var result = new ArrayBuffer(byteLength);
    var reversed = new Uint8Array(result, 4);
    var used = (byteLength - 4) - this.__endPadding;
    for (var out = 0; out < used; out++) {
      reversed[out] = complementOf[forward[used - 1 - out]];
    }
    (new Uint32Array(result, 0, 1))[0] = this.__length;
    return result;
  };
  /**
   * @returns {number} Number of nucleotides in the sequence; 0 when nothing
   *   has been read yet (the length field may still be unset then).
   */
  Seq.prototype.size = function() {
    return this.__length || 0;
  };
  /*
   * Returns the sequence as text. RNA sequences show U in place of T.
   */
  Seq.prototype.sequence = function() {
    var buffer = this.__buffer;
    if (buffer.byteLength < 4) {
      return '';
    }
    var packed = new Uint8Array(buffer, 4);
    var byteCount = (buffer.byteLength - 4) - this.__endPadding;
    var pieces = [];
    for (var b = 0; b < byteCount; b++) {
      pieces.push(__byteToNucleotides[packed[b]]);
    }
    if (this.__length & 1) {
      // The final byte holds a single symbol; drop the filler second one.
      var last = pieces.length - 1;
      pieces[last] = pieces[last][0];
    }
    var text = pieces.join('');
    return this.__isRNA ? text.replace(/T/gi, 'U') : text;
  };
  /*
   * Returns a new Seq built from the buffer produced by __byteComplement
   * (computed once and cached). The new Seq's own complement cache is
   * primed with a copy of this sequence's buffer.
   */
  Seq.prototype.complement = function() {
    if (!this.__complement) {
      this.__complement = this.__byteComplement();
    }
    var twin = new Seq(this.__type);
    twin = twin.readBuffer(this.__complement.slice(0));
    twin.__complement = this.__buffer.slice(0);
    return twin;
  };
  /*
   * True when `seq` has the same type as this sequence and every 32-bit
   * word of this buffer (length header included) equals the word at the
   * same index in seq's buffer. Throws if `seq` is not a Seq.
   */
  Seq.prototype.equivalent = function(seq) {
    if (!(seq instanceof Seq)) {
      throw new Error('Can only check for equivalence between sequences');
    }
    if (this.__type !== seq.__type) {
      return false;
    }
    var mine = new Uint32Array(this.__buffer);
    var theirs = new Uint32Array(seq.__buffer);
    var word = mine.length;
    while (word--) {
      if (mine[word] !== theirs[word]) {
        return false;
      }
    }
    return true;
  };
  /**
   * Copy part (or all) of the sequence into a new Seq.
   *
   * @param {number} [start=0] First nucleotide to copy. Negative values
   *   count back from the end and are clamped at the beginning.
   * @param {number} [length] Number of nucleotides to copy; defaults to
   *   everything from `start` onward and is clamped to what is available.
   * @returns {Seq} The copy; an empty sequence when nothing is selected.
   */
  Seq.prototype.replicate = function(start, length) {
    var total = this.__length >>> 0;
    start |= 0;
    if (start < 0) {
      // Clamp: a start further back than the beginning used to stay
      // negative and was handed to __slice as a bogus byte offset.
      start = Math.max(0, total + start);
    }
    if (length === undefined) {
      if (start === 0) {
        return this.__clone();
      }
      length = total - start;
    } else {
      length = Math.min(length | 0, total - start);
    }
    if (length <= 0) {
      return this.__nullSeq();
    }
    return this.__slice(start, length);
  };
  /**
   * Concatenate: returns a new Seq holding this sequence followed by `seq`.
   * Neither operand is modified; the result takes this sequence's type.
   *
   * @param {Seq} seq Sequence to append.
   * @returns {Seq} The joined sequence.
   * @throws {Error} When `seq` is not a Seq.
   */
  Seq.prototype.polymerize = function(seq) {
    // Validate before touching `seq`, so a missing argument raises the
    // intended error instead of a TypeError on property access.
    if (!(seq instanceof Seq)) {
      throw new Error('.polymerize requires valid sequence');
    }
    if (!this.__length) {
      return seq.__clone();
    }
    // A Seq that was never read may have no length yet; treat it as empty
    // rather than letting NaN wipe out the result.
    var seqLen = seq.__length >>> 0;
    var length = this.__length + seqLen;
    var max = length >>> 1;
    var odd = length & 1;
    var endPadding = (4 - (max + odd) % 4) % 4;
    var newBuffer = new ArrayBuffer(max + odd + endPadding + 4);
    var newArray = new Uint32Array(newBuffer, 4);
    var copyArray = new Uint32Array(this.__buffer, 4);
    var seqArray = new Uint32Array(seq.__buffer, 4);
    // Bit position inside the last copied word where `seq` has to start.
    var shift = (this.__length % 8) * 4;
    var shiftSeq = 32 - shift;
    var copyPos = 0;
    var i;
    var len;
    for (len = copyArray.length; copyPos < len; copyPos++) {
      newArray[copyPos] = copyArray[copyPos];
    }
    if (shift) {
      // Fill the free upper part of the last word, then stream the rest of
      // `seq` across word boundaries. Reads past the end of seqArray give
      // undefined (treated as 0) and writes past newArray are ignored.
      newArray[--copyPos] |= seqArray[0] << shift;
      for (i = 0, len = seqArray.length; i < len; i++) {
        newArray[++copyPos] = (seqArray[i] >>> shiftSeq) | (seqArray[i + 1] << shift);
      }
    } else {
      for (i = 0, len = seqArray.length; i < len; i++) {
        newArray[copyPos++] = seqArray[i];
      }
    }
    (new Uint32Array(newBuffer, 0, 1))[0] = length;
    return new Seq(this.__type).readBuffer(newBuffer);
  };
  /**
   * Returns a new Seq with `seq` inserted at `offset`.
   *
   * @param {Seq} seq Sequence to insert.
   * @param {number} [offset=0] Insert position. Negative values count back
   *   from the end; the position is clamped to the range 0..length.
   * @returns {Seq} The resulting sequence.
   * @throws {Error} When `seq` is not a Seq.
   */
  Seq.prototype.insertion = function(seq, offset) {
    if (!(seq instanceof Seq)) {
      throw new Error('Insertion requires valid sequence');
    }
    offset |= 0;
    if (offset < 0) {
      // Clamp so an offset beyond the start cannot stay negative.
      offset = Math.max(0, this.__length + offset);
    }
    offset = Math.min(offset, this.__length);
    var head = this.replicate(0, offset);
    var tail = this.replicate(offset);
    return head.polymerize(seq).polymerize(tail);
  };
  /**
   * Returns a new Seq with `count` nucleotides removed starting at `offset`.
   *
   * @param {number} offset First nucleotide to remove. Negative values
   *   count back from the end; the position is clamped to 0..length.
   * @param {number} count Number of nucleotides to remove (0 returns a copy).
   * @returns {Seq} The resulting sequence.
   * @throws {Error} When either argument is missing or `count` is negative.
   */
  Seq.prototype.deletion = function(offset, count) {
    if (offset === undefined || count === undefined) {
      throw new Error('Must give valid offset and count for deletion');
    }
    offset |= 0;
    count |= 0;
    if (count === 0) {
      return this.__clone();
    }
    if (count < 0) {
      throw new Error('Invalid count for deletion');
    }
    if (offset < 0) {
      // Clamp so an offset beyond the start cannot stay negative.
      offset = Math.max(0, this.__length + offset);
    }
    offset = Math.min(offset, this.__length);
    var head = this.replicate(0, offset);
    var tail = this.replicate(offset + count);
    return head.polymerize(tail);
  };
  /*
   * Returns a new Seq made of this sequence repeated `count` times, built
   * by repeated doubling. A count of zero or less gives a fresh, empty Seq
   * of the same type.
   */
  Seq.prototype.repeat = function(count) {
    count |= 0;
    var unit = this.replicate();
    var result = new Seq(this.__type);
    if (count <= 0) {
      return result;
    }
    do {
      if (count & 1) {
        result = result.polymerize(unit);
      }
      count >>>= 1;
      if (count) {
        unit = unit.polymerize(unit);
      }
    } while (count);
    return result;
  };
  /*
   * Returns a new Seq whose data words are the bitwise AND of this
   * sequence's words and `seq`'s words at the same index. Words for which
   * `seq` has no data become 0. Throws if `seq` is not a Seq.
   */
  Seq.prototype.mask = function(seq) {
    if (!(seq instanceof Seq)) {
      throw new Error('Can only mask with valid sequence');
    }
    var maskedBuffer = this.__buffer.slice(0);
    var target = new Uint32Array(maskedBuffer, 4);
    var other = new Uint32Array(seq.__buffer, 4);
    for (var w = target.length - 1; w >= 0; w--) {
      target[w] = target[w] & other[w];
    }
    return new Seq(this.__type).readBuffer(maskedBuffer);
  };
  /*
   * Returns a new Seq whose data words are the bitwise OR of this
   * sequence's words and `seq`'s words at the same index. Words for which
   * `seq` has no data are left as they are. Throws if `seq` is not a Seq.
   */
  Seq.prototype.cover = function(seq) {
    if (!(seq instanceof Seq)) {
      throw new Error('Can only cover with valid sequence');
    }
    var coveredBuffer = this.__buffer.slice(0);
    var target = new Uint32Array(coveredBuffer, 4);
    var other = new Uint32Array(seq.__buffer, 4);
    for (var w = target.length - 1; w >= 0; w--) {
      target[w] = target[w] | other[w];
    }
    return new Seq(this.__type).readBuffer(coveredBuffer);
  };
  /* Returns a new, empty Seq of the same type (header-only buffer). */
  Seq.prototype.__nullSeq = function() {
    var empty = new Seq(this.__type);
    return empty.readBuffer(new ArrayBuffer(4));
  };
  /* Returns a new Seq of the same type backed by a copy of this buffer. */
  Seq.prototype.__clone = function() {
    var copy = new Seq(this.__type);
    return copy.readBuffer(this.__buffer.slice(0));
  };
  /*
   * Copies `length` nucleotides starting at nucleotide `start` into a new
   * Seq. An odd `start` begins in the middle of a byte, so the data is
   * shifted down by one nibble while copying. Bits beyond the requested
   * length are cleared from the last word afterwards.
   */
  Seq.prototype.__slice = function(start, length) {
    var pairBytes = length >>> 1;
    var odd = length & 1;
    var endPadding = (4 - (pairBytes + odd) % 4) % 4;
    var newBuffer = new ArrayBuffer(pairBytes + odd + endPadding + 4);
    var target = new Uint32Array(newBuffer, 4);
    var firstByte = 4 + (start >>> 1);
    var chunk = this.__buffer.slice(firstByte, firstByte + newBuffer.byteLength);
    var wholeWords = chunk.byteLength >>> 2;
    var source = new Uint32Array(chunk, 0, wholeWords);
    // Bytes of the chunk that do not fill a complete 32-bit word.
    var hasTail = chunk.byteLength !== source.byteLength;
    var tailTarget;
    var tailSource;
    var w;
    var b;
    if (start & 1) {
      for (w = 0; w < wholeWords; w++) {
        target[w] = (source[w] >>> 4) | (source[w + 1] << 28);
      }
      if (hasTail) {
        tailTarget = new Uint8Array(newBuffer, 4 + (wholeWords << 2));
        tailSource = new Uint8Array(chunk, wholeWords << 2);
        if (target.length > 0) {
          // The last whole word still needs the nibble held by the first
          // leftover byte.
          target[wholeWords - 1] |= tailSource[0] << 28;
        }
        for (b = 0; b < tailSource.length; b++) {
          tailTarget[b] = (tailSource[b] >>> 4) | (tailSource[b + 1] << 4);
        }
      }
    } else {
      for (w = 0; w < wholeWords; w++) {
        target[w] = source[w];
      }
      if (hasTail) {
        tailTarget = new Uint8Array(newBuffer, 4 + (wholeWords << 2));
        tailSource = new Uint8Array(chunk, wholeWords << 2);
        for (b = 0; b < tailSource.length; b++) {
          tailTarget[b] = tailSource[b];
        }
      }
    }
    var clearShift = ((endPadding * 2) + odd) * 4;
    var lastWord = new Uint32Array(newBuffer, newBuffer.byteLength - 4);
    lastWord[0] = (lastWord[0] << clearShift) >>> clearShift;
    (new Uint32Array(newBuffer, 0, 1))[0] = length;
    return new Seq(this.__type).readBuffer(newBuffer);
  };
  /*
   * Returns an object mapping every known symbol to how often it occurs in
   * the sequence. RNA sequences report the 'T' count under 'U'. The tally
   * is computed once and cached; callers always get a fresh copy.
   */
  Seq.prototype.content = function() {
    if (!this.__content) {
      // First count how often each packed byte occurs, then credit both
      // symbols of the byte.
      var byteCounts = makeArray(256);
      var bytes = new Uint8Array(this.__buffer);
      var stop = this.__buffer.byteLength - this.__endPadding;
      for (var pos = 4; pos < stop; pos++) {
        byteCounts[bytes[pos]]++;
      }
      var tally = Object.create(null);
      __nucleotideList.forEach(function(nt) {
        tally[nt] = 0;
      });
      byteCounts.forEach(function(seen, byte) {
        if (seen) {
          tally[binToNucleotide(byte & 0xF)] += seen;
          tally[binToNucleotide(byte >>> 4)] += seen;
        }
      });
      if (this.__length & 1) {
        // The unused upper nibble of the final byte was counted as '-'.
        tally['-']--;
      }
      if (this.__isRNA) {
        tally['U'] = tally['T'];
        delete tally['T'];
      }
      this.__content = tally;
    }
    var cached = this.__content;
    var copy = Object.create(null);
    Object.keys(cached).forEach(function(key) {
      copy[key] = cached[key];
    });
    return copy;
  };
  /*
   * Same keys as content(), with each count divided by the sequence length.
   * Computed once and cached; callers always get a fresh copy.
   */
  Seq.prototype.fractionalContent = function() {
    var cached = this.__fractionalContent;
    if (!cached) {
      var total = this.__length;
      cached = this.content();
      Object.keys(cached).forEach(function(nt) {
        cached[nt] = cached[nt] / total;
      });
      this.__fractionalContent = cached;
    }
    var copy = Object.create(null);
    Object.keys(cached).forEach(function(key) {
      copy[key] = cached[key];
    });
    return copy;
  };
  /**
   * Estimate how many A, T (U for RNA), G and C the sequence contains.
   * Each ambiguous symbol splits its count equally among the bases it can
   * stand for; '-' contributes nothing.
   *
   * @returns {Object} Prototype-less object with keys A, T, G, C (A, G, C,
   *   U for RNA). Computed once and cached; a fresh copy is returned.
   */
  Seq.prototype.contentATGC = function() {
    if (!this.__contentATGC) {
      var content = this.content();
      var symbols = Object.keys(content);
      var totals = Object.create(null);
      totals['A'] = 0;
      totals['T'] = 0;
      totals['G'] = 0;
      totals['C'] = 0;
      var bitA = nucleotideToBin('A');
      var bitT = nucleotideToBin('T');
      var bitG = nucleotideToBin('G');
      var bitC = nucleotideToBin('C');
      for (var i = 0, len = symbols.length; i < len; i++) {
        var symbol = symbols[i];
        // content() reports thymine as 'U' for RNA, but the bit table only
        // knows 'T'; without this mapping all U counts were dropped.
        var code = nucleotideToBin(symbol === 'U' ? 'T' : symbol);
        var count = content[symbol];
        if (!code || !count) {
          continue;
        }
        var bits = 0;
        for (var rest = code; rest; bits++) { rest &= rest - 1; }
        var share = count * (1 / bits);
        if (code & bitA) { totals['A'] += share; }
        if (code & bitT) { totals['T'] += share; }
        if (code & bitG) { totals['G'] += share; }
        if (code & bitC) { totals['C'] += share; }
      }
      if (this.__isRNA) {
        totals['U'] = totals['T'];
        delete totals['T'];
      }
      this.__contentATGC = totals;
    }
    var cached = this.__contentATGC;
    var returnContent = Object.create(null);
    var keys = Object.keys(cached);
    for (var k = 0; k < keys.length; k++) {
      returnContent[keys[k]] = cached[keys[k]];
    }
    return returnContent;
  };
  /*
   * Same keys as contentATGC(), with each value divided by the sequence
   * length. Computed once and cached; callers always get a fresh copy.
   */
  Seq.prototype.fractionalContentATGC = function() {
    var cached = this.__fractionalContentATGC;
    if (!cached) {
      var total = this.__length;
      cached = this.contentATGC();
      Object.keys(cached).forEach(function(nt) {
        cached[nt] = cached[nt] / total;
      });
      this.__fractionalContentATGC = cached;
    }
    var copy = Object.create(null);
    Object.keys(cached).forEach(function(key) {
      copy[key] = cached[key];
    });
    return copy;
  };
  /**
   * Translate nucleotides into a string of amino-acid letters, three
   * nucleotides per letter.
   *
   * @param {number} [ntOffset=0] Index of the first nucleotide to translate.
   * @param {number} [ntCount] Number of nucleotides to translate; defaults
   *   to everything after `ntOffset`. Rounded down to a multiple of three.
   * @returns {string} One letter per codon; '' when fewer than three
   *   nucleotides are selected (for example when the offset is past the end).
   */
  Seq.prototype.translate = function(ntOffset, ntCount) {
    var binToAA = __12BitToAminoAcid;
    ntOffset |= 0;
    if (ntCount === undefined) {
      ntCount = this.__length - ntOffset;
    }
    ntCount |= 0;
    ntCount -= (ntCount % 3);
    // An offset beyond the end makes the count negative; the original then
    // asked for a negatively sized result array.
    if (ntCount <= 0) {
      return '';
    }
    var codonCount = ntCount / 3;
    var offset = (ntOffset >>> 1) + 4;
    var max = offset + (ntCount >>> 1) + (ntCount & 1);
    var dataArray = new Uint8Array(this.__buffer);
    var aminoAcids = [];
    var i;
    var lastByte;
    var byte1;
    var byte2;
    var byte3;
    // Three bytes hold six nucleotides, i.e. two codons per iteration. The
    // lookup index has the first base in bits 0-3, the second in bits 4-7
    // and the third in bits 8-11.
    if ((ntOffset & 1) === 0) {
      for (i = offset; i < max; i += 3) {
        byte1 = dataArray[i];
        byte2 = dataArray[i + 1];
        byte3 = dataArray[i + 2];
        aminoAcids.push(binToAA[byte1 | ((byte2 & 0xF) << 8)]);
        aminoAcids.push(binToAA[(byte3 << 4) | (byte2 >>> 4)]);
      }
    } else {
      // Odd start: the first codon begins in the upper nibble of the byte
      // before the current group.
      lastByte = dataArray[offset];
      for (i = offset + 1; i < max; i += 3) {
        byte1 = dataArray[i];
        byte2 = dataArray[i + 1];
        byte3 = dataArray[i + 2];
        aminoAcids.push(binToAA[(lastByte >> 4) | (byte1 << 4)]);
        aminoAcids.push(binToAA[byte2 | ((byte3 & 0xF) << 8)]);
        lastByte = byte3;
      }
    }
    // The loops work in pairs; drop the extra codon of an odd total.
    aminoAcids.length = codonCount;
    return aminoAcids.join('');
  };
  /*
   * Translates in reading frame 0, 1 or 2. AAoffset and AAcount are given
   * in amino acids and converted to nucleotide units before being handed
   * to translate(). Throws for any other frame value.
   */
  Seq.prototype.translateFrame = function(frame, AAoffset, AAcount) {
    if (frame === undefined) {
      frame = 0;
    }
    if (!(frame === 0 || frame === 1 || frame === 2)) {
      throw new Error('Invalid translation frame, must be 0, 1 or 2.');
    }
    if (AAoffset === undefined) {
      return this.translate(frame);
    }
    var ntOffset = frame + ((AAoffset | 0) * 3);
    if (AAcount === undefined) {
      return this.translate(ntOffset);
    }
    return this.translate(ntOffset, (AAcount * 3) | 0);
  };
  /*
   * Creates a MatchMap with `seq` as the query and this sequence as the
   * search space. Throws if `seq` is not a Seq.
   */
  Seq.prototype.mapSequence = function(seq, offset) {
    if (seq instanceof Seq) {
      return new MatchMap(seq, this, offset);
    }
    throw new Error('.mapSequence requires valid Seq');
  };
  /* MatchResult */
  /*
   * One alignment of a query against a search space: the owning MatchMap
   * (stored read-only and non-enumerable), the position and the score.
   */
  function MatchResult(matchMap, pos, score) {
    Object.defineProperty(this, '__matchMap', {
      value: matchMap,
      writable: false,
      enumerable: false,
      configurable: false
    });
    this.position = pos;
    this.score = score;
    this.__align = null; // filled lazily by alignment()
  }
  /**
   * The stretch of the search space that lines up with the query at this
   * result's position. Wherever the query hangs over either end of the
   * search space the result is padded with '-', so it is always exactly as
   * long as the query. Computed once and cached.
   *
   * @returns {Seq} The aligned (and, where needed, gap-padded) sequence.
   */
  MatchResult.prototype.alignment = function() {
    if (!this.__align) {
      var map = this.__matchMap;
      var space = map.__searchSpace;
      var queryLen = map.__query.__length;
      var spaceLen = space.__length;
      var pos = this.position;
      // Split the query span into: gaps before the search space, the
      // overlapping part, and gaps after it. The original padded the right
      // side with the overlap length instead of the missing length and
      // never padded the right when the left was padded too.
      var leading = Math.min(queryLen, Math.max(0, -pos));
      var from = Math.max(0, pos);
      var to = Math.min(spaceLen, pos + queryLen);
      var overlap = Math.max(0, to - from);
      var trailing = queryLen - leading - overlap;
      // Use the local constructor (no dependency on the global name) and
      // the search space's own type for the gap sequences.
      var gaps = function(count) {
        return new Seq(space.__type).read('-').repeat(count);
      };
      var aligned = space.replicate(from, overlap);
      if (leading > 0) {
        aligned = gaps(leading).polymerize(aligned);
      }
      if (trailing > 0) {
        aligned = aligned.polymerize(gaps(trailing));
      }
      this.__align = aligned;
    }
    return this.__align;
  };
  /* Returns the query masked (bitwise AND) with this result's alignment. */
  MatchResult.prototype.alignmentMask = function() {
    var query = this.__matchMap.__query;
    var aligned = this.alignment();
    return query.mask(aligned);
  };
  /* Returns the query covered (bitwise OR) with this result's alignment. */
  MatchResult.prototype.alignmentCover = function() {
    var query = this.__matchMap.__query;
    var aligned = this.alignment();
    return query.cover(aligned);
  };
  /* MatchMap */
  /*
   * Holds the state for matching `query` against `searchSpace`. Both
   * sequences are copied, so later changes to the originals have no
   * effect. `offset` is clamped to 0 or more. Throws unless both arguments
   * are Seq instances.
   */
  function MatchMap(query, searchSpace, offset) {
    var bothValid = (query instanceof Seq) && (searchSpace instanceof Seq);
    if (!bothValid) {
      throw new Error('MatchMap requires valid Seq');
    }
    this.__query = query.replicate();
    this.__searchSpace = searchSpace.replicate();
    this.__results = [];
    this.__orderedResults = [];
    this.__matchFrequencyData = null;
    this.__initialized = false;
    this.__offset = Math.max(0, offset | 0);
    // Turns an index into the result list into an alignment position.
    this.__positionAdjustment = this.__offset - (this.__query.size() - 1);
    this.__debug = {
      searchTime: null,
      prepareTime: null,
      sortTime: null
    };
  }
  /**
   * Run the search (or adopt precomputed scores) and reset all derived
   * state so the map can be sorted again.
   *
   * @param {Array} [results] Precomputed scores. When omitted, the scores
   *   are computed from the query and the search space.
   * @returns {MatchMap} This map, for chaining.
   */
  MatchMap.prototype.initialize = function(results) {
    this.__orderedResults = [];
    this.__matchFrequencyData = null;
    this.__initialized = true;
    // sort() treats a non-null sortTime as "already sorted". Without this
    // reset, sorting after a second initialize() did nothing and best() /
    // top() then failed with "must be sorted first".
    this.__debug.sortTime = null;
    var t = (new Date).valueOf();
    if (!results) {
      var dataArray = new Uint32Array(this.__execute(this.__query.__buffer, this.__searchSpace.__buffer));
      this.__debug.searchTime = (-t) + (t = (new Date).valueOf());
      var queryLen = this.__query.size();
      var searchLen = this.__searchSpace.size();
      // Index of the first meaningful score in the raw map.
      var resultsLen = ((8 - (queryLen % 8)) % 8) + 1;
      var totalLen = searchLen + queryLen - 1;
      results = [].slice.call(dataArray, resultsLen, resultsLen + totalLen);
    }
    this.__results = results;
    this.__debug.prepareTime = (-t) + (t = (new Date).valueOf());
    return this;
  };
  /*
   * Builds the list of results ordered by descending score, each entry
   * being {n: position, s: score}. Does nothing when a sort time has
   * already been recorded. Throws if the map has not been initialized.
   */
  MatchMap.prototype.sort = function() {
    if (!this.__initialized) {
      throw new Error('MatchMap must be initialized first.');
    }
    var startedAt = new Date().valueOf();
    if (this.__debug.sortTime !== null) {
      return this;
    }
    var shift = this.__positionAdjustment;
    var toEntry = function(score, index) {
      return {n: index + shift, s: score};
    };
    var byScoreDescending = function(left, right) {
      return right.s - left.s;
    };
    this.__orderedResults = this.__results.map(toEntry).sort(byScoreDescending);
    this.__debug.sortTime = new Date().valueOf() - startedAt;
    return this;
  };
  /**
   * The approximate probability that two randomly chosen nucleotides from
   * QUERY and SEARCH match each other.
   *
   * @param {Seq} query Query sequence.
   * @param {Seq} searchSpace Search-space sequence.
   * @returns {number} Sum over the four bases of the product of their
   *   fractions in both sequences.
   */
  MatchMap.prototype.__calculate_p_match = function(query, searchSpace) {
    var queryContent = query.fractionalContentATGC();
    var searchSpaceContent = searchSpace.fractionalContentATGC();
    // RNA sequences report this base under 'U' instead of 'T'; reading 'T'
    // alone produced NaN for them.
    var thymine = function(content) {
      return content['T'] !== undefined ? content['T'] : content['U'];
    };
    return (queryContent['A'] * searchSpaceContent['A']) +
      (thymine(queryContent) * thymine(searchSpaceContent)) +
      (queryContent['G'] * searchSpaceContent['G']) +
      (queryContent['C'] * searchSpaceContent['C']);
  };
  /**
   * Copy of the raw score list, optionally windowed.
   *
   * @param {number} [offset] Index of the first score to return; negative
   *   values count back from the end.
   * @param {number} [count] Maximum number of scores to return.
   * @returns {Array} A new array; the stored results are not exposed.
   * @throws {Error} When the map has not been initialized.
   */
  MatchMap.prototype.results = function(offset, count) {
    if (!this.__initialized) {
      throw new Error('MatchMap must be initialized first.');
    }
    if (offset === undefined) {
      return this.__results.slice();
    }
    if (count === undefined) {
      return this.__results.slice(offset | 0);
    }
    // `count` is a number of entries, not an end index: passing it straight
    // to slice() returned count - offset items for any non-zero offset.
    var start = offset | 0;
    if (start < 0) {
      start = Math.max(0, this.__results.length + start);
    }
    var size = Math.max(0, count | 0);
    return this.__results.slice(start, start + size);
  };
  /*
   * Returns the first entry of the sorted results as a MatchResult.
   * Throws if the map has not been initialized or sorted.
   */
  MatchMap.prototype.best = function() {
    if (!this.__initialized) {
      throw new Error('MatchMap must be initialized first.');
    }
    var ordered = this.__orderedResults;
    if (!ordered.length) {
      throw new Error('MatchMap must be sorted first.');
    }
    var winner = ordered[0];
    return new MatchResult(this, winner.n, winner.s);
  };
  /*
   * Returns the first `n` entries of the sorted results as MatchResult
   * objects. Throws if the map has not been initialized or sorted.
   */
  MatchMap.prototype.top = function(n) {
    if (!this.__initialized) {
      throw new Error('MatchMap must be initialized first.');
    }
    if (!this.__orderedResults.length) {
      throw new Error('MatchMap must be sorted first.');
    }
    var owner = this;
    var toResult = function(entry) {
      return new MatchResult(owner, entry.n, entry.s);
    };
    return this.__orderedResults.slice(0, n).map(toResult);
  };
  /**
   * The `n` lowest-scoring results, in the same (descending) order as the
   * sorted list.
   *
   * @param {number} n Number of results wanted; values larger than the list
   *   return the whole list.
   * @returns {MatchResult[]} The last `n` entries of the sorted results.
   * @throws {Error} When the map has not been initialized or sorted.
   */
  MatchMap.prototype.bottom = function(n) {
    if (!this.__initialized) {
      throw new Error('MatchMap must be initialized first.');
    }
    if (!this.__orderedResults.length) {
      throw new Error('MatchMap must be sorted first.');
    }
    var self = this;
    var ordered = this.__orderedResults;
    // Take from `length - n` to the end. The original passed `n` as the end
    // index, which produced an empty list for most values of n.
    var first = Math.max(0, ordered.length - n);
    return ordered.slice(first).map(function(result) {
      return new MatchResult(self, result.n, result.s);
    });
  };
  /**
   * Histogram of scores: entry k is the number of results whose score is k,
   * for k from 0 up to the query length. Computed once and cached.
   *
   * @returns {Array} Array of length `query.size() + 1`.
   * @throws {Error} When the map has not been initialized or sorted.
   */
  MatchMap.prototype.matchFrequencyData = function() {
    if (!this.__initialized) {
      throw new Error('MatchMap must be initialized first.');
    }
    if (!this.__orderedResults.length) {
      throw new Error('MatchMap must be sorted first.');
    }
    if (this.__matchFrequencyData) {
      return this.__matchFrequencyData;
    }
    var ordered = this.__orderedResults;
    var maxMatch = this.__query.size();
    var matchFrequencyData = makeArray(maxMatch + 1);
    // Count every result directly. The original only wrote a bucket when
    // the score dropped, so the lowest bucket stayed 0 unless it was the
    // score 0 itself.
    for (var i = 0, len = ordered.length; i < len; i++) {
      var score = ordered[i].s;
      if (score >= 0 && score <= maxMatch) {
        matchFrequencyData[score]++;
      }
    }
    return (this.__matchFrequencyData = matchFrequencyData);
  };
  /*
   * Counts the non-zero nibbles of the 32-bit value `int`. `bitCount` is a
   * table giving the number of set bits for each byte value.
   */
  MatchMap.prototype.__countMatches = function(int, bitCount) {
    // Fold every nibble into its lowest bit: that bit ends up set when any
    // of the nibble's four bits was set.
    var flags = int | (int >>> 1);
    flags = (flags | (flags >>> 2)) & 0x11111111;
    // Gather the eight flag bits into bits 0-3 and bits 16-19.
    flags = flags | (flags >>> 3);
    flags = flags | (flags >>> 6);
    var lower = flags & 0xF;
    var upper = (flags >>> 12) & 0xF0;
    return bitCount[upper | lower];
  };
  /*
   * Compares every 32-bit query word with every 32-bit search-space word at
   * every nibble shift and accumulates the match counts into a Uint32
   * score map. Returns the map's ArrayBuffer.
   */
  MatchMap.prototype.__execute = function(queryBuffer, searchSpaceBuffer) {
    var queryWords = new Uint32Array(queryBuffer, 4);
    var spaceWords = new Uint32Array(searchSpaceBuffer, 4);
    var countMatches = this.__countMatches;
    var bitCount = __bitCount;
    var queryCount = queryWords.length | 0;
    var spaceCount = spaceWords.length | 0;
    // Eight slots (one per nibble position) for every word involved.
    var slotCount = (queryCount + spaceCount) << 3;
    var mapBuffer = new ArrayBuffer(4 * slotCount);
    var scores = new Uint32Array(mapBuffer);
    var q, s, word, hit, base, down, up, lowSlot, highSlot, spaceWord, slot;
    for (q = 0; q < queryCount; q++) {
      word = queryWords[q];
      base = (queryCount - q) << 3;
      // Unshifted comparison.
      for (s = 0; s < spaceCount; s++) {
        hit = word & spaceWords[s];
        if (hit) {
          scores[(s << 3) + base] += countMatches(hit, bitCount);
        }
      }
      // Slide the query word one nibble at a time in both directions until
      // nothing is left of it.
      down = word >>> 4;
      up = word << 4;
      lowSlot = base - 1;
      highSlot = base + 1;
      while (down || up) {
        for (s = 0; s < spaceCount; s++) {
          spaceWord = spaceWords[s];
          slot = s << 3;
          hit = down & spaceWord;
          if (hit) {
            scores[slot + lowSlot] += countMatches(hit, bitCount);
          }
          hit = up & spaceWord;
          if (hit) {
            scores[slot + highSlot] += countMatches(hit, bitCount);
          }
        }
        down >>>= 4;
        up <<= 4;
        --lowSlot;
        ++highSlot;
      }
    }
    return mapBuffer;
  };
  940. return {
  941. Seq: Seq,
  942. MatchMap: MatchMap
  943. };
  944. }();