BioTorrents.de’s version of Gazelle
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

config.template.php 43KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * Environment
  5. * Config Loader v2
  6. *
  7. * To use the new system, which has significant security benefits,
  8. * (fine-grained scoping, ephemeral access lifetime, public vs. private, etc.),
  9. * please follow the example below.
  10. *
  11. * $ENV = ENV::go();
  12. * $ENV->PUBLIC_VALUE;
  13. * $ENV->getPriv('PRIVATE_VALUE');
  14. *
  15. * Using a central static $ENV singleton class has additional benefits.
  16. * The RecursiveArrayObject class included in env.class.php is a powerful tool:
  17. *
  18. * $LongArray = [];
  19. * ENV::setPub(
  20. * 'CONFIG',
  21. * $ENV->convert($LongArray)
  22. * );
  23. *
  24. * $ENV = ENV::go();
  25. * foreach ($ENV->CATS as $Cat) {
  26. * var_dump($Cat->Name);
  27. * }
  28. *
  29. * One more example using custom RecursiveArrayObject methods:
  30. * @see https://www.php.net/manual/en/class.arrayobject.php
  31. *
  32. * var_dump(
  33. * $ENV->dedupe(
  34. * $ENV->META->Formats->Sequences,
  35. * $ENV->META->Formats->Proteins->toArray()
  36. * )
  37. * );
  38. */
  # Load the ENV class and fetch the shared singleton
  require_once 'env.class.php';
  $ENV = ENV::go();

  # Baseline facts: minimum PHP version and the development switch.
  # Later sections of this file branch on $ENV->DEV
  ENV::setPub('PHP_MIN', '7.4.0');
  ENV::setPub('DEV', true);

  /**
   * Site identity
   */

  # Site name, prefixed with "[Dev]" outside production
  ENV::setPub(
      'SITE_NAME',
      $ENV->DEV
          ? '[Dev] BioTorrents.de' # Development
          : 'BioTorrents.de' # Production
  );

  # Meta description
  ENV::setPub('DESCRIPTION', 'A platform to share biological sequence and medical imaging data');

  # Navigation glyphs
  ENV::setPub('SEP', '⸬'); # e.g., News ⸬ BioTorrents.de
  ENV::setPub('CRUMB', '›'); # e.g., Forums › Board › Thread
  # The FQDN of your site, e.g., dev.biotorrents.de
  # The constant is the old format; $ENV is fed from it so the two cannot drift apart
  define(
      'SITE_DOMAIN',
      $ENV->DEV
          ? 'dev.biotorrents.de' # Development
          : 'biotorrents.de' # Production
  );
  ENV::setPub('SITE_DOMAIN', SITE_DOMAIN);

  # The FQDN of your image host, e.g., pics.biotorrents.de
  ENV::setPub('IMAGE_DOMAIN', 'pics.biotorrents.de');

  # Web root. Currently used for Twig but may also include config files
  ENV::setPub('WEB_ROOT', '/var/www/');

  # The root of the server, used for includes, e.g., /var/www/html/dev.biotorrents.de/
  # Old-format constant first, then mirrored into $ENV
  define(
      'SERVER_ROOT',
      $ENV->DEV
          ? '/var/www/html/dev.biotorrents.de/' # Development
          : '/var/www/html/biotorrents.de/' # Production
  );
  ENV::setPub('SERVER_ROOT', SERVER_ROOT);

  # Where torrent files are stored, e.g., /var/www/torrents-dev/
  # Old-format constant first, then mirrored into $ENV
  define(
      'TORRENT_STORE',
      $ENV->DEV
          ? '/var/www/torrents-dev/' # Development
          : '/var/www/torrents/' # Production
  );
  ENV::setPub('TORRENT_STORE', TORRENT_STORE);

  # Allows you to run static content off another server. Default is usually what you want
  define('STATIC_SERVER', '/static/');
  ENV::setPub('STATIC_SERVER', STATIC_SERVER);

  # The hashing algorithm used for SRI
  ENV::setPub('SRI', 'sha384');
  105. /**
  106. * Tech support
  107. */
  108. /*
  109. $TechSupport = [
  110. 'Email' => 'help@biotorrents.de',
  111. 'Subject' => '[TxID '.strtoupper(bin2hex(random_bytes(2))).'] Specific subject line with TxID intact',
  112. 'Body' => 'A detailed description of how you reach the error and the full text of any site messages you may receive.'
  113. ];
  114. ENV::setPub(
  115. 'HELP',
  116. $ENV->convert($TechSupport)
  117. );
  118. */
  /**
   * App keys
   *
   * Development and production each get their own set of keys.
   * Increased security and protection against config overwrites.
   */

  # Pre-shared key for generating hmacs for the image proxy
  ENV::setPriv('IMAGE_PSK', '');

  if ($ENV->DEV) {
      # Development: same four keys as production, documented in the branch below
      ENV::setPriv('ENCKEY', '');
      ENV::setPriv('SCHEDULE_KEY', '');
      ENV::setPriv('RSS_HASH', '');
      ENV::setPriv('SELF_API', '');
  } else {
      # Production

      # Unused in OT Gazelle. Currently used for API token auth
      ENV::setPriv('ENCKEY', '');

      # Alphanumeric random key. This key must be the argument to schedule.php for the schedule to work
      ENV::setPriv('SCHEDULE_KEY', '');

      # Random key. Used for generating unique RSS auth key
      ENV::setPriv('RSS_HASH', '');

      # System API key. Used for getting resources via Json->fetch()
      ENV::setPriv('SELF_API', '');
  }
  /**
   * Database
   */

  # Connection details shared by both environments
  ENV::setPriv('SQLHOST', '10.0.0.3');
  ENV::setPriv('SQLPORT', 3306);
  #ENV::setPriv('SQLSOCK', '/var/run/mysqld/mysqld.sock');

  # TLS client certs.
  # WEB_ROOT is configured with a trailing slash ('/var/www/'), so the
  # sub-path is appended without a leading slash to avoid '/var/www//sql-keys/...'
  ENV::setPriv('SQL_CERT', "{$ENV->WEB_ROOT}sql-keys/client-cert.pem");
  ENV::setPriv('SQL_KEY', "{$ENV->WEB_ROOT}sql-keys/client-key.pem");
  ENV::setPriv('SQL_CA', "{$ENV->WEB_ROOT}sql-keys/ca.pem");

  # Schema name and credentials differ per environment
  if (!$ENV->DEV) {
      # Production
      ENV::setPriv('SQLDB', 'gazelle_production');
      ENV::setPriv('SQLLOGIN', 'gazelle_production');
      ENV::setPriv('SQLPASS', '');
  } else {
      # Development
      ENV::setPriv('SQLDB', 'gazelle_development');
      ENV::setPriv('SQLLOGIN', 'gazelle_development');
      ENV::setPriv('SQLPASS', '');
  }
  /**
   * Tracker
   */

  # Ocelot connection, e.g., 0.0.0.0
  ENV::setPriv('TRACKER_HOST', '0.0.0.0');

  if ($ENV->DEV) {
      # Development: same keys as production, on a separate port
      ENV::setPriv('TRACKER_PORT', 34001);
      ENV::setPriv('TRACKER_SECRET', '');
      ENV::setPriv('TRACKER_REPORTKEY', '');
  } else {
      # Production
      ENV::setPriv('TRACKER_PORT', 34000);

      # Must be 32 alphanumeric characters and match site_password in ocelot.conf
      ENV::setPriv('TRACKER_SECRET', '');

      # Must be 32 alphanumeric characters and match report_password in ocelot.conf
      ENV::setPriv('TRACKER_REPORTKEY', '');
  }
  /**
   * Tracker URLs
   *
   * Added to torrents à la http://bittorrent.org/beps/bep_0012.html
   */

  # One list of announce tiers per environment; the second tier is currently empty in both
  $AnnounceURLs = $ENV->DEV
      ? [
          # Development
          [ # Tier 1
              'https://trx.biotorrents.de:443',
          ],
          [], # Tier 2
      ]
      : [
          # Production
          [ # Tier 1
              'https://track.biotorrents.de:443',
          ],
          [ # Tier 2
              #'udp://tracker.coppersurfer.tk:6969/announce',
              #'udp://tracker.cyberia.is:6969/announce',
              #'udp://tracker.leechers-paradise.org:6969/announce',
          ],
      ];

  # Publish the same tiers as a constant and as an $ENV value
  define('ANNOUNCE_URLS', $AnnounceURLs);
  ENV::setPub(
      'ANNOUNCE_URLS',
      $ENV->convert($AnnounceURLs)
  );
  /**
   * Search
   */

  # SphinxqlQuery needs constants
  # $ENV breaks the torrent and request pages
  const SPHINXQL_HOST = '127.0.0.1';
  const SPHINXQL_PORT = 9306;
  const SPHINXQL_SOCK = false;

  # Must be <= the server's max_matches variable (default 1000)
  const SPHINX_MAX_MATCHES = 1000;
  /**
   * memcached
   *
   * Very important to run two instances,
   * one each for development and production.
   */

  # Only the socket path differs between the two environments
  ENV::setPriv(
      'MEMCACHED_SERVERS',
      [[
          'host' => $ENV->DEV
              ? 'unix:///var/run/memcached/memcached-dev.sock' # Development
              : 'unix:///var/run/memcached/memcached.sock', # Production
          'port' => 0,
          'buckets' => 1
      ]]
  );
  /**
   * IRC/Slack
   */

  # IRC server address. Used for onsite chat tool
  # Built at runtime from the site domain, so it has to stay a define() call
  define('BOT_SERVER', 'irc.' . $ENV->SITE_DOMAIN);
  const SOCKET_LISTEN_ADDRESS = 'localhost';
  const SOCKET_LISTEN_PORT = 51010;
  const BOT_NICK = 'ebooks';

  # IRC channels for official business
  # (ADMIN_CHAN shares #staff and DISABLED_CHAN shares #support)
  const ANNOUNCE_CHAN = '#announce';
  const DEBUG_CHAN = '#debug';
  const REQUEST_CHAN = '#requests';
  const STAFF_CHAN = '#staff';
  const ADMIN_CHAN = '#staff';
  const HELP_CHAN = '#support';
  const DISABLED_CHAN = '#support';
  #define('BOT_CHAN', '#userbots');

  # Slack invite link
  ENV::setPub('SLACK_INVITE', '');
  /**
   * ================
   * = NO MORE =
   * = PRIVATE INFO =
   * ================
   */

  /**
   * Features
   */

  # Enable donation page
  ENV::setPub('FEATURE_DONATE', true);

  # Send re-enable requests to user's email (published as a constant and in $ENV)
  const FEATURE_EMAIL_REENABLE = true;
  ENV::setPub('FEATURE_EMAIL_REENABLE', true);

  # Require users to verify login from unknown locations
  ENV::setPub('FEATURE_ENFORCE_LOCATIONS', false);

  # Attempt to send messages to IRC
  ENV::setPub('FEATURE_IRC', true);

  # Attempt to send email from the site
  ENV::setPub('FEATURE_SEND_EMAIL', true);

  # Allow the site encryption key to be set without an account
  # (should only be used for initial setup)
  ENV::setPub('FEATURE_SET_ENC_KEY_PUBLIC', false);

  # Attempt to support the BioPHP library
  # https://packagist.org/packages/biotorrents/biophp
  # https://blog.libredna.org/post/seqhash/
  ENV::setPub('FEATURE_BIOPHP', true);
  /**
   * Settings
   */

  # Set to false if you don't want everyone to see debug information; can be overridden with 'site_debug'
  if ($ENV->DEV) {
      # Development
      define('DEBUG_MODE', false);
      ENV::setPub('DEBUG_MODE', false);

      # Gazelle's debug mode is broken, so let's use PHP errors instead
      error_reporting(E_ALL);
  } else {
      # Production
      define('DEBUG_MODE', false);
      ENV::setPub('DEBUG_MODE', false);
  }

  # Set to false to disable open registration, true to allow anyone to register
  # Open in production, closed in development
  ENV::setPub('OPEN_REGISTRATION', !$ENV->DEV);

  # The maximum number of users the site can have, 0 for no limit
  const USER_LIMIT = 0;
  ENV::setPub('USER_LIMIT', 0);

  # User perks
  ENV::setPub('STARTING_INVITES', 2);
  ENV::setPub('STARTING_TOKENS', 2);
  ENV::setPub('STARTING_UPLOAD', 5368709120);
  ENV::setPub('DONOR_INVITES', 2);

  # Bonus Points
  const BONUS_POINTS = 'Bonus Points';
  ENV::setPub('BONUS_POINTS', 'Bonus Points');
  ENV::setPub('BP_COEFF', 1.5); # OT default 0.5
  # Tag namespaces (configurable via CSS selectors)
  #define('TAG_NAMESPACES', ['male', 'female', 'parody', 'character']);

  # Banned stuff (file characters, browsers, etc.)
  ENV::setPub('BAD_CHARS', ['"', '*', '/', ':', '<', '>', '?', '\\', '|']);

  # Set to true to block Opera Mini proxy
  ENV::setPub('BLOCK_OPERA_MINI', true);

  # Password length limits
  ENV::setPub('PW_MIN', 15); # Brute force
  ENV::setPub('PW_MAX', 10000); # DDoS; default 307200

  # Misc stuff like generic reusable snippets
  # Example of a variable using heredoc syntax.
  # PW_MIN is interpolated, so it must already be set at this point
  ENV::setPub(
      'PW_ADVICE',
      <<<HTML
      <p>
      Any password $ENV->PW_MIN characters or longer is accepted, but a strong password
      <ul>
      <li>is a pass<em>phrase</em> of mixed case with many small words,</li>
      <li>that contains complex characters including Unicode and emoji.</li>
      </ul>
      </p>
      HTML
  );
  /**
   * Services
   *
   * Public APIs, domains, etc.
   * Not intended for private API keys.
   */

  # Current Sci-Hub domains
  # https://lovescihub.wordpress.com
  # The constant holds a single TLD while $ENV holds the full list
  const SCI_HUB = 'se';
  ENV::setPub('SCI_HUB', ['ren', 'tw', 'se']);

  # Semantic Scholar
  # https://api.semanticscholar.org
  ENV::setPub('SS', 'https://api.semanticscholar.org/v1/paper/');

  # IP Geolocation
  ENV::setPub('IP_GEO', 'https://tools.keycdn.com/geo.json?host=');
  /**
   * User class IDs
   *
   * Needed for automatic promotions.
   * Found in the `permissions` table.
   */

  # Class name = class ID (not level); IDs are kept as strings
  const ADMIN = '1';
  const USER = '2';
  const MEMBER = '3';
  const POWER = '4';
  const ELITE = '5';
  const LEGEND = '8';
  const MOD = '11';
  const SYSOP = '15';
  const ARTIST = '19';
  const DONOR = '20';
  const VIP = '21';
  const TORRENT_MASTER = '23';
  const POWER_TM = '24';
  const FLS_TEAM = '33';
  const FORUM_MOD = '9001';
  /**
   * Forums
   */

  # Forum IDs published as constants
  const STAFF_FORUM = 3;
  const DONOR_FORUM = 7;

  # Forum IDs published through $ENV
  ENV::setPub('TRASH_FORUM', 8);
  ENV::setPub('ANNOUNCEMENT_FORUM', 1);
  ENV::setPub('SUGGESTIONS_FORUM', 2);
  # Pagination
  const TORRENT_COMMENTS_PER_PAGE = 10;
  const POSTS_PER_PAGE = 25;
  const TOPICS_PER_PAGE = 50;
  const TORRENTS_PER_PAGE = 50;
  const REQUESTS_PER_PAGE = 25;
  const MESSAGES_PER_PAGE = 25;
  const LOG_ENTRIES_PER_PAGE = 50;

  # Cache catalogues
  # Limit to THREAD_CATALOGUE posts per cache key
  const THREAD_CATALOGUE = 500;

  # Miscellaneous values
  const MAX_RANK = 6;
  const MAX_EXTRA_RANK = 8;
  const MAX_SPECIAL_RANK = 3;
  440. ENV::setPub('DONOR_FORUM_RANK', 6);
  /**
   * Ratio and badges
   */

  # Ratio requirements, in descending order of downloaded amount.
  # Each row: [downloaded bytes, required ratio at 0% seed, required ratio at 100% seed]
  const RATIO_REQUIREMENTS = [
      [200 * 1024**3, 0.60, 0.60],
      [160 * 1024**3, 0.60, 0.50],
      [120 * 1024**3, 0.50, 0.40],
      [100 * 1024**3, 0.40, 0.30],
      [80 * 1024**3, 0.30, 0.20],
      [60 * 1024**3, 0.20, 0.10],
      [40 * 1024**3, 0.15, 0.00],
      [20 * 1024**3, 0.10, 0.00],
      [10 * 1024**3, 0.05, 0.00],
  ];
  # Threshold => badge ID lookup, grouped by statistic (DL, UL, Posts).
  # Keys are written as integers; PHP stores numeric-string keys as integers anyway
  $AutomatedBadgeIDs = [
      'DL' => [
          8 => 10,
          16 => 11,
          32 => 12,
          64 => 13,
          128 => 14,
          256 => 15,
          512 => 16,
          1024 => 17,
          2048 => 18,
      ],

      'UL' => [
          16 => 20,
          32 => 21,
          64 => 22,
          128 => 23,
          256 => 24,
          512 => 25,
          1024 => 26,
          2048 => 27,
          4096 => 28,
      ],

      'Posts' => [
          5 => 30,
          10 => 31,
          25 => 32,
          50 => 33,
          100 => 34,
          250 => 35,
          500 => 36,
          1000 => 37,
          2500 => 38,
      ],
  ];

  ENV::setPub('AUTOMATED_BADGE_IDS', $ENV->convert($AutomatedBadgeIDs));
  497. /**
  498. * Site categories and meta
  499. *
  500. * THIS IS THE OLD FORMAT AND WILL GO AWAY.
  501. * PLEASE SEE $ENV->{DB,META,CATS} BELOW.
  502. */
  503. # Categories
  504. $Categories = [
  505. 'Sequences',
  506. 'Graphs',
  507. 'Systems',
  508. 'Geometric',
  509. 'Scalars/Vectors',
  510. 'Patterns',
  511. 'Constraints',
  512. 'Images',
  513. 'Spatial',
  514. 'Models',
  515. 'Documents',
  516. 'Machine Data',
  517. ];
  518. $GroupedCategories = $Categories;
  519. # Plain Formats
  520. $PlainFormats = [
  521. 'CSV' => ['csv'],
  522. 'JSON' => ['json'],
  523. 'Text' => ['txt'],
  524. 'XML' => ['xml'],
  525. ];
  526. # Sequence Formats
  527. $SeqFormats = [
  528. 'BAM' => ['bam'],
  529. 'CRAM' => ['cram'],
  530. 'EMBL' => ['embl'],
  531. 'FASTA' => ['fa', 'fasta', 'fsa'],
  532. 'FASTA+QUAL' => ['qual'],
  533. 'CSFASTA' => ['csfa', 'csfasta', 'csfsa'],
  534. 'FASTQ' => ['fastq', 'fq', 'sanfastq'],
  535. 'GFF' => ['gff', 'gff2', 'gff3'],
  536. 'GTF' => ['gtf'],
  537. 'GenBank' => ['gb', 'gbk', 'genbank'],
  538. 'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
  539. 'PIR' => ['pir'],
  540. 'QSeq' => ['qseq'],
  541. 'SAM' => ['sam'],
  542. 'SFF' => ['sff'],
  543. 'SRF' => ['srf'],
  544. 'SnapGene' => ['dna', 'seq'],
  545. 'SwissProt' => ['dat'],
  546. 'VCF' => ['vcf'],
  547. ];
  548. # Protein Formats
  549. # DON'T PARSE RAW FILES. TOO MANY COMPETING VENDORS
  550. $ProtFormats = [
  551. 'ABI/Sciex' => ['t2d', 'wiff'],
  552. 'APML' => ['apml'],
  553. 'ASF' => ['asf'],
  554. 'Agilent/Bruker' => ['baf', 'd', 'fid', 'tdf', 'yep'],
  555. 'BlibBuild' => ['blib'],
  556. 'Bruker/Varian' => ['sms', 'xms'],
  557. 'Finnigan' => ['dat', 'ms'],
  558. 'ION-TOF' => ['ita', 'itm'],
  559. 'JCAMP-DX' => ['jdx'],
  560. 'MGF' => ['mgf'],
  561. 'MS2' => ['ms2'],
  562. 'MSF' => ['msf'],
  563. 'mzData' => ['mzdata'],
  564. 'mzML' => ['mzml'],
  565. 'mzXML' => ['mzxml'],
  566. 'OMSSA' => ['omssa', 'omx'],
  567. 'PEFF' => ['peff'],
  568. 'pepXML' => ['pepxml'],
  569. 'protXML' => ['protxml'],
  570. 'Shimadzu' => ['lcd', 'qgd', 'spc'],
  571. 'Skyline' => ['sky', 'skyd'],
  572. 'TPP/SPC' => ['dta'],
  573. 'Tandem' => ['tandem'],
  574. 'TraML' => ['traml'],
  575. 'ULVAC-PHI' => ['tdc'],
  576. ];
  577. # XML Graph Formats
  578. $GraphXmlFormats = [
  579. 'DGML' => ['dgml'],
  580. 'DotML' => ['dotml'],
  581. 'GEXF' => ['gexf'],
  582. 'GXL' => ['gxl'],
  583. 'GraphML' => ['graphml'],
  584. 'XGMML' => ['xgmml'],
  585. ];
  586. # Text Graph Formats
  587. $GraphTxtFormats = [
  588. 'DOT' => ['gv'],
  589. 'GML' => ['gml'],
  590. 'LCF' => ['lcf'],
  591. 'Newick' => ['xsd', 'sgf'],
  592. 'SIF' => ['sif'],
  593. 'TGF' => ['tgf'],
  594. ];
  595. # Image Formats
  596. # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3948928/
  597. $ImgFormats = [
  598. 'Analyze' => ['hdr', 'img'],
  599. 'Interfile' => ['h33'],
  600. 'DICOM' => ['dcm', 'dicom'],
  601. 'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
  602. 'NIfTI' => ['nii', 'nifti'],
  603. 'MINC' => ['minc', 'mnc'],
  604. 'JPEG' => ['jfif', 'jpeg', 'jpg'],
  605. 'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
  606. 'PNG' => ['png'],
  607. 'TIFF' => ['tif', 'tiff'],
  608. 'WebP' => ['webp'],
  609. ];
  # Vector Map Formats
  # Maps a display name to the file extensions recognised for that format
  $MapVectorFormats = [
      'AutoCAD DXF' => ['dxf'],
      'Cartesian (XYZ)' => ['xyz'],
      'DLG' => ['dlg'],
      'Esri TIN' => ['adf', 'dbf'],
      'GML' => ['gml'],
      'GeoJSON' => ['geojson'],
      'ISFC' => ['isfc'],
      # KMZ is the zipped form of KML; the previous 'kmzv' entry was a typo
      'KML' => ['kml', 'kmz'],
      # DAT omitted
      # https://en.wikipedia.org/wiki/MapInfo_TAB_format
      'MapInfo TAB' => ['tab', 'ind', 'map', 'id'],
      'Measure Map Pro' => ['mmp'],
      'NTF' => ['ntf'],
      # DBF omitted
      # https://en.wikipedia.org/wiki/Shapefile
      'Shapefile' => ['shp', 'shx'],
      'Spatial Data File' => ['sdf', 'sdf3', 'sif', 'kif'],
      'SOSI' => ['sosi'],
      'SVG' => ['svg'],
      'TIGER' => ['tiger'],
      'VPF' => ['vpf'],
  ];
  634. # Raster Map Formats
  635. $MapRasterFormats = [
  636. 'ADRG' => ['adrg'],
  637. 'Binary' => ['bsq', 'bip', 'bil'],
  638. 'DRG' => ['drg'],
  639. 'ECRG' => ['ecrg'],
  640. 'ECW' => ['ecw'],
  641. # DAT and ASC omitted (common)
  642. # https://support.esri.com/en/technical-article/000008526
  643. # https://web.archive.org/web/20150128024528/http://docs.codehaus.org/display/GEOTOOLS/ArcInfo+ASCII+Grid+format
  644. 'Esri Grid' => ['adf', 'nit', 'asc', 'grd'],
  645. 'GeoTIFF' => ['tfw'],
  646. #'IMG' => ['img'],
  647. #'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
  648. 'MrSID' => ['sid'],
  649. 'netCDF' => ['nc'],
  650. 'RPF' => ['cadrg', 'cib'],
  651. ];
  652. # Binary Document Formats
  653. # https://en.wikipedia.org/wiki/OpenDocument
  654. # https://en.wikipedia.org/wiki/List_of_Microsoft_Office_filename_extensions
  655. $BinDocFormats = [
  656. 'OpenDocument' => ['odt', 'fodt', 'ods', 'fods', 'odp', 'fodp', 'odg', 'fodg', 'odf'],
  657. 'Word' => ['doc', 'dot', 'wbk', 'docx', 'docm', 'dotx', 'dotm', 'docb'],
  658. 'PowerPoint' => ['ppt', 'pot', 'pps', 'pptx', 'pptm', 'potx', 'potm', 'ppam', 'ppsx', 'ppsm', 'sldx', 'sldm'],
  659. 'Excel' => ['xls', 'xlt', 'xlm', 'xlsx', 'xlsm', 'xltx', 'xltm', 'xlsb', 'xla', 'xlam', 'xll', 'xlw'],
  660. 'PDF' => ['pdf', 'fdf', 'xfdf'],
  661. ];
  662. # Extra Formats
  663. # DON'T PARSE IMG OR ISO FILES
  664. # https://en.wikipedia.org/wiki/Disk_image#File_formats
  665. # http://dcjtech.info/topic/python-file-extensions/
  666. $CpuGenFormats = [
  667. 'Docker' => ['dockerfile'],
  668. 'Hard Disk' => ['fvd', 'dmg', 'esd', 'qcow', 'qcow2', 'qcow3', 'smi', 'swm', 'vdi', 'vhd', 'vhdx', 'vmdk', 'wim'],
  669. 'Optical Disc' => ['bin', 'ccd', 'cso', 'cue', 'daa', 'isz', 'mdf', 'mds', 'mdx', 'nrg', 'uif'],
  670. 'Python' => ['pxd', 'py', 'py3', 'pyc', 'pyd', 'pyde', 'pyi', 'pyo', 'pyp', 'pyt', 'pyw', 'pywz', 'pyx', 'pyz', 'rpy', 'xpy'],
  671. 'Jupyter' => ['ipynb'],
  672. 'Ontology' => ['cgif', 'cl', 'clif', 'csv', 'htm', 'html', 'kif', 'obo', 'owl', 'rdf', 'rdfa', 'rdfs', 'rif', 'tsv', 'xcl', 'xht', 'xhtml', 'xml'],
  673. ];
  674. # Resolutions
  675. $Resolutions = [
  676. 'Contig',
  677. 'Scaffold',
  678. 'Chromosome',
  679. 'Genome',
  680. 'Proteome',
  681. 'Transcriptome',
  682. ];
  /**
   * $ENV->DB
   *
   * One flat array with all possible torrent/group fields.
   * These are mostly used in Twig templates as {{ db.title }}.
   * Meta abstraction layer for flavor text *around* DB fields.
   * Gazelle's job is to query the right tables, which will shift.
   *
   * Each entry is keyed by field name and carries a display 'name',
   * a 'desc' hint, and optionally a 'note' containing an HTML link.
   */
  $DB = [
      # torrents_group
      'category_id' => ['name' => 'Category', 'desc' => ''],
      'title' => ['name' => 'Torrent Title', 'desc' => 'Definition line, e.g., Alcohol dehydrogenase ADH1'],
      'subject' => ['name' => 'Organism', 'desc' => 'Organism line binomial, e.g., Saccharomyces cerevisiae'],
      'object' => ['name' => 'Strain/Variety', 'desc' => 'Organism line if any, e.g., S288C'],
      'year' => ['name' => 'Year', 'desc' => 'Publication year'],
      'workgroup' => ['name' => 'Department/Lab', 'desc' => "Last author's institution, e.g., Lawrence Berkeley Laboratory"],
      'location' => ['name' => 'Location', 'desc' => 'Physical location, e.g., Berkeley, CA 94720'],
      'identifier' => ['name' => 'Accession Number', 'desc' => 'RefSeq and UniProt preferred'],
      # Spelling fixed: this text was previously "Comma-seperated"
      'tag_list' => ['name' => 'Tag List', 'desc' => 'Comma-separated list of at least 5 tags'],
      'timestamp' => ['name' => 'Uploaded On', 'desc' => ''],
      'revision_id' => ['name' => 'Revision ID', 'desc' => ''],
      'description' => ['name' => 'Group Description', 'desc' => ''],
      'picture' => ['name' => 'Picture', 'desc' => 'A meaningful picture, e.g., the specimen or a thumbnail'],

      # From the non-renamed `torrents` table
      'version' => ['name' => 'Version', 'desc' => 'Start with 0.1.0', 'note' => 'Please see <a href="https://semver.org" target="_blank">Semantic Versioning</a>'],
      'license' => ['name' => 'License', 'desc' => '', 'note' => 'Please see <a href="http://www.dcc.ac.uk/resources/how-guides/license-research-data" target="_blank">How to License Research Data</a>'],
      'mirrors' => ['name' => 'Mirrors', 'desc' => 'Up to two FTP/HTTP addresses that either point directly to a file, or for multi-file torrents, to the enclosing folder'],

      # Original fields
      'seqhash' => ['name' => 'Seqhash', 'desc' => 'Sample genome sequence in FASTA format (GenBank pending)', 'note' => 'Please see <a href="https://blog.libredna.org/post/seqhash/" target="_blank">The Seqhash Algorithm</a>'],
  ];

  ENV::setPub(
      'DB',
      $ENV->convert($DB)
  );
  /**
   * $ENV->META
   *
   * Main metadata object.
   * Responsible for defining field values.
   * These eventually go into the database,
   * so take care to define them well here.
   * Avoid nesting > 3 levels deep.
   *
   * Layout used below:
   *   1. Top-level group: Platforms, Formats, Scopes, Licenses
   *   2. Subgroup, e.g., Formats => Sequences (Licenses has none)
   *   3. Values: plain lists of labels, or for Formats,
   *      'Format name' => [lowercase file extensions]
   */
  $META = [

      /**
       * 1.
       * PLATFORMS
       */
      'Platforms' => [

          /**
           * 2.
           * Sequences
           */
          'Sequences' => [
              # DNA
              'Complete Genomics',
              'cPAS-BGI/MGI',
              'Helicos',
              'Illumina HiSeq',
              'Illumina MiSeq',
              'Ion Torrent',
              'Microfluidics',
              'Nanopore',
              'PacBio',
              'Roche 454',
              'Sanger',
              'SOLiD',

              # RNA, Protein, etc.
              'De Novo',
              'HPLC',
              'Mass Spec',
              'RNA-Seq',
          ],

          /**
           * 2.
           * Graphs
           * https://en.wikipedia.org/wiki/Graph_drawing#Software
           */
          'Graphs' => [
              'BioFabric',
              'BioTapestry',
              'Cytoscape',
              'Edraw Max',
              'GenMAPP',
              'Gephi',
              'graph-tool',
              'Graphviz',
              'InCroMAP',
              'LaNet-vi',
              'Linkurious',
              'MATLAB',
              'MEGA',
              'Maple',
              'Mathematica',
              #'Microsoft Automatic Graph Layout',
              'NetworkX',
              'PGF/TikZ',
              'PathVisio',
              'Pathview',
              'R',
              'Systrip',
              'Tom Sawyer Software',
              'Tulip',
              'yEd',
          ],

          /**
           * 2.
           * Images
           */
          'Images' => [
              'CT/CAT',
              'ECG',
              'Elastography',
              'FNIR/NIRS',
              'MPI',
              'MRI/NMR',
              'Microscopy',
              'Photoacoustic',
              'Photography',
              'Scint/SPECT/PET',
              'Ultrasound',
              'X-Rays',
          ],

          /**
           * 2.
           * Documents
           */
          'Documents' => [
              # Composed
              'Literature',
              'Software',

              # Generated
              'Kernel',
              'Metadata',
              'Notebook',
          ],

          /**
           * 2.
           * Machine Data
           *
           * NOTE(review): the key is 'Raw' while the heading says
           * "Machine Data"; $CATS reads it as Platforms->Raw.
           */
          'Raw' => [
              'Binary',
              'Text',
          ],
      ], # End $ENV->META->Platforms

      /**
       * 1.
       * FORMATS
       *
       * Every subgroup maps a display name to its file extensions.
       */
      'Formats' => [

          /**
           * 2.
           * Plain
           */
          'Plain' => [
              'CSV' => ['csv'], # 3
              'JSON' => ['json'], # 3
              'Text' => ['txt', 'asc'], # 3
              'XML' => ['xml'], # etc.
          ],

          /**
           * 2.
           * Databases
           */
          'Databases' => [
              'MS SQL' => ['mdf', 'ndf', 'ldf'],
              'MySQL' => ['sql', 'mysql'],
              'Oracle' => ['dbf', 'ora', 'oraenv'],
              'IBM Db2' => ['ixf', 'del', 'cursor'],
              'Postgres' => ['sql'],
          ],

          /**
           * 2.
           * Archives
           */
          'Archives' => [
              '7z' => ['7z'],
              'bzip2' => ['bz2', 'bzip2'],
              'gzip' => ['gz', 'gzip', 'tgz', 'tpz'],
              'Pickle' => ['pickle', 'pkl'],
              'RAR' => ['rar', 'rev'],
              'tar' => ['tar'],
              'ZIP' => ['zip', 'zipx'],
              # Uncompressed uploads: the extension is the empty string
              'None' => [''],
          ],

          /**
           * 2.
           * Sequences
           * https://www.ncbi.nlm.nih.gov/sra/docs/submitformats/
           */
          'Sequences' => [
              'BAM' => ['bam'],
              'CRAM' => ['cram'],
              'EMBL' => ['embl'],
              'FASTA' => ['fa', 'fasta', 'fsa'],
              'FASTA+QUAL' => ['qual'],
              'CSFASTA' => ['csfa', 'csfasta', 'csfsa'],
              'FASTQ' => ['fastq', 'fq', 'sanfastq'],
              'GFF' => ['gff', 'gff2', 'gff3'],
              'GTF' => ['gtf'],
              'GenBank' => ['gb', 'gbk', 'genbank'],
              'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
              'PIR' => ['pir'],
              'QSeq' => ['qseq'],
              'SAM' => ['sam'],
              'SFF' => ['sff'],
              'SRF' => ['srf'],
              'SnapGene' => ['dna', 'seq'],
              'SwissProt' => ['dat'],
              'VCF' => ['vcf'],
          ],

          /**
           * 2.
           * Proteins
           * https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3518119/
           */
          'Proteins' => [
              'ABI/Sciex' => ['t2d', 'wiff'],
              'APML' => ['apml'],
              'ASF' => ['asf'],
              'Agilent/Bruker' => ['baf', 'd', 'fid', 'tdf', 'yep'],
              'BlibBuild' => ['blib'],
              'Bruker/Varian' => ['sms', 'xms'],
              'Finnigan' => ['dat', 'ms'],
              'ION-TOF' => ['ita', 'itm'],
              'JCAMP-DX' => ['jdx'],
              'MGF' => ['mgf'],
              'MS2' => ['ms2'],
              'MSF' => ['msf'],
              'mzData' => ['mzdata'],
              'mzML' => ['mzml'],
              'mzXML' => ['mzxml'],
              'OMSSA' => ['omssa', 'omx'],
              'PEFF' => ['peff'],
              'pepXML' => ['pepxml'],
              'protXML' => ['protxml'],
              'Shimadzu' => ['lcd', 'qgd', 'spc'],
              'Skyline' => ['sky', 'skyd'],
              'TPP/SPC' => ['dta'],
              'Tandem' => ['tandem'],
              'TraML' => ['traml'],
              'ULVAC-PHI' => ['tdc'],
          ],

          /**
           * 2.
           * Graph XML
           */
          'GraphXml' => [
              'DGML' => ['dgml'],
              'DotML' => ['dotml'],
              'GEXF' => ['gexf'],
              'GXL' => ['gxl'],
              'GraphML' => ['graphml'],
              'XGMML' => ['xgmml'],
          ],

          /**
           * 2.
           * Graph plain
           */
          'GraphTxt' => [
              'DOT' => ['gv'],
              'GML' => ['gml'],
              'LCF' => ['lcf'],
              # NOTE(review): xsd/sgf are unusual for Newick trees
              # (commonly nwk/newick/tree) — confirm before changing,
              # since these values end up in the database
              'Newick' => ['xsd', 'sgf'],
              'SIF' => ['sif'],
              'TGF' => ['tgf'],
          ],

          /**
           * 2.
           * Image vector
           */
          'ImgVector' => [
              'AI' => ['ai'],
              'CorelDRAW' => ['cdr'],
              'EPS' => ['eps', 'epsf', 'epsi'],
              'SVG' => ['svg'],
              'WMF' => ['emf', 'emz', 'wmf', 'wmz'],
          ],

          /**
           * 2.
           * Image raster
           */
          'ImgRaster' => [
              'Analyze' => ['hdr', 'img'],
              'Interfile' => ['h33'],
              'DICOM' => ['dcm', 'dicom'],
              'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
              'NIfTI' => ['nii', 'nifti'],
              'MINC' => ['minc', 'mnc'],
              'JPEG' => ['jfif', 'jpeg', 'jpg'],
              'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
              'PNG' => ['png'],
              'TIFF' => ['tif', 'tiff'],
              'WebP' => ['webp'],
          ],

          /**
           * 2.
           * Map vector
           */
          'MapVector' => [
              'AutoCAD DXF' => ['dxf'],
              'Cartesian (XYZ)' => ['xyz'],
              'DLG' => ['dlg'],
              'Esri TIN' => ['adf', 'dbf'],
              'GML' => ['gml'],
              'GeoJSON' => ['geojson'],
              'ISFC' => ['isfc'],
              # KMZ is the zipped KML container; the extension is "kmz"
              'KML' => ['kml', 'kmz'],

              # DAT omitted
              # https://en.wikipedia.org/wiki/MapInfo_TAB_format
              'MapInfo TAB' => ['tab', 'ind', 'map', 'id'],
              'Measure Map Pro' => ['mmp'],
              'NTF' => ['ntf'],

              # DBF omitted
              # https://en.wikipedia.org/wiki/Shapefile
              'Shapefile' => ['shp', 'shx'],
              'Spatial Data File' => ['sdf', 'sdf3', 'sif', 'kif'],
              'SOSI' => ['sosi'],
              'SVG' => ['svg'],
              'TIGER' => ['tiger'],
              'VPF' => ['vpf'],
          ],

          /**
           * 2.
           * Map raster
           */
          'MapRaster' => [
              'ADRG' => ['adrg'],
              'Binary' => ['bsq', 'bip', 'bil'],
              'DRG' => ['drg'],
              'ECRG' => ['ecrg'],
              'ECW' => ['ecw'],

              # DAT and ASC omitted (common)
              # https://support.esri.com/en/technical-article/000008526
              # https://web.archive.org/web/20150128024528/http://docs.codehaus.org/display/GEOTOOLS/ArcInfo+ASCII+Grid+format
              'Esri Grid' => ['adf', 'nit', 'asc', 'grd'],
              'GeoTIFF' => ['tfw'],
              #'IMG' => ['img'],
              #'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
              'MrSID' => ['sid'],
              'netCDF' => ['nc'],
              'RPF' => ['cadrg', 'cib'],
          ],

          /**
           * 2.
           * Binary documents
           */
          'BinDoc' => [
              'OpenDocument' => ['odt', 'fodt', 'ods', 'fods', 'odp', 'fodp', 'odg', 'fodg', 'odf'],
              'Word' => ['doc', 'dot', 'wbk', 'docx', 'docm', 'dotx', 'dotm', 'docb'],
              'PowerPoint' => ['ppt', 'pot', 'pps', 'pptx', 'pptm', 'potx', 'potm', 'ppam', 'ppsx', 'ppsm', 'sldx', 'sldm'],
              'Excel' => ['xls', 'xlt', 'xlm', 'xlsx', 'xlsm', 'xltx', 'xltm', 'xlsb', 'xla', 'xlam', 'xll', 'xlw'],
              'PDF' => ['pdf', 'fdf', 'xfdf'],
          ],

          /**
           * 2.
           * Extra formats
           */
          'CpuGen' => [
              'Docker' => ['dockerfile'],
              'Hard Disk' => ['fvd', 'dmg', 'esd', 'qcow', 'qcow2', 'qcow3', 'smi', 'swm', 'vdi', 'vhd', 'vhdx', 'vmdk', 'wim'],
              'Optical Disc' => ['bin', 'ccd', 'cso', 'cue', 'daa', 'isz', 'mdf', 'mds', 'mdx', 'nrg', 'uif'],
              'Python' => ['pxd', 'py', 'py3', 'pyc', 'pyd', 'pyde', 'pyi', 'pyo', 'pyp', 'pyt', 'pyw', 'pywz', 'pyx', 'pyz', 'rpy', 'xpy'],
              'Jupyter' => ['ipynb'],
              'Ontology' => ['cgif', 'cl', 'clif', 'csv', 'htm', 'html', 'kif', 'obo', 'owl', 'rdf', 'rdfa', 'rdfs', 'rif', 'tsv', 'xcl', 'xht', 'xhtml', 'xml'],
          ],
      ], # End $ENV->META->Formats

      /**
       * 1.
       * SCOPES
       */
      'Scopes' => [

          /**
           * 2.
           * SI
           */
          'SI' => [
              'Nano',
              'Micro',
              'Milli',
              'Centi',
              'Kilo',
              'Mega',
              'Giga',
              'Tera',
          ],

          /**
           * 2.
           * Sequences
           */
          'Sequences' => [
              'Contig',
              'Scaffold',
              'Chromosome',
              'Genome',
              'Proteome',
              'Transcriptome',
          ],

          /**
           * 2.
           * Locations
           */
          'Locations' => [
              'Organization',
              'Locality',
              'State',
              'Province',
              'Country',
              'Continent',
              'World',
          ],

          /**
           * 2.
           * XML
           */
          'XML' => [
              'Value',
              'Attribute',
              'Group',
              'Element',
              'Schema',
          ],

          /**
           * 2.
           * Scalar
           */
          'Scalar' => [
              'Area',
              'Density',
              'Distance',
              'Energy',
              'Mass',
              'Speed',
              'Temperature',
              'Time',
              'Volume',
              'Work',
          ],

          /**
           * 2.
           * Vector
           */
          'Vector' => [
              'Acceleration',
              'Displacement',
              'Force',
              'Polarization',
              'Momentum',
              'Position',
              'Thrust',
              'Velocity',
              'Weight',
          ],
      ], # End $ENV->META->Scopes

      /**
       * 1.
       * LICENSES
       */
      'Licenses' => [
          'BSD-2',
          'BSD-3',
          'CC BY',
          'CC BY-SA',
          'CC BY-ND',
          'CC BY-NC',
          'CC BY-NC-SA',
          'CC BY-NC-ND',
          'GNU GPL',
          'GNU LGPL',
          'GNU AGPL',
          'GNU FDL',
          'MIT',
          'ODC-By',
          'ODC-ODbL',
          'OpenMTA',
          'Public Domain',
          'Unspecified',
      ], # End $ENV->META->Licenses
  ];

  # Register the converted metadata under the 'META' key;
  # $CATS below reads it back as $ENV->META->...
  ENV::setPub(
      'META',
      $ENV->convert($META)
  );
  /**
   * Categories
   * https://www.ncbi.nlm.nih.gov/books/NBK25464/
   */

  # Location of the category icons, relative to the static server
  $CatIcons = "{$ENV->STATIC_SERVER}/common/bioicons";

  # Collage category labels; the list position is the numeric ID
  $CollageCats = [
      'Personal',    # 0
      'Theme',       # 1
      'Staff Picks', # 2
      'Group Picks', # 3
  ];
  /**
   * Torrent categories, keyed by their numeric ID.
   *
   * Each entry carries:
   *   - ID:          same number as the array key
   *   - Name:        display label
   *   - Icon:        icon path built from $CatIcons
   *   - Description: one-line help text
   *   - Platforms:   one $ENV->META->Platforms subgroup
   *   - Formats:     list of $ENV->META->Formats subgroups
   *
   * NOTE(review): categories 3–7, 9, and 10 reuse the Graphs platforms
   * (and 3–7 the graph formats) — confirm this is intended and not
   * a placeholder.
   */
  $CATS = [

      # Sequences
      1 => [
          'ID' => 1,
          'Name' => 'Sequences',
          'Icon' => "{$CatIcons}/sequences.png",
          'Description' => "For data that's ACGT, ACGU, amino acid letters on disk.",
          'Platforms' => $ENV->META->Platforms->Sequences,
          'Formats' => [
              $ENV->META->Formats->Sequences,
              $ENV->META->Formats->Proteins,
              $ENV->META->Formats->Plain,
          ],
      ],

      # Graphs
      2 => [
          'ID' => 2,
          'Name' => 'Graphs',
          'Icon' => "{$CatIcons}/graphs.png",
          'Description' => 'For pathway and regulatory network data, structured taxonomies, etc.',
          'Platforms' => $ENV->META->Platforms->Graphs,
          'Formats' => [
              $ENV->META->Formats->GraphXml,
              $ENV->META->Formats->GraphTxt,
              $ENV->META->Formats->Plain,
          ],
      ],

      # Systems
      3 => [
          'ID' => 3,
          'Name' => 'Systems',
          'Icon' => "{$CatIcons}/systems.png",
          'Description' => 'For data that examines one facet broadly, not one subject deeply.',
          'Platforms' => $ENV->META->Platforms->Graphs,
          'Formats' => [
              $ENV->META->Formats->GraphXml,
              $ENV->META->Formats->GraphTxt,
              $ENV->META->Formats->Plain,
          ],
      ],

      # Geometric
      4 => [
          'ID' => 4,
          'Name' => 'Geometric',
          'Icon' => "{$CatIcons}/geometric.png",
          'Description' => "For structured data (XML, etc.) that describes the subject's orientation in space.",
          'Platforms' => $ENV->META->Platforms->Graphs,
          'Formats' => [
              $ENV->META->Formats->GraphXml,
              $ENV->META->Formats->GraphTxt,
              $ENV->META->Formats->Plain,
          ],
      ],

      # Scalars/Vectors
      5 => [
          'ID' => 5,
          'Name' => 'Scalars/Vectors',
          'Icon' => "{$CatIcons}/scalars_vectors.png",
          'Description' => 'For data that describes observations over time and/or space.',
          'Platforms' => $ENV->META->Platforms->Graphs,
          'Formats' => [
              $ENV->META->Formats->GraphXml,
              $ENV->META->Formats->GraphTxt,
              $ENV->META->Formats->Plain,
          ],
      ],

      # Patterns
      6 => [
          'ID' => 6,
          'Name' => 'Patterns',
          'Icon' => "{$CatIcons}/patterns.png",
          'Description' => 'For data that describes recurring structures in nature such as common pathways or motifs in the proteome or metabolome.',
          'Platforms' => $ENV->META->Platforms->Graphs,
          'Formats' => [
              $ENV->META->Formats->GraphXml,
              $ENV->META->Formats->GraphTxt,
              $ENV->META->Formats->Plain,
          ],
      ],

      # Constraints
      7 => [
          'ID' => 7,
          'Name' => 'Constraints',
          'Icon' => "{$CatIcons}/constraints.png",
          'Description' => 'For data that records experimental control behavior, checks readings against known physical constants, tracks the thermodynamic limits of reactions, etc.',
          'Platforms' => $ENV->META->Platforms->Graphs,
          'Formats' => [
              $ENV->META->Formats->GraphXml,
              $ENV->META->Formats->GraphTxt,
              $ENV->META->Formats->Plain,
          ],
      ],

      # Images
      8 => [
          'ID' => 8,
          'Name' => 'Images',
          'Icon' => "{$CatIcons}/images.png",
          'Description' => 'For data you can look at!',
          'Platforms' => $ENV->META->Platforms->Images,
          'Formats' => [
              $ENV->META->Formats->ImgRaster,
              $ENV->META->Formats->ImgVector,
          ],
      ],

      # Spatial
      9 => [
          'ID' => 9,
          'Name' => 'Spatial',
          'Icon' => "{$CatIcons}/spatial.png",
          'Description' => "For data that's limited to specific locations or otherwise describes macroscopic space.",
          'Platforms' => $ENV->META->Platforms->Graphs,
          'Formats' => [
              $ENV->META->Formats->MapRaster,
              $ENV->META->Formats->MapVector,
              $ENV->META->Formats->ImgRaster,
              $ENV->META->Formats->ImgVector,
          ],
      ],

      # Models
      10 => [
          'ID' => 10,
          'Name' => 'Models',
          'Icon' => "{$CatIcons}/models.png",
          'Description' => 'For projections, simulations, and other hypothetical or computer-generated data.',
          'Platforms' => $ENV->META->Platforms->Graphs,
          'Formats' => [
              $ENV->META->Formats->MapRaster,
              $ENV->META->Formats->MapVector,
              $ENV->META->Formats->ImgRaster,
              $ENV->META->Formats->ImgVector,
          ],
      ],

      # Documents
      11 => [
          'ID' => 11,
          'Name' => 'Documents',
          'Icon' => "{$CatIcons}/documents.png",
          'Description' => 'For documentation, software, disk images, and literature datasets.',
          'Platforms' => $ENV->META->Platforms->Documents,
          'Formats' => [
              $ENV->META->Formats->BinDoc,
              $ENV->META->Formats->CpuGen,
              $ENV->META->Formats->Plain,
          ],
      ],

      # Machine Data
      12 => [
          'ID' => 12,
          'Name' => 'Machine Data',
          'Icon' => "{$CatIcons}/machine_data.png",
          'Description' => 'For raw reads and machine data of any category.',
          'Platforms' => $ENV->META->Platforms->Raw,
          'Formats' => [
              $ENV->META->Formats->Plain,
          ],
      ],
  ];

  # Register the converted category table under the 'CATS' key
  ENV::setPub('CATS', $ENV->convert($CATS));
  /**
   * Regular expressions
   *
   * The Gazelle regex collection.
   * Formerly in classes/regex.php.
   *
   * Every pattern in this section is defined twice with the same text:
   * once as a global constant and once as an ENV public value.
   * Keep both copies in sync when editing.
   *
   * Except for USERNAME_REGEX, the patterns carry no delimiters or
   * anchors and have "/" pre-escaped, so callers are expected to wrap
   * them themselves.
   */

  // resource_type://username:password@domain:port/path?query_string#anchor

  # URL scheme plus "://" (one capture group: the scheme)
  define('RESOURCE_REGEX', '(https?|ftps?|dat|ipfs):\/\/');
  ENV::setPub(
      'RESOURCE_REGEX',
      '(https?|ftps?|dat|ipfs):\/\/'
  );

  # Dotted quad; octet values are not range-checked
  define('IP_REGEX', '(\d{1,3}\.){3}\d{1,3}');
  ENV::setPub(
      'IP_REGEX',
      '(\d{1,3}\.){3}\d{1,3}'
  );

  # Lowercase character classes only; matching uppercase hosts
  # depends on the caller adding the "i" modifier
  define('DOMAIN_REGEX', '([a-z0-9\-\_]+\.)*[a-z0-9\-\_]+');
  ENV::setPub(
      'DOMAIN_REGEX',
      '([a-z0-9\-\_]+\.)*[a-z0-9\-\_]+'
  );

  define('PORT_REGEX', ':\d{1,5}');
  ENV::setPub(
      'PORT_REGEX',
      ':\d{1,5}'
  );

  # scheme + (IP or domain) + optional port + optional path segments
  define('URL_REGEX', '('.RESOURCE_REGEX.')('.IP_REGEX.'|'.DOMAIN_REGEX.')('.PORT_REGEX.')?(\/\S*)*');
  ENV::setPub(
      'URL_REGEX',
      "($ENV->RESOURCE_REGEX)($ENV->IP_REGEX|$ENV->DOMAIN_REGEX)($ENV->PORT_REGEX)?(\/\S*)*"
  );

  # The only pattern here that ships with delimiters, anchors, and modifiers
  define('USERNAME_REGEX', '/^[a-z0-9_]{2,20}$/iD');
  ENV::setPub(
      'USERNAME_REGEX',
      '/^[a-z0-9_]{2,20}$/iD'
  );

  define('EMAIL_REGEX', '[_a-z0-9-]+([.+][_a-z0-9-]+)*@'.DOMAIN_REGEX);
  ENV::setPub(
      'EMAIL_REGEX',
      "[_a-z0-9-]+([.+][_a-z0-9-]+)*@$ENV->DOMAIN_REGEX"
  );

  # URLs ending in a known file extension, with an optional query string
  define('IMAGE_REGEX', URL_REGEX.'\/\S+\.(jpg|jpeg|tif|tiff|png|gif|bmp)(\?\S*)?');
  ENV::setPub(
      'IMAGE_REGEX',
      "$ENV->URL_REGEX\/\S+\.(jpg|jpeg|tif|tiff|png|gif|bmp)(\?\S*)?"
  );

  define('VIDEO_REGEX', URL_REGEX.'\/\S+\.(webm)(\?\S*)?');
  ENV::setPub(
      'VIDEO_REGEX',
      "$ENV->URL_REGEX\/\S+\.(webm)(\?\S*)?"
  );

  define('CSS_REGEX', URL_REGEX.'\/\S+\.css(\?\S*)?');
  ENV::setPub(
      'CSS_REGEX',
      "$ENV->URL_REGEX\/\S+\.css(\?\S*)?"
  );

  # Links to this site: scheme, optional "www.", then the quoted SITE_DOMAIN.
  # The dot after "www" is escaped so that only a literal "www." prefix
  # matches (an unescaped dot would also accept e.g. "wwwX").
  define('SITELINK_REGEX', RESOURCE_REGEX.'(www\.)?'.preg_quote(SITE_DOMAIN, '/'));
  ENV::setPub(
      'SITELINK_REGEX',
      "$ENV->RESOURCE_REGEX(www\.)?".preg_quote(SITE_DOMAIN, '/')
  );

  # Capture groups for the three patterns below:
  # 1 = scheme, 2 = optional "www.", 3 = leading query parameters, 4 = numeric ID
  define('TORRENT_REGEX', SITELINK_REGEX.'\/torrents\.php\?(.*&)?torrentid=(\d+)'); // torrentid = group 4
  ENV::setPub(
      'TORRENT_REGEX',
      "$ENV->SITELINK_REGEX\/torrents\.php\?(.*&)?torrentid=(\d+)"
  );

  define('TORRENT_GROUP_REGEX', SITELINK_REGEX.'\/torrents\.php\?(.*&)?id=(\d+)'); // id = group 4
  ENV::setPub(
      'TORRENT_GROUP_REGEX',
      "$ENV->SITELINK_REGEX\/torrents\.php\?(.*&)?id=(\d+)"
  );

  define('ARTIST_REGEX', SITELINK_REGEX.'\/artist\.php\?(.*&)?id=(\d+)'); // id = group 4
  ENV::setPub(
      'ARTIST_REGEX',
      "$ENV->SITELINK_REGEX\/artist\.php\?(.*&)?id=(\d+)"
  );
  /**
   * Markup and identifier patterns (ENV only; no legacy constants).
   * Like the URL patterns, these carry no delimiters, anchors, or
   * modifiers, and "/" is pre-escaped.
   */

  # Recursive match of a tag and its balanced closing tag:
  # (?R) recurses into nested tags, \1 must repeat the opening tag name
  # https://stackoverflow.com/a/3180176
  ENV::setPub(
      'HTML_REGEX',
      '<([\w]+)([^>]*?)(([\s]*\/>)|(>((([^<]*?|<\!\-\-.*?\-\->)|(?R))*)<\/\\1[\s]*>))'
  );

  # Same structure as HTML_REGEX with square brackets in place of angle brackets
  ENV::setPub(
      'BBCODE_REGEX',
      '\[([\w]+)([^\]]*?)(([\s]*\/\])|(\]((([^\[]*?|\[\!\-\-.*?\-\-\])|(?R))*)\[\/\\1[\s]*\]))'
  );

  # The dot after "10" is escaped so that only the literal "10." prefix
  # matches (an unescaped dot would also accept e.g. "10X1234/...").
  # The suffix class is uppercase only; matching lowercase DOIs
  # depends on the caller adding the "i" modifier.
  # https://www.crossref.org/blog/dois-and-matching-regular-expressions/
  ENV::setPub(
      'DOI_REGEX',
      '10\.\d{4,9}\/[-._;()\/:A-Z0-9]+'
  );

  # NOTE(review): \d* also matches the empty string —
  # confirm whether \d+ was intended
  # https://www.biostars.org/p/13753/
  ENV::setPub(
      'ENTREZ_REGEX',
      '\d*'
  );

  # https://www.wikidata.org/wiki/Property:P496
  ENV::setPub(
      'ORCID_REGEX',
      '0000-000(1-[5-9]|2-[0-9]|3-[0-4])\d{3}-\d{3}[\dX]'
  );

  # Two word characters, underscore, digits, dot, version digits
  # https://www.biostars.org/p/13753/
  ENV::setPub(
      'REFSEQ_REGEX',
      '\w{2}_\d{1,}\.\d{1,}'
  );

  # The two accession alternatives are wrapped in a non-capturing group
  # so the pattern stays intact when a caller adds anchors or embeds it
  # in a larger expression; capture group numbering is unchanged.
  # https://www.uniprot.org/help/accession_numbers
  ENV::setPub(
      'UNIPROT_REGEX',
      '(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})'
  );