PUBLIC_VALUE;
* $ENV->getPriv('PRIVATE_VALUE');
*
* Using a central static $ENV singleton class has additional benefits.
* The RecursiveArrayObject class included in env.class.php is a powerful tool:
*
* $LongArray = [];
* ENV::setPub(
* 'CONFIG',
* $ENV->convert($LongArray)
* );
*
* $ENV = ENV::go();
* foreach ($ENV->CATS as $Cat) {
* var_dump($Cat->Name);
* }
*
* One more example using custom RecursiveArrayObject methods:
* @see https://www.php.net/manual/en/class.arrayobject.php
*
* var_dump(
* $ENV->dedupe(
* $ENV->META->Formats->Sequences,
* $ENV->META->Formats->Proteins->toArray()
* )
* );
*/
# Initialize
# Load the ENV class and grab the instance that every statement below reads from
require_once 'env.class.php';
$ENV = ENV::go();
# Basic info
# Minimum PHP version string, published as a public ENV value
ENV::setPub('PHP_MIN', '7.4.0');
# Development switch: the `!$ENV->DEV ? production : development` choices below read this flag
ENV::setPub('DEV', true);
/**
 * Site identity
 */

# Site name: the development name is used whenever $ENV->DEV is truthy
ENV::setPub(
    'SITE_NAME',
    $ENV->DEV
        ? 'dev.torrents.bio' # Development
        : 'torrents.bio'     # Production
);

# Meta description
ENV::setPub(
    'DESCRIPTION',
    'A platform to share biological sequence and medical imaging data'
);

# Navigation glyphs
ENV::setPub('SEP', '-');   # Separator glyph
ENV::setPub('CRUMB', '›'); # Breadcrumb glyph, e.g., Forums › Board › Thread
# The FQDN of your site, e.g., dev.biotorrents.de

# Old format: global constant
define(
    'SITE_DOMAIN',
    $ENV->DEV
        ? 'dev.biotorrents.de' # Development
        : 'biotorrents.de'     # Production
);

# New format: public $ENV value carrying the same domain
ENV::setPub(
    'SITE_DOMAIN',
    $ENV->DEV
        ? 'dev.biotorrents.de' # Development
        : 'biotorrents.de'     # Production
);

# Old domain, to handle the biotorrents.de => torrents.bio migration
# If not needed, simply set to the same values as $ENV->SITE_DOMAIN
ENV::setPub(
    'OLD_SITE_DOMAIN',
    $ENV->DEV
        ? 'dev.biotorrents.de' # Development
        : 'biotorrents.de'     # Production
);
# The FQDN of your image host, e.g., pics.biotorrents.de
ENV::setPub('IMAGE_DOMAIN', 'pics.biotorrents.de');

# Web root. Currently used for Twig but may also include config files
ENV::setPub('WEB_ROOT', '/var/www/');

# The root of the server, used for includes, e.g., /var/www/html/dev.biotorrents.de/
# Old format: global constant
define(
    'SERVER_ROOT',
    $ENV->DEV
        ? '/var/www/html/dev.biotorrents.de/' # Development
        : '/var/www/html/biotorrents.de/'     # Production
);

# New format: public $ENV value
ENV::setPub(
    'SERVER_ROOT',
    $ENV->DEV
        ? '/var/www/html/dev.biotorrents.de/' # Development
        : '/var/www/html/biotorrents.de/'     # Production
);

# Where torrent files are stored, e.g., /var/www/torrents-dev/
# Old format: global constant
define(
    'TORRENT_STORE',
    $ENV->DEV
        ? '/var/www/torrents-dev/' # Development
        : '/var/www/torrents/'     # Production
);

# New format: public $ENV value
ENV::setPub(
    'TORRENT_STORE',
    $ENV->DEV
        ? '/var/www/torrents-dev/' # Development
        : '/var/www/torrents/'     # Production
);

# Allows you to run static content off another server. Default is usually what you want
define('STATIC_SERVER', '/static/');
ENV::setPub('STATIC_SERVER', '/static/');

# The hashing algorithm used for SRI
ENV::setPub('SRI', 'sha384');
/**
* Tech support
*/
/*
$TechSupport = [
'Email' => 'help@biotorrents.de',
'Subject' => '[TxID '.strtoupper(bin2hex(random_bytes(2))).'] Specific subject line with TxID intact',
'Body' => 'A detailed description of how you reach the error and the full text of any site messages you may receive.'
];
ENV::setPub(
'HELP',
$ENV->convert($TechSupport)
);
*/
/**
 * App keys
 *
 * Development and production each get their own set of keys,
 * so one environment's config cannot overwrite the other's.
 */

# Pre-shared key for generating hmacs for the image proxy
ENV::setPriv('IMAGE_PSK', '');

if ($ENV->DEV) {
    # Development
    ENV::setPriv('ENCKEY', '');
    ENV::setPriv('SCHEDULE_KEY', '');
    ENV::setPriv('RSS_HASH', '');
    ENV::setPriv('SELF_API', '');
} else {
    # Production

    # Unused in OT Gazelle. Currently used for API token auth
    ENV::setPriv('ENCKEY', '');

    # Alphanumeric random key. This key must be the argument to schedule.php for the schedule to work
    ENV::setPriv('SCHEDULE_KEY', '');

    # Random key. Used for generating unique RSS auth key
    ENV::setPriv('RSS_HASH', '');

    # System API key. Used for getting resources via Json->fetch()
    ENV::setPriv('SELF_API', '');
}
/**
 * Database
 */

# Common info
ENV::setPriv('SQLHOST', '10.0.0.3');
ENV::setPriv('SQLPORT', 3306);
#ENV::setPriv('SQLSOCK', '/var/run/mysqld/mysqld.sock');

# TLS client certs
# rtrim() keeps a single separator whether or not WEB_ROOT ends in a slash;
# plain interpolation produced "/var/www//sql-keys/..." with the default WEB_ROOT.
ENV::setPriv('SQL_CERT', rtrim($ENV->WEB_ROOT, '/') . '/sql-keys/client-cert.pem');
ENV::setPriv('SQL_KEY', rtrim($ENV->WEB_ROOT, '/') . '/sql-keys/client-key.pem');
ENV::setPriv('SQL_CA', rtrim($ENV->WEB_ROOT, '/') . '/sql-keys/ca.pem');

# Production
if (!$ENV->DEV) {
    ENV::setPriv('SQLDB', 'gazelle_production');
    ENV::setPriv('SQLLOGIN', 'gazelle_production');
    ENV::setPriv('SQLPASS', '');
}
# Development
else {
    ENV::setPriv('SQLDB', 'gazelle_development');
    ENV::setPriv('SQLLOGIN', 'gazelle_development');
    ENV::setPriv('SQLPASS', '');
}
/**
 * Tracker
 */

# Ocelot connection, e.g., 0.0.0.0
ENV::setPriv('TRACKER_HOST', '0.0.0.0');

if ($ENV->DEV) {
    # Development
    ENV::setPriv('TRACKER_PORT', 34001);
    ENV::setPriv('TRACKER_SECRET', '');
    ENV::setPriv('TRACKER_REPORTKEY', '');
} else {
    # Production
    ENV::setPriv('TRACKER_PORT', 34000);

    # Must be 32 alphanumeric characters and match site_password in ocelot.conf
    ENV::setPriv('TRACKER_SECRET', '');

    # Must be 32 alphanumeric characters and match report_password in ocelot.conf
    ENV::setPriv('TRACKER_REPORTKEY', '');
}
/**
 * Tracker URLs
 *
 * Added to torrents à la http://bittorrent.org/beps/bep_0012.html
 */

# Pick the tiered tracker list for the active environment
if ($ENV->DEV) {
    # Development
    $AnnounceURLs = [
        ['https://trx.biotorrents.de:443'], # Tier 1
        [],                                 # Tier 2
    ];
} else {
    # Production
    $AnnounceURLs = [
        ['https://track.biotorrents.de:443'], # Tier 1
        [
            # Tier 2
            #'udp://tracker.coppersurfer.tk:6969/announce',
            #'udp://tracker.cyberia.is:6969/announce',
            #'udp://tracker.leechers-paradise.org:6969/announce',
        ],
    ];
}

# Old format: global constant holding the plain nested array
define('ANNOUNCE_URLS', $AnnounceURLs);

# New format: the same list passed through $ENV->convert()
ENV::setPub(
    'ANNOUNCE_URLS',
    $ENV->convert($AnnounceURLs)
);
/**
 * Search
 */
# SphinxqlQuery needs constants
# $ENV breaks the torrent and request pages
# These four values are therefore defined only as global constants, with no ENV::setPub() twin
define('SPHINXQL_HOST', '127.0.0.1');
define('SPHINXQL_PORT', 9306);
define('SPHINXQL_SOCK', false);
define('SPHINX_MAX_MATCHES', 1000); // Must be <= the server's max_matches variable (default 1000)
/**
 * memcached
 *
 * Very important to run two instances,
 * one each for development and production.
 */

# Only the socket path differs between the two environments
ENV::setPriv(
    'MEMCACHED_SERVERS',
    [[
        'host' => $ENV->DEV
            ? 'unix:///var/run/memcached/memcached-dev.sock' # Development
            : 'unix:///var/run/memcached/memcached.sock',    # Production
        'port' => 0,
        'buckets' => 1
    ]]
);
/**
 * IRC/Slack
 */
# IRC server address. Used for onsite chat tool
# The host is "irc." followed by the public $ENV->SITE_DOMAIN value
define('BOT_SERVER', "irc.$ENV->SITE_DOMAIN");
define('SOCKET_LISTEN_ADDRESS', 'localhost');
define('SOCKET_LISTEN_PORT', 51010);
define('BOT_NICK', 'ebooks');
# IRC channels for official business
# Note: ADMIN_CHAN shares #staff with STAFF_CHAN, and DISABLED_CHAN shares #support with HELP_CHAN
define('ANNOUNCE_CHAN', '#announce');
define('DEBUG_CHAN', '#debug');
define('REQUEST_CHAN', '#requests');
define('STAFF_CHAN', '#staff');
define('ADMIN_CHAN', '#staff');
define('HELP_CHAN', '#support');
define('DISABLED_CHAN', '#support');
#define('BOT_CHAN', '#userbots');
# Slack invite link (empty string here)
ENV::setPub(
'SLACK_INVITE',
''
);
/**
* ================
* = NO MORE =
* = PRIVATE INFO =
* ================
*/
/**
 * Features
 *
 * Boolean feature switches, published as public ENV values.
 */
# Enable donation page
ENV::setPub('FEATURE_DONATE', true);
# Send re-enable requests to user's email
# Declared both as a global constant (old format) and as an ENV value
define('FEATURE_EMAIL_REENABLE', true);
ENV::setPub('FEATURE_EMAIL_REENABLE', true);
# Require users to verify login from unknown locations
ENV::setPub('FEATURE_ENFORCE_LOCATIONS', false);
# Attempt to send messages to IRC
ENV::setPub('FEATURE_IRC', true);
# Attempt to send email from the site
ENV::setPub('FEATURE_SEND_EMAIL', true);
# Allow the site encryption key to be set without an account
# (should only be used for initial setup)
ENV::setPub('FEATURE_SET_ENC_KEY_PUBLIC', false);
# Attempt to support the BioPHP library
# https://packagist.org/packages/biotorrents/biophp
# https://blog.libredna.org/post/seqhash/
ENV::setPub('FEATURE_BIOPHP', true);
/**
 * Settings
 */

# Set to false if you don't want everyone to see debug information; can be overriden with 'site_debug'
# Both environments use false: Gazelle's debug mode is broken
define('DEBUG_MODE', false);
ENV::setPub('DEBUG_MODE', false);

if ($ENV->DEV) {
    # Development relies on PHP errors instead of Gazelle's debug mode
    error_reporting(E_ALL);
}

# Set to false to disable open registration, true to allow anyone to register
# Open in production, closed in development
ENV::setPub('OPEN_REGISTRATION', !$ENV->DEV);

# The maximum number of users the site can have, 0 for no limit
define('USER_LIMIT', 0);
ENV::setPub('USER_LIMIT', 0);

# User perks
ENV::setPub('STARTING_INVITES', 2);
ENV::setPub('STARTING_TOKENS', 2);
ENV::setPub('STARTING_UPLOAD', 5368709120);
ENV::setPub('DONOR_INVITES', 2);

# Bonus Points
define('BONUS_POINTS', 'Bonus Points');
ENV::setPub('BONUS_POINTS', 'Bonus Points');
ENV::setPub('BP_COEFF', 1.5); # OT default 0.5
# Tag namespaces (configurable via CSS selectors)
#define('TAG_NAMESPACES', ['male', 'female', 'parody', 'character']);

# Banned stuff (file characters, browsers, etc.)
ENV::setPub(
    'BAD_CHARS',
    ['"', '*', '/', ':', '<', '>', '?', '\\', '|']
);

# Set to true to block Opera Mini proxy
ENV::setPub('BLOCK_OPERA_MINI', true);

# Password length limits
ENV::setPub('PW_MIN', 15); # Brute force
ENV::setPub('PW_MAX', 10000); # DDoS; default 307200

# Misc stuff like generic reusable snippets
# Example of a variable using heredoc syntax
# The opener must be `<<<HTML` to pair with the closing `HTML` marker; a bare `<<` is a parse error.
# $ENV->PW_MIN is interpolated into the text when this statement runs.
# NOTE(review): the text reads as if list markup was lost from the heredoc body — confirm against the intended HTML.
ENV::setPub(
    'PW_ADVICE',
    <<<HTML
Any password $ENV->PW_MIN characters or longer is accepted, but a strong password
- is a passphrase of mixed case with many small words,
- that contains complex characters including Unicode and emoji.
HTML
);
/**
 * Services
 *
 * Public APIs, domains, etc.
 * Not intended for private API keys.
 */
# Current Sci-Hub domains
# https://lovescihub.wordpress.com
# The old-format constant holds a single TLD; the ENV value holds a list of three
define('SCI_HUB', 'se');
ENV::setPub(
'SCI_HUB',
['ren', 'tw', 'se']
);
# Semantic Scholar
# https://api.semanticscholar.org
ENV::setPub('SS', 'https://api.semanticscholar.org/v1/paper/');
# IP Geolocation
# URL prefix ending in "host=" (presumably the address to look up is appended by the caller — confirm)
ENV::setPub('IP_GEO', 'https://tools.keycdn.com/geo.json?host=');
/**
 * User class IDs
 *
 * Needed for automatic promotions.
 * Found in the `permissions` table.
 */
# Name of class Class ID (not level)
# Note: every ID is defined as a string, not an integer
define('ADMIN', '1');
define('USER', '2');
define('MEMBER', '3');
define('POWER', '4');
define('ELITE', '5');
define('LEGEND', '8');
define('MOD', '11');
define('SYSOP', '15');
define('ARTIST', '19');
define('DONOR', '20');
define('VIP', '21');
define('TORRENT_MASTER', '23');
define('POWER_TM', '24');
define('FLS_TEAM', '33');
define('FORUM_MOD', '9001');
/**
 * Forums
 */
# Forum IDs: the first two are old-format constants, the rest are ENV values
define('STAFF_FORUM', 3);
define('DONOR_FORUM', 7);
ENV::setPub('TRASH_FORUM', 8);
ENV::setPub('ANNOUNCEMENT_FORUM', 1);
ENV::setPub('SUGGESTIONS_FORUM', 2);
# Pagination
define('TORRENT_COMMENTS_PER_PAGE', 10);
define('POSTS_PER_PAGE', 25);
define('TOPICS_PER_PAGE', 50);
define('TORRENTS_PER_PAGE', 50);
define('REQUESTS_PER_PAGE', 25);
define('MESSAGES_PER_PAGE', 25);
define('LOG_ENTRIES_PER_PAGE', 50);
# Cache catalogues
define('THREAD_CATALOGUE', 500); // Limit to THREAD_CATALOGUE posts per cache key
# Miscellaneous values
define('MAX_RANK', 6);
define('MAX_EXTRA_RANK', 8);
define('MAX_SPECIAL_RANK', 3);
ENV::setPub('DONOR_FORUM_RANK', 6);
/**
 * Ratio and badges
 */

# Ratio requirements, in descending order
# Each tier is written in GiB and scaled to bytes when the constant is built
define('RATIO_REQUIREMENTS', array_map(
    static function (array $tier): array {
        [$gibDownloaded, $reqUnseeded, $reqSeeded] = $tier;
        return [$gibDownloaded * 1024**3, $reqUnseeded, $reqSeeded];
    },
    [
        # GiB downloaded, Req (0% seed), Req (100% seed)
        [200, 0.60, 0.60],
        [160, 0.60, 0.50],
        [120, 0.50, 0.40],
        [100, 0.40, 0.30],
        [80, 0.30, 0.20],
        [60, 0.20, 0.10],
        [40, 0.15, 0.00],
        [20, 0.10, 0.00],
        [10, 0.05, 0.00],
    ]
));
# Automated badge IDs, keyed by stat type and then by threshold => badge ID
# PHP stores numeric string keys as integers, so integer keys build the same array
$AutomatedBadgeIDs = [
    # Download thresholds
    'DL' => [
        8 => 10,
        16 => 11,
        32 => 12,
        64 => 13,
        128 => 14,
        256 => 15,
        512 => 16,
        1024 => 17,
        2048 => 18,
    ],

    # Upload thresholds
    'UL' => [
        16 => 20,
        32 => 21,
        64 => 22,
        128 => 23,
        256 => 24,
        512 => 25,
        1024 => 26,
        2048 => 27,
        4096 => 28,
    ],

    # Post-count thresholds
    'Posts' => [
        5 => 30,
        10 => 31,
        25 => 32,
        50 => 33,
        100 => 34,
        250 => 35,
        500 => 36,
        1000 => 37,
        2500 => 38,
    ],
];

ENV::setPub('AUTOMATED_BADGE_IDS', $ENV->convert($AutomatedBadgeIDs));
/**
 * Site categories and meta
 *
 * THIS IS THE OLD FORMAT AND WILL GO AWAY.
 * PLEASE SEE $ENV->{DB,META,CATS} BELOW.
 */
# Categories
# Twelve names, in the same order as the 'Name' fields of the $CATS entries defined later in this file
$Categories = [
'Sequences',
'Graphs',
'Systems',
'Geometric',
'Scalars/Vectors',
'Patterns',
'Constraints',
'Images',
'Spatial',
'Models',
'Documents',
'Machine Data',
];
# Plain copy of the list above
$GroupedCategories = $Categories;
# Legacy format maps: format name => list of lowercase file extensions
# Plain Formats
$PlainFormats = [
'CSV' => ['csv'],
'JSON' => ['json'],
'Text' => ['txt'],
'XML' => ['xml'],
];
# Sequence Formats
$SeqFormats = [
'BAM' => ['bam'],
'CRAM' => ['cram'],
'EMBL' => ['embl'],
'FASTA' => ['fa', 'fasta', 'fsa'],
'FASTA+QUAL' => ['qual'],
'CSFASTA' => ['csfa', 'csfasta', 'csfsa'],
'FASTQ' => ['fastq', 'fq', 'sanfastq'],
'GFF' => ['gff', 'gff2', 'gff3'],
'GTF' => ['gtf'],
'GenBank' => ['gb', 'gbk', 'genbank'],
'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
'PIR' => ['pir'],
'QSeq' => ['qseq'],
'SAM' => ['sam'],
'SFF' => ['sff'],
'SRF' => ['srf'],
'SnapGene' => ['dna', 'seq'],
'SwissProt' => ['dat'],
'VCF' => ['vcf'],
];
# Protein Formats
# DON'T PARSE RAW FILES. TOO MANY COMPETING VENDORS
# Note: 'dat' appears here under Finnigan and above under SwissProt
$ProtFormats = [
'ABI/Sciex' => ['t2d', 'wiff'],
'APML' => ['apml'],
'ASF' => ['asf'],
'Agilent/Bruker' => ['baf', 'd', 'fid', 'tdf', 'yep'],
'BlibBuild' => ['blib'],
'Bruker/Varian' => ['sms', 'xms'],
'Finnigan' => ['dat', 'ms'],
'ION-TOF' => ['ita', 'itm'],
'JCAMP-DX' => ['jdx'],
'MGF' => ['mgf'],
'MS2' => ['ms2'],
'MSF' => ['msf'],
'mzData' => ['mzdata'],
'mzML' => ['mzml'],
'mzXML' => ['mzxml'],
'OMSSA' => ['omssa', 'omx'],
'PEFF' => ['peff'],
'pepXML' => ['pepxml'],
'protXML' => ['protxml'],
'Shimadzu' => ['lcd', 'qgd', 'spc'],
'Skyline' => ['sky', 'skyd'],
'TPP/SPC' => ['dta'],
'Tandem' => ['tandem'],
'TraML' => ['traml'],
'ULVAC-PHI' => ['tdc'],
];
# Legacy format maps, continued: format name => list of lowercase file extensions
# XML Graph Formats
$GraphXmlFormats = [
'DGML' => ['dgml'],
'DotML' => ['dotml'],
'GEXF' => ['gexf'],
'GXL' => ['gxl'],
'GraphML' => ['graphml'],
'XGMML' => ['xgmml'],
];
# Text Graph Formats
# NOTE(review): 'xsd' and 'sgf' are unusual extensions for Newick — confirm they are intended
$GraphTxtFormats = [
'DOT' => ['gv'],
'GML' => ['gml'],
'LCF' => ['lcf'],
'Newick' => ['xsd', 'sgf'],
'SIF' => ['sif'],
'TGF' => ['tgf'],
];
# Image Formats
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3948928/
$ImgFormats = [
'Analyze' => ['hdr', 'img'],
'Interfile' => ['h33'],
'DICOM' => ['dcm', 'dicom'],
'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
'NIfTI' => ['nii', 'nifti'],
'MINC' => ['minc', 'mnc'],
'JPEG' => ['jfif', 'jpeg', 'jpg'],
'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
'PNG' => ['png'],
'TIFF' => ['tif', 'tiff'],
'WebP' => ['webp'],
];
# Vector Map Formats
# Legacy map of format name => list of lowercase file extensions
$MapVectorFormats = [
    'AutoCAD DXF' => ['dxf'],
    'Cartesian (XYZ)' => ['xyz'],
    'DLG' => ['dlg'],
    'Esri TIN' => ['adf', 'dbf'],
    'GML' => ['gml'],
    'GeoJSON' => ['geojson'],
    'ISFC' => ['isfc'],
    # Zipped KML archives use the .kmz extension (was misspelled 'kmzv')
    'KML' => ['kml', 'kmz'],
    # DAT omitted
    # https://en.wikipedia.org/wiki/MapInfo_TAB_format
    'MapInfo TAB' => ['tab', 'ind', 'map', 'id'],
    'Measure Map Pro' => ['mmp'],
    'NTF' => ['ntf'],
    # DBF omitted
    # https://en.wikipedia.org/wiki/Shapefile
    'Shapefile' => ['shp', 'shx'],
    'Spatial Data File' => ['sdf', 'sdf3', 'sif', 'kif'],
    'SOSI' => ['sosi'],
    'SVG' => ['svg'],
    'TIGER' => ['tiger'],
    'VPF' => ['vpf'],
];
# Legacy format maps, continued: format name => list of lowercase file extensions
# Raster Map Formats
$MapRasterFormats = [
'ADRG' => ['adrg'],
'Binary' => ['bsq', 'bip', 'bil'],
'DRG' => ['drg'],
'ECRG' => ['ecrg'],
'ECW' => ['ecw'],
# DAT and ASC omitted (common)
# https://support.esri.com/en/technical-article/000008526
# https://web.archive.org/web/20150128024528/http://docs.codehaus.org/display/GEOTOOLS/ArcInfo+ASCII+Grid+format
# NOTE(review): 'asc' is nevertheless listed on the next line — confirm which is intended
'Esri Grid' => ['adf', 'nit', 'asc', 'grd'],
'GeoTIFF' => ['tfw'],
#'IMG' => ['img'],
#'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
'MrSID' => ['sid'],
'netCDF' => ['nc'],
'RPF' => ['cadrg', 'cib'],
];
# Binary Document Formats
# https://en.wikipedia.org/wiki/OpenDocument
# https://en.wikipedia.org/wiki/List_of_Microsoft_Office_filename_extensions
$BinDocFormats = [
'OpenDocument' => ['odt', 'fodt', 'ods', 'fods', 'odp', 'fodp', 'odg', 'fodg', 'odf'],
'Word' => ['doc', 'dot', 'wbk', 'docx', 'docm', 'dotx', 'dotm', 'docb'],
'PowerPoint' => ['ppt', 'pot', 'pps', 'pptx', 'pptm', 'potx', 'potm', 'ppam', 'ppsx', 'ppsm', 'sldx', 'sldm'],
'Excel' => ['xls', 'xlt', 'xlm', 'xlsx', 'xlsm', 'xltx', 'xltm', 'xlsb', 'xla', 'xlam', 'xll', 'xlw'],
'PDF' => ['pdf', 'fdf', 'xfdf'],
];
# Extra Formats
# DON'T PARSE IMG OR ISO FILES
# https://en.wikipedia.org/wiki/Disk_image#File_formats
# http://dcjtech.info/topic/python-file-extensions/
$CpuGenFormats = [
'Docker' => ['dockerfile'],
'Hard Disk' => ['fvd', 'dmg', 'esd', 'qcow', 'qcow2', 'qcow3', 'smi', 'swm', 'vdi', 'vhd', 'vhdx', 'vmdk', 'wim'],
'Optical Disc' => ['bin', 'ccd', 'cso', 'cue', 'daa', 'isz', 'mdf', 'mds', 'mdx', 'nrg', 'uif'],
'Python' => ['pxd', 'py', 'py3', 'pyc', 'pyd', 'pyde', 'pyi', 'pyo', 'pyp', 'pyt', 'pyw', 'pywz', 'pyx', 'pyz', 'rpy', 'xpy'],
'Jupyter' => ['ipynb'],
'Ontology' => ['cgif', 'cl', 'clif', 'csv', 'htm', 'html', 'kif', 'obo', 'owl', 'rdf', 'rdfa', 'rdfs', 'rif', 'tsv', 'xcl', 'xht', 'xhtml', 'xml'],
];
# Resolutions
# Same six values as the 'Sequences' list under 'Scopes' in $META later in this file
$Resolutions = [
'Contig',
'Scaffold',
'Chromosome',
'Genome',
'Proteome',
'Transcriptome',
];
/**
 * $ENV->DB
 *
 * One flat array with all possible torrent/group fields.
 * These are mostly used in Twig templates as {{ db.title }}.
 * Meta abstraction layer for flavor text *around* DB fields.
 * Gazelle's job is to query the right tables, which will shift.
 *
 * Each entry maps a field key to a display 'name', a 'desc' hint,
 * and for some fields an extra 'note'.
 */
$DB = [
    # torrents_group
    'category_id' => ['name' => 'Category', 'desc' => ''],
    'title' => ['name' => 'Torrent Title', 'desc' => 'Definition line, e.g., Alcohol dehydrogenase ADH1'],
    'subject' => ['name' => 'Organism', 'desc' => 'Organism line binomial, e.g., Saccharomyces cerevisiae'],
    'object' => ['name' => 'Strain/Variety', 'desc' => 'Organism line if any, e.g., S288C'],
    'year' => ['name' => 'Year', 'desc' => 'Publication year'],
    'workgroup' => ['name' => 'Department/Lab', 'desc' => "Last author's institution, e.g., Lawrence Berkeley Laboratory"],
    'location' => ['name' => 'Location', 'desc' => 'Physical location, e.g., Berkeley, CA 94720'],
    'identifier' => ['name' => 'Accession Number', 'desc' => 'RefSeq and UniProt preferred'],
    # Spelling fixed in the description text: "seperated" => "separated"
    'tag_list' => ['name' => 'Tag List', 'desc' => 'Comma-separated list of at least 5 tags'],
    'timestamp' => ['name' => 'Uploaded On', 'desc' => ''],
    'revision_id' => ['name' => 'Revision ID', 'desc' => ''],
    'description' => ['name' => 'Group Description', 'desc' => ''],
    'picture' => ['name' => 'Picture', 'desc' => 'A meaningful picture, e.g., the specimen or a thumbnail'],

    # From the non-renamed `torrents` table
    'version' => ['name' => 'Version', 'desc' => 'Start with 0.1.0', 'note' => 'Please see Semantic Versioning'],
    'license' => ['name' => 'License', 'desc' => '', 'note' => 'Please see How to License Research Data'],
    'mirrors' => ['name' => 'Mirrors', 'desc' => 'Up to two FTP/HTTP addresses that either point directly to a file, or for multi-file torrents, to the enclosing folder'],

    # Original fields
    'seqhash' => ['name' => 'Seqhash', 'desc' => 'Sample genome sequence in FASTA format (GenBank pending)', 'note' => 'Please see The Seqhash Algorithm'],
];

ENV::setPub(
    'DB',
    $ENV->convert($DB)
);
/**
 * $ENV->META
 *
 * Main metadata object.
 * Responsible for defining field values.
 * These eventually go into the database,
 * so take care to define them well here.
 * Avoid nesting > 3 levels deep.
 *
 * Level 1 keys: Platforms, Formats, Scopes, Licenses.
 * Every Formats group maps a format name to its lowercase file extensions.
 */
$META = [
    # 1. PLATFORMS
    'Platforms' => [
        # 2. Sequences
        'Sequences' => [
            # DNA
            'Complete Genomics',
            'cPAS-BGI/MGI',
            'Helicos',
            'Illumina HiSeq',
            'Illumina MiSeq',
            'Ion Torrent',
            'Microfluidics',
            'Nanopore',
            'PacBio',
            'Roche 454',
            'Sanger',
            'SOLiD',
            # RNA, Protein, etc.
            'De Novo',
            'HPLC',
            'Mass Spec',
            'RNA-Seq',
        ],

        # 2. Graphs
        # https://en.wikipedia.org/wiki/Graph_drawing#Software
        'Graphs' => [
            'BioFabric',
            'BioTapestry',
            'Cytoscape',
            'Edraw Max',
            'GenMAPP',
            'Gephi',
            'graph-tool',
            'Graphviz',
            'InCroMAP',
            'LaNet-vi',
            'Linkurious',
            'MATLAB',
            'MEGA',
            'Maple',
            'Mathematica',
            #'Microsoft Automatic Graph Layout',
            'NetworkX',
            'PGF/TikZ',
            'PathVisio',
            'Pathview',
            'R',
            'Systrip',
            'Tom Sawyer Software',
            'Tulip',
            'yEd',
        ],

        # 2. Images
        'Images' => [
            'CT/CAT',
            'ECG',
            'Elastography',
            'FNIR/NIRS',
            'MPI',
            'MRI/NMR',
            'Microscopy',
            'Photoacoustic',
            'Photography',
            'Scint/SPECT/PET',
            'Ultrasound',
            'X-Rays',
        ],

        # 2. Documents
        'Documents' => [
            # Composed
            'Literature',
            'Software',
            # Generated
            'Kernel',
            'Metadata',
            'Notebook',
        ],

        # 2. Machine Data (stored under the 'Raw' key)
        'Raw' => [
            'Binary',
            'Text',
        ],
    ], # End $ENV->META->Platforms

    # 1. FORMATS
    'Formats' => [
        # 2. Plain
        'Plain' => [
            'CSV' => ['csv'], # 3
            'JSON' => ['json'], # 3
            'Text' => ['txt', 'asc'], # 3
            'XML' => ['xml'], # etc.
        ],

        # 2. Databases
        'Databases' => [
            'MS SQL' => ['mdf', 'ndf', 'ldf'],
            'MySQL' => ['sql', 'mysql'],
            'Oracle' => ['dbf', 'ora', 'oraenv'],
            'IBM Db2' => ['ixf', 'del', 'cursor'],
            'Postgres' => ['sql']
        ],

        # 2. Archives
        'Archives' => [
            '7z' => ['7z'],
            'bzip2' => ['bz2', 'bzip2'],
            'gzip' => ['gz', 'gzip', 'tgz', 'tpz'],
            'Pickle' => ['pickle', 'pkl'],
            'RAR' => ['rar', 'rev'],
            'tar' => ['tar'],
            'ZIP' => ['zip', 'zipx'],
            'None' => [''],
        ],

        # 2. Sequences
        # https://www.ncbi.nlm.nih.gov/sra/docs/submitformats/
        'Sequences' => [
            'BAM' => ['bam'],
            'CRAM' => ['cram'],
            'EMBL' => ['embl'],
            'FASTA' => ['fa', 'fasta', 'fsa'],
            'FASTA+QUAL' => ['qual'],
            'CSFASTA' => ['csfa', 'csfasta', 'csfsa'],
            'FASTQ' => ['fastq', 'fq', 'sanfastq'],
            'GFF' => ['gff', 'gff2', 'gff3'],
            'GTF' => ['gtf'],
            'GenBank' => ['gb', 'gbk', 'genbank'],
            'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
            'PIR' => ['pir'],
            'QSeq' => ['qseq'],
            'SAM' => ['sam'],
            'SFF' => ['sff'],
            'SRF' => ['srf'],
            'SnapGene' => ['dna', 'seq'],
            'SwissProt' => ['dat'],
            'VCF' => ['vcf'],
        ],

        # 2. Proteins
        # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3518119/
        'Proteins' => [
            'ABI/Sciex' => ['t2d', 'wiff'],
            'APML' => ['apml'],
            'ASF' => ['asf'],
            'Agilent/Bruker' => ['baf', 'd', 'fid', 'tdf', 'yep'],
            'BlibBuild' => ['blib'],
            'Bruker/Varian' => ['sms', 'xms'],
            'Finnigan' => ['dat', 'ms'],
            'ION-TOF' => ['ita', 'itm'],
            'JCAMP-DX' => ['jdx'],
            'MGF' => ['mgf'],
            'MS2' => ['ms2'],
            'MSF' => ['msf'],
            'mzData' => ['mzdata'],
            'mzML' => ['mzml'],
            'mzXML' => ['mzxml'],
            'OMSSA' => ['omssa', 'omx'],
            'PEFF' => ['peff'],
            'pepXML' => ['pepxml'],
            'protXML' => ['protxml'],
            'Shimadzu' => ['lcd', 'qgd', 'spc'],
            'Skyline' => ['sky', 'skyd'],
            'TPP/SPC' => ['dta'],
            'Tandem' => ['tandem'],
            'TraML' => ['traml'],
            'ULVAC-PHI' => ['tdc'],
        ],

        # 2. Graph XML
        'GraphXml' => [
            'DGML' => ['dgml'],
            'DotML' => ['dotml'],
            'GEXF' => ['gexf'],
            'GXL' => ['gxl'],
            'GraphML' => ['graphml'],
            'XGMML' => ['xgmml'],
        ],

        # 2. Graph plain
        'GraphTxt' => [
            'DOT' => ['gv'],
            'GML' => ['gml'],
            'LCF' => ['lcf'],
            'Newick' => ['xsd', 'sgf'],
            'SIF' => ['sif'],
            'TGF' => ['tgf'],
        ],

        # 2. Image vector
        'ImgVector' => [
            'AI' => ['ai'],
            'CorelDRAW' => ['cdr'],
            'EPS' => ['eps', 'epsf', 'epsi'],
            'SVG' => ['svg'],
            'WMF' => ['emf', 'emz', 'wmf', 'wmz'],
        ],

        # 2. Image raster
        'ImgRaster' => [
            'Analyze' => ['hdr', 'img'],
            'Interfile' => ['h33'],
            'DICOM' => ['dcm', 'dicom'],
            'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
            'NIfTI' => ['nii', 'nifti'],
            'MINC' => ['minc', 'mnc'],
            'JPEG' => ['jfif', 'jpeg', 'jpg'],
            'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
            'PNG' => ['png'],
            'TIFF' => ['tif', 'tiff'],
            'WebP' => ['webp'],
        ],

        # 2. Map vector
        'MapVector' => [
            'AutoCAD DXF' => ['dxf'],
            'Cartesian (XYZ)' => ['xyz'],
            'DLG' => ['dlg'],
            'Esri TIN' => ['adf', 'dbf'],
            'GML' => ['gml'],
            'GeoJSON' => ['geojson'],
            'ISFC' => ['isfc'],
            # Zipped KML archives use the .kmz extension (was misspelled 'kmzv')
            'KML' => ['kml', 'kmz'],
            # DAT omitted
            # https://en.wikipedia.org/wiki/MapInfo_TAB_format
            'MapInfo TAB' => ['tab', 'ind', 'map', 'id'],
            'Measure Map Pro' => ['mmp'],
            'NTF' => ['ntf'],
            # DBF omitted
            # https://en.wikipedia.org/wiki/Shapefile
            'Shapefile' => ['shp', 'shx'],
            'Spatial Data File' => ['sdf', 'sdf3', 'sif', 'kif'],
            'SOSI' => ['sosi'],
            'SVG' => ['svg'],
            'TIGER' => ['tiger'],
            'VPF' => ['vpf'],
        ],

        # 2. Map raster
        'MapRaster' => [
            'ADRG' => ['adrg'],
            'Binary' => ['bsq', 'bip', 'bil'],
            'DRG' => ['drg'],
            'ECRG' => ['ecrg'],
            'ECW' => ['ecw'],
            # DAT and ASC omitted (common)
            # https://support.esri.com/en/technical-article/000008526
            # https://web.archive.org/web/20150128024528/http://docs.codehaus.org/display/GEOTOOLS/ArcInfo+ASCII+Grid+format
            'Esri Grid' => ['adf', 'nit', 'asc', 'grd'],
            'GeoTIFF' => ['tfw'],
            #'IMG' => ['img'],
            #'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
            'MrSID' => ['sid'],
            'netCDF' => ['nc'],
            'RPF' => ['cadrg', 'cib'],
        ],

        # 2. Binary documents
        'BinDoc' => [
            'OpenDocument' => ['odt', 'fodt', 'ods', 'fods', 'odp', 'fodp', 'odg', 'fodg', 'odf'],
            'Word' => ['doc', 'dot', 'wbk', 'docx', 'docm', 'dotx', 'dotm', 'docb'],
            'PowerPoint' => ['ppt', 'pot', 'pps', 'pptx', 'pptm', 'potx', 'potm', 'ppam', 'ppsx', 'ppsm', 'sldx', 'sldm'],
            'Excel' => ['xls', 'xlt', 'xlm', 'xlsx', 'xlsm', 'xltx', 'xltm', 'xlsb', 'xla', 'xlam', 'xll', 'xlw'],
            'PDF' => ['pdf', 'fdf', 'xfdf'],
        ],

        # 2. Extra formats
        'CpuGen' => [
            'Docker' => ['dockerfile'],
            'Hard Disk' => ['fvd', 'dmg', 'esd', 'qcow', 'qcow2', 'qcow3', 'smi', 'swm', 'vdi', 'vhd', 'vhdx', 'vmdk', 'wim'],
            'Optical Disc' => ['bin', 'ccd', 'cso', 'cue', 'daa', 'isz', 'mdf', 'mds', 'mdx', 'nrg', 'uif'],
            'Python' => ['pxd', 'py', 'py3', 'pyc', 'pyd', 'pyde', 'pyi', 'pyo', 'pyp', 'pyt', 'pyw', 'pywz', 'pyx', 'pyz', 'rpy', 'xpy'],
            'Jupyter' => ['ipynb'],
            'Ontology' => ['cgif', 'cl', 'clif', 'csv', 'htm', 'html', 'kif', 'obo', 'owl', 'rdf', 'rdfa', 'rdfs', 'rif', 'tsv', 'xcl', 'xht', 'xhtml', 'xml'],
        ],
    ], # End $ENV->META->Formats

    # 1. SCOPES
    'Scopes' => [
        # 2. SI
        'SI' => [
            'Nano',
            'Micro',
            'Milli',
            'Centi',
            'Kilo',
            'Mega',
            'Giga',
            'Tera',
        ],

        # 2. Sequences
        'Sequences' => [
            'Contig',
            'Scaffold',
            'Chromosome',
            'Genome',
            'Proteome',
            'Transcriptome',
        ],

        # 2. Locations
        'Locations' => [
            'Organization',
            'Locality',
            'State',
            'Province',
            'Country',
            'Continent',
            'World',
        ],

        # 2. XML
        'XML' => [
            'Value',
            'Attribute',
            'Group',
            'Element',
            'Schema',
        ],

        # 2. Scalar
        'Scalar' => [
            'Area',
            'Density',
            'Distance',
            'Energy',
            'Mass',
            'Speed',
            'Temperature',
            'Time',
            'Volume',
            'Work',
        ],

        # 2. Vector
        'Vector' => [
            'Acceleration',
            'Displacement',
            'Force',
            'Polarization',
            'Momentum',
            'Position',
            'Thrust',
            'Velocity',
            'Weight',
        ],
    ], # End $ENV->META->Scopes

    # 1. LICENSES
    'Licenses' => [
        'BSD-2',
        'BSD-3',
        'CC BY',
        'CC BY-SA',
        'CC BY-ND',
        'CC BY-NC',
        'CC BY-NC-SA',
        'CC BY-NC-ND',
        'GNU GPL',
        'GNU LGPL',
        'GNU AGPL',
        'GNU FDL',
        'MIT',
        'ODC-By',
        'ODC-ODbL',
        'OpenMTA',
        'Public Domain',
        'Unspecified',
    ], # End $ENV->META->Licenses
];

ENV::setPub(
    'META',
    $ENV->convert($META)
);
/**
 * Categories
 * https://www.ncbi.nlm.nih.gov/books/NBK25464/
 */

# Base URL path of the category icons.
# rtrim() keeps a single separator whether or not STATIC_SERVER ends in a slash;
# plain interpolation produced "/static//common/bioicons" with the default STATIC_SERVER.
$CatIcons = rtrim($ENV->STATIC_SERVER, '/') . '/common/bioicons';

# Collage category names, keyed by numeric ID
$CollageCats = [
    0 => 'Personal',
    1 => 'Theme',
    2 => 'Staff Picks',
    3 => 'Group Picks',
];
# Site categories, keyed by ID.
# Each entry holds ID, Name, Icon, Description, Platforms, and Formats (in that order);
# Platforms and Formats point at the matching $ENV->META groups.
# Built inside a closure so the helper variables stay out of the global scope.
$CATS = (static function () use ($ENV, $CatIcons): array {
    $platforms = $ENV->META->Platforms;
    $formats = $ENV->META->Formats;

    # Format groups shared by several categories
    $graphFormats = ['GraphXml', 'GraphTxt', 'Plain'];
    $mapFormats = ['MapRaster', 'MapVector', 'ImgRaster', 'ImgVector'];

    # ID => [Name, icon basename, Description, Platforms key, Formats keys]
    $rows = [
        1 => ['Sequences', 'sequences', "For data that's ACGT, ACGU, amino acid letters on disk.", 'Sequences', ['Sequences', 'Proteins', 'Plain']],
        2 => ['Graphs', 'graphs', 'For pathway and regulatory network data, structured taxonomies, etc.', 'Graphs', $graphFormats],
        3 => ['Systems', 'systems', 'For data that examines one facet broadly, not one subject deeply.', 'Graphs', $graphFormats],
        4 => ['Geometric', 'geometric', "For structured data (XML, etc.) that describes the subject's orientation in space.", 'Graphs', $graphFormats],
        5 => ['Scalars/Vectors', 'scalars_vectors', 'For data that describes observations over time and/or space.', 'Graphs', $graphFormats],
        6 => ['Patterns', 'patterns', 'For data that describes recurring structures in nature such as common pathways or motifs in the proteome or metabolome.', 'Graphs', $graphFormats],
        7 => ['Constraints', 'constraints', 'For data that records experimental control behavior, checks readings against known physical constants, tracks the thermodynamic limits of reactions, etc.', 'Graphs', $graphFormats],
        8 => ['Images', 'images', 'For data you can look at!', 'Images', ['ImgRaster', 'ImgVector']],
        9 => ['Spatial', 'spatial', "For data that's limited to specific locations or otherwise describes macroscopic space.", 'Graphs', $mapFormats],
        10 => ['Models', 'models', 'For projections, simulations, and other hypothetical or computer-generated data.', 'Graphs', $mapFormats],
        11 => ['Documents', 'documents', 'For documentation, software, disk images, and literature datasets.', 'Documents', ['BinDoc', 'CpuGen', 'Plain']],
        12 => ['Machine Data', 'machine_data', 'For raw reads and machine data of any category.', 'Raw', ['Plain']],
    ];

    $categories = [];
    foreach ($rows as $id => [$name, $icon, $description, $platformKey, $formatKeys]) {
        $categories[$id] = [
            'ID' => $id,
            'Name' => $name,
            'Icon' => "$CatIcons/$icon.png",
            'Description' => $description,
            'Platforms' => $platforms->{$platformKey},
            'Formats' => array_map(
                static function (string $formatKey) use ($formats) {
                    return $formats->{$formatKey};
                },
                $formatKeys
            ),
        ];
    }

    return $categories;
})();

ENV::setPub('CATS', $ENV->convert($CATS));
/**
 * Regular expressions
 *
 * The Gazelle regex collection.
 * Formerly in classes/regex.php.
 *
 * Each pattern is declared once as a global constant (old format),
 * then the same string is published as a public $ENV value.
 */

// resource_type://username:password@domain:port/path?query_string#anchor
define('RESOURCE_REGEX', '(https?|ftps?|dat|ipfs):\/\/');
ENV::setPub('RESOURCE_REGEX', RESOURCE_REGEX);

define('IP_REGEX', '(\d{1,3}\.){3}\d{1,3}');
ENV::setPub('IP_REGEX', IP_REGEX);

define('DOMAIN_REGEX', '([a-z0-9\-\_]+\.)*[a-z0-9\-\_]+');
ENV::setPub('DOMAIN_REGEX', DOMAIN_REGEX);

define('PORT_REGEX', ':\d{1,5}');
ENV::setPub('PORT_REGEX', PORT_REGEX);

# Scheme, then IP or domain, then optional port and path
define(
    'URL_REGEX',
    '(' . RESOURCE_REGEX . ')'
    . '(' . IP_REGEX . '|' . DOMAIN_REGEX . ')'
    . '(' . PORT_REGEX . ')?'
    . '(\/\S*)*'
);
ENV::setPub('URL_REGEX', URL_REGEX);

# The only pattern in this group that carries its own delimiters and flags
define('USERNAME_REGEX', '/^[a-z0-9_]{2,20}$/iD');
ENV::setPub('USERNAME_REGEX', USERNAME_REGEX);

define('EMAIL_REGEX', '[_a-z0-9-]+([.+][_a-z0-9-]+)*@' . DOMAIN_REGEX);
ENV::setPub('EMAIL_REGEX', EMAIL_REGEX);

# URL patterns restricted by file extension
define('IMAGE_REGEX', URL_REGEX . '\/\S+\.(jpg|jpeg|tif|tiff|png|gif|bmp)(\?\S*)?');
ENV::setPub('IMAGE_REGEX', IMAGE_REGEX);

define('VIDEO_REGEX', URL_REGEX . '\/\S+\.(webm)(\?\S*)?');
ENV::setPub('VIDEO_REGEX', VIDEO_REGEX);

define('CSS_REGEX', URL_REGEX . '\/\S+\.css(\?\S*)?');
ENV::setPub('CSS_REGEX', CSS_REGEX);
# Links that point back at this site: scheme, optional "www.", then the quoted site domain.
# The dot after "www" is escaped so that it matches a literal "." only;
# the unescaped form also accepted any character there (e.g. "wwwXbiotorrents.de").
define('SITELINK_REGEX', RESOURCE_REGEX . '(www\.)?' . preg_quote(SITE_DOMAIN, '/'));
ENV::setPub(
    'SITELINK_REGEX',
    $ENV->RESOURCE_REGEX . '(www\.)?' . preg_quote(SITE_DOMAIN, '/')
);

define('TORRENT_REGEX', SITELINK_REGEX . '\/torrents\.php\?(.*&)?torrentid=(\d+)'); // torrentid = group 4
ENV::setPub(
    'TORRENT_REGEX',
    $ENV->SITELINK_REGEX . '\/torrents\.php\?(.*&)?torrentid=(\d+)'
);

define('TORRENT_GROUP_REGEX', SITELINK_REGEX . '\/torrents\.php\?(.*&)?id=(\d+)'); // id = group 4
ENV::setPub(
    'TORRENT_GROUP_REGEX',
    $ENV->SITELINK_REGEX . '\/torrents\.php\?(.*&)?id=(\d+)'
);

define('ARTIST_REGEX', SITELINK_REGEX . '\/artist\.php\?(.*&)?id=(\d+)'); // id = group 4
ENV::setPub(
    'ARTIST_REGEX',
    $ENV->SITELINK_REGEX . '\/artist\.php\?(.*&)?id=(\d+)'
);
# Markup patterns; both recurse with (?R) and use \1 to match the closing tag name
# https://stackoverflow.com/a/3180176
ENV::setPub(
    'HTML_REGEX',
    '<([\w]+)([^>]*?)(([\s]*\/>)|(>((([^<]*?|<\!\-\-.*?\-\->)|(?R))*)<\/\\1[\s]*>))'
);

ENV::setPub(
    'BBCODE_REGEX',
    '\[([\w]+)([^\]]*?)(([\s]*\/\])|(\]((([^\[]*?|\[\!\-\-.*?\-\-\])|(?R))*)\[\/\\1[\s]*\]))'
);

# https://www.crossref.org/blog/dois-and-matching-regular-expressions/
# The dot in the "10." prefix is escaped so that it matches a literal "." only.
# The suffix class lists uppercase letters only, so lowercase DOIs
# match only if the pattern is applied case-insensitively.
ENV::setPub(
    'DOI_REGEX',
    '10\.\d{4,9}\/[-._;()\/:A-Z0-9]+'
);

# https://www.biostars.org/p/13753/
# NOTE(review): \d* also matches the empty string — confirm that is intended
ENV::setPub(
    'ENTREZ_REGEX',
    '\d*'
);

# https://www.wikidata.org/wiki/Property:P496
ENV::setPub(
    'ORCID_REGEX',
    '0000-000(1-[5-9]|2-[0-9]|3-[0-4])\d{3}-\d{3}[\dX]'
);

# https://www.biostars.org/p/13753/
ENV::setPub(
    'REFSEQ_REGEX',
    '\w{2}_\d{1,}\.\d{1,}'
);

# https://www.uniprot.org/help/accession_numbers
ENV::setPub(
    'UNIPROT_REGEX',
    '[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}'
);