PUBLIC_VALUE;
* $ENV->getPriv('PRIVATE_VALUE');
*
* Using a central static $ENV singleton class has additional benefits.
* The RecursiveArrayObject class included in env.class.php is a powerful tool:
*
* $LongArray = [];
* ENV::setPub(
* 'CONFIG',
* new RecursiveArrayObject($LongArray)
* );
*
* $ENV = ENV::go();
* foreach ($ENV->CATS as $Cat) {
* var_dump($Cat->Name);
* }
*
* One more example using custom RecursiveArrayObject methods:
* @see https://www.php.net/manual/en/class.arrayobject.php
*
* var_dump(
* $ENV->dedupe(
* $ENV->CATS->SEQ->Platforms,
* $ENV->CATS->IMG->Platforms->toArray(),
* [$MapVectorFormats, $MapRasterFormats, $PlainFormats]
* )
* );
*/
# https://www.php.net/manual/en/language.oop5.autoload.php
require_once 'env.class.php';
# Initialize the shared $ENV singleton and the two switches the rest of this file reads
$ENV = ENV::go();
ENV::setPub('PHP_MIN', '7.4.0');
ENV::setPub('DEV', true);

# Modern PHP
# Compare the *running* interpreter (PHP_VERSION) against the configured minimum.
# PHP_MIN itself is an acceptable version, hence ">=" in the message.
if (version_compare(PHP_VERSION, $ENV->PHP_MIN, '<')) {
    throw new Exception("Gazelle requires PHP >= $ENV->PHP_MIN");
}
/**
* Site identity
*/
# Site name: the development instance carries a "[Dev]" prefix
ENV::setPub(
    'SITE_NAME',
    $ENV->DEV
        ? '[Dev] BioTorrents.de' # Development
        : 'BioTorrents.de'       # Production
);

# Meta description
ENV::setPub(
    'DESCRIPTION',
    'A platform to share biological sequence and medical imaging data'
);

# Navigation glyphs
ENV::setPub('SEP', '⸬');   # e.g., News ⸬ BioTorrents.de
ENV::setPub('CRUMB', '›'); # e.g., Forums › Board › Thread
# Every value below that differs per environment is published twice:
# first as a global constant (old format), then on $ENV (new format).

# The FQDN of your site, e.g., dev.biotorrents.de
define(
    'SITE_DOMAIN',
    $ENV->DEV
        ? 'dev.biotorrents.de' # Development
        : 'biotorrents.de'     # Production
);
ENV::setPub(
    'SITE_DOMAIN',
    $ENV->DEV
        ? 'dev.biotorrents.de' # Development
        : 'biotorrents.de'     # Production
);

# The FQDN of your image host, e.g., pics.biotorrents.de
ENV::setPub('IMAGE_DOMAIN', 'pics.biotorrents.de');

# The root of the server, used for includes, e.g., /var/www/html/dev.biotorrents.de/
define(
    'SERVER_ROOT',
    $ENV->DEV
        ? '/var/www/html/dev.biotorrents.de/' # Development
        : '/var/www/html/biotorrents.de/'     # Production
);
ENV::setPub(
    'SERVER_ROOT',
    $ENV->DEV
        ? '/var/www/html/dev.biotorrents.de/' # Development
        : '/var/www/html/biotorrents.de/'     # Production
);

# Where torrent files are stored, e.g., /var/www/torrents-dev/
define(
    'TORRENT_STORE',
    $ENV->DEV
        ? '/var/www/torrents-dev/' # Development
        : '/var/www/torrents/'     # Production
);
ENV::setPub(
    'TORRENT_STORE',
    $ENV->DEV
        ? '/var/www/torrents-dev/' # Development
        : '/var/www/torrents/'     # Production
);

# Allows you to run static content off another server. Default is usually what you want
define('STATIC_SERVER', '/static/');
ENV::setPub('STATIC_SERVER', '/static/');

# The hashing algorithm used for SRI
ENV::setPub('SRI', 'sha384');
/**
* App keys
*
* Separate keys for development and production.
* Increased security and protection against config overwrites.
*/
# Pre-shared key for generating hmacs for the image proxy
define('IMAGE_PSK', '00000000000000000000000000000000');
ENV::setPriv('IMAGE_PSK', '00000000000000000000000000000000');

# ENCKEY:       Unused in OT Gazelle. Currently used for API token auth
# SCHEDULE_KEY: Alphanumeric random key. This key must be the argument to schedule.php for the schedule to work
# RSS_HASH:     Random key. Used for generating unique RSS auth key
if ($ENV->DEV) {
    # Development
    ENV::setPriv('ENCKEY', '00000000000000000000000000000000');

    define('SCHEDULE_KEY', '00000000000000000000000000000000');
    ENV::setPriv('SCHEDULE_KEY', '00000000000000000000000000000000');

    define('RSS_HASH', '00000000000000000000000000000000');
    ENV::setPriv('RSS_HASH', '00000000000000000000000000000000');
} else {
    # Production
    ENV::setPriv('ENCKEY', '00000000000000000000000000000000');

    define('SCHEDULE_KEY', '00000000000000000000000000000000');
    ENV::setPriv('SCHEDULE_KEY', '00000000000000000000000000000000');

    define('RSS_HASH', '00000000000000000000000000000000');
    ENV::setPriv('RSS_HASH', '00000000000000000000000000000000');
}
/**
* Database
*/
# Connection details shared by both environments
ENV::setPriv('SQLHOST', 'localhost');
ENV::setPriv('SQLSOCK', '/var/run/mysqld/mysqld.sock');
ENV::setPriv('SQLPORT', 3306);

# Each environment has its own schema and credentials
if ($ENV->DEV) {
    # Development
    ENV::setPriv('SQLDB', 'gazelle_development');
    ENV::setPriv('SQLLOGIN', 'gazelle_development');
    ENV::setPriv('SQLPASS', '00000000000000000000000000000000');
} else {
    # Production
    ENV::setPriv('SQLDB', 'gazelle_production');
    ENV::setPriv('SQLLOGIN', 'gazelle_production');
    ENV::setPriv('SQLPASS', '00000000000000000000000000000000');
}
/**
* Tracker
*/
# Ocelot connection, e.g., 0.0.0.0
define('TRACKER_HOST', '0.0.0.0');
ENV::setPriv('TRACKER_HOST', '0.0.0.0');

# TRACKER_SECRET:    Must be 32 alphanumeric characters and match site_password in ocelot.conf
# TRACKER_REPORTKEY: Must be 32 alphanumeric characters and match report_password in ocelot.conf
if ($ENV->DEV) {
    # Development
    define('TRACKER_PORT', 34001);
    ENV::setPriv('TRACKER_PORT', 34001);

    define('TRACKER_SECRET', '00000000000000000000000000000000');
    ENV::setPriv('TRACKER_SECRET', '00000000000000000000000000000000');

    define('TRACKER_REPORTKEY', '00000000000000000000000000000000');
    ENV::setPriv('TRACKER_REPORTKEY', '00000000000000000000000000000000');
} else {
    # Production
    define('TRACKER_PORT', 34000);
    ENV::setPriv('TRACKER_PORT', 34000);

    define('TRACKER_SECRET', '00000000000000000000000000000000');
    ENV::setPriv('TRACKER_SECRET', '00000000000000000000000000000000');

    define('TRACKER_REPORTKEY', '00000000000000000000000000000000');
    ENV::setPriv('TRACKER_REPORTKEY', '00000000000000000000000000000000');
}
/**
* Tracker URLs
*
* Added to torrents à la http://bittorrent.org/beps/bep_0012.html
*/
# Each environment announces to its own tracker host.
# The list is published as a constant (old format) and on $ENV (new format);
# $AnnounceURLs stays a global variable, exactly as before.
if ($ENV->DEV) {
    # Development
    define('ANNOUNCE_URLS', [
        ['https://devtr.biotorrents.de:443'], # Tier 1
        [],                                   # Tier 2
    ]);

    $AnnounceURLs = [
        ['https://devtr.biotorrents.de:443'], # Tier 1
        [],                                   # Tier 2
    ];
} else {
    # Production
    define('ANNOUNCE_URLS', [
        ['https://track.biotorrents.de:443'], # Tier 1
        [],                                   # Tier 2
    ]);

    $AnnounceURLs = [
        ['https://track.biotorrents.de:443'], # Tier 1
        [
            # Tier 2: every entry is currently commented out
            #'udp://tracker.coppersurfer.tk:6969/announce',
            #'udp://tracker.cyberia.is:6969/announce',
            #'udp://tracker.leechers-paradise.org:6969/announce',
        ],
    ];
}

# Identical call for both environments
ENV::setPub(
    'ANNOUNCE_URLS',
    new RecursiveArrayObject($AnnounceURLs)
);
/**
* Search
*/
# SphinxqlQuery needs constants
# $ENV breaks the torrent and request pages
# (these four values are therefore only defined as constants and not set on $ENV)
define('SPHINXQL_HOST', '127.0.0.1');
define('SPHINXQL_PORT', 9306);
# NOTE(review): presumably false means "connect via host/port, not a socket" — confirm against SphinxqlQuery
define('SPHINXQL_SOCK', false);
define('SPHINX_MAX_MATCHES', 1000); // Must be <= the server's max_matches variable (default 1000)
/**
* memcached
*
* Very important to run two instances,
* one each for development and production.
*/
# One server entry per environment; only the socket path differs
ENV::setPriv(
    'MEMCACHED_SERVERS',
    [[
        'host' => $ENV->DEV
            ? 'unix:///var/run/memcached/memcached-dev.sock' # Development
            : 'unix:///var/run/memcached/memcached.sock',    # Production
        'port' => 0,
        'buckets' => 1,
    ]]
);
/**
* IRC/Slack
*/
# IRC server address. Used for onsite chat tool
# Built from the SITE_DOMAIN constant, so it must run after SITE_DOMAIN is defined
define('BOT_SERVER', 'irc.'.SITE_DOMAIN);
define('SOCKET_LISTEN_ADDRESS', 'localhost');
define('SOCKET_LISTEN_PORT', 51010);
define('BOT_NICK', 'ebooks');
# IRC channels for official business
# Note: STAFF_CHAN and ADMIN_CHAN share '#staff'; HELP_CHAN and DISABLED_CHAN share '#support'
define('ANNOUNCE_CHAN', '#announce');
define('DEBUG_CHAN', '#debug');
define('REQUEST_CHAN', '#requests');
define('STAFF_CHAN', '#staff');
define('ADMIN_CHAN', '#staff');
define('HELP_CHAN', '#support');
define('DISABLED_CHAN', '#support');
#define('BOT_CHAN', '#userbots');
# Slack invite link
# Stored with setPub (public), unlike the keys above which use setPriv
ENV::setPub(
'SLACK_INVITE',
'00000000000000000000000000000000'
);
/**
* ================
* = NO MORE =
* = PRIVATE INFO =
* ================
*/
/**
* Features
*/
# Feature switches: each is a boolean published on $ENV
# Enable donation page
ENV::setPub('FEATURE_DONATE', true);
# Send re-enable requests to user's email
define('FEATURE_EMAIL_REENABLE', true); // Old format: constant, same value as the $ENV entry below
ENV::setPub('FEATURE_EMAIL_REENABLE', true);
# Require users to verify login from unknown locations
ENV::setPub('FEATURE_ENFORCE_LOCATIONS', false);
# Attempt to send messages to IRC
ENV::setPub('FEATURE_IRC', true);
# Attempt to send email from the site
ENV::setPub('FEATURE_SEND_EMAIL', true);
# Allow the site encryption key to be set without an account
# (should only be used for initial setup)
ENV::setPub('FEATURE_SET_ENC_KEY_PUBLIC', false);
/**
* Settings
*/
# Set to false if you don't want everyone to see debug information; can be overridden with 'site_debug'
define('DEBUG_MODE', false);
ENV::setPub('DEBUG_MODE', false);

# Set to false to disable open registration, true to allow anyone to register
# Evaluates to true in production and false in development
ENV::setPub('OPEN_REGISTRATION', !$ENV->DEV);

# The maximum number of users the site can have, 0 for no limit
define('USER_LIMIT', 0);
ENV::setPub('USER_LIMIT', 0);

# User perks
ENV::setPub('STARTING_INVITES', 2);
ENV::setPub('STARTING_TOKENS', 2);
ENV::setPub('STARTING_UPLOAD', 5368709120); # 5 * 1024**3

define('DONOR_INVITES', 2);
ENV::setPub('DONOR_INVITES', 2);

# Bonus Points
define('BONUS_POINTS', 'Bonus Points');
ENV::setPub('BONUS_POINTS', 'Bonus Points');
ENV::setPub('BP_COEFF', 1.5); # OT default 0.5

# Tag namespaces (configurable via CSS selectors)
#define('TAG_NAMESPACES', ['male', 'female', 'parody', 'character']);

# Banned stuff (file characters, browsers, etc.)
ENV::setPub('BAD_CHARS', ['"', '*', '/', ':', '<', '>', '?', '\\', '|']);

# Set to true to block Opera Mini proxy
ENV::setPub('BLOCK_OPERA_MINI', true);
# Misc stuff like generic reusable snippets
# Example of a variable using heredoc syntax
# The opener must be "<<<HTML" to pair with the closing "HTML" marker; a bare "<<" is a parse error.
# The body and the closing marker stay at column 0 so the stored string is exactly the text below.
# NOTE(review): the HTML marker suggests this snippet originally carried markup
# (e.g., paragraph/list tags) — confirm against the upstream config.
ENV::setPub(
    'PASSWORD_ADVICE',
    <<<HTML
Any password of 15 characters or longer will be accepted, but a strong password
- is really a passphrase with uppercase and lowercase letters and many small words,
- that contains numbers and symbols, including complex Unicode characters like emoji.
HTML
);
/**
* Services
*
* Public APIs, domains, etc.
* Not intended for private API keys.
*/
# Current Sci-Hub domains
# https://lovescihub.wordpress.com
# The constant holds a single TLD string, while the $ENV entry holds a list of TLDs
define('SCI_HUB', 'se');
ENV::setPub(
'SCI_HUB',
['ren', 'tw', 'se']
);
# Semantic Scholar
# https://api.semanticscholar.org
# Base URLs for the v1 paper and author endpoints; both end with a trailing slash
ENV::setPub(
'SS',
[
'Paper' => 'https://api.semanticscholar.org/v1/paper/',
'Author' => 'https://api.semanticscholar.org/v1/author/',
]
);
/**
* User class IDs
*
* Needed for automatic promotions.
* Found in the `permissions` table.
*/
# Name of class Class ID (not level)
# IDs are defined as strings, not integers
define('ADMIN', '1');
define('USER', '2');
define('MEMBER', '3');
define('POWER', '4');
define('ELITE', '5');
define('LEGEND', '8');
define('MOD', '11');
define('SYSOP', '15');
define('ARTIST', '19');
define('DONOR', '20');
define('VIP', '21');
define('TORRENT_MASTER', '23');
define('POWER_TM', '24');
define('FLS_TEAM', '33');
define('FORUM_MOD', '9001');
/**
* Forums
*/
# Forum IDs: the first two exist only as constants, the other three only on $ENV
define('STAFF_FORUM', 3);
define('DONOR_FORUM', 7);
ENV::setPub('TRASH_FORUM', 8);
ENV::setPub('ANNOUNCEMENT_FORUM', 1);
ENV::setPub('SUGGESTIONS_FORUM', 2);
# Pagination
# Number of items shown per page for each listing
define('TORRENT_COMMENTS_PER_PAGE', 10);
define('POSTS_PER_PAGE', 25);
define('TOPICS_PER_PAGE', 50);
define('TORRENTS_PER_PAGE', 50);
define('REQUESTS_PER_PAGE', 25);
define('MESSAGES_PER_PAGE', 25);
define('LOG_ENTRIES_PER_PAGE', 50);
# Cache catalogues
define('THREAD_CATALOGUE', 500); // Limit to THREAD_CATALOGUE posts per cache key
# Miscellaneous values
# NOTE(review): the MAX_*_RANK constants and DONOR_FORUM_RANK look like donor rank limits — confirm against the donations code
define('MAX_RANK', 6);
define('MAX_EXTRA_RANK', 8);
define('MAX_SPECIAL_RANK', 3);
ENV::setPub('DONOR_FORUM_RANK', 6);
/**
* Ratio and badges
*/
# Ratio requirements, in descending order
# Each row is [download amount, ratio at 0% seeded, ratio at 100% seeded].
# The first column is written as N * 1024**3, i.e., N GiB expressed in bytes.
define('RATIO_REQUIREMENTS', [
# Downloaded Req (0% seeded) Req (100% seeded)
[200 * 1024**3, 0.60, 0.60],
[160 * 1024**3, 0.60, 0.50],
[120 * 1024**3, 0.50, 0.40],
[100 * 1024**3, 0.40, 0.30],
[80 * 1024**3, 0.30, 0.20],
[60 * 1024**3, 0.20, 0.10],
[40 * 1024**3, 0.15, 0.00],
[20 * 1024**3, 0.10, 0.00],
[10 * 1024**3, 0.05, 0.00],
]);
# God I wish I didn't have to do this but I just don't care anymore
# Hard-coded lookup from a string key to a badge ID, grouped into 'DL', 'UL', and 'Posts'.
# NOTE(review): the keys look like thresholds (presumably transfer amounts and post counts) — confirm against the badge scheduler
$AutomatedBadgeIDs = [
'DL' => [
'8' => 10,
'16' => 11,
'32' => 12,
'64' => 13,
'128' => 14,
'256' => 15,
'512' => 16,
'1024' => 17,
'2048' => 18
],
'UL' => [
'16' => 20,
'32' => 21,
'64' => 22,
'128' => 23,
'256' => 24,
'512' => 25,
'1024' => 26,
'2048' => 27,
'4096' => 28
],
'Posts' => [
'5' => 60,
'10' => 61,
'25' => 62,
'50' => 63,
'100' => 64,
'250' => 65,
'500' => 66,
'1000' => 67,
'2500' => 68
]
];
# Published on $ENV wrapped in a RecursiveArrayObject
ENV::setPub(
'AUTOMATED_BADGE_IDS',
new RecursiveArrayObject($AutomatedBadgeIDs)
);
/**
* Metadata abstraction map
*
* A set of 'label' → $DB->query() mappings.
* The database should store generic data, e.g.,
* - Title1, Title2, Title3
* - Creator, Affiliation, Location
*
* Then Gazelle's job is to map text labels over the fields in HTML.
* So $Input->Print($ID = $ENV->Creator) would print an Author form input.
*
* The structure:
* $ENV->DBMAP =
* (DatabaseField
* ->(
* Label->TextLabel,
* OldField->Oppaitime,
* )
* );
*
* An example:
* $Title1 = $ENV->DBMAP->Title1;
* $ElementID =
* strtolower($Title1->Label)
* . '_class_label_'
* . $InstanceID;
*/
# Each top-level key is a generic database field; its entry carries:
# - Label: the text label shown for the field
# - Selector: a list or map of selector strings
# - OldField: the previous field name
# - Description: help text for the field
# NOTE(review): Selector values such as 'javdb' and 'ehentai' look inherited from the upstream project — confirm they are still wanted
$DatabaseFields = [
'AccessionNumber' => [
'Label' => 'Accession Number',
'Selector' => ['DOI' => 'javdb', 'RefSeq' => 'anidb', 'UniProt' => 'ehentai'],
'OldField' => 'CatalogueNumber',
'Description' => 'RefSeq and UniProt preferred',
],
'Title1' => [
'Label' => 'Torrent Title',
'Selector' => ['title'],
'OldField' => 'Name',
'Description' => 'Definition line, e.g., Alcohol dehydrogenase ADH1',
],
'Title2' => [
'Label' => 'Organism',
'Selector' => ['DOI' => 'javdb_tr', 'RefSeq' => 'anidb_tr', 'UniProt' => 'ehentai_tr'],
'OldField' => 'NameRJ',
'Description' => 'Organism line binomial, e.g., Saccharomyces cerevisiae',
],
'Title3' => [
'Label' => 'Strain/Variety',
'Selector' => ['DOI' => 'javdb_tr', 'RefSeq' => 'anidb_tr', 'UniProt' => 'ehentai_tr'],
'OldField' => 'NameJP',
'Description' => 'Organism line if any, e.g., S288C',
],
# etc.
];
# Published on $ENV as DBMAP, wrapped in a RecursiveArrayObject
ENV::setPub(
'DBMAP',
new RecursiveArrayObject($DatabaseFields)
);
/**
* Site Categories
* v2 modular ontology
*/
# Main Categories
# Old OT Gazelle format
# https://www.ncbi.nlm.nih.gov/books/NBK25464/
# The order matches the numeric IDs (1-12) assigned to the same names in $CATS
$Categories = [
'Sequences',
'Graphs',
'Systems',
'Geometric',
'Scalars/Vectors',
'Patterns',
'Constraints',
'Images',
'Spatial',
'Models',
'Documents',
'Machine Data',
];
# Plain copy of $Categories under a second name
$GroupedCategories = $Categories;
#$CategoryIcons = ['music.png', 'apps.png', 'ebook.png', 'audiobook.png', 'elearning.png', 'comedy.png', 'comics.png'];
# Plain Formats
# Format name => list of file extensions; 'Other' maps to a single empty string
$PlainFormats = [
'CSV' => ['csv'],
'JSON' => ['json'],
'Text' => ['txt'],
'XML' => ['xml'],
'Other' => [''],
];
/**
* Sequences
*/
# Platforms
# Flat list of sequencing platform names; the commented-out name is the previous variable name
$SeqPlatforms = [
#$Media = [
# DNA
'Complete Genomics',
'cPAS-BGI/MGI',
'Helicos',
'Illumina HiSeq',
'Illumina MiSeq',
'Ion Torrent',
'Microfluidics',
'Nanopore',
'PacBio',
'Roche 454',
'Sanger',
'SOLiD',
# RNA, Protein, etc.
'De Novo',
'HPLC',
'Mass Spec',
'RNA-Seq',
'Other',
];
# Sequence Formats
# https://www.ncbi.nlm.nih.gov/sra/docs/submitformats/
# Format name => list of file extensions
$SeqFormats = [
#$Containers = [
'BAM' => ['bam'],
'CRAM' => ['cram'],
'EMBL' => ['embl'],
'FASTA' => ['fa', 'fasta', 'fsa'],
'FASTA+QUAL' => ['qual'],
'CSFASTA' => ['csfa', 'csfasta', 'csfsa'],
'FASTQ' => ['fastq', 'fq', 'sanfastq'],
'GFF' => ['gff', 'gff2', 'gff3'],
'GTF' => ['gtf'],
'GenBank' => ['gb', 'gbk', 'genbank'],
'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
'PIR' => ['pir'],
'QSeq' => ['qseq'],
'SAM' => ['sam'],
'SFF' => ['sff'],
'SRF' => ['srf'],
'SnapGene' => ['dna', 'seq'],
'SwissProt' => ['dat'],
'VCF' => ['vcf'],
];
# Protein Formats
# DON'T PARSE RAW FILES. TOO MANY COMPETING VENDORS
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3518119/
# Format name => list of file extensions
# Note: 'dat' appears here (Finnigan) and in $SeqFormats (SwissProt)
$ProtFormats = [
#$ContainersProt = [
'ABI/Sciex' => ['t2d', 'wiff'],
'APML' => ['apml'],
'ASF' => ['asf'],
'Agilent/Bruker' => ['baf', 'd', 'fid', 'tdf', 'yep'],
'BlibBuild' => ['blib'],
'Bruker/Varian' => ['sms', 'xms'],
'Finnigan' => ['dat', 'ms'],
'ION-TOF' => ['ita', 'itm'],
'JCAMP-DX' => ['jdx'],
'MGF' => ['mgf'],
'MS2' => ['ms2'],
'MSF' => ['msf'],
'mzData' => ['mzdata'],
'mzML' => ['mzml'],
'mzXML' => ['mzxml'],
'OMSSA' => ['omssa', 'omx'],
'PEFF' => ['peff'],
'pepXML' => ['pepxml'],
'protXML' => ['protxml'],
'Shimadzu' => ['lcd', 'qgd', 'spc'],
'Skyline' => ['sky', 'skyd'],
'TPP/SPC' => ['dta'],
'Tandem' => ['tandem'],
'TraML' => ['traml'],
'ULVAC-PHI' => ['tdc'],
];
/**
* Graphs
*/
# Graph Platforms
# https://en.wikipedia.org/wiki/Graph_drawing#Software
# Flat list of tool names; 'Other' sits mid-list rather than last
$GraphPlatforms = [
'BioFabric',
'BioTapestry',
'Cytoscape',
'Edraw Max',
'GenMAPP',
'Gephi',
'graph-tool',
'Graphviz',
'InCroMAP',
'LaNet-vi',
'Linkurious',
'MATLAB',
'MEGA',
'Maple',
'Mathematica',
#'Microsoft Automatic Graph Layout',
'NetworkX',
'Other',
'PGF/TikZ',
'PathVisio',
'Pathview',
'R',
'Systrip',
'Tom Sawyer Software',
'Tulip',
'yEd',
];
# XML Graph Formats
# Format name => list of file extensions
$GraphXmlFormats = [
'DGML' => ['dgml'],
'DotML' => ['dotml'],
'GEXF' => ['gexf'],
'GXL' => ['gxl'],
'GraphML' => ['graphml'],
'XGMML' => ['xgmml'],
];
# Text Graph Formats
# Format name => list of file extensions
# NOTE(review): 'xsd' and 'sgf' are listed for Newick — verify these are the intended extensions
$GraphTxtFormats = [
'DOT' => ['gv'],
'GML' => ['gml'],
'LCF' => ['lcf'],
'Newick' => ['xsd', 'sgf'],
'SIF' => ['sif'],
'TGF' => ['tgf'],
];
/**
* Images
*/
# Image Platforms
# Flat list of imaging modality names; the commented-out name is the previous variable name
$ImgPlatforms = [
#$MediaManga = [
'CT/CAT',
'ECG',
'Elastography',
'FNIR/NIRS',
'MPI',
'MRI/NMR',
'Microscopy',
'Photoacoustic',
'Photography',
'Scint/SPECT/PET',
'Ultrasound',
'X-Rays',
'Other',
];
# Image Formats
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3948928/
# Format name => list of file extensions
# The HDF5 entry is identical to the one in $SeqFormats
$ImgFormats = [
#$ContainersGames = [
'Analyze' => ['hdr', 'img'],
'Interfile' => ['h33'],
'DICOM' => ['dcm', 'dicom'],
'HDF5' => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
'NIfTI' => ['nii', 'nifti'],
'MINC' => ['minc', 'mnc'],
'JPEG' => ['jfif', 'jpeg', 'jpg'],
'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
'PNG' => ['png'],
'TIFF' => ['tif', 'tiff'],
'WebP' => ['webp'],
];
/**
* Spatial
*/
# Vector Map Formats
# Format name => list of file extensions
# Some extensions repeat elsewhere: 'gml' and 'sif' are also in $GraphTxtFormats, 'adf' is also in $MapRasterFormats
$MapVectorFormats = [
'AutoCAD DXF' => ['dxf'],
'Cartesian (XYZ)' => ['xyz'],
'DLG' => ['dlg'],
'Esri TIN' => ['adf', 'dbf'],
'GML' => ['gml'],
'GeoJSON' => ['geojson'],
'ISFC' => ['isfc'],
# NOTE(review): 'kmzv' may be a typo for 'kmz' — confirm before changing
'KML' => ['kml', 'kmzv'],
# DAT omitted
# https://en.wikipedia.org/wiki/MapInfo_TAB_format
'MapInfo TAB' => ['tab', 'ind', 'map', 'id'],
'Measure Map Pro' => ['mmp'],
'NTF' => ['ntf'],
# DBF omitted
# https://en.wikipedia.org/wiki/Shapefile
'Shapefile' => ['shp', 'shx'],
'Spatial Data File' => ['sdf', 'sdf3', 'sif', 'kif'],
'SOSI' => ['sosi'],
'SVG' => ['svg'],
'TIGER' => ['tiger'],
'VPF' => ['vpf'],
];
# Raster Map Formats
# Format name => list of file extensions
$MapRasterFormats = [
'ADRG' => ['adrg'],
'Binary' => ['bsq', 'bip', 'bil'],
'DRG' => ['drg'],
'ECRG' => ['ecrg'],
'ECW' => ['ecw'],
# DAT and ASC omitted (common)
# https://support.esri.com/en/technical-article/000008526
# https://web.archive.org/web/20150128024528/http://docs.codehaus.org/display/GEOTOOLS/ArcInfo+ASCII+Grid+format
# NOTE(review): the comment above says ASC is omitted, yet 'asc' is listed below — confirm which is intended
'Esri Grid' => ['adf', 'nit', 'asc', 'grd'],
'GeoTIFF' => ['tfw'],
#'IMG' => ['img'],
#'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
'MrSID' => ['sid'],
'netCDF' => ['nc'],
'RPF' => ['cadrg', 'cib'],
];
/**
* Documents
*/
# Document Platforms
# Flat list of document kinds, split into composed and generated
$DocPlatforms = [
# Composed
'Literature',
'Software',
# Generated
'Kernel',
'Metadata',
'Notebook',
'Other',
];
# Binary Document Formats
# https://en.wikipedia.org/wiki/OpenDocument
# https://en.wikipedia.org/wiki/List_of_Microsoft_Office_filename_extensions
# Format name => list of file extensions
$BinDocFormats = [
'OpenDocument' => ['odt', 'fodt', 'ods', 'fods', 'odp', 'fodp', 'odg', 'fodg', 'odf'],
'Word' => ['doc', 'dot', 'wbk', 'docx', 'docm', 'dotx', 'dotm', 'docb'],
'PowerPoint' => ['ppt', 'pot', 'pps', 'pptx', 'pptm', 'potx', 'potm', 'ppam', 'ppsx', 'ppsm', 'sldx', 'sldm'],
'Excel' => ['xls', 'xlt', 'xlm', 'xlsx', 'xlsm', 'xltx', 'xltm', 'xlsb', 'xla', 'xlam', 'xll', 'xlw'],
'PDF' => ['pdf', 'fdf', 'xfdf'],
];
# Extra Formats
# DON'T PARSE IMG OR ISO FILES
# https://en.wikipedia.org/wiki/Disk_image#File_formats
# http://dcjtech.info/topic/python-file-extensions/
# Format name => list of file extensions
# The 'Ontology' list repeats 'csv' and 'xml' from $PlainFormats
$CpuGenFormats = [
'Docker' => ['dockerfile'],
'Hard Disk' => ['fvd', 'dmg', 'esd', 'qcow', 'qcow2', 'qcow3', 'smi', 'swm', 'vdi', 'vhd', 'vhdx', 'vmdk', 'wim'],
'Optical Disc' => ['bin', 'ccd', 'cso', 'cue', 'daa', 'isz', 'mdf', 'mds', 'mdx', 'nrg', 'uif'],
'Python' => ['pxd', 'py', 'py3', 'pyc', 'pyd', 'pyde', 'pyi', 'pyo', 'pyp', 'pyt', 'pyw', 'pywz', 'pyx', 'pyz', 'rpy', 'xpy'],
'Jupyter' => ['ipynb'],
'Ontology' => ['cgif', 'cl', 'clif', 'csv', 'htm', 'html', 'kif', 'obo', 'owl', 'rdf', 'rdfa', 'rdfs', 'rif', 'tsv', 'xcl', 'xht', 'xhtml', 'xml'],
];
/**
* Machine Data
*/
# Machine Data Platforms
$RawPlatforms = [
'Binary',
'Text',
];
# Archives
# Archive name => list of file extensions; 'None' maps to a single empty string
$Archives = [
'7z' => ['7z'],
'bzip2' => ['bz2', 'bzip2'],
'gzip' => ['gz', 'gzip', 'tgz', 'tpz'],
'Pickle' => ['pickle', 'pkl'],
'RAR' => ['rar', 'rev'],
'ZIP' => ['zip', 'zipx'],
'None' => [''],
];
# Licenses
# NOTE(review): the variable is named $Codecs although it holds license names — presumably a legacy name other code still reads
$Codecs = [
'BSD-2',
'BSD-3',
'CC BY',
'CC BY-SA',
'CC BY-ND',
'CC BY-NC',
'CC BY-NC-SA',
'CC BY-NC-ND',
'GNU GPL',
'GNU LGPL',
'GNU AGPL',
'GNU FDL',
'MIT',
'ODC-By',
'ODC-ODbL',
'OpenMTA',
'Public Domain',
'Unspecified',
'Other',
];
# Scopes
# NOTE(review): the variable is named $Resolutions although it holds assembly scopes — presumably a legacy name other code still reads
$Resolutions = [
'Contig',
'Scaffold',
'Chromosome',
'Whole Genome',
'Other',
];
# Collage categories
# Explicit integer keys 0-3 => category name
$CollageCats = [
0 => 'Personal',
1 => 'Theme',
2 => 'Staff Picks',
3 => 'Group Picks',
];
/**
* Now for the good stuff.
* The short names are for convenience.
* It should be easy enough to find and replace,
* e.g., if you want to use other names.
*/
# Base path prepended to every category icon file name
$CatIcons = '/static/common/bioicons/';
# One entry per category, keyed by a short code. Each entry has:
# - ID: numeric category ID (1-12)
# - Name: display name (same strings as in $Categories)
# - Icon: $CatIcons plus a PNG file name
# - Platforms: one of the platform lists defined earlier in this file
# - Formats: named groups of the format maps defined earlier in this file
# - Description: one-line help text
$CATS = [
'SEQ' => [
'ID' => 1,
'Name' => 'Sequences',
'Icon' => $CatIcons.'sequences.png',
'Platforms' => $SeqPlatforms,
'Formats' => [
'NucleoSeq' => $SeqFormats,
'ProtSeq' => $ProtFormats,
'Plain' => $PlainFormats
],
'Description' => "For data that's ACGT, ACGU, amino acid letters on disk.",
],
# GRF, SYS, GEO, SCV, PTRN, and CNST all use the same platforms ($GraphPlatforms) and the same three format groups
'GRF' => [
'ID' => 2,
'Name' => 'Graphs',
'Icon' => $CatIcons.'graphs.png',
'Platforms' => $GraphPlatforms,
'Formats' => [
'GraphXml' => $GraphXmlFormats,
'GraphTxt' => $GraphTxtFormats,
'Plain' => $PlainFormats
],
'Description' => 'For pathway and regulatory network data, structured taxonomies, etc.',
],
'SYS' => [
'ID' => 3,
'Name' => 'Systems',
'Icon' => $CatIcons.'systems.png',
'Platforms' => $GraphPlatforms,
'Formats' => [
'GraphXml' => $GraphXmlFormats,
'GraphTxt' => $GraphTxtFormats,
'Plain' => $PlainFormats
],
'Description' => 'For data that examines one facet broadly, not one subject deeply.',
],
'GEO' => [
'ID' => 4,
'Name' => 'Geometric',
'Icon' => $CatIcons.'geometric.png',
'Platforms' => $GraphPlatforms,
'Formats' => [
'GraphXml' => $GraphXmlFormats,
'GraphTxt' => $GraphTxtFormats,
'Plain' => $PlainFormats
],
'Description' => "For structured data (XML, etc.) that describes the subject's orientation in space.",
],
'SCV' => [
'ID' => 5,
'Name' => 'Scalars/Vectors',
'Icon' => $CatIcons.'scalars_vectors.png',
'Platforms' => $GraphPlatforms,
'Formats' => [
'GraphXml' => $GraphXmlFormats,
'GraphTxt' => $GraphTxtFormats,
'Plain' => $PlainFormats
],
'Description' => 'For data that describes observations over time and/or space.',
],
'PTRN' => [
'ID' => 6,
'Name' => 'Patterns',
'Icon' => $CatIcons.'patterns.png',
'Platforms' => $GraphPlatforms,
'Formats' => [
'GraphXml' => $GraphXmlFormats,
'GraphTxt' => $GraphTxtFormats,
'Plain' => $PlainFormats
],
'Description' => 'For data that describes recurring structures in nature such as common pathways or motifs in the proteome or metabolome.',
],
'CNST' => [
'ID' => 7,
'Name' => 'Constraints',
'Icon' => $CatIcons.'constraints.png',
'Platforms' => $GraphPlatforms,
'Formats' => [
'GraphXml' => $GraphXmlFormats,
'GraphTxt' => $GraphTxtFormats,
'Plain' => $PlainFormats
],
'Description' => 'For data that records experimental control behavior, checks readings against known physical constants, tracks the thermodynamic limits of reactions, etc.',
],
'IMG' => [
'ID' => 8,
'Name' => 'Images',
'Icon' => $CatIcons.'images.png',
'Platforms' => $ImgPlatforms,
'Formats' => [
'ImgRaster' => $ImgFormats,
#'ImgVector' => $ImgFormats,
'Plain' => $PlainFormats
],
'Description' => 'For data you can look at!',
],
# SPAT and MOD share platforms ($GraphPlatforms) and the same four format groups
'SPAT' => [
'ID' => 9,
'Name' => 'Spatial',
'Icon' => $CatIcons.'spatial.png',
'Platforms' => $GraphPlatforms,
'Formats' => [
'MapVector' => $MapVectorFormats,
'MapRaster' => $MapRasterFormats,
'ImgRaster' => $ImgFormats,
'Plain' => $PlainFormats
],
'Description' => "For data that's limited to specific locations or otherwise describes macroscopic space.",
],
'MOD' => [
'ID' => 10,
'Name' => 'Models',
'Icon' => $CatIcons.'models.png',
'Platforms' => $GraphPlatforms,
'Formats' => [
'MapVector' => $MapVectorFormats,
'MapRaster' => $MapRasterFormats,
'ImgRaster' => $ImgFormats,
'Plain' => $PlainFormats
],
'Description' => 'For projections, simulations, and other hypothetical or computer-generated data.',
],
'DOC' => [
'ID' => 11,
'Name' => 'Documents',
'Icon' => $CatIcons.'documents.png',
'Platforms' => $DocPlatforms,
'Formats' => [
'BinDoc' => $BinDocFormats,
'CpuGen' => $CpuGenFormats,
'Plain' => $PlainFormats
],
'Description' => 'For documentation, software, disk images, and literature datasets.',
],
# RAW only offers the plain formats
'RAW' => [
'ID' => 12,
'Name' => 'Machine Data',
'Icon' => $CatIcons.'machine_data.png',
'Platforms' => $RawPlatforms,
'Formats' => ['Plain' => $PlainFormats],
'Description' => 'For raw reads and machine data of any category.',
],
];
# Published on $ENV as CATS, wrapped in a RecursiveArrayObject
ENV::setPub(
'CATS',
new RecursiveArrayObject($CATS)
);
/**
* Regular expressions
*
* The Gazelle regex collection.
* Formerly in classes/regex.php.
*/
// resource_type://username:password@domain:port/path?query_string#anchor
# Every pattern is published twice: as a constant (built by concatenation) and on $ENV (built by interpolation).
# Apart from USERNAME_REGEX, the patterns are fragments with no delimiters or flags.
# In the double-quoted $ENV versions, sequences such as \/ \S \. \? are not PHP escapes, so the backslashes are kept.
# Matches the scheme: http, https, ftp, or ftps, followed by ://
define('RESOURCE_REGEX', '(https?|ftps?):\/\/');
ENV::setPub(
'RESOURCE_REGEX',
'(https?|ftps?):\/\/'
);
# Four dot-separated groups of 1-3 digits; octets are not range-checked (999.999.999.999 matches)
define('IP_REGEX', '(\d{1,3}\.){3}\d{1,3}');
ENV::setPub(
'IP_REGEX',
'(\d{1,3}\.){3}\d{1,3}'
);
# Dot-separated labels of lowercase letters, digits, hyphens, and underscores
define('DOMAIN_REGEX', '([a-z0-9\-\_]+\.)*[a-z0-9\-\_]+');
ENV::setPub(
'DOMAIN_REGEX',
'([a-z0-9\-\_]+\.)*[a-z0-9\-\_]+'
);
# A colon followed by 1-5 digits
define('PORT_REGEX', ':\d{1,5}');
ENV::setPub(
'PORT_REGEX',
':\d{1,5}'
);
# Scheme, then IP or domain, then optional port, then any number of /path segments
define('URL_REGEX', '('.RESOURCE_REGEX.')('.IP_REGEX.'|'.DOMAIN_REGEX.')('.PORT_REGEX.')?(\/\S*)*');
ENV::setPub(
'URL_REGEX',
"($ENV->RESOURCE_REGEX)($ENV->IP_REGEX|$ENV->DOMAIN_REGEX)($ENV->PORT_REGEX)?(\/\S*)*"
);
# The only pattern here with delimiters and flags: 2-20 characters of a-z, 0-9, underscore; case-insensitive
define('USERNAME_REGEX', '/^[a-z0-9_]{2,20}$/iD');
ENV::setPub(
'USERNAME_REGEX',
'/^[a-z0-9_]{2,20}$/iD'
);
# Local part, then @, then DOMAIN_REGEX
define('EMAIL_REGEX', '[_a-z0-9-]+([.+][_a-z0-9-]+)*@'.DOMAIN_REGEX);
ENV::setPub(
'EMAIL_REGEX',
"[_a-z0-9-]+([.+][_a-z0-9-]+)*@$ENV->DOMAIN_REGEX"
);
# A URL ending in one of the listed image extensions, with an optional query string
define('IMAGE_REGEX', URL_REGEX.'\/\S+\.(jpg|jpeg|tif|tiff|png|gif|bmp)(\?\S*)?');
ENV::setPub(
'IMAGE_REGEX',
"$ENV->URL_REGEX\/\S+\.(jpg|jpeg|tif|tiff|png|gif|bmp)(\?\S*)?"
);
# A URL ending in .webm, with an optional query string
define('VIDEO_REGEX', URL_REGEX.'\/\S+\.(webm)(\?\S*)?');
ENV::setPub(
'VIDEO_REGEX',
"$ENV->URL_REGEX\/\S+\.(webm)(\?\S*)?"
);
# A URL ending in .css, with an optional query string
define('CSS_REGEX', URL_REGEX.'\/\S+\.css(\?\S*)?');
ENV::setPub(
'CSS_REGEX',
"$ENV->URL_REGEX\/\S+\.css(\?\S*)?"
);
# Matches links that point back at this site: scheme, optional "www.", then the
# quoted SITE_DOMAIN constant. The dot in "www." is escaped so only a literal dot
# matches; unescaped, "(www.)?" accepted any character there, so a foreign host
# made of "www" + any character + SITE_DOMAIN was treated as a site link.
define('SITELINK_REGEX', RESOURCE_REGEX.'(www\.)?'.preg_quote(SITE_DOMAIN, '/'));
ENV::setPub(
    'SITELINK_REGEX',
    "$ENV->RESOURCE_REGEX(www\.)?".preg_quote(SITE_DOMAIN, '/')
);

# The three patterns below extend SITELINK_REGEX, which contributes capture
# groups 1 (scheme) and 2 (www.). Group 3 is the optional leading query string
# and group 4 is the numeric ID.
define('TORRENT_REGEX', SITELINK_REGEX.'\/torrents\.php\?(.*&)?torrentid=(\d+)'); // torrentid = group 4
ENV::setPub(
    'TORRENT_REGEX',
    "$ENV->SITELINK_REGEX\/torrents\.php\?(.*&)?torrentid=(\d+)"
);

define('TORRENT_GROUP_REGEX', SITELINK_REGEX.'\/torrents\.php\?(.*&)?id=(\d+)'); // id = group 4
ENV::setPub(
    'TORRENT_GROUP_REGEX',
    "$ENV->SITELINK_REGEX\/torrents\.php\?(.*&)?id=(\d+)"
);

define('ARTIST_REGEX', SITELINK_REGEX.'\/artist\.php\?(.*&)?id=(\d+)'); // id = group 4
ENV::setPub(
    'ARTIST_REGEX',
    "$ENV->SITELINK_REGEX\/artist\.php\?(.*&)?id=(\d+)"
);
# The patterns below exist only on $ENV (no matching constants) and carry no delimiters or flags.
# https://stackoverflow.com/a/3180176
# Recursive pattern ((?R)) for a tag with matching open/close names, or a self-closing tag
ENV::setPub(
'HTML_REGEX',
'<([\w]+)([^>]*?)(([\s]*\/>)|(>((([^<]*?|<\!\-\-.*?\-\->)|(?R))*)<\/\\1[\s]*>))'
);
# Same structure as HTML_REGEX with square brackets in place of angle brackets
ENV::setPub(
'BBCODE_REGEX',
'\[([\w]+)([^\]]*?)(([\s]*\/\])|(\]((([^\[]*?|\[\!\-\-.*?\-\-\])|(?R))*)\[\/\\1[\s]*\]))'
);
# https://www.crossref.org/blog/dois-and-matching-regular-expressions/
# NOTE(review): the dot after "10" is unescaped (matches any character) and the suffix class is
# uppercase-only, so callers presumably need the "i" flag — confirm at the call sites
ENV::setPub(
'DOI_REGEX',
'10.\d{4,9}\/[-._;()\/:A-Z0-9]+'
);
# https://www.biostars.org/p/13753/
# NOTE(review): \d* also matches the empty string — confirm callers anchor it or check for non-empty input
ENV::setPub(
'ENTREZ_REGEX',
'\d*'
);
# https://www.wikidata.org/wiki/Property:P496
# Four hyphen-separated groups starting with 0000-000; the last character may be a digit or X
ENV::setPub(
'ORCID_REGEX',
'0000-000(1-[5-9]|2-[0-9]|3-[0-4])\d{3}-\d{3}[\dX]'
);
# https://www.biostars.org/p/13753/
# Two word characters, an underscore, digits, a dot, then version digits
ENV::setPub(
'REFSEQ_REGEX',
'\w{2}_\d{1,}\.\d{1,}'
);
# https://www.uniprot.org/help/accession_numbers
# Two alternatives; uppercase letters and digits only
ENV::setPub(
'UNIPROT_REGEX',
'[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}'
);