/**
 * NOTE(review): the head of this docblock is missing from this copy;
 * only the tail below is visible.
 *
 * $ENV->PUBLIC_VALUE;
 * $ENV->getPriv('PRIVATE_VALUE');
 *
 * Using a central static $ENV singleton class has additional benefits.
 * The RecursiveArrayObject class included in env.class.php is a powerful tool:
 *
 * $LongArray = [];
 * ENV::setPub(
 *   'CONFIG',
 *   new RecursiveArrayObject($LongArray)
 * );
 *
 * $ENV = ENV::go();
 * foreach ($ENV->CATS as $Cat) {
 *   var_dump($Cat->Name);
 * }
 *
 * One more example using custom RecursiveArrayObject methods:
 * @see https://www.php.net/manual/en/class.arrayobject.php
 *
 * var_dump(
 *   $ENV->dedupe(
 *     $ENV->CATS->SEQ->Platforms,
 *     $ENV->CATS->IMG->Platforms->toArray(),
 *     [$MapVectorFormats, $MapRasterFormats, $PlainFormats]
 *   )
 * );
 */

# https://www.php.net/manual/en/language.oop5.autoload.php
require_once 'env.class.php';

# Initialize
$ENV = ENV::go();
ENV::setPub('PHP_MIN', '7.4.0');
ENV::setPub('DEV', true);

# Modern PHP
# Compare the running interpreter against the configured minimum.
# (Comparing PHP_MIN to a literal, as before, could never fail.)
if (version_compare(PHP_VERSION, $ENV->PHP_MIN, '<')) {
    throw new Exception("Gazelle requires PHP >= $ENV->PHP_MIN");
}


/**
 * Site identity
 */

# Site name
ENV::setPub(
    'SITE_NAME',
    (!$ENV->DEV
        ? 'BioTorrents.de' # Production
        : '[Dev] BioTorrents.de') # Development
);

# Meta description
ENV::setPub('DESCRIPTION', 'A platform to share biological sequence and medical imaging data');

# Navigation glyphs
ENV::setPub('SEP', '⸬'); # e.g., News ⸬ BioTorrents.de
ENV::setPub('CRUMB', '›'); # e.g., Forums › Board › Thread

# The FQDN of your site, e.g., dev.biotorrents.de
# Old format: a plain constant, kept next to the $ENV value
define(
    'SITE_DOMAIN',
    (!$ENV->DEV
        ? 'biotorrents.de' # Production
        : 'dev.biotorrents.de') # Development
);

ENV::setPub(
    'SITE_DOMAIN',
    (!$ENV->DEV
        ? 'biotorrents.de' # Production
        : 'dev.biotorrents.de') # Development
);

# The FQDN of your image host, e.g., pics.biotorrents.de
ENV::setPub('IMAGE_DOMAIN', 'pics.biotorrents.de');

# The root of the server, used for includes, e.g., /var/www/html/dev.biotorrents.de/
# Old format: a plain constant, kept next to the $ENV value
define(
    'SERVER_ROOT',
    (!$ENV->DEV
        ? '/var/www/html/biotorrents.de/' # Production
        : '/var/www/html/dev.biotorrents.de/') # Development
);

ENV::setPub(
    'SERVER_ROOT',
    (!$ENV->DEV
        ? '/var/www/html/biotorrents.de/' # Production
        : '/var/www/html/dev.biotorrents.de/') # Development
);

# Where torrent files are stored, e.g., /var/www/torrents-dev/
# Old format: a plain constant, kept next to the $ENV value
define(
    'TORRENT_STORE',
    (!$ENV->DEV
        ? '/var/www/torrents/' # Production
        : '/var/www/torrents-dev/') # Development
);

ENV::setPub(
    'TORRENT_STORE',
    (!$ENV->DEV
        ? '/var/www/torrents/' # Production
        : '/var/www/torrents-dev/') # Development
);

# Allows you to run static content off another server. Default is usually what you want
define('STATIC_SERVER', '/static/');
ENV::setPub('STATIC_SERVER', '/static/');

# The hashing algorithm used for SRI
ENV::setPub('SRI', 'sha384');


/**
 * App keys
 *
 * Separate keys for development and production.
 * Increased security and protection against config overwrites.
 */

# Pre-shared key for generating hmacs for the image proxy
define('IMAGE_PSK', '00000000000000000000000000000000');
ENV::setPriv('IMAGE_PSK', '00000000000000000000000000000000');

# Production
if (!$ENV->DEV) {
    # Unused in OT Gazelle. Currently used for API token auth
    ENV::setPriv('ENCKEY', '00000000000000000000000000000000');

    # Alphanumeric random key. This key must be the argument to schedule.php for the schedule to work
    define('SCHEDULE_KEY', '00000000000000000000000000000000');
    ENV::setPriv('SCHEDULE_KEY', '00000000000000000000000000000000');

    # Random key. Used for generating unique RSS auth key
    define('RSS_HASH', '00000000000000000000000000000000');
    ENV::setPriv('RSS_HASH', '00000000000000000000000000000000');
}

# Development
else {
    ENV::setPriv('ENCKEY', '00000000000000000000000000000000');

    define('SCHEDULE_KEY', '00000000000000000000000000000000');
    ENV::setPriv('SCHEDULE_KEY', '00000000000000000000000000000000');

    define('RSS_HASH', '00000000000000000000000000000000');
    ENV::setPriv('RSS_HASH', '00000000000000000000000000000000');
}


/**
 * Database
 */

# Common info
ENV::setPriv('SQLHOST', 'localhost');
ENV::setPriv('SQLSOCK', '/var/run/mysqld/mysqld.sock');
ENV::setPriv('SQLPORT', 3306);

# Production
if (!$ENV->DEV) {
    ENV::setPriv('SQLDB', 'gazelle_production');
    ENV::setPriv('SQLLOGIN', 'gazelle_production');
    ENV::setPriv('SQLPASS', '00000000000000000000000000000000');
}

# Development
else {
    ENV::setPriv('SQLDB', 'gazelle_development');
    ENV::setPriv('SQLLOGIN', 'gazelle_development');
    ENV::setPriv('SQLPASS', '00000000000000000000000000000000');
}


/**
 * Tracker
 */

# Ocelot connection, e.g., 0.0.0.0
define('TRACKER_HOST', '0.0.0.0');
ENV::setPriv('TRACKER_HOST', '0.0.0.0');

# Production
if (!$ENV->DEV) {
    define('TRACKER_PORT', 34000);
    ENV::setPriv('TRACKER_PORT', 34000);

    # Must be 32 alphanumeric characters and match site_password in ocelot.conf
    define('TRACKER_SECRET', '00000000000000000000000000000000');
    ENV::setPriv('TRACKER_SECRET', '00000000000000000000000000000000');

    # Must be 32 alphanumeric characters and match report_password in ocelot.conf
    define('TRACKER_REPORTKEY', '00000000000000000000000000000000');
    ENV::setPriv('TRACKER_REPORTKEY', '00000000000000000000000000000000');
}

# Development
else {
    define('TRACKER_PORT', 34001);
    ENV::setPriv('TRACKER_PORT', 34001);

    define('TRACKER_SECRET', '00000000000000000000000000000000');
    ENV::setPriv('TRACKER_SECRET', '00000000000000000000000000000000');

    define('TRACKER_REPORTKEY', '00000000000000000000000000000000');
    ENV::setPriv('TRACKER_REPORTKEY', '00000000000000000000000000000000');
}


/**
 * Tracker URLs
 *
 * Added to torrents à la http://bittorrent.org/beps/bep_0012.html
 */

# Production
if (!$ENV->DEV) {
    $AnnounceURLs = [
        [ # Tier 1
            'https://track.biotorrents.de:443',
        ],
        [ # Tier 2
            #'udp://tracker.coppersurfer.tk:6969/announce',
            #'udp://tracker.cyberia.is:6969/announce',
            #'udp://tracker.leechers-paradise.org:6969/announce',
        ],
    ];
}

# Development
else {
    $AnnounceURLs = [
        [ # Tier 1
            'https://devtr.biotorrents.de:443',
        ],
        [], # Tier 2
    ];
}

# The constant and the $ENV value are built from the same tier list
define('ANNOUNCE_URLS', $AnnounceURLs);
ENV::setPub(
    'ANNOUNCE_URLS',
    new RecursiveArrayObject($AnnounceURLs)
);


/**
 * Search
 */

# SphinxqlQuery needs constants
# $ENV breaks the torrent and request pages
define('SPHINXQL_HOST', '127.0.0.1');
define('SPHINXQL_PORT', 9306);
define('SPHINXQL_SOCK', false);
define('SPHINX_MAX_MATCHES', 1000); // Must be <= the server's max_matches variable (default 1000)


/**
 * memcached
 *
 * Very important to run two instances,
 * one each for development and production.
 */

# Production
if (!$ENV->DEV) {
    ENV::setPriv(
        'MEMCACHED_SERVERS',
        [[
            'host' => 'unix:///var/run/memcached/memcached.sock',
            'port' => 0,
            'buckets' => 1
        ]]
    );
}

# Development
else {
    ENV::setPriv(
        'MEMCACHED_SERVERS',
        [[
            'host' => 'unix:///var/run/memcached/memcached-dev.sock',
            'port' => 0,
            'buckets' => 1
        ]]
    );
}


/**
 * IRC/Slack
 */

# IRC server address.
# Used for onsite chat tool
define('BOT_SERVER', 'irc.'.SITE_DOMAIN);
define('SOCKET_LISTEN_ADDRESS', 'localhost');
define('SOCKET_LISTEN_PORT', 51010);
define('BOT_NICK', 'ebooks');

# IRC channels for official business
define('ANNOUNCE_CHAN', '#announce');
define('DEBUG_CHAN', '#debug');
define('REQUEST_CHAN', '#requests');
define('STAFF_CHAN', '#staff');
define('ADMIN_CHAN', '#staff');
define('HELP_CHAN', '#support');
define('DISABLED_CHAN', '#support');
#define('BOT_CHAN', '#userbots');

# Slack invite link
ENV::setPub(
    'SLACK_INVITE',
    '00000000000000000000000000000000'
);


/**
 * ================
 * =   NO MORE    =
 * = PRIVATE INFO =
 * ================
 */


/**
 * Features
 */

# Enable donation page
ENV::setPub('FEATURE_DONATE', true);

# Send re-enable requests to user's email
define('FEATURE_EMAIL_REENABLE', true);
// ENV::setPub('FEATURE_EMAIL_REENABLE', true);

# Require users to verify login from unknown locations
ENV::setPub('FEATURE_ENFORCE_LOCATIONS', false);

# Attempt to send messages to IRC
ENV::setPub('FEATURE_IRC', true);

# Attempt to send email from the site
ENV::setPub('FEATURE_SEND_EMAIL', true);

# Allow the site encryption key to be set without an account
# (should only be used for initial setup)
ENV::setPub('FEATURE_SET_ENC_KEY_PUBLIC', false);


/**
 * Settings
 */

# Set to false if you don't want everyone to see debug information; can be overridden with 'site_debug'
define('DEBUG_MODE', false);
ENV::setPub('DEBUG_MODE', false);

# Set to false to disable open registration, true to allow anyone to register
ENV::setPub(
    'OPEN_REGISTRATION',
    (!$ENV->DEV
        ? true # Production
        : false) # Development
);

# The maximum number of users the site can have, 0 for no limit
define('USER_LIMIT', 0);
ENV::setPub('USER_LIMIT', 0);

# User perks
ENV::setPub('STARTING_INVITES', 2);
ENV::setPub('STARTING_TOKENS', 2);
ENV::setPub('STARTING_UPLOAD', 5368709120);

define('DONOR_INVITES', 2);
ENV::setPub('DONOR_INVITES', 2);

# Bonus Points
define('BONUS_POINTS', 'Bonus Points');
ENV::setPub('BONUS_POINTS', 'Bonus Points');
ENV::setPub('BP_COEFF', 1.5); # OT default 0.5

# Tag namespaces (configurable via CSS selectors)
#define('TAG_NAMESPACES', ['male', 'female', 'parody', 'character']);

# Banned stuff (file characters, browsers, etc.)
ENV::setPub(
    'BAD_CHARS',
    ['"', '*', '/', ':', '<', '>', '?', '\\', '|']
);

# Set to true to block Opera Mini proxy
ENV::setPub('BLOCK_OPERA_MINI', true);

# Misc stuff like generic reusable snippets
# Example of a variable using heredoc syntax
# NOTE(review): the heredoc opener was damaged in this copy (only "<<" remained)
# and the text stops mid-sentence, so any HTML markup and trailing advice are
# presumably missing. Restore the original body before deploying.
ENV::setPub(
    'PASSWORD_ADVICE',
    <<<HTML
Any password of 15 characters or longer will be accepted, but a strong password
HTML
);


/**
 * Services
 *
 * Public APIs, domains, etc.
 * Not intended for private API keys.
 */

# Current Sci-Hub domains
# https://lovescihub.wordpress.com
define('SCI_HUB', 'se');
ENV::setPub(
    'SCI_HUB',
    ['ren', 'tw', 'se']
);

# Semantic Scholar
# https://api.semanticscholar.org
ENV::setPub(
    'SS',
    [
        'Paper' => 'https://api.semanticscholar.org/v1/paper/',
        'Author' => 'https://api.semanticscholar.org/v1/author/',
    ]
);


/**
 * User class IDs
 *
 * Needed for automatic promotions.
 * Found in the `permissions` table.
 */

# Name of class         Class ID (not level)
define('ADMIN',          '1');
define('USER',           '2');
define('MEMBER',         '3');
define('POWER',          '4');
define('ELITE',          '5');
define('LEGEND',         '8');
define('MOD',            '11');
define('SYSOP',          '15');
define('ARTIST',         '19');
define('DONOR',          '20');
define('VIP',            '21');
define('TORRENT_MASTER', '23');
define('POWER_TM',       '24');
define('FLS_TEAM',       '33');
define('FORUM_MOD',      '9001');


/**
 * Forums
 */

define('STAFF_FORUM', 3);
define('DONOR_FORUM', 7);

ENV::setPub('TRASH_FORUM', 8);
ENV::setPub('ANNOUNCEMENT_FORUM', 1);
ENV::setPub('SUGGESTIONS_FORUM', 2);

# Pagination
define('TORRENT_COMMENTS_PER_PAGE', 10);
define('POSTS_PER_PAGE', 25);
define('TOPICS_PER_PAGE', 50);
define('TORRENTS_PER_PAGE', 50);
define('REQUESTS_PER_PAGE', 25);
define('MESSAGES_PER_PAGE', 25);
define('LOG_ENTRIES_PER_PAGE', 50);

# Cache catalogues
define('THREAD_CATALOGUE', 500); // Limit to THREAD_CATALOGUE posts per cache key

# Miscellaneous values
define('MAX_RANK', 6);
define('MAX_EXTRA_RANK', 8);
define('MAX_SPECIAL_RANK', 3);

ENV::setPub('DONOR_FORUM_RANK', 6);


/**
 * Ratio and badges
 */

# Ratio requirements, in descending order
define('RATIO_REQUIREMENTS', [
    # Downloaded     Req (0% seeded)  Req (100% seeded)
    [200 * 1024**3,  0.60,            0.60],
    [160 * 1024**3,  0.60,            0.50],
    [120 * 1024**3,  0.50,            0.40],
    [100 * 1024**3,  0.40,            0.30],
    [80  * 1024**3,  0.30,            0.20],
    [60  * 1024**3,  0.20,            0.10],
    [40  * 1024**3,  0.15,            0.00],
    [20  * 1024**3,  0.10,            0.00],
    [10  * 1024**3,  0.05,            0.00],
]);

# God I wish I
# didn't have to do this but I just don't care anymore
# Badge IDs, keyed first by 'DL' / 'UL' / 'Posts' and then by a numeric
# threshold stored as a string key.
$AutomatedBadgeIDs = [
    'DL' => [
        '8'    => 10,
        '16'   => 11,
        '32'   => 12,
        '64'   => 13,
        '128'  => 14,
        '256'  => 15,
        '512'  => 16,
        '1024' => 17,
        '2048' => 18
    ],

    'UL' => [
        '16'   => 20,
        '32'   => 21,
        '64'   => 22,
        '128'  => 23,
        '256'  => 24,
        '512'  => 25,
        '1024' => 26,
        '2048' => 27,
        '4096' => 28
    ],

    'Posts' => [
        '5'    => 60,
        '10'   => 61,
        '25'   => 62,
        '50'   => 63,
        '100'  => 64,
        '250'  => 65,
        '500'  => 66,
        '1000' => 67,
        '2500' => 68
    ]
];

ENV::setPub(
    'AUTOMATED_BADGE_IDS',
    new RecursiveArrayObject($AutomatedBadgeIDs)
);


/**
 * Metadata abstraction map
 *
 * A set of 'label' → $DB->query() mappings.
 * The database should store generic data, e.g.,
 *  - Title1, Title2, Title3
 *  - Creator, Affiliation, Location
 *
 * Then Gazelle's job is to map text labels over the fields in HTML.
 * So $Input->Print($ID = $ENV->Creator) would print an Author form input.
 *
 * The structure:
 * $ENV->DBMAP =
 *   (DatabaseField
 *     ->(
 *       Label->TextLabel,
 *       OldField->Oppaitime,
 *     )
 *   );
 *
 * An example:
 * $Title1 = $ENV->DBMAP->Title1;
 * $ElementID =
 *   strtolower($Title1->Label)
 *   . '_class_label_'
 *   . $InstanceID;
 */
$DatabaseFields = [
    'AccessionNumber' => [
        'Label' => 'Accession Number',
        'Selector' => ['DOI' => 'javdb', 'RefSeq' => 'anidb', 'UniProt' => 'ehentai'],
        'OldField' => 'CatalogueNumber',
        'Description' => 'RefSeq and UniProt preferred',
    ],

    'Title1' => [
        'Label' => 'Torrent Title',
        'Selector' => ['title'],
        'OldField' => 'Name',
        'Description' => 'Definition line, e.g., Alcohol dehydrogenase ADH1',
    ],

    'Title2' => [
        'Label' => 'Organism',
        'Selector' => ['DOI' => 'javdb_tr', 'RefSeq' => 'anidb_tr', 'UniProt' => 'ehentai_tr'],
        'OldField' => 'NameRJ',
        'Description' => 'Organism line binomial, e.g., Saccharomyces cerevisiae',
    ],

    'Title3' => [
        'Label' => 'Strain/Variety',
        'Selector' => ['DOI' => 'javdb_tr', 'RefSeq' => 'anidb_tr', 'UniProt' => 'ehentai_tr'],
        'OldField' => 'NameJP',
        'Description' => 'Organism line if any, e.g., S288C',
    ],

    # etc.
];

ENV::setPub(
    'DBMAP',
    new RecursiveArrayObject($DatabaseFields)
);


/**
 * Site Categories
 * v2 modular ontology
 */

# Main Categories
# Old OT Gazelle format
# https://www.ncbi.nlm.nih.gov/books/NBK25464/
$Categories = [
    'Sequences',
    'Graphs',
    'Systems',
    'Geometric',
    'Scalars/Vectors',
    'Patterns',
    'Constraints',
    'Images',
    'Spatial',
    'Models',
    'Documents',
    'Machine Data',
];
$GroupedCategories = $Categories;
#$CategoryIcons = ['music.png', 'apps.png', 'ebook.png', 'audiobook.png', 'elearning.png', 'comedy.png', 'comics.png'];

# Plain Formats
$PlainFormats = [
    'CSV'   => ['csv'],
    'JSON'  => ['json'],
    'Text'  => ['txt'],
    'XML'   => ['xml'],
    'Other' => [''],
];


/**
 * Sequences
 */

# Platforms
$SeqPlatforms = [
#$Media = [
    # DNA
    'Complete Genomics',
    'cPAS-BGI/MGI',
    'Helicos',
    'Illumina HiSeq',
    'Illumina MiSeq',
    'Ion Torrent',
    'Microfluidics',
    'Nanopore',
    'PacBio',
    'Roche 454',
    'Sanger',
    'SOLiD',

    # RNA, Protein, etc.
    'De Novo',
    'HPLC',
    'Mass Spec',
    'RNA-Seq',
    'Other',
];

# Sequence Formats
# https://www.ncbi.nlm.nih.gov/sra/docs/submitformats/
$SeqFormats = [
#$Containers = [
    'BAM'        => ['bam'],
    'CRAM'       => ['cram'],
    'EMBL'       => ['embl'],
    'FASTA'      => ['fa', 'fasta', 'fsa'],
    'FASTA+QUAL' => ['qual'],
    'CSFASTA'    => ['csfa', 'csfasta', 'csfsa'],
    'FASTQ'      => ['fastq', 'fq', 'sanfastq'],
    'GFF'        => ['gff', 'gff2', 'gff3'],
    'GTF'        => ['gtf'],
    'GenBank'    => ['gb', 'gbk', 'genbank'],
    'HDF5'       => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
    'PIR'        => ['pir'],
    'QSeq'       => ['qseq'],
    'SAM'        => ['sam'],
    'SFF'        => ['sff'],
    'SRF'        => ['srf'],
    'SnapGene'   => ['dna', 'seq'],
    'SwissProt'  => ['dat'],
    'VCF'        => ['vcf'],
];

# Protein Formats
# DON'T PARSE RAW FILES. TOO MANY COMPETING VENDORS
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3518119/
$ProtFormats = [
#$ContainersProt = [
    'ABI/Sciex'      => ['t2d', 'wiff'],
    'APML'           => ['apml'],
    'ASF'            => ['asf'],
    'Agilent/Bruker' => ['baf', 'd', 'fid', 'tdf', 'yep'],
    'BlibBuild'      => ['blib'],
    'Bruker/Varian'  => ['sms', 'xms'],
    'Finnigan'       => ['dat', 'ms'],
    'ION-TOF'        => ['ita', 'itm'],
    'JCAMP-DX'       => ['jdx'],
    'MGF'            => ['mgf'],
    'MS2'            => ['ms2'],
    'MSF'            => ['msf'],
    'mzData'         => ['mzdata'],
    'mzML'           => ['mzml'],
    'mzXML'          => ['mzxml'],
    'OMSSA'          => ['omssa', 'omx'],
    'PEFF'           => ['peff'],
    'pepXML'         => ['pepxml'],
    'protXML'        => ['protxml'],
    'Shimadzu'       => ['lcd', 'qgd', 'spc'],
    'Skyline'        => ['sky', 'skyd'],
    'TPP/SPC'        => ['dta'],
    'Tandem'         => ['tandem'],
    'TraML'          => ['traml'],
    'ULVAC-PHI'      => ['tdc'],
];


/**
 * Graphs
 */

# Graph Platforms
# https://en.wikipedia.org/wiki/Graph_drawing#Software
$GraphPlatforms = [
    'BioFabric',
    'BioTapestry',
    'Cytoscape',
    'Edraw Max',
    'GenMAPP',
    'Gephi',
    'graph-tool',
    'Graphviz',
    'InCroMAP',
    'LaNet-vi',
    'Linkurious',
    'MATLAB',
    'MEGA',
    'Maple',
    'Mathematica',
    #'Microsoft Automatic Graph Layout',
    'NetworkX',
    'Other',
    'PGF/TikZ',
    'PathVisio',
    'Pathview',
    'R',
    'Systrip',
    'Tom Sawyer Software',
    'Tulip',
    'yEd',
];

# XML Graph Formats
$GraphXmlFormats = [
    'DGML'    => ['dgml'],
    'DotML'   => ['dotml'],
    'GEXF'    => ['gexf'],
    'GXL'     => ['gxl'],
    'GraphML' => ['graphml'],
    'XGMML'   => ['xgmml'],
];

# Text Graph Formats
# NOTE(review): the Newick extensions ('xsd', 'sgf') look unusual — confirm
$GraphTxtFormats = [
    'DOT'    => ['gv'],
    'GML'    => ['gml'],
    'LCF'    => ['lcf'],
    'Newick' => ['xsd', 'sgf'],
    'SIF'    => ['sif'],
    'TGF'    => ['tgf'],
];


/**
 * Images
 */

# Image Platforms
$ImgPlatforms = [
#$MediaManga = [
    'CT/CAT',
    'ECG',
    'Elastography',
    'FNIR/NIRS',
    'MPI',
    'MRI/NMR',
    'Microscopy',
    'Photoacoustic',
    'Photography',
    'Scint/SPECT/PET',
    'Ultrasound',
    'X-Rays',
    'Other',
];

# Image Formats
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3948928/
$ImgFormats = [
#$ContainersGames = [
    'Analyze'   => ['hdr', 'img'],
    'Interfile' => ['h33'],
    'DICOM'     => ['dcm', 'dicom'],
    'HDF5'      => ['bash5', 'baxh5', 'fast5', 'h5', 'hdf5'],
    'NIfTI'     => ['nii', 'nifti'],
    'MINC'      => ['minc', 'mnc'],
    'JPEG'      => ['jfif', 'jpeg', 'jpg'],
    'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
    'PNG'       => ['png'],
    'TIFF'      => ['tif', 'tiff'],
    'WebP'      => ['webp'],
];


/**
 * Spatial
 */

# Vector Map Formats
$MapVectorFormats = [
    'AutoCAD DXF'       => ['dxf'],
    'Cartesian (XYZ)'   => ['xyz'],
    'DLG'               => ['dlg'],
    'Esri TIN'          => ['adf', 'dbf'],
    'GML'               => ['gml'],
    'GeoJSON'           => ['geojson'],
    'ISFC'              => ['isfc'],

    # 'kmz' is the standard zipped-KML extension and was missing from the list.
    # 'kmzv' is kept so nothing that relied on it changes.
    'KML'               => ['kml', 'kmz', 'kmzv'],

    # DAT omitted
    # https://en.wikipedia.org/wiki/MapInfo_TAB_format
    'MapInfo TAB'       => ['tab', 'ind', 'map', 'id'],

    'Measure Map Pro'   => ['mmp'],
    'NTF'               => ['ntf'],

    # DBF omitted
    # https://en.wikipedia.org/wiki/Shapefile
    'Shapefile'         => ['shp', 'shx'],

    'Spatial Data File' => ['sdf', 'sdf3', 'sif', 'kif'],
    'SOSI'              => ['sosi'],
    'SVG'               => ['svg'],
    'TIGER'             => ['tiger'],
    'VPF'               => ['vpf'],
];

# Raster Map Formats
$MapRasterFormats = [
    'ADRG'      => ['adrg'],
    'Binary'    => ['bsq', 'bip', 'bil'],
    'DRG'       => ['drg'],
    'ECRG'      => ['ecrg'],
    'ECW'       => ['ecw'],

    # DAT and ASC omitted (common)
    # https://support.esri.com/en/technical-article/000008526
    # https://web.archive.org/web/20150128024528/http://docs.codehaus.org/display/GEOTOOLS/ArcInfo+ASCII+Grid+format
    'Esri Grid' => ['adf', 'nit', 'asc', 'grd'],

    'GeoTIFF'   => ['tfw'],
    #'IMG'       => ['img'],
    #'JPEG 2000' => ['j2k', 'jp2', 'jpf', 'jpm', 'jpx', 'mj2'],
    'MrSID'     => ['sid'],
    'netCDF'    => ['nc'],
    'RPF'       => ['cadrg', 'cib'],
];


/**
 * Documents
 */

$DocPlatforms = [
    # Composed
    'Literature',
    'Software',

    # Generated
    'Kernel',
    'Metadata',
    'Notebook',
    'Other',
];

# Binary Document Formats
# https://en.wikipedia.org/wiki/OpenDocument
# https://en.wikipedia.org/wiki/List_of_Microsoft_Office_filename_extensions
$BinDocFormats = [
    'OpenDocument' => ['odt', 'fodt', 'ods', 'fods', 'odp', 'fodp', 'odg', 'fodg', 'odf'],
    'Word'         => ['doc', 'dot', 'wbk', 'docx', 'docm', 'dotx', 'dotm', 'docb'],
    'PowerPoint'   => ['ppt', 'pot', 'pps', 'pptx', 'pptm', 'potx', 'potm', 'ppam', 'ppsx', 'ppsm', 'sldx', 'sldm'],
    'Excel'        => ['xls', 'xlt', 'xlm', 'xlsx', 'xlsm', 'xltx', 'xltm', 'xlsb', 'xla', 'xlam', 'xll', 'xlw'],
    'PDF'          => ['pdf', 'fdf', 'xfdf'],
];

# Extra Formats
# DON'T PARSE IMG OR ISO FILES
# https://en.wikipedia.org/wiki/Disk_image#File_formats
# http://dcjtech.info/topic/python-file-extensions/
$CpuGenFormats = [
    'Docker'       => ['dockerfile'],
    'Hard Disk'    => ['fvd', 'dmg', 'esd', 'qcow', 'qcow2', 'qcow3', 'smi', 'swm', 'vdi', 'vhd', 'vhdx', 'vmdk', 'wim'],
    'Optical Disc' => ['bin', 'ccd', 'cso', 'cue', 'daa', 'isz', 'mdf', 'mds', 'mdx', 'nrg', 'uif'],
    'Python'       => ['pxd', 'py', 'py3', 'pyc', 'pyd', 'pyde', 'pyi', 'pyo', 'pyp', 'pyt', 'pyw', 'pywz', 'pyx', 'pyz', 'rpy', 'xpy'],
    'Jupyter'      => ['ipynb'],
    'Ontology'     => ['cgif', 'cl', 'clif', 'csv', 'htm', 'html', 'kif', 'obo', 'owl', 'rdf', 'rdfa', 'rdfs', 'rif', 'tsv', 'xcl', 'xht', 'xhtml', 'xml'],
];


/**
 * Machine Data
 */

$RawPlatforms = [
    'Binary',
    'Text',
];

# Archives
$Archives = [
    '7z'     => ['7z'],
    'bzip2'  => ['bz2', 'bzip2'],
    'gzip'   => ['gz', 'gzip', 'tgz', 'tpz'],
    'Pickle' => ['pickle', 'pkl'],
    'RAR'    => ['rar', 'rev'],
    'ZIP'    => ['zip', 'zipx'],
    'None'   => [''],
];

# Licenses
$Codecs = [
    'BSD-2',
    'BSD-3',
    'CC BY',
    'CC BY-SA',
    'CC BY-ND',
    'CC BY-NC',
    'CC BY-NC-SA',
    'CC BY-NC-ND',
    'GNU GPL',
    'GNU LGPL',
    'GNU AGPL',
    'GNU FDL',
    'MIT',
    'ODC-By',
    'ODC-ODbL',
    'OpenMTA',
    'Public Domain',
    'Unspecified',
    'Other',
];

# Scopes
$Resolutions = [
    'Contig',
    'Scaffold',
    'Chromosome',
    'Whole Genome',
    'Other',
];

# Collage categories
$CollageCats = [
    0 => 'Personal',
    1 => 'Theme',
    2 => 'Staff Picks',
    3 => 'Group Picks',
];


/**
 * Now for the good stuff.
 * The short names are for convenience.
 * It should be easy enough to find and replace,
 * e.g., if you want to use other names.
*/

# Common path prefix for the category icons
$CatIcons = '/static/common/bioicons/';

# One entry per site category: numeric ID, display name, icon path,
# the platform list, the format tables grouped by kind, and a blurb.
$CATS = [
    'SEQ' => [
        'ID' => 1,
        'Name' => 'Sequences',
        'Icon' => $CatIcons.'sequences.png',
        'Platforms' => $SeqPlatforms,
        'Formats' => [
            'NucleoSeq' => $SeqFormats,
            'ProtSeq' => $ProtFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => "For data that's ACGT, ACGU, amino acid letters on disk.",
    ],

    'GRF' => [
        'ID' => 2,
        'Name' => 'Graphs',
        'Icon' => $CatIcons.'graphs.png',
        'Platforms' => $GraphPlatforms,
        'Formats' => [
            'GraphXml' => $GraphXmlFormats,
            'GraphTxt' => $GraphTxtFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => 'For pathway and regulatory network data, structured taxonomies, etc.',
    ],

    'SYS' => [
        'ID' => 3,
        'Name' => 'Systems',
        'Icon' => $CatIcons.'systems.png',
        'Platforms' => $GraphPlatforms,
        'Formats' => [
            'GraphXml' => $GraphXmlFormats,
            'GraphTxt' => $GraphTxtFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => 'For data that examines one facet broadly, not one subject deeply.',
    ],

    'GEO' => [
        'ID' => 4,
        'Name' => 'Geometric',
        'Icon' => $CatIcons.'geometric.png',
        'Platforms' => $GraphPlatforms,
        'Formats' => [
            'GraphXml' => $GraphXmlFormats,
            'GraphTxt' => $GraphTxtFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => "For structured data (XML, etc.) that describes the subject's orientation in space.",
    ],

    'SCV' => [
        'ID' => 5,
        'Name' => 'Scalars/Vectors',
        'Icon' => $CatIcons.'scalars_vectors.png',
        'Platforms' => $GraphPlatforms,
        'Formats' => [
            'GraphXml' => $GraphXmlFormats,
            'GraphTxt' => $GraphTxtFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => 'For data that describes observations over time and/or space.',
    ],

    'PTRN' => [
        'ID' => 6,
        'Name' => 'Patterns',
        'Icon' => $CatIcons.'patterns.png',
        'Platforms' => $GraphPlatforms,
        'Formats' => [
            'GraphXml' => $GraphXmlFormats,
            'GraphTxt' => $GraphTxtFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => 'For data that describes recurring structures in nature such as common pathways or motifs in the proteome or metabolome.',
    ],

    'CNST' => [
        'ID' => 7,
        'Name' => 'Constraints',
        'Icon' => $CatIcons.'constraints.png',
        'Platforms' => $GraphPlatforms,
        'Formats' => [
            'GraphXml' => $GraphXmlFormats,
            'GraphTxt' => $GraphTxtFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => 'For data that records experimental control behavior, checks readings against known physical constants, tracks the thermodynamic limits of reactions, etc.',
    ],

    'IMG' => [
        'ID' => 8,
        'Name' => 'Images',
        'Icon' => $CatIcons.'images.png',
        'Platforms' => $ImgPlatforms,
        'Formats' => [
            'ImgRaster' => $ImgFormats,
            #'ImgVector' => $ImgFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => 'For data you can look at!',
    ],

    'SPAT' => [
        'ID' => 9,
        'Name' => 'Spatial',
        'Icon' => $CatIcons.'spatial.png',
        'Platforms' => $GraphPlatforms,
        'Formats' => [
            'MapVector' => $MapVectorFormats,
            'MapRaster' => $MapRasterFormats,
            'ImgRaster' => $ImgFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => "For data that's limited to specific locations or otherwise describes macroscopic space.",
    ],

    'MOD' => [
        'ID' => 10,
        'Name' => 'Models',
        'Icon' => $CatIcons.'models.png',
        'Platforms' => $GraphPlatforms,
        'Formats' => [
            'MapVector' => $MapVectorFormats,
            'MapRaster' => $MapRasterFormats,
            'ImgRaster' => $ImgFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => 'For projections, simulations, and other hypothetical or computer-generated data.',
    ],

    'DOC' => [
        'ID' => 11,
        'Name' => 'Documents',
        'Icon' => $CatIcons.'documents.png',
        'Platforms' => $DocPlatforms,
        'Formats' => [
            'BinDoc' => $BinDocFormats,
            'CpuGen' => $CpuGenFormats,
            'Plain' => $PlainFormats
        ],
        'Description' => 'For documentation, software, disk images, and literature datasets.',
    ],

    'RAW' => [
        'ID' => 12,
        'Name' => 'Machine Data',
        'Icon' => $CatIcons.'machine_data.png',
        'Platforms' => $RawPlatforms,
        'Formats' => ['Plain' => $PlainFormats],
        'Description' => 'For raw reads and machine data of any category.',
    ],
];

ENV::setPub(
    'CATS',
    new RecursiveArrayObject($CATS)
);


/**
 * Regular expressions
 *
 * The Gazelle regex collection.
 * Formerly in classes/regex.php.
 *
 * Each pattern that exists both as a constant and as an $ENV value is written
 * once, in the define() call, and then published to $ENV from that constant.
 */

// resource_type://username:password@domain:port/path?query_string#anchor
define('RESOURCE_REGEX', '(https?|ftps?):\/\/');
ENV::setPub('RESOURCE_REGEX', RESOURCE_REGEX);

define('IP_REGEX', '(\d{1,3}\.){3}\d{1,3}');
ENV::setPub('IP_REGEX', IP_REGEX);

define('DOMAIN_REGEX', '([a-z0-9\-\_]+\.)*[a-z0-9\-\_]+');
ENV::setPub('DOMAIN_REGEX', DOMAIN_REGEX);

define('PORT_REGEX', ':\d{1,5}');
ENV::setPub('PORT_REGEX', PORT_REGEX);

define('URL_REGEX', '('.RESOURCE_REGEX.')('.IP_REGEX.'|'.DOMAIN_REGEX.')('.PORT_REGEX.')?(\/\S*)*');
ENV::setPub('URL_REGEX', URL_REGEX);

define('USERNAME_REGEX', '/^[a-z0-9_]{2,20}$/iD');
ENV::setPub('USERNAME_REGEX', USERNAME_REGEX);

define('EMAIL_REGEX', '[_a-z0-9-]+([.+][_a-z0-9-]+)*@'.DOMAIN_REGEX);
ENV::setPub('EMAIL_REGEX', EMAIL_REGEX);

define('IMAGE_REGEX', URL_REGEX.'\/\S+\.(jpg|jpeg|tif|tiff|png|gif|bmp)(\?\S*)?');
ENV::setPub('IMAGE_REGEX', IMAGE_REGEX);

define('VIDEO_REGEX', URL_REGEX.'\/\S+\.(webm)(\?\S*)?');
ENV::setPub('VIDEO_REGEX', VIDEO_REGEX);

define('CSS_REGEX', URL_REGEX.'\/\S+\.css(\?\S*)?');
ENV::setPub('CSS_REGEX', CSS_REGEX);

# The dot after "www" is escaped so only a literal "www." prefix matches.
# An unescaped dot would accept any character there.
# Capture groups are unchanged: 1 = scheme, 2 = optional "www." prefix.
define('SITELINK_REGEX', RESOURCE_REGEX.'(www\.)?'.preg_quote(SITE_DOMAIN, '/'));
ENV::setPub('SITELINK_REGEX', SITELINK_REGEX);

define('TORRENT_REGEX', SITELINK_REGEX.'\/torrents\.php\?(.*&)?torrentid=(\d+)'); // torrentid = group 4
ENV::setPub('TORRENT_REGEX', TORRENT_REGEX);

define('TORRENT_GROUP_REGEX', SITELINK_REGEX.'\/torrents\.php\?(.*&)?id=(\d+)'); // id = group 4
ENV::setPub('TORRENT_GROUP_REGEX', TORRENT_GROUP_REGEX);

define('ARTIST_REGEX', SITELINK_REGEX.'\/artist\.php\?(.*&)?id=(\d+)'); // id = group 4
ENV::setPub('ARTIST_REGEX', ARTIST_REGEX);

# https://stackoverflow.com/a/3180176
ENV::setPub(
    'HTML_REGEX',
    '<([\w]+)([^>]*?)(([\s]*\/>)|(>((([^<]*?|<\!\-\-.*?\-\->)|(?R))*)<\/\\1[\s]*>))'
);

ENV::setPub(
    'BBCODE_REGEX',
    '\[([\w]+)([^\]]*?)(([\s]*\/\])|(\]((([^\[]*?|\[\!\-\-.*?\-\-\])|(?R))*)\[\/\\1[\s]*\]))'
);

# https://www.crossref.org/blog/dois-and-matching-regular-expressions/
ENV::setPub(
    'DOI_REGEX',
    '10.\d{4,9}\/[-._;()\/:A-Z0-9]+'
);

# https://www.biostars.org/p/13753/
ENV::setPub(
    'ENTREZ_REGEX',
    '\d*'
);

# https://www.wikidata.org/wiki/Property:P496
ENV::setPub(
    'ORCID_REGEX',
    '0000-000(1-[5-9]|2-[0-9]|3-[0-4])\d{3}-\d{3}[\dX]'
);

# https://www.biostars.org/p/13753/
ENV::setPub(
    'REFSEQ_REGEX',
    '\w{2}_\d{1,}\.\d{1,}'
);

# https://www.uniprot.org/help/accession_numbers
ENV::setPub(
    'UNIPROT_REGEX',
    '[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}'
);