mirror of
https://github.com/icecoder/ICEcoder.git
synced 2026-03-03 07:13:59 +01:00
Don't index *.min.* files; detect declaration lines more intelligently by considering the format of the language and its varying syntax; pick out the function name and args much better; don't store blank names; take only the first word of a class name (so excluding things like the extends/implements part, and of course no args on classes); plus consider prev data before deciding whether to index
213 lines
11 KiB
PHP
213 lines
11 KiB
PHP
<?php
include("headers.php");
include("settings.php");

// File extensions to look for functions & classes in
$indexableFileExts = ["php", "js", "coffee", "ts", "rb", "py", "sql", "erl", "java", "jl", "c", "cpp", "ino", "cs", "go", "lua", "pl"];

// Fallback for prevIndexData to start off initially
$prevIndexData = [];

// If we have a data/index.php file
if (file_exists($docRoot.$ICEcoderDir."/data/index.php")) {
    // Get serialized array back out of PHP file inside a comment block as prevIndexData
    $prevIndexRaw = file_get_contents($docRoot.$ICEcoderDir."/data/index.php");
    if ($prevIndexRaw !== false && strpos($prevIndexRaw, "<?php") !== false) {
        // Strip the PHP comment wrapper that surrounds the serialized payload
        $prevIndexRaw = str_replace("<?php\n/*\n\n", "", $prevIndexRaw);
        $prevIndexRaw = str_replace("\n\n*/\n?>", "", $prevIndexRaw);
        // The index only ever holds nested arrays of scalars, so refuse to instantiate objects
        $prevIndexParsed = unserialize($prevIndexRaw, ["allowed_classes" => false]);
        // Only accept a successfully decoded array; a corrupt or truncated file
        // leaves us with the empty fallback and therefore triggers a full index
        if (is_array($prevIndexParsed)) {
            $prevIndexData = $prevIndexParsed;
        }
    }
}

// Roughly 1 in 100 index runs, we'll do a full index
if (mt_rand(1,100) === 50) {
    $prevIndexData = [];
}

// Start a new indexData for this run
$indexData = [];

/**
 * Recursively scan a directory and record function & class declarations
 * found in indexable source files.
 *
 * Findings are written into the global $indexData array under the keys
 * 'files', 'functions' and 'classes'. Files whose size and mtime match the
 * entry in the global $prevIndexData are skipped, as are the ICEcoder folder
 * itself, banned files and *.min.* files.
 *
 * @param string $path Directory to scan
 * @param string $base Directory the scan started from (only passed through to recursive calls)
 * @return string Always an empty string; nothing other than the results of recursive calls is appended to it
 */
function phpGrep($path, $base) {
    global $indexableFileExts, $prevIndexData, $indexData;
    global $ICEcoder, $serverType, $docRoot, $ICEcoderDir;

    $ret = "";

    // Nothing to scan if the directory can't be opened
    $fp = opendir($path);
    if ($fp === false) {
        return $ret;
    }

    // NOTE(review): precedence makes this `$serverType == (strpos($path, "\\") > -1)`,
    // left unchanged as the result depends on how $serverType is defined elsewhere - confirm intent
    $slash = $serverType == strpos($path,"\\")>-1 ? "\\" : "/";

    // Pattern used to strip the doc root off paths (doesn't change per file, so built once)
    $rootPrefix = '/'.str_replace("/","\/",preg_quote(str_replace("\\","/",$docRoot))).'/';

    // Compare against false explicitly, otherwise an entry named "0" would end the loop early
    while (($f = readdir($fp)) !== false) {
        // Ignore . and .. paths
        if ($f === "." || $f === "..") {
            continue;
        }

        $filePath = $path.$slash.$f;
        $filePathExt = pathinfo($filePath, PATHINFO_EXTENSION);

        // Exclude the folder ICEcoder is running from
        $localPath = preg_replace($rootPrefix, '', $filePath, 1);
        if (strpos($localPath, $ICEcoderDir) === 0) {
            continue;
        }

        // Recurse into sub-directories
        if (is_dir($filePath)) {
            $ret .= phpGrep($filePath, $base);
            continue;
        }

        // Check if we should scan within this file, by only considering files that may contain functions & classes
        if (in_array($filePathExt, $indexableFileExts, true) === false) {
            continue;
        }

        // Stat once and reuse; skip files we can't stat
        $fileStat = stat($filePath);
        if ($fileStat === false) {
            continue;
        }

        // Check if file appears to be the same (same size and mtime), if so, continue as we'll assume it's not changed
        if (isset($prevIndexData['files'][$filePath]) &&
            $prevIndexData['files'][$filePath]['size'] === $fileStat['size'] &&
            $prevIndexData['files'][$filePath]['mtime'] === $fileStat['mtime']
        ) {
            // Data will be the same and we'll use data from prevIndexData
            continue;
        }

        // Start file data block if we don't have one yet
        if (!isset($indexData['files'][$filePath])) {
            $indexData['files'][$filePath] = [
                "size" => $fileStat['size'],
                "mtime" => $fileStat['mtime']
            ];
        }

        // Exclude *.min.* minified files
        $minFileText = pathinfo(pathinfo($f)['filename']);
        if (isset($minFileText['extension']) && $minFileText['extension'] === "min") {
            continue;
        }

        // Exclude banned files (a banned entry matches if, with any * removed, it appears in the filename)
        $bFile = false;
        foreach ($ICEcoder['bannedFiles'] as $bannedFile) {
            if ($bannedFile !== "" && strpos($f, str_replace("*", "", $bannedFile)) !== false) {
                $bFile = true;
                break;
            }
        }
        if ($bFile) {
            continue;
        }

        // Skip files we can't read
        $lines = file($filePath);
        if ($lines === false) {
            continue;
        }

        foreach ($lines as $lineNum => $line) {
            $functionText = "";
            $className = "";

            // Get function declaration lines, covering most language formats
            $openPos = strpos($line, "(");
            $closePos = strpos($line, ")");

            // Do we have both parens in ( then ) order on the line?
            $hasOrderedParens = $openPos !== false && $closePos !== false && $openPos < $closePos;

            // If so, do we also have a valid declaration format for this particular language?
            // (The parens check must apply to every language test, otherwise lines without
            // any parens get through and there are no args to extract)
            $isLangDeclaration = false;
            if ($hasOrderedParens) {
                if ($filePathExt === "py" || $filePathExt === "rb") {
                    $defPos = strpos($line, "def");
                    $isLangDeclaration = $defPos !== false && $defPos < $openPos;
                } elseif ($filePathExt === "js" || $filePathExt === "ts") {
                    $isLangDeclaration = strpos($line, "=>") !== false;
                } elseif ($filePathExt === "erl" || $filePathExt === "coffee") {
                    $isLangDeclaration = strpos($line, "->") !== false;
                } elseif ($filePathExt === "c" || $filePathExt === "cpp") {
                    $bracePos = strpos($line, "{");
                    $isLangDeclaration = $bracePos !== false && $bracePos > $openPos;
                } elseif ($filePathExt === "go") {
                    $funcPos = strpos($line, "func");
                    $isLangDeclaration = $funcPos !== false && $funcPos < $openPos;
                }
            }

            // ...or does the line contain "function" before opening parens?
            $functionPos = strpos($line, "function");
            $hasFunctionKeyword = $functionPos !== false && $openPos !== false && $functionPos < $openPos;

            if ($isLangDeclaration || $hasFunctionKeyword) {
                // ...it's enough of an indication this is a function declaration line, so grab name and args from the line
                // First, strip away all non alphanum, underscore and parens chars, plus the word "function"
                // (No need to remove "def" or "func" as we're only concerned by the string between function name and parens and both
                // appear before function name in Python, Ruby and Go languages, it's only "function" that's between name and args in some languages)
                $functionLine = preg_replace('/[^\da-z\s_\(\)]|\bfunction\b/i', '', $line)."\n";
                // Then remove any whitespace that sits directly before an open parens
                // and explode on the open parens to get the split between name and start of args
                $functionLine = preg_replace('/\s+\(/', '(', $functionLine)."\n";
                $functionLine = explode("(", $functionLine);
                // Finally, name is the last word before the parens, args are everything up to the first close parens
                $functionText = [
                    0 => ltrim(substr($functionLine[0], strrpos($functionLine[0], " "))),
                    1 => "(".explode(")",$functionLine[1])[0].")"
                ];
            }

            // Get class declaration lines (far simpler than functions, as all languages have a very similar format)
            $classPos = strpos($line, "class ");
            if ($classPos !== false) {
                // Get just the name of the class, which is the first word after "class "
                // (trimmed so a name at the end of the line doesn't carry the newline with it)
                $classWords = preg_split('/\s+/', trim(substr($line, $classPos + 6)));
                $className = $classWords[0];
            }

            // Function data
            if (!empty($functionText) && $functionText[0] !== "") {
                // Start language block if we don't have one yet
                if (!isset($indexData['functions'][$filePathExt])) {
                    $indexData['functions'][$filePathExt] = [];
                }
                // Set all the data for this function
                $functionCh = strpos($line, $functionText[0]);
                $indexData['functions'][$filePathExt][$functionText[0]] = [
                    "name" => $functionText[0],
                    "range" => [
                        "from" => [
                            "line" => $lineNum,
                            "ch" => $functionCh
                        ],
                        "to" => [
                            "line" => $lineNum,
                            "ch" => ($functionCh + strlen($functionText[0]))
                        ]
                    ],
                    "filePath" => $filePath,
                    "filePathExt" => $filePathExt,
                    "params" => str_replace(" ", ", ", $functionText[1])
                ];
            }

            // Class data
            if ($className !== "") {
                // Start language block if we don't have one yet
                if (!isset($indexData['classes'][$filePathExt])) {
                    $indexData['classes'][$filePathExt] = [];
                }
                // Set all the data for this class
                $classCh = strpos($line, $className);
                $indexData['classes'][$filePathExt][$className] = [
                    "name" => $className,
                    "range" => [
                        "from" => [
                            "line" => $lineNum,
                            "ch" => $classCh
                        ],
                        "to" => [
                            "line" => $lineNum,
                            "ch" => ($classCh + strlen($className))
                        ]
                    ],
                    "filePath" => $filePath,
                    "filePathExt" => $filePathExt
                ];
            }
        }
    }

    // Release the directory handle now we're done with it
    closedir($fp);

    return $ret;
}

// The browser is up to date only when it passed a timestamp, we have prev timestamps,
// and the passed timestamp is the same as the one stored in the index
$browserIsUpToDate = isset($_GET['timestamp'], $prevIndexData["timestamps"])
    && $_GET['timestamp'] == $prevIndexData["timestamps"]["indexed"];

if ($browserIsUpToDate) {
    // It's the same as last time so there's nothing to index, just report no change
    $output = [
        "timestamps" => [
            "indexed" => stat($docRoot)['mtime'],
            "browser" => (int) $_GET['timestamp'],
            "changed" => false
        ]
    ];
} elseif (isset($prevIndexData["timestamps"]) && $prevIndexData["timestamps"]["indexed"] === stat($docRoot)['mtime']) {
    // We have prev data and the doc root mtime matches it, so output what we have in our index
    $output = $prevIndexData;
} else {
    // No prev data or something in the doc root changed, so we can do an index
    // Record the timestamps for this run
    $indexData["timestamps"] = [
        "indexed" => stat($docRoot)['mtime'],
        "browser" => $_GET['timestamp'] ?? 0,
        "changed" => true
    ];

    // Run the function to index data (it fills indexData as it goes)
    $results = phpGrep($docRoot.$iceRoot, $docRoot.$iceRoot);

    // Overlay indexData ontop of prevIndexData
    $output = array_replace_recursive($prevIndexData, $indexData);

    // Store the serialized array in PHP comment block for next time
    file_put_contents($docRoot.$ICEcoderDir."/data/index.php", "<?php\n/*\n\n".serialize($output)."\n\n*/\n?".">");
}

// Output the JSON
echo json_encode($output, JSON_PRETTY_PRINT);