From 5a7af34ca4b7624acf709a03ff16da27921c205e Mon Sep 17 00:00:00 2001 From: Matt Pass Date: Sun, 18 Aug 2019 15:58:19 +0100 Subject: [PATCH] Indexer working more intelligently --- lib/indexer.php | 99 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 83 insertions(+), 16 deletions(-) diff --git a/lib/indexer.php b/lib/indexer.php index 2bd15f5..2e3707c 100644 --- a/lib/indexer.php +++ b/lib/indexer.php @@ -2,10 +2,33 @@ include("headers.php"); include("settings.php"); -$resultsArray = []; +// File extensions to look for functions & classes in +$indexableFileExts = ["php", "js", "coffee", "ts", "rb", "py", "sql", "erl", "java", "jl", "c", "cpp", "ino", "cs", "go", "lua", "pl"]; -function phpGrep($q, $path, $base) { - global $resultsArray; +// Fallback for prevIndexData to start off initially +$prevIndexData = []; + +// If we have a data/index.php file +if (file_exists("../data/index.php")) { + // Get serialized array back out of PHP file inside a comment block as prevIndexData + $prevIndexData = file_get_contents("../data/index.php"); + if (strpos($prevIndexData, "", "", $prevIndexData); + $prevIndexData = unserialize($prevIndexData); + } +} + +// Roughly 1 in 100 index runs, we'll do a full index +if (mt_rand(1,100) === 50) { + $prevIndexData = []; +} + +// Start a new indexData for this run +$indexData = []; + +function phpGrep($path, $base) { + global $indexableFileExts, $prevIndexData, $indexData; $fp = opendir($path); global $ICEcoder, $serverType, $docRoot, $ICEcoderDir; @@ -13,25 +36,44 @@ function phpGrep($q, $path, $base) { $slash = $serverType == strpos($path,"\\")>-1 ? "\\" : "/"; while($f = readdir($fp)) { // Ignore . and .. paths - if(preg_match("#^\.+$#", $f)) continue; - $fullPath = $path.$slash.$f; - + if ($f == "." || $f == "..") continue; + $filePath = $path.$slash.$f; + $filePathExt = pathinfo($filePath, PATHINFO_EXTENSION); // Exclude the folder ICEcoder is running from $rootPrefix = '/'.str_replace("/","\/",preg_quote(str_replace("\\","/",$docRoot))).'/'; - $localPath = preg_replace($rootPrefix, '', $fullPath, 1); + $localPath = preg_replace($rootPrefix, '', $filePath, 1); if (strpos($localPath, $ICEcoderDir)===0) { continue; } - if(is_dir($fullPath)) { - $ret .= phpGrep($q, $fullPath, $base); + if(is_dir($filePath)) { + $ret .= phpGrep($filePath, $base); } else { + // Check if we should scan within this file, by only considering files that may contain functions & classes + if (in_array($filePathExt, $indexableFileExts) === false) { + continue; + } + // Check if file appears to be the same (same size and mtime), if so, continue as we'll assume it's not changed + if (isset($prevIndexData['files'][$filePath]) && + $prevIndexData['files'][$filePath]['size'] === stat($filePath)['size'] && + $prevIndexData['files'][$filePath]['mtime'] === stat($filePath)['mtime'] + ) { + // Continue, as data will be the same and we'll use data from prevIndexData + continue; + } + // Start file data block if we don't have one yet + if (!isset($indexData['files'][$filePath])) { + $indexData['files'][$filePath] = [ + "size" => stat($filePath)['size'], + "mtime" => stat($filePath)['mtime'] + ]; + } $bFile = false; // Exclude banned files for ($i=0;$i $line) { $functionText = ""; $classText = ""; @@ -47,8 +89,15 @@ function phpGrep($q, $path, $base) { // Get just the name of the function/class $classText = explode("(", explode("{", trim($classText))[0]); } + + // Function data if (!empty($functionText)) { - $resultsArray['functions'][$functionText[0]] = [ + // Start language block if we don't have one yet + if (!isset($indexData['functions'][$filePathExt])) { + $indexData['functions'][$filePathExt] = []; + } + // Set all the data for this function + $indexData['functions'][$filePathExt][$functionText[0]] = [ "name" => $functionText[0], "range" => [ "from" => [ @@ -60,12 +109,20 @@ function phpGrep($q, $path, $base) { "ch" => (strpos($line, $functionText[0]) + strlen($functionText[0])) ] ], - "fullPath" => $fullPath, + "filePath" => $filePath, + "filePathExt" => $filePathExt, "params" => trim("(".$functionText[1]) ]; } + + // Class data if (!empty($classText)) { - $resultsArray['classes'][$classText[0]] = [ + // Start language block if we don't have one yet + if (!isset($indexData['classes'][$filePathExt])) { + $indexData['classes'][$filePathExt] = []; + } + // Set all the data for this class + $indexData['classes'][$filePathExt][$classText[0]] = [ "name" => $classText[0], "range" => [ "from" => [ @@ -77,7 +134,8 @@ function phpGrep($q, $path, $base) { "ch" => (strpos($line, $classText[0]) + strlen($classText[0])) ] ], - "fullPath" => $fullPath, + "filePath" => $filePath, + "filePathExt" => $filePathExt, "params" => trim("(".$classText[1]) ]; } @@ -88,6 +146,15 @@ function phpGrep($q, $path, $base) { return $ret; } -$results = phpGrep("function", $docRoot.$iceRoot, $docRoot.$iceRoot); -echo json_encode($resultsArray, JSON_PRETTY_PRINT); +// Start running function to index data +$results = phpGrep($docRoot.$iceRoot, $docRoot.$iceRoot); + +// Overlay indexData ontop of prevIndexData +$output = array_replace_recursive($prevIndexData, $indexData); + +// Store the serialized array in PHP comment block for next time +file_put_contents($docRoot.$ICEcoderDir."/data/index.php", ""); + +// Output the JSON +echo json_encode($output, JSON_PRETTY_PRINT); ?>