Refactor(server): Centralize GEMINI.md discovery logic in server (#498)

This commit is contained in:
Allen Hutchison
2025-05-23 08:53:22 -07:00
committed by GitHub
parent f8c4276e69
commit a008d81780
5 changed files with 746 additions and 328 deletions

View File

@@ -4,10 +4,6 @@
* SPDX-License-Identifier: Apache-2.0
*/
import * as fs from 'fs/promises';
import * as fsSync from 'fs'; // For synchronous checks like existsSync
import * as path from 'path';
import { homedir } from 'os';
import yargs from 'yargs/yargs';
import { hideBin } from 'yargs/helpers';
import process from 'node:process';
@@ -15,8 +11,7 @@ import {
Config,
loadEnvironment,
createServerConfig,
GEMINI_CONFIG_DIR,
GEMINI_MD_FILENAME,
loadServerHierarchicalMemory,
} from '@gemini-code/server';
import { Settings } from './settings.js';
import { readPackageUp } from 'read-package-up';
@@ -32,18 +27,6 @@ const logger = {
};
// Model name used when none is supplied on the command line
// (consumed as `argv.model || DEFAULT_GEMINI_MODEL`).
const DEFAULT_GEMINI_MODEL = 'gemini-2.5-pro-preview-05-06';
// Entry names that the downward GEMINI.md scan refuses to descend into.
// Matching is an exact comparison against the directory entry's name,
// not a path or glob match.
// TODO(adh): Refactor to use a shared ignore list with other tools like glob and read-many-files.
const DEFAULT_IGNORE_DIRECTORIES = [
'node_modules',
'.git',
'dist',
'build',
'out',
'coverage',
'.vscode',
'.idea',
// NOTE(review): .DS_Store is normally a file rather than a directory, so this
// entry presumably never matches during a directory-only check — confirm.
'.DS_Store',
];
interface CliArgs {
model: string | undefined;
@@ -95,293 +78,24 @@ async function parseArguments(): Promise<CliArgs> {
return finalArgv;
}
/**
 * Finds the nearest directory, starting at `startDir` and moving toward the
 * filesystem root, that contains a `.git` directory.
 *
 * A missing `.git` entry (ENOENT) is expected and stays silent; any other
 * failure while probing is logged as a warning and the walk continues.
 *
 * @param startDir Directory to begin from; resolved to an absolute path first.
 * @returns The first directory holding a `.git` directory, or `null` once the
 *   filesystem root has been checked without a match.
 */
async function findProjectRoot(startDir: string): Promise<string | null> {
  let candidate = path.resolve(startDir);
  for (;;) {
    const gitPath = path.join(candidate, '.git');
    let isGitDirectory = false;
    try {
      isGitDirectory = (await fs.stat(gitPath)).isDirectory();
    } catch (error: unknown) {
      const looksLikeFsError =
        typeof error === 'object' && error !== null && 'code' in error;
      if (!looksLikeFsError) {
        logger.warn(
          `Non-standard error checking for .git directory at ${gitPath}: ${String(error)}`,
        );
      } else {
        const { code, message } = error as { code: string; message: string };
        // ENOENT simply means "no .git here"; anything else is worth surfacing.
        if (code !== 'ENOENT') {
          logger.warn(
            `Error checking for .git directory at ${gitPath}: ${message}`,
          );
        }
      }
    }
    if (isGitDirectory) {
      return candidate;
    }

    const parent = path.dirname(candidate);
    // dirname() of the root is the root itself: nothing left to climb.
    if (parent === candidate) {
      return null;
    }
    candidate = parent;
  }
}
/**
 * Recursively gathers readable GEMINI.md files at or below `directory`.
 *
 * The traversal is depth-first and bounded: every directory that is entered
 * increments `scannedDirCount.count`, and once that shared counter reaches
 * `maxScanDirs` further directories are not entered.
 *
 * @param directory Directory to scan.
 * @param debugMode When true, progress is written via `logger.debug`.
 * @param ignoreDirs Directory entry names (exact match) that are not entered.
 * @param scannedDirCount Mutable counter shared across the whole recursion.
 * @param maxScanDirs Upper bound on the number of directories entered.
 * @returns Paths of the readable GEMINI.md files found, in traversal order.
 *   A directory that cannot be listed is logged as a warning and contributes
 *   whatever had been collected before the failure.
 */
async function collectDownwardGeminiFiles(
  directory: string,
  debugMode: boolean,
  ignoreDirs: string[],
  scannedDirCount: { count: number },
  maxScanDirs: number,
): Promise<string[]> {
  const trace = (message: string): void => {
    if (debugMode) logger.debug(message);
  };

  if (scannedDirCount.count >= maxScanDirs) {
    trace(
      `Max directory scan limit (${maxScanDirs}) reached. Stopping downward scan at: ${directory}`,
    );
    return [];
  }
  scannedDirCount.count += 1;
  trace(
    `Scanning downward for ${GEMINI_MD_FILENAME} files in: ${directory} (scanned: ${scannedDirCount.count}/${maxScanDirs})`,
  );

  const found: string[] = [];
  try {
    const entries = await fs.readdir(directory, { withFileTypes: true });
    for (const entry of entries) {
      const entryPath = path.join(directory, entry.name);

      if (entry.isDirectory()) {
        if (ignoreDirs.includes(entry.name)) {
          trace(`Skipping ignored directory: ${entryPath}`);
          continue;
        }
        const nested = await collectDownwardGeminiFiles(
          entryPath,
          debugMode,
          ignoreDirs,
          scannedDirCount,
          maxScanDirs,
        );
        found.push(...nested);
        continue;
      }

      if (!entry.isFile() || entry.name !== GEMINI_MD_FILENAME) {
        continue;
      }
      try {
        // Only files the current process can actually read are reported.
        await fs.access(entryPath, fsSync.constants.R_OK);
        found.push(entryPath);
        trace(`Found readable downward ${GEMINI_MD_FILENAME}: ${entryPath}`);
      } catch {
        trace(
          `Downward ${GEMINI_MD_FILENAME} not readable, skipping: ${entryPath}`,
        );
      }
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn(`Error scanning directory ${directory}: ${reason}`);
    trace(`Failed to scan directory: ${directory}`);
  }
  return found;
}
/**
 * Produces the ordered list of readable GEMINI.md files that apply to a
 * working directory.
 *
 * Result order:
 * 1. the global file `<home>/<GEMINI_CONFIG_DIR>/<GEMINI_MD_FILENAME>`, if readable;
 * 2. files found walking upward from the CWD, outermost ancestor first. The
 *    walk stops before the parent of the project root (nearest `.git`
 *    directory) or, when there is no project root, before the home directory;
 * 3. files found scanning downward from the CWD, sorted, skipping any path
 *    already present in the list.
 *
 * @param currentWorkingDirectory Directory the search is centred on.
 * @param userHomePath Home directory used to locate the global file.
 * @param debugMode When true, each step is written via `logger.debug`.
 * @returns The ordered file paths; empty when nothing readable was found.
 */
export async function getGeminiMdFilePaths(
  currentWorkingDirectory: string,
  userHomePath: string,
  debugMode: boolean,
): Promise<string[]> {
  const trace = (message: string): void => {
    if (debugMode) logger.debug(message);
  };
  const isReadable = async (candidate: string): Promise<boolean> => {
    try {
      await fs.access(candidate, fsSync.constants.R_OK);
      return true;
    } catch {
      return false;
    }
  };

  const resolvedCwd = path.resolve(currentWorkingDirectory);
  const resolvedHome = path.resolve(userHomePath);
  const globalConfigDir = path.join(resolvedHome, GEMINI_CONFIG_DIR);
  const globalMemoryPath = path.join(
    resolvedHome,
    GEMINI_CONFIG_DIR,
    GEMINI_MD_FILENAME,
  );
  const paths: string[] = [];

  trace(
    `Searching for ${GEMINI_MD_FILENAME} starting from CWD: ${resolvedCwd}`,
  );
  trace(`User home directory: ${resolvedHome}`);

  // 1. Global file.
  if (await isReadable(globalMemoryPath)) {
    paths.push(globalMemoryPath);
    trace(`Found readable global ${GEMINI_MD_FILENAME}: ${globalMemoryPath}`);
  } else {
    trace(
      `Global ${GEMINI_MD_FILENAME} not found or not readable: ${globalMemoryPath}`,
    );
  }

  // 2. Upward walk, collected innermost-first and reversed afterwards.
  const projectRoot = await findProjectRoot(resolvedCwd);
  trace(`Determined project root: ${projectRoot ?? 'None'}`);

  const stopDir = projectRoot ? path.dirname(projectRoot) : resolvedHome;
  const ancestorsInnermostFirst: string[] = [];
  let dir = resolvedCwd;
  while (dir && dir !== stopDir && dir !== path.dirname(dir)) {
    trace(`Checking for ${GEMINI_MD_FILENAME} in (upward scan): ${dir}`);

    // The global config dir was already handled in step 1.
    if (dir === globalConfigDir) {
      trace(`Skipping check inside global config dir: ${dir}`);
      break;
    }

    const candidate = path.join(dir, GEMINI_MD_FILENAME);
    if (await isReadable(candidate)) {
      ancestorsInnermostFirst.push(candidate);
      trace(`Found readable upward ${GEMINI_MD_FILENAME}: ${candidate}`);
    } else {
      trace(
        `Upward ${GEMINI_MD_FILENAME} not found or not readable in: ${dir}`,
      );
    }

    const parent = path.dirname(dir);
    // Defensive only: the loop guard above already rejects the filesystem root.
    if (parent === dir) {
      trace(`Reached filesystem root, stopping upward search.`);
      break;
    }
    dir = parent;
  }
  paths.push(...ancestorsInnermostFirst.reverse());

  // 3. Downward scan, capped so huge trees cannot stall startup.
  trace(`Starting downward scan from CWD: ${resolvedCwd}`);
  const MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY = 200; // Define the cap
  const scannedDirCount = { count: 0 };
  const downwardPaths = await collectDownwardGeminiFiles(
    resolvedCwd,
    debugMode,
    DEFAULT_IGNORE_DIRECTORIES,
    scannedDirCount,
    MAX_DIRECTORIES_TO_SCAN_FOR_MEMORY,
  );
  downwardPaths.sort();
  if (downwardPaths.length > 0) {
    trace(
      `Found downward ${GEMINI_MD_FILENAME} files (sorted): ${JSON.stringify(downwardPaths)}`,
    );
  }

  // The CWD's own file may already be listed from the upward walk.
  const alreadyListed = new Set(paths);
  for (const downwardPath of downwardPaths) {
    if (!alreadyListed.has(downwardPath)) {
      alreadyListed.add(downwardPath);
      paths.push(downwardPath);
    }
  }

  trace(
    `Final ordered ${GEMINI_MD_FILENAME} paths to read: ${JSON.stringify(paths)}`,
  );
  return paths;
}
/** Outcome of attempting to read a single GEMINI.md file. */
interface GeminiFileContent {
/** Path the read was attempted on. */
filePath: string;
/** Text of the file, or `null` when the read failed. */
content: string | null;
}
/**
 * Reads each given file as UTF-8, one after another, in the order supplied.
 *
 * A file that cannot be read does not abort the run: a warning is logged and
 * its entry is recorded with `content: null`.
 *
 * @param filePaths Paths to read.
 * @param debugMode When true, the per-file outcome is written via `logger.debug`.
 * @returns One entry per input path, in input order.
 */
async function readGeminiMdFiles(
  filePaths: string[],
  debugMode: boolean,
): Promise<GeminiFileContent[]> {
  const outcomes: GeminiFileContent[] = [];
  for (const filePath of filePaths) {
    let content: string | null = null;
    try {
      content = await fs.readFile(filePath, 'utf-8');
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.warn(
        `Warning: Could not read ${GEMINI_MD_FILENAME} file at ${filePath}. Error: ${reason}`,
      );
    }

    outcomes.push({ filePath, content });
    if (debugMode) {
      logger.debug(
        content === null
          ? `Failed to read: ${filePath}`
          : `Successfully read: ${filePath} (Length: ${content.length})`,
      );
    }
  }
  return outcomes;
}
/**
 * Joins the contents of the files that were read into a single string.
 *
 * Entries whose content is `null` or blank after trimming are dropped. Each
 * remaining entry is wrapped in "Context from" / "End of Context from"
 * marker lines, and the wrapped blocks are separated by one empty line.
 *
 * @param instructionContents Read results, in the order they should appear.
 * @returns The combined text, or an empty string when nothing qualifies.
 */
function concatenateInstructions(
  instructionContents: GeminiFileContent[],
): string {
  const blocks: string[] = [];
  for (const { filePath, content } of instructionContents) {
    if (typeof content !== 'string') {
      continue;
    }
    const body = content.trim();
    if (body.length === 0) {
      continue;
    }

    // Absolute paths are shown relative to process.cwd(); this assumes the
    // CWD is the project root or another relevant base. Relative paths are
    // shown as given.
    const label = path.isAbsolute(filePath)
      ? path.relative(process.cwd(), filePath)
      : filePath;
    blocks.push(
      `--- Context from: ${label} ---\n${body}\n--- End of Context from: ${label} ---`,
    );
  }
  return blocks.join('\n\n');
}
/**
 * Loads the combined GEMINI.md memory for a working directory.
 *
 * This is a thin wrapper around the server's `loadServerHierarchicalMemory`.
 * It is kept in the CLI for now because App.tsx calls it directly for memory
 * refresh.
 * TODO: Consider if App.tsx should get memory via a server call or if Config should refresh itself.
 *
 * @param currentWorkingDirectory Directory the memory lookup is centred on.
 * @param debugMode When true, the delegation is logged via `logger.debug`;
 *   the flag is also forwarded to the server function.
 * @returns Whatever the server function resolves to: the combined memory
 *   text (`memoryContent`) and the accompanying `fileCount`.
 */
export async function loadHierarchicalGeminiMemory(
  currentWorkingDirectory: string,
  debugMode: boolean,
): Promise<{ memoryContent: string; fileCount: number }> {
  if (debugMode) {
    logger.debug(
      `CLI: Delegating hierarchical memory load to server for CWD: ${currentWorkingDirectory}`,
    );
  }
  // Directly call the server function.
  // The server function will use its own homedir() for the global path.
  return loadServerHierarchicalMemory(currentWorkingDirectory, debugMode);
}
export async function loadCliConfig(settings: Settings): Promise<Config> {
// Load .env file using logic from server package
loadEnvironment();
const geminiApiKey = process.env.GEMINI_API_KEY;
@@ -410,17 +124,15 @@ export async function loadCliConfig(settings: Settings): Promise<Config> {
const argv = await parseArguments();
const debugMode = argv.debug || false;
// Call the (now wrapper) loadHierarchicalGeminiMemory which calls the server's version
const { memoryContent, fileCount } = await loadHierarchicalGeminiMemory(
process.cwd(),
debugMode,
);
const userAgent = await createUserAgent();
// Gemini Developer API or GCP Express or Vertex AI
const apiKeyForServer = geminiApiKey || googleApiKey || '';
// Create config using factory from server package
return createServerConfig(
apiKeyForServer,
argv.model || DEFAULT_GEMINI_MODEL,

View File

@@ -6,11 +6,6 @@
import { Message, MessageType } from '../types.js';
import { Config } from '@gemini-code/server';
import { getGeminiMdFilePaths } from '../../config/config.js';
import { homedir } from 'os';
import process from 'node:process';
export const SHOW_MEMORY_COMMAND_NAME = '/showmemory';
export function createShowMemoryAction(
config: Config | null,
@@ -27,52 +22,42 @@ export function createShowMemoryAction(
}
const debugMode = config.getDebugMode();
const cwd = process.cwd();
const homeDir = homedir();
if (debugMode) {
console.log(`[DEBUG] Show Memory: CWD=${cwd}, Home=${homeDir}`);
}
const filePaths = await getGeminiMdFilePaths(cwd, homeDir, debugMode);
if (filePaths.length > 0) {
addMessage({
type: MessageType.INFO,
content: `The following GEMINI.md files are being used (in order of precedence):\n- ${filePaths.join('\n- ')}`,
timestamp: new Date(),
});
} else {
addMessage({
type: MessageType.INFO,
content: 'No GEMINI.md files found in the hierarchy.',
timestamp: new Date(),
});
console.log('[DEBUG] Show Memory command invoked.');
}
const currentMemory = config.getUserMemory();
const fileCount = config.getGeminiMdFileCount();
if (config.getDebugMode()) {
if (debugMode) {
console.log(
`[DEBUG] Showing memory. Content from config.getUserMemory() (first 200 chars): ${currentMemory.substring(0, 200)}...`,
);
console.log(`[DEBUG] Number of GEMINI.md files loaded: ${fileCount}`);
}
if (fileCount > 0) {
addMessage({
type: MessageType.INFO,
content: `Loaded memory from ${fileCount} GEMINI.md file(s).`,
timestamp: new Date(),
});
}
if (currentMemory && currentMemory.trim().length > 0) {
addMessage({
type: MessageType.INFO,
// Display with a clear heading, and potentially format for readability if very long.
// For now, direct display. Consider using Markdown formatting for code blocks if memory contains them.
content: `Current combined GEMINI.md memory content:\n\`\`\`markdown\n${currentMemory}\n\`\`\``,
timestamp: new Date(),
});
} else {
// This message might be redundant if filePaths.length === 0, but kept for explicitness
// if somehow memory is empty even if files were found (e.g., all files are empty).
addMessage({
type: MessageType.INFO,
content:
'No hierarchical memory (GEMINI.md) is currently loaded or memory is empty.',
fileCount > 0
? 'Hierarchical memory (GEMINI.md) is loaded but content is empty.'
: 'No hierarchical memory (GEMINI.md) is currently loaded.',
timestamp: new Date(),
});
}