const fs = require('fs').promises;
const path = require('path');
// File size limit for initial display, in bytes (1024 * 1024 = 1 MiB).
// Nothing in the visible part of this module reads it; it is only exported
// so that callers can apply the limit themselves.
const FILE_SIZE_LIMIT = 1024 * 1024;
/**
 * Magic numbers that open a font container: TrueType (0x00010000, 'true',
 * 'typ1'), OpenType/CFF ('OTTO'), TrueType collections ('ttcf') and WOFF.
 */
const FONT_MAGIC_NUMBERS = ['\u0000\u0001\u0000\u0000', 'OTTO', 'true', 'typ1', 'ttcf', 'wOFF'];

/** Table tags expected in a font's table directory, right after the header. */
const FONT_TABLE_TAGS = ['FFTM', 'GDEF', 'glyf', 'cmap', 'fpgm', 'gasp', 'DSIG', 'GSUB', 'GPOS'];

/** Number of leading bytes inspected when looking for a font header. */
const FONT_HEADER_SCAN_BYTES = 1024;

/**
 * Check whether a buffer starts like a font file.
 *
 * Both a container magic number at offset 0 and a known table tag within the
 * first 1KB are required. Searching the whole file for the tags alone would
 * misclassify any ordinary text that mentions e.g. "cmap" or "GSUB", and the
 * magic alone would misclassify a text file that starts with "true".
 *
 * @param {Buffer} buffer - File buffer to check
 * @returns {boolean} - True if the buffer looks like a font container
 */
function hasFontSignature(buffer) {
  // latin1 maps each byte to exactly one character, so byte offsets and the
  // ASCII tags survive decoding even when the data is not valid UTF-8.
  const header = buffer.subarray(0, FONT_HEADER_SCAN_BYTES).toString('latin1');
  const hasMagic = FONT_MAGIC_NUMBERS.some((magic) => header.startsWith(magic));
  if (!hasMagic) {
    return false;
  }
  return FONT_TABLE_TAGS.some((tag) => header.includes(tag, 4));
}

/**
 * Heuristically decide whether a buffer holds UTF-8 text.
 *
 * This is a "looks like text" test rather than a strict validator. The buffer
 * is rejected when any of the following holds:
 *   - more than 1% of the first 1KB are null bytes,
 *   - more than 1% of the decoded characters are U+FFFD replacement chars,
 *   - it starts with a font container header (see hasFontSignature),
 *   - more than 10% of the first 1000 decoded characters are control
 *     characters other than tab, line feed and carriage return.
 * Each rejection is reported through console.log.
 *
 * @param {Buffer} buffer - File buffer to check
 * @returns {boolean} - True if the buffer is considered UTF-8 text
 */
function isValidUTF8(buffer) {
  // If file is empty, consider it UTF-8
  if (buffer.length === 0) {
    return true;
  }

  // Check for excessive null bytes (binary indicator) in the first 1KB
  const sampleSize = Math.min(buffer.length, 1024);
  let nullCount = 0;
  for (let i = 0; i < sampleSize; i++) {
    if (buffer[i] === 0) {
      nullCount++;
    }
  }
  // If more than 1% null bytes, likely binary
  if (nullCount / sampleSize > 0.01) {
    console.log(`Detected binary file: ${nullCount}/${sampleSize} null bytes`);
    return false;
  }

  // Decoding never throws: invalid byte sequences become U+FFFD, so counting
  // replacement characters measures how much of the input was not UTF-8.
  const text = buffer.toString('utf8');
  const replacementCount = (text.match(/\uFFFD/g) || []).length;
  // If more than 1% replacement characters, likely binary/non-UTF8
  if (replacementCount / text.length > 0.01) {
    console.log(`Detected non-UTF8: ${replacementCount}/${text.length} replacement chars`);
    return false;
  }

  // Font files are recognised by their header, not by tag names appearing
  // somewhere in the content.
  if (hasFontSignature(buffer)) {
    console.log('Detected font file by signature');
    return false;
  }

  // Check for a high percentage of non-printable characters
  const checkLength = Math.min(text.length, 1000);
  let nonPrintableCount = 0;
  for (let i = 0; i < checkLength; i++) {
    const code = text.charCodeAt(i);
    // Control characters below 32 count, except tab (9), LF (10) and CR (13)
    if (code < 32 && code !== 9 && code !== 10 && code !== 13) {
      nonPrintableCount++;
    }
  }
  // If more than 10% non-printable characters, likely binary
  if (nonPrintableCount / checkLength > 0.1) {
    console.log(`Detected binary: ${nonPrintableCount}/${checkLength} non-printable chars`);
    return false;
  }

  return true;
}
/**
 * Read a whole file into memory and classify it with isValidUTF8.
 *
 * The file path, its size, the classification and a hex dump of the first
 * 100 bytes are written to console.log.
 *
 * @param {string} filePath - Path to the file
 * @returns {Promise<{isUTF8: boolean, size: number, buffer: Buffer}>} -
 *   Classification result, size in bytes and the raw file contents
 * @throws Rejects with the error from fs.readFile when the file cannot be read
 */
async function detectFileEncoding(filePath) {
  // Read errors propagate to the caller unchanged; no wrapping is needed.
  const buffer = await fs.readFile(filePath);
  const isUTF8 = isValidUTF8(buffer);

  console.log(`File: ${filePath}, Size: ${buffer.length}, IsUTF8: ${isUTF8}`);
  // subarray is the non-deprecated equivalent of Buffer#slice (a view, no copy).
  console.log(`First 100 bytes:`, buffer.subarray(0, 100).toString('hex'));

  return {
    isUTF8,
    size: buffer.length,
    buffer
  };
}
/**
 * Recursively list a directory as a tree of plain objects.
 *
 * Entries whose name starts with '.' and a fixed set of build / Windows
 * system folder names are left out. Each remaining entry is stat'ed; entries
 * failing with EPERM, EACCES or ENOENT are skipped with a warning. Within a
 * level, directories come before files and each group is ordered with
 * localeCompare on the name.
 *
 * @param {string} dirPath - Directory path
 * @param {number} maxDepth - Maximum depth to traverse
 * @param {number} currentDepth - Current depth (internal use)
 * @returns {Promise<Array|null>} - Tree nodes ({ name, path, type } plus
 *   children for directories); null when currentDepth exceeds maxDepth;
 *   an empty array when the directory itself cannot be listed
 */
async function buildFileTree(dirPath, maxDepth = 3, currentDepth = 0) {
  if (currentDepth > maxDepth) {
    return null;
  }

  // Hidden files, common build directories, and Windows system folders
  const ignoredNames = ['node_modules', 'dist', 'build', '.git', 'My Music', 'My Pictures', 'My Videos', '$RECYCLE.BIN', 'System Volume Information'];
  // stat failures with these codes only drop the affected entry
  const skippableCodes = ['EPERM', 'EACCES', 'ENOENT'];
  // Directories sort ahead of files
  const typeRank = (node) => (node.type === 'directory' ? 0 : 1);

  try {
    const nodes = [];
    const entryNames = await fs.readdir(dirPath);

    for (const entryName of entryNames) {
      const isIgnored = entryName.startsWith('.') || ignoredNames.includes(entryName);
      if (isIgnored) {
        continue;
      }

      const entryPath = path.join(dirPath, entryName);
      try {
        const info = await fs.stat(entryPath);
        if (!info.isDirectory()) {
          nodes.push({
            name: entryName,
            path: entryPath,
            type: 'file'
          });
          continue;
        }

        // Beyond maxDepth the recursive call yields null; show an empty folder.
        const subtree = await buildFileTree(entryPath, maxDepth, currentDepth + 1);
        nodes.push({
          name: entryName,
          path: entryPath,
          type: 'directory',
          children: subtree || []
        });
      } catch (entryError) {
        // Anything other than an access problem is handled by the outer catch
        if (!skippableCodes.includes(entryError.code)) {
          throw entryError;
        }
        console.warn(`Skipping inaccessible item: ${entryPath} (${entryError.code})`);
      }
    }

    return nodes.sort((left, right) => {
      const byType = typeRank(left) - typeRank(right);
      return byType !== 0 ? byType : left.name.localeCompare(right.name);
    });
  } catch (error) {
    // A directory that cannot be processed is reported and shown as empty
    const isPermissionError = error.code === 'EPERM' || error.code === 'EACCES';
    if (isPermissionError) {
      console.warn(`Permission denied accessing directory: ${dirPath}`);
    } else {
      console.error('Error building file tree:', error);
    }
    return [];
  }
}
/**
 * Search the text files below a directory for a term.
 *
 * The term is compiled with `new RegExp(searchTerm, 'gi')`, so it is matched
 * case-insensitively and regular-expression syntax in it is interpreted; an
 * invalid pattern makes the returned promise reject with a SyntaxError.
 * Directories are descended to a nesting depth of 5. Entries starting with
 * '.' and common build directories are skipped, and only files with a known
 * text extension (or no extension) are read.
 *
 * @param {string} dirPath - Directory path to search
 * @param {string} searchTerm - Search term (regular-expression source)
 * @param {number} maxResults - Maximum results to return
 * @returns {Promise<Array<{file: string, fileName: string, line: number, content: string, matches: number}>>} -
 *   One entry per matching line: file path, file name, 1-based line number,
 *   trimmed line text and number of matches on that line; never more than
 *   maxResults entries
 */
async function searchInDirectory(dirPath, searchTerm, maxResults = 100) {
  const MAX_DEPTH = 5;
  const skippedNames = ['node_modules', 'dist', 'build', '.git'];
  const textExtensions = ['.txt', '.js', '.ts', '.html', '.css', '.json', '.md', '.py', '.cpp', '.c', '.h', '.java', '.php', '.rb', '.go', '.rs'];
  // stat failures with these codes only drop the affected entry
  const skippableCodes = ['EPERM', 'EACCES', 'ENOENT'];

  const results = [];
  const searchRegex = new RegExp(searchTerm, 'gi');

  // Collect matching lines of one file, stopping once maxResults is reached.
  async function searchFile(filePath, fileName) {
    let content;
    try {
      content = await fs.readFile(filePath, 'utf8');
    } catch (error) {
      // Best effort: files that can't be read are skipped
      return;
    }

    const lines = content.split('\n');
    for (let index = 0; index < lines.length; index++) {
      // Enforce the cap per line so a single file cannot overflow it
      if (results.length >= maxResults) {
        break;
      }
      const line = lines[index];
      // String#match with a global regex starts from index 0 on every call
      const matches = line.match(searchRegex);
      if (matches) {
        results.push({
          file: filePath,
          fileName,
          line: index + 1,
          content: line.trim(),
          matches: matches.length
        });
      }
    }
  }

  async function searchRecursively(currentPath, depth = 0) {
    if (depth > MAX_DEPTH || results.length >= maxResults) return;
    try {
      const items = await fs.readdir(currentPath);
      for (const item of items) {
        if (results.length >= maxResults) break;
        // Skip hidden files and common build directories
        if (item.startsWith('.') || skippedNames.includes(item)) {
          continue;
        }

        const itemPath = path.join(currentPath, item);
        let stats;
        try {
          stats = await fs.stat(itemPath);
        } catch (itemError) {
          // A broken symlink or inaccessible entry must not abort the rest
          // of this directory
          if (skippableCodes.includes(itemError.code)) {
            console.warn(`Skipping inaccessible item: ${itemPath} (${itemError.code})`);
            continue;
          }
          throw itemError;
        }

        if (stats.isDirectory()) {
          await searchRecursively(itemPath, depth + 1);
        } else if (stats.isFile()) {
          // Only search in text files (by extension, or no extension at all)
          const ext = path.extname(item).toLowerCase();
          if (textExtensions.includes(ext) || !ext) {
            await searchFile(itemPath, item);
          }
        }
      }
    } catch (error) {
      console.error('Error searching directory:', error);
    }
  }

  await searchRecursively(dirPath);
  return results;
}
// Public API of this module: the three async helpers, the UTF-8 heuristic
// they build on, and the size-limit constant.
module.exports = {
detectFileEncoding,
buildFileTree,
searchInDirectory,
isValidUTF8,
FILE_SIZE_LIMIT
};