From 95bc7c7e59999d3dbd18b7782e551d4fc9b16cf7 Mon Sep 17 00:00:00 2001 From: grothedev Date: Wed, 23 Oct 2024 12:03:02 -0500 Subject: update process monitor script. kill processes that are using a ton of resources and just output processes that are using 1/4th of the threshold --- filetreetool.py | 332 -------------------------------------------------------- procwatch.sh | 36 ++++-- 2 files changed, 29 insertions(+), 339 deletions(-) delete mode 100755 filetreetool.py diff --git a/filetreetool.py b/filetreetool.py deleted file mode 100755 index 54d571a..0000000 --- a/filetreetool.py +++ /dev/null @@ -1,332 +0,0 @@ -#!/usr/bin/python -#a tool for traversing a directory structure and operating on its files -# the directory structure is a tree, where each leaf is a file (or symlink pointing to the memory address of another directory node). -# so it can be stored in memory as a tree datastructure. - - -import os -import sys -import time -import filetype #for filetype (extension and mime type) -import chardet #for getting encoding -import subprocess -import random -import hashlib -import argparse -from datetime import datetime - -#can we parse text out of this file or is it a libreoffice file? if so, return the contents -def attemptReadSampleFile(filepath): - if verbose: print('checking {}'.format(filepath)) - if os.path.isfile(filepath): - f = open(filepath, 'rb') - ftype = filetype.guess(filepath) - if verbose: print('filetype: {}'.format(str(ftype))) - if ftype != None: - if ftype.extension in ['py', 'c', 'cc', 'h', 'hh', 'java', 'rst', 'css', 'html', 'htm', 'js', 'php', 'sh']: #don't want code in the sample data - if verbose: print('this file is code') - return None - if ftype.extension == 'odt' and filepath[len(filepath)-1] != '#': #openoffice doc and not a lock file - return cmd('odt2txt ' + filepath) - else: - fb = f.read() #file data (bytes) to detect encoding - enc = str(chardet.detect(fb)['encoding']) - if verbose: print('encoding: {}'.format(enc)) - if enc in ['ascii','utf-8']: - return str(fb, encoding=enc) - else: - return None - else: - if verbose: print('not file') - return None - -def getFilesRecursive(paths): - samplefiles=[] #the paths of the individual files from which we want to grab text - for p in paths: - if verbose: print('path {}'.format(p)) - if os.path.isdir(p): - for root,dirs,files in os.walk(p): - if verbose: print('walk {}: {} files, {} dirs'.format(root, len(files), len(dirs))) - for f in files: - samplefiles.append(root + '/' + f) - else: - samplefiles.append(p) - return samplefiles - -#returns some # of the most recently modified files in a directory, scanning recursively -def getRecentlyModifiedFiles(dir, n=10): - fileswmodtimes = [] - for root,dirs,files in os.walk(dir): - for f in files: - fileswmodtimes.append( (f'{root}/{f}', os.path.getmtime(f'{root}/{f}')) ) - return sorted(fileswmodtimes, key=lambda x: x[1], reverse=True)[:n] - #if include_time: - # return sorted(fileswmodtimes, key=lambda x: x[1], reverse=True)[:n] - #else: - # return [f[0] for f in sorted(fileswmodtimes, key=lambda x: os.path.getmtime(x[0]), reverse=True)[:n]] - -#returns duplicate files from some given lists of files -def findDuplicates(*args): - for arg in args: - if not isinstance(arg, str): - raise TypeError('all arguments must be strings of file paths') - filelists = [] - dups = [] #duplicate files found, as a list> grouping together each file as mappings of filelist index to file index within that list. - for arg in args: - if os.path.isdir(arg): - fl = [] - for root,dirs,files in os.walk(arg): - for f in files: - fl.append((f'{root}/{f}',f,os.path.getsize(f'{root}/{f}'),'')) #tuple - filelists.append(fl) - elif ':' in arg: #remote file - tmp = arg.split(':') - host = tmp[0] - user = None - if '@' in host: - userhost = host.split('@') - user = userhost[0] - host = userhost[1] - path = tmp[1] - #TODO add remote file support - else: - raise ValueError('argument must be a directory or remote file path') - - #now we have a list of lists of files. look for duplicates via a few different approaches - - #union = {} - #intersection = set(filelists[0]) - #for fl in filelists: - # union = union | set(fl) - # intersection = intersection & set(fl) - - - #first, check for identical paths - for i in range(len(filelists)): - flist1 = filelists[i] - for ii in range(len(flist1)): - f1 = flist1[ii] - for j in range(i+1, len(filelists)): - flist2 = filelists[j] - if flist2 != flist1: - for jj in range(len(flist2)): - f2 = flist2[jj] - #recall that each element of the filelist is a tuple - if f1[0] == f2[0]: #exact same filepath, probably same file, check md5 - if f1[2] == f2[2]: #first of all check size - dups.append([(i,ii),(j,jj)]) - fb = open(f1[0],'r') - hash1 = hashlib.md5(fb).hexdigest() - fb.close() - fb = open(f2[0],'r') - hash2 = hashlib.md5(fb).hexdigest() - fb.close() - return None #todo - -def checkSimilarityOfDuplicates(dups, dir1, dir2): - for d in dups: - f1 = open(dir1 + '/' + d, 'rb') - f2 = open(dir2 + '/' + d, 'rb') - -#returns a list of files that are in both lists of files from each given directory -def findDuplicateFiles(dir1, dir2): - if not (os.path.isdir(dir1) and os.path.isdir(dir2)): - return -1 - fl1 = [] #list of file paths - fl2 = [] - for root,dirs,files in os.walk(dir1): - for f in files: - fl1.append((f'{root}/{f}',f,os.path.getsize(f'{root}/{f}'),'')) #tuple - for root,dirs,files in os.walk(dir2): - for f in files: - fl2.append((f'{root}/{f}',f,os.path.getsize(f'{root}/{f}'),'')) #tuple - -# for i in range(len(fl1)): - # for j in range(i+1, len(fl2)): - # #TODO - - # return list(set(l1) & set(l2)) - -#looks at all the files of the 2 given dirs, returns the following sets: -# unique to 1: files that only exist in dir1 -# unique to 2: files that only exist in dir2 -# duplicates: files that exist in both dirs -#this is done by filename only, and does not take into account the full path of the files, unless the arg fullpath = True. -#so 2 files could have the same name yet contain different data. use the function compareFileHashes() to figure out if files with same name have different contents. -#keyword args: -# fullpath: return the full path as opposed to just the filename -# pretty: return formatted text. default true because this tool is primarily used in cmd line -def getDupFiles(dir1, dir2, fullpath=False, pretty=True): - #files1 = str(cmd(f'find {dir1} -type f'), encoding='utf-8').split('\n') - #files2 = str(cmd(f'find {dir2} -type f'), encoding='utf-8').split('\n') - files1 = cmd(f'find {dir1} -type f').split('\n') - files2 = cmd(f'find {dir2} -type f').split('\n') - - #make lists of filenames, without paths - filenames1 = [] - filenames2 = [] - for fpath in files1: - tmp=fpath.split('/') - filenames1.append(tmp[len(tmp)-1]) - for fpath in files2: - tmp=fpath.split('/') - filenames2.append(tmp[len(tmp)-1]) - dupFiles = (set(filenames1) & set(filenames2)) - {' ','','.','..'} - - #TODO return fullpaths option - if pretty: - res = '' - for fn in dupFiles: - res += fn+'\n' - print(res) - else: - print(dupFiles) - return dupFiles - -def getDuplicateFiles(dir1, dir2, fullpath=False, pretty=True): - return getDupFiles(dir1, dir2, fullpath, pretty) -def getDupeFiles(dir1, dir2, fullpath=False, pretty=True): - return getDupFiles(dir1, dir2, fullpath, pretty) - - -def getUniqueFiles(dir1, dir2, fullpath=False, pretty=True): - files1 = str(cmd(f'find {dir1} -type f'), encoding='utf-8').split('\n') - files2 = str(cmd(f'find {dir2} -type f'), encoding='utf-8').split('\n') - - #make lists of filenames, without paths, if applicable - if not fullpath: - filenames1 = [] - filenames2 = [] - for fpath in files1: - tmp=fpath.split('/') - filenames1.append(tmp[len(tmp)-1]) - for fpath in files2: - tmp=fpath.split('/') - filenames2.append(tmp[len(tmp)-1]) - union = (set(filenames1) | set(filenames2)) - {' ','','.','..'} - uniq1 = union - set(filenames2) - uniq2 = union - set(filenames1) - - if pretty: - res = 'unique to 1:\n' - for fn in uniq1: - res += fn+'\n' - res += 'unique to 2:\n' - for fn in uniq2: - res += fn+'\n' - print(res) - else: - print(uniq1,uniq2) - return (uniq1,uniq2) -def getUniqFiles(dir1, dir2, fullpath=False, pretty=True): - return getUniqueFiles(dir1, dir2, fullpath, pretty) - -def getDupeFilesHash(dir1, dir2, fullpath=False, pretty=True): - filesWithHash = [] - for f in getDupFiles(dir1, dir2, fullpath=False, pretty=False): - f1 = os.system(f'find {dir1} | grep {f}') #TODO continue work here - f2 = os.system(f'find {dir2} | grep {f}') - print(f1) - #hash1 = cmd(f'md5sum {f1}') - #hash2 = cmd(f'md5sum {f2}') - #print(hash1+' '+hash2) - filesWithHash.append((f,hash)) - #print(filesWithHash) - return filesWithHash - - - - -#execute a command -'''def cmd(cmdStr): - return subprocess.run(cmdStr.join(' '), encoding='utf-8', stdout=subprocess.PIPE).stdout -''' -''' -execute a command using the python subprocess module -params: - cmdstr (str) : the command to run -return: stdout of command -''' -def cmd(cmdstr,v=False): - cmdarray = cmdstr.strip().split(' ') - log(f'runcmd: {cmdstr}') - '''if '|' in cmdarray: #TODO handling pipe not yet working - i = cmdarray.index('|') - proc0 = subprocess.check_output(cmdstr, shell=True)''' - proc = subprocess.run(cmdarray, stdout=subprocess.PIPE) - if proc.returncode == 0: - res = proc.stdout - if verbose: - log(f'result: {res}') - else: - log(f'returncode = {proc.returncode}') - res = proc.stderr - return str(res, encoding='utf-8') - -def log(msg): - t = tnow() - with open('filetreetool.log', 'a') as lf: - lf.write(f'{t}: {msg}\n') - lf.close() - if verbose: print(f'LOG: {msg}') - -def tnow(): - ''' - return: current time, formatted as %Y%m%d-%H%M%S - ''' - return datetime.now().strftime('%Y%m%d-%H%M%S') - -verbose=False #verbose - -def callFunc(func_name, *args, **kwargs): - """ - Calls a function by its name if it exists in the global scope. - - :param func_name: (string) name of the function to call - :param args: (List) Positional arguments to pass to the function - :param kwargs: (List>) Keyword arguments to pass to the function - :return: The result of the function call if successful, None otherwise - """ - #TODO keep a log of called functions and cached result, to use for optimization - if func_name in globals() and callable(globals()[func_name]): - func = globals()[func_name] - return func(*args, **kwargs) - else: - print(f'function "{func_name}" not callable') - return None - - -if __name__ == '__main__': - ''' - older less-dynamic implementation - parser = argparse.ArgumentParser(description='a tool to analyze and manage files in a directory structure') - parser.add_argument('func', help='module function to call') - parser.add_argument('args', nargs='*', help='function args') - args = parser.parse_args() - - if args.func == 'compareDupesOfFiles': - if len(args.args < 2): - print('need (dir1, dir2)') - sys.exit(1) - result = compareDupesOfFiles(args.args[0], args.args[1])''' - if len(sys.argv) == 1: - print('a tool for traversing a directory structure and operating on its files. \ - the directory structure is a tree, where each leaf is a file (or symlink pointing to the memory address of another directory node). \ - so it can be stored in memory as a tree datastructure. ') - sys.exit(0) - if len(sys.argv) == 2: - callFunc(sys.argv[1]) - elif len(sys.argv) > 2: - args = [] #separate out normal args and keyword args - kwargs = [] - for a in sys.argv[2:]: - if '=' in a: - tmp = a.split('=') - if len(tmp) != 2: - print(f'invalid: {a}') - sys.exit(1) - kwargs.append([tmp[0],tmp[1]]) - else: - args.append(a) - #print(f'calling {sys.argv[1]}({",".join(args)} {kwargs})') - callFunc(sys.argv[1], *args, *kwargs) diff --git a/procwatch.sh b/procwatch.sh index 0088b3f..2f5717d 100755 --- a/procwatch.sh +++ b/procwatch.sh @@ -4,28 +4,50 @@ log_file="process_monitor.log" # Set the CPU and memory usage thresholds -cpu_threshold=50 # Percentage -mem_threshold=500 # Megabytes +cpu_threshold=396 # Percentage +mem_threshold=15360 # Megabytes +offender=-1 +kill_attempts=0 while true; do # Get the list of running processes - processes=$(ps -eo pid,pcpu,rss,comm --sort=-pcpu | grep -v "^ PID") + processes=$(ps -eo pid,pcpu,rss,comm --sort=-pcpu | grep -v "PID") # echo $processes # Loop through the processes while read -r process_info; do - echo "PROCESS! "${process_info} - echo "" # Extract the process information pid=$(echo "$process_info" | awk '{print $1}') name=$(echo "$process_info" | awk '{print $4}') cpu_percent=$(echo "$process_info" | awk '{print $2}') mem_usage=$(echo "$process_info" | awk '{print $3}') mem_usage=$((mem_usage / 1024)) # Convert to Megabytes - + if (( $(echo "$cpu_percent > $((cpu_threshold/4))" | bc -l) )) || (( mem_usage > $((mem_threshold/4)) )); then + echo "PROCESS! ${pid}:${name} -- ${cpu_percent}% ${mem_usage}MB" + fi # Check if the process exceeds the CPU or memory usage threshold if (( $(echo "$cpu_percent > $cpu_threshold" | bc -l) )) || (( mem_usage > mem_threshold )); then - echo "$(date) - Process '$name' (PID: $pid) has exceeded the threshold: CPU usage: ${cpu_percent}%, Memory usage: ${mem_usage} MB" # >> "$log_file" + msg_warn="$(date) - Process $name (PID: $pid) has exceeded the threshold: CPU usage: ${cpu_percent}%, Memory usage: ${mem_usage} MB. Killing in 10 seconds" # >> "$log_file" + echo ${msg_warn} + notify-send -t 8000 "$msg_warn" + sleep 10 + if [[ $pid == $offender ]]; then + kill_attempts=$((kill_attempts+1)) + else + offender=${pid} + #TODO deal with multiple offending processes later + fi + if [[ ${kill_attempts} -gt 3 ]]; then + echo "force kill ${pid}" + #kill -9 ${pid} + else + echo "kill ${pid}" + #kill ${pid} + fi + if [[ -z $(pgrep -f ${pid}) ]]; then + offender=-1 + kill_attempts=0 + fi fi done <<< "$processes" -- cgit v1.2.3