Perl Script to Delete files older than x Days

c:
cd C:\Users\~1\AppData\Local\Temp\
echo start
perl E:\W\Scripts\p4proxy-purger-editable.pl C:\Users\~1\AppData\Local\Temp\
perl E:\W\Scripts\p4proxy-purger-editable.pl C:\temp\
perl E:\W\Scripts\p4proxy-purger-editable.pl C:\WBackups
echo done

====================================
p4proxy-purger-editable.pl
====================================
# Recursively crawl below P4Proxy root dir
# Written to perform well and not crash the machine even if there are millions of files/sub-dirs
# - remove empty dirs
# - delete files with mod time > MAXAGE constant
# - keep a limited number of  multiple older versions of a file
# Constants:
my $MAXAGE = 7; # delete anything > this number of days old
my $KEEPVERSIONS = 1; # keep most recent N versions of same file IFF also younger than MAXAGE
my $MINFREE = 150*1024*1024*1024; # only start deleting when free space drops below this level
# NOTE: $MINFREE should not be less than 10-15GB since this much can be used in a busy day
my $MAXFREE = 252*1024*1024*1024; # stop deleting when this much free space, 836GB drive capacity
use IO::Handle;
autoflush STDOUT 1;
# some reporting metrics variables
$ndirs = 0;
$nfiles = 0;
$ndeletedolder = 0;
$ndeletedversions = 0;
$nempty = 0;
$maxdepth = 0;
$completed = 1;

sub friendlybytes($)
{
    my $bytes = shift;
    if ($bytes > 1024*1024*1024)
    {
        return sprintf("%.2f GB", $bytes/1024/1024/1024);
    }
    elsif ($bytes > 1024*1024)
    {
        return sprintf("%.2f MB", $bytes/1024/1024);
    }
    elsif ($bytes > 1024)
    {
        return sprintf("%.2f KB", $bytes/1024);
    }
    else
    {
        return sprintf("%d Bytes", $bytes);
    }
}

sub freespace($)
{
    # get free space in bytes without relying on installing extra perl modules/packages
    # want this script to run on base Perl install, fewer dependencies
    my $path = shift;
    my $drive = $path;
    $drive =~ s/^([a-zA-Z]\:).+/$1/;
    my @dir = `dir $drive`;
    foreach $line (@dir)
    {
        if ($line =~ m/Dir\(s\)\s+(.*) bytes free/) {
            $bytes = $1;
            $bytes =~ s/,//g;
            return $bytes;
        }
    }
    die "FAILED to find free space on $path\n";
}

sub do_it($$); # declare prototype for recursion
sub do_it($$)
{
    my $root = shift;
    my $depth = shift;
    if (freespace($root) > $MAXFREE)
    {
        $completed = 0;
        return;
    }
    if ($depth > $maxdepth)
    {
        $maxdepth = $depth;
    }
    my @dirs = (); # on the stack
    my @fileversions = ();
    opendir (DIR, $root);
    my $thisempty = 1;
    my $nnn = 0;
    while (my $entry = readdir(DIR))
    {
        if (($entry eq '.') || ($entry eq '..'))
        {
            next;
        }
        elsif (-f "$root/$entry") # breadth first search is more performant
        {
        $nnn++;
        print scalar(localtime)." [$depth]$nnn scanned $nfiles files, $ndirs dirs   \r";
            $nfiles++;
            if ($root =~ m/.+,d$/)
            {
                # its a dir with several versions of same file
                push @fileversions, "$root/$entry";
            }
            else
            {
                my $modtime = (stat "$root/$entry")[9]; # mod time is when it was last synced (=accessed time generally)
                my $days_old = (time() - $modtime)/(24*60*60);
                if ($days_old > $MAXAGE)
                {
                    if (!(unlink "$root/$entry"))
            {
            print "\r".scalar(localtime)." ERROR: Failed to delete file $root/$entry: $! $@ $^E\n";
            }
            else
            {
            $ndeletedolder++;
            my $rounded = sprintf("%.1f", $days_old);
            print "\r".scalar(localtime)." INFO: deleted last mod $rounded days ago (".(scalar localtime($modtime)).") [$ndeletedolder] $root/$entry\n";
            }
                }
        else
        {
            $thisempty = 0;
        }
            }
        }
        elsif (-d "$root/$entry")
        {
            # NOTE Can't just recurse because opendir is not recursive
        $nnn=0;
        print scalar(localtime)." [$depth]$nnn scanned $nfiles files, $ndirs dirs   \r";
            $ndirs++;
            $thisempty = 0;
            #print scalar(localtime).(sprintf(" Checking dir %04d $root/$entry, scanned %d files\n", $ndirs, $nfiles));
            push(@dirs, "$root/$entry");
        }
        else
        {
            print "\r".scalar(localtime)." ASSERTION FAILURE: Not sure what this is (not dir, not file) : $root/$entry\n";
        }
    }
    closedir(DIR);
    if ((scalar @fileversions) > $KEEPVERSIONS)
    {
        # just keep the $KEEPVERSIONS most recent
        # TODO: Determine how/which to delete so get rid of oldest first keeping required number of most recent...
    my @modtimes = ();
    foreach my $vsn (@fileversions) {
        my $mod = (stat($vsn))[9];
        push @modtimes, "$mod,$vsn";
    }
    my $count = 0;
    my @sorted = sort {$a <=> $b} @modtimes;
    foreach my $vsn (@sorted) {
        $count ++;
        $vsn =~ s/([^,]+),(.+)/$2/; # extract just filename part
        my $modt = $1;
        my $daysold = (time()-$modt)/(24*60*60);
        # ascending order of modtime in epoch time i.e. most recent file is last
        if (($count <= ((scalar @modtimes)-$KEEPVERSIONS)) &&
        ($daysold > $MAXAGE) &&
        (freespace($root) < $MAXFREE)){
        if (!(unlink $vsn))
        {
            print "\r".scalar(localtime)." ERROR: Failed to delete older version $vsn: $! $@ $^E\n";
        }
        else
        {
            $ndeletedversions++;
            print "\r".scalar(localtime)." INFO: Deleted older version (oldest $count of ".(scalar(@modtimes))." last changed ".scalar localtime($modt).") [".$ndeletedversions."] $vsn\n";
        }
        }
    }
    $thisempty = 0;
    }
    while (my $d = pop(@dirs))
    {
        do_it($d, $depth+1);
    }
    if ($thisempty)
    {
        if (!rmdir "$root") # clean up empty directories
    {
        #print scalar(localtime)." ERROR: Failed to rmdir empty dir $root: $! $@ $^E\n";
    }
    else
    {
        $nempty++;
        print "\r".scalar(localtime)." INFO: Deleted empty dir #".$nempty.": $root \n";
    }
    }
} # do_it

# MAIN
if (scalar @ARGV != 1)
{
    print "USAGE: $0 <P4 proxy root dir>\n";
    print "  - cleans up storage in/below root dir\n";
}
else
{
    my $freebefore = freespace($ARGV[0]);
    my $friendlyfree = friendlybytes($freebefore);
    my $start = time();
    if ($freebefore < $MINFREE)
    {
        print scalar(localtime)." INFO: Free space $friendlyfree is less than trigger threshold ".friendlybytes($MINFREE).", starting purging ...\n";
        do_it($ARGV[0], 1);
    }
    else
    {
        print scalar(localtime)." INFO: Exiting immediately, $friendlyfree bytes free space already is more than threshold ".friendlybytes($MINFREE).", no action needed\n";
        exit 0;
    }
    print "\n\nChecked a total of $ndirs dirs, $nfiles files, max sub-folders depth was $maxdepth\n";
    print "Deleted: $nempty empty dirs, $ndeletedversions older versions, $ndeletedolder files older than $MAXAGE days\n";
    print "Free space before $friendlyfree, now ".friendlybytes(freespace($ARGV[0]))."\n";
    my $dur = time() - $start;
    print sprintf("Purging took %02d:%02d:%02d\n", $dur/60/60, ($dur/60) % 60, $dur % 60);
    if ($completed)
    {
        print "Ran to completion, searched entire file system.\n";
    }
    else
    {
        print "Exited when free space above Max threshold again, did NOT search entire file system.\n";
    }
}

Post a Comment

Previous Post Next Post