DokuWiki

It's better when it's simple

User Tools

Site Tools


tips:clean_media_directory

Clean up Media Directory

This is a small Perl script, run from your shell, that finds files in the media directory that no wiki page links to. You can either just print some statistics about these orphaned files, move them to a “trash” folder, or delete them immediately.

Hint: It's always better to have a backup, because there is no guarantee ;) And for the first run it is probably best to use the “-s” flag (print only statistics and exit).

Feel free to send me your opinion, bug report or feature request. Thanks :) pascal [dot] bendeich [at] gmx [dot] de

Source:


#!/usr/bin/perl
# Abstract
# ~~~~~~~~
# Find all files in the DokuWiki media directory that are not linked from any page and move or delete them.
# Can also print some statistics about these orphaned files.
#
#
# Author
# ~~~~~~~
# Pascal Bendeich <pascal[dot]bendeich[at]gmx[dot]de>
#
# Changes
# ~~~~~~~
#
# 2008-11-10, bendeich: use of Getopt and added counter for statistics
# 2008-10-31, bendeich: initial release
#
# Todo
# ~~~~
# - waiting for bug reports ;)
 
 
 
##############
# Modules
 
use strict;
use File::Basename;
use File::Copy;
use Getopt::Long;
 
 
 
##############
# Debug
 
#use Data::Dumper;
#use warnings;
 
 
 
##############
# Variables
 
my $dataDir = undef;		# DokuWiki data directory (required, set via -datadir)
my $moveDir = "/tmp/dokuwiki";		# maybe u want to change this (but remind that u need write permissions ;)
my $mediaFiles = [];		# filled with the paths of all files below <datadir>/media
my $pageFiles = [];		# filled with the paths of all files below <datadir>/pages
my $orphaned =[] ;		# currently unused
my $help = 0;			# print usage text and exit
my $unlink = 0;			# 1 = delete orphaned files, 0 = move them to $moveDir
my $statistics = 0;		# 1 = only print link counts, never move or delete
my $verbose = 0;		# 1 = report every successfully moved/deleted file
my $counter = 0;		# number of files moved or deleted

# Abort on unknown or malformed options instead of carrying on with the
# defaults: a mistyped "statistics only" flag must never end up moving or
# deleting files. GetOptions already printed the reason to STDERR.
GetOptions (
	'datadir=s' => \$dataDir,
	'movedir=s' => \$moveDir,
	'unlink' => \$unlink,
	'statistics' => \$statistics,
	'verbose' => \$verbose,
	'help|?' =>	\$help
) or do {
	print STDERR usage();
	exit 2;
};
 
 
 
##############
# Functions
 
# Search recurse for existing files under a Directory
# Collect the paths of all regular (non-directory) entries below a directory.
#
# Parameters:
#   $dir  - directory to scan
#   $data - array reference; every file found is appended as "$dir/$name"
#
# Entries are visited in sorted order, subdirectories are descended into
# recursively. Every entry whose name starts with a dot is skipped, which
# covers "." and ".." but also hidden files and directories.
#
# Returns 0. Dies if a directory cannot be opened.
sub searchFilesRecurse {

	my $dir = shift;
	my $data = shift;

	# A lexical handle keeps every recursion level on its own directory
	# handle instead of sharing (and re-opening) one global bareword handle.
	opendir(my $dirHandle, $dir) or die("Cannot open directory $dir: $!");
	my @entries = sort readdir $dirHandle;
	# be nice
	closedir($dirHandle);

	foreach my $file (@entries) {

		# ignore ".", ".." and all other dot-entries
		next if $file =~ /^\./;

		# recurse
		if (-d "$dir/$file") {
			searchFilesRecurse("$dir/$file", $data);
		} else {
			push @$data, "$dir/$file";
		}
	}

	return 0;
}
 
# Print Usage text
# Build the help text for the command line interface.
# Returns the complete usage message as one string; printing is left to the caller.
sub usage {
	return <<"USAGE";
Usage: cleanup_dokuwiki_mediadir.pl -datadir .../data [OPTIONS...]
 
Clean up orphaned files in the Media Directory of DokuWiki
 
  Options:
   -d(atadir)		Set path to your dokuwiki data directory (i.e. /srv/www/wiki/data)
   -m(ovedir)		Set path where orphaned files should be moved to. Make sure this Directory exists (Default = /tmp/dokuwiki)
   -u(nlink)		Unlink (delete) or move orphaned files (Default = 0 (move))
   -s(tatistics)	Print only Statistics for Media Files and exit (Default = 0)
   -v(erbose)		Be verbose. Only interesting without -s (Default = 0)
   -?, -h, --help	Print this help
 
USAGE
}
 
 
 
##############
# Main
 
# Test for needed parameter or help-Flag and exit
if ($help or !$dataDir) {
	print usage();
	exit 0;
}
# remove optional trailing slash(es) at data-dir
$dataDir =~ s{/+\z}{};

# Fail early when files are supposed to be moved but the target is missing,
# instead of reporting one failed move per orphaned file.
if (!$statistics and !$unlink and !-d $moveDir) {
	die "Move directory $moveDir does not exist or is not a directory\n";
}

# build the lists of media-/page-files
searchFilesRecurse("$dataDir/media", $mediaFiles);
searchFilesRecurse("$dataDir/pages", $pageFiles);

# DEBUG
#print Dumper($mediaFiles);
#print scalar @$mediaFiles ."\n";
#print Dumper($pageFiles);
#print scalar @$pageFiles ."\n";

# Read every page exactly once (lower-cased, line by line) instead of
# re-reading all pages for each media file. An unreadable page is fatal:
# links inside it could not be seen and its media would wrongly be treated
# as orphaned.
my @pageLines;
foreach my $page (@$pageFiles) {
	open(my $pageHandle, '<', $page)
		or die "Cannot read page $page: $!\n";
	push @pageLines, map { lc($_) } <$pageHandle>;
	# be nice
	close($pageHandle);
}

# search in all pages for a link to the mediafile
my $mediaPrefixLength = length("$dataDir/media") + 1;
foreach my $file (@$mediaFiles) {
	# Turn ".../media/ns/sub/file.png" into the media id "ns:sub:file.png".
	my $mediaId = lc(substr($file, $mediaPrefixLength));
	$mediaId =~ s{/}{:}g;

	# Count the lines that mention the media id. A literal substring search
	# is used so that characters like ".", "+" or "(" in file names are not
	# interpreted as regex syntax.
	# NOTE(review): only the full "namespace:file" form is matched; a link
	# that omits the namespace would not be found -- verify before deleting.
	my $links = grep { index($_, $mediaId) >= 0 } @pageLines;

	# print only statistics and ignore unlink-Flag
	if ($statistics) {
		print "Links to $file:  $links\n";
		next;
	}

	# only files without any link are moved or deleted
	next if $links > 0;

	if ($unlink) {		# delete
		if (unlink($file)) {
			$counter++;		# count only files that were really removed
			print "Unlinked $file successfuly\n" if $verbose;
		} else {
			print "Unlink of file $file failed: $!\n";
		}
		next;
	}

	# move
	my $target = "$moveDir/" . basename($file);
	if (-e $target) {
		# Files with the same name from different namespaces (or from an
		# earlier run) would silently overwrite each other.
		print "Move of file $file to $moveDir skipped: $target already exists\n";
		next;
	}
	if (move($file, $target)) {
		$counter++;		# count only files that were really moved
		print "Moved $file to $moveDir\n" if $verbose;
	} else {
		print "Move of file $file to $moveDir failed: $!\n";
	}
}
print "\nSum of existing mediafiles: " . scalar(@$mediaFiles) if $statistics;
print "\nDeleted/moved $counter files.\n";

# thats it
exit 0;
 
 
 
##############
# eof
tips/clean_media_directory.txt · Last modified: 2009-06-09 15:18 by 212.204.78.42

Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Share Alike 4.0 International
CC Attribution-Share Alike 4.0 International Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki