DokuWiki

It's better when it's simple

User Tools

Site Tools


tips:docpdf2dokuwiki

This is an old revision of the document!


Import your doc and pdf files

You need the Perl module HTML::WikiConverter (with its DokuWiki dialect), pdftohtml and jodconverter installed.

Still very crude; enhancements welcome!

docpdf2DokuWiki.pl
#!/usr/bin/perl
#
# docpdf2DokuWiki.pl - import .doc/.docx and .pdf files into DokuWiki.
#
# Every sub-directory of $path is handled as one unit:
#   1. each plain file in it is renamed to a normalised name (whitespace and
#      dashes become underscores, underscore runs are collapsed, lower case);
#   2. .doc/.docx files are passed to jodconverter and .pdf files to
#      pdftohtml to obtain HTML;
#   3. the resulting <stem>.html is converted to DokuWiki markup and written
#      to <stem>.txt next to it;
#   4. all .txt files are copied to $txtpath/<dirname> and all .png/.jpg
#      files to $imgpath/<dirname>.
# Finally ownership of the DokuWiki tree is handed to www-data.
#
# Failures on a single file or directory are reported with warn() and the
# run continues with the next item.

use strict;
use warnings;

use HTML::WikiConverter;
use File::Basename;
use File::Copy;

my $wc = HTML::WikiConverter->new( dialect => 'DokuWiki' );

my $path    = "/home/cplus/Anleitungen";
my $txtpath = "/var/www/dokuwiki/data/pages";
my $imgpath = "/var/www/dokuwiki/data/media";

# Return the full paths of all non-hidden entries of $dir, sorted by name.
# readdir is used instead of glob() because glob() splits its pattern on
# whitespace, which breaks for directory names containing spaces.
sub list_entries {
    my ($dir) = @_;

    my $dh;
    unless ( opendir( $dh, $dir ) ) {
        warn "Cannot read directory $dir: $!\n";
        return;
    }
    my @names = sort grep { !/\A\./ } readdir $dh;
    closedir $dh;

    return map { "$dir/$_" } @names;
}

# Normalise a bare file name (no directory part): whitespace runs and dashes
# become underscores, any run of underscores collapses to one, and the
# result is lower-cased.
sub normalized_name {
    my ($name) = @_;

    $name =~ s/\s+/_/g;
    $name =~ tr/-/_/;
    $name =~ s/_{2,}/_/g;

    return lc $name;
}

# Run an external command in list form (no shell involved).
# Returns true when the command exited with status 0, false otherwise.
sub run_tool {
    my @cmd = @_;

    my $status = system(@cmd);
    return 1 if $status == 0;

    if ( $status == -1 ) {
        warn "Could not start '$cmd[0]': $!\n";
    }
    else {
        warn "'@cmd' failed with exit code " . ( $? >> 8 ) . "\n";
    }
    return 0;
}

# Copy every plain file in $source_dir whose name matches $pattern into
# $target_dir, creating $target_dir first when it does not exist yet.
sub publish_files {
    my ( $source_dir, $target_dir, $pattern ) = @_;

    unless ( -d $target_dir ) {
        unless ( mkdir $target_dir ) {
            warn "Cannot create $target_dir: $!\n";
            return;
        }
    }

    for my $source ( grep { -f && /$pattern/ } list_entries($source_dir) ) {
        copy( $source, $target_dir )
            or warn "Cannot copy $source to $target_dir: $!\n";
    }
    return;
}

DIR:
for my $dir ( list_entries($path) ) {

    # Only directories are treated as import units.
    next DIR unless -d $dir;

    FILE:
    for my $oldfile ( grep { -f } list_entries($dir) ) {

        # Normalise the file name only; the directory part stays untouched
        # so the rename target is always inside an existing directory.
        my $file = $dir . '/' . normalized_name( basename($oldfile) );

        if ( $file ne $oldfile ) {
            if ( -e $file ) {
                # Do not silently overwrite a different file that already
                # carries the normalised name.
                warn "Skipping $oldfile: $file already exists\n";
                next FILE;
            }
            unless ( rename( $oldfile, $file ) ) {
                warn "Cannot rename $oldfile to $file: $!\n";
                next FILE;
            }
        }

        # Convert only real .doc/.docx/.pdf files; everything else (images,
        # previously generated .html/.txt files, ...) is left alone.
        my $converted;
        if ( $file =~ /\.docx?\z/ ) {
            $converted = run_tool( "jodconverter", "-f", "html", $file );
        }
        elsif ( $file =~ /\.pdf\z/ ) {
            $converted = run_tool( "pdftohtml", $file );
        }
        else {
            next FILE;
        }
        next FILE unless $converted;

        # Strip just the final extension of the file name, so dots in the
        # directory path or earlier in the name do not truncate the stem.
        my ($stem) = fileparse( $file, qr/\.[^.]*/ );
        my $htmfile = "$dir/$stem.html";
        my $newfile = "$dir/$stem.txt";

        unless ( -e $htmfile ) {
            warn "Expected converter output $htmfile not found\n";
            next FILE;
        }

        my $input = $wc->html2wiki( file => $htmfile );

        # Truncate rather than append: the page is regenerated from scratch,
        # so running the script again must not duplicate its content.
        my $out;
        unless ( open( $out, '>', $newfile ) ) {
            warn "Cannot write $newfile: $!\n";
            next FILE;
        }
        print {$out} $input;
        close($out) or warn "Error closing $newfile: $!\n";
    }

    # The source directory name is used as the target sub-directory name
    # below both the pages and the media tree.
    my $namespace = basename($dir);
    publish_files( $dir, "$txtpath/$namespace", qr/\.txt\z/ );
    publish_files( $dir, "$imgpath/$namespace", qr/\.(?:png|jpg)\z/ );
}

run_tool( "chown", "-R", "www-data:www-data", "/var/www/dokuwiki" );
tips/docpdf2dokuwiki.1408515863.txt.gz · Last modified: 2014-08-20 08:24 by 82.100.248.202

Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Share Alike 4.0 International
CC Attribution-Share Alike 4.0 International Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki