DokuWiki

It's better when it's simple

User Tools

Site Tools


tips:docpdf2dokuwiki

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
Next revision
Previous revision
tips:docpdf2dokuwiki [2012-08-03 15:00] audiomobster | tips:docpdf2dokuwiki [2016-06-29 14:38] (current) – old revision restored (2014-08-20 08:24) 141.65.129.207
Line 1: Line 1:
 ====== Import your doc and pdf files ====== ====== Import your doc and pdf files ======
 +
 +You need the Perl module HTML::WikiConverter (with its DokuWiki dialect), pdftohtml, and jodconverter installed.
  
 Still very crude; enhancements welcome! Still very crude; enhancements welcome!
  
-<code perl>+<code perl docpdf2DokuWiki.pl> 
 #!/usr/bin/perl #!/usr/bin/perl
- 
-# Copyright (C) 2012 audiomobster (audiomobster at gmail dot com) 
-# 
-#  docpdf2Dokuwiki is free software; you can redistribute it and/or modify it 
-#  under the terms of the GNU General Public License as published 
-#  by the Free Software Foundation; either version 2 of the License, 
-#  or (at your option) any later version. 
-  
-#  docpdf2DokuWiki is distributed in the hope that it will be useful, but 
-#  WITHOUT ANY WARRANTY; without even the implied warranty of 
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
-#  GNU General Public License for more details. 
-# 
-#  You should have received a copy of the GNU General Public License 
-#  along with this program; if not, write to the Free Software 
-#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
-#  MA  02111-1307  USA 
  
 use HTML::WikiConverter; use HTML::WikiConverter;
 +use File::Basename;
 +use File::Copy;
 +
 my $wc = new HTML::WikiConverter( dialect => 'DokuWiki' ); my $wc = new HTML::WikiConverter( dialect => 'DokuWiki' );
  
-$txtpath = "/var/www/dokuwiki/data/pages/anleitungen"; +$path = "/home/cplus/Anleitungen"; 
-$imgpath = "/var/www/dokuwiki/data/media/anleitungen"; +$txtpath = "/var/www/dokuwiki/data/pages"; 
-@docfiles </var/www/dokuwiki/data/pages/anleitungen/*.doc*>+$imgpath "/var/www/dokuwiki/data/media"
-@pdffiles = </var/www/dokuwiki/data/pages/anleitungen/*.pdf>;+@dirs = <$path/*>;
  
-# Remove spaces and uppercase +foreach $dir (@dirs) { 
-foreach $file (@docfiles) { +   @files = <$dir/*>; 
-   $oldfile = $file; +   foreach $file (@files) { 
-   $file =~ s/\s+/_/g; +      $oldfile = $file; 
-   $file = lc $file; +      $file =~ s/\s+/_/g; 
-   rename($oldfile,$file); +      $file =~ s/\-/_/g; 
-   system("unoconv", "--format=html", "$file");+      $file =~ s/\__/_/g; 
 +      $file =~ s/\__/_/g; 
 +      $dirname = dirname($file); 
 +      $basename = basename($file); 
 +      $basename = lc $basename; 
 +      $file = "$dirname/$basename"
 +      rename($oldfile,$file); 
 +      if ( $file=~ /docx*$/ ) { 
 +         system("jodconverter", "-f", "html", "$file"); 
 +      } 
 +      if ( $file=~ /pdf$/ ) { 
 +         system("pdftohtml", "$file"); 
 +      } 
 +      $htmfile = $file; 
 +      $htmfile =~ s/\..*//; 
 +      $htmfile = "$htmfile.html"
 +      $newfile = $file; 
 +      $newfile =~ s/\..*//; 
 +      $newfile = "$newfile.txt"; 
 +      if ( -e $htmfile ) { 
 +         open (MYFILE, ">>$newfile"); 
 +         $input = $wc->html2wiki( file => $htmfile ); 
 +         print MYFILE "$input"; 
 +         close (MYFILE); 
 +      }
    }    }
- +   $txtdirpath=$txtpath.'/'.basename($dir)
-foreach $file (@pdffiles) { +   if ! -e $txtdirpath{ 
-   $oldfile = $file; +      system("mkdir", "$txtdirpath");
-   $file =~ s/\s+/_/g; +
-   $file = lc $file+
-   rename($oldfile,$file); +
-   system("pdftohtml", "$file");+
    }    }
- +   @txt = <$dirname/*.txt>; 
-@htmfiles = </var/www/dokuwiki/data/pages/anleitungen/*.html>; +   foreach $txt (@txt) { 
- +      copy($txt, $txtdirpath);
-foreach $file (@htmfiles) { +
-   $newfile = $file; +
-   $newfile =~ s/\..*//; +
-   $newfile = "$newfile.txt"; +
-   open (MYFILE">>$newfile"); +
-   $input = $wc->html2wiki( file => $file ); +
-   print MYFILE "$input"; +
-   close (MYFILE);+
    }    }
 +   $imgdirpath=$imgpath.'/'.basename($dir);
 +   if ( ! -e $imgdirpath) {
 +      system("mkdir", "$imgdirpath");
 +   }
 +   @img = <$dirname/*.png>;
 +   foreach $img (@img) {
 +      copy($img, $imgdirpath);
 +   }
 +   @img = <$dirname/*.jpg>;
 +   foreach $img (@img) {
 +      copy($img, $imgdirpath);
 +   }
 +}
 +system ("chown", "-R", "www-data:www-data", "/var/www/dokuwiki");
  
-system("chown", "-R", "www-data:www-data", "$path"); 
- 
-system("cp", "$txtpath/*.png", "$imgpath"); 
-system("cp", "$txtpath/*.jpg", "$imgpath"); 
 </code> </code>
tips/docpdf2dokuwiki.1343998859.txt.gz · Last modified: 2012-08-03 15:00 by audiomobster

Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Share Alike 4.0 International
CC Attribution-Share Alike 4.0 International Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki