tips:pdfexport:htmldoc
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
tips:pdfexport:htmldoc [2008-10-20 15:38] – corrected layout problem wernerflamme | tips:pdfexport:htmldoc [2023-03-08 08:21] (current) – 2409:4070:4387:9f60:5d3e:e9b4:db59:c826 | ||
---|---|---|---|
Line 2: | Line 2: | ||
[[http:// | [[http:// | ||
* Install htmldoc (pretty easy) | * Install htmldoc (pretty easy) | ||
- | * Add the //Export to PDF// button as described | + | * Add the //Export to PDF// button as described |
* Create a temporary directory that the webserver can write to for the intermediate step. | * Create a temporary directory that the webserver can write to for the intermediate step. | ||
- | * In the function '' | + | * In the function '' |
if($act == ' | if($act == ' | ||
pdfmake( | pdfmake( | ||
Line 150: | Line 150: | ||
header(" | header(" | ||
</ | </ | ||
- | To retrieve images from the wiki server (relative links, hope that it won't cause security issues) (I had problems with PNG files, so I converted them into JPEG format). | + | To retrieve images from the wiki server (relative links, hope that it won't cause security issues) (I had problems with PNG files, so I converted them into JPEG format |
<code xml> | <code xml> | ||
$text = preg_replace("'< | $text = preg_replace("'< | ||
Line 520: | Line 520: | ||
header(" | header(" | ||
</ | </ | ||
+ | ====== HTMLDOC recursive variant ====== | ||
+ | My problem was that I needed support for child page export. I therefore chose to modify / hack [[# | ||
+ | |||
+ | It will thus perform a recursive export of your current page. This means that any internal links will be followed and converted to PDF too. The internal links should be copied to the PDF - meaning that they are clickable like they are in dokuwiki. | ||
+ | |||
+ | * Follow the first steps of [[# | ||
+ | * Then insert this into " | ||
+ | function pdfmake($text) | ||
+ | { | ||
+ | //Variables used to stop the search for child pages | ||
+ | global $pdfmake_recursion_level; | ||
+ | global $pdfmake_recursion_current; | ||
+ | global $pdfmake_links; | ||
+ | $pdfmake_links = array(); | ||
+ | | ||
+ | $pdfmake_recursion_level = 30; | ||
+ | $pdfmake_recursion_current = 0; | ||
+ | |||
+ | // Now search for children. | ||
+ | $text = pdfmake_children($text); | ||
+ | |||
+ | // And create the pdf | ||
+ | pdfmake_inner($text); | ||
+ | } | ||
+ | function pdfmake_inner($text){ | ||
+ | global $lang; | ||
+ | global $conf; | ||
+ | |||
+ | $dir=DOKU_INC." | ||
+ | $filenameInput=$dir." | ||
+ | $filenameOutput=$dir." | ||
+ | |||
+ | # Convert text and toctitle to destination code-page | ||
+ | $text=iconv(" | ||
+ | # Change toctitle if needed | ||
+ | if ($conf[' | ||
+ | $toctitle=$conf[' | ||
+ | } | ||
+ | elseif ($conf[' | ||
+ | $toctitle=$lang[' | ||
+ | } | ||
+ | else { | ||
+ | $toctitle=" | ||
+ | } | ||
+ | $toctitle=iconv(" | ||
+ | |||
+ | # htmldoc compatible name-conversion | ||
+ | $pdfcp=preg_replace("/ | ||
+ | $text = preg_replace("'< | ||
+ | $text = preg_replace("'< | ||
+ | |||
+ | # Execute changes based on replaces.conf | ||
+ | $replacesf=DOKU_INC . " | ||
+ | if ($conf[' | ||
+ | $allreplaces=file_get_contents($replacesf); | ||
+ | |||
+ | # Delete comments from file | ||
+ | $allreplaces=preg_replace("' | ||
+ | |||
+ | # Legalize multiple white-spaces | ||
+ | $allreplaces=preg_replace("' | ||
+ | |||
+ | # Delete unwanted spaces | ||
+ | $allreplaces=preg_replace("' | ||
+ | |||
+ | # Delete multiple empty lines | ||
+ | $allreplaces=preg_replace("' | ||
+ | |||
+ | # Split codepage sections | ||
+ | $codepages=preg_split("' | ||
+ | $cpreg=preg_quote($conf[' | ||
+ | |||
+ | # Find the used codepage | ||
+ | foreach ($codepages as $codepage) { | ||
+ | if (preg_match("'" | ||
+ | $replaces=preg_replace("'" | ||
+ | break; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # Split patterns | ||
+ | $patterns=preg_split("' | ||
+ | foreach ($patterns as $onepair) { | ||
+ | # Split pairs | ||
+ | $pairarray=preg_split("' | ||
+ | # Make changes | ||
+ | $text=str_replace($pairarray[0], | ||
+ | } | ||
+ | } | ||
+ | |||
+ | $text = preg_replace("'< | ||
+ | $text = preg_replace("'< | ||
+ | $text = str_replace(" | ||
+ | | ||
+ | $textarr = preg_split("/ | ||
+ | |||
+ | # Find and change linked images | ||
+ | $linkeds = preg_grep("'< | ||
+ | foreach ( $linkeds as $linked ) { | ||
+ | $picture = preg_replace("/< | ||
+ | $picture = preg_replace("'</ | ||
+ | $found = "'" | ||
+ | $text = preg_replace($found, | ||
+ | } | ||
+ | # HTML compatibility -> htmldoc can use <br> instead of <br/> | ||
+ | $text = str_replace('/>','>', | ||
+ | $text = str_replace('< | ||
+ | |||
+ | #write the string to temporary html-file | ||
+ | $fp = fopen ($filenameInput, | ||
+ | fwrite($fp, | ||
+ | fclose($fp); | ||
+ | |||
+ | #Use embedded fonts if needed | ||
+ | if ($conf[' | ||
+ | $fontparam=' | ||
+ | } else { | ||
+ | $fontparam=''; | ||
+ | } | ||
+ | |||
+ | #JPEG compression rate settings | ||
+ | $jpeg=" | ||
+ | |||
+ | #PDF compatibility | ||
+ | $pdf=" | ||
+ | |||
+ | # | ||
+ | $width=" | ||
+ | |||
+ | #convert using htmldoc | ||
+ | $command = $conf[' | ||
+ | |||
+ | system($command); | ||
+ | system(" | ||
+ | |||
+ | #send to browser | ||
+ | $filenameOutput=trim($filenameOutput); | ||
+ | header(" | ||
+ | header(" | ||
+ | $fd = @fopen($filenameOutput," | ||
+ | //Puke on error | ||
+ | if($fd == false) | ||
+ | { | ||
+ | print ' | ||
+ | exit; | ||
+ | } | ||
+ | |||
+ | while(!feof($fd)){ | ||
+ | echo fread($fd, | ||
+ | } | ||
+ | fclose($fd); | ||
+ | |||
+ | #clean up temporary files | ||
+ | system(" | ||
+ | system(" | ||
+ | } | ||
+ | |||
+ | //search for child pages and render their html | ||
+ | function pdfmake_children($text) | ||
+ | { | ||
+ | //Extract recursion levels | ||
+ | global $pdfmake_recursion_level; | ||
+ | global $pdfmake_recursion_current; | ||
+ | global $pdfmake_links; | ||
+ | | ||
+ | $links = array(); | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | |||
+ | //find all links on page | ||
+ | $regex_pattern = "/< | ||
+ | preg_match_all($regex_pattern, | ||
+ | |||
+ | //The matching pairs will be listed in matches[1]. Sort these matches, so that subnamespaces come before their parent namespaces. | ||
+ | // | ||
+ | for($i=0; $i< count($matches[1]); | ||
+ | //extract the internal dokuwiki id of the subpage. This is needed to perform the rendering | ||
+ | $link = substr($matches[1][$i], | ||
+ | // echo $link, '< | ||
+ | //Dont add a page which has already been included | ||
+ | if(!in_array($link, | ||
+ | | ||
+ | | ||
+ | $innerText .= p_wiki_xhtml($link,'', | ||
+ | | ||
+ | //Add the link to the collection so it can be sanitized later. | ||
+ | $pdfmake_links[] = $link; | ||
+ | $links[] = $link; | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | //Recurse into the next level of internal links | ||
+ | if($pdfmake_recursion_current < $pdfmake_recursion_level) { | ||
+ | //echo " | ||
+ | $innerText = pdfmake_children($innerText); | ||
+ | } | ||
+ | $text = pdfmake_correctlinks($text, | ||
+ | $innerText = pdfmake_correctlinks($innerText, | ||
+ | | ||
+ | // return all the text to caller. | ||
+ | return $text.$innerText; | ||
+ | } | ||
+ | function pdfmake_correctlinks($text, | ||
+ | { | ||
+ | for($i = 0; $i < count($links); | ||
+ | $link = $links[$i]; | ||
+ | // this $link is the full path to the dokuwiki page. However, in the HTML output, it is only the name after the last ":" | ||
+ | $text = str_replace($link, | ||
+ | } | ||
+ | |||
+ | return $text; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Remember that I only tested this on my own server (on which it works). So expect bugs and / or strange behavior. | ||
+ | |||
+ | ===== Bug fixes ===== | ||
+ | Here follows a list of fixed bugs | ||
+ | * 2009-10-31: | ||
+ | * Fixed a bug in the command line which, on some pages, caused the PDF generation to fail. | ||
+ | * Fixed a bug with unconvertible UTF8 chars breaking pdf generation (chars like -> and <-) | ||
+ | |||
+ | --- // | ||
====== HTMLDOC and OS X ====== | ====== HTMLDOC and OS X ====== | ||
Line 564: | Line 790: | ||
$filenameOutput=tempnam('',' | $filenameOutput=tempnam('',' | ||
</ | </ | ||
- | |||
====== HTMLDOC request ====== | ====== HTMLDOC request ====== | ||
I think that will be very useful if you can create a page with the list of wiki page to export and HTMLDOC export all these pages into a PDF file.\\ | I think that will be very useful if you can create a page with the list of wiki page to export and HTMLDOC export all these pages into a PDF file.\\ | ||
Line 578: | Line 803: | ||
So you can create pages from which you can extract a PDF file based on more wiki pages | So you can create pages from which you can extract a PDF file based on more wiki pages | ||
+ | |||
+ | **Check the** [[# | ||
===== Config problem with HTMLDOC variant ===== | ===== Config problem with HTMLDOC variant ===== | ||
Line 587: | Line 814: | ||
you have to declare all value in your '' | you have to declare all value in your '' | ||
+ | |||
+ | ===== Changes to the TOC ===== | ||
+ | |||
+ | Some recent changes in the core will break all the TOC-related code above, because [[https:// | ||
+ |
tips/pdfexport/htmldoc.1224509882.txt.gz · Last modified: 2009-10-31 11:31 (external edit)