The Law of Unintended Consequences

Robert explains how he broke XML rendering in commandbox-ssg commandbox cfml

May 3, 2024 / Robert Zehnder

I was feeling pretty good about myself when I managed to post-process commandbox-ssg HTML output using jSoup. The downside, as I learned after the fact, was that non-HTML files were still getting the same treatment. By default jSoup uses an HTML parser and will wrap the output in html and body tags if they do not exist in the input HTML. Needless to say, that does not bode well when your sitemap.xml or an RSS feed is wrapped in HTML tags.

My first inclination was to set a new property skip_beautify in the front matter that would skip the post-process step if that flag was true. I did get that working to get my RSS feed and sitemap back up and running, but it felt like a hack. After a little bit of reflection I came up with a better way that appears to work well.

Every page generated by commandbox-ssg has its own prc scope that contains all the data required to generate the page. Here is what the default prc scope looks like:

  // default per-page request context; every generated page starts from these values
  var prc = {
   "build_start"            : buildTime,
   // file name of the template up to its first "."
   "fileSlug"               : getFileFromPath( template ).listFirst( "." ),
   // source template path with slashes normalized
   "inFile"                 : fileSystemUtil.normalizeSlashes( template ),
   // starts empty; filled in later by getOutfile()
   "outFile"                : "",
   "title"                  : "",
   "description"            : "",
   "published"              : true,
   // last-modified timestamp of the source template
   "date"                   : dateTimeFormat( getFileInfo( template ).lastModified, "yyyy-mm-dd HH:nn" ),
   "content"                : "",
   "type"                   : "page",
   "layout"                 : "main",
   "view"                   : "",
   "permalink"              : "",
   // pages default to an html extension
   "fileExt"                : "html",
   "excludeFromCollections" : false
  };

The default file extension for a generated page is html, which is available as the prc.fileExt attribute but was not being used. The method getOutfile() handles calculating the path and filename of every generated template; however, if a permalink was passed in with a file extension, it would not update prc.fileExt. Now, if the permalink is set and includes a file extension, getOutfile() sets prc.fileExt properly. There is still a potential issue with a permalink without a file extension, but that is addressed elsewhere in the code.

Here is the getOutfile() method modified to handle file extensions:

 /**
  * Calculates the output path of a page and stores it in prc.outFile
  *
  * - posts keep their source directory under _site/, with the file name
  *   replaced by the slug (or fileSlug) plus prc.fileExt
  * - pages with a permalink are written to _site + permalink, and
  *   prc.fileExt is recalculated from the permalink's last path segment
  * - all other pages mirror their source path under _site/ using prc.fileExt
  *
  * @prc request context for the current page, updated in place
  */
 function getOutfile( required struct prc ){
  var outDir  = "";
  var outFile = "";
  var temp    = "";

  if ( prc.type == "post" ) {
   outFile   = prc.inFile.replace( cwd, "" );
   // reverse so the file name is the first element and can be swapped out
   temp      = outFile.listToArray( "/" ).reverse();
   temp[ 1 ] = ( prc.keyExists( "slug" ) ? prc.slug : prc.fileSlug ) & "." & prc.fileExt;
   outFile   = cwd & "_site/" & temp.reverse().toList( "/" );
  } else {
   if ( len( prc.permalink ) ) {
    outFile  = cwd & "_site" & prc.permalink;
    // calculate the file extension from the last path segment of the permalink
    var stem = listLast( prc.permalink, "/" );
    if ( stem.find( "." ) ) {
     // take the text after the final "." of the stem
     prc.fileExt = listLast( stem, "." );
    } else {
     // a file extension wasn't specified
     prc.fileExt = "";
    }
   } else {
    outFile = getFileFromPath( prc.inFile ).listFirst( "." );
    outDir  = getDirectoryFromPath( prc.inFile ).replace( cwd, "/" );
    outFile = cwd & "_site" & outDir & outFile & "." & prc.fileExt;
   }
  }
  prc.outFile = fileSystemUtil.normalizeSlashes( outFile );
 }

Next, renderTemplate() needs to be modified so that files with an html extension are post-processed by jSoup, while other template types are simply rendered as-is.

Also, right above the return you will notice I have added a check to see if renderedHtml is XML. This will catch instances where a file extension was not specified in the permalink but the template is returning XML content. If so, it will update prc.fileExt to xml.

 /**
  * Returns the rendered output for a template, its view and its layout
  *
  * The template is rendered first, then optionally overlaid by a view from
  * _includes/ and a layout from _includes/layouts/. Output that is well-formed
  * XML switches prc.fileExt to "xml"; only output whose fileExt is still
  * "html" is passed through JSoup.
  *
  * NOTE(review): the included templates presumably read prc and renderedHtml
  * from this function's scope, so local names here should not be renamed
  * without checking the templates.
  *
  * @prc request context for the current page; prc.content and prc.fileExt may be updated
  * @return the trimmed rendered output
  */
 function renderTemplate( prc ){
  var renderedHtml = "";
  var template     = "";

  try {
   // template is CF markup (case-insensitive substring match on ".cfm")
   if ( prc.inFile.findNoCase( ".cfm" ) ) {
    if ( process.hasIncludes && process.views.find( prc.view ) && prc.layout != "none" ) {
     // render the cfml in the template first, capturing it as the page content
     template = fileSystemUtil.makePathRelative( prc.inFile );

     savecontent variable="prc.content" {
      include template;
     }

     // overlay the view
     template = fileSystemUtil.makePathRelative( cwd & "_includes/" & prc.view & ".cfm" );

     savecontent variable="renderedHtml" {
      include template;
     }
    } else {
     // view was not found (or layout is "none"), just render the template
     template = fileSystemUtil.makePathRelative( prc.inFile );

     savecontent variable="renderedHtml" {
      include template;
     }
    }
   }
   // template is markdown; this is a separate check, not an else-branch of the ".cfm" test
   if ( prc.inFile.findNoCase( ".md" ) ) {
    if ( process.hasIncludes && process.views.find( prc.view ) ) {
     template = fileSystemUtil.makePathRelative( cwd & "_includes/" & prc.view & ".cfm" );

     savecontent variable="renderedHtml" {
      include template;
     }
    } else {
     // no matching view: use prc.content as-is
     renderedHtml = prc.content;
    }
   }
   // skip layout if "none" is specified
   if (
    prc.layout != "none" &&
    process.hasIncludes &&
    process.layouts.find( prc.layout )
   ) {
    template = fileSystemUtil.makePathRelative( cwd & "_includes/layouts/" & prc.layout & ".cfm" );

    // the layout's output replaces whatever was in renderedHtml
    savecontent variable="renderedHtml" {
     include template;
    }
   }
  } catch ( any e ) {
   // report the failing source file together with the error message
   error( prc.inFile & " :: " & e.message );
  }

  // well-formed XML output overrides the file extension, whatever it was before
  if ( isXML( trim( renderedHtml ) ) ) prc.fileExt = "xml";

  // only html output is run through JSoup; everything else is returned as rendered
  return trim( prc.fileExt == "html" ? JSoup.parse( renderedHtml ) : renderedHtml );
 }

The final step, as a bit of a sanity check, was modifying the --verbose output of build to include the calculated file extension in the output.

 /**
  * Renders every published template and writes the result to its output file,
  * creating the target directory when needed and logging each file written
  */
 function generateStatic(){
  templates.each( ( prc ) => {
   // unpublished pages are neither rendered nor written
   if ( !prc.published ) return;

   var rendered  = renderTemplate( prc );
   var targetDir = getDirectoryFromPath( prc.outFile );
   directoryCreate( targetDir, true, true );

   // output path relative to the working directory, used in both log formats
   var sitePath = "/" & replace( prc.outFile, cwd, "", "all" );
   // prc.fileExt is read after rendering because renderTemplate() may change it
   var logLine  = process.verbose
    ? ( " [ " & prc.inFile & " ] -> " & sitePath & "  [" & prc.fileExt & "]" )
    : ( "Writing file: " & sitePath );
   print.greyline( logLine );

   fileWrite( prc.outFile, rendered );
  } );
 }

The pages are now rendering properly with the correct file extensions.

❯ ssg build --verbose | grep xml
 [ /Users/rob/Development/kdfe-ssg/feed.cfm ] -> /_site/rss  [xml]
 [ /Users/rob/Development/kdfe-ssg/posts/2020/12/quickly-transform-xml-to-json-with-org-json.md ] -> /_site/posts/2020/12/quickly-transform-xml-to-json-with-org-json.html  [html]
 [ /Users/rob/Development/kdfe-ssg/sitemap.cfm ] -> /_site/sitemap.xml  [xml]

I guess I got a bit ahead of myself and pushed the last release too early. Once I hammer on things a bit I will push commandbox-ssg 0.2.2 to ForgeBox with working content type detection.