JavaScript - Tabular Text to CSV Converter

From NoskeWiki
Revision as of 23:03, 18 December 2019 by NoskeWiki (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

About

NOTE: This page is a daughter page of: JavaScript


A pretty basic JavaScript/HTML example to help you take a series of entries in itemized format (eg: "name: john, age: 21") and turns it into a CSV or TSV (table) style string.


Basic interface of online conversion tool into tabular table like form


Itemized Text to CSV Converter

convert_html_table_to_csv.html:

<!doctype html>
 
<html lang="en">
<head>
  <meta charset="utf-8">
 
  <title>Itemized Text to CSV Converter</title>
  <meta name="description" content="Simple page for extracting tabular rows from itemized format text">
  <meta name="author" content="Andrew Noske">

  <style>
    body {
      background-color: linen;
      font-size: 14px;
    }
    #txtarea-in {
      width: 400px;
      height: 300px;
    }
    #txtarea-out {
      width: 600px;
      height: 300px;
    }
    td {
      vertical-align: top;
    }
    #btn-convert {
      height: 300px;
    }
    #txtinput-headings {
      width: 240px;
    }
  </style>
  
  <script>
  /**
   * Reads all values in the UI and turns the input string in
   * `txtarea-in` into a CSV/TSV style string as output
   * into `txtarea-out`, plus a few other text areas it reads
   * and changes.
   */
  function convertToCsv() {
    var txtAreaIn = document.getElementById("txtarea-in");
    var txtHeadings = document.getElementById("txtinput-headings");
    var txtRowSeparator = document.getElementById("txtinput-row-separator");
    var txtColSeparator = document.getElementById("txtinput-col-separator");
    var cmbOutputFormat = document.getElementById("cmb-output-format");
    var txtAreaOut = document.getElementById("txtarea-out");

    var txtIn = txtAreaIn.value.trim().replace('\r', '');
    var headings = txtHeadings.value.split(',');
    headings = trimItemsInArray(headings);
    var rowSep = getSeparatorValue(txtRowSeparator.value);
    var colSep = getSeparatorValue(txtColSeparator.value);
    var csvSepChar = ',';
    if (cmbOutputFormat.value == 'tab') {
      csvSepChar = '\t';
    }
    
    var txtOut = convertTextToCsv(txtIn, headings, rowSep, colSep, csvSepChar);
    txtAreaOut.value = txtOut;
  }

  /**
   * Make sure the text representation of tabs and newlines are
   * converted to actual string tokens.
   * @param {string} origTextValue Original text value. Example: "\\n".
   * @return {string} String after modification. Example: "\n".
   */ 
  function getSeparatorValue(origTextValue) {
    var txtValue = origTextValue.replace('\\n', '\n');
    txtValue = txtValue.replace('\\n', '\n');
    txtValue = txtValue.replace('\\t', '\t');
    return txtValue;
  }
    
  /**
   * Takes in plaintext and a set of headings you want out, then then 
   * processes the text into a CSV/TSV style string as output.
   * @param {string} txtIn Raw string to process as input.
   * @param {!Array<string>} headings Ordered array of headings.
   *     Example: ["name","age"].
   * @param {string} rowSep Char/string token that seperates rows of data.
   *     Example: "\n\n".
   * @param {string} colSep Char/string token that seperates pieces of 
   *     data inside a row. Example: "\n".
   * @param {string} csvSepChar Char/string to seperate values in output.
   *     Example: "\t".   
   * @return {string} CSV/TSV style string.
   *     Example "name\tage\nFred\t24\nGeorge\t31" }.
   */
  function convertTextToCsv(txtIn, headings, rowSep, colSep, csvSepChar) {
    var OUT_LINE_SEP_CHAR = '\n';
    var txtOut = headingsToRow(headings, csvSepChar) + OUT_LINE_SEP_CHAR;
    var totalItems = 0;

    if (headings.length == 0) {
      return 'ERROR: You must enter headings';
    }
    var txtRowBlocks = txtIn.split(rowSep);
    txtRowBlocks = txtRowBlocks.filter(function (el) {
      return el != '';
    });    
    console.log('Number potential rows: ' + txtRowBlocks.length.toString());
    
    txtNumLines = document.getElementById("txtinput-num-lines");
    txtNumLines.value = txtRowBlocks.length;

    // For each row block:
    for (var r=0; r<txtRowBlocks.length; r++) {
      var txtBlock = txtRowBlocks[r].trim();
      // console.log(txtBlock);  // For debugging.
      
      var lines = txtBlock.split(colSep);
      var rowDict = makeRowDict(headings);
      var itemsFound = 0;

      // For each line:
      for (var i=0; i<lines.length - 1; i++) {
        var line = lines[i].trim();
        if (headings.includes(line)) {
          rowDict[line] = lines[i+1];
          itemsFound++;
          totalItems++;
        }
      }
      if (itemsFound > 0) {
        txtOut += rowDictToString(rowDict, headings, csvSepChar) + OUT_LINE_SEP_CHAR;
      }
    }
    txtNumItems = document.getElementById("txtinput-num-items");
    txtNumItems.value = totalItems.toString();
    
    return txtOut;
  }

  /**
   * Initiates a new dictionary with all the given heading values as keys and '-'
   * as their starting value.
   * @param {!Array<string>} headings Ordered array of headings. Example: ["name","age"].
   * @return {object} Dictionary in the form: { "name": "-", "age": "-" }.
   */ 
  function makeRowDict(headings) {
    var rowDict = {};
    for (var i=0; i<headings.length; i++) {
      var heading = headings[i];
      rowDict[heading] = '-';
    }
    return rowDict;
  }

  /**
   * Writes out a single line with your headings in CSV form.
   * @param {!Array<string>} headings Ordered array of headings. Example: ["name","age"].
   * @param {string} csvSepChar The string to separate column headings. Example: ",".
   * @return {string} Single line with columns. Example: "name,age".
   */
  function rowDictToString(rowDict, headings, csvSepChar) {
    var txtOut = '';
    for (var i=0; i<headings.length; i++) {
      var heading = headings[i];
      txtOut += (i == 0) ? rowDict[heading] : csvSepChar + rowDict[heading];
    }
    return txtOut;
  }
    
  /**
   * Writes out a single line with your headings in CSV form.
   * @param {!Array<string>} headings Ordered array of headings. Example: ["name","age"].
   * @param {string} csvSepChar The string to separate column headings. Example: ",".
   * @return {string} Single line with columns. Example: "name,age".
   */
  function headingsToRow(headings, csvSepChar) {
    return headings.join(csvSepChar);
  }

  /**
   * Removes an empty strings from an array of strings.
   * @param {!Array<string>} array Array of string.
   * @return {!Array<string>} Same array, but with empty strings removed.
   */
  function trimItemsInArray(array) {
    for (var i = 0; i < array.length; i++) {
      array[i] = array[i].trim();
    }
    return array;
  }

  </script>
</head>


<body>

<table border="0" cellpadding="5" cellspacing="0" style="width: 500px;">
<tbody>
  <tr>
    <th>Input (itemized text)</th>
    <th></th>
    <th>Output (table format)</th>
  </tr>
  <tr>
    <td>
      <textarea id="txtarea-in">
NAME
Jenny Xxxx
AGENCY
The Xxxx Agency
AAR MEMBER
Yes
EMAIL
queries@xxxxxxxx.com
AGENCY WEB SITE
http://www.xxxxxxxxx.com/


NAME
Nancy Yyyyy
AGENCY
Nancy Yyyyy Literary Agency
AAR MEMBER
Yes
EMAIL
QueryNancy@yyyyyyyy.com
AGENCY WEB SITE
http://www.yyyyyyyy.com/

      </textarea>
      <br><br>
      
      <table border="0" cellpadding="5" cellspacing="0">
        <tbody>
          <tr>
            <td>Headings:</td>
            <td><input id="txtinput-headings" size="30" type="text" value="NAME,EMAIL, AGENCY WEB SITE" placeholder="Comma seperated"/></td>
          </tr>
          <tr>
            <td>Row separator:</td>
            <td><input id="txtinput-row-separator" size="10" type="text" value="\n\n" placeholder=""/></td>
          </tr>
          <tr>
            <td>Col separator:</td>
            <td><input id="txtinput-col-separator" size="10" type="text" value="\n" placeholder=""/></td>
          </tr>
          <tr>
            <td>Output format:</td>
            <td>
              <select id="cmb-output-format">
                <option value="comma">comma seperated</option>
                <option value="tab" selected>tab seperated</option>
              </select>
            </td>
          </tr>
        </tbody>
    </table>

    </td>
    <td>
      <b><button id="btn-convert" onclick="convertToCsv();">Convert<br>></button></b>
    </td>
    <td>
      <textarea id="txtarea-out"></textarea>
      <br><br>

      <table border="0" cellpadding="5" cellspacing="0">
        <tbody>
          <tr>
            <td>Number entries:</td>
            <td><input id="txtinput-num-lines" size="5" type="text" value="-" disabled/></td>
          </tr>
        </tbody>
        <tbody>
          <tr>
            <td>Items found:</td>
            <td><input id="txtinput-num-items" size="5" type="text" value="-" disabled/></td>
          </tr>
        </tbody>
      </table>
    </td>
  </tr>
</tbody>
</table>
  
</body>
</html>


Code license
For all of the code on my site... if there are specific instruction or licence comments please leave them in. If you copy my code with minimum modifications to another webpage, or into any code other people will see I would love an acknowledgment to my site.... otherwise, the license for this code is more-or-less WTFPL (do what you want)! If only copying <20 lines, then don't bother. That said - if you'd like to add a web-link to my site www.andrewnoske.com or (better yet) the specific page with code, that's a really sweet gestures! Links to the page may be useful to yourself or your users and helps increase traffic to my site. Hope my code is useful! :)



See Also