Handling PDF Files with pdf.js
Handling DOCX Files with mammoth.js
JavaScript for Parsing FDX and Counting Words
// When a file is chosen in the #uploadFdx input, read it as text, parse it as
// XML, and log the number of whitespace-separated words found in its text.
document.getElementById('uploadFdx').addEventListener('change', function(event) {
  var file = event.target.files[0];
  // files[0] is undefined when the selection is empty; readAsText would throw
  // a TypeError on it, so there is nothing to do in that case.
  if (!file) {
    return;
  }
  var reader = new FileReader();
  reader.onload = function(e) {
    var parser = new DOMParser();
    var xmlDoc = parser.parseFromString(e.target.result, "application/xml");
    // DOMParser does not throw on malformed XML: it returns a document that
    // contains a <parsererror> element. Without this check the error message
    // itself would be counted as the file's words.
    if (xmlDoc.getElementsByTagName("parsererror").length > 0) {
      console.error("Could not parse FDX file: the content is not well-formed XML");
      return;
    }
    // Assuming all text content is relevant, we concatenate text from all nodes
    var allText = extractText(xmlDoc.documentElement);
    // A word is any run of non-whitespace characters; match() returns null
    // when there is none, so run the regex once and fall back to 0.
    var words = allText.match(/\S+/g);
    var wordCount = words ? words.length : 0;
    console.log("Word Count: ", wordCount);
  };
  // Surface read failures instead of silently producing no output.
  reader.onerror = function() {
    console.error("Could not read FDX file: ", reader.error);
  };
  reader.readAsText(file);
});
/**
 * Recursively concatenates the character data found under a DOM node.
 *
 * Text nodes and CDATA sections contribute their nodeValue; element nodes
 * contribute the concatenated text of their children, in document order.
 * Every other node type (comments, processing instructions, ...) contributes
 * nothing. No separator is inserted between adjacent nodes.
 *
 * @param {Node} node - The node to extract text from; null/undefined is
 *     treated as an empty subtree.
 * @returns {string} The concatenated text, or "" when there is none.
 */
function extractText(node) {
  if (!node) {
    return "";
  }
  // 3 = Node.TEXT_NODE, 4 = Node.CDATA_SECTION_NODE. In an XML document,
  // content wrapped in <![CDATA[...]]> is exposed as type 4, not type 3,
  // so both must be collected or that text is lost.
  if (node.nodeType === 3 || node.nodeType === 4) {
    return node.nodeValue;
  }
  // 1 = Node.ELEMENT_NODE; only elements are descended into.
  if (node.nodeType !== 1) {
    return "";
  }
  var text = "";
  for (var i = 0; i < node.childNodes.length; i++) {
    text += extractText(node.childNodes[i]);
  }
  return text;
}