Search This Blog

How To Extract Text From PDF Using JavaScript | PDF To Text Extractor

Source code to extract text from PDF documents.


<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>PDF to Text Extractor</title>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.min.js" integrity="sha512-ml/QKfG3+Yes6TwOzQb7aCNtJF4PUyha6R3w8pSTo/VJSywl7ZreYvvtUso7fKevpsI+pYVVwnu82YO0q3V6eg==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
    <style>
        /* Page heading: spans the full width with centered text. */
        h1 {
            text-align: center;
            width: 100%;
        }

        /* Main container and result panel: vertical stack, centered on both axes. */
        .pdfwork,
        .afterupload {
            width: 100%;
            display: flex;
            flex-direction: column;
            justify-content: center;
            align-items: center;
        }

        /* Small gap above every descendant of the main container. */
        .pdfwork * {
            margin-top: 5px;
        }

        /* Hidden until the script reveals them after extraction.
           Must stay after the flex rule above so display: none wins for .afterupload. */
        .afterupload,
        .another {
            display: none;
        }
    </style>
</head>
<body>
    <h1>PDF To Text Extractor</h1>
    <!-- Class names below are the hooks the page script looks up with querySelector; keep them stable. -->
    <div class="pdfwork">
        <!-- Hidden by CSS until extraction has run; reloads the page to start over. -->
        <button type="button" class="another" onclick="location.reload()">Extract Another PDF</button>
        <label for="pdf-file">Select PDF</label>
        <input type="file" class="selectpdf" id="pdf-file" accept="application/pdf,.pdf">
        <label for="pdf-password">Password :</label>
        <input type="password" class="pwd" id="pdf-password" placeholder="optional">
        <button type="button" class="upload">Upload</button>
        <!-- Result panel: hidden by CSS until the script fills it in. -->
        <div class="afterupload">
            <label for="page-select">Select Page</label>
            <select class="selectpage" id="page-select" onchange="afterProcess()"></select>
            <!-- href is filled in by the script with a data: URL holding the selected page's text. -->
            <a href="" class="download" download="pdf-text.txt">Download Pdf Text</a>
            <textarea class="pdftext" aria-label="Extracted text"></textarea>
        </div>
    </div>
    <script>
        // Point pdf.js at its worker script (same cdnjs release as the library loaded in <head>).
        pdfjsLib.GlobalWorkerOptions.workerSrc = "https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.worker.min.js";

        // Element references shared by the handlers and functions below.
        const pdfinput = document.querySelector(".selectpdf");      // file picker
        const pwd = document.querySelector(".pwd");                 // optional password field
        const upload = document.querySelector(".upload");           // "Upload" button
        const afterupload = document.querySelector(".afterupload"); // result panel
        const select = document.querySelector("select");            // page chooser
        const download = document.querySelector(".download");       // download link
        const pdftext = document.querySelector(".pdftext");         // textarea showing page text
        // Upload button: validate the chosen file, read it as a data URL,
        // and hand the result to extractText().
        upload.addEventListener('click', () => {
            const file = pdfinput.files[0];
            // File.type is an empty string when the browser cannot determine the
            // MIME type, so fall back to the ".pdf" extension in that case only.
            const looksLikePdf = file !== undefined &&
                (file.type === "application/pdf" ||
                 (file.type === "" && /\.pdf$/i.test(file.name)));
            if (!looksLikePdf) {
                alert("select a valid pdf file");
                return;
            }

            const fr = new FileReader();
            // Register both handlers before starting the read.
            fr.onload = () => {
                // Second argument: whether extractText should send the password field's value.
                extractText(fr.result, pwd.value !== "");
            };
            fr.onerror = () => {
                // Surface read failures instead of silently doing nothing.
                alert(fr.error ? fr.error.message : "could not read the selected file");
            };
            fr.readAsDataURL(file);
        });
        // Text of each page of the most recently extracted PDF; index 0 holds page 1.
        let alltext = [];

        /**
         * Load a PDF with pdf.js, store the text of every page in `alltext`,
         * rebuild the page <select> with one option per page, then call
         * afterProcess() to display the selected page.
         *
         * @param {string} url   Document source passed to pdfjsLib.getDocument
         *                       (the upload handler passes a data URL).
         * @param {boolean} pass When true, the password field's current value is
         *                       sent to pdf.js together with the document.
         * @returns {Promise<void>} Failures are reported via alert() and not rethrown.
         */
        async function extractText(url, pass) {
          try {
            const source = pass ? {url: url, password: pwd.value} : url;
            const pdf = await pdfjsLib.getDocument(source).promise;

            // Collect into a local array first so that a failure part-way through
            // leaves the previously shown result untouched.
            const pageTexts = [];
            for (let i = 1; i <= pdf.numPages; i++) {
                const page = await pdf.getPage(i);
                const content = await page.getTextContent();
                // hasEOL marks items followed by a line break; keep that break so the
                // last word of one line does not fuse with the first word of the next.
                const text = content.items
                    .map((item) => item.hasEOL ? item.str + "\n" : item.str)
                    .join("");
                pageTexts.push(text);
            }

            // Replace (not append to) the previous document's pages and options,
            // otherwise a second upload would show stale pages and duplicate entries.
            alltext.length = 0;
            select.textContent = "";
            pageTexts.forEach((text, index) => {
                const pageNumber = String(index + 1);
                alltext.push(text);
                select.add(new Option(pageNumber, pageNumber));
            });
            afterProcess();
          } catch (err) {
            alert(err.message);
          }
        }

        /**
         * Display the page currently chosen in the page <select>: put its text in
         * the textarea, point the download link at a data: URL containing the same
         * text, and reveal the result panel and the "Extract Another PDF" button.
         */
        function afterProcess() {
            // Option values are 1-based page numbers; alltext is 0-based.
            const pageText = alltext[select.value - 1];

            pdftext.value = pageText;
            download.href = "data:text/plain;charset=utf-8," + encodeURIComponent(pageText);

            afterupload.style.display = "flex";
            document.querySelector(".another").style.display = "unset";
        }
    </script>
</body>
</html>

No comments:

Post a Comment