# Python: convert the tables of a PDF file to a plain-text (.txt) file
import pdfplumber,os
def _collapse_spaces(text):
    """Return *text* with every run of consecutive spaces reduced to one.

    NOTE(review): the previous code chained nine identical
    ``str.replace`` calls whose arguments, as they appear in the file,
    both were a single space (a no-op). Repeating the same call nine
    times only makes sense for squeezing doubled spaces, so that is
    presumably the intent -- confirm against real input.
    """
    while "  " in text:
        text = text.replace("  ", " ")
    return text


def main(path='cooper.pdf', txtfile='cooper.txt'):
    """Export the first cell of every table row in a PDF to a text file.

    Each page of the PDF is scanned for tables. For every table row whose
    first cell holds text, that cell is written to the output file on its
    own line, with runs of spaces collapsed. Text outside tables and cells
    other than the first one of a row are not exported.

    Args:
        path: Path of the PDF file to read.
        txtfile: Path of the text file to create (overwritten if present).

    Returns:
        None. Progress messages and the exported rows are printed to stdout.
    """
    print('努力转换之中,请稍待片刻~~~~~~~~')
    # Context managers close both files even if extraction fails midway.
    # The PDF is opened first, so a missing or unreadable PDF does not
    # leave an empty output file behind.
    # UTF-8 is requested explicitly: the platform default encoding may be
    # unable to represent characters extracted from the PDF.
    with pdfplumber.open(path) as pdf, \
            open(txtfile, 'w', encoding='utf-8') as out:
        for page in pdf.pages:
            for table in page.extract_tables():
                for row in table:
                    # A cell may be None rather than "" (and a row could
                    # be empty); skip those instead of crashing on
                    # None.replace / row[0].
                    first_cell = row[0] if row else None
                    if not first_cell:
                        continue
                    out.write(_collapse_spaces(first_cell) + '\n')
                    # Echo the full row that was just exported.
                    print(row)
    print('转换完成!')
# Run the conversion only when this file is executed directly as a script;
# importing it as a module must not trigger any work.
if __name__ == "__main__":
    main()