Pretvorba: HTML tablice u tekst (html2txt; table2txt)

from bs4 import BeautifulSoup
# Convert every <table> in "datoteka.html" to tab-separated plain text and
# write the result to "datoteka.txt".

# Read the whole HTML document; the body of the `with` must be indented,
# otherwise the script fails with an IndentationError before doing anything.
with open("datoteka.html", "r", encoding="utf-8") as html_file:
    html_content = html_file.read()

soup = BeautifulSoup(html_content, 'html.parser')
tables = soup.find_all('table')

# One text block per table: cells of a row are joined with tabs and every
# row is terminated by a newline.
# NOTE(review): find_all() searches all descendants by default, so rows of a
# nested table would presumably also be emitted as part of the enclosing
# table -- confirm whether nested tables occur in the input.
table_texts = []
for table in tables:
    row_lines = [
        '\t'.join(
            cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])
        )
        for row in table.find_all('tr')
    ]
    # Build the table text in one pass instead of repeated `+=`.
    table_texts.append(''.join(line + '\n' for line in row_lines))

# Each table block is followed by two extra newlines as a separator.
with open("datoteka.txt", "w", encoding="utf-8") as txt_file:
    for table_text in table_texts:
        txt_file.write(table_text + '\n\n')

 

image_pdfimage_print