Bora vê
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

beuty.py 2.0KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. from bs4 import BeautifulSoup
  2. import os
  3. import pickle
  4. def beut(tabela1, tabela2):
  5. with open('tabela.html', 'r') as file:
  6. soup = BeautifulSoup(file, "html5lib")
  7. soup.prettify()#reorganiza as tags
  8. tabela_all = soup.find_all('tbody')
  9. coluna1 = tabela_all[0].find_all('tr')
  10. coluna2 = tabela_all[1].find_all('tr')
  11. valores_return(coluna1, tabela1)
  12. valores_return(coluna2, tabela2)
  13. def valores_return(colunas, tabela):
  14. for coluna in colunas:
  15. celula = coluna.find_all('td')
  16. celula = [ele.text.strip() for ele in celula]
  17. tabela.append([ele for ele in celula if ele])
  18. def separar(palavra):
  19. numero = []
  20. for word in palavra:
  21. if word.isdigit():
  22. numero += word
  23. return (''.join(numero))
  24. def initsintatico(token, args,erro):
  25. if not(os.path.isfile('dicionario1.dtc')and os.path.isfile('dicionario2.dtc')):
  26. save()
  27. from sintatico import analisadorsintatico
  28. dict_tabela1 = load("dicionario1.dtc")
  29. dict_tabela2 = load("dicionario2.dtc")
  30. analisadorsintatico(dict_tabela1, dict_tabela2, token, args,erro)
  31. def load(filename):
  32. file = open(filename, "rb")
  33. return pickle.load(file)
  34. def save():
  35. tabela1 = []
  36. tabela2 = []
  37. beut(tabela1, tabela2)
  38. topo = tabela1[0]
  39. del tabela1[0]
  40. dict_tabela1 = {}
  41. dict_tabela2 = {}
  42. for linhas in tabela1:
  43. i = 0;
  44. for linha in linhas:
  45. if linha.isdigit() and i <= len(topo):
  46. dict_tabela1[(linhas[0], topo[i - 1])] = [int(linha)]
  47. i += 1
  48. for i in range(len(tabela2)):
  49. if (((tabela2[i][1]).split('::= '))[1]).split(" ")[0] != 'î':
  50. dict_tabela2[int(separar(tabela2[i][0]))] = ((((tabela2[i][1]).split('::= '))[1]).split())
  51. else:
  52. dict_tabela2[int(separar(tabela2[i][0]))] = []
  53. saveFile(dict_tabela1, "dicionario1.dtc")
  54. saveFile(dict_tabela2, "dicionario2.dtc")
  55. def saveFile(dict, filename):
  56. dicionario = open(filename, "wb")
  57. pickle.dump(dict, dicionario)
  58. dicionario.close()