# functions.py
  1. # -*- coding: utf-8 -*-
  2. import gzip
  3. import json
  4. import re
  5. from os import walk, path
  6. import time
  7. from multiprocessing import Process
  8. class pathDB:
  9. def __init__(self, path):
  10. self.path = path
  11. def getFileInDirectory(self):
  12. f = []
  13. for (dirpath, dirname, filename) in walk(self.path):
  14. f.append({"dirpath": dirpath, "dirname": dirname, "filename": filename})
  15. return f
  16. def checkFileExists(self, file):
  17. if path.isfile(file):
  18. return 1
  19. else:
  20. return 0
  21. def get_json_by_date(self):
  22. f = {}
  23. date = []
  24. for (dirpath, dirname, filename) in walk(self.path):
  25. for file in filename:
  26. if str(file[0:8]) not in date:
  27. date.append(file[0:8])
  28. f[file[0:8]] = {
  29. 'year': int(file[0:4]),
  30. 'month': int(file[4:6]),
  31. 'day': int(file[6:8]),
  32. 'files': list(filter(lambda x: file[0:8] in x, filename))
  33. }
  34. return f
  35. class Settings:
  36. def __init__(self, path):
  37. self.path = path
  38. def getFullSetting(self):
  39. with open(self.path, 'r') as setting:
  40. sett = json.load(setting)
  41. return sett
  42. def getPathBackup(self):
  43. return self.getFullSetting()['pathDir']
  44. def get_fun(data, pattern):
  45. p = re.compile(pattern)
  46. result = re.findall(p, data)
  47. return result
  48. def readInChunk(fileObj, chunkSize=2018):
  49. while True:
  50. data = fileObj.read(chunkSize)
  51. if not data:
  52. break
  53. yield data
  54. class work_with_backup:
  55. def __init__(self, data, pattern, path_file):
  56. self.data = data
  57. self.pattern = pattern
  58. self.path_file = path_file
  59. def open_file(path_file):
  60. pattern_extensions = ".*\.gz"
  61. name_file = re.split('/', path_file)[len(re.split('/', path_file)) - 1]
  62. data = ""
  63. if re.match(pattern_extensions, name_file):
  64. ff = gzip.open(path_file, 'rt', encoding='utf-8', errors='ignore')
  65. start_time = time.time()
  66. for x in readInChunk(ff):
  67. data = data + Process(x)
  68. print("--- %s seconds ---" % (time.time() - start_time))
  69. else:
  70. ff = open(path_file, 'rb')
  71. data = readInChunk(ff)
  72. ff.close()
  73. return data
  74. def get_name_database(self, data):
  75. # print(json.dump(self.get_fun(data, '-- MySQL dump')))
  76. p = "hdfhdfghdfghdfghdfgh"
  77. print(len(json.dumps(get_fun(data, p))))
  78. return 0
  79. # if int(len(json.dump(self.get_fun(data, '-- MySQL dump')))) < 0:
  80. # return 'mysql'
  81. # elif int(len(json.dump(self.get_fun(data, '-- PostgreSQL database dump')))) < 0:
  82. # return 'pg'