Part one: taking the first full backup. At present, this code is still under development and should not be used on a production machine; I am posting it here for reference.
Eventually, this code is going to be included in a backup client I am developing that will interface with glusterfs and Amazon S3 storage.
This code is currently tested against Python 2.7.4 on a Fedora 18 machine. With all three Python files in place, and any number of properly defined job XML files in the jobs.d/ directory, these scripts are functional.
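Nothing is packaged yet, so I am assuming everything simply lives in one working directory; start_seeds.py looks for jobs.d/ relative to wherever it is run from. A minimal layout would look roughly like this (the directory name backup/ is just an example):

backup/
    start_seeds.py
    seed_files.py
    seed_functions.py
    jobs.d/
        job103.xml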
seed_files.py
This is the controller file for taking the first backup.
#!/usr/bin/python
# Create first full backup
import os, stat, time, seed_functions

def printHello():
    print "hello";

#--- This part of the script does the heavy lifting.
def seedMain(myFindPath,myJobId,myFullTempPath,myTargetMetaPath,myTargetTarFilePath,myExcludeFiles):
    #--- execute the first backup for the job.
    seed_functions.findFiles(myFullTempPath,myFindPath,myExcludeFiles);
    seed_functions.storeMetaData(myFullTempPath,myTargetMetaPath);
    seed_functions.mkTarFile(myFullTempPath,myTargetTarFilePath);
    print "Job completed successfully";

#--- If this is being run as a script, set temporary variables
if __name__ == '__main__':
    myFindPath = '/home/myuser/findfiles';
    myJobId = str('106');
    myTempPath = '/tmp/';
    myTempFileList = 'files.tmp';
    myFullTempPath = os.path.join(myTempPath,myTempFileList);
    myTargetPath = '/home/target1/';
    myTargetMeta = 'job'+myJobId+'.meta';
    myTargetTarFile = 'job'+myJobId+'.tar';
    myTargetMetaPath = myTargetPath+myTargetMeta;
    myTargetTarFilePath = myTargetPath+myTargetTarFile;
    #--- Path names below should be absolute path names (start with / ) and should not end with '/'
    myExcludeFiles = list();
    myExcludeFiles.append('/home/myuser/findfiles/dontbackup');
    myExcludeFiles.append('/home/myuser/findfiles/somebigfiles');
    myExcludeFiles.append('*.adf');
    myExcludeFiles.append('badfilez*');
    seedMain(myFindPath,myJobId,myFullTempPath,myTargetMetaPath,myTargetTarFilePath,myExcludeFiles);
    #myJdate = seed_functions.getJulianDate();
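If the hard-coded paths in the __main__ block above exist on your machine, the controller can be run directly as a quick test, something like:

python seed_files.py

With the example values shown, that should write the file list to /tmp/files.tmp, the metadata to /home/target1/job106.meta, and the archive to /home/target1/job106.tar.lzo (mkTarFile() in seed_functions.py appends the .lzo extension). Normally, though, seedMain() is called once per job by start_seeds.py further down.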
seed_functions.py
This is the file that does the actual work.
#!/usr/bin/python
# Shared Functions and Classes
import os, sys, stat, time, glob, re, hashlib, tarfile
from datetime import datetime

def getJulianDate():
    #--- This function returns an integer value of today's Julian Date
    #--- preceded by the two digit year. IE: 01JAN2013 -> 13001
    nowtime = str(datetime.now());
    (year, month, day) = nowtime.split('-');
    day = int(day[:2]);
    month = int(month);
    year = int(year);
    t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0));
    jdate = (year % 2000) * 1000;
    jdate = jdate + time.gmtime(t)[7];
    return jdate;

def findFiles(fileListPath,myFindPath,excludeFiles):
    #--- Variables: fileListPath = string, myFindPath = string, excludeFiles = list() of strings
    #--- This function writes/overwrites the file @ 'fileListPath', which should be an absolute path.
    #--- The file @ fileListPath is a list of files found within myFindPath.
    #--- myFindPath is designed to be an absolute directory path.
    #--- excludeFiles is a list() of absolute paths which should be excluded during the find operation.
    with open(fileListPath, 'w') as ftemp:
        #--- Generate list of files that shouldn't be added
        #removeExcludedFiles();
        for dirname, dirnames, filenames in os.walk(myFindPath):
            #--- Remove excluded directories (iterate over a copy so removal is safe)
            for badfile in dirnames[:]:
                if os.path.join(myFindPath,dirname,badfile) in excludeFiles:
                    dirnames.remove(badfile);
            #--- Gather the other directories
            for subdirname in dirnames:
                #--- we just want to add empty directories
                if os.path.islink(os.path.join(dirname, subdirname)) or not os.listdir(os.path.join(dirname, subdirname)):
                    ftemp.write(os.path.join(dirname, subdirname)+'\n');
            #--- Add files to the list.
            for filename in filenames:
                #--- check to see if the file is a regular file or a link:
                if os.path.islink(os.path.join(dirname, filename)) or os.path.isfile(os.path.join(dirname, filename)):
                    #if not filename in myWildList:
                    ftemp.write(os.path.join(dirname, filename)+'\n');

def storeMetaData(fileListPath, fileMetaPath):
    #--- This function creates an output file with
    #--- filename and absolute path of fileMetaPath
    #--- File format is:
    #--- /path/to/file ::: modified datetime ::: seconds since 1970 ::: md5 hash
    with open(fileMetaPath, 'w') as fmeta:
        with open(fileListPath, 'r') as myFileList:
            for filez in myFileList:
                #--- create a list to append information.
                myFileMeta = list();
                #--- Append absolute path and file name as first item
                myFileMeta.append(os.path.abspath(filez.strip()));
                #--- Get file meta information from os.stat()
                myStat = os.stat(filez.strip());
                #--- Append human readable date/time stamp.
                myFileMeta.append(time.ctime(myStat.st_mtime));
                #--- Append unix timestamp for easy comparison in the future.
                myFileMeta.append(myStat.st_mtime);
                if not os.path.isdir(filez.strip()):
                    myHash = md5(filez.strip());
                else:
                    myHash = "---None: directory";
                myFileMeta.append(myHash);
                metaDataString = str(myFileMeta[0]+":::"+myFileMeta[1]+":::"+str(myFileMeta[2])+":::"+str(myFileMeta[3]));
                fmeta.write(metaDataString+'\n');

def md5(filename):
    ''' function to get md5 of file '''
    d = hashlib.md5();
    try:
        d.update(open(filename, 'rb').read());
    except Exception,e:
        print e;
    else:
        return d.hexdigest();

def mkTarFile(fileList, tarOutPath):
    thisTarOut = tarOutPath+".lzo"
    thisFileList = "-T "+fileList
    os.system("tar {options} {tarfile} {filex} &> /dev/null".format(options="cpvfa", tarfile=thisTarOut, filex=thisFileList));

#--------
#--- Not yet implemented functions below:
#--------
def findFiles2(fileListPath,myFindPath,excludeFiles):
    #--- This function is for testing purposes only
    with open(fileListPath, 'w') as ftemp:
        for dirname, dirnames, filenames in os.walk(myFindPath):
            for subdirname in dirnames:
                #--- we just want to add empty directories
                if not os.listdir(os.path.join(dirname, subdirname)):
                    ftemp.write(os.path.join(dirname, subdirname)+'\n');
            for filename in filenames:
                #--- check to see if the file is a regular file or a link:
                if os.path.islink(os.path.join(dirname, filename)) or os.path.isfile(os.path.join(dirname, filename)):
                    ftemp.write(os.path.join(dirname, filename)+'\n');

def removeExcludedFiles():
    #--- Not implemented yet.
    myWildList = list();
    wildMatch = re.compile("^\*");
    wildMatch2 = re.compile(".*\*");
    print "Excluding the following"
    for badfile in excludeFiles:
        print badfile;
        result = wildMatch.match(badfile);
        if not result:
            result2 = wildMatch2.match(badfile);
            print result2;
        if result or result2:
            myWildList.append(badfile);
    print myWildList;
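One note on mkTarFile(): the tarfile module is imported above, but the function currently shells out to the system tar command and relies on the 'a' option to pick lzo compression from the .lzo file name. If I ever want to stay in pure Python, a minimal sketch could look like the one below. This is only an assumption of how it might be done, not what the script does today, and since the standard tarfile module does not support lzo, the sketch falls back to gzip:

def mkTarFilePy(fileList, tarOutPath):
    ''' Hypothetical pure-Python variant of mkTarFile(), gzip instead of lzo. '''
    thisTarOut = tarOutPath + ".gz";
    tar = tarfile.open(thisTarOut, 'w:gz');
    try:
        with open(fileList, 'r') as flist:
            for line in flist:
                #--- each line in the temp file is an absolute path written by findFiles()
                tar.add(line.strip(), recursive=False);
    finally:
        tar.close();

Using recursive=False keeps the archive limited to exactly the paths in the file list, the same way the -T option does for the command-line tar.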
start_seeds.py
This is near completion; it parses the jobs in the jobs.d/ directory, verifies them, and runs them.
#!/usr/bin/python
import os, re
import xml.etree.ElementTree as ET
import seed_files

def readConfFile():
    #--- Future feature to read a specified jobs.d from a config file.
    jobdir = 'jobs.d';
    return jobdir;

def findJobs(jobdir):
    myJobList = list();
    confMatch = re.compile(".*\.xml$");
    for dirname, dirnames, filenames in os.walk(jobdir):
        for jobid in filenames:
            result = confMatch.match(jobid);
            if result:
                myJobList.append(os.path.join(jobdir,jobid));
    return myJobList;

def checkPath(pathText):
    confMatch = re.compile("^\/");
    result = confMatch.match(pathText);
    confMatch2 = re.compile("^\/$");
    result2 = confMatch2.match(pathText);
    if result2:
        exit('Path cannot be / ');
    if not result:
        exit('Directory path must be absolute path: '+pathText);
    if os.path.isdir(pathText) or os.path.ismount(pathText):
        print 'Path seems valid: ',pathText;
    else:
        exit('Invalid path: '+pathText);

def parseJobs(myFoundJobs):
    print "Found the following config files: ",myFoundJobs;
    print "------------------------------------------------";
    myJobList = list();
    for myJob in myFoundJobs:
        mySubList = list();
        print "Parsing and testing: ",myJob;
        tree = ET.parse(myJob);
        root = tree.getroot();
        for child in root:
            #--- Validate that the backup path is valid.
            #--- future feature: master excludes in config file
            if child.tag == 'backupdir':
                print "Checking Backup Directory";
                checkPath(child.text);
            if child.tag == 'backuptarget':
                print "Checking Backup Target"
                checkPath(child.text);
            #--- Create another sublist for excluded directories.
            if child.tag == 'exclude':
                myExcludeList = list();
                for subchild in child:
                    myExcludeList.append(subchild.text);
                mySubList.append(myExcludeList);
            #--- Since it's not a sublist, we append directly
            elif not child.tag == 'exclude':
                mySubList.append(child.text);
        print "------------------------------------------------";
        myJobList.append(mySubList);
    #--- Ensure some jobs were actually found.
    if myJobList.__len__() == 0:
        exit('Exit on Error: No Jobs Found!');
    #--- If we didn't exit above, return the parsed job list
    return myJobList;

def performBackup(myJobList):
    for job in myJobList:
        #--- job[0]: JobID
        #--- job[1]: Backup Path
        #--- job[2]: Backup Target
        #--- job[3]: Temp directory
        #--- job[4]: Excluded directories
        myJobId = job[0];
        myFindPath = job[1];
        myTarget = job[2];
        myTempPath = job[3];
        myExcludes = job[4];
        myTargetPath = os.path.join(myTarget,str('job'+myJobId),'master');
        myTempFileList = 'backup_job'+myJobId+'.tmp';
        myFullTempPath = os.path.join(myTargetPath,myTempFileList);
        myTargetMeta = 'job'+myJobId+'_master.meta';
        myTargetTarFile = 'job'+myJobId+'_master_seed.tar';
        myTargetMetaPath = os.path.join(myTargetPath,myTargetMeta);
        myTargetTarFilePath = os.path.join(myTargetPath,myTargetTarFile);
        if os.path.exists(os.path.join(myTarget,str('job'+myJobId))):
            print os.path.join(myFindPath,str('job'+myJobId));
            exit('Critical Error on JobID: '+myJobId+'\n This job directory already exists! Exiting to preserve data!');
        else:
            os.makedirs(os.path.join(myTarget,str('job'+myJobId)));
        if os.path.exists(myTargetPath):
            exit('Critical Error on JobID: '+myJobId+'\n This job directory already exists! Exiting to preserve data!');
        else:
            os.makedirs(myTargetPath);
        myExcludeFiles = list();
        for excludes in myExcludes:
            myExcludeFiles.append(excludes);
        myExcludeFiles.append(myTarget);
        print "------------------------------------------------";
        print "Starting Job: ",myJobId;
        seed_files.seedMain(myFindPath,myJobId,myFullTempPath,myTargetMetaPath,myTargetTarFilePath,myExcludeFiles);

#--- Execute the script.
myJobList = list();
jobdir = readConfFile();
myFoundJobs = findJobs(jobdir);
myJobList = parseJobs(myFoundJobs);
print "Number of jobs: ",myJobList.__len__();
#promptContinue()  #--- Let user review backup jobs, prompt for continue.
performBackup(myJobList);
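To make the job[0] through job[4] indexing in performBackup() concrete: for the job103.xml file shown below, parseJobs() should return a list with a single entry that looks roughly like this (the order simply follows the element order in the XML file):

#--- Expected shape of one parsed job (from job103.xml below):
# ['103',                                # job[0]: JobID
#  '/home/myuser/files',                 # job[1]: Backup Path
#  '/offsitenfs/client1/target2',        # job[2]: Backup Target
#  '/tmp',                               # job[3]: Temp directory
#  ['/home/myuser/files/badfolder1',     # job[4]: Excluded directories
#   '/home/myuser/files/music/badfolder2']]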
job103.xml
An example job file in the jobs.d/ directory.
<?xml version="1.0"?>
<data>
    <jobid>103</jobid>
    <backupdir>/home/myuser/files</backupdir>
    <backuptarget>/offsitenfs/client1/target2</backuptarget>
    <temppath>/tmp</temppath>
    <exclude>
        <directory>/home/myuser/files/badfolder1</directory>
        <directory>/home/myuser/files/music/badfolder2</directory>
    </exclude>
</data>
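Based on the path handling in performBackup() and the .lzo extension added by mkTarFile(), a successful run of this job should leave the backup target looking roughly like this:

/offsitenfs/client1/target2/
    job103/
        master/
            backup_job103.tmp            (file list written by findFiles)
            job103_master.meta           (metadata written by storeMetaData)
            job103_master_seed.tar.lzo   (archive written by mkTarFile)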