1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23   
 24   
 25   
 26   
 27   
 28   
 29   
 30   
 31   
 32   
 33   
 34   
 35   
 36   
 37   
 38  """ 
 39  Implements the standard 'collect' action. 
 40  @sort: executeCollect 
 41  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 42  """ 
 43   
 44   
 45   
 46   
 47   
 48   
 49   
 50  import os 
 51  import logging 
 52  import pickle 
 53   
 54   
 55  from CedarBackup2.filesystem import BackupFileList, FilesystemList 
 56  from CedarBackup2.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath 
 57  from CedarBackup2.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR 
 58  from CedarBackup2.actions.util import writeIndicatorFile 
 59   
 60   
 61   
 62   
 63   
 64   
 65  logger = logging.getLogger("CedarBackup2.log.actions.collect") 
 66   
 67   
 68   
 69   
 70   
 71   
 72   
 73   
 74   
 75   
 77     """ 
 78     Executes the collect backup action. 
 79   
 80     @note: When the collect action is complete, we will write a collect 
 81     indicator to the collect directory, so it's obvious that the collect action 
 82     has completed.  The stage process uses this indicator to decide whether a 
 83     peer is ready to be staged. 
 84   
 85     @param configPath: Path to configuration file on disk. 
 86     @type configPath: String representing a path on disk. 
 87   
 88     @param options: Program command-line options. 
 89     @type options: Options object. 
 90   
 91     @param config: Program configuration. 
 92     @type config: Config object. 
 93   
 94     @raise ValueError: Under many generic error conditions 
 95     @raise TarError: If there is a problem creating a tar file 
 96     """ 
 97     logger.debug("Executing the 'collect' action.") 
 98     if config.options is None or config.collect is None: 
 99        raise ValueError("Collect configuration is not properly filled in.") 
100     if ((config.collect.collectFiles is None or len(config.collect.collectFiles) < 1) and 
101         (config.collect.collectDirs is None or len(config.collect.collectDirs) < 1)): 
102        raise ValueError("There must be at least one collect file or collect directory.") 
103     fullBackup = options.full 
104     logger.debug("Full backup flag is [%s]", fullBackup) 
105     todayIsStart = isStartOfWeek(config.options.startingDay) 
106     resetDigest = fullBackup or todayIsStart 
107     logger.debug("Reset digest flag is [%s]", resetDigest) 
108     if config.collect.collectFiles is not None: 
109        for collectFile in config.collect.collectFiles: 
110           logger.debug("Working with collect file [%s]", collectFile.absolutePath) 
111           collectMode = _getCollectMode(config, collectFile) 
112           archiveMode = _getArchiveMode(config, collectFile) 
113           digestPath = _getDigestPath(config, collectFile.absolutePath) 
114           tarfilePath = _getTarfilePath(config, collectFile.absolutePath, archiveMode) 
115           if fullBackup or (collectMode in ['daily', 'incr', ]) or (collectMode == 'weekly' and todayIsStart): 
116              logger.debug("File meets criteria to be backed up today.") 
117              _collectFile(config, collectFile.absolutePath, tarfilePath, 
118                           collectMode, archiveMode, resetDigest, digestPath) 
119           else: 
120              logger.debug("File will not be backed up, per collect mode.") 
121           logger.info("Completed collecting file [%s]", collectFile.absolutePath) 
122     if config.collect.collectDirs is not None: 
123        for collectDir in config.collect.collectDirs: 
124           logger.debug("Working with collect directory [%s]", collectDir.absolutePath) 
125           collectMode = _getCollectMode(config, collectDir) 
126           archiveMode = _getArchiveMode(config, collectDir) 
127           ignoreFile = _getIgnoreFile(config, collectDir) 
128           linkDepth = _getLinkDepth(collectDir) 
129           dereference = _getDereference(collectDir) 
130           recursionLevel = _getRecursionLevel(collectDir) 
131           (excludePaths, excludePatterns) = _getExclusions(config, collectDir) 
132           if fullBackup or (collectMode in ['daily', 'incr', ]) or (collectMode == 'weekly' and todayIsStart): 
133              logger.debug("Directory meets criteria to be backed up today.") 
134              _collectDirectory(config, collectDir.absolutePath, 
135                                collectMode, archiveMode, ignoreFile, linkDepth, dereference, 
136                                resetDigest, excludePaths, excludePatterns, recursionLevel) 
137           else: 
138              logger.debug("Directory will not be backed up, per collect mode.") 
139           logger.info("Completed collecting directory [%s]", collectDir.absolutePath) 
140     writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR, 
141                        config.options.backupUser, config.options.backupGroup) 
142     logger.info("Executed the 'collect' action successfully.") 
 143   
144   
145   
146   
147   
148   
149   
150   
151   
152   
def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Collects a configured collect file.

   A backup list holding just the indicated file is built and handed to
   L{_executeBackup}, which creates the tarfile and deals with the digest
   according to the collect mode and the reset digest flag.

   The caller must decide what the collect and archive modes are, since they
   can be on both the collect configuration and the collect file itself.

   @param config: Config object.
   @param absolutePath: Absolute path of file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   singleFileList = BackupFileList()
   singleFileList.addFile(absolutePath)
   _executeBackup(config=config,
                  backupList=singleFileList,
                  absolutePath=absolutePath,
                  tarfilePath=tarfilePath,
                  collectMode=collectMode,
                  archiveMode=archiveMode,
                  resetDigest=resetDigest,
                  digestPath=digestPath)
 177   
178   
179   
180   
181   
182   
def _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      excludePaths, excludePatterns, recursionLevel):
   """
   Collects a configured collect directory.

   With a recursion level of zero, the directory contents are gathered into a
   single backup list (honoring the ignore file, exclusions, link depth and
   dereference flag) and collected into one tarfile whose path, like the
   digest path, is derived from the directory's absolute path.

   With a non-zero recursion level, each immediate subdirectory is first
   collected on its own with the recursion level reduced by one.  The
   directory itself is then collected at level zero with those subdirectories
   added to the exclusions, since they have already been backed up.

   For directories that are collected incrementally, the digest is used and
   the reset digest flag is honored (basically, the reset digest flag ignores
   any existing digest, but a new digest is always rewritten).

   The caller must decide what the collect and archive modes are, since they
   can be on both the collect configuration and the collect directory itself.

   @note: The C{excludePaths} list passed in is never modified; the
   bookkeeping of already-collected subdirectories happens on a private copy.
   A value of C{None} is treated as an empty list for that bookkeeping.

   @param config: Config object.
   @param absolutePath: Absolute path of directory to collect.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param ignoreFile: Ignore file to use.
   @param linkDepth: Link depth value to use.
   @param dereference: Dereference flag to use.
   @param resetDigest: Reset digest flag.
   @param excludePaths: List of absolute paths to exclude.
   @param excludePatterns: List of patterns to exclude.
   @param recursionLevel: Recursion level (zero for no recursion)
   """
   if recursionLevel == 0:
      # Collect the actual directory because we're at recursion level 0
      logger.info("Collecting directory [%s]", absolutePath)
      tarfilePath = _getTarfilePath(config, absolutePath, archiveMode)
      digestPath = _getDigestPath(config, absolutePath)

      backupList = BackupFileList()
      backupList.ignoreFile = ignoreFile
      backupList.excludePaths = excludePaths
      backupList.excludePatterns = excludePatterns
      backupList.addDirContents(absolutePath, linkDepth=linkDepth, dereference=dereference)

      _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)
   else:
      # Private copy, so that recording the subdirectories we have handled
      # never leaks back into the list owned by the caller.
      if excludePaths is None:
         alreadyCollected = []
      else:
         alreadyCollected = list(excludePaths)

      # Find all of the immediate subdirectories
      subdirs = FilesystemList()
      subdirs.excludeFiles = True
      subdirs.excludeLinks = True
      subdirs.excludePaths = excludePaths
      subdirs.excludePatterns = excludePatterns
      subdirs.addDirContents(path=absolutePath, recursive=False, addSelf=False)

      # Back up the subdirectories separately
      for subdir in subdirs:
         _collectDirectory(config, subdir, collectMode, archiveMode,
                           ignoreFile, linkDepth, dereference, resetDigest,
                           alreadyCollected, excludePatterns, recursionLevel-1)
         alreadyCollected.append(subdir)  # already backed up, so exclude it from now on

      # Back up everything that hasn't previously been backed up
      _collectDirectory(config, absolutePath, collectMode, archiveMode,
                        ignoreFile, linkDepth, dereference, resetDigest,
                        alreadyCollected, excludePatterns, 0)
 243   
244   
245   
246   
247   
248   
def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Execute the backup process for the indicated backup list.

   This function exists mainly to consolidate functionality between the
   L{_collectFile} and L{_collectDirectory} functions.  Those functions build
   the backup list; this function causes the backup to execute properly and
   also manages usage of the digest file on disk.

   A digest is only used when the collect mode is C{incr}.  In that case, the
   old digest is either ignored (reset digest flag set) or loaded from disk,
   unchanged files are removed from the backup list, and the newly-captured
   digest is written back to disk after the tarfile step.  For any other
   collect mode the backup list is archived as-is.  A tarfile is only
   generated when the backup list is not empty.

   For collect files, the digest file will always just contain the single file
   that is being backed up.  This might be a little wasteful in terms of the
   number of files that we keep around, but it's consistent and easy to
   understand.

   @param config: Config object.
   @param backupList: List to execute backup for
   @param absolutePath: Absolute path of directory or file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   usesDigest = (collectMode == 'incr')
   newDigest = None

   # Digest handling happens up front, since it may shrink the backup list
   if usesDigest:
      if resetDigest:
         logger.debug("Based on resetDigest flag, digest will be cleared.")
         oldDigest = {}
      else:
         logger.debug("Based on resetDigest flag, digest will loaded from disk.")
         oldDigest = _loadDigest(digestPath)
      (removed, newDigest) = backupList.removeUnchanged(oldDigest, captureDigest=True)
      logger.debug("Removed %d unchanged files based on digest values.", removed)
   else:
      logger.debug("Collect mode is [%s]; no digest will be used.", collectMode)

   # A list holding exactly the collected path itself means we were given a single file
   isSingleFile = len(backupList) == 1 and backupList[0] == absolutePath
   sizeText = displayBytes(backupList.totalSize())
   if isSingleFile:
      logger.info("Backing up file [%s] (%s).", absolutePath, sizeText)
   else:
      logger.info("Backing up %d files in [%s] (%s).", len(backupList), absolutePath, sizeText)

   # Nothing left to archive means no tarfile gets created at all
   if len(backupList) > 0:
      backupList.generateTarfile(tarfilePath, archiveMode, True)
      changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)

   # The new digest is always rewritten in incremental mode
   if usesDigest:
      _writeDigest(config, newDigest, digestPath)
 298   
299   
300   
301   
302   
303   
def _loadDigest(digestPath):
   """
   Loads the indicated digest path from disk into a dictionary.

   If we can't load the digest successfully (either because it doesn't exist or
   for some other reason), then an empty dictionary will be returned - but the
   condition will be logged.

   The digest is opened in binary mode, as the pickle module expects, and the
   file handle is always closed, even when unpickling fails.

   @note: The digest is unpickled, so the digest path must refer to a trusted
   file; unpickling data from an untrusted source is not safe.

   @param digestPath: Path to the digest file on disk.

   @return: Dictionary representing contents of digest path.
   """
   if not os.path.isfile(digestPath):
      logger.debug("Digest [%s] does not exist on disk.", digestPath)
      return {}
   try:
      handle = open(digestPath, "rb")
      try:
         digest = pickle.load(handle)
      finally:
         handle.close()
      logger.debug("Loaded digest [%s] from disk: %d entries.", digestPath, len(digest))
      return digest
   except Exception:
      # Best-effort: any failure simply means we work without an old digest.
      # KeyboardInterrupt and SystemExit are deliberately not swallowed here.
      logger.error("Failed loading digest [%s] from disk.", digestPath)
      return {}
 327   
328   
329   
330   
331   
332   
def _writeDigest(config, digest, digestPath):
   """
   Writes the digest dictionary to the indicated digest path on disk.

   If we can't write the digest successfully for any reason, we'll log the
   condition but won't throw an exception.

   The digest is written in binary mode, as the pickle module expects, and the
   file handle is closed before ownership of the file is changed to the
   configured backup user and group.

   @param config: Config object.
   @param digest: Digest dictionary to write to disk.
   @param digestPath: Path to the digest file on disk.
   """
   try:
      handle = open(digestPath, "wb")
      try:
         pickle.dump(digest, handle)
      finally:
         handle.close()
      changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup)
      logger.debug("Wrote new digest [%s] to disk: %d entries.", digestPath, len(digest))
   except Exception:
      # Best-effort: a digest that cannot be written is logged, not fatal.
      # KeyboardInterrupt and SystemExit are deliberately not swallowed here.
      logger.error("Failed to write digest [%s] to disk.", digestPath)
 350   
351   
352   
353   
354   
355   
356   
357   
358   
359   
def _getCollectMode(config, item):
   """
   Gets the collect mode that should be used for a collect directory or file.
   A mode set on the file or directory itself wins; when the item has none,
   the mode from the collect section of the configuration is used instead.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Collect mode to use.
   """
   collectMode = item.collectMode
   if collectMode is None:
      collectMode = config.collect.collectMode
   logger.debug("Collect mode is [%s]", collectMode)
   return collectMode
 374   
375   
376   
377   
378   
379   
def _getArchiveMode(config, item):
   """
   Gets the archive mode that should be used for a collect directory or file.
   A mode set on the file or directory itself wins; when the item has none,
   the mode from the collect section of the configuration is used instead.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Archive mode to use.
   """
   archiveMode = item.archiveMode
   if archiveMode is None:
      archiveMode = config.collect.archiveMode
   logger.debug("Archive mode is [%s]", archiveMode)
   return archiveMode
 394   
395   
396   
397   
398   
399   
def _getIgnoreFile(config, item):
   """
   Gets the ignore file that should be used for a collect directory or file.
   An ignore file set on the file or directory itself wins; when the item has
   none, the one from the collect section of the configuration is used instead.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Ignore file to use.
   """
   ignoreFile = item.ignoreFile
   if ignoreFile is None:
      ignoreFile = config.collect.ignoreFile
   logger.debug("Ignore file is [%s]", ignoreFile)
   return ignoreFile
 414   
415   
416   
417   
418   
419   
def _getLinkDepth(item):
   """
   Gets the link depth that should be used for a collect directory.
   The value set on the directory is used when there is one; otherwise the
   link depth is 0 (zero).
   @param item: C{CollectDir} object
   @return: Link depth to use.
   """
   linkDepth = item.linkDepth
   if linkDepth is None:
      linkDepth = 0
   logger.debug("Link depth is [%d]", linkDepth)
   return linkDepth
 433   
434   
435   
436   
437   
438   
def _getDereference(item):
   """
   Gets the dereference flag that should be used for a collect directory.
   The value set on the directory is used when there is one; otherwise the
   flag is False.
   @param item: C{CollectDir} object
   @return: Dereference flag to use.
   """
   dereference = item.dereference
   if dereference is None:
      dereference = False
   logger.debug("Dereference flag is [%s]", dereference)
   return dereference
 452   
453   
454   
455   
456   
457   
471   
472   
473   
474   
475   
476   
def _getDigestPath(config, absolutePath):
   """
   Gets the digest path associated with a collect directory or file.
   The digest lives in the configured working directory, and its file name is
   the normalized form of the absolute path plus the digest extension.
   @param config: Config object.
   @param absolutePath: Absolute path to generate digest for
   @return: Absolute path to the digest associated with the collect directory or file.
   """
   digestName = "%s.%s" % (buildNormalizedPath(absolutePath), DIGEST_EXTENSION)
   digestPath = os.path.join(config.options.workingDir, digestName)
   logger.debug("Digest path is [%s]", digestPath)
   return digestPath
 489   
490   
491   
492   
493   
494   
def _getTarfilePath(config, absolutePath, archiveMode):
   """
   Gets the tarfile path (including correct extension) associated with a collect directory.
   The tarfile lives in the configured collect target directory, and its file
   name is the normalized form of the absolute path plus an extension chosen
   by archive mode: C{tar} for C{'tar'}, C{tar.gz} for C{'targz'} and
   C{tar.bz2} for C{'tarbz2'}.
   @param config: Config object.
   @param absolutePath: Absolute path to generate tarfile for
   @param archiveMode: Archive mode to use for this tarfile.
   @return: Absolute path to the tarfile associated with the collect directory.
   @raise ValueError: If the archive mode is not one of the known modes.
   """
   extensions = { 'tar': "tar", 'targz': "tar.gz", 'tarbz2': "tar.bz2", }
   if archiveMode not in extensions:
      # Fail with a meaningful error rather than an unbound local variable further down
      raise ValueError("Unknown archive mode [%s]; unable to determine tarfile extension." % archiveMode)
   extension = extensions[archiveMode]
   normalized = buildNormalizedPath(absolutePath)
   filename = "%s.%s" % (normalized, extension)
   tarfilePath = os.path.join(config.collect.targetDir, filename)
   logger.debug("Tarfile path is [%s]", tarfilePath)
   return tarfilePath
 514   
515   
516   
517   
518   
519   
554