@@ -31,12 +31,99 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"regexp"
 	"sort"
 	"strings"
 	"time"
 )
 
+type purgeJob struct {
+	datetime time.Time
+	dirs     []string
+	files    []string
+}
+
+func genPurgeJobs(items []Item, dbname string) []purgeJob {
+	jobs := make(map[string]purgeJob)
+
+	// The files to purge must be grouped by date. Depending on the options,
+	// there can be up to 6 files for a database or output
+	reExt := regexp.MustCompile(`^(sql|d|dump|tar|out|createdb\.sql)(?:\.(sha\d{1,3}|age))?(?:\.(sha\d{1,3}|age))?(?:\.(sha\d{1,3}))?`)
+
+	for _, item := range items {
+		if strings.HasPrefix(item.key, cleanDBName(dbname)+"_") {
+			dateNExt := strings.TrimPrefix(item.key, cleanDBName(dbname)+"_")
+			parts := strings.SplitN(dateNExt, ".", 2)
+
+			var (
+				date   time.Time
+				parsed bool
+			)
+
+			// We match the file using every timestamp format
+			// possible so that the format can be changed without
+			// breaking the purge
+			for _, layout := range []string{"2006-01-02_15-04-05", time.RFC3339} {
+
+				// Parse the format to a time in the local
+				// timezone when the timezone is not part of
+				// the string, otherwise use the timezone
+				// written in the string. We do this because
+				// the limit is in the local timezone.
+				date, _ = time.ParseInLocation(layout, parts[0], time.Local)
+				if !date.IsZero() {
+					parsed = true
+					break
+				}
+			}
+
+			if !parsed {
+				// the file does not match the time format, skip it
+				continue
+			}
+
+			// Identify the kind of file based on the dot separated
+			// strings at the end of its name
+			matches := reExt.FindStringSubmatch(parts[1])
+			if len(matches) == 5 {
+				job := jobs[parts[0]]
+
+				if job.datetime.IsZero() {
+					job.datetime = date
+				}
+
+				if date.Before(job.datetime) {
+					job.datetime = date
+				}
+
+				if item.isDir {
+					job.dirs = append(job.dirs, item.key)
+				} else {
+					job.files = append(job.files, item.key)
+				}
+
+				jobs[parts[0]] = job
+				continue
+			}
+		}
+	}
+
+	// The output is a list of jobs, sorted by date, youngest first
+	jobList := make([]purgeJob, 0)
+	for _, j := range jobs {
+		jobList = append(jobList, j)
+	}
+
+	sort.Slice(jobList, func(i, j int) bool {
+		return jobList[i].datetime.After(jobList[j].datetime)
+	})
+
+	return jobList
+}
+
 func purgeDumps(directory string, dbname string, keep int, limit time.Time) error {
+	l.Verboseln("purge:", dbname, "limit:", limit, "keep:", keep)
+
 	// The dbname can be put in the path of the backup directory, so we
 	// have to compute it first. This is why a dbname is required to purge
 	// old dumps
@@ -46,106 +133,163 @@ func purgeDumps(directory string, dbname string, keep int, limit time.Time) erro
 		return fmt.Errorf("could not purge %s: %s", dirpath, err)
 	}
 	defer dir.Close()
-	dirContents := make([]os.FileInfo, 0)
+
+	files := make([]Item, 0)
 	for {
 		var f []os.FileInfo
 		f, err = dir.Readdir(1)
-		if errors.Is(err, io.EOF) {
-			// reset to avoid returning is.EOF at the end
-			err = nil
-			break
-		} else if err != nil {
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				// reset to avoid returning io.EOF at the end
+				err = nil
+				break
+			}
 			return fmt.Errorf("could not purge %s: %s", dirpath, err)
 		}
 
-		if strings.HasPrefix(f[0].Name(), cleanDBName(dbname)+"_") &&
-			(!f[0].IsDir() || strings.HasSuffix(f[0].Name(), ".d")) {
-			dirContents = append(dirContents, f[0])
-		}
+		files = append(files, Item{key: f[0].Name(), modtime: f[0].ModTime(), isDir: f[0].IsDir()})
 	}
 
-	// Sort the list of filenames by date, youngest first,
-	// so that we can slice it easily to keep backups
-	sort.Slice(dirContents, func(i, j int) bool {
-		return dirContents[i].ModTime().After(dirContents[j].ModTime())
-	})
+	// Parse and group by date. We remove groups of files produced by
+	// the same run (including checksums, encrypted files, etc.)
+	jobs := genPurgeJobs(files, dbname)
+
+	if keep < len(jobs) && keep >= 0 {
+		// Show the files kept in verbose mode
+		for _, j := range jobs[:keep] {
+			for _, f := range j.files {
+				l.Verboseln("keeping (count)", filepath.Join(dirpath, f))
+			}
 
-	if keep < len(dirContents) && keep >= 0 {
-		for _, f := range dirContents[keep:] {
-			file := filepath.Join(dirpath, f.Name())
-			if f.ModTime().Before(limit) {
-				l.Infoln("removing", file)
-				if f.IsDir() {
-					if err = os.RemoveAll(file); err != nil {
+			for _, d := range j.dirs {
+				l.Verboseln("keeping (count)", filepath.Join(dirpath, d))
+			}
+		}
+
+		// Purge the older files after excluding the ones we need
+		// to keep
+		for _, j := range jobs[keep:] {
+			if j.datetime.Before(limit) {
+				for _, f := range j.files {
+					path := filepath.Join(dirpath, f)
+					l.Infoln("removing", path)
+					if err = os.Remove(path); err != nil {
 						l.Errorln(err)
 					}
-				} else {
-					if err = os.Remove(file); err != nil {
+				}
+
+				for _, d := range j.dirs {
+					path := filepath.Join(dirpath, d)
+					l.Infoln("removing", path)
+					if err = os.RemoveAll(path); err != nil {
 						l.Errorln(err)
 					}
 				}
 			} else {
-				l.Verboseln("keeping", file)
+				for _, f := range j.files {
+					l.Verboseln("keeping (age)", filepath.Join(dirpath, f))
+				}
+
+				for _, d := range j.dirs {
+					l.Verboseln("keeping (age)", filepath.Join(dirpath, d))
+				}
 			}
 		}
 	}
+
 	if err != nil {
 		return fmt.Errorf("could not purge %s: %s", dirpath, err)
 	}
+
 	return nil
 }
 
-func purgeRemoteDumps(repo Repo, directory string, dbname string, keep int, limit time.Time) (rv error) {
+func purgeRemoteDumps(repo Repo, directory string, dbname string, keep int, limit time.Time) error {
+	l.Verboseln("remote purge:", dbname, "limit:", limit, "keep:", keep)
+
 	// The dbname can be put in the directory tree of the dump, in this
-	// case the directory containing dbname in its name is kept on the
+	// case the directory containing {dbname} in its name is kept on the
 	// remote path along with any subdirectory. So we have to include it in
 	// the filter when listing remote files
 	dirpath := filepath.Dir(formatDumpPath(directory, "", "", dbname, time.Time{}))
 	prefix := relPath(directory, filepath.Join(dirpath, cleanDBName(dbname)))
 
-	files, err := repo.List(prefix)
+	// Get the list of files from the repository; this includes the
+	// contents of dumps in the directory format.
+	remoteFiles, err := repo.List(prefix)
 	if err != nil {
 		return fmt.Errorf("could not purge: %w", err)
 	}
 
-	// Sort the list of filenames by date, youngest first,
-	// so that we can slice it easily to keep backups
-	sort.Slice(files, func(i, j int) bool {
-		return files[i].modtime.After(files[j].modtime)
-	})
+	// We are going to parse the filename, so we need to remove any
+	// possible parent dir before the name of the dump
+	parentDir := filepath.Dir(prefix)
+	if parentDir == "." || parentDir == "/" {
+		parentDir = ""
+	}
+
+	files := make([]Item, 0)
+	for _, i := range remoteFiles {
+		f, err := filepath.Rel(parentDir, i.key)
+		if err != nil {
+			l.Warnf("could not process remote file %s: %s", i.key, err)
+			continue
+		}
+
+		files = append(files, Item{key: f, modtime: i.modtime, isDir: i.isDir})
+	}
 
-	dirs := make([]string, 0)
-
-	if keep < len(files) && keep >= 0 {
-		for _, f := range files[keep:] {
-			if f.modtime.Before(limit) {
-				if f.isDir {
-					// remove directory after so that we
-					// have better chances that they are
-					// empty
-					dirs = append(dirs, f.key)
-					continue
+	// Parse and group by date. We remove groups of files produced by
+	// the same run (including checksums, encrypted files, etc.)
+	jobs := genPurgeJobs(files, dbname)
+
+	if keep < len(jobs) && keep >= 0 {
+		// Show the files kept in verbose mode
+		for _, j := range jobs[:keep] {
+			for _, f := range j.files {
+				l.Verboseln("keeping remote (count)", filepath.Join(parentDir, f))
+			}
+
+			for _, d := range j.dirs {
+				l.Verboseln("keeping remote (count)", filepath.Join(parentDir, d))
+			}
+		}
+
+		// Purge the older files after excluding the ones we need
+		// to keep
+		for _, j := range jobs[keep:] {
+			if j.datetime.Before(limit) {
+				for _, f := range j.files {
+					path := filepath.Join(parentDir, f)
+					l.Infoln("removing remote", path)
+					if err = repo.Remove(path); err != nil {
+						l.Errorln(err)
+					}
 				}
 
-				l.Infoln("removing remote file", f.key)
-				if err := repo.Remove(f.key); err != nil {
-					l.Errorf("could not purge %s: %s", f.key, err)
-					rv = err
+				for _, d := range j.dirs {
+					path := filepath.Join(parentDir, d)
+					l.Infoln("removing remote", path)
+					if err = repo.Remove(path); err != nil {
+						l.Errorln(err)
+					}
 				}
-				continue
-			}
 
-			l.Verboseln("keeping remote file", f.key)
+			} else {
+				for _, f := range j.files {
+					l.Verboseln("keeping remote (age)", filepath.Join(parentDir, f))
+				}
+
+				for _, d := range j.dirs {
+					l.Verboseln("keeping remote (age)", filepath.Join(parentDir, d))
+				}
+			}
 		}
 	}
 
-	for _, d := range dirs {
-		l.Infoln("removing remote directory", d)
-		if err := repo.Remove(d); err != nil {
-			l.Errorf("could not purge %s: %s", d, err)
-			rv = err
-		}
+	if err != nil {
+		return fmt.Errorf("could not purge: %w", err)
 	}
 
-	return
+	return nil
 }
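
Below is a hypothetical, standalone sketch (not part of the patch) showing how the grouping done by genPurgeJobs behaves. It reuses the extension regexp and timestamp layouts from the diff above; the package main wrapper and the sample "mydb_..." filenames are made up for illustration. Files produced by the same run share the timestamp part of their name, so they end up in one purgeJob and are kept or removed together.

package main

import (
	"fmt"
	"regexp"
	"strings"
	"time"
)

func main() {
	// Same extension pattern and timestamp layouts as genPurgeJobs in the
	// patch above; everything else here is illustrative only.
	reExt := regexp.MustCompile(`^(sql|d|dump|tar|out|createdb\.sql)(?:\.(sha\d{1,3}|age))?(?:\.(sha\d{1,3}|age))?(?:\.(sha\d{1,3}))?`)
	layouts := []string{"2006-01-02_15-04-05", time.RFC3339}

	// Three hypothetical files produced by the same run: dump, encrypted
	// dump and checksum. They share the same timestamp, so they would be
	// grouped into a single purgeJob.
	names := []string{
		"mydb_2021-03-04_05-06-07.sql",
		"mydb_2021-03-04_05-06-07.sql.age",
		"mydb_2021-03-04_05-06-07.sql.age.sha256",
	}

	for _, name := range names {
		// Strip the "<dbname>_" prefix, then split into the timestamp part
		// and the extension part, like the loop in genPurgeJobs does.
		parts := strings.SplitN(strings.TrimPrefix(name, "mydb_"), ".", 2)

		// Try every supported timestamp layout until one parses.
		var date time.Time
		for _, layout := range layouts {
			date, _ = time.ParseInLocation(layout, parts[0], time.Local)
			if !date.IsZero() {
				break
			}
		}

		// FindStringSubmatch returns 5 elements (full match + 4 groups)
		// when the extension is recognized.
		matched := len(reExt.FindStringSubmatch(parts[1])) == 5
		fmt.Printf("%-42s group=%s parsed=%s matched=%v\n",
			name, parts[0], date.Format("2006-01-02 15:04:05"), matched)
	}
}

All three names print the same group key (the timestamp), which is the map key genPurgeJobs uses before turning the map into a date-sorted slice of jobs.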