@@ -102,6 +102,8 @@ const typeValueToTypeName = {
102
102
103
103
// Combined bitmask: the BlockAction bits OR-ed with the Important bit.
const BlockImportant = BlockAction | Important;
104
104
105
+
// Matches any string containing at least one `^` or `*` character. Shared at
// module level so both the filter parser and the wildcard filter classes can
// test a pattern with the same regex.
const reIsWildcarded = /[\^\*]/;
106
+
105
107
// ABP filters: https://adblockplus.org/en/filters
106
108
// regex tester: http://regex101.com/
107
109
@@ -110,10 +112,39 @@ const BlockImportant = BlockAction | Important;
110
112
// See the following as short-lived registers, used during evaluation. They are
111
113
// valid until the next evaluation.
112
114
113
-
let pageHostnameRegister = '',
114
-
requestHostnameRegister = '';
115
-
//var filterRegister = null;
116
-
//var categoryRegister = '';
115
+
// NOTE(review): presumably the hostname of the page for the evaluation in
// progress -- nothing in this excerpt reads it; confirm against callers.
let pageHostnameRegister = '';
116
+
// Hostname register for the request being evaluated. isHnAnchored() uses its
// length to compute where the hostname span ends within the URL; an empty
// string means no hostname span is available.
let requestHostnameRegister = '';
117
+
118
+
/******************************************************************************/
119
+
120
+
// First character of match must be within the hostname part of the url.
121
+
//
122
+
// https://github.com/gorhill/uBlock/issues/1929
123
+
// Match only hostname label boundaries.
124
+
125
+
/**
 * Tell whether a match starting at `matchStart` in `url` begins on a hostname
 * label boundary, i.e. at the first character of the hostname or right after
 * a `.` inside the hostname.
 *
 * The hostname span is taken to start right after the first `://` in `url`
 * and to be as long as `requestHostnameRegister`. The span is cached across
 * calls and reused as long as the register length is unchanged and `url`
 * still has `://` ending at the cached start offset.
 *
 * @param {string} url - URL being evaluated.
 * @param {number} matchStart - Index in `url` where the candidate match starts.
 * @returns {boolean} true if `matchStart` sits on a hostname label boundary.
 */
const isHnAnchored = (( ) => {
    // Cached span from the last (re)computation:
    //   lastLen: length of requestHostnameRegister the span was computed for
    //   lastBeg: index of the first hostname character, -1 if none
    //   lastEnd: index just past the hostname, -1 if none
    let lastLen = 0;
    let lastBeg = -1;
    let lastEnd = -1;

    return (url, matchStart) => {
        const len = requestHostnameRegister.length;
        if ( len !== lastLen || url.endsWith('://', lastBeg) === false ) {
            // Remember which hostname length the cache belongs to. Without
            // this the cache never hits for non-empty hostnames, and an
            // empty register could reuse a stale span from a prior request.
            lastLen = len;
            lastBeg = len !== 0 ? url.indexOf('://') : -1;
            if ( lastBeg !== -1 ) {
                lastBeg += 3;
                lastEnd = lastBeg + len;
            } else {
                lastEnd = -1;
            }
        }
        return matchStart < lastEnd && (
            matchStart === lastBeg ||
            matchStart > lastBeg &&
                url.charCodeAt(matchStart - 1) === 0x2E /* '.' */
        );
    };
})();
146
+
147
+
/******************************************************************************/
117
148
118
149
// Local helpers
119
150
@@ -204,27 +235,6 @@ const toLogDataInternal = function(categoryBits, tokenHash, filter) {
204
235
return logData;
205
236
};
206
237
207
-
// First character of match must be within the hostname part of the url.
208
-
//
209
-
// https://github.com/gorhill/uBlock/issues/1929
210
-
// Match only hostname label boundaries.
211
-
const isHnAnchored = (function() {
212
-
let hostname = '';
213
-
let beg = -1, end = -1;
214
-
215
-
return function(url, matchStart) {
216
-
if ( requestHostnameRegister !== hostname ) {
217
-
const hn = requestHostnameRegister;
218
-
beg = hn !== '' ? url.indexOf(hn) : -1;
219
-
end = beg !== -1 ? beg + hn.length : -1;
220
-
hostname = hn;
221
-
}
222
-
if ( matchStart < beg || matchStart >= end ) { return false; }
223
-
return matchStart === beg ||
224
-
url.charCodeAt(matchStart - 1) === 0x2E /* '.' */;
225
-
};
226
-
})();
227
-
228
238
/*******************************************************************************
229
239
230
240
Each filter class will register itself in the map. A filter class
@@ -536,6 +546,52 @@ FilterPlainHnAnchored.prototype.trieableId = 1;
536
546
537
547
registerFilterClass(FilterPlainHnAnchored);
538
548
549
+
/*******************************************************************************
550
+
551
+
Filters with exactly one occurrence of the wildcard `*`
552
+
553
+
*/
554
+
555
+
// Unanchored filter made of two literal segments separated by a single `*`:
// matches when the first segment occurs in the URL and the second segment
// occurs somewhere at or after the end of that first occurrence.
const FilterWildcard1 = class {
    // s0: literal text before the `*`; s1: literal text after it.
    constructor(s0, s1) {
        this.s0 = s0;
        this.s1 = s1;
    }

    // Returns true if `url` contains s0 followed (not necessarily
    // immediately) by s1.
    match(url) {
        const head = url.indexOf(this.s0);
        if ( head === -1 ) { return false; }
        const tail = url.indexOf(this.s1, head + this.s0.length);
        return tail !== -1;
    }

    // Describes this filter for logging: raw pattern, equivalent regex
    // source and compiled form.
    logData() {
        const raw = `${this.s0}*${this.s1}`;
        return {
            raw,
            regex: rawToRegexStr(raw, 0),
            compiled: this.compile()
        };
    }

    // Serializes this instance into its compiled array form.
    compile() {
        return [ this.fid, this.s0, this.s1 ];
    }

    // Returns the compiled form for `details` when the pattern is unanchored
    // and contains exactly one `*` and no other wildcard character (`^` or
    // `*`); returns undefined otherwise so the caller can fall back.
    static compile(details) {
        if ( details.anchor !== 0 ) { return; }
        const s = details.f;
        const star = s.indexOf('*');
        if ( star === -1 ) { return; }
        const before = s.slice(0, star);
        const after = s.slice(star + 1);
        if ( reIsWildcarded.test(after) || reIsWildcarded.test(before) ) {
            return;
        }
        return [ FilterWildcard1.fid, before, after ];
    }

    // Rebuilds an instance from its compiled array form.
    static load(args) {
        return new FilterWildcard1(args[1], args[2]);
    }
};
592
+
593
+
// Add FilterWildcard1 to the filter class registry.
// NOTE(review): presumably this is what assigns the class its `fid` -- confirm
// against registerFilterClass().
registerFilterClass(FilterWildcard1);
594
+
539
595
/******************************************************************************/
540
596
541
597
const FilterGeneric = class {
@@ -571,6 +627,8 @@ const FilterGeneric = class {
571
627
}
572
628
573
629
static compile(details) {
630
+
const compiled = FilterWildcard1.compile(details);
631
+
if ( compiled !== undefined ) { return compiled; }
574
632
return [ FilterGeneric.fid, details.f, details.anchor ];
575
633
}
576
634
@@ -583,6 +641,117 @@ FilterGeneric.prototype.re = null;
583
641
584
642
registerFilterClass(FilterGeneric);
585
643
644
+
/*******************************************************************************
645
+
646
+
Hostname-anchored filters with only one occurrence of wildcard `*`
647
+
648
+
*/
649
+
650
+
// Hostname-anchored filter made of two literal segments separated by a single
// `*`: the first segment must start on a hostname label boundary (as decided
// by isHnAnchored()), and the second segment must occur somewhere at or after
// the end of the first one.
const FilterWildcard1HnAnchored = class {
    // s0: literal text before the `*`; s1: literal text after it.
    constructor(s0, s1) {
        this.s0 = s0;
        this.s1 = s1;
    }

    // Returns true if `url` matches this filter.
    // NOTE(review): only the first occurrence of s0 in `url` is examined; if
    // that occurrence is not hostname-anchored, later occurrences are not
    // tried.
    match(url) {
        const pos = url.indexOf(this.s0);
        return pos !== -1 &&
               isHnAnchored(url, pos) &&
               url.indexOf(this.s1, pos + this.s0.length) !== -1;
    }

    // Describes this filter for logging: raw pattern, equivalent regex
    // source and compiled form.
    logData() {
        return {
            raw: `||${this.s0}*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    // Serializes this instance into its compiled array form.
    compile() {
        return [ this.fid, this.s0, this.s1 ];
    }

    // Returns a compiled array form when the pattern contains exactly one
    // `*` (optionally preceded by a single `^`) and no other wildcard
    // character; returns undefined otherwise so the caller can fall back.
    // A `^*` sequence is delegated to FilterWildcard2HnAnchored.
    static compile(details) {
        // Reject when the lowest anchor bit is set.
        // NOTE(review): presumably the right-anchor bit -- confirm against
        // the parser. The mask is the binary literal 0b001; it was previously
        // spelled `0x0b001`, which is hexadecimal 0xB001 and only behaved the
        // same because bit 0 is common to both values.
        if ( (details.anchor & 0b001) !== 0 ) { return; }
        const s = details.f;
        let pos = s.indexOf('*');
        if ( pos === -1 ) { return; }
        if ( reIsWildcarded.test(s.slice(pos + 1)) ) { return; }
        const needSeparator =
            pos !== 0 && s.charCodeAt(pos - 1) === 0x5E /* '^' */;
        // Step back over the `^` so the prefix check below excludes it.
        if ( needSeparator ) { pos -= 1; }
        if ( reIsWildcarded.test(s.slice(0, pos)) ) { return; }
        if ( needSeparator ) {
            return FilterWildcard2HnAnchored.compile(details, pos);
        }
        return [
            FilterWildcard1HnAnchored.fid,
            s.slice(0, pos),
            s.slice(pos + 1),
        ];
    }

    // Rebuilds an instance from its compiled array form.
    static load(args) {
        return new FilterWildcard1HnAnchored(args[1], args[2]);
    }
};
699
+
700
+
// Add FilterWildcard1HnAnchored to the filter class registry.
// NOTE(review): presumably this is what assigns the class its `fid` -- confirm
// against registerFilterClass().
registerFilterClass(FilterWildcard1HnAnchored);
701
+
702
+
/*******************************************************************************
703
+
704
+
Hostname-anchored filters with one occurrence of the wildcard
705
+
sequence `^*` and no other wildcard-equivalent character
706
+
707
+
*/
708
+
709
+
// Hostname-anchored filter of the form `||s0^*s1`: s0 must start on a
// hostname label boundary (as decided by isHnAnchored()), must be immediately
// followed by one separator character, and s1 must occur somewhere after
// that separator.
const FilterWildcard2HnAnchored = class {
    // s0: literal text before `^*`; s1: literal text after it.
    constructor(s0, s1) {
        this.s0 = s0;
        this.s1 = s1;
    }

    // Returns true if `url` matches this filter.
    // NOTE(review): only the first occurrence of s0 in `url` is examined.
    match(url) {
        const pos0 = url.indexOf(this.s0);
        if ( pos0 === -1 || isHnAnchored(url, pos0) === false ) {
            return false;
        }
        // `^` stands for exactly one separator right after s0. Testing the
        // whole span between s0 and s1 would accept a separator appearing
        // anywhere in it (e.g. `||example.com^*foo` would match
        // `example.com.evil.org/foo`), and searching s1 from the separator's
        // own position would let s1 overlap the separator.
        // charAt() yields '' at end of string, which is not a separator here.
        const pos1 = pos0 + this.s0.length;
        return this.reSeparators.test(url.charAt(pos1)) &&
               url.indexOf(this.s1, pos1 + 1) !== -1;
    }

    // Describes this filter for logging: raw pattern, equivalent regex
    // source and compiled form.
    logData() {
        return {
            raw: `||${this.s0}^*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}^*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    // Serializes this instance into its compiled array form.
    compile() {
        return [ this.fid, this.s0, this.s1 ];
    }

    // Builds the compiled array form from `details.f`, where `pos` is the
    // index of the `^` in the `^*` sequence: everything before `pos` becomes
    // s0 and everything after the two-character `^*` becomes s1.
    static compile(details, pos) {
        return [
            FilterWildcard2HnAnchored.fid,
            details.f.slice(0, pos),
            details.f.slice(pos + 2),
        ];
    }

    // Rebuilds an instance from its compiled array form.
    static load(args) {
        return new FilterWildcard2HnAnchored(args[1], args[2]);
    }
};

// A separator is any character other than a digit, a lowercase ASCII letter,
// `.`, `%`, `_` or `-`.
FilterWildcard2HnAnchored.prototype.reSeparators = /[^0-9a-z.%_-]/;
752
+
753
+
// Add FilterWildcard2HnAnchored to the filter class registry.
// NOTE(review): presumably this is what assigns the class its `fid` -- confirm
// against registerFilterClass().
registerFilterClass(FilterWildcard2HnAnchored);
754
+
586
755
/******************************************************************************/
587
756
588
757
const FilterGenericHnAnchored = class {
@@ -610,6 +779,8 @@ const FilterGenericHnAnchored = class {
610
779
}
611
780
612
781
static compile(details) {
782
+
const compiled = FilterWildcard1HnAnchored.compile(details);
783
+
if ( compiled !== undefined ) { return compiled; }
613
784
return [ FilterGenericHnAnchored.fid, details.f ];
614
785
}
615
786
@@ -1377,7 +1548,10 @@ const FilterBucket = class {
1377
1548
return true;
1378
1549
}
1379
1550
}
1380
-
if ( this.plainHnAnchoredTrie !== null && isHnAnchored(url, tokenBeg) ) {
1551
+
if (
1552
+
this.plainHnAnchoredTrie !== null &&
1553
+
isHnAnchored(url, tokenBeg)
1554
+
) {
1381
1555
const pos = this.plainHnAnchoredTrie.matches(url, tokenBeg);
1382
1556
if ( pos !== -1 ) {
1383
1557
this.plainHnAnchoredFilter.s = url.slice(tokenBeg, pos);
@@ -1524,7 +1698,6 @@ const FilterParser = function() {
1524
1698
this.reHasUnicode = /[^\x00-\x7F]/;
1525
1699
this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;
1526
1700
this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/;
1527
-
this.reIsWildcarded = /[\^\*]/;
1528
1701
this.domainOpt = '';
1529
1702
this.noTokenHash = µb.urlTokenizer.noTokenHash;
1530
1703
this.unsupportedTypeBit = this.bitFromType('unsupported');
@@ -1917,7 +2090,7 @@ FilterParser.prototype.parse = function(raw) {
1917
2090
this.anchor = 0;
1918
2091
}
1919
2092
1920
-
this.wildcarded = this.reIsWildcarded.test(s);
2093
+
this.wildcarded = reIsWildcarded.test(s);
1921
2094
1922
2095
// This might look weird but we gain memory footprint by not going through
1923
2096
// toLowerCase(), at least on Chromium. Because copy-on-write?
@@ -2985,6 +3158,36 @@ FilterContainer.prototype.bucketHistogram = function() {
2985
3158
- FilterPlainHnAnchored and FilterPlainPrefix1 are good candidates
2986
3159
for storing in a plain string trie.
2987
3160
3161
+
As of 2019-04-25:
3162
+
3163
+
{"FilterPlainHnAnchored" => 11078}
3164
+
{"FilterPlainPrefix1" => 7195}
3165
+
{"FilterPrefix1Trie" => 5720}
3166
+
{"FilterOriginHit" => 3561}
3167
+
{"FilterWildcard2HnAnchored" => 2943}
3168
+
{"FilterPair" => 2391}
3169
+
{"FilterBucket" => 1922}
3170
+
{"FilterWildcard1HnAnchored" => 1910}
3171
+
{"FilterHnAnchoredTrie" => 1586}
3172
+
{"FilterPlainHostname" => 1391}
3173
+
{"FilterOriginHitSet" => 1155}
3174
+
{"FilterPlain" => 634}
3175
+
{"FilterWildcard1" => 423}
3176
+
{"FilterGenericHnAnchored" => 389}
3177
+
{"FilterOriginMiss" => 302}
3178
+
{"FilterGeneric" => 163}
3179
+
{"FilterOriginMissSet" => 150}
3180
+
{"FilterRegex" => 124}
3181
+
{"FilterPlainRightAnchored" => 110}
3182
+
{"FilterGenericHnAndRightAnchored" => 95}
3183
+
{"FilterHostnameDict" => 59}
3184
+
{"FilterPlainLeftAnchored" => 30}
3185
+
{"FilterJustOrigin" => 22}
3186
+
{"FilterHTTPJustOrigin" => 19}
3187
+
{"FilterHTTPSJustOrigin" => 18}
3188
+
{"FilterExactMatch" => 5}
3189
+
{"FilterOriginMixedSet" => 3}
3190
+
2988
3191
*/
2989
3192
2990
3193
FilterContainer.prototype.filterClassHistogram = function() {
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4