A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/gorhill/uBlock/commit/99390390fc12c27c367b9beef85dcd90f187f950 below:

Introduce three more specialized filter classes to avoid regexes · gorhill/uBlock@9939039 · GitHub

@@ -102,6 +102,8 @@ const typeValueToTypeName = {

102 102 103 103

const BlockImportant = BlockAction | Important;

104 104 105 +

const reIsWildcarded = /[\^\*]/;

106 + 105 107

// ABP filters: https://adblockplus.org/en/filters

106 108

// regex tester: http://regex101.com/

107 109

@@ -110,10 +112,39 @@ const BlockImportant = BlockAction | Important;

110 112

// See the following as short-lived registers, used during evaluation. They are

111 113

// valid until the next evaluation.

112 114 113 -

let pageHostnameRegister = '',

114 -

requestHostnameRegister = '';

115 -

//var filterRegister = null;

116 -

//var categoryRegister = '';

115 +

let pageHostnameRegister = '';

116 +

let requestHostnameRegister = '';

117 + 118 +

/******************************************************************************/

119 + 120 +

// First character of match must be within the hostname part of the url.

121 +

//

122 +

// https://github.com/gorhill/uBlock/issues/1929

123 +

// Match only hostname label boundaries.

124 + 125 +

const isHnAnchored = (( ) => {
    // Hostname bounds computed by the last refresh. They are reused as long
    // as the hostname length is unchanged and the URL still has `://` ending
    // exactly where the cached hostname begins.
    let lastLen = 0;
    let lastBeg = -1;
    let lastEnd = -1;

    // url: the URL being evaluated.
    // matchStart: index in `url` of the first character of the match.
    // Returns true when `matchStart` lies inside the hostname (whose length
    // is taken from `requestHostnameRegister`) and sits on a label boundary:
    // either the first hostname character, or a character preceded by '.'.
    return (url, matchStart) => {
        const len = requestHostnameRegister.length;
        if ( len !== lastLen || url.endsWith('://', lastBeg) === false ) {
            // Remember the length the bounds were computed for. Without this
            // the length test above compares against the initial 0 forever:
            // the cache never hits for a non-empty hostname, and stale bounds
            // survive when the hostname register becomes empty.
            lastLen = len;
            lastBeg = len !== 0 ? url.indexOf('://') : -1;
            if ( lastBeg !== -1 ) {
                lastBeg += 3;
                lastEnd = lastBeg + len;
            } else {
                lastEnd = -1;
            }
        }
        return matchStart < lastEnd && (
            matchStart === lastBeg ||
            matchStart > lastBeg &&
                url.charCodeAt(matchStart - 1) === 0x2E /* '.' */
        );
    };
})();

146 + 147 +

/******************************************************************************/

117 148 118 149

// Local helpers

119 150

@@ -204,27 +235,6 @@ const toLogDataInternal = function(categoryBits, tokenHash, filter) {

204 235

return logData;

205 236

};

206 237 207 -

// First character of match must be within the hostname part of the url.

208 -

//

209 -

// https://github.com/gorhill/uBlock/issues/1929

210 -

// Match only hostname label boundaries.

211 -

const isHnAnchored = (function() {
    // Hostname the cached bounds below were computed for.
    let cachedHostname = '';
    // [hnStart, hnStop) is where that hostname was found in the URL which
    // triggered the last refresh, or -1/-1 when it was empty or not found.
    let hnStart = -1;
    let hnStop = -1;

    return function(url, matchStart) {
        const current = requestHostnameRegister;
        if ( current !== cachedHostname ) {
            cachedHostname = current;
            hnStart = current === '' ? -1 : url.indexOf(current);
            hnStop = hnStart === -1 ? -1 : hnStart + current.length;
        }
        // The match must start inside the hostname...
        if ( matchStart < hnStart || matchStart >= hnStop ) {
            return false;
        }
        // ...and on a label boundary: first character, or right after a '.'.
        if ( matchStart === hnStart ) {
            return true;
        }
        return url.charCodeAt(matchStart - 1) === 0x2E /* '.' */;
    };
})();

227 - 228 238

/*******************************************************************************

229 239 230 240

Each filter class will register itself in the map. A filter class

@@ -536,6 +546,52 @@ FilterPlainHnAnchored.prototype.trieableId = 1;

536 546 537 547

registerFilterClass(FilterPlainHnAnchored);

538 548 549 +

/*******************************************************************************

550 + 551 +

Filters with only one single occurrence of wildcard `*`

552 + 553 +

*/

554 + 555 +

const FilterWildcard1 = class {
    // s0: literal text before the single `*`; s1: literal text after it.
    constructor(s0, s1) {
        this.s0 = s0;
        this.s1 = s1;
    }

    // True when `s0` occurs in `url` and `s1` occurs somewhere after the end
    // of that first `s0` occurrence.
    match(url) {
        const start = url.indexOf(this.s0);
        if ( start === -1 ) { return false; }
        return url.indexOf(this.s1, start + this.s0.length) !== -1;
    }

    // Describes this filter for logging: raw text, equivalent regex source,
    // and compiled form.
    logData() {
        const raw = `${this.s0}*${this.s1}`;
        return {
            raw,
            regex: rawToRegexStr(raw, 0),
            compiled: this.compile()
        };
    }

    // Compiled (serializable) form of this instance.
    compile() {
        return [ this.fid, this.s0, this.s1 ];
    }

    // Returns the compiled form when `details` describes an unanchored filter
    // whose only wildcard-like character is one `*`; otherwise returns
    // undefined so that the caller can fall back to another filter class.
    static compile(details) {
        if ( details.anchor !== 0 ) { return; }
        const s = details.f;
        const star = s.indexOf('*');
        if ( star === -1 ) { return; }
        const before = s.slice(0, star);
        const after = s.slice(star + 1);
        if ( reIsWildcarded.test(after) || reIsWildcarded.test(before) ) {
            return;
        }
        return [ FilterWildcard1.fid, before, after ];
    }

    // Rebuilds an instance from its compiled form.
    static load(args) {
        return new FilterWildcard1(args[1], args[2]);
    }
};

592 + 593 +

registerFilterClass(FilterWildcard1);

594 + 539 595

/******************************************************************************/

540 596 541 597

const FilterGeneric = class {

@@ -571,6 +627,8 @@ const FilterGeneric = class {

571 627

}

572 628 573 629

static compile(details) {

630 +

const compiled = FilterWildcard1.compile(details);

631 +

if ( compiled !== undefined ) { return compiled; }

574 632

return [ FilterGeneric.fid, details.f, details.anchor ];

575 633

}

576 634

@@ -583,6 +641,117 @@ FilterGeneric.prototype.re = null;

583 641 584 642

registerFilterClass(FilterGeneric);

585 643 644 +

/*******************************************************************************

645 + 646 +

Hostname-anchored filters with only one occurrence of wildcard `*`

647 + 648 +

*/

649 + 650 +

const FilterWildcard1HnAnchored = class {
    // s0: literal text before the single `*`; s1: literal text after it.
    constructor(s0, s1) {
        this.s0 = s0;
        this.s1 = s1;
    }

    // True when the first occurrence of `s0` in `url` is hostname-anchored
    // (per isHnAnchored) and `s1` occurs somewhere after the end of `s0`.
    match(url) {
        const pos = url.indexOf(this.s0);
        return pos !== -1 &&
            isHnAnchored(url, pos) &&
            url.indexOf(this.s1, pos + this.s0.length) !== -1;
    }

    // Describes this filter for logging: raw text, equivalent regex source,
    // and compiled form.
    logData() {
        return {
            raw: `||${this.s0}*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    // Compiled (serializable) form of this instance.
    compile() {
        return [ this.fid, this.s0, this.s1 ];
    }

    // Returns a compiled form when `details.f` contains exactly one `*` and
    // no other wildcard-like character, except an optional `^` immediately
    // before the `*`, in which case compilation is delegated to
    // FilterWildcard2HnAnchored. Returns undefined otherwise so that the
    // caller can fall back to another filter class.
    static compile(details) {
        // Only bit 0 of the anchor flags disqualifies a filter here
        // (presumably the right-anchor flag -- confirm against FilterParser).
        // The mask used to be written `0x0b001`, which is hexadecimal 0xB001
        // and also tested bits 12, 13 and 15.
        if ( (details.anchor & 0b001) !== 0 ) { return; }
        const s = details.f;
        let pos = s.indexOf('*');
        if ( pos === -1 ) { return; }
        if ( reIsWildcarded.test(s.slice(pos + 1)) ) { return; }
        const needSeparator =
            pos !== 0 && s.charCodeAt(pos - 1) === 0x5E /* '^' */;
        // Step back over the `^` so that the left-hand literal excludes it.
        if ( needSeparator ) { pos -= 1; }
        if ( reIsWildcarded.test(s.slice(0, pos)) ) { return; }
        if ( needSeparator ) {
            return FilterWildcard2HnAnchored.compile(details, pos);
        }
        return [
            FilterWildcard1HnAnchored.fid,
            s.slice(0, pos),
            s.slice(pos + 1),
        ];
    }

    // Rebuilds an instance from its compiled form.
    static load(args) {
        return new FilterWildcard1HnAnchored(args[1], args[2]);
    }
};

699 + 700 +

registerFilterClass(FilterWildcard1HnAnchored);

701 + 702 +

/*******************************************************************************

703 + 704 +

Hostname-anchored filters with one occurrence of the wildcard

705 +

sequence `^*` and no other wildcard-equivalent character

706 + 707 +

*/

708 + 709 +

const FilterWildcard2HnAnchored = class {
    // s0: literal text before the `^*` sequence; s1: literal text after it.
    constructor(s0, s1) {
        this.s0 = s0;
        this.s1 = s1;
    }

    // True when the first occurrence of `s0` in `url` is hostname-anchored
    // (per isHnAnchored), is immediately followed by a separator, and `s1`
    // occurs somewhere after that separator.
    match(url) {
        const pos0 = url.indexOf(this.s0);
        if ( pos0 === -1 || isHnAnchored(url, pos0) === false ) {
            return false;
        }
        const pos1 = pos0 + this.s0.length;
        // `^` also accepts the end of the URL as a separator; nothing can
        // follow it, so this can only match when there is no right-hand text.
        if ( pos1 === url.length ) {
            return this.s1 === '';
        }
        // `^` stands for the single character right after `s0`. Testing the
        // whole gap between `s0` and `s1` for a separator would wrongly let
        // `||example.com^*ads` match `https://example.computer/ads`.
        if ( this.reSeparators.test(url.charAt(pos1)) === false ) {
            return false;
        }
        // `*` then matches anything (including nothing) before `s1`.
        return url.indexOf(this.s1, pos1 + 1) !== -1;
    }

    // Describes this filter for logging: raw text, equivalent regex source,
    // and compiled form.
    logData() {
        return {
            raw: `||${this.s0}^*${this.s1}`,
            regex: rawToRegexStr(`${this.s0}^*${this.s1}`, 0),
            compiled: this.compile()
        };
    }

    // Compiled (serializable) form of this instance.
    compile() {
        return [ this.fid, this.s0, this.s1 ];
    }

    // details.f: filter pattern; pos: index of the `^` of the `^*` sequence.
    // The two characters at `pos` and `pos + 1` are dropped from the result.
    static compile(details, pos) {
        return [
            FilterWildcard2HnAnchored.fid,
            details.f.slice(0, pos),
            details.f.slice(pos + 2),
        ];
    }

    // Rebuilds an instance from its compiled form.
    static load(args) {
        return new FilterWildcard2HnAnchored(args[1], args[2]);
    }
};

// A separator is any character other than a digit, a lowercase letter, or
// one of `.`, `%`, `_`, `-`.
FilterWildcard2HnAnchored.prototype.reSeparators = /[^0-9a-z.%_-]/;

752 + 753 +

registerFilterClass(FilterWildcard2HnAnchored);

754 + 586 755

/******************************************************************************/

587 756 588 757

const FilterGenericHnAnchored = class {

@@ -610,6 +779,8 @@ const FilterGenericHnAnchored = class {

610 779

}

611 780 612 781

static compile(details) {

782 +

const compiled = FilterWildcard1HnAnchored.compile(details);

783 +

if ( compiled !== undefined ) { return compiled; }

613 784

return [ FilterGenericHnAnchored.fid, details.f ];

614 785

}

615 786

@@ -1377,7 +1548,10 @@ const FilterBucket = class {

1377 1548

return true;

1378 1549

}

1379 1550

}

1380 -

if ( this.plainHnAnchoredTrie !== null && isHnAnchored(url, tokenBeg) ) {

1551 +

if (

1552 +

this.plainHnAnchoredTrie !== null &&

1553 +

isHnAnchored(url, tokenBeg)

1554 +

) {

1381 1555

const pos = this.plainHnAnchoredTrie.matches(url, tokenBeg);

1382 1556

if ( pos !== -1 ) {

1383 1557

this.plainHnAnchoredFilter.s = url.slice(tokenBeg, pos);

@@ -1524,7 +1698,6 @@ const FilterParser = function() {

1524 1698

this.reHasUnicode = /[^\x00-\x7F]/;

1525 1699

this.reWebsocketAny = /^ws[s*]?(?::\/?\/?)?\*?$/;

1526 1700

this.reBadCSP = /(?:^|;)\s*report-(?:to|uri)\b/;

1527 -

this.reIsWildcarded = /[\^\*]/;

1528 1701

this.domainOpt = '';

1529 1702

this.noTokenHash = µb.urlTokenizer.noTokenHash;

1530 1703

this.unsupportedTypeBit = this.bitFromType('unsupported');

@@ -1917,7 +2090,7 @@ FilterParser.prototype.parse = function(raw) {

1917 2090

this.anchor = 0;

1918 2091

}

1919 2092 1920 -

this.wildcarded = this.reIsWildcarded.test(s);

2093 +

this.wildcarded = reIsWildcarded.test(s);

1921 2094 1922 2095

// This might look weird but we gain memory footprint by not going through

1923 2096

// toLowerCase(), at least on Chromium. Because copy-on-write?

@@ -2985,6 +3158,36 @@ FilterContainer.prototype.bucketHistogram = function() {

2985 3158

- FilterPlainHnAnchored and FilterPlainPrefix1 are good candidates

2986 3159

for storing in a plain string trie.

2987 3160 3161 +

As of 2019-04-25:

3162 + 3163 +

{"FilterPlainHnAnchored" => 11078}

3164 +

{"FilterPlainPrefix1" => 7195}

3165 +

{"FilterPrefix1Trie" => 5720}

3166 +

{"FilterOriginHit" => 3561}

3167 +

{"FilterWildcard2HnAnchored" => 2943}

3168 +

{"FilterPair" => 2391}

3169 +

{"FilterBucket" => 1922}

3170 +

{"FilterWildcard1HnAnchored" => 1910}

3171 +

{"FilterHnAnchoredTrie" => 1586}

3172 +

{"FilterPlainHostname" => 1391}

3173 +

{"FilterOriginHitSet" => 1155}

3174 +

{"FilterPlain" => 634}

3175 +

{"FilterWildcard1" => 423}

3176 +

{"FilterGenericHnAnchored" => 389}

3177 +

{"FilterOriginMiss" => 302}

3178 +

{"FilterGeneric" => 163}

3179 +

{"FilterOriginMissSet" => 150}

3180 +

{"FilterRegex" => 124}

3181 +

{"FilterPlainRightAnchored" => 110}

3182 +

{"FilterGenericHnAndRightAnchored" => 95}

3183 +

{"FilterHostnameDict" => 59}

3184 +

{"FilterPlainLeftAnchored" => 30}

3185 +

{"FilterJustOrigin" => 22}

3186 +

{"FilterHTTPJustOrigin" => 19}

3187 +

{"FilterHTTPSJustOrigin" => 18}

3188 +

{"FilterExactMatch" => 5}

3189 +

{"FilterOriginMixedSet" => 3}

3190 + 2988 3191

*/

2989 3192 2990 3193

FilterContainer.prototype.filterClassHistogram = function() {


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4