]> spindle.queued.net Git - midori/commitdiff
Always check rules options, not only in pattern matching
authorAlexander Butenko <a.butenka@gmail.com>
Sat, 2 Jul 2011 18:11:24 +0000 (14:11 -0400)
committerChristian Dywan <christian@twotoasts.de>
Tue, 5 Jul 2011 01:54:31 +0000 (03:54 +0200)
extensions/adblock.c

index 395b8e9964d8e2405ae66063a301e86944ee500c..7eb1704e55fc11d89de20e243499170c53216937 100644 (file)
@@ -39,6 +39,7 @@
 
 static GHashTable* pattern;
 static GHashTable* keys;
+static GHashTable* optslist;
 static GHashTable* urlcache;
 static GString* blockcss;
 static GString* blockcssprivate;
@@ -92,6 +93,9 @@ adblock_init_db ()
     keys = g_hash_table_new_full (g_str_hash, g_str_equal,
                    (GDestroyNotify)g_free,
                    (GDestroyNotify)g_regex_unref);
+    optslist = g_hash_table_new_full (g_str_hash, g_str_equal,
+                   NULL,
+                   (GDestroyNotify)g_free);
     urlcache = g_hash_table_new_full (g_str_hash, g_str_equal,
                    (GDestroyNotify)g_free,
                    (GDestroyNotify)g_free);
@@ -600,16 +604,18 @@ adblock_is_matched_by_pattern (const gchar*  req_uri,
                                const gchar*  page_uri)
 {
     GHashTableIter iter;
-    gpointer opts, regex;
+    gpointer patt, regex;
+    gchar* opts;
 
     if (USE_PATTERN_MATCHING == 0)
         return FALSE;
 
     g_hash_table_iter_init (&iter, pattern);
-    while (g_hash_table_iter_next (&iter, &opts, &regex))
+    while (g_hash_table_iter_next (&iter, &patt, &regex))
     {
         if (g_regex_match_full (regex, req_uri, -1, 0, 0, NULL, NULL))
         {
+            opts = g_hash_table_lookup (optslist, patt);
             if (opts && adblock_check_filter_options (regex, opts, req_uri, page_uri) == TRUE)
                 return FALSE;
             else
@@ -623,8 +629,7 @@ adblock_is_matched_by_pattern (const gchar*  req_uri,
 }
 
 static inline gboolean
-adblock_is_matched_by_key (const gchar*  opts,
-                           const gchar*  req_uri,
+adblock_is_matched_by_key (const gchar*  req_uri,
                            const gchar*  page_uri)
 {
     gchar* uri;
@@ -638,10 +643,13 @@ adblock_is_matched_by_key (const gchar*  opts,
     {
         gchar* sig = g_strndup (uri + pos, SIGNATURE_SIZE);
         GRegex* regex = g_hash_table_lookup (keys, sig);
+        gchar* opts;
+
         if (regex && !g_list_find (regex_bl, regex))
         {
             if (g_regex_match_full (regex, req_uri, -1, 0, 0, NULL, NULL))
             {
+                opts = g_hash_table_lookup (optslist, sig);
                 g_free (sig);
                 if (opts && adblock_check_filter_options (regex, opts, req_uri, page_uri))
                 {
@@ -667,8 +675,7 @@ adblock_is_matched_by_key (const gchar*  opts,
 }
 
 static gboolean
-adblock_is_matched (const gchar*  opts,
-                    const gchar*  req_uri,
+adblock_is_matched (const gchar*  req_uri,
                     const gchar*  page_uri)
 {
     gboolean foundbykey;
@@ -683,7 +690,7 @@ adblock_is_matched (const gchar*  opts,
             return TRUE;
     }
 
-    foundbykey = adblock_is_matched_by_key (opts, req_uri, page_uri);
+    foundbykey = adblock_is_matched_by_key (req_uri, page_uri);
     foundbypattern = adblock_is_matched_by_pattern (req_uri, page_uri);
     if (foundbykey == TRUE || foundbypattern == TRUE)
     {
@@ -773,8 +780,7 @@ adblock_resource_request_starting_cb (WebKitWebView*         web_view,
     if (debug == 2)
         g_test_timer_start ();
     #endif
-    /* TODO: opts should be defined */
-    if (adblock_is_matched (NULL, req_uri, page_uri))
+    if (adblock_is_matched (req_uri, page_uri))
     {
         blocked_uris = g_object_get_data (G_OBJECT (web_view), "blocked-uris");
         blocked_uris = g_list_prepend (blocked_uris, g_strdup (req_uri));
@@ -1035,10 +1041,8 @@ adblock_fixup_regexp (const gchar* prefix,
 }
 
 static void
-adblock_compile_regexp (GHashTable* tbl,
-                        GHashTable* keystbl,
-                        gchar*      patt,
-                        gchar*      opts)
+adblock_compile_regexp (gchar* patt,
+                        gchar* opts)
 {
     GRegex* regex;
     GError* error = NULL;
@@ -1062,34 +1066,38 @@ adblock_compile_regexp (GHashTable* tbl,
     {
         int len = strlen (patt);
         int signature_count = 0;
+
         for (pos = len - SIGNATURE_SIZE; pos >= 0; pos--) {
             sig = g_strndup (patt + pos, SIGNATURE_SIZE);
             if (!g_regex_match_simple ("[\\*]", sig, G_REGEX_UNGREEDY, G_REGEX_MATCH_NOTEMPTY) &&
-                !g_hash_table_lookup (keystbl, sig))
+                !g_hash_table_lookup (keys, sig))
             {
                 adblock_debug ("sig: %s %s", sig, patt);
-                g_hash_table_insert (keystbl, sig, regex);
+                g_hash_table_insert (keys, sig, regex);
+                g_hash_table_insert (optslist, sig, g_strdup (opts));
                 signature_count++;
             }
             else
             {
                 if (g_regex_match_simple ("^\\*", sig, G_REGEX_UNGREEDY, G_REGEX_MATCH_NOTEMPTY) &&
-                    !g_hash_table_lookup (tbl, opts))
+                    !g_hash_table_lookup (pattern, patt))
                 {
                     adblock_debug ("patt2: %s %s", sig, patt);
-                    g_hash_table_insert (tbl, opts, regex);
+                    g_hash_table_insert (pattern, patt, regex);
+                    g_hash_table_insert (optslist, patt, g_strdup (opts));
                 }
                 g_free (sig);
             }
         }
-        if (signature_count > 1 && g_hash_table_lookup (tbl, opts))
-            g_hash_table_steal (tbl, opts);
+        if (signature_count > 1 && g_hash_table_lookup (pattern, patt))
+            g_hash_table_steal (pattern, patt);
     }
     else
     {
         adblock_debug ("patt: %s%s", patt, "");
         /* Pattern is a regexp chars */
-        g_hash_table_insert (tbl, opts, regex);
+        g_hash_table_insert (pattern, patt, regex);
+        g_hash_table_insert (optslist, patt, g_strdup (opts));
     }
 }
 
@@ -1110,19 +1118,19 @@ adblock_add_url_pattern (gchar* prefix,
     if (data[1] && data[2])
     {
         patt = g_strconcat (data[0], data[1], NULL);
-        opts = g_strdup_printf ("t=%s,r=%s,%s", type, patt, data[2]);
+        opts = g_strconcat (type, ",", data[2], NULL);
         g_strfreev (data);
     }
     else if (data[1])
     {
         patt = data[0];
-        opts = g_strdup_printf ("t=%s,r=%s,%s", type, patt, data[1]);
+        opts = g_strconcat (type, ",", data[1], NULL);
         g_free (data[1]);
     }
     else
     {
         patt = data[0];
-        opts = g_strdup_printf ("t=%s,r=%s", type, patt);
+        opts = g_strdup (type);
     }
 
     if (g_regex_match_simple ("subdocument", opts,
@@ -1136,8 +1144,9 @@ adblock_add_url_pattern (gchar* prefix,
     format_patt = adblock_fixup_regexp (prefix, patt);
 
     adblock_debug ("got: %s opts %s", format_patt, opts);
-    adblock_compile_regexp (pattern, keys, format_patt, opts);
+    adblock_compile_regexp (format_patt, opts);
 
+    g_free (opts);
     g_free (patt);
     return format_patt;
 }
@@ -1265,7 +1274,7 @@ adblock_parse_file (gchar* path)
     if ((file = g_fopen (path, "r")))
     {
         while (fgets (line, 2000, file))
-            g_free (adblock_parse_line (line));
+            adblock_parse_line (line);
         fclose (file);
         return TRUE;
     }
@@ -1314,6 +1323,7 @@ adblock_deactivate_cb (MidoriExtension* extension,
         g_string_free (blockcssprivate, TRUE);
     blockcssprivate = blockcss = NULL;
     g_hash_table_destroy (pattern);
+    g_hash_table_destroy (optslist);
     g_hash_table_destroy (urlcache);
 }
 
@@ -1399,28 +1409,28 @@ test_adblock_pattern (void)
     adblock_parse_file (filename);
 
     g_test_timer_start ();
-    g_assert (adblock_is_matched (NULL, "http://www.engadget.com/_uac/adpage.html", ""));
-    g_assert (adblock_is_matched (NULL, "http://test.dom/test?var=1", ""));
-    g_assert (adblock_is_matched (NULL, "http://ads.foo.bar/teddy", ""));
-    g_assert (!adblock_is_matched (NULL, "http://ads.fuu.bar/teddy", ""));
-    g_assert (adblock_is_matched (NULL, "https://ads.bogus.name/blub", ""));
-    g_assert (adblock_is_matched (NULL, "http://ads.bla.blub/kitty", ""));
-    g_assert (adblock_is_matched (NULL, "http://ads.blub.boing/soda", ""));
-    g_assert (!adblock_is_matched (NULL, "http://ads.foo.boing/beer", ""));
-    g_assert (adblock_is_matched (NULL, "https://testsub.engine.adct.ru/test?id=1", ""));
+    g_assert (adblock_is_matched ("http://www.engadget.com/_uac/adpage.html", ""));
+    g_assert (adblock_is_matched ("http://test.dom/test?var=1", ""));
+    g_assert (adblock_is_matched ("http://ads.foo.bar/teddy", ""));
+    g_assert (!adblock_is_matched ("http://ads.fuu.bar/teddy", ""));
+    g_assert (adblock_is_matched ("https://ads.bogus.name/blub", ""));
+    g_assert (adblock_is_matched ("http://ads.bla.blub/kitty", ""));
+    g_assert (adblock_is_matched ("http://ads.blub.boing/soda", ""));
+    g_assert (!adblock_is_matched ("http://ads.foo.boing/beer", ""));
+    g_assert (adblock_is_matched ("https://testsub.engine.adct.ru/test?id=1", ""));
     if (USE_PATTERN_MATCHING)
-        g_assert (adblock_is_matched (NULL, "http://test.ltd/addyn/test/test?var=adtech;&var2=1", ""));
-    g_assert (adblock_is_matched (NULL, "http://add.doubleclick.net/pfadx/aaaa.mtvi", ""));
-    g_assert (!adblock_is_matched (NULL, "http://add.doubleclick.net/pfadx/aaaa.mtv", ""));
-    g_assert (adblock_is_matched (NULL, "http://objects.tremormedia.com/embed/xml/list.xml?r=", ""));
-    g_assert (!adblock_is_matched (NULL, "http://qq.videostrip.c/sub/admatcherclient.php", ""));
-    g_assert (adblock_is_matched (NULL, "http://qq.videostrip.com/sub/admatcherclient.php", ""));
-    g_assert (adblock_is_matched (NULL, "http://qq.videostrip.com/sub/admatcherclient.php", ""));
-    g_assert (adblock_is_matched (NULL, "http://br.gcl.ru/cgi-bin/br/test", ""));
-    g_assert (!adblock_is_matched (NULL, "https://bugs.webkit.org/buglist.cgi?query_format=advanced&short_desc_type=allwordssubstr&short_desc=&long_desc_type=substring&long_desc=&bug_file_loc_type=allwordssubstr&bug_file_loc=&keywords_type=allwords&keywords=&bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&emailassigned_to1=1&emailtype1=substring&email1=&emailassigned_to2=1&emailreporter2=1&emailcc2=1&emailtype2=substring&email2=&bugidtype=include&bug_id=&votes=&chfieldfrom=&chfieldto=Now&chfieldvalue=&query_based_on=gtkport&field0-0-0=keywords&type0-0-0=anywordssubstr&value0-0-0=Gtk%20Cairo%20soup&field0-0-1=short_desc&type0-0-1=anywordssubstr&value0-0-1=Gtk%20Cairo%20soup%20autoconf%20automake%20autotool&field0-0-2=component&type0-0-2=equals&value0-0-2=WebKit%20Gtk", ""));
-    g_assert (!adblock_is_matched (NULL, "http://www.engadget.com/2009/09/24/google-hits-android-rom-modder-with-a-cease-and-desist-letter/", ""));
-    g_assert (!adblock_is_matched (NULL, "http://karibik-invest.com/es/bienes_raices/search.php?sqT=19&sqN=&sqMp=&sqL=0&qR=1&sqMb=&searchMode=1&action=B%FAsqueda", ""));
-    g_assert (!adblock_is_matched (NULL, "http://google.com", ""));
+        g_assert (adblock_is_matched ("http://test.ltd/addyn/test/test?var=adtech;&var2=1", ""));
+    g_assert (adblock_is_matched ("http://add.doubleclick.net/pfadx/aaaa.mtvi", ""));
+    g_assert (!adblock_is_matched ("http://add.doubleclick.net/pfadx/aaaa.mtv", ""));
+    g_assert (adblock_is_matched ("http://objects.tremormedia.com/embed/xml/list.xml?r=", ""));
+    g_assert (!adblock_is_matched ("http://qq.videostrip.c/sub/admatcherclient.php", ""));
+    g_assert (adblock_is_matched ("http://qq.videostrip.com/sub/admatcherclient.php", ""));
+    g_assert (adblock_is_matched ("http://qq.videostrip.com/sub/admatcherclient.php", ""));
+    g_assert (adblock_is_matched ("http://br.gcl.ru/cgi-bin/br/test", ""));
+    g_assert (!adblock_is_matched ("https://bugs.webkit.org/buglist.cgi?query_format=advanced&short_desc_type=allwordssubstr&short_desc=&long_desc_type=substring&long_desc=&bug_file_loc_type=allwordssubstr&bug_file_loc=&keywords_type=allwords&keywords=&bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&emailassigned_to1=1&emailtype1=substring&email1=&emailassigned_to2=1&emailreporter2=1&emailcc2=1&emailtype2=substring&email2=&bugidtype=include&bug_id=&votes=&chfieldfrom=&chfieldto=Now&chfieldvalue=&query_based_on=gtkport&field0-0-0=keywords&type0-0-0=anywordssubstr&value0-0-0=Gtk%20Cairo%20soup&field0-0-1=short_desc&type0-0-1=anywordssubstr&value0-0-1=Gtk%20Cairo%20soup%20autoconf%20automake%20autotool&field0-0-2=component&type0-0-2=equals&value0-0-2=WebKit%20Gtk", ""));
+    g_assert (!adblock_is_matched ("http://www.engadget.com/2009/09/24/google-hits-android-rom-modder-with-a-cease-and-desist-letter/", ""));
+    g_assert (!adblock_is_matched ("http://karibik-invest.com/es/bienes_raices/search.php?sqT=19&sqN=&sqMp=&sqL=0&qR=1&sqMb=&searchMode=1&action=B%FAsqueda", ""));
+    g_assert (!adblock_is_matched ("http://google.com", ""));
 
     g_print ("Search took %f seconds\n", g_test_timer_elapsed ());