diff --git a/README.md b/README.md index bc80102..ddf51cc 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,25 @@ BSD 2-Clause License. --- +## ngx_headers_more + +ngx_headers_more - Set and clear input and output headers...more than "add"! + +This module is not distributed with the Nginx source. + +### Build + +```bash +./configure \ + --add-dynamic-module=/path/to/ngx_headers_more +``` + +### License + +BSD 2-Clause License. + +--- + ## ngx_http_flv_module HTTP-FLV / RTMP / HLS streaming server module for Nginx. @@ -177,4 +196,25 @@ when compiled with --with-debug, this module will produce high number of log mes ### License +BSD 2-Clause License. + +--- + +## ngx_zstd + +zstd-nginx-module - Nginx module for the [Zstandard compression](https://facebook.github.io/zstd/). + +### Build + +```bash +./configure \ + --add-module=/path/to/ngx_zstd +``` + +### Notes + +This module should be built as a static module (`--add-module`, as shown above). + +### License + BSD 2-Clause License. \ No newline at end of file diff --git a/ngx_headers_more/.gitattributes b/ngx_headers_more/.gitattributes new file mode 100644 index 0000000..6fe6f35 --- /dev/null +++ b/ngx_headers_more/.gitattributes @@ -0,0 +1 @@ +*.t linguist-language=Text diff --git a/ngx_headers_more/.gitignore b/ngx_headers_more/.gitignore new file mode 100644 index 0000000..39c82fa --- /dev/null +++ b/ngx_headers_more/.gitignore @@ -0,0 +1,54 @@ +reindex +.libs +*.swp +*.slo +*.la +*.swo +*.lo +*~ +*.o +print.txt +.rsync +*.tar.gz +dist +build[78] +build +tags +update-readme +*.tmp +test/Makefile +test/blib +test.sh +t.sh +t/t.sh +test/t/servroot/ +releng +reset +*.t_ +genmobi.sh +*.mobi +misc/chunked +src/headers.c +src/headers.h +src/module.c +src/module.h +src/util.c +src/util.h +go +ctags +src/in.c +src/in.h +src/out.c +src/out.h +build[89] +build1[0-9] +buildroot/ +work/ +all +t/servroot +analyze +cov +nginx +*.plist +a.patch +Makefile diff --git a/ngx_headers_more/.travis.yml b/ngx_headers_more/.travis.yml new file mode 100644 index 0000000..1735686 --- 
/dev/null +++ b/ngx_headers_more/.travis.yml @@ -0,0 +1,65 @@ +sudo: required +dist: focal + +branches: + only: + - "master" + +os: linux + +language: c + +compiler: + - gcc + +addons: + apt: + packages: + - axel + - cpanminus + +env: + global: + - LUAJIT_PREFIX=/opt/luajit21 + - LUAJIT_LIB=$LUAJIT_PREFIX/lib + - LUAJIT_INC=$LUAJIT_PREFIX/include/luajit-2.1 + - LD_LIBRARY_PATH=$LUAJIT_LIB:$LD_LIBRARY_PATH + - PCRE_PREFIX=/usr/local/openresty/pcre2 + - PCRE_LIB=$PCRE_PREFIX/lib + - PCRE_INC=$PCRE_PREFIX/include + matrix: + - NGINX_VERSION=1.29.8 + +before_install: + - sudo apt-get update -y + - sudo apt-get install -y ca-certificates + - sudo cpanm --notest Test::Nginx IPC::Run Test2::Util > build.log 2>&1 || (cat build.log && exit 1) + - sudo cpanm -v --notest Test::Nginx > build.log 2>&1 || (cat build.log && exit 1) + - wget -O - https://openresty.org/package/pubkey.gpg | sudo apt-key add - + - echo "deb http://openresty.org/package/ubuntu $(lsb_release -sc) main" | sudo tee /etc/apt/sources.list.d/openresty.list + - sudo apt-get update + - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends openresty-pcre2 openresty-openssl3 openresty-pcre2-dev openresty-openssl3-dev + +install: + - git clone https://github.com/openresty/echo-nginx-module.git ../echo-nginx-module + - git clone https://github.com/openresty/lua-nginx-module.git ../lua-nginx-module + - git clone https://github.com/openresty/lua-resty-core.git ../lua-resty-core + - git clone https://github.com/openresty/lua-resty-lrucache.git ../lua-resty-lrucache + - git clone https://github.com/openresty/nginx-eval-module.git ../eval-nginx-module + - git clone https://github.com/openresty/openresty.git ../openresty + - git clone https://github.com/openresty/no-pool-nginx.git ../no-pool-nginx + - git clone https://github.com/openresty/nginx-devel-utils.git + - git clone -b v2.1-agentzh https://github.com/openresty/luajit2.git luajit2 + +before_script: + - cd luajit2/ + - make -j$JOBS 
CCDEBUG=-g Q= PREFIX=$LUAJIT_PREFIX CC=$CC XCFLAGS='-DLUA_USE_APICHECK -DLUA_USE_ASSERT -msse4.2' > build.log 2>&1 || (cat build.log && exit 1) + - sudo make install PREFIX=$LUAJIT_PREFIX > build.log 2>&1 || (cat build.log && exit 1) + - cd .. + +script: + - export PATH=$PWD/work/nginx/sbin:$PWD/nginx-devel-utils:$PATH + - export NGX_BUILD_CC=$CC + - sh util/build.sh $NGINX_VERSION > build.log 2>&1 || (cat build.log && exit 1) + - prove -I. -r t + diff --git a/ngx_headers_more/LICENSE b/ngx_headers_more/LICENSE new file mode 100644 index 0000000..c14f60a --- /dev/null +++ b/ngx_headers_more/LICENSE @@ -0,0 +1,20 @@ +This module is licensed under the terms of the BSD license. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/ngx_headers_more/README.markdown b/ngx_headers_more/README.markdown new file mode 100644 index 0000000..5ca1b15 --- /dev/null +++ b/ngx_headers_more/README.markdown @@ -0,0 +1,541 @@ +Name +==== + +**ngx_headers_more** - Set and clear input and output headers...more than "add"! + +*This module is not distributed with the Nginx source.* See [the installation instructions](#installation). + +Table of Contents +================= + +* [Name](#name) +* [Version](#version) +* [Synopsis](#synopsis) +* [Description](#description) +* [Directives](#directives) + * [more_set_headers](#more_set_headers) + * [more_clear_headers](#more_clear_headers) + * [more_set_input_headers](#more_set_input_headers) + * [more_clear_input_headers](#more_clear_input_headers) +* [Limitations](#limitations) +* [Installation](#installation) +* [Compatibility](#compatibility) +* [Community](#community) + * [English Mailing List](#english-mailing-list) + * [Chinese Mailing List](#chinese-mailing-list) +* [Bugs and Patches](#bugs-and-patches) +* [Source Repository](#source-repository) +* [Changes](#changes) +* [Test Suite](#test-suite) +* [TODO](#todo) +* [Getting involved](#getting-involved) +* [Authors](#authors) +* [Copyright & License](#copyright--license) +* [See Also](#see-also) + +Version +======= + +This document describes headers-more-nginx-module [v0.34](https://github.com/openresty/headers-more-nginx-module/tags) released on 17 July 2022. + +Synopsis +======== + +```nginx + + # set the Server output header + more_set_headers 'Server: my-server'; + + # set and clear output headers + location /bar { + more_set_headers 'X-MyHeader: blah' 'X-MyHeader2: foo'; + more_set_headers -t 'text/plain text/css' 'Content-Type: text/foo'; + more_set_headers -s '400 404 500 503' -s 413 'Foo: Bar'; + more_clear_headers 'Content-Type'; + + # your proxy_pass/memcached_pass/or any other config goes here... 
+ } + + # set output headers + location /type { + more_set_headers 'Content-Type: text/plain'; + # ... + } + + # set input headers + location /foo { + set $my_host 'my dog'; + more_set_input_headers 'Host: $my_host'; + more_set_input_headers -t 'text/plain' 'X-Foo: bah'; + + # now $host and $http_host have their new values... + # ... + } + + # replace input header X-Foo *only* if it already exists + more_set_input_headers -r 'X-Foo: howdy'; +``` + +Description +=========== + +This module allows you to add, set, or clear any output +or input header that you specify. + +This is an enhanced version of the standard +[headers](http://nginx.org/en/docs/http/ngx_http_headers_module.html) module because it provides more utilities like +resetting or clearing "builtin headers" like `Content-Type`, +`Content-Length`, and `Server`. + +It also allows you to specify an optional HTTP status code +criteria using the `-s` option and an optional content +type criteria using the `-t` option while modifying the +output headers with the [more_set_headers](#more_set_headers) and +[more_clear_headers](#more_clear_headers) directives. For example, + +```nginx + more_set_headers -s 404 -t 'text/html' 'X-Foo: Bar'; +``` + +You can also specify multiple MIME types to filter out in a single `-t` option. +For example, + +```nginx +more_set_headers -t 'text/html text/plain' 'X-Foo: Bar'; +``` + +Never use other parameters like `charset=utf-8` in the `-t` option values; they will not +work as you would expect. + +Input headers can be modified as well. For example + +```nginx + location /foo { + more_set_input_headers 'Host: foo' 'User-Agent: faked'; + # now $host, $http_host, $user_agent, and + # $http_user_agent all have their new values. + } +``` + +The option `-t` is also available in the +[more_set_input_headers](#more_set_input_headers) and +[more_clear_input_headers](#more_clear_input_headers) directives (for request header filtering) while the `-s` option +is not allowed. 
+ +Unlike the standard [headers](http://nginx.org/en/docs/http/ngx_http_headers_module.html) module, this module's directives will by +default apply to all the status codes, including `4xx` and `5xx`. + +[Back to TOC](#table-of-contents) + +Directives +========== + +[Back to TOC](#table-of-contents) + +more_set_headers +---------------- +**syntax:** *more_set_headers [-t <content-type list>]... [-s <status-code list>]... [-a] <new-header>...* + +**default:** *no* + +**context:** *http, server, location, location if* + +**phase:** *output-header-filter* + +Replaces (if any) or adds (if not any) the specified output headers when the response status code matches the codes specified by the `-s` option *AND* the response content type matches the types specified by the `-t` option. + +If the "-a" option is specified, the specified output headers can be appended directly without clearing the old fields. The behavior of builtin headers such as "Content-Type", "Content-Length", "Server", etc. cannot be changed. + +If either `-s` or `-t` is not specified or has an empty list value, then no match is required. Therefore, the following directive set the `Server` output header to the custom value for *any* status code and *any* content type: + +```nginx + + more_set_headers "Server: my_server"; +``` + +Existing response headers with the same name are always overridden. If you want to add headers incrementally, use the standard [add_header](http://nginx.org/en/docs/http/ngx_http_headers_module.html#add_header) directive instead. + +A single directive can set/add multiple output headers. For example + +```nginx + + more_set_headers 'Foo: bar' 'Baz: bah'; +``` + +Multiple occurrences of the options are allowed in a single directive. Their values will be merged together. 
For instance + +```nginx + + more_set_headers -s 404 -s '500 503' 'Foo: bar'; +``` + +is equivalent to + +```nginx + + more_set_headers -s '404 500 503' 'Foo: bar'; +``` + +The new header should be in one of the following forms: + +1. `Name: Value` +1. `Name: ` +1. `Name` + +The last two effectively clear the value of the header `Name`. + +Nginx variables are allowed in header values. For example: + +```nginx + + set $my_var "dog"; + more_set_headers "Server: $my_var"; +``` + +But variables won't work in header keys due to performance considerations. + +Multiple set/clear header directives are allowed in a single location, and they're executed sequentially. + +Directives inherited from an upper level scope (say, http block or server blocks) are executed before the directives in the location block. + +Note that although `more_set_headers` is allowed in *location* if blocks, it is *not* allowed in the *server* if blocks, as in + +```nginx + + ? # This is NOT allowed! + ? server { + ? if ($args ~ 'download') { + ? more_set_headers 'Foo: Bar'; + ? } + ? ... + ? } +``` + +Behind the scenes, use of this directive and its friend [more_clear_headers](#more_clear_headers) will (lazily) register an output header filter that modifies `r->headers_out` the way you specify. + +[Back to TOC](#table-of-contents) + +more_clear_headers +------------------ +**syntax:** *more_clear_headers [-t <content-type list>]... [-s <status-code list>]... <new-header>...* + +**default:** *no* + +**context:** *http, server, location, location if* + +**phase:** *output-header-filter* + +Clears the specified output headers. + +In fact, + +```nginx + + more_clear_headers -s 404 -t 'text/plain' Foo Baz; +``` + +is exactly equivalent to + +```nginx + + more_set_headers -s 404 -t 'text/plain' "Foo: " "Baz: "; +``` + +or + +```nginx + + more_set_headers -s 404 -t 'text/plain' Foo Baz; +``` + +See [more_set_headers](#more_set_headers) for more details. 
+ +The wildcard character, `*`, can also be used at the end of the header name to specify a pattern. For example, the following directive +effectively clears *any* output headers starting with "`X-Hidden-`": + +```nginx + + more_clear_headers 'X-Hidden-*'; +``` + +The `*` wildcard support was first introduced in [v0.09](#v009). + +[Back to TOC](#table-of-contents) + +more_set_input_headers +---------------------- +**syntax:** *more_set_input_headers [-r] [-t <content-type list>]... <new-header>...* + +**default:** *no* + +**context:** *http, server, location, location if* + +**phase:** *rewrite tail* + +Very much like [more_set_headers](#more_set_headers) except that it operates on input headers (or request headers) and it only supports the `-t` option. + +Note that using the `-t` option in this directive means filtering by the `Content-Type` *request* header, rather than the response header. + +Behind the scenes, use of this directive and its friend [more_clear_input_headers](#more_clear_input_headers) will (lazily) +register a `rewrite phase` handler that modifies `r->headers_in` the way you specify. Note that it always runs at the *end* of +the `rewrite` phase so that it runs *after* the standard [rewrite module](http://nginx.org/en/docs/http/ngx_http_rewrite_module.html) +and works in subrequests as well. + +If the `-r` option is specified, then the headers will be replaced with the new values *only if* they already exist. + +[Back to TOC](#table-of-contents) + +more_clear_input_headers +------------------------ +**syntax:** *more_clear_input_headers [-t <content-type list>]... <new-header>...* + +**default:** *no* + +**context:** *http, server, location, location if* + +**phase:** *rewrite tail* + +Clears the specified input headers. 
+ +In fact, + +```nginx + + more_clear_input_headers -t 'text/plain' Foo Baz; +``` + +is exactly equivalent to + +```nginx + + more_set_input_headers -t 'text/plain' "Foo: " "Baz: "; +``` + +or + +```nginx + + more_set_input_headers -t 'text/plain' Foo Baz; +``` + +To remove request headers "Foo" and "Baz" for all incoming requests regardless of the content type, we can write + +```nginx + + more_clear_input_headers "Foo" "Baz"; +``` + +See [more_set_input_headers](#more_set_input_headers) for more details. + +The wildcard character, `*`, can also be used at the end of the header name to specify a pattern. For example, the following directive +effectively clears *any* input headers starting with "`X-Hidden-`": + +```nginx + + more_clear_input_headers 'X-Hidden-*'; +``` + +[Back to TOC](#table-of-contents) + +Limitations +=========== + +* Unlike the standard [headers](http://nginx.org/en/docs/http/ngx_http_headers_module.html) module, this module does not automatically take care of the constraint among the `Expires`, `Cache-Control`, and `Last-Modified` headers. You have to get them right yourself or use the [headers](http://nginx.org/en/docs/http/ngx_http_headers_module.html) module together with this module. +* You cannot remove the `Connection` response header using this module because the `Connection` response header is generated by the standard `ngx_http_header_filter_module` in the Nginx core, whose output header filter always runs *after* the filter of this module. The only way to actually remove the `Connection` header is to patch the Nginx core, that is, editing the C function `ngx_http_header_filter` in the `src/http/ngx_http_header_filter_module.c` file. 
+ +[Back to TOC](#table-of-contents) + +Installation +============ + +Grab the nginx source code from [nginx.org](http://nginx.org/), for example, +the version 1.17.8 (see [nginx compatibility](#compatibility)), and then build the source with this module: + +```bash + + wget 'http://nginx.org/download/nginx-1.17.8.tar.gz' + tar -xzvf nginx-1.17.8.tar.gz + cd nginx-1.17.8/ + + # Here we assume you would install your nginx under /opt/nginx/. + ./configure --prefix=/opt/nginx \ + --add-module=/path/to/headers-more-nginx-module + + make + make install +``` + +Download the latest version of the release tarball of this module from [headers-more-nginx-module file list](https://github.com/openresty/headers-more-nginx-module/tags). + +Starting from NGINX 1.9.11, you can also compile this module as a dynamic module, by using the `--add-dynamic-module=PATH` option instead of `--add-module=PATH` on the +`./configure` command line above. And then you can explicitly load the module in your `nginx.conf` via the [load_module](http://nginx.org/en/docs/ngx_core_module.html#load_module) +directive, for example, + +```nginx +load_module /path/to/modules/ngx_http_headers_more_filter_module.so; +``` + +Also, this module is included and enabled by default in the [OpenResty bundle](http://openresty.org). 
+ +[Back to TOC](#table-of-contents) + +Compatibility +============= + +The following versions of Nginx should work with this module: + +* **1.29.x** (last tested: 1.29.2) +* **1.27.x** (last tested: 1.27.1) +* **1.25.x** (last tested: 1.25.3) +* **1.21.x** (last tested: 1.21.4) +* **1.19.x** (last tested: 1.19.9) +* **1.17.x** (last tested: 1.17.8) +* **1.16.x** +* **1.15.x** (last tested: 1.15.8) +* **1.14.x** +* **1.13.x** (last tested: 1.13.6) +* **1.12.x** +* **1.11.x** (last tested: 1.11.2) +* **1.10.x** +* **1.9.x** (last tested: 1.9.15) +* **1.8.x** +* **1.7.x** (last tested: 1.7.10) +* **1.6.x** (last tested: 1.6.2) +* **1.5.x** (last tested: 1.5.8) +* **1.4.x** (last tested: 1.4.4) +* **1.3.x** (last tested: 1.3.7) +* **1.2.x** (last tested: 1.2.9) +* **1.1.x** (last tested: 1.1.5) +* **1.0.x** (last tested: 1.0.11) +* **0.9.x** (last tested: 0.9.4) +* **0.8.x** (last tested: 0.8.54) +* **0.7.x >= 0.7.44** (last tested: 0.7.68) + +Earlier versions of Nginx like 0.6.x and 0.5.x will *not* work. + +If you find that any particular version of Nginx above 0.7.44 does not work with this module, please consider [reporting a bug](#bugs-and-patches). + +[Back to TOC](#table-of-contents) + +Community +========= + +[Back to TOC](#table-of-contents) + +English Mailing List +-------------------- + +The [openresty-en](https://groups.google.com/group/openresty-en) mailing list is for English speakers. + +[Back to TOC](#table-of-contents) + +Chinese Mailing List +-------------------- + +The [openresty](https://groups.google.com/group/openresty) mailing list is for Chinese speakers. + +[Back to TOC](#table-of-contents) + +Bugs and Patches +================ + +Please submit bug reports, wishlists, or patches by + +1. creating a ticket on the [GitHub Issue Tracker](https://github.com/openresty/headers-more-nginx-module/issues), +1. or posting to the [OpenResty community](#community). 
+ +[Back to TOC](#table-of-contents) + +Source Repository +================= + +Available on github at [openresty/headers-more-nginx-module](https://github.com/openresty/headers-more-nginx-module). + +[Back to TOC](#table-of-contents) + +Changes +======= + +The changes of every release of this module can be obtained from the OpenResty bundle's change logs: + + + +[Back to TOC](#table-of-contents) + +Test Suite +========== + +This module comes with a Perl-driven test suite. The [test cases](https://github.com/openresty/headers-more-nginx-module/tree/master/t/) are +[declarative](https://github.com/openresty/headers-more-nginx-module/blob/master/t/sanity.t) too. Thanks to the [Test::Nginx](http://search.cpan.org/perldoc?Test::Nginx) module in the Perl world. + +To run it on your side: + +```bash + + $ PATH=/path/to/your/nginx-with-headers-more-module:$PATH prove -r t +``` + +To run the test suite with valgrind's memcheck, use the following commands: + +```bash + + $ export PATH=/path/to/your/nginx-with-headers-more-module:$PATH + $ TEST_NGINX_USE_VALGRIND=1 prove -r t +``` + +You need to terminate any Nginx processes before running the test suite if you have changed the Nginx server binary. + +Because a single nginx server (by default, `localhost:1984`) is used across all the test scripts (`.t` files), it's meaningless to run the test suite in parallel by specifying `-jN` when invoking the `prove` utility. + +Some parts of the test suite requires modules [proxy](http://nginx.org/en/docs/http/ngx_http_proxy_module.html), [rewrite](http://nginx.org/en/docs/http/ngx_http_rewrite_module.html), and [echo](https://github.com/openresty/echo-nginx-module) to be enabled as well when building Nginx. + +[Back to TOC](#table-of-contents) + +TODO +==== + +* Support variables in new headers' keys. 
+ +[Back to TOC](#table-of-contents) + +Getting involved +================ + +You are very welcome to submit patches to the [authors](#authors) or just ask for a commit bit to the [source repository](#source-repository) on GitHub. + +[Back to TOC](#table-of-contents) + +Authors +======= + +* Yichun "agentzh" Zhang (章亦春) *<agentzh@gmail.com>*, OpenResty Inc. +* Bernd Dorn ( ) + +This wiki page is also maintained by the author himself, and everybody is encouraged to improve this page as well. + +[Back to TOC](#table-of-contents) + +Copyright & License +=================== + +The code base is borrowed directly from the standard [headers](http://nginx.org/en/docs/http/ngx_http_headers_module.html) module in Nginx 0.8.24. This part of code is copyrighted by Igor Sysoev. + +Copyright (c) 2009-2025, Yichun "agentzh" Zhang (章亦春) <agentzh@gmail.com>, OpenResty Inc. + +Copyright (c) 2010-2013, Bernd Dorn. + +The license text is available in the [LICENSE](LICENSE) file located in the root directory of the project. + +[Back to TOC](#table-of-contents) + +See Also +======== + +* The original thread on the Nginx mailing list that inspired this module's development: ["A question about add_header replication"](http://forum.nginx.org/read.php?2,11206,11738). +* The original announcement thread on the Nginx mailing list: ["The "headers_more" module: Set and clear output headers...more than 'add'!"](http://forum.nginx.org/read.php?2,23460). +* The original [blog post](http://agentzh.blogspot.com/2009/11/headers-more-module-scripting-input-and.html) about this module's initial development. +* The [echo module](https://github.com/openresty/echo-nginx-module) for Nginx module's automated testing. +* The standard [headers](http://nginx.org/en/docs/http/ngx_http_headers_module.html) module. 
+ +[Back to TOC](#table-of-contents) + diff --git a/ngx_headers_more/config b/ngx_headers_more/config new file mode 100644 index 0000000..5707cc4 --- /dev/null +++ b/ngx_headers_more/config @@ -0,0 +1,32 @@ +ngx_addon_name=ngx_http_headers_more_filter_module + +HEADERS_MORE_SRCS=" \ + $ngx_addon_dir/src/ngx_http_headers_more_filter_module.c \ + $ngx_addon_dir/src/ngx_http_headers_more_headers_out.c \ + $ngx_addon_dir/src/ngx_http_headers_more_headers_in.c \ + $ngx_addon_dir/src/ngx_http_headers_more_util.c \ + " + +HEADERS_MORE_DEPS=" \ + $ngx_addon_dir/src/ddebug.h \ + $ngx_addon_dir/src/ngx_http_headers_more_filter_module.h \ + $ngx_addon_dir/src/ngx_http_headers_more_headers_in.h \ + $ngx_addon_dir/src/ngx_http_headers_more_headers_out.h \ + $ngx_addon_dir/src/ngx_http_headers_more_headers_in.h \ + $ngx_addon_dir/src/ngx_http_headers_more_util.h \ + " + +if test -n "$ngx_module_link"; then + ngx_module_type=HTTP_AUX_FILTER + ngx_module_name=$ngx_addon_name + ngx_module_incs= + ngx_module_deps="$HEADERS_MORE_DEPS" + ngx_module_srcs="$HEADERS_MORE_SRCS" + ngx_module_libs= + + . auto/module +else + HTTP_AUX_FILTER_MODULES="$HTTP_AUX_FILTER_MODULES $ngx_addon_name" + NGX_ADDON_SRCS="$NGX_ADDON_SRCS $HEADERS_MORE_SRCS" + NGX_ADDON_DEPS="$NGX_ADDON_DEPS $HEADERS_MORE_DEPS" +fi diff --git a/ngx_headers_more/src/ddebug.h b/ngx_headers_more/src/ddebug.h new file mode 100644 index 0000000..13879af --- /dev/null +++ b/ngx_headers_more/src/ddebug.h @@ -0,0 +1,124 @@ +#ifndef DDEBUG_H +#define DDEBUG_H + + +#include +#include +#include +#include + + +#if defined(DDEBUG) && (DDEBUG) + +# if (NGX_HAVE_VARIADIC_MACROS) + +# define dd(...) fprintf(stderr, "headers-more *** %s: ", __func__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " at %s line %d.\n", __FILE__, __LINE__) + +# else + +#include +#include + +#include + +static ngx_inline void +dd(const char * fmt, ...) 
{ +} + +# endif + +# if DDEBUG > 1 + +# define dd_enter() dd_enter_helper(r, __func__) + +# if defined(nginx_version) && nginx_version >= 8011 +# define dd_main_req_count r->main->count +# else +# define dd_main_req_count 0 +# endif + +static ngx_inline void +dd_enter_helper(ngx_http_request_t *r, const char *func) +{ + ngx_http_posted_request_t *pr; + + fprintf(stderr, "headers-more *** enter %s %.*s %.*s?%.*s c:%d m:%p r:%p ar:%p pr:%p", + func, + (int) r->method_name.len, r->method_name.data, + (int) r->uri.len, r->uri.data, + (int) r->args.len, r->args.data, + (int) dd_main_req_count, r->main, + r, r->connection->data, r->parent); + + if (r->posted_requests) { + fprintf(stderr, " posted:"); + + for (pr = r->posted_requests; pr; pr = pr->next) { + fprintf(stderr, "%p,", pr); + } + } + + fprintf(stderr, "\n"); +} + +# else + +# define dd_enter() + +# endif + +#else + +# if (NGX_HAVE_VARIADIC_MACROS) + +# define dd(...) + +# define dd_enter() + +# else + +#include + +static ngx_inline void +dd(const char * fmt, ...) { +} + +static ngx_inline void +dd_enter() { +} + +# endif + +#endif + +#if defined(DDEBUG) && (DDEBUG) + +#define dd_check_read_event_handler(r) \ + dd("r->read_event_handler = %s", \ + r->read_event_handler == ngx_http_block_reading ? \ + "ngx_http_block_reading" : \ + r->read_event_handler == ngx_http_test_reading ? \ + "ngx_http_test_reading" : \ + r->read_event_handler == ngx_http_request_empty_handler ? \ + "ngx_http_request_empty_handler" : "UNKNOWN") + +#define dd_check_write_event_handler(r) \ + dd("r->write_event_handler = %s", \ + r->write_event_handler == ngx_http_handler ? \ + "ngx_http_handler" : \ + r->write_event_handler == ngx_http_core_run_phases ? \ + "ngx_http_core_run_phases" : \ + r->write_event_handler == ngx_http_request_empty_handler ? 
\ + "ngx_http_request_empty_handler" : "UNKNOWN") + +#else + +#define dd_check_read_event_handler(r) +#define dd_check_write_event_handler(r) + +#endif + +#endif /* DDEBUG_H */ + diff --git a/ngx_headers_more/src/ngx_http_headers_more_filter_module.c b/ngx_headers_more/src/ngx_http_headers_more_filter_module.c new file mode 100644 index 0000000..0bb6fec --- /dev/null +++ b/ngx_headers_more/src/ngx_http_headers_more_filter_module.c @@ -0,0 +1,348 @@ + +/* + * Copyright (C) Yichun Zhang (agentzh) + */ + + +#ifndef DDEBUG +#define DDEBUG 0 +#endif +#include "ddebug.h" + + +#include "ngx_http_headers_more_filter_module.h" +#include "ngx_http_headers_more_headers_out.h" +#include "ngx_http_headers_more_headers_in.h" +#include "ngx_http_headers_more_util.h" +#include + + +/* config handlers */ + +static void *ngx_http_headers_more_create_loc_conf(ngx_conf_t *cf); +static char *ngx_http_headers_more_merge_loc_conf(ngx_conf_t *cf, + void *parent, void *child); +static void *ngx_http_headers_more_create_main_conf(ngx_conf_t *cf); +static ngx_int_t ngx_http_headers_more_post_config(ngx_conf_t *cf); + +/* post-read-phase handler */ + +static ngx_int_t ngx_http_headers_more_handler(ngx_http_request_t *r); + +/* filter handlers */ + +static ngx_int_t ngx_http_headers_more_filter_init(ngx_conf_t *cf); + +ngx_uint_t ngx_http_headers_more_location_hash = 0; + + +static ngx_command_t ngx_http_headers_more_filter_commands[] = { + + { ngx_string("more_set_headers"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_HTTP_LIF_CONF + |NGX_CONF_1MORE, + ngx_http_headers_more_set_headers, + NGX_HTTP_LOC_CONF_OFFSET, + 0, + NULL}, + + { ngx_string("more_clear_headers"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_HTTP_LIF_CONF + |NGX_CONF_1MORE, + ngx_http_headers_more_clear_headers, + NGX_HTTP_LOC_CONF_OFFSET, + 0, + NULL}, + + { ngx_string("more_set_input_headers"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_HTTP_LIF_CONF + |NGX_CONF_1MORE, + 
ngx_http_headers_more_set_input_headers, + NGX_HTTP_LOC_CONF_OFFSET, + 0, + NULL}, + + { ngx_string("more_clear_input_headers"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_HTTP_LIF_CONF + |NGX_CONF_1MORE, + ngx_http_headers_more_clear_input_headers, + NGX_HTTP_LOC_CONF_OFFSET, + 0, + NULL}, + + ngx_null_command +}; + + +static ngx_http_module_t ngx_http_headers_more_filter_module_ctx = { + NULL, /* preconfiguration */ + ngx_http_headers_more_post_config, /* postconfiguration */ + + ngx_http_headers_more_create_main_conf, /* create main configuration */ + NULL, /* init main configuration */ + + NULL, /* create server configuration */ + NULL, /* merge server configuration */ + + ngx_http_headers_more_create_loc_conf, /* create location configuration */ + ngx_http_headers_more_merge_loc_conf /* merge location configuration */ +}; + + +ngx_module_t ngx_http_headers_more_filter_module = { + NGX_MODULE_V1, + &ngx_http_headers_more_filter_module_ctx, /* module context */ + ngx_http_headers_more_filter_commands, /* module directives */ + NGX_HTTP_MODULE, /* module type */ + NULL, /* init master */ + NULL, /* init module */ + NULL, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + NULL, /* exit process */ + NULL, /* exit master */ + NGX_MODULE_V1_PADDING +}; + + +static ngx_http_output_header_filter_pt ngx_http_next_header_filter; + + +static volatile ngx_cycle_t *ngx_http_headers_more_prev_cycle = NULL; + + +static ngx_int_t +ngx_http_headers_more_filter(ngx_http_request_t *r) +{ + ngx_int_t rc; + ngx_uint_t i; + ngx_http_headers_more_loc_conf_t *conf; + ngx_http_headers_more_cmd_t *cmd; + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "headers more header filter, uri \"%V\"", &r->uri); + + conf = ngx_http_get_module_loc_conf(r, ngx_http_headers_more_filter_module); + + if (conf->cmds) { + cmd = conf->cmds->elts; + for (i = 0; i < conf->cmds->nelts; i++) { + if (cmd[i].is_input) { + continue; + } + + rc = 
ngx_http_headers_more_exec_cmd(r, &cmd[i]); + + if (rc != NGX_OK) { + return rc; + } + } + } + + return ngx_http_next_header_filter(r); +} + + +static ngx_int_t +ngx_http_headers_more_filter_init(ngx_conf_t *cf) +{ + ngx_http_next_header_filter = ngx_http_top_header_filter; + ngx_http_top_header_filter = ngx_http_headers_more_filter; + + return NGX_OK; +} + + +static void * +ngx_http_headers_more_create_loc_conf(ngx_conf_t *cf) +{ + ngx_http_headers_more_loc_conf_t *conf; + + conf = ngx_pcalloc(cf->pool, sizeof(ngx_http_headers_more_loc_conf_t)); + if (conf == NULL) { + return NULL; + } + + /* + * set by ngx_pcalloc(): + * + * conf->cmds = NULL; + */ + + return conf; +} + + +static char * +ngx_http_headers_more_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) +{ + ngx_uint_t i; + ngx_uint_t orig_len; + ngx_http_headers_more_cmd_t *prev_cmd, *cmd; + ngx_http_headers_more_loc_conf_t *prev = parent; + ngx_http_headers_more_loc_conf_t *conf = child; + + if (conf->cmds == NULL || conf->cmds->nelts == 0) { + conf->cmds = prev->cmds; + + } else if (prev->cmds && prev->cmds->nelts) { + orig_len = conf->cmds->nelts; + + (void) ngx_array_push_n(conf->cmds, prev->cmds->nelts); + + cmd = conf->cmds->elts; + + for (i = 0; i < orig_len; i++) { + cmd[conf->cmds->nelts - 1 - i] = cmd[orig_len - 1 - i]; + } + + prev_cmd = prev->cmds->elts; + + for (i = 0; i < prev->cmds->nelts; i++) { + cmd[i] = prev_cmd[i]; + } + } + + return NGX_CONF_OK; +} + + +static ngx_int_t +ngx_http_headers_more_post_config(ngx_conf_t *cf) +{ + int multi_http_blocks; + ngx_int_t rc; + ngx_http_handler_pt *h; + ngx_http_core_main_conf_t *cmcf; + + ngx_http_headers_more_main_conf_t *hmcf; + + ngx_http_headers_more_location_hash = + ngx_http_headers_more_hash_literal("location"); + + hmcf = ngx_http_conf_get_module_main_conf(cf, + ngx_http_headers_more_filter_module); + + if (ngx_http_headers_more_prev_cycle != ngx_cycle) { + ngx_http_headers_more_prev_cycle = ngx_cycle; + multi_http_blocks = 0; + + } 
else { + multi_http_blocks = 1; + } + + if (multi_http_blocks || hmcf->requires_filter) { + rc = ngx_http_headers_more_filter_init(cf); + if (rc != NGX_OK) { + return rc; + } + } + + if (!hmcf->requires_handler) { + return NGX_OK; + } + + cmcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_core_module); + + h = ngx_array_push(&cmcf->phases[NGX_HTTP_REWRITE_PHASE].handlers); + if (h == NULL) { + return NGX_ERROR; + } + + *h = ngx_http_headers_more_handler; + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_headers_more_handler(ngx_http_request_t *r) +{ + ngx_int_t rc; + ngx_uint_t i; + ngx_http_headers_more_loc_conf_t *conf; + ngx_http_headers_more_main_conf_t *hmcf; + ngx_http_headers_more_cmd_t *cmd; + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "headers more rewrite handler, uri \"%V\"", &r->uri); + + hmcf = ngx_http_get_module_main_conf(r, + ngx_http_headers_more_filter_module); + + if (!hmcf->postponed_to_phase_end) { + ngx_http_core_main_conf_t *cmcf; + ngx_http_phase_handler_t tmp; + ngx_http_phase_handler_t *ph; + ngx_http_phase_handler_t *cur_ph; + ngx_http_phase_handler_t *last_ph; + + hmcf->postponed_to_phase_end = 1; + + cmcf = ngx_http_get_module_main_conf(r, ngx_http_core_module); + + ph = cmcf->phase_engine.handlers; + cur_ph = &ph[r->phase_handler]; + last_ph = &ph[cur_ph->next - 1]; + + if (cur_ph < last_ph) { + dd("swaping the contents of cur_ph and last_ph..."); + + tmp = *cur_ph; + + memmove(cur_ph, cur_ph + 1, + (last_ph - cur_ph) * sizeof (ngx_http_phase_handler_t)); + + *last_ph = tmp; + + r->phase_handler--; /* redo the current ph */ + + return NGX_DECLINED; + } + } + + dd("running phase handler..."); + + conf = ngx_http_get_module_loc_conf(r, ngx_http_headers_more_filter_module); + + if (conf->cmds) { + if (r->http_version < NGX_HTTP_VERSION_10) { + return NGX_DECLINED; + } + + cmd = conf->cmds->elts; + for (i = 0; i < conf->cmds->nelts; i++) { + if (!cmd[i].is_input) { + continue; + } + + rc = 
ngx_http_headers_more_exec_input_cmd(r, &cmd[i]); + + if (rc != NGX_OK) { + return rc; + } + } + } + + return NGX_DECLINED; +} + + +static void * +ngx_http_headers_more_create_main_conf(ngx_conf_t *cf) +{ + ngx_http_headers_more_main_conf_t *hmcf; + + hmcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_headers_more_main_conf_t)); + if (hmcf == NULL) { + return NULL; + } + + /* set by ngx_pcalloc: + * hmcf->postponed_to_phase_end = 0; + * hmcf->requires_filter = 0; + * hmcf->requires_handler = 0; + */ + + return hmcf; +} diff --git a/ngx_headers_more/src/ngx_http_headers_more_filter_module.h b/ngx_headers_more/src/ngx_http_headers_more_filter_module.h new file mode 100644 index 0000000..5f31ab4 --- /dev/null +++ b/ngx_headers_more/src/ngx_http_headers_more_filter_module.h @@ -0,0 +1,81 @@ + +/* + * Copyright (c) Yichun Zhang (agentzh) + */ + + +#ifndef NGX_HTTP_HEADERS_MORE_FILTER_MODULE_H +#define NGX_HTTP_HEADERS_MORE_FILTER_MODULE_H + + +#include +#include +#include + + +typedef enum { + ngx_http_headers_more_opcode_set, + ngx_http_headers_more_opcode_clear +} ngx_http_headers_more_opcode_t; + + +typedef struct { + ngx_array_t *types; /* of ngx_str_t */ + ngx_array_t *statuses; /* of ngx_uint_t */ + ngx_array_t *headers; /* of ngx_http_header_val_t */ + ngx_flag_t is_input; +} ngx_http_headers_more_cmd_t; + + +typedef struct { + ngx_array_t *cmds; /* of ngx_http_headers_more_cmd_t */ +} ngx_http_headers_more_loc_conf_t; + + +typedef struct { + ngx_int_t postponed_to_phase_end; + ngx_int_t requires_filter; + ngx_int_t requires_handler; +} ngx_http_headers_more_main_conf_t; + + +typedef struct ngx_http_headers_more_header_val_s + ngx_http_headers_more_header_val_t; + + +typedef ngx_int_t (*ngx_http_headers_more_set_header_pt)(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); + + +typedef struct { + ngx_str_t name; + ngx_uint_t offset; + ngx_http_headers_more_set_header_pt handler; +} ngx_http_headers_more_set_header_t; + + +struct 
ngx_http_headers_more_header_val_s { + ngx_http_complex_value_t value; + ngx_uint_t hash; + ngx_str_t key; + ngx_http_headers_more_set_header_pt handler; + ngx_uint_t offset; + unsigned replace:1; + unsigned wildcard:1; + unsigned append:1; +}; + + +extern ngx_module_t ngx_http_headers_more_filter_module; + + +#ifndef ngx_str_set +#define ngx_str_set(str, text) \ + (str)->len = sizeof(text) - 1; (str)->data = (u_char *) text +#endif + + +#define ngx_http_headers_more_assert(a) assert(a) + + +#endif /* NGX_HTTP_HEADERS_MORE_FILTER_MODULE_H */ diff --git a/ngx_headers_more/src/ngx_http_headers_more_headers_in.c b/ngx_headers_more/src/ngx_http_headers_more_headers_in.c new file mode 100644 index 0000000..983be5b --- /dev/null +++ b/ngx_headers_more/src/ngx_http_headers_more_headers_in.c @@ -0,0 +1,959 @@ + +/* + * Copyright (C) Yichun Zhang (agentzh) + */ + + +#ifndef DDEBUG +#define DDEBUG 0 +#endif +#include "ddebug.h" + + +#include "ngx_http_headers_more_headers_in.h" +#include "ngx_http_headers_more_util.h" +#include + + +static char *ngx_http_headers_more_parse_directive(ngx_conf_t *cf, + ngx_command_t *ngx_cmd, void *conf, + ngx_http_headers_more_opcode_t opcode); +static int ngx_http_headers_more_check_type(ngx_http_request_t *r, + ngx_array_t *types); +static ngx_int_t ngx_http_set_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_header_helper(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value, + ngx_table_elt_t **output_header); +static ngx_int_t ngx_http_set_builtin_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_user_agent_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_content_length_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t 
ngx_http_clear_builtin_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_clear_content_length_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_host_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_connection_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_builtin_multi_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_headers_more_validate_host(ngx_str_t *host, + ngx_pool_t *pool, ngx_uint_t alloc); + + +static ngx_http_headers_more_set_header_t ngx_http_headers_more_set_handlers[] + = { + + { ngx_string("Host"), + offsetof(ngx_http_headers_in_t, host), + ngx_http_set_host_header }, + + { ngx_string("Connection"), + offsetof(ngx_http_headers_in_t, connection), + ngx_http_set_connection_header }, + + { ngx_string("If-Modified-Since"), + offsetof(ngx_http_headers_in_t, if_modified_since), + ngx_http_set_builtin_header }, + +#if defined(nginx_version) && nginx_version >= 9002 + { ngx_string("If-Unmodified-Since"), + offsetof(ngx_http_headers_in_t, if_unmodified_since), + ngx_http_set_builtin_header }, +#endif + +#if defined(nginx_version) && nginx_version >= 1003003 + { ngx_string("If-Match"), + offsetof(ngx_http_headers_in_t, if_match), + ngx_http_set_builtin_header }, + + { ngx_string("If-None-Match"), + offsetof(ngx_http_headers_in_t, if_none_match), + ngx_http_set_builtin_header }, +#endif + + { ngx_string("User-Agent"), + offsetof(ngx_http_headers_in_t, user_agent), + ngx_http_set_user_agent_header }, + + { ngx_string("Referer"), + offsetof(ngx_http_headers_in_t, referer), + ngx_http_set_builtin_header }, + + { ngx_string("Content-Length"), + offsetof(ngx_http_headers_in_t, content_length), + 
ngx_http_set_content_length_header }, + + { ngx_string("Content-Type"), + offsetof(ngx_http_headers_in_t, content_type), + ngx_http_set_builtin_header }, + + { ngx_string("Range"), + offsetof(ngx_http_headers_in_t, range), + ngx_http_set_builtin_header }, + + { ngx_string("If-Range"), + offsetof(ngx_http_headers_in_t, if_range), + ngx_http_set_builtin_header }, + + { ngx_string("Transfer-Encoding"), + offsetof(ngx_http_headers_in_t, transfer_encoding), + ngx_http_set_builtin_header }, + + { ngx_string("Expect"), + offsetof(ngx_http_headers_in_t, expect), + ngx_http_set_builtin_header }, + +#if defined(nginx_version) && nginx_version >= 1003013 + { ngx_string("Upgrade"), + offsetof(ngx_http_headers_in_t, upgrade), + ngx_http_set_builtin_header }, +#endif + +#if (NGX_HTTP_GZIP) + { ngx_string("Accept-Encoding"), + offsetof(ngx_http_headers_in_t, accept_encoding), + ngx_http_set_builtin_header }, + + { ngx_string("Via"), offsetof(ngx_http_headers_in_t, via), + ngx_http_set_builtin_header }, +#endif + + { ngx_string("Authorization"), + offsetof(ngx_http_headers_in_t, authorization), + ngx_http_set_builtin_header }, + + { ngx_string("Keep-Alive"), + offsetof(ngx_http_headers_in_t, keep_alive), + ngx_http_set_builtin_header }, + +#if (NGX_HTTP_X_FORWARDED_FOR) + { ngx_string("X-Forwarded-For"), + offsetof(ngx_http_headers_in_t, x_forwarded_for), + ngx_http_set_builtin_multi_header }, + +#endif + +#if (NGX_HTTP_REALIP) + { ngx_string("X-Real-IP"), + offsetof(ngx_http_headers_in_t, x_real_ip), + ngx_http_set_builtin_header }, +#endif + +#if (NGX_HTTP_DAV) + { ngx_string("Depth"), offsetof(ngx_http_headers_in_t, depth), + ngx_http_set_builtin_header }, + + { ngx_string("Destination"), offsetof(ngx_http_headers_in_t, destination), + ngx_http_set_builtin_header }, + + { ngx_string("Overwrite"), offsetof(ngx_http_headers_in_t, overwrite), + ngx_http_set_builtin_header }, + + { ngx_string("Date"), offsetof(ngx_http_headers_in_t, date), + ngx_http_set_builtin_header }, +#endif + 
+#if defined(nginx_version) && nginx_version >= 1023000 + { ngx_string("Cookie"), + offsetof(ngx_http_headers_in_t, cookie), + ngx_http_set_builtin_multi_header }, +#else + { ngx_string("Cookie"), + offsetof(ngx_http_headers_in_t, cookies), + ngx_http_set_builtin_multi_header }, +#endif + + { ngx_null_string, 0, ngx_http_set_header } +}; + + +ngx_int_t +ngx_http_headers_more_exec_input_cmd(ngx_http_request_t *r, + ngx_http_headers_more_cmd_t *cmd) +{ + ngx_str_t value; + ngx_http_headers_more_header_val_t *h; + ngx_uint_t i; + + if (!cmd->headers) { + return NGX_OK; + } + + if (cmd->types && !ngx_http_headers_more_check_type(r, cmd->types)) { + return NGX_OK; + } + + h = cmd->headers->elts; + for (i = 0; i < cmd->headers->nelts; i++) { + + if (ngx_http_complex_value(r, &h[i].value, &value) != NGX_OK) { + return NGX_ERROR; + } + + if (value.len) { + value.len--; /* remove the trailing '\0' added by + ngx_http_headers_more_parse_header */ + } + + if (h[i].handler(r, &h[i], &value) != NGX_OK) { + return NGX_ERROR; + } + } + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_set_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + return ngx_http_set_header_helper(r, hv, value, NULL); +} + + +static ngx_int_t +ngx_http_set_header_helper(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value, + ngx_table_elt_t **output_header) +{ + ngx_table_elt_t *h, *matched; + ngx_list_part_t *part; + ngx_uint_t i; + ngx_uint_t rc; + + dd_enter(); + + matched = NULL; + +retry: + + part = &r->headers_in.headers.part; + h = part->elts; + + for (i = 0; /* void */; i++) { + dd("i: %d, part: %p", (int) i, part); + + if (i >= part->nelts) { + if (part->next == NULL) { + break; + } + + part = part->next; + h = part->elts; + i = 0; + } + + if (!hv->wildcard + && h[i].key.len == hv->key.len + && ngx_strncasecmp(h[i].key.data, hv->key.data, + h[i].key.len) == 0) + { + goto matched; + } + + if (hv->wildcard + && value->len == 
0 + && h[i].key.len >= hv->key.len - 1 + && ngx_strncasecmp(h[i].key.data, hv->key.data, + hv->key.len - 1) == 0) + { + goto matched; + } + + /* not matched */ + continue; + +matched: + + if (value->len == 0 || (matched && matched != &h[i])) { + h[i].hash = 0; + + rc = ngx_http_headers_more_rm_header_helper( + &r->headers_in.headers, part, i); + + ngx_http_headers_more_assert( + !(r->headers_in.headers.part.next == NULL + && r->headers_in.headers.last + != &r->headers_in.headers.part)); + + if (rc == NGX_OK) { + if (output_header) { + *output_header = NULL; + } + + goto retry; + } + + return NGX_ERROR; + } + + h[i].value = *value; + + if (output_header) { + *output_header = &h[i]; + dd("setting existing builtin input header"); + } + + if (matched == NULL) { + matched = &h[i]; + } + } + + if (matched) { + return NGX_OK; + } + + if (value->len == 0 || hv->replace) { + return NGX_OK; + } + + if (r->headers_in.headers.last == NULL) { + /* must be 400 bad request */ + return NGX_OK; + } + + h = ngx_list_push(&r->headers_in.headers); + + if (h == NULL) { + return NGX_ERROR; + } + + dd("created new header for %.*s", (int) hv->key.len, hv->key.data); + + if (value->len == 0) { + h->hash = 0; + + } else { + h->hash = hv->hash; + } + + h->key = hv->key; + h->value = *value; +#if defined(nginx_version) && nginx_version >= 1023000 + h->next = NULL; +#endif + + h->lowcase_key = ngx_pnalloc(r->pool, h->key.len); + if (h->lowcase_key == NULL) { + return NGX_ERROR; + } + + ngx_strlow(h->lowcase_key, h->key.data, h->key.len); + + if (output_header) { + *output_header = h; + + while (r != r->main) { + r->parent->headers_in = r->headers_in; + r = r->parent; + } + } + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_set_builtin_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + ngx_table_elt_t *h, **old; + + dd("entered set_builtin_header (input)"); + + if (hv->offset) { + old = (ngx_table_elt_t **) ((char *) &r->headers_in + hv->offset); 
+ + } else { + old = NULL; + } + + dd("old builtin ptr ptr: %p", old); + if (old) { + dd("old builtin ptr: %p", *old); + } + + if (old == NULL || *old == NULL) { + dd("set normal header"); + return ngx_http_set_header_helper(r, hv, value, old); + } + + h = *old; + + if (value->len == 0) { + h->hash = 0; + h->value = *value; + + return ngx_http_set_header_helper(r, hv, value, old); + } + + h->hash = hv->hash; + h->value = *value; + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_set_host_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + ngx_str_t host; + + if (value->len) { + host= *value; + + if (ngx_http_headers_more_validate_host(&host, r->pool, 0) != NGX_OK) { + return NGX_ERROR; + } + + r->headers_in.server = host; + + } else { + r->headers_in.server = *value; + } + + return ngx_http_set_builtin_header(r, hv, value); +} + + +static ngx_int_t +ngx_http_set_content_length_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + off_t len; + + if (value->len == 0) { + return ngx_http_clear_content_length_header(r, hv, value); + } + + len = ngx_atosz(value->data, value->len); + if (len == NGX_ERROR) { + return NGX_ERROR; + } + + dd("reset headers_in.content_length_n to %d", (int) len); + + r->headers_in.content_length_n = len; + + return ngx_http_set_builtin_header(r, hv, value); +} + + +static ngx_int_t +ngx_http_clear_content_length_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + r->headers_in.content_length_n = -1; + + return ngx_http_clear_builtin_header(r, hv, value); +} + + +static ngx_int_t +ngx_http_clear_builtin_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + value->len = 0; + return ngx_http_set_builtin_header(r, hv, value); +} + + +char * +ngx_http_headers_more_set_input_headers(ngx_conf_t *cf, + ngx_command_t *cmd, void *conf) +{ + return 
ngx_http_headers_more_parse_directive(cf, cmd, conf, + ngx_http_headers_more_opcode_set); +} + + +char * +ngx_http_headers_more_clear_input_headers(ngx_conf_t *cf, + ngx_command_t *cmd, void *conf) +{ + return ngx_http_headers_more_parse_directive(cf, cmd, conf, + ngx_http_headers_more_opcode_clear); +} + + +static int +ngx_http_headers_more_check_type(ngx_http_request_t *r, ngx_array_t *types) +{ + ngx_uint_t i; + ngx_str_t *t; + ngx_str_t actual_type; + + if (r->headers_in.content_type == NULL) { + return 0; + } + + actual_type = r->headers_in.content_type->value; + if (actual_type.len == 0) { + return 0; + } + + dd("headers_in->content_type: %.*s", + (int) actual_type.len, + actual_type.data); + + t = types->elts; + for (i = 0; i < types->nelts; i++) { + dd("...comparing with type [%.*s]", (int) t[i].len, t[i].data); + + if (actual_type.len == t[i].len + && ngx_strncmp(actual_type.data, t[i].data, t[i].len) == 0) + { + return 1; + } + } + + return 0; +} + + +static char * +ngx_http_headers_more_parse_directive(ngx_conf_t *cf, ngx_command_t *ngx_cmd, + void *conf, ngx_http_headers_more_opcode_t opcode) +{ + ngx_http_headers_more_loc_conf_t *hlcf = conf; + + ngx_uint_t i; + ngx_http_headers_more_cmd_t *cmd; + ngx_str_t *arg; + ngx_flag_t ignore_next_arg; + ngx_str_t *cmd_name; + ngx_int_t rc; + ngx_flag_t replace = 0; + ngx_http_headers_more_header_val_t *h; + + ngx_http_headers_more_main_conf_t *hmcf; + + if (hlcf->cmds == NULL) { + hlcf->cmds = ngx_array_create(cf->pool, 1, + sizeof(ngx_http_headers_more_cmd_t)); + + if (hlcf->cmds == NULL) { + return NGX_CONF_ERROR; + } + } + + cmd = ngx_array_push(hlcf->cmds); + + if (cmd == NULL) { + return NGX_CONF_ERROR; + } + + cmd->headers = ngx_array_create(cf->pool, 1, + sizeof(ngx_http_headers_more_header_val_t)); + + if (cmd->headers == NULL) { + return NGX_CONF_ERROR; + } + + cmd->types = ngx_array_create(cf->pool, 1, sizeof(ngx_str_t)); + if (cmd->types == NULL) { + return NGX_CONF_ERROR; + } + + cmd->statuses = 
NULL; + + arg = cf->args->elts; + + cmd_name = &arg[0]; + + ignore_next_arg = 0; + + for (i = 1; i < cf->args->nelts; i++) { + if (ignore_next_arg) { + ignore_next_arg = 0; + continue; + } + + if (arg[i].len == 0) { + continue; + } + + if (arg[i].data[0] != '-') { + rc = ngx_http_headers_more_parse_header(cf, cmd_name, + &arg[i], cmd->headers, + opcode, + ngx_http_headers_more_set_handlers); + + if (rc != NGX_OK) { + return NGX_CONF_ERROR; + } + + continue; + } + + if (arg[i].len == 2) { + if (arg[i].data[1] == 't') { + if (i == cf->args->nelts - 1) { + ngx_log_error(NGX_LOG_ERR, cf->log, 0, + "%V: option -t takes an argument.", + cmd_name); + + return NGX_CONF_ERROR; + } + + rc = ngx_http_headers_more_parse_types(cf->log, cmd_name, + &arg[i + 1], + cmd->types); + + if (rc != NGX_OK) { + return NGX_CONF_ERROR; + } + + ignore_next_arg = 1; + + continue; + } + + if (arg[i].data[1] == 'r') { + dd("Found replace flag"); + replace = 1; + continue; + } + } + + ngx_log_error(NGX_LOG_ERR, cf->log, 0, + "%V: invalid option name: \"%V\"", cmd_name, &arg[i]); + + return NGX_CONF_ERROR; + } + + dd("Found %d types, and %d headers", + (int) cmd->types->nelts, + (int) cmd->headers->nelts); + + if (cmd->headers->nelts == 0) { + ngx_pfree(cf->pool, cmd->headers); + cmd->headers = NULL; + + } else { + h = cmd->headers->elts; + for (i = 0; i < cmd->headers->nelts; i++) { + h[i].replace = replace; + } + } + + if (cmd->types->nelts == 0) { + ngx_pfree(cf->pool, cmd->types); + cmd->types = NULL; + } + + cmd->is_input = 1; + + hmcf = ngx_http_conf_get_module_main_conf(cf, + ngx_http_headers_more_filter_module); + + hmcf->requires_handler = 1; + + return NGX_CONF_OK; +} + + +/* borrowed the code from ngx_http_request.c:ngx_http_process_user_agent */ +static ngx_int_t +ngx_http_set_user_agent_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + u_char *user_agent, *msie; + + /* clear existing settings */ + + r->headers_in.msie = 0; + 
r->headers_in.msie6 = 0; + r->headers_in.opera = 0; + r->headers_in.gecko = 0; + r->headers_in.chrome = 0; + r->headers_in.safari = 0; + r->headers_in.konqueror = 0; + + if (value->len == 0) { + return ngx_http_set_builtin_header(r, hv, value); + } + + /* check some widespread browsers */ + + user_agent = value->data; + + msie = ngx_strstrn(user_agent, "MSIE ", 5 - 1); + + if (msie && msie + 7 < user_agent + value->len) { + + r->headers_in.msie = 1; + + if (msie[6] == '.') { + + switch (msie[5]) { + case '4': + case '5': + r->headers_in.msie6 = 1; + break; + case '6': + if (ngx_strstrn(msie + 8, "SV1", 3 - 1) == NULL) { + r->headers_in.msie6 = 1; + } + break; + } + } + } + + if (ngx_strstrn(user_agent, "Opera", 5 - 1)) { + r->headers_in.opera = 1; + r->headers_in.msie = 0; + r->headers_in.msie6 = 0; + } + + if (!r->headers_in.msie && !r->headers_in.opera) { + + if (ngx_strstrn(user_agent, "Gecko/", 6 - 1)) { + r->headers_in.gecko = 1; + + } else if (ngx_strstrn(user_agent, "Chrome/", 7 - 1)) { + r->headers_in.chrome = 1; + + } else if (ngx_strstrn(user_agent, "Safari/", 7 - 1) + && ngx_strstrn(user_agent, "Mac OS X", 8 - 1)) + { + r->headers_in.safari = 1; + + } else if (ngx_strstrn(user_agent, "Konqueror", 9 - 1)) { + r->headers_in.konqueror = 1; + } + } + + return ngx_http_set_builtin_header(r, hv, value); +} + + +static ngx_int_t +ngx_http_set_connection_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + r->headers_in.connection_type = 0; + + if (value->len == 0) { + return ngx_http_set_builtin_header(r, hv, value); + } + + if (ngx_strcasestrn(value->data, "close", 5 - 1)) { + r->headers_in.connection_type = NGX_HTTP_CONNECTION_CLOSE; + r->headers_in.keep_alive_n = -1; + r->keepalive = 0; + + } else if (ngx_strcasestrn(value->data, "keep-alive", 10 - 1)) { + r->headers_in.connection_type = NGX_HTTP_CONNECTION_KEEP_ALIVE; + } + + return ngx_http_set_builtin_header(r, hv, value); +} + + +static ngx_int_t 
+ngx_http_set_builtin_multi_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ +#if defined(nginx_version) && nginx_version >= 1023000 + ngx_table_elt_t **headers, **ph, *h; +#if (DDEBUG) + int nelts; +#endif + + if (r->headers_out.status == 400 || r->headers_in.headers.last == NULL) { + /* must be a 400 Bad Request */ + return NGX_OK; + } + + headers = (ngx_table_elt_t **) ((char *) &r->headers_in + hv->offset); + + if (*headers) { +#if (DDEBUG) + nelts = 0; + for (h = *headers; h; h = h->next) { + nelts++; + } + + dd("clear multi-value headers: %d", nelts); +#endif + + *headers = NULL; + } + + if (ngx_http_set_header_helper(r, hv, value, &h) == NGX_ERROR) { + return NGX_ERROR; + } + + if (value->len == 0) { + return NGX_OK; + } + + dd("new multi-value header: %p", h); + + if (*headers) { + for (ph = headers; *ph; ph = &(*ph)->next) { /* void */ } + *ph = h; + + } else { + *headers = h; + } + + h->next = NULL; + + return NGX_OK; +#else + ngx_array_t *headers; + ngx_table_elt_t **v, *h; + + if (r->headers_out.status == 400 || r->headers_in.headers.last == NULL) { + /* must be a 400 Bad Request */ + return NGX_OK; + } + + headers = (ngx_array_t *) ((char *) &r->headers_in + hv->offset); + + if (headers->nelts > 0) { + ngx_array_destroy(headers); + + if (ngx_array_init(headers, r->pool, 2, + sizeof(ngx_table_elt_t *)) + != NGX_OK) + { + return NGX_ERROR; + } + + dd("clear multi-value headers: %d", (int) headers->nelts); + } + +#if 1 + if (headers->nalloc == 0) { + if (ngx_array_init(headers, r->pool, 2, + sizeof(ngx_table_elt_t *)) + != NGX_OK) + { + return NGX_ERROR; + } + } +#endif + + h = NULL; + if (ngx_http_set_header_helper(r, hv, value, &h) == NGX_ERROR) { + return NGX_ERROR; + } + + if (value->len == 0) { + return NGX_OK; + } + + dd("new cookie header: %p", h); + + v = ngx_array_push(headers); + if (v == NULL) { + return NGX_ERROR; + } + + *v = h; + return NGX_OK; +#endif +} + + +static ngx_int_t 
+ngx_http_headers_more_validate_host(ngx_str_t *host, ngx_pool_t *pool, + ngx_uint_t alloc) +{ + u_char *h, ch; + size_t i, dot_pos, host_len; + + enum { + sw_usual = 0, + sw_literal, + sw_rest + } state; + + dot_pos = host->len; + host_len = host->len; + + h = host->data; + + state = sw_usual; + + for (i = 0; i < host->len; i++) { + ch = h[i]; + + switch (ch) { + + case '.': + if (dot_pos == i - 1) { + return NGX_DECLINED; + } + + dot_pos = i; + break; + + case ':': + if (state == sw_usual) { + host_len = i; + state = sw_rest; + } + break; + + case '[': + if (i == 0) { + state = sw_literal; + } + break; + + case ']': + if (state == sw_literal) { + host_len = i + 1; + state = sw_rest; + } + break; + + case '\0': + return NGX_DECLINED; + + default: + + if (ngx_path_separator(ch)) { + return NGX_DECLINED; + } + + if (ch >= 'A' && ch <= 'Z') { + alloc = 1; + } + + break; + } + } + + if (dot_pos == host_len - 1) { + host_len--; + } + + if (host_len == 0) { + return NGX_DECLINED; + } + + if (alloc) { + host->data = ngx_pnalloc(pool, host_len); + if (host->data == NULL) { + return NGX_ERROR; + } + + ngx_strlow(host->data, h, host_len); + } + + host->len = host_len; + + return NGX_OK; +} diff --git a/ngx_headers_more/src/ngx_http_headers_more_headers_in.h b/ngx_headers_more/src/ngx_http_headers_more_headers_in.h new file mode 100644 index 0000000..d2251da --- /dev/null +++ b/ngx_headers_more/src/ngx_http_headers_more_headers_in.h @@ -0,0 +1,26 @@ + +/* + * Copyright (c) Yichun Zhang (agentzh) + */ + + +#ifndef NGX_HTTP_HEADERS_MORE_INPUT_HEADERS_H +#define NGX_HTTP_HEADERS_MORE_INPUT_HEADERS_H + + +#include "ngx_http_headers_more_filter_module.h" + + +/* output header setters and clearers */ + +ngx_int_t ngx_http_headers_more_exec_input_cmd(ngx_http_request_t *r, + ngx_http_headers_more_cmd_t *cmd); + +char *ngx_http_headers_more_set_input_headers(ngx_conf_t *cf, + ngx_command_t *cmd, void *conf); + +char *ngx_http_headers_more_clear_input_headers(ngx_conf_t *cf, + 
ngx_command_t *cmd, void *conf); + + +#endif /* NGX_HTTP_HEADERS_MORE_INPUT_HEADERS_H */ diff --git a/ngx_headers_more/src/ngx_http_headers_more_headers_out.c b/ngx_headers_more/src/ngx_http_headers_more_headers_out.c new file mode 100644 index 0000000..6bc49f5 --- /dev/null +++ b/ngx_headers_more/src/ngx_http_headers_more_headers_out.c @@ -0,0 +1,817 @@ + +/* + * Copyright (C) Yichun Zhang (agentzh) + */ + + +#ifndef DDEBUG +#define DDEBUG 0 +#endif +#include "ddebug.h" + + +#include "ngx_http_headers_more_headers_out.h" +#include "ngx_http_headers_more_util.h" +#include + + +static char * +ngx_http_headers_more_parse_directive(ngx_conf_t *cf, ngx_command_t *ngx_cmd, + void *conf, ngx_http_headers_more_opcode_t opcode); +static ngx_flag_t ngx_http_headers_more_check_type(ngx_http_request_t *r, + ngx_array_t *types); +static ngx_flag_t ngx_http_headers_more_check_status(ngx_http_request_t *r, + ngx_array_t *statuses); +static ngx_int_t ngx_http_set_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_header_helper(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value, + ngx_table_elt_t **output_header, ngx_flag_t no_create); +static ngx_int_t ngx_http_set_builtin_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_accept_ranges_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_content_length_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_content_type_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_clear_builtin_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_clear_content_length_header(ngx_http_request_t *r, + 
ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); +static ngx_int_t ngx_http_set_builtin_multi_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value); + + +static ngx_http_headers_more_set_header_t ngx_http_headers_more_set_handlers[] + = { + + { ngx_string("Server"), + offsetof(ngx_http_headers_out_t, server), + ngx_http_set_builtin_header }, + + { ngx_string("Date"), + offsetof(ngx_http_headers_out_t, date), + ngx_http_set_builtin_header }, + + { ngx_string("Content-Encoding"), + offsetof(ngx_http_headers_out_t, content_encoding), + ngx_http_set_builtin_header }, + + { ngx_string("Location"), + offsetof(ngx_http_headers_out_t, location), + ngx_http_set_builtin_header }, + + { ngx_string("Refresh"), + offsetof(ngx_http_headers_out_t, refresh), + ngx_http_set_builtin_header }, + + { ngx_string("Last-Modified"), + offsetof(ngx_http_headers_out_t, last_modified), + ngx_http_set_builtin_header }, + + { ngx_string("Content-Range"), + offsetof(ngx_http_headers_out_t, content_range), + ngx_http_set_builtin_header }, + + { ngx_string("Accept-Ranges"), + offsetof(ngx_http_headers_out_t, accept_ranges), + ngx_http_set_accept_ranges_header }, + + { ngx_string("WWW-Authenticate"), + offsetof(ngx_http_headers_out_t, www_authenticate), + ngx_http_set_builtin_header }, + + { ngx_string("Expires"), + offsetof(ngx_http_headers_out_t, expires), + ngx_http_set_builtin_header }, + + { ngx_string("E-Tag"), + offsetof(ngx_http_headers_out_t, etag), + ngx_http_set_builtin_header }, + + { ngx_string("Content-Length"), + offsetof(ngx_http_headers_out_t, content_length), + ngx_http_set_content_length_header }, + + { ngx_string("Content-Type"), + 0, + ngx_http_set_content_type_header }, + + { ngx_string("Cache-Control"), + offsetof(ngx_http_headers_out_t, cache_control), + ngx_http_set_builtin_multi_header }, + + { ngx_null_string, 0, ngx_http_set_header } +}; + + +ngx_int_t +ngx_http_headers_more_exec_cmd(ngx_http_request_t *r, + 
ngx_http_headers_more_cmd_t *cmd) +{ + ngx_str_t value; + ngx_http_headers_more_header_val_t *h; + ngx_uint_t i; + + if (!cmd->headers) { + return NGX_OK; + } + + if (cmd->types && !ngx_http_headers_more_check_type(r, cmd->types)) { + return NGX_OK; + } + + if (cmd->statuses + && !ngx_http_headers_more_check_status(r, cmd->statuses)) + { + return NGX_OK; + } + + h = cmd->headers->elts; + for (i = 0; i < cmd->headers->nelts; i++) { + + if (ngx_http_complex_value(r, &h[i].value, &value) != NGX_OK) { + return NGX_ERROR; + } + + if (value.len) { + value.len--; /* remove the trailing '\0' added by + ngx_http_headers_more_parse_header */ + } + + if (h[i].handler(r, &h[i], &value) != NGX_OK) { + return NGX_ERROR; + } + } + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_set_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + return ngx_http_set_header_helper(r, hv, value, NULL, 0); +} + + +static ngx_int_t +ngx_http_set_header_helper(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value, + ngx_table_elt_t **output_header, ngx_flag_t no_create) +{ + ngx_table_elt_t *h; + ngx_list_part_t *part; + ngx_uint_t i; + ngx_flag_t matched = 0; + + dd_enter(); + +#if 1 + if (r->headers_out.location + && r->headers_out.location->value.len + && r->headers_out.location->value.data[0] == '/') + { + /* XXX ngx_http_core_find_config_phase, for example, + * may not initialize the "key" and "hash" fields + * for a nasty optimization purpose, and + * we have to work-around it here */ + + r->headers_out.location->hash = ngx_http_headers_more_location_hash; + ngx_str_set(&r->headers_out.location->key, "Location"); + } +#endif + + if (hv->append) { + goto append; + } + + part = &r->headers_out.headers.part; + h = part->elts; + + for (i = 0; /* void */; i++) { + + if (i >= part->nelts) { + if (part->next == NULL) { + break; + } + + part = part->next; + h = part->elts; + i = 0; + } + + if (h[i].hash == 0) { + continue; + } 
+ + if (!hv->wildcard + && h[i].key.len == hv->key.len + && ngx_strncasecmp(h[i].key.data, hv->key.data, + h[i].key.len) == 0) + { + goto matched; + } + + if (hv->wildcard + && h[i].key.len >= hv->key.len - 1 + && ngx_strncasecmp(h[i].key.data, hv->key.data, + hv->key.len - 1) == 0) + { + goto matched; + } + + /* not matched */ + continue; + +matched: + + if (value->len == 0 || matched) { + dd("clearing normal header for %.*s", (int) hv->key.len, + hv->key.data); + + h[i].value.len = 0; + h[i].hash = 0; + + } else { + h[i].value = *value; + h[i].hash = hv->hash; + } + + if (output_header) { + *output_header = &h[i]; + } + + matched = 1; + } + + if (matched){ + return NGX_OK; + } + + if ((hv->wildcard || no_create) && value->len == 0) { + return NGX_OK; + } + + /* XXX we still need to create header slot even if the value + * is empty because some builtin headers like Last-Modified + * relies on this to get cleared */ + +append: + + h = ngx_list_push(&r->headers_out.headers); + if (h == NULL) { + return NGX_ERROR; + } + + if (value->len == 0) { + h->hash = 0; + + } else { + h->hash = hv->hash; + } + + h->key = hv->key; + h->value = *value; +#if defined(nginx_version) && nginx_version >= 1023000 + h->next = NULL; +#endif + + h->lowcase_key = ngx_pnalloc(r->pool, h->key.len); + if (h->lowcase_key == NULL) { + return NGX_ERROR; + } + + ngx_strlow(h->lowcase_key, h->key.data, h->key.len); + + if (output_header) { + *output_header = h; + } + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_set_builtin_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + ngx_table_elt_t *h, **old; + + dd_enter(); + + if (hv->offset) { + old = (ngx_table_elt_t **) ((char *) &r->headers_out + hv->offset); + + } else { + old = NULL; + } + + if (old == NULL || *old == NULL) { + return ngx_http_set_header_helper(r, hv, value, old, 0); + } + + h = *old; + + if (value->len == 0) { + dd("clearing the builtin header"); + + h->hash = 0; + h->value = 
*value; + + return NGX_OK; + } + + h->hash = hv->hash; + h->key = hv->key; + h->value = *value; + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_set_builtin_multi_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ +#if defined(nginx_version) && nginx_version >= 1023000 + ngx_table_elt_t **headers, *h, *ho, **ph; + + headers = (ngx_table_elt_t **) ((char *) &r->headers_out + hv->offset); + + if (*headers) { + for (h = (*headers)->next; h; h = h->next) { + h->hash = 0; + h->value.len = 0; + } + + h = *headers; + + h->value = *value; + + if (value->len == 0) { + h->hash = 0; + + } else { + h->hash = hv->hash; + } + + return NGX_OK; + } + + for (ph = headers; *ph; ph = &(*ph)->next) { /* void */ } + + ho = ngx_list_push(&r->headers_out.headers); + if (ho == NULL) { + return NGX_ERROR; + } + + ho->value = *value; + ho->hash = hv->hash; + ngx_str_set(&ho->key, "Cache-Control"); + ho->next = NULL; + *ph = ho; + + return NGX_OK; +#else + ngx_array_t *pa; + ngx_table_elt_t *ho, **ph; + ngx_uint_t i; + + pa = (ngx_array_t *) ((char *) &r->headers_out + hv->offset); + + if (pa->elts == NULL) { + if (ngx_array_init(pa, r->pool, 2, sizeof(ngx_table_elt_t *)) + != NGX_OK) + { + return NGX_ERROR; + } + } + + /* override old values (if any) */ + + if (pa->nelts > 0) { + ph = pa->elts; + for (i = 1; i < pa->nelts; i++) { + ph[i]->hash = 0; + ph[i]->value.len = 0; + } + + ph[0]->value = *value; + + if (value->len == 0) { + ph[0]->hash = 0; + + } else { + ph[0]->hash = hv->hash; + } + + return NGX_OK; + } + + ph = ngx_array_push(pa); + if (ph == NULL) { + return NGX_ERROR; + } + + ho = ngx_list_push(&r->headers_out.headers); + if (ho == NULL) { + return NGX_ERROR; + } + + ho->value = *value; + ho->hash = hv->hash; + ngx_str_set(&ho->key, "Cache-Control"); + *ph = ho; + + return NGX_OK; +#endif +} + + +static ngx_int_t +ngx_http_set_content_type_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + 
u_char *p, *last, *end; + + r->headers_out.content_type_len = value->len; + r->headers_out.content_type = *value; + r->headers_out.content_type_hash = hv->hash; + r->headers_out.content_type_lowcase = NULL; + + p = value->data; + end = p + value->len; + + for (; p != end; p++) { + + if (*p != ';') { + continue; + } + + last = p; + + while (*++p == ' ') { /* void */ } + + if (p == end) { + break; + } + + if (ngx_strncasecmp(p, (u_char *) "charset=", 8) != 0) { + continue; + } + + p += 8; + + r->headers_out.content_type_len = last - value->data; + + if (*p == '"') { + p++; + } + + last = end; + + if (*(last - 1) == '"') { + last--; + } + + r->headers_out.charset.len = last - p; + r->headers_out.charset.data = p; + + break; + } + + value->len = 0; + + return ngx_http_set_header_helper(r, hv, value, NULL, 1); +} + + +static ngx_int_t +ngx_http_set_content_length_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + off_t len; + + if (value->len == 0) { + return ngx_http_clear_content_length_header(r, hv, value); + } + + len = ngx_atosz(value->data, value->len); + if (len == NGX_ERROR) { + return NGX_ERROR; + } + + r->headers_out.content_length_n = len; + + return ngx_http_set_builtin_header(r, hv, value); +} + + +static ngx_int_t +ngx_http_set_accept_ranges_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + if (value->len == 0) { + r->allow_ranges = 0; + } + + return ngx_http_set_builtin_header(r, hv, value); +} + + +static ngx_int_t +ngx_http_clear_content_length_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + r->headers_out.content_length_n = -1; + + return ngx_http_clear_builtin_header(r, hv, value); +} + + +static ngx_int_t +ngx_http_clear_builtin_header(ngx_http_request_t *r, + ngx_http_headers_more_header_val_t *hv, ngx_str_t *value) +{ + dd_enter(); + + value->len = 0; + + return ngx_http_set_builtin_header(r, hv, value); +} + + 
+char * +ngx_http_headers_more_set_headers(ngx_conf_t *cf, + ngx_command_t *cmd, void *conf) +{ + return ngx_http_headers_more_parse_directive(cf, cmd, conf, + ngx_http_headers_more_opcode_set); +} + + +char * +ngx_http_headers_more_clear_headers(ngx_conf_t *cf, + ngx_command_t *cmd, void *conf) +{ + return ngx_http_headers_more_parse_directive(cf, cmd, conf, + ngx_http_headers_more_opcode_clear); +} + + +static ngx_flag_t +ngx_http_headers_more_check_type(ngx_http_request_t *r, ngx_array_t *types) +{ + ngx_uint_t i; + ngx_str_t *t; + + dd("headers_out->content_type: %.*s (len %d)", + (int) r->headers_out.content_type.len, + r->headers_out.content_type.data, + (int) r->headers_out.content_type.len); + + t = types->elts; + + for (i = 0; i < types->nelts; i++) { + dd("...comparing with type [%.*s]", (int) t[i].len, t[i].data); + + if (r->headers_out.content_type_len == t[i].len + && ngx_strncmp(r->headers_out.content_type.data, + t[i].data, t[i].len) == 0) + { + return 1; + } + } + + return 0; +} + + +static ngx_flag_t +ngx_http_headers_more_check_status(ngx_http_request_t *r, ngx_array_t *statuses) +{ + ngx_uint_t i; + ngx_uint_t *status; + + dd("headers_out.status = %d", (int) r->headers_out.status); + + status = statuses->elts; + for (i = 0; i < statuses->nelts; i++) { + dd("...comparing with specified status %d", (int) status[i]); + + if (r->headers_out.status == status[i]) { + return 1; + } + } + + return 0; +} + + +static char * +ngx_http_headers_more_parse_directive(ngx_conf_t *cf, ngx_command_t *ngx_cmd, + void *conf, ngx_http_headers_more_opcode_t opcode) +{ + ngx_http_headers_more_loc_conf_t *hlcf = conf; + + ngx_uint_t i, j; + ngx_http_headers_more_cmd_t *cmd; + ngx_str_t *arg; + ngx_flag_t ignore_next_arg; + ngx_str_t *cmd_name; + ngx_int_t rc; + ngx_flag_t append = 0; + ngx_flag_t is_builtin_header = 0; + ngx_http_headers_more_header_val_t *h; + ngx_http_headers_more_set_header_t *handlers; + + ngx_http_headers_more_main_conf_t *hmcf; + + if (hlcf->cmds 
== NULL) { + hlcf->cmds = ngx_array_create(cf->pool, 1, + sizeof(ngx_http_headers_more_cmd_t)); + + if (hlcf->cmds == NULL) { + return NGX_CONF_ERROR; + } + } + + cmd = ngx_array_push(hlcf->cmds); + if (cmd == NULL) { + return NGX_CONF_ERROR; + } + + cmd->headers = + ngx_array_create(cf->pool, 1, + sizeof(ngx_http_headers_more_header_val_t)); + if (cmd->headers == NULL) { + return NGX_CONF_ERROR; + } + + cmd->types = ngx_array_create(cf->pool, 1, sizeof(ngx_str_t)); + if (cmd->types == NULL) { + return NGX_CONF_ERROR; + } + + cmd->statuses = ngx_array_create(cf->pool, 1, sizeof(ngx_uint_t)); + if (cmd->statuses == NULL) { + return NGX_CONF_ERROR; + } + + arg = cf->args->elts; + + cmd_name = &arg[0]; + + ignore_next_arg = 0; + + for (i = 1; i < cf->args->nelts; i++) { + + if (ignore_next_arg) { + ignore_next_arg = 0; + continue; + } + + if (arg[i].len == 0) { + continue; + } + + if (arg[i].data[0] != '-') { + rc = ngx_http_headers_more_parse_header(cf, cmd_name, + &arg[i], cmd->headers, + opcode, + ngx_http_headers_more_set_handlers); + + if (rc != NGX_OK) { + return NGX_CONF_ERROR; + } + + continue; + } + + if (arg[i].len == 2) { + if (arg[i].data[1] == 't') { + if (i == cf->args->nelts - 1) { + ngx_log_error(NGX_LOG_ERR, cf->log, 0, + "%V: option -t takes an argument.", + cmd_name); + + return NGX_CONF_ERROR; + } + + rc = ngx_http_headers_more_parse_types(cf->log, cmd_name, + &arg[i + 1], + cmd->types); + + if (rc != NGX_OK) { + return NGX_CONF_ERROR; + } + + ignore_next_arg = 1; + + continue; + + } else if (arg[i].data[1] == 's') { + + if (i == cf->args->nelts - 1) { + ngx_log_error(NGX_LOG_ERR, cf->log, 0, + "%V: option -s takes an argument.", + cmd_name); + + return NGX_CONF_ERROR; + } + + rc = ngx_http_headers_more_parse_statuses(cf->log, cmd_name, + &arg[i + 1], + cmd->statuses); + + if (rc != NGX_OK) { + return NGX_CONF_ERROR; + } + + ignore_next_arg = 1; + + continue; + + } else if (arg[i].data[1] == 'a') { + + if (ngx_strncasecmp((u_char *) 
"more_set_headers", + cmd_name->data, cmd_name->len) != 0) + { + ngx_log_error(NGX_LOG_ERR, cf->log, 0, + "%V: invalid option name: \"%V\"", + cmd_name, &arg[i]); + + return NGX_CONF_ERROR; + } + + dd("Found append flag"); + append = 1; + continue; + } + } + + ngx_log_error(NGX_LOG_ERR, cf->log, 0, + "%V: invalid option name: \"%V\"", cmd_name, &arg[i]); + + return NGX_CONF_ERROR; + } + + dd("Found %d statuses, %d types, and %d headers", + (int) cmd->statuses->nelts, (int) cmd->types->nelts, + (int) cmd->headers->nelts); + + if (cmd->headers->nelts == 0) { + cmd->headers = NULL; + + } else { + + h = cmd->headers->elts; + for (i = 0; i < cmd->headers->nelts; i++) { + h[i].append = 0; + + handlers = ngx_http_headers_more_set_handlers; + + for (j = 0; handlers[j].name.len; j++) { + if (h[i].key.len == handlers[j].name.len + && ngx_strncasecmp(h[i].key.data, handlers[j].name.data, + h[i].key.len) == 0) + { + is_builtin_header = 1; + break; + } + } + + if (is_builtin_header && append) { + ngx_log_error(NGX_LOG_ERR, cf->log, 0, + "%V: can not append builtin headers \"%V\"", + cmd_name, &h[i].key); + + return NGX_CONF_ERROR; + } + + if (!is_builtin_header) { + h[i].append = append; + } + } + } + + if (cmd->types->nelts == 0) { + cmd->types = NULL; + } + + if (cmd->statuses->nelts == 0) { + cmd->statuses = NULL; + } + + cmd->is_input = 0; + + hmcf = ngx_http_conf_get_module_main_conf(cf, + ngx_http_headers_more_filter_module); + + hmcf->requires_filter = 1; + + return NGX_CONF_OK; +} diff --git a/ngx_headers_more/src/ngx_http_headers_more_headers_out.h b/ngx_headers_more/src/ngx_http_headers_more_headers_out.h new file mode 100644 index 0000000..c939507 --- /dev/null +++ b/ngx_headers_more/src/ngx_http_headers_more_headers_out.h @@ -0,0 +1,26 @@ + +/* + * Copyright (c) Yichun Zhang (agentzh) + */ + + +#ifndef NGX_HTTP_HEADERS_MORE_OUTPUT_HEADERS_H +#define NGX_HTTP_HEADERS_MORE_OUTPUT_HEADERS_H + + +#include "ngx_http_headers_more_filter_module.h" + + +/* output header 
setters and clearers */ + +ngx_int_t ngx_http_headers_more_exec_cmd(ngx_http_request_t *r, + ngx_http_headers_more_cmd_t *cmd); + +char *ngx_http_headers_more_set_headers(ngx_conf_t *cf, + ngx_command_t *cmd, void *conf); + +char *ngx_http_headers_more_clear_headers(ngx_conf_t *cf, + ngx_command_t *cmd, void *conf); + + +#endif /* NGX_HTTP_HEADERS_MORE_OUTPUT_HEADERS_H */ diff --git a/ngx_headers_more/src/ngx_http_headers_more_util.c b/ngx_headers_more/src/ngx_http_headers_more_util.c new file mode 100644 index 0000000..e1f3636 --- /dev/null +++ b/ngx_headers_more/src/ngx_http_headers_more_util.c @@ -0,0 +1,382 @@ + +/* + * Copyright (C) Yichun Zhang (agentzh) + */ + + +#ifndef DDEBUG +#define DDEBUG 0 +#endif +#include "ddebug.h" + + +#include "ngx_http_headers_more_util.h" +#include + + +ngx_int_t +ngx_http_headers_more_parse_header(ngx_conf_t *cf, ngx_str_t *cmd_name, + ngx_str_t *raw_header, ngx_array_t *headers, + ngx_http_headers_more_opcode_t opcode, + ngx_http_headers_more_set_header_t *handlers) +{ + ngx_http_headers_more_header_val_t *hv; + + ngx_uint_t i; + ngx_str_t key = ngx_null_string; + ngx_str_t value = ngx_null_string; + ngx_flag_t seen_end_of_key; + ngx_http_compile_complex_value_t ccv; + u_char *p; + + hv = ngx_array_push(headers); + if (hv == NULL) { + return NGX_ERROR; + } + + seen_end_of_key = 0; + for (i = 0; i < raw_header->len; i++) { + if (key.len == 0) { + if (isspace(raw_header->data[i])) { + continue; + } + + key.data = raw_header->data; + key.len = 1; + + continue; + } + + if (!seen_end_of_key) { + if (raw_header->data[i] == ':' + || isspace(raw_header->data[i])) + { + seen_end_of_key = 1; + continue; + } + + key.len++; + + continue; + } + + if (value.len == 0) { + if (raw_header->data[i] == ':' + || isspace(raw_header->data[i])) + { + continue; + } + + value.data = &raw_header->data[i]; + value.len = 1; + + continue; + } + + value.len++; + } + + if (key.len == 0) { + ngx_log_error(NGX_LOG_ERR, cf->log, 0, + "%V: no key found in the 
header argument: %V", + cmd_name, raw_header); + + return NGX_ERROR; + } + + hv->wildcard = (key.data[key.len - 1] == '*'); + if (hv->wildcard && key.len<2){ + ngx_log_error(NGX_LOG_ERR, cf->log, 0, + "%V: wildcard key too short: %V", + cmd_name, raw_header); + return NGX_ERROR; + } + + hv->hash = ngx_hash_key_lc(key.data, key.len); + hv->key = key; + + hv->offset = 0; + + for (i = 0; handlers[i].name.len; i++) { + if (hv->key.len != handlers[i].name.len + || ngx_strncasecmp(hv->key.data, handlers[i].name.data, + handlers[i].name.len) != 0) + { + dd("hv key comparison: %s <> %s", handlers[i].name.data, + hv->key.data); + + continue; + } + + hv->offset = handlers[i].offset; + hv->handler = handlers[i].handler; + + break; + } + + if (handlers[i].name.len == 0 && handlers[i].handler) { + hv->offset = handlers[i].offset; + hv->handler = handlers[i].handler; + } + + if (opcode == ngx_http_headers_more_opcode_clear) { + value.len = 0; + } + + if (value.len == 0) { + ngx_memzero(&hv->value, sizeof(ngx_http_complex_value_t)); + return NGX_OK; + + } + + /* Nginx request header value requires to be a null-terminated + * C string */ + + p = ngx_palloc(cf->pool, value.len + 1); + if (p == NULL) { + return NGX_ERROR; + } + + ngx_memcpy(p, value.data, value.len); + p[value.len] = '\0'; + value.data = p; + value.len++; /* we should also compile the trailing '\0' */ + + /* compile the header value as a complex value */ + + ngx_memzero(&ccv, sizeof(ngx_http_compile_complex_value_t)); + + ccv.cf = cf; + ccv.value = &value; + ccv.complex_value = &hv->value; + + if (ngx_http_compile_complex_value(&ccv) != NGX_OK) { + return NGX_ERROR; + } + + return NGX_OK; +} + + +ngx_int_t +ngx_http_headers_more_parse_statuses(ngx_log_t *log, ngx_str_t *cmd_name, + ngx_str_t *value, ngx_array_t *statuses) +{ + u_char *p, *last; + ngx_uint_t *s = NULL; + + p = value->data; + last = p + value->len; + + for (; p != last; p++) { + if (s == NULL) { + if (isspace(*p)) { + continue; + } + + s = 
ngx_array_push(statuses); + if (s == NULL) { + return NGX_ERROR; + } + + if (*p >= '0' && *p <= '9') { + *s = *p - '0'; + + } else { + ngx_log_error(NGX_LOG_ERR, log, 0, + "%V: invalid digit \"%c\" found in " + "the status code list \"%V\"", + cmd_name, *p, value); + + return NGX_ERROR; + } + + continue; + } + + if (isspace(*p)) { + dd("Parsed status %d", (int) *s); + + s = NULL; + continue; + } + + if (*p >= '0' && *p <= '9') { + *s *= 10; + *s += *p - '0'; + + } else { + ngx_log_error(NGX_LOG_ERR, log, 0, + "%V: invalid digit \"%c\" found in " + "the status code list \"%V\"", + cmd_name, *p, value); + + return NGX_ERROR; + } + } + + if (s) { + dd("Parsed status %d", (int) *s); + } + + return NGX_OK; +} + + +ngx_int_t +ngx_http_headers_more_parse_types(ngx_log_t *log, ngx_str_t *cmd_name, + ngx_str_t *value, ngx_array_t *types) +{ + u_char *p, *last; + ngx_str_t *t = NULL; + + p = value->data; + last = p + value->len; + + for (; p != last; p++) { + if (t == NULL) { + if (isspace(*p) || *p == ';') { + continue; + } + + t = ngx_array_push(types); + if (t == NULL) { + return NGX_ERROR; + } + + t->len = 1; + t->data = p; + + continue; + } + + if (isspace(*p) || *p == ';') { + t = NULL; + continue; + } + + t->len++; + } + + return NGX_OK; +} + + +ngx_int_t +ngx_http_headers_more_rm_header_helper(ngx_list_t *l, ngx_list_part_t *cur, + ngx_uint_t i) +{ + ngx_table_elt_t *data; + ngx_list_part_t *new, *part; + + dd("list rm item: part %p, i %d, nalloc %d", cur, (int) i, + (int) l->nalloc); + + data = cur->elts; + + dd("cur: nelts %d, nalloc %d", (int) cur->nelts, + (int) l->nalloc); + + if (i == 0) { + cur->elts = (char *) cur->elts + l->size; + cur->nelts--; + + if (cur == l->last) { + if (cur->nelts == 0) { +#if 1 + part = &l->part; + + if (part == cur) { + cur->elts = (char *) cur->elts - l->size; + /* do nothing */ + + } else { + while (part->next != cur) { + if (part->next == NULL) { + return NGX_ERROR; + } + + part = part->next; + } + + l->last = part; + part->next 
= NULL; + dd("part nelts: %d", (int) part->nelts); + l->nalloc = part->nelts; + } +#endif + + } else { + l->nalloc--; + } + + return NGX_OK; + } + + if (cur->nelts == 0) { + part = &l->part; + + if (part == cur) { + ngx_http_headers_more_assert(cur->next != NULL); + + dd("remove 'cur' from the list by rewriting 'cur': " + "l->last: %p, cur: %p, cur->next: %p, part: %p", + l->last, cur, cur->next, part); + + if (l->last == cur->next) { + dd("last is cur->next"); + l->part = *(cur->next); + l->last = part; + l->nalloc = part->nelts; + + } else { + l->part = *(cur->next); + } + + } else { + dd("remove 'cur' from the list"); + while (part->next != cur) { + if (part->next == NULL) { + return NGX_ERROR; + } + + part = part->next; + } + + part->next = cur->next; + } + + return NGX_OK; + } + + return NGX_OK; + } + + if (i == cur->nelts - 1) { + cur->nelts--; + + if (cur == l->last) { + l->nalloc = cur->nelts; + } + + return NGX_OK; + } + + new = ngx_palloc(l->pool, sizeof(ngx_list_part_t)); + if (new == NULL) { + return NGX_ERROR; + } + + new->elts = &data[i + 1]; + new->nelts = cur->nelts - i - 1; + new->next = cur->next; + + cur->nelts = i; + cur->next = new; + if (cur == l->last) { + l->last = new; + l->nalloc = new->nelts; + } + + return NGX_OK; +} diff --git a/ngx_headers_more/src/ngx_http_headers_more_util.h b/ngx_headers_more/src/ngx_http_headers_more_util.h new file mode 100644 index 0000000..6c4614b --- /dev/null +++ b/ngx_headers_more/src/ngx_http_headers_more_util.h @@ -0,0 +1,52 @@ + +/* + * Copyright (c) Yichun Zhang (agentzh) + */ + + +#ifndef NGX_HTTP_HEADERS_MORE_UTIL_H +#define NGX_HTTP_HEADERS_MORE_UTIL_H + + +#include "ngx_http_headers_more_filter_module.h" + + +#define ngx_http_headers_more_hash_literal(s) \ + ngx_http_headers_more_hash_str((u_char *) s, sizeof(s) - 1) + + +static ngx_inline ngx_uint_t +ngx_http_headers_more_hash_str(u_char *src, size_t n) +{ + ngx_uint_t key; + + key = 0; + + while (n--) { + key = ngx_hash(key, *src); + src++; + } + + 
return key; +} + + +extern ngx_uint_t ngx_http_headers_more_location_hash; + + +ngx_int_t ngx_http_headers_more_parse_header(ngx_conf_t *cf, + ngx_str_t *cmd_name, ngx_str_t *raw_header, ngx_array_t *headers, + ngx_http_headers_more_opcode_t opcode, + ngx_http_headers_more_set_header_t *handlers); + +ngx_int_t ngx_http_headers_more_parse_statuses(ngx_log_t *log, + ngx_str_t *cmd_name, ngx_str_t *value, ngx_array_t *statuses); + +ngx_int_t ngx_http_headers_more_parse_types(ngx_log_t *log, + ngx_str_t *cmd_name, ngx_str_t *value, ngx_array_t *types); + +ngx_int_t ngx_http_headers_more_rm_header_helper(ngx_list_t *l, + ngx_list_part_t *cur, ngx_uint_t i); + + +#endif /* NGX_HTTP_HEADERS_MORE_UTIL_H */ diff --git a/ngx_headers_more/t/bug.t b/ngx_headers_more/t/bug.t new file mode 100644 index 0000000..731954a --- /dev/null +++ b/ngx_headers_more/t/bug.t @@ -0,0 +1,461 @@ +# vi:filetype= + +use Test::Nginx::Socket; # 'no_plan'; + +repeat_each(2); + +plan tests => 62 * repeat_each(); + +no_diff; + +run_tests(); + +__DATA__ + +=== TEST 1: set Server +--- config + #more_set_headers 'Last-Modified: x'; + more_clear_headers 'Last-Modified'; +--- request + GET /index.html +--- response_headers +! Last-Modified +--- response_body_like: It works! 
+ + + +=== TEST 2: variables in the Ranges header +--- config + location /index.html { + set $rfrom 1; + set $rto 3; + more_set_input_headers 'Range: bytes=$rfrom - $rto'; + #more_set_input_headers 'Range: bytes=1 - 3'; + #echo $http_range; + } +--- request +GET /index.html +--- error_code: 206 +--- response_body chomp +htm + + + +=== TEST 3: mime type overriding (inlined types) +--- config + more_clear_headers 'X-Powered-By' 'X-Runtime' 'ETag'; + + types { + text/html html htm shtml; + text/css css; + } +--- user_files +>>> a.css +hello +--- request +GET /a.css +--- error_code: 200 +--- response_headers +Content-Type: text/css +--- response_body +hello + + + +=== TEST 4: mime type overriding (included types file) +--- config + more_clear_headers 'X-Powered-By' 'X-Runtime' 'ETag'; + include mime.types; +--- user_files +>>> a.css +hello +>>> ../conf/mime.types +types { + text/html html htm shtml; + text/css css; +} +--- request +GET /a.css +--- error_code: 200 +--- response_headers +Content-Type: text/css +--- response_body +hello + + + +=== TEST 5: empty variable as the header value +--- config + location /foo { + more_set_headers 'X-Foo: $arg_foo'; + echo hi; + } +--- request + GET /foo +--- response_headers +! X-Foo +--- response_body +hi + + + +=== TEST 6: range bug +--- config + location /index.html { + more_clear_input_headers "Range*" ; + more_clear_input_headers "Content-Range*" ; + + more_set_input_headers 'Range: bytes=1-5'; + more_set_headers 'Content-Range: bytes 1-5/1000'; + } +--- request + GET /index.html +--- more_headers +Range: bytes=1-3 +--- raw_response_headers_like: Content-Range: bytes 1-5/1000$ +--- response_body chop +html> +--- error_code: 206 +--- SKIP + + + +=== TEST 7: Allow-Ranges +--- config + location /index.html { + more_clear_headers 'Accept-Ranges'; + } +--- request + GET /index.html +--- response_headers +! 
Accept-Ranges +--- response_body_like: It works + + + +=== TEST 8: clear hand-written Allow-Ranges headers +--- config + location /index.html { + more_set_headers 'Accept-Ranges: bytes'; + more_clear_headers 'Accept-Ranges'; + } +--- request + GET /index.html +--- response_headers +! Accept-Ranges +--- response_body_like: It works + + + +=== TEST 9: clear first, then add +--- config + location /bug { + more_clear_headers 'Foo'; + more_set_headers 'Foo: a'; + echo hello; + } +--- request + GET /bug +--- raw_response_headers_like eval +".*Foo: a.*" +--- response_body +hello + + + +=== TEST 10: first add, then clear, then add again +--- config + location /bug { + more_set_headers 'Foo: a'; + more_clear_headers 'Foo'; + more_set_headers 'Foo: b'; + echo hello; + } +--- request + GET /bug +--- raw_response_headers_like eval +".*Foo: b.*" +--- response_body +hello + + + +=== TEST 11: override charset +--- config + location /foo { + charset iso-8859-1; + default_type "text/html"; + echo hiya; + } + + location /bug { + more_set_headers "Content-Type: text/html; charset=UTF-8"; + proxy_pass http://127.0.0.1:$server_port/foo; + } +--- request + GET /bug +--- response_body +hiya +--- response_headers +Content-Type: text/html; charset=UTF-8 + + + +=== TEST 12: set multi-value header to a single value +--- config + location /main { + set $footer ''; + proxy_pass http://127.0.0.1:$server_port/foo; + more_set_headers 'Foo: b'; + header_filter_by_lua ' + ngx.var.footer = ngx.header.Foo + '; + echo_after_body $footer; + } + location /foo { + echo foo; + add_header Foo a; + add_header Foo c; + } +--- request + GET /main +--- response_headers +Foo: b +--- response_body +foo +b + + + +=== TEST 13: set multi values to cache-control and override it with multiple values (to reproduce a bug) +--- config + location /lua { + content_by_lua ' + ngx.header.cache_control = { "private", "no-store", "foo", "bar", "baz" } + ngx.send_headers() + ngx.say("Cache-Control: ", 
ngx.var.sent_http_cache_control) + '; + more_clear_headers Cache-Control; + add_header Cache-Control "blah"; + } +--- request + GET /lua +--- response_headers +Cache-Control: blah +--- response_body +Cache-Control: blah + + + +=== TEST 14: set 20+ headers +--- config + location /test { + more_clear_input_headers "Authorization"; + echo $http_a1; + echo $http_authorization; + echo $http_a2; + echo $http_a3; + echo $http_a23; + echo $http_a24; + echo $http_a25; + } +--- request + GET /test +--- more_headers eval +my $i = 1; +my $s; +while ($i <= 25) { + $s .= "A$i: $i\n"; + if ($i == 22) { + $s .= "Authorization: blah\n"; + } + $i++; +} +#warn $s; +$s +--- response_body +1 + +2 +3 +23 +24 +25 + + + +=== TEST 15: github #20: segfault caused by the nasty optimization in the nginx core (set) +--- config + location = /t/ { + more_set_headers "Foo: 1"; + proxy_pass http://127.0.0.1:$server_port; + } +--- request +GET /t +--- more_headers +Foo: bar +Bah: baz +--- response_body_like: 301 Moved Permanently +--- error_code: 301 +--- no_error_log +[error] + + + +=== TEST 16: github #20: segfault caused by the nasty optimization in the nginx core (clear) +--- config + location = /t/ { + more_clear_headers Foo; + proxy_pass http://127.0.0.1:$server_port; + } +--- request +GET /t +--- more_headers +Foo: bar +Bah: baz +--- response_body_like: 301 Moved Permanently +--- error_code: 301 +--- no_error_log +[error] + + + +=== TEST 17: Content-Type response headers with a charset param (correct -t values) +--- config + location = /t { + more_set_headers -t 'text/html' 'X-Foo: Bar'; + proxy_pass http://127.0.0.1:$server_port/fake; + } + + location = /fake { + default_type text/html; + charset utf-8; + echo ok; + } +--- request +GET /t +--- response_headers +X-Foo: Bar +--- response_body +ok + + + +=== TEST 18: Content-Type response headers with a charset param (WRONG -t values) +--- config + location = /t { + more_set_headers -t 'text/html; charset=utf-8' 'X-Foo: Bar'; + proxy_pass 
http://127.0.0.1:$server_port/fake; + } + + location = /fake { + default_type text/html; + charset utf-8; + echo ok; + } +--- request +GET /t +--- response_headers +X-Foo: Bar +--- response_body +ok + + + +=== TEST 19: for bad requests (bad request method letter case) +--- config + error_page 400 = /err; + + location = /err { + more_set_input_headers "Foo: bar"; + echo ok; + } +--- raw_request +GeT / HTTP/1.1 +--- response_body +ok +--- no_check_leak + + + +=== TEST 20: for bad requests (bad request method names) +--- config + error_page 400 = /err; + + location = /err { + more_set_input_headers "Foo: bar"; + echo ok; + } +--- raw_request +GET x HTTP/1.1 +--- response_body +ok +--- no_check_leak + + + +=== TEST 21: override Cache-Control header sent by proxy module +--- config + location = /back { + content_by_lua_block { + ngx.header['Cache-Control'] = 'max-age=0, no-cache' + ngx.send_headers() + ngx.say("Cache-Control: ", ngx.var.sent_http_cache_control) + } + } + + location = /t { + more_set_headers "Cache-Control: max-age=1800"; + proxy_pass http://127.0.0.1:$server_port/back; + } +--- request + GET /t +--- response_headers +Cache-Control: max-age=1800 +--- response_body +Cache-Control: max-age=0, no-cache + + + +=== TEST 22: 401 from upstream without WWW-Authenticate header +--- config + location = /back { + content_by_lua_block { + ngx.exit(401) + } + } + + location = /t { + more_set_headers -s "401" 'WWW-Authenticate: Bearer realm="https://my.org/auth"'; + proxy_pass http://127.0.0.1:$server_port/back; + } +--- request + GET /t +--- error_code: 401 +--- response_headers +WWW-Authenticate: Bearer realm="https://my.org/auth" +--- response_body eval +qr/401 Authorization Required/ + + + +=== TEST 23: 401 from upstream with WWW-Authenticate header +--- config + location = /back { + more_set_headers -s "401" 'WWW-Authenticate: Bearer realm="https://my.org/auth1"'; + content_by_lua_block { + ngx.exit(401) + } + } + + location = /t { + more_set_headers -s "401" 
'WWW-Authenticate: Bearer realm="https://my.org/auth"'; + proxy_pass http://127.0.0.1:$server_port/back; + } +--- request + GET /t +--- error_code: 401 +--- response_headers +WWW-Authenticate: Bearer realm="https://my.org/auth" +--- response_body eval +qr/401 Authorization Required/ diff --git a/ngx_headers_more/t/builtin.t b/ngx_headers_more/t/builtin.t new file mode 100644 index 0000000..27b20af --- /dev/null +++ b/ngx_headers_more/t/builtin.t @@ -0,0 +1,338 @@ +# vi:filetype= + +use lib 'lib'; +use Test::Nginx::Socket; # 'no_plan'; + +plan tests => 60; + +no_diff; + +run_tests(); + +__DATA__ + +=== TEST 1: set Server +--- config + location /foo { + echo hi; + more_set_headers 'Server: Foo'; + } +--- request + GET /foo +--- response_headers +Server: Foo +--- response_body +hi + + + +=== TEST 2: clear Server +--- config + location /foo { + echo hi; + more_clear_headers 'Server: '; + } +--- request + GET /foo +--- response_headers +! Server +--- response_body +hi + + + +=== TEST 3: set Content-Type +--- config + location /foo { + default_type 'text/plan'; + more_set_headers 'Content-Type: text/css'; + echo hi; + } +--- request + GET /foo +--- response_headers +Content-Type: text/css +--- response_body +hi + + + +=== TEST 4: set Content-Type +--- config + location /foo { + default_type 'text/plan'; + more_set_headers 'Content-Type: text/css'; + return 404; + } +--- request + GET /foo +--- response_headers +Content-Type: text/css +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 5: clear Content-Type +--- config + location /foo { + default_type 'text/plain'; + more_clear_headers 'Content-Type: '; + return 404; + } +--- request + GET /foo +--- response_headers +! 
Content-Type +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 6: clear Content-Type (colon not required) +--- config + location /foo { + default_type 'text/plain'; + more_set_headers 'Content-Type: Hello'; + more_clear_headers 'Content-Type'; + return 404; + } +--- request + GET /foo +--- response_headers +! Content-Type +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 7: clear Content-Type (value ignored) +--- config + location /foo { + default_type 'text/plain'; + more_set_headers 'Content-Type: Hello'; + more_clear_headers 'Content-Type: blah'; + return 404; + } +--- request + GET /foo +--- response_headers +! Content-Type +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 8: clear Content-Type (case insensitive) +--- config + location /foo { + default_type 'text/plain'; + more_set_headers 'Content-Type: Hello'; + more_clear_headers 'content-type: blah'; + return 404; + } +--- request + GET /foo +--- response_headers +! Content-Type +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 9: clear Content-Type using set empty +--- config + location /foo { + default_type 'text/plain'; + more_set_headers 'Content-Type: Hello'; + more_set_headers 'content-type:'; + return 404; + } +--- request + GET /foo +--- response_headers +! Content-Type +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 10: clear Content-Type using setting key only +--- config + location /foo { + default_type 'text/plain'; + more_set_headers 'Content-Type: Hello'; + more_set_headers 'content-type'; + return 404; + } +--- request + GET /foo +--- response_headers +! 
Content-Type +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 11: set content-length +--- config + location /len { + more_set_headers 'Content-Length: 2'; + echo hello; + } +--- request + GET /len +--- response_headers +Content-Length: 2 +--- response_body chop +he + + + +=== TEST 12: set content-length multiple times +--- config + location /len { + more_set_headers 'Content-Length: 2'; + more_set_headers 'Content-Length: 4'; + echo hello; + } +--- request + GET /len +--- response_headers +Content-Length: 4 +--- response_body chop +hell + + + +=== TEST 13: clear content-length +--- config + location /len { + more_set_headers 'Content-Length: 4'; + more_set_headers 'Content-Length:'; + echo hello; + } +--- request + GET /len +--- response_headers +! Content-Length +--- response_body +hello + + + +=== TEST 14: clear content-length (another way) +--- config + location /len { + more_set_headers 'Content-Length: 4'; + more_clear_headers 'Content-Length'; + echo hello; + } +--- request + GET /len +--- response_headers +! Content-Length +--- response_body +hello + + + +=== TEST 15: clear content-type +--- config + location /len { + default_type 'text/plain'; + more_set_headers 'Content-Type:'; + echo hello; + } +--- request + GET /len +--- response_headers +! Content-Type +--- response_body +hello + + + +=== TEST 16: clear content-type (the other way) +--- config + location /len { + default_type 'text/plain'; + more_clear_headers 'Content-Type:'; + echo hello; + } +--- request + GET /len +--- response_headers +! 
Content-Type +--- response_body +hello + + + +=== TEST 17: set Charset +--- config + location /len { + default_type 'text/plain'; + more_set_headers 'Charset: gbk'; + echo hello; + } +--- request + GET /len +--- response_headers +Charset: gbk +--- response_body +hello + + + +=== TEST 18: clear Charset +--- config + location /len { + default_type 'text/plain'; + more_set_headers 'Charset: gbk'; + more_clear_headers 'Charset'; + echo hello; + } +--- request + GET /len +--- response_headers +! Charset +--- response_body +hello + + + +=== TEST 19: clear Charset (the other way: using set) +--- config + location /len { + default_type 'text/plain'; + more_set_headers 'Charset: gbk'; + more_set_headers 'Charset: '; + echo hello; + } +--- request + GET /len +--- response_headers +! Charset +--- response_body +hello + + + +=== TEST 20: set Vary +--- config + location /foo { + more_set_headers 'Vary: gbk'; + echo hello; + } + location /len { + default_type 'text/plain'; + more_set_headers 'Vary: hello'; + proxy_pass http://127.0.0.1:$server_port/foo; + } +--- request + GET /len +--- response_headers +Vary: hello +--- response_body +hello diff --git a/ngx_headers_more/t/eval.t b/ngx_headers_more/t/eval.t new file mode 100644 index 0000000..febd306 --- /dev/null +++ b/ngx_headers_more/t/eval.t @@ -0,0 +1,36 @@ +# vi:filetype= + +use lib 'lib'; +use Test::Nginx::Socket; # 'no_plan'; + +repeat_each(3); + +plan tests => repeat_each() * 2 * blocks(); + +#no_long_string(); +#no_diff; + +run_tests(); + +__DATA__ + +=== TEST 1: set request header at client side +--- config + location /foo { + eval_subrequest_in_memory off; + eval_override_content_type text/plain; + eval $res { + echo -n 1; + } + #echo "[$res]"; + if ($res = '1') { + more_set_input_headers 'Foo: Bar'; + echo "OK"; + break; + } + echo "NOT OK"; + } +--- request + GET /foo +--- response_body +OK diff --git a/ngx_headers_more/t/input-conn.t b/ngx_headers_more/t/input-conn.t new file mode 100644 index 0000000..f53e80f --- 
/dev/null +++ b/ngx_headers_more/t/input-conn.t @@ -0,0 +1,137 @@ +# vim:set ft= ts=4 sw=4 et fdm=marker: + +use lib 'lib'; +use Test::Nginx::Socket; + +#worker_connections(1014); +#master_process_enabled(1); +#log_level('warn'); + +repeat_each(2); + +plan tests => repeat_each() * (4 * blocks()); + +#no_diff(); +no_long_string(); + +run_tests(); + +__DATA__ + +=== TEST 1: clear the Connection req header +--- config + location /req-header { + more_clear_input_headers Connection; + echo "connection: $http_connection"; + } +--- request +GET /req-header + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: conn type: %d\n", $r->headers_in->connection_type) +} + + +F(ngx_http_core_content_phase) { + printf("content: conn type: %d\n", $r->headers_in->connection_type) +} + +--- stap_out +rewrite: conn type: 1 +content: conn type: 0 + +--- response_body +connection: +--- no_error_log +[error] + + + +=== TEST 2: set custom Connection req header (close) +--- config + location /req-header { + more_set_input_headers "Connection: CLOSE"; + echo "connection: $http_connection"; + } +--- request +GET /req-header + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: conn type: %d\n", $r->headers_in->connection_type) +} + + +F(ngx_http_core_content_phase) { + printf("content: conn type: %d\n", $r->headers_in->connection_type) +} + +--- stap_out +rewrite: conn type: 1 +content: conn type: 1 + +--- response_body +connection: CLOSE +--- no_error_log +[error] + + + +=== TEST 3: set custom Connection req header (keep-alive) +--- config + location /req-header { + more_set_input_headers "Connection: keep-alive"; + echo "connection: $http_connection"; + } +--- request +GET /req-header + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: conn type: %d\n", $r->headers_in->connection_type) +} + + +F(ngx_http_core_content_phase) { + printf("content: conn type: %d\n", $r->headers_in->connection_type) +} + +--- stap_out +rewrite: conn 
type: 1 +content: conn type: 2 + +--- response_body +connection: keep-alive +--- no_error_log +[error] + + + +=== TEST 4: set custom Connection req header (bad) +--- config + location /req-header { + more_set_input_headers "Connection: bad"; + echo "connection: $http_connection"; + } +--- request +GET /req-header + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: conn type: %d\n", $r->headers_in->connection_type) +} + + +F(ngx_http_core_content_phase) { + printf("content: conn type: %d\n", $r->headers_in->connection_type) +} + +--- stap_out +rewrite: conn type: 1 +content: conn type: 0 + +--- response_body +connection: bad +--- no_error_log +[error] diff --git a/ngx_headers_more/t/input-cookie.t b/ngx_headers_more/t/input-cookie.t new file mode 100644 index 0000000..3e5257b --- /dev/null +++ b/ngx_headers_more/t/input-cookie.t @@ -0,0 +1,183 @@ +# vim:set ft= ts=4 sw=4 et fdm=marker: + +use lib 'lib'; +use Test::Nginx::Socket; + +#worker_connections(1014); +#master_process_enabled(1); +#log_level('warn'); + +repeat_each(2); + +plan tests => repeat_each() * (4 * blocks()); + +#no_diff(); +no_long_string(); + +run_tests(); + +__DATA__ + +=== TEST 1: clear cookie (with existing cookies) +--- config + location /t { + more_clear_input_headers Cookie; + echo "Cookie foo: $cookie_foo"; + echo "Cookie baz: $cookie_baz"; + echo "Cookie: $http_cookie"; + } +--- request +GET /t +--- more_headers +Cookie: foo=bar +Cookie: baz=blah + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: cookies: %d\n", $r->headers_in->cookies->nelts) +} + +F(ngx_http_core_content_phase) { + printf("content: cookies: %d\n", $r->headers_in->cookies->nelts) +} + +--- stap_out +rewrite: cookies: 2 +content: cookies: 0 + +--- response_body +Cookie foo: +Cookie baz: +Cookie: + +--- no_error_log +[error] + + + +=== TEST 2: clear cookie (without existing cookies) +--- config + location /t { + more_clear_input_headers Cookie; + echo "Cookie foo: $cookie_foo"; + 
echo "Cookie baz: $cookie_baz"; + echo "Cookie: $http_cookie"; + } +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: cookies: %d\n", $r->headers_in->cookies->nelts) +} + +F(ngx_http_core_content_phase) { + printf("content: cookies: %d\n", $r->headers_in->cookies->nelts) +} + +--- stap_out +rewrite: cookies: 0 +content: cookies: 0 + +--- response_body +Cookie foo: +Cookie baz: +Cookie: + +--- no_error_log +[error] + + + +=== TEST 3: set one custom cookie (with existing cookies) +--- config + location /t { + more_set_input_headers "Cookie: boo=123"; + echo "Cookie foo: $cookie_foo"; + echo "Cookie baz: $cookie_baz"; + echo "Cookie boo: $cookie_boo"; + echo "Cookie: $http_cookie"; + } +--- request +GET /t +--- more_headers +Cookie: foo=bar +Cookie: baz=blah + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: cookies: %d\n", $r->headers_in->cookies->nelts) +} + +F(ngx_http_core_content_phase) { + printf("content: cookies: %d\n", $r->headers_in->cookies->nelts) +} + +--- stap_out +rewrite: cookies: 2 +content: cookies: 1 + +--- response_body +Cookie foo: +Cookie baz: +Cookie boo: 123 +Cookie: boo=123 + +--- no_error_log +[error] + + + +=== TEST 4: set one custom cookie (without existing cookies) +--- config + location /t { + more_set_input_headers "Cookie: boo=123"; + echo "Cookie foo: $cookie_foo"; + echo "Cookie baz: $cookie_baz"; + echo "Cookie boo: $cookie_boo"; + echo "Cookie: $http_cookie"; + } +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: cookies: %d\n", $r->headers_in->cookies->nelts) +} + +F(ngx_http_core_content_phase) { + printf("content: cookies: %d\n", $r->headers_in->cookies->nelts) +} + +--- stap_out +rewrite: cookies: 0 +content: cookies: 1 + +--- response_body +Cookie foo: +Cookie baz: +Cookie boo: 123 +Cookie: boo=123 + +--- no_error_log +[error] + + + +=== TEST 5: for bad requests causing segfaults when setting & getting multi-value 
headers +--- config + error_page 400 = /err; + + location = /err { + more_set_input_headers "Cookie: foo=bar"; + echo -n $cookie_foo; + echo ok; + } +--- raw_request +GeT / HTTP/1.1 +--- response_body +ok +--- no_error_log +[warn] +[error] +--- no_check_leak diff --git a/ngx_headers_more/t/input-ua.t b/ngx_headers_more/t/input-ua.t new file mode 100644 index 0000000..da9a60d --- /dev/null +++ b/ngx_headers_more/t/input-ua.t @@ -0,0 +1,628 @@ +# vim:set ft= ts=4 sw=4 et fdm=marker: + +use lib 'lib'; +use Test::Nginx::Socket; + +#worker_connections(1014); +#master_process_enabled(1); +#log_level('warn'); + +repeat_each(2); + +plan tests => repeat_each() * (4 * blocks()); + +#no_diff(); +no_long_string(); + +run_tests(); + +__DATA__ + +=== TEST 1: clear Opera user-agent +--- config + location /t { + more_clear_input_headers User-Agent; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- more_headers +User-Agent: Opera/9.80 (Macintosh; Intel Mac OS X 10.7.4; U; en) Presto/2.10.229 Version/11.62 + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: opera: %d\n", $r->headers_in->opera) +} + +F(ngx_http_core_content_phase) { + printf("content: opera: %d\n", $r->headers_in->opera) +} + +--- stap_out +rewrite: opera: 1 +content: opera: 0 + +--- response_body +User-Agent: +--- no_error_log +[error] + + + +=== TEST 2: clear MSIE 4 user-agent +--- config + location /t { + more_clear_input_headers User-Agent; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- more_headers +User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows NT 5.0) + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +F(ngx_http_core_content_phase) { + printf("content: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +--- stap_out +rewrite: msie=1 msie6=1 +content: msie=0 msie6=0 + +--- response_body +User-Agent: +--- 
no_error_log +[error] + + + +=== TEST 3: set custom MSIE 4 user-agent +--- config + location /t { + more_set_input_headers "User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows NT 5.0)"; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +F(ngx_http_core_content_phase) { + printf("content: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +--- stap_out +rewrite: msie=0 msie6=0 +content: msie=1 msie6=1 + +--- response_body +User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows NT 5.0) +--- no_error_log +[error] + + + +=== TEST 4: clear MSIE 5 user-agent +--- config + location /t { + more_clear_input_headers User-Agent; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- more_headers +User-Agent: Mozilla/4.0 (compatible; MSIE 5.01; Windows 95; MSIECrawler) + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +F(ngx_http_core_content_phase) { + printf("content: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +--- stap_out +rewrite: msie=1 msie6=1 +content: msie=0 msie6=0 + +--- response_body +User-Agent: +--- no_error_log +[error] + + + +=== TEST 5: set custom MSIE 5 user-agent +--- config + location /t { + more_set_input_headers "User-Agent: Mozilla/4.0 (compatible; MSIE 5.01; Windows 95; MSIECrawler)"; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +F(ngx_http_core_content_phase) { + printf("content: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +--- stap_out +rewrite: msie=0 msie6=0 +content: msie=1 msie6=1 + +--- response_body +User-Agent: 
Mozilla/4.0 (compatible; MSIE 5.01; Windows 95; MSIECrawler) +--- no_error_log +[error] + + + +=== TEST 6: clear MSIE 6 (without SV1) user-agent +--- config + location /t { + more_clear_input_headers User-Agent; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- more_headers +User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Google Wireless Transcoder;) + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +F(ngx_http_core_content_phase) { + printf("content: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +--- stap_out +rewrite: msie=1 msie6=1 +content: msie=0 msie6=0 + +--- response_body +User-Agent: +--- no_error_log +[error] + + + +=== TEST 7: set custom MSIE 6 (without SV1) user-agent +--- config + location /t { + more_set_input_headers "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Google Wireless Transcoder;)"; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +F(ngx_http_core_content_phase) { + printf("content: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +--- stap_out +rewrite: msie=0 msie6=0 +content: msie=1 msie6=1 + +--- response_body +User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Google Wireless Transcoder;) +--- no_error_log +[error] + + + +=== TEST 8: clear MSIE 6 (with SV1) user-agent +--- config + location /t { + more_clear_input_headers User-Agent; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- more_headers +User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.1) + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + 
+F(ngx_http_core_content_phase) { + printf("content: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +--- stap_out +rewrite: msie=1 msie6=0 +content: msie=0 msie6=0 + +--- response_body +User-Agent: +--- no_error_log +[error] + + + +=== TEST 9: set custom MSIE 6 (with SV1) user-agent +--- config + location /t { + more_set_input_headers "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.1)"; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +F(ngx_http_core_content_phase) { + printf("content: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +--- stap_out +rewrite: msie=0 msie6=0 +content: msie=1 msie6=0 + +--- response_body +User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.1) +--- no_error_log +[error] + + + +=== TEST 10: set custom MSIE 7 user-agent +--- config + location /t { + more_set_input_headers "User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; winfx; .NET CLR 1.1.4322; .NET CLR 2.0.50727; Zune 2.0)"; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +F(ngx_http_core_content_phase) { + printf("content: msie=%d msie6=%d\n", + $r->headers_in->msie, + $r->headers_in->msie6) +} + +--- stap_out +rewrite: msie=0 msie6=0 +content: msie=1 msie6=0 + +--- response_body +User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; winfx; .NET CLR 1.1.4322; .NET CLR 2.0.50727; Zune 2.0) +--- no_error_log +[error] + + + +=== TEST 11: clear Gecko user-agent +--- config + location /t { + more_clear_input_headers User-Agent; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- more_headers +User-Agent: 
Mozilla/5.0 (Android; Mobile; rv:13.0) Gecko/13.0 Firefox/13.0 + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: gecko: %d\n", $r->headers_in->gecko) +} + + +F(ngx_http_core_content_phase) { + printf("content: gecko: %d\n", $r->headers_in->gecko) +} + +--- stap_out +rewrite: gecko: 1 +content: gecko: 0 + +--- response_body +User-Agent: +--- no_error_log +[error] + + + +=== TEST 12: set custom Gecko user-agent +--- config + location /t { + more_set_input_headers "User-Agent: Mozilla/5.0 (Android; Mobile; rv:13.0) Gecko/13.0 Firefox/13.0"; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: gecko: %d\n", $r->headers_in->gecko) +} + + +F(ngx_http_core_content_phase) { + printf("content: gecko: %d\n", $r->headers_in->gecko) +} + +--- stap_out +rewrite: gecko: 0 +content: gecko: 1 + +--- response_body +User-Agent: Mozilla/5.0 (Android; Mobile; rv:13.0) Gecko/13.0 Firefox/13.0 +--- no_error_log +[error] + + + +=== TEST 13: clear Chrome user-agent +--- config + location /t { + more_clear_input_headers User-Agent; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- more_headers +User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19 + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: chrome: %d\n", $r->headers_in->chrome) +} + + +F(ngx_http_core_content_phase) { + printf("content: chrome: %d\n", $r->headers_in->chrome) +} + +--- stap_out +rewrite: chrome: 1 +content: chrome: 0 + +--- response_body +User-Agent: +--- no_error_log +[error] + + + +=== TEST 14: set custom Chrome user-agent +--- config + location /t { + more_set_input_headers "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19"; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- 
stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: chrome: %d\n", $r->headers_in->chrome) +} + + +F(ngx_http_core_content_phase) { + printf("content: chrome: %d\n", $r->headers_in->chrome) +} + +--- stap_out +rewrite: chrome: 0 +content: chrome: 1 + +--- response_body +User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19 +--- no_error_log +[error] + + + +=== TEST 15: clear Safari (Mac OS X) user-agent +--- config + location /t { + more_clear_input_headers User-Agent; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- more_headers +User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/125.2 (KHTML, like Gecko) Safari/125.8 + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: safari: %d\n", $r->headers_in->safari) +} + + +F(ngx_http_core_content_phase) { + printf("content: safari: %d\n", $r->headers_in->safari) +} + +--- stap_out +rewrite: safari: 1 +content: safari: 0 + +--- response_body +User-Agent: +--- no_error_log +[error] + + + +=== TEST 16: set custom Safari user-agent +--- config + location /t { + more_set_input_headers "User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/125.2 (KHTML, like Gecko) Safari/125.8"; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: safari: %d\n", $r->headers_in->safari) +} + + +F(ngx_http_core_content_phase) { + printf("content: safari: %d\n", $r->headers_in->safari) +} + +--- stap_out +rewrite: safari: 0 +content: safari: 1 + +--- response_body +User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/125.2 (KHTML, like Gecko) Safari/125.8 +--- no_error_log +[error] + + + +=== TEST 17: clear Konqueror user-agent +--- config + location /t { + more_clear_input_headers User-Agent; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- 
more_headers +User-Agent: Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.10 (like Gecko) (Kubuntu) + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: konqueror: %d\n", $r->headers_in->konqueror) +} + + +F(ngx_http_core_content_phase) { + printf("content: konqueror: %d\n", $r->headers_in->konqueror) +} + +--- stap_out +rewrite: konqueror: 1 +content: konqueror: 0 + +--- response_body +User-Agent: +--- no_error_log +[error] + + + +=== TEST 18: set custom Konqueror user-agent +--- config + location /t { + more_set_input_headers "User-Agent: Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.10 (like Gecko) (Kubuntu)"; + echo "User-Agent: $http_user_agent"; + } + +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + printf("rewrite: konqueror: %d\n", $r->headers_in->konqueror) +} + + +F(ngx_http_core_content_phase) { + printf("content: konqueror: %d\n", $r->headers_in->konqueror) +} + +--- stap_out +rewrite: konqueror: 0 +content: konqueror: 1 + +--- response_body +User-Agent: Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.10 (like Gecko) (Kubuntu) +--- no_error_log +[error] diff --git a/ngx_headers_more/t/input.t b/ngx_headers_more/t/input.t new file mode 100644 index 0000000..f1afc90 --- /dev/null +++ b/ngx_headers_more/t/input.t @@ -0,0 +1,1384 @@ +# vi:filetype= + +use lib 'lib'; +use Test::Nginx::Socket; # 'no_plan'; + +repeat_each(2); + +plan tests => repeat_each() * 128; + +no_long_string(); +#no_diff; + +run_tests(); + +__DATA__ + +=== TEST 1: set request header at client side +--- config + location /foo { + #more_set_input_headers 'X-Foo: howdy'; + echo $http_x_foo; + } +--- request + GET /foo +--- more_headers +X-Foo: blah +--- response_headers +! 
X-Foo +--- response_body +blah + + + +=== TEST 2: set request header at client side and rewrite it +--- config + location /foo { + more_set_input_headers 'X-Foo: howdy'; + echo $http_x_foo; + } +--- request + GET /foo +--- more_headers +X-Foo: blah +--- response_headers +! X-Foo +--- response_body +howdy + + + +=== TEST 3: rewrite content length +--- config + location /bar { + more_set_input_headers 'Content-Length: 2048'; + echo_read_request_body; + echo_request_body; + } +--- request eval +"POST /bar\n" . +"a" x 4096 +--- response_body eval +"a" x 2048 +--- timeout: 15 + + + +=== TEST 4: try to rewrite content length using the rewrite module +This should not take effect ;) +--- config + location /bar { + set $http_content_length 2048; + echo_read_request_body; + echo_request_body; + } +--- request eval +"POST /bar\n" . +"a" x 4096 +--- response_body eval +"a" x 4096 + + + +=== TEST 5: rewrite host and user-agent +--- config + location /bar { + more_set_input_headers 'Host: foo' 'User-Agent: blah'; + echo "Host: $host"; + echo "User-Agent: $http_user_agent"; + } +--- request +GET /bar +--- response_body +Host: foo +User-Agent: blah + + + +=== TEST 6: clear host and user-agent +$host always has a default value and cannot be really cleared.
+--- config + location /bar { + more_clear_input_headers 'Host: foo' 'User-Agent: blah'; + echo "Host: $host"; + echo "Host (2): $http_host"; + echo "User-Agent: $http_user_agent"; + } +--- request +GET /bar +--- response_body +Host: localhost +Host (2): +User-Agent: + + + +=== TEST 7: clear host and user-agent (the other way) +--- config + location /bar { + more_set_input_headers 'Host:' 'User-Agent:' 'X-Foo:'; + echo "Host: $host"; + echo "User-Agent: $http_user_agent"; + echo "X-Foo: $http_x_foo"; + } +--- request +GET /bar +--- more_headers +X-Foo: bar +--- response_body +Host: localhost +User-Agent: +X-Foo: + + + +=== TEST 8: clear content-length +--- config + location /bar { + more_set_input_headers 'Content-Length: '; + echo "Content-Length: $http_content_length"; + } +--- request +POST /bar +hello +--- more_headers +--- response_body +Content-Length: + + + +=== TEST 9: clear content-length (the other way) +--- config + location /bar { + more_clear_input_headers 'Content-Length: '; + echo "Content-Length: $http_content_length"; + } +--- request +POST /bar +hello +--- more_headers +--- response_body +Content-Length: + + + +=== TEST 10: rewrite type +--- config + location /bar { + more_set_input_headers 'Content-Type: text/css'; + echo "Content-Type: $content_type"; + } +--- request +POST /bar +hello +--- more_headers +Content-Type: text/plain +--- response_body +Content-Type: text/css + + + +=== TEST 11: clear type +--- config + location /bar { + more_set_input_headers 'Content-Type:'; + echo "Content-Type: $content_type"; + } +--- request +POST /bar +hello +--- more_headers +Content-Type: text/plain +--- response_body +Content-Type: + + + +=== TEST 12: clear type (the other way) +--- config + location /bar { + more_clear_input_headers 'Content-Type:foo'; + echo "Content-Type: $content_type"; + } +--- request +POST /bar +hello +--- more_headers +Content-Type: text/plain +--- response_body +Content-Type: + + + +=== TEST 13: add type constraints +--- config + 
location /bar { + more_set_input_headers -t 'text/plain' 'X-Blah:yay'; + echo $http_x_blah; + } +--- request +POST /bar +hello +--- more_headers +Content-Type: text/plain +--- response_body +yay + + + +=== TEST 14: add type constraints (not matched) +--- config + location /bar { + more_set_input_headers -t 'text/plain' 'X-Blah:yay'; + echo $http_x_blah; + } +--- request +POST /bar +hello +--- more_headers +Content-Type: text/css +--- response_body eval: "\n" + + + +=== TEST 15: add type constraints (OR'd) +--- config + location /bar { + more_set_input_headers -t 'text/plain text/css' 'X-Blah:yay'; + echo $http_x_blah; + } +--- request +POST /bar +hello +--- more_headers +Content-Type: text/css +--- response_body +yay + + + +=== TEST 16: add type constraints (OR'd) +--- config + location /bar { + more_set_input_headers -t 'text/plain text/css' 'X-Blah:yay'; + echo $http_x_blah; + } +--- request +POST /bar +hello +--- more_headers +Content-Type: text/plain +--- response_body +yay + + + +=== TEST 17: add type constraints (OR'd) (not matched) +--- config + location /bar { + more_set_input_headers -t 'text/plain text/css' 'X-Blah:yay'; + echo $http_x_blah; + } +--- request +POST /bar +hello +--- more_headers +Content-Type: text/html +--- response_body eval: "\n" + + + +=== TEST 18: mix input and output cmds +--- config + location /bar { + more_set_input_headers 'X-Blah:yay'; + more_set_headers 'X-Blah:hiya'; + echo $http_x_blah; + } +--- request +GET /bar +--- response_headers +X-Blah: hiya +--- response +yay + + + +=== TEST 19: set request header at client side and replace +--- config + location /foo { + more_set_input_headers -r 'X-Foo: howdy'; + echo $http_x_foo; + } +--- request + GET /foo +--- more_headers +X-Foo: blah +--- response_headers +! 
X-Foo +--- response_body +howdy + + + +=== TEST 20: do not set request header at client, so no replace with -r option +--- config + location /foo { + more_set_input_headers -r 'X-Foo: howdy'; + echo "empty_header:" $http_x_foo; + } +--- request + GET /foo +--- response_headers +! X-Foo +--- response_body +empty_header: + + + +=== TEST 21: clear input headers +--- config + location /foo { + set $val 'dog'; + + more_clear_input_headers 'User-Agent'; + + proxy_pass http://127.0.0.1:$server_port/proxy; + proxy_http_version 1.0; + proxy_set_header Connection close; + } + location /proxy { + echo -n $echo_client_request_headers; + } +--- request + GET /foo +--- more_headers +User-Agent: my-sock +--- response_body eval +$Test::Nginx::Util::NginxVersion < 1.029008 ? +"GET /proxy HTTP/1.0\r +Host: 127.0.0.1:\$ServerPort\r +Connection: close\r +\r +" +: +"GET /proxy HTTP/1.0\r +Connection: close\r +Host: 127.0.0.1:\$ServerPort\r +\r +"; +--- skip_nginx: 3: < 0.7.46 + + + +=== TEST 22: clear input headers +--- config + location /foo { + more_clear_input_headers 'User-Agent'; + + proxy_pass http://127.0.0.1:$server_port/proxy; + proxy_http_version 1.0; + proxy_set_header Connection close; + } + location /proxy { + echo -n $echo_client_request_headers; + } +--- request + GET /foo +--- response_body eval +$Test::Nginx::Util::NginxVersion < 1.029008 ?
+"GET /proxy HTTP/1.0\r +Host: 127.0.0.1:\$ServerPort\r +Connection: close\r +\r +" +: +"GET /proxy HTTP/1.0\r +Connection: close\r +Host: 127.0.0.1:\$ServerPort\r +\r +"; +--- skip_nginx: 3: < 0.7.46 + + + +=== TEST 23: clear input headers +--- config + location /foo { + more_clear_input_headers 'X-Foo19'; + more_clear_input_headers 'X-Foo20'; + more_clear_input_headers 'X-Foo21'; + + proxy_pass http://127.0.0.1:$server_port/proxy; + proxy_http_version 1.0; + proxy_set_header Connection close; + } + location /proxy { + echo -n $echo_client_request_headers; + } +--- request + GET /foo +--- more_headers eval +my $s; +for my $i (3..21) { + $s .= "X-Foo$i: $i\n"; +} +$s; +--- response_body eval + +my $comm_header = $Test::Nginx::Util::NginxVersion < 1.029008 ? + "Host: 127.0.0.1:\$ServerPort\r\nConnection: close\r" : "Connection: close\r\nHost: 127.0.0.1:\$ServerPort\r"; + +"GET /proxy HTTP/1.0\r +$comm_header +X-Foo3: 3\r +X-Foo4: 4\r +X-Foo5: 5\r +X-Foo6: 6\r +X-Foo7: 7\r +X-Foo8: 8\r +X-Foo9: 9\r +X-Foo10: 10\r +X-Foo11: 11\r +X-Foo12: 12\r +X-Foo13: 13\r +X-Foo14: 14\r +X-Foo15: 15\r +X-Foo16: 16\r +X-Foo17: 17\r +X-Foo18: 18\r +\r +" +--- skip_nginx: 3: < 0.7.46 + + + +=== TEST 24: Accept-Encoding +--- config + location /bar { + default_type 'text/plain'; + more_set_input_headers 'Accept-Encoding: gzip'; + gzip on; + gzip_min_length 1; + gzip_buffers 4 8k; + gzip_types text/plain; + } +--- user_files +">>> bar +" . ("hello" x 512) +--- request +GET /bar +--- response_headers +Content-Encoding: gzip +--- response_body_like: . + + + +=== TEST 25: rewrite + set request header +--- config + location /t { + rewrite ^ /foo last; + } + + location /foo { + more_set_input_headers 'X-Foo: howdy'; + proxy_pass http://127.0.0.1:$server_port/echo; + proxy_http_version 1.0; + proxy_set_header Connection close; + } + + location /echo { + echo "X-Foo: $http_x_foo"; + } +--- request + GET /foo +--- response_headers +! 
X-Foo +--- response_body +X-Foo: howdy + + + +=== TEST 26: clear_header should clear all the instances of the user custom header +--- config + location = /t { + more_clear_input_headers Foo; + + proxy_pass http://127.0.0.1:$server_port/echo; + proxy_http_version 1.0; + proxy_set_header Connection close; + } + + location = /echo { + echo "Foo: [$http_foo]"; + echo "Test-Header: [$http_test_header]"; + } +--- request +GET /t +--- more_headers +Foo: foo +Foo: bah +Test-Header: 1 +--- response_body +Foo: [] +Test-Header: [1] + + + +=== TEST 27: clear_header should clear all the instances of the builtin header +--- config + location = /t { + more_clear_input_headers Content-Type; + + proxy_pass http://127.0.0.1:$server_port/echo; + proxy_http_version 1.0; + proxy_set_header Connection close; + } + + location = /echo { + echo "Content-Type: [$http_content_type]"; + echo "Test-Header: [$http_test_header]"; + #echo $echo_client_request_headers; + } +--- request +GET /t +--- more_headers +Content-Type: foo +Content-Type: bah +Test-Header: 1 +--- response_body +Content-Type: [] +Test-Header: [1] + + + +=== TEST 28: Converting POST to GET - clearing headers (bug found by Matthieu Tourne, 411 error page) +--- config + location /t { + more_clear_input_headers Content-Type; + more_clear_input_headers Content-Length; + + #proxy_pass http://127.0.0.1:8888; + proxy_pass http://127.0.0.1:$server_port/back; + proxy_http_version 1.0; + proxy_set_header Connection close; + } + + location /back { + echo $echo_client_request_headers; + } +--- request +POST /t +hello world +--- more_headers +Content-Type: application/ocsp-request +Test-Header: 1 +--- response_body_like eval +if ($Test::Nginx::Util::NginxVersion < 1.029008) { +qr/Connection: close\r +Test-Header: 1\r +\r +$/ +} else { +qr/POST \/back HTTP\/1.0\r +Connection: close\r +Host: 127.0.0.1:$ENV{TEST_NGINX_SERVER_PORT}\r +Test-Header: 1\r +\r +$/ +} +--- no_error_log +[error] + + + +=== TEST 29: clear_header() does not duplicate 
subsequent headers (old bug) +--- config + location = /t { + more_clear_input_headers Foo; + + proxy_pass http://127.0.0.1:$server_port/echo; + proxy_http_version 1.0; + proxy_set_header Connection close; + } + + location = /echo { + echo $echo_client_request_headers; + } +--- request +GET /t +--- more_headers +Bah: bah +Foo: foo +Test-Header: 1 +Foo1: foo1 +Foo2: foo2 +Foo3: foo3 +Foo4: foo4 +Foo5: foo5 +Foo6: foo6 +Foo7: foo7 +Foo8: foo8 +Foo9: foo9 +Foo10: foo10 +Foo11: foo11 +Foo12: foo12 +Foo13: foo13 +Foo14: foo14 +Foo15: foo15 +Foo16: foo16 +Foo17: foo17 +Foo18: foo18 +Foo19: foo19 +Foo20: foo20 +Foo21: foo21 +Foo22: foo22 +--- response_body_like eval +qr/Bah: bah\r +Test-Header: 1\r +Foo1: foo1\r +Foo2: foo2\r +Foo3: foo3\r +Foo4: foo4\r +Foo5: foo5\r +Foo6: foo6\r +Foo7: foo7\r +Foo8: foo8\r +Foo9: foo9\r +Foo10: foo10\r +Foo11: foo11\r +Foo12: foo12\r +Foo13: foo13\r +Foo14: foo14\r +Foo15: foo15\r +Foo16: foo16\r +Foo17: foo17\r +Foo18: foo18\r +Foo19: foo19\r +Foo20: foo20\r +Foo21: foo21\r +Foo22: foo22\r +/ + + + +=== TEST 30: clear input header (just more than 20 headers) +--- config + location = /t { + more_clear_input_headers "R"; + proxy_pass http://127.0.0.1:$server_port/back; + proxy_http_version 1.0; + proxy_set_header Connection close; + proxy_set_header Host foo; + } + + location = /back { + echo -n $echo_client_request_headers; + } +--- request +GET /t +--- more_headers eval +my $s = "User-Agent: curl\n"; + +for my $i ('a' .. 'r') { + $s .= uc($i) . ": " . "$i\n" +} +$s +--- response_body eval +my $comm_header = $Test::Nginx::Util::NginxVersion < 1.029008 ? 
+"Host: foo\r\nConnection: close\r" : "Connection: close\r\nHost: foo\r"; +"GET /back HTTP/1.0\r +$comm_header +User-Agent: curl\r +A: a\r +B: b\r +C: c\r +D: d\r +E: e\r +F: f\r +G: g\r +H: h\r +I: i\r +J: j\r +K: k\r +L: l\r +M: m\r +N: n\r +O: o\r +P: p\r +Q: q\r +\r +" + + + +=== TEST 31: clear input header (just more than 20 headers, and add more) +--- config + location = /t { + more_clear_input_headers R; + more_set_input_headers "foo-1: 1" "foo-2: 2" "foo-3: 3" "foo-4: 4" + "foo-5: 5" "foo-6: 6" "foo-7: 7" "foo-8: 8" "foo-9: 9" + "foo-10: 10" "foo-11: 11" "foo-12: 12" "foo-13: 13" + "foo-14: 14" "foo-15: 15" "foo-16: 16" "foo-17: 17" "foo-18: 18" + "foo-19: 19" "foo-20: 20" "foo-21: 21"; + + proxy_pass http://127.0.0.1:$server_port/back; + proxy_http_version 1.0; + proxy_set_header Connection close; + proxy_set_header Host foo; + } + + location = /back { + echo -n $echo_client_request_headers; + } +--- request +GET /t +--- more_headers eval +my $s = "User-Agent: curl\n"; + +for my $i ('a' .. 'r') { + $s .= uc($i) . ": " . "$i\n" +} +$s +--- response_body eval +my $comm_header = $Test::Nginx::Util::NginxVersion < 1.029008 ? 
+"Host: foo\r\nConnection: close\r" : "Connection: close\r\nHost: foo\r"; +"GET /back HTTP/1.0\r +$comm_header +User-Agent: curl\r +A: a\r +B: b\r +C: c\r +D: d\r +E: e\r +F: f\r +G: g\r +H: h\r +I: i\r +J: j\r +K: k\r +L: l\r +M: m\r +N: n\r +O: o\r +P: p\r +Q: q\r +foo-1: 1\r +foo-2: 2\r +foo-3: 3\r +foo-4: 4\r +foo-5: 5\r +foo-6: 6\r +foo-7: 7\r +foo-8: 8\r +foo-9: 9\r +foo-10: 10\r +foo-11: 11\r +foo-12: 12\r +foo-13: 13\r +foo-14: 14\r +foo-15: 15\r +foo-16: 16\r +foo-17: 17\r +foo-18: 18\r +foo-19: 19\r +foo-20: 20\r +foo-21: 21\r +\r +" + + + +=== TEST 32: clear input header (just more than 21 headers) +--- config + location = /t { + more_clear_input_headers R Q; + proxy_pass http://127.0.0.1:$server_port/back; + proxy_http_version 1.0; + proxy_set_header Connection close; + proxy_set_header Host foo; + } + + location = /back { + echo -n $echo_client_request_headers; + } +--- request +GET /t +--- more_headers eval +my $s = "User-Agent: curl\nBah: bah\n"; + +for my $i ('a' .. 'r') { + $s .= uc($i) . ": " . "$i\n" +} +$s +--- response_body eval +my $comm_header = $Test::Nginx::Util::NginxVersion < 1.029008 ? 
+"Host: foo\r\nConnection: close\r" : "Connection: close\r\nHost: foo\r"; +"GET /back HTTP/1.0\r +$comm_header +User-Agent: curl\r +Bah: bah\r +A: a\r +B: b\r +C: c\r +D: d\r +E: e\r +F: f\r +G: g\r +H: h\r +I: i\r +J: j\r +K: k\r +L: l\r +M: m\r +N: n\r +O: o\r +P: p\r +\r +" + + + +=== TEST 33: clear input header (just more than 21 headers) +--- config + location = /t { + more_clear_input_headers R Q; + more_set_input_headers "foo-1: 1" "foo-2: 2" "foo-3: 3" "foo-4: 4" + "foo-5: 5" "foo-6: 6" "foo-7: 7" "foo-8: 8" "foo-9: 9" + "foo-10: 10" "foo-11: 11" "foo-12: 12" "foo-13: 13" + "foo-14: 14" "foo-15: 15" "foo-16: 16" "foo-17: 17" "foo-18: 18" + "foo-19: 19" "foo-20: 20" "foo-21: 21"; + + proxy_pass http://127.0.0.1:$server_port/back; + proxy_set_header Host foo; + proxy_http_version 1.0; + proxy_set_header Connection close; + } + + location = /back { + echo -n $echo_client_request_headers; + } +--- request +GET /t +--- more_headers eval +my $s = "User-Agent: curl\nBah: bah\n"; + +for my $i ('a' .. 'r') { + $s .= uc($i) . ": " . 
"$i\n" +} +$s +--- response_body eval +"GET /back HTTP/1.0\r +Host: foo\r +Connection: close\r +User-Agent: curl\r +Bah: bah\r +A: a\r +B: b\r +C: c\r +D: d\r +E: e\r +F: f\r +G: g\r +H: h\r +I: i\r +J: j\r +K: k\r +L: l\r +M: m\r +N: n\r +O: o\r +P: p\r +foo-1: 1\r +foo-2: 2\r +foo-3: 3\r +foo-4: 4\r +foo-5: 5\r +foo-6: 6\r +foo-7: 7\r +foo-8: 8\r +foo-9: 9\r +foo-10: 10\r +foo-11: 11\r +foo-12: 12\r +foo-13: 13\r +foo-14: 14\r +foo-15: 15\r +foo-16: 16\r +foo-17: 17\r +foo-18: 18\r +foo-19: 19\r +foo-20: 20\r +foo-21: 21\r +\r +" + + + +=== TEST 34: clear X-Real-IP +--- config + location /t { + more_clear_input_headers X-Real-IP; + echo "X-Real-IP: $http_x_real_ip"; + } +--- request +GET /t +--- more_headers +X-Real-IP: 8.8.8.8 + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + if (@defined($r->headers_in->x_real_ip) && $r->headers_in->x_real_ip) { + printf("rewrite: x-real-ip: %s\n", + user_string_n($r->headers_in->x_real_ip->value->data, + $r->headers_in->x_real_ip->value->len)) + } else { + println("rewrite: no x-real-ip") + } +} + +F(ngx_http_core_content_phase) { + if (@defined($r->headers_in->x_real_ip) && $r->headers_in->x_real_ip) { + printf("content: x-real-ip: %s\n", + user_string_n($r->headers_in->x_real_ip->value->data, + $r->headers_in->x_real_ip->value->len)) + } else { + println("content: no x-real-ip") + } +} + +--- stap_out +rewrite: x-real-ip: 8.8.8.8 +content: no x-real-ip + +--- response_body +X-Real-IP: + +--- no_error_log +[error] + + + +=== TEST 35: set custom X-Real-IP +--- config + location /t { + more_set_input_headers "X-Real-IP: 8.8.4.4"; + echo "X-Real-IP: $http_x_real_ip"; + } +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + if (@defined($r->headers_in->x_real_ip) && $r->headers_in->x_real_ip) { + printf("rewrite: x-real-ip: %s\n", + user_string_n($r->headers_in->x_real_ip->value->data, + $r->headers_in->x_real_ip->value->len)) + } else { + println("rewrite: no x-real-ip") + } + +} + 
+F(ngx_http_core_content_phase) { + if (@defined($r->headers_in->x_real_ip) && $r->headers_in->x_real_ip) { + printf("content: x-real-ip: %s\n", + user_string_n($r->headers_in->x_real_ip->value->data, + $r->headers_in->x_real_ip->value->len)) + } else { + println("content: no x-real-ip") + } +} + +--- stap_out +rewrite: no x-real-ip +content: x-real-ip: 8.8.4.4 + +--- response_body +X-Real-IP: 8.8.4.4 + +--- no_error_log +[error] + + + +=== TEST 36: clear Via +--- config + location /t { + more_clear_input_headers Via; + echo "Via: $http_via"; + } +--- request +GET /t +--- more_headers +Via: 1.0 fred, 1.1 nowhere.com (Apache/1.1) + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + if (@defined($r->headers_in->via) && $r->headers_in->via) { + printf("rewrite: via: %s\n", + user_string_n($r->headers_in->via->value->data, + $r->headers_in->via->value->len)) + } else { + println("rewrite: no via") + } +} + +F(ngx_http_core_content_phase) { + if (@defined($r->headers_in->via) && $r->headers_in->via) { + printf("content: via: %s\n", + user_string_n($r->headers_in->via->value->data, + $r->headers_in->via->value->len)) + } else { + println("content: no via") + } +} + +--- stap_out +rewrite: via: 1.0 fred, 1.1 nowhere.com (Apache/1.1) +content: no via + +--- response_body +Via: + +--- no_error_log +[error] + + + +=== TEST 37: set custom Via +--- config + location /t { + more_set_input_headers "Via: 1.0 fred, 1.1 nowhere.com (Apache/1.1)"; + echo "Via: $http_via"; + } +--- request +GET /t + +--- stap +F(ngx_http_headers_more_exec_input_cmd) { + if (@defined($r->headers_in->via) && $r->headers_in->via) { + printf("rewrite: via: %s\n", + user_string_n($r->headers_in->via->value->data, + $r->headers_in->via->value->len)) + } else { + println("rewrite: no via") + } + +} + +F(ngx_http_core_content_phase) { + if (@defined($r->headers_in->via) && $r->headers_in->via) { + printf("content: via: %s\n", + user_string_n($r->headers_in->via->value->data, + 
$r->headers_in->via->value->len)) + } else { + println("content: no via") + } +} + +--- stap_out +rewrite: no via +content: via: 1.0 fred, 1.1 nowhere.com (Apache/1.1) + +--- response_body +Via: 1.0 fred, 1.1 nowhere.com (Apache/1.1) + +--- no_error_log +[error] + + + +=== TEST 38: HTTP 0.9 (set) +--- config + location /foo { + more_set_input_headers 'X-Foo: howdy'; + echo "x-foo: $http_x_foo"; + } +--- raw_request eval +"GET /foo\r\n" +--- response_headers +! X-Foo +--- response_body +x-foo: +--- http09 + + + +=== TEST 39: HTTP 0.9 (clear) +--- config + location /foo { + more_clear_input_headers 'X-Foo'; + echo "x-foo: $http_x_foo"; + } +--- raw_request eval +"GET /foo\r\n" +--- response_headers +! X-Foo +--- response_body +x-foo: +--- http09 + + + +=== TEST 40: Host header with port and $host +--- config + location /bar { + more_set_input_headers 'Host: agentzh.org:1984'; + echo "host var: $host"; + echo "http_host var: $http_host"; + } +--- request +GET /bar +--- response_body +host var: agentzh.org +http_host var: agentzh.org:1984 + + + +=== TEST 41: Host header with upper case letters and $host +--- config + location /bar { + more_set_input_headers 'Host: agentZH.org:1984'; + echo "host var: $host"; + echo "http_host var: $http_host"; + } +--- request +GET /bar +--- response_body +host var: agentzh.org +http_host var: agentZH.org:1984 + + + +=== TEST 42: clear all and re-insert +--- config + location = /t { + more_clear_input_headers Host Connection Cache-Control Accept + User-Agent Accept-Encoding Accept-Language + Cookie; + + more_set_input_headers "Host: a" "Connection: b" "Cache-Control: c" + "Accept: d" "User-Agent: e" "Accept-Encoding: f" + "Accept-Language: g" "Cookie: h"; + + more_clear_input_headers Host Connection Cache-Control Accept + User-Agent Accept-Encoding Accept-Language + Cookie; + + more_set_input_headers "Host: a" "Connection: b" "Cache-Control: c" + "Accept: d" "User-Agent: e" "Accept-Encoding: f" + "Accept-Language: g" "Cookie: h"; + + 
echo ok; + } + +--- raw_request eval +"GET /t HTTP/1.1\r +Host: localhost\r +Connection: close\r +Cache-Control: max-age=0\r +Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\r +User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36\r +Accept-Encoding: gzip,deflate,sdch\r +Accept-Language: en-US,en;q=0.8\r +Cookie: test=cookie;\r +\r +" +--- response_body +ok +--- no_error_log +[error] + + + +=== TEST 43: more_set_input_header does not override request headers with multiple values +--- config + #lua_code_cache off; + location = /t { + more_set_input_headers "AAA: 111"; + + content_by_lua ' + local headers = ngx.req.get_headers() + ngx.say(headers["AAA"]) + '; + } +--- request +GET /t +--- more_headers +AAA: 123 +AAA: 456 +AAA: 678 + +--- response_body +111 +--- no_error_log +[error] + + + +=== TEST 44: clear If-Unmodified-Since req header +--- config + location = /t { + more_clear_input_headers 'If-Unmodified-Since'; + content_by_lua ' + ngx.header["Last-Modified"] = "Tue, 30 Jun 2011 12:16:36 GMT" + ngx.say("If-Unmodified-Since: ", ngx.var.http_if_unmodified_since) + '; + } +--- request +GET /t +--- more_headers +If-Unmodified-Since: Tue, 28 Jun 2011 12:16:36 GMT +--- response_body +If-Unmodified-Since: nil +--- no_error_log +[error] + + + +=== TEST 45: clear If-Match req header +--- config + location = /t { + more_clear_input_headers 'If-Match'; + echo "If-Match: $http_if_match"; + } +--- request +GET /t +--- more_headers +If-Match: abc +--- response_body +If-Match: +--- no_error_log +[error] + + + +=== TEST 46: clear If-None-Match req header +--- config + location = /t { + more_clear_input_headers 'If-None-Match'; + echo "If-None-Match: $http_if_none_match"; + } +--- request +GET /t +--- more_headers +If-None-Match: * +--- response_body +If-None-Match: +--- no_error_log +[error] + + + +=== TEST 47: set the Destination request header for WebDav +--- 
config + location = /a.txt { + more_set_input_headers "Destination: /b.txt"; + dav_methods MOVE; + dav_access all:rw; + root html; + } + +--- user_files +>>> a.txt +hello, world! + +--- request +MOVE /a.txt + +--- response_body +--- no_error_log +client sent no "Destination" header +[error] +--- error_code: 204 + + + +=== TEST 48: more_set_input_headers + X-Forwarded-For +--- config + location = /t { + more_set_input_headers "X-Forwarded-For: 8.8.8.8"; + proxy_pass http://127.0.0.1:$server_port/back; + proxy_http_version 1.0; + proxy_set_header Connection close; + proxy_set_header Foo $proxy_add_x_forwarded_for; + } + + location = /back { + echo "Foo: $http_foo"; + } + +--- request +GET /t + +--- response_body +Foo: 8.8.8.8, 127.0.0.1 +--- no_error_log +[error] + + + +=== TEST 49: more_clear_input_headers + X-Forwarded-For +--- config + location = /t { + more_clear_input_headers "X-Forwarded-For"; + proxy_pass http://127.0.0.1:$server_port/back; + proxy_http_version 1.0; + proxy_set_header Connection close; + proxy_set_header Foo $proxy_add_x_forwarded_for; + } + + location = /back { + echo "Foo: $http_foo"; + } + +--- request +GET /t + +--- more_headers +X-Forwarded-For: 8.8.8.8 +--- response_body +Foo: 127.0.0.1 +--- no_error_log +[error] + + + +=== TEST 50: clear input headers with wildcard +--- config + location /hello { + more_clear_input_headers 'X-Hidden-*'; + content_by_lua ' + ngx.say("X-Hidden-One: ", ngx.var.http_x_hidden_one) + ngx.say("X-Hidden-Two: ", ngx.var.http_x_hidden_two) + '; + } +--- request + GET /hello +--- more_headers +X-Hidden-One: i am hidden +X-Hidden-Two: me 2 +--- response_body +X-Hidden-One: nil +X-Hidden-Two: nil + + + +=== TEST 51: make sure wildcard doesn't affect more_set_input_headers +--- config + location /hello { + more_set_input_headers 'X-Hidden-*: lol'; + content_by_lua ' + ngx.say("X-Hidden-One: ", ngx.var.http_x_hidden_one) + ngx.say("X-Hidden-Two: ", ngx.var.http_x_hidden_two) + '; + } +--- request + GET /hello +--- 
more_headers +X-Hidden-One: i am hidden +X-Hidden-Two: me 2 +--- response_body +X-Hidden-One: i am hidden +X-Hidden-Two: me 2 diff --git a/ngx_headers_more/t/phase.t b/ngx_headers_more/t/phase.t new file mode 100644 index 0000000..11183db --- /dev/null +++ b/ngx_headers_more/t/phase.t @@ -0,0 +1,25 @@ +# vi:filetype=perl + +use lib 'lib'; +use Test::Nginx::Socket; + +plan tests => 3; + +no_diff; + +run_tests(); + +__DATA__ + +=== TEST 1: simple set (1 arg) +--- config + location /foo { + deny all; + more_set_headers 'X-Foo: Blah'; + } +--- request + GET /foo +--- response_headers +X-Foo: Blah +--- response_body_like: 403 Forbidden +--- error_code: 403 diff --git a/ngx_headers_more/t/sanity.t b/ngx_headers_more/t/sanity.t new file mode 100644 index 0000000..47d65e3 --- /dev/null +++ b/ngx_headers_more/t/sanity.t @@ -0,0 +1,628 @@ +# vi:filetype= + +use lib 'lib'; +use Test::Nginx::Socket; + +repeat_each(2); + +plan tests => repeat_each() * 123; + +#master_on(); +#workers(2); +log_level("warn"); +no_diff; + +run_tests(); + +__DATA__ + +=== TEST 1: simple set (1 arg) +--- config + location /foo { + echo hi; + more_set_headers 'X-Foo: Blah'; + } +--- request + GET /foo +--- response_headers +X-Foo: Blah +--- response_body +hi + + + +=== TEST 2: simple set (2 args) +--- config + location /foo { + echo hi; + more_set_headers 'X-Foo: Blah' 'X-Bar: hi'; + } +--- request + GET /foo +--- response_headers +X-Foo: Blah +X-Bar: hi +--- response_body +hi + + + +=== TEST 3: two sets in a single location +--- config + location /two { + echo hi; + more_set_headers 'X-Foo: Blah' + more_set_headers 'X-Bar: hi'; + } +--- request + GET /two +--- response_headers +X-Foo: Blah +X-Bar: hi +--- response_body +hi + + + +=== TEST 4: two sets in a single location (for 404 too) +--- config + location /two { + more_set_headers 'X-Foo: Blah' + more_set_headers 'X-Bar: hi'; + return 404; + } +--- request + GET /two +--- response_headers +X-Foo: Blah +X-Bar: hi +--- response_body_like: 404 Not 
Found +--- error_code: 404 + + + +=== TEST 5: set a header then clears it (500) +--- config + location /two { + more_set_headers 'X-Foo: Blah'; + more_set_headers 'X-Foo:'; + return 500; + } +--- request + GET /two +--- response_headers +! X-Foo +! X-Bar +--- response_body_like: 500 Internal Server Error +--- error_code: 500 + + + +=== TEST 6: set a header only when 500 (matched) +--- config + location /bad { + more_set_headers -s 500 'X-Mine: Hiya'; + more_set_headers -s 404 'X-Yours: Blah'; + return 500; + } +--- request + GET /bad +--- response_headers +X-Mine: Hiya +! X-Yours +--- response_body_like: 500 Internal Server Error +--- error_code: 500 + + + +=== TEST 7: set a header only when 500 (not matched with 200) +--- config + location /bad { + more_set_headers -s 500 'X-Mine: Hiya'; + more_set_headers -s 404 'X-Yours: Blah'; + echo hello; + } +--- request + GET /bad +--- response_headers +! X-Mine +! X-Yours +--- response_body +hello +--- error_code: 200 + + + +=== TEST 8: set a header only when 500 (not matched with 404) +--- config + location /bad { + more_set_headers -s 500 'X-Mine: Hiya'; + more_set_headers -s 404 'X-Yours: Blah'; + return 404; + } +--- request + GET /bad +--- response_headers +! X-Mine +X-Yours: Blah +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 9: more conditions +--- config + location /bad { + more_set_headers -s '503 404' 'X-Mine: Hiya'; + more_set_headers -s ' 404 413 ' 'X-Yours: Blah'; + return 503; + } +--- request + GET /bad +--- response_headers +X-Mine: Hiya +! 
X-Yours +--- response_body_like: 503 Service +--- error_code: 503 + + + +=== TEST 10: more conditions +--- config + location /bad { + more_set_headers -s '503 404' 'X-Mine: Hiya'; + more_set_headers -s ' 404 413 ' 'X-Yours: Blah'; + return 404; + } +--- request + GET /bad +--- response_headers +X-Mine: Hiya +X-Yours: Blah +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 11: more conditions +--- config + location /bad { + more_set_headers -s '503 404' 'X-Mine: Hiya'; + more_set_headers -s ' 404 413 ' 'X-Yours: Blah'; + return 413; + } +--- request + GET /bad +--- response_headers +! X-Mine +X-Yours: Blah +--- response_body_like: 413 Request Entity Too Large +--- error_code: 413 + + + +=== TEST 12: simple -t +--- config + location /bad { + default_type 'text/css'; + more_set_headers -t 'text/css' 'X-CSS: yes'; + echo hi; + } +--- request + GET /bad +--- response_headers +X-CSS: yes +--- response_body +hi + + + +=== TEST 13: simple -t (not matched) +--- config + location /bad { + default_type 'text/plain'; + more_set_headers -t 'text/css' 'X-CSS: yes'; + echo hi; + } +--- request + GET /bad +--- response_headers +! X-CSS +--- response_body +hi + + + +=== TEST 14: multiple -t (not matched) +--- config + location /bad { + default_type 'text/plain'; + more_set_headers -t 'text/javascript' -t 'text/css' 'X-CSS: yes'; + echo hi; + } +--- request + GET /bad +--- response_headers +! 
X-CSS +--- response_body +hi + + + +=== TEST 15: multiple -t (matched) +--- config + location /bad { + default_type 'text/plain'; + more_set_headers -t 'text/javascript' -t 'text/plain' 'X-CSS: yes'; + echo hi; + } +--- request + GET /bad +--- response_headers +X-CSS: yes +--- response_body +hi + + + +=== TEST 16: multiple -t (matched) +--- config + location /bad { + default_type 'text/javascript'; + more_set_headers -t 'text/javascript' -t 'text/plain' 'X-CSS: yes'; + echo hi; + } +--- request + GET /bad +--- response_headers +X-CSS: yes +--- response_body +hi + + + +=== TEST 17: multiple -t (matched) with extra spaces +--- config + location /bad { + default_type 'text/javascript'; + more_set_headers -t ' text/javascript ' -t 'text/plain' 'X-CSS: yes'; + echo hi; + } +--- request + GET /bad +--- response_headers +X-CSS: yes +--- response_body +hi + + + +=== TEST 18: multiple -t merged +--- config + location /bad { + default_type 'text/javascript'; + more_set_headers -t ' text/javascript text/plain' 'X-CSS: yes'; + echo hi; + } +--- request + GET /bad +--- response_headers +X-CSS: yes +--- response_body +hi + + + +=== TEST 19: multiple -t merged (2) +--- config + location /bad { + default_type 'text/plain'; + more_set_headers -t ' text/javascript text/plain' 'X-CSS: yes'; + echo hi; + } +--- request + GET /bad +--- response_headers +X-CSS: yes +--- response_body +hi + + + +=== TEST 20: multiple -s option in a directive (not matched) +--- config + location /bad { + more_set_headers -s 404 -s 500 'X-status: howdy'; + echo hi; + } +--- request + GET /bad +--- response_headers +! 
X-status +--- response_body +hi + + + +=== TEST 21: multiple -s option in a directive (matched 404) +--- config + location /bad { + more_set_headers -s 404 -s 500 'X-status: howdy'; + return 404; + } +--- request + GET /bad +--- response_headers +X-status: howdy +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 22: multiple -s option in a directive (matched 500) +--- config + location /bad { + more_set_headers -s 404 -s 500 'X-status: howdy'; + return 500; + } +--- request + GET /bad +--- response_headers +X-status: howdy +--- response_body_like: 500 Internal Server Error +--- error_code: 500 + + + +=== TEST 23: -s mixed with -t +--- config + location /bad { + default_type 'text/html'; + more_set_headers -s 404 -s 200 -t 'text/html' 'X-status: howdy2'; + return 404; + } +--- request + GET /bad +--- response_headers +X-status: howdy2 +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 24: -s mixed with -t +--- config + location /bad { + default_type 'text/html'; + more_set_headers -s 404 -s 200 -t 'text/plain' 'X-status: howdy2'; + return 404; + } +--- request + GET /bad +--- response_headers +! X-status +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 25: -s mixed with -t +--- config + location /bad { + default_type 'text/html'; + more_set_headers -s 404 -s 200 -t 'text/html' 'X-status: howdy2'; + echo hi; + } +--- request + GET /bad +--- response_headers +X-status: howdy2 +--- response_body +hi +--- error_code: 200 + + + +=== TEST 26: -s mixed with -t +--- config + location /bad { + default_type 'text/html'; + more_set_headers -s 500 -s 200 -t 'text/html' 'X-status: howdy2'; + return 404; + } +--- request + GET /bad +--- response_headers +! 
X-status +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 27: merge from the upper level +--- config + more_set_headers -s 404 -t 'text/html' 'X-status2: howdy3'; + location /bad { + default_type 'text/html'; + more_set_headers -s 500 -s 200 -t 'text/html' 'X-status: howdy2'; + return 404; + } +--- request + GET /bad +--- response_headers +X-status2: howdy3 +! X-status +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 28: merge from the upper level +--- config + more_set_headers -s 404 -t 'text/html' 'X-status2: howdy3'; + location /bad { + default_type 'text/html'; + more_set_headers -s 500 -s 200 -t 'text/html' 'X-status: howdy2'; + echo yeah; + } +--- request + GET /bad +--- response_headers +! X-status2 +X-status: howdy2 +--- response_body +yeah +--- error_code: 200 + + + +=== TEST 29: override settings by inheritance +--- config + more_set_headers -s 404 -t 'text/html' 'X-status: yeah'; + location /bad { + default_type 'text/html'; + more_set_headers -s 404 -t 'text/html' 'X-status: nope'; + return 404; + } +--- request + GET /bad +--- response_headers +X-status: nope +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 30: append settings by inheritance +--- config + more_set_headers -s 404 -t 'text/html' 'X-status: yeah'; + location /bad { + default_type 'text/html'; + more_set_headers -s 404 -t 'text/html' 'X-status2: nope'; + return 404; + } +--- request + GET /bad +--- response_headers +X-status: yeah +X-status2: nope +--- response_body_like: 404 Not Found +--- error_code: 404 + + + +=== TEST 31: clear headers with wildcard +--- config + location = /backend { + add_header X-Hidden-One "i am hidden"; + add_header X-Hidden-Two "me 2"; + echo hi; + } + location /hello { + more_clear_headers 'X-Hidden-*'; + proxy_pass http://127.0.0.1:$server_port/backend; + } +--- request + GET /hello +--- response_headers +! X-Hidden-One +! 
X-Hidden-Two +--- response_body +hi + + + +=== TEST 32: clear duplicate headers +--- config + location = /backend { + add_header pragma no-cache; + add_header pragma no-cache; + echo hi; + } + location /hello { + more_clear_headers 'pragma'; + proxy_pass http://127.0.0.1:$server_port/backend; + } +--- request + GET /hello +--- response_headers +!pragma +--- response_body +hi + + + +=== TEST 33: HTTP 0.9 (set) +--- config + location /foo { + more_set_headers 'X-Foo: howdy'; + echo ok; + } +--- raw_request eval +"GET /foo\r\n" +--- response_headers +! X-Foo +--- response_body +ok +--- http09 + + + +=== TEST 34: use the -a option to append the cookie field +--- config + location /cookie { + more_set_headers -a 'Set-Cookie: name=lynch'; + echo ok; + } +--- request + GET /cookie +--- response_headers +Set-Cookie: name=lynch +--- response_body +ok + + + +=== TEST 35: the original Set-Cookie fields will not be overwritten, when using the -a option +--- config + location /cookie { + more_set_headers 'Set-Cookie: name=lynch'; + more_set_headers -a 'Set-Cookie: born=1981'; + echo ok; + } +--- request + GET /cookie +--- raw_response_headers_like eval +"Set-Cookie: name=lynch\r\nSet-Cookie: born=1981\r\n" +--- response_body +ok + + + +=== TEST 36: The behavior of builtin headers can not be changed +--- config + location /foo { + more_set_headers -a "Server: myServer"; + echo ok; + } +--- request + GET /foo +--- must_die +--- error_log chomp +can not append builtin headers +--- suppress_stderr + + + +=== TEST 37: can not use -a option with more_clear_headers +--- config + location /foo { + more_clear_headers -a 'Content-Type'; + echo ok; + } +--- request + GET /foo +--- must_die +--- error_log chomp +invalid option name: "-a" +--- suppress_stderr diff --git a/ngx_headers_more/t/subrequest.t b/ngx_headers_more/t/subrequest.t new file mode 100644 index 0000000..9443eca --- /dev/null +++ b/ngx_headers_more/t/subrequest.t @@ -0,0 +1,68 @@ +# vi:filetype= + +use lib 'lib'; +use 
Test::Nginx::Socket; # 'no_plan'; + +plan tests => blocks() * 3; + +no_diff; + +run_tests(); + +__DATA__ + +=== TEST 1: vars in input header directives +--- config + location /main { + echo_location /foo; + echo "main: $http_user_agent"; + } + location /foo { + set $val 'dog'; + + more_set_input_headers 'User-Agent: $val'; + + proxy_pass http://127.0.0.1:$server_port/proxy; + } + location /proxy { + echo "sub: $http_user_agent"; + } +--- request + GET /main +--- more_headers +User-Agent: my-sock +--- response_body +sub: dog +main: dog +--- response_headers +! Host +--- skip_nginx: 3: < 0.7.46 + + + +=== TEST 2: vars in input header directives +--- config + location /main { + #more_set_input_headers 'User-Agent: cat'; + echo_location /foo; + echo "main: $http_user_agent"; + } + location /foo { + set $val 'dog'; + + more_set_input_headers 'User-Agent: $val'; + + proxy_pass http://127.0.0.1:$server_port/proxy; + #echo $http_user_agent; + } + location /proxy { + echo "sub: $http_user_agent"; + } +--- request + GET /main +--- response_body +sub: dog +main: dog +--- response_headers +! 
Host +--- skip_nginx: 3: < 0.7.46 diff --git a/ngx_headers_more/t/unused.t b/ngx_headers_more/t/unused.t new file mode 100644 index 0000000..c51f91c --- /dev/null +++ b/ngx_headers_more/t/unused.t @@ -0,0 +1,174 @@ +# vi:filetype= + +use lib 'lib'; +use Test::Nginx::Socket; + +repeat_each(2); + +plan tests => repeat_each() * (blocks() * 4 + 2); + +#master_on(); +#workers(2); +log_level("warn"); +no_diff; + +run_tests(); + +__DATA__ + +=== TEST 1: used output filter +--- config + location /foo { + echo hi; + more_set_headers "Foo: bar"; + } +--- request + GET /foo +--- response_headers +Foo: bar +--- response_body +hi +--- error_log +headers more header filter +--- no_error_log +[error] +--- log_level: debug + + + +=== TEST 2: unused output filter (none) +--- config + location /foo { + echo hi; + } +--- request + GET /foo +--- response_body +hi +--- no_error_log +headers more header filter +[error] +--- log_level: debug + + + +=== TEST 3: unused output filter (with more_set_input_headers only) +--- config + location /foo { + more_set_input_headers "Foo: bar"; + echo hi; + } +--- request + GET /foo +--- response_body +hi +--- no_error_log +headers more header filter +[error] +--- log_level: debug + + + +=== TEST 4: used rewrite handler +--- config + location /foo { + more_set_input_headers "Foo: bar"; + echo hi; + } +--- request + GET /foo +--- response_body +hi +--- error_log +headers more rewrite handler +--- no_error_log +[error] +--- log_level: debug + + + +=== TEST 5: unused rewrite handler (none) +--- config + location /foo { + #more_set_input_headers "Foo: bar"; + echo hi; + } +--- request + GET /foo +--- response_body +hi +--- no_error_log +headers more rewrite handler +[error] +--- log_level: debug + + + +=== TEST 6: unused rewrite handler (with output header filters) +--- config + location /foo { + #more_set_input_headers "Foo: bar"; + echo hi; + more_set_headers "Foo: bar"; + } +--- request + GET /foo +--- response_headers +Foo: bar +--- response_body +hi 
+--- no_error_log +headers more rewrite handler +[error] +--- log_level: debug + + + +=== TEST 7: multiple http {} blocks (filter) +This test case won't run with nginx 1.9.3+ since duplicate http {} blocks +have been prohibited since then. +--- SKIP +--- config + location /foo { + echo hi; + more_set_headers 'Foo: bar'; + } +--- post_main_config + http { + } + +--- request + GET /foo +--- response_body +hi +--- response_headers +Foo: bar +--- no_error_log +[error] +--- error_log +headers more header filter +--- log_level: debug + + + +=== TEST 8: multiple http {} blocks (handler) +This test case won't run with nginx 1.9.3+ since duplicate http {} blocks +have been prohibited since then. +--- SKIP +--- config + location /foo { + more_set_input_headers 'Foo: bar'; + echo $http_foo; + } +--- post_main_config + http { + } + +--- request + GET /foo +--- response_body +bar +--- no_error_log +headers more header handler +[error] +--- log_level: debug diff --git a/ngx_headers_more/t/vars.t b/ngx_headers_more/t/vars.t new file mode 100644 index 0000000..04c75c3 --- /dev/null +++ b/ngx_headers_more/t/vars.t @@ -0,0 +1,58 @@ +# vi:ft= + +use lib 'lib'; +use Test::Nginx::Socket; # 'no_plan'; + +plan tests => 9; + +no_diff; + +run_tests(); + +__DATA__ + +=== TEST 1: vars +--- config + location /foo { + echo hi; + set $val 'hello, world'; + more_set_headers 'X-Foo: $val'; + } +--- request + GET /foo +--- response_headers +X-Foo: hello, world +--- response_body +hi + + + +=== TEST 2: vars in both key and val +--- config + location /foo { + echo hi; + set $val 'hello, world'; + more_set_headers '$val: $val'; + } +--- request + GET /foo +--- response_headers +$val: hello, world +--- response_body +hi + + + +=== TEST 3: vars in input header directives +--- config + location /foo { + set $val 'dog'; + more_set_input_headers 'Host: $val'; + echo $host; + } +--- request + GET /foo +--- response_body +dog +--- response_headers +Host: diff --git a/ngx_headers_more/util/build.sh 
b/ngx_headers_more/util/build.sh new file mode 100644 index 0000000..eb1b9ce --- /dev/null +++ b/ngx_headers_more/util/build.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# this file is mostly meant to be used by the author himself. + +root=`pwd` +version=$1 +home=~ +force=$2 + + #--with-cc=gcc46 \ + +ngx-build $force $version \ + --with-ld-opt="-L$PCRE_LIB -Wl,-rpath,$PCRE_LIB" \ + --with-cc-opt="-DNGX_LUA_USE_ASSERT -I$PCRE_INC" \ + --without-mail_pop3_module \ + --without-mail_imap_module \ + --without-mail_smtp_module \ + --without-http_upstream_ip_hash_module \ + --without-http_empty_gif_module \ + --without-http_memcached_module \ + --without-http_referer_module \ + --without-http_autoindex_module \ + --without-http_auth_basic_module \ + --without-http_userid_module \ + --with-http_realip_module \ + --with-http_dav_module \ + --add-module=$root/../eval-nginx-module \ + --add-module=$root/../lua-nginx-module \ + --add-module=$root/../echo-nginx-module \ + --add-module=$root $opts \ + --with-debug + #--add-module=$root/../ndk-nginx-module \ + #--without-http_ssi_module # we cannot disable ssi because echo_location_async depends on it (i dunno why?!) 
+ diff --git a/ngx_headers_more/valgrind.suppress b/ngx_headers_more/valgrind.suppress new file mode 100644 index 0000000..d51de70 --- /dev/null +++ b/ngx_headers_more/valgrind.suppress @@ -0,0 +1,135 @@ +{ + + Memcheck:Leak + fun:malloc + fun:ngx_alloc + obj:* +} +{ + + Memcheck:Leak + fun:malloc + fun:ngx_alloc + fun:ngx_calloc + fun:ngx_event_process_init +} +{ + + Memcheck:Leak + fun:malloc + fun:ngx_alloc + fun:ngx_event_process_init +} +{ + + Memcheck:Param + epoll_ctl(event) + fun:epoll_ctl +} +{ + + Memcheck:Cond + fun:memcpy + fun:ngx_vslprintf + fun:ngx_log_error_core + fun:ngx_http_charset_header_filter +} +{ + nginx-core-process-init + Memcheck:Leak + fun:malloc + fun:ngx_alloc + fun:ngx_event_process_init +} +{ + nginx-core-crc32-init + Memcheck:Leak + fun:malloc + fun:ngx_alloc + fun:ngx_crc32_table_init + fun:main +} +{ + palloc_large_for_init_request + Memcheck:Leak + fun:malloc + fun:ngx_alloc + fun:ngx_palloc_large + fun:ngx_palloc + fun:ngx_pcalloc + fun:ngx_http_init_request + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ + palloc_large_for_create_temp_buf + Memcheck:Leak + fun:malloc + fun:ngx_alloc + fun:ngx_palloc_large + fun:ngx_palloc + fun:ngx_create_temp_buf + fun:ngx_http_init_request + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ + accept_create_pool + Memcheck:Leak + fun:memalign + fun:posix_memalign + fun:ngx_memalign + fun:ngx_create_pool + fun:ngx_event_accept + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ + create_pool_for_init_req + Memcheck:Leak + fun:memalign + fun:posix_memalign + fun:ngx_memalign + fun:ngx_create_pool + fun:ngx_http_init_request + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ + + Memcheck:Addr8 + fun:getenv + fun:gcov_exit + fun:exit + fun:ngx_master_process_exit +} +{ + + Memcheck:Cond + fun:index + fun:expand_dynamic_string_token + fun:_dl_map_object + fun:map_doit + fun:_dl_catch_error + fun:do_preload + 
fun:dl_main +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:ngx_alloc + fun:ngx_set_environment + fun:ngx_single_process_cycle +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:ngx_alloc + fun:ngx_set_environment + fun:ngx_worker_process_init + fun:ngx_worker_process_cycle +} diff --git a/ngx_zstd/.gitattributes b/ngx_zstd/.gitattributes new file mode 100644 index 0000000..6fe6f35 --- /dev/null +++ b/ngx_zstd/.gitattributes @@ -0,0 +1 @@ +*.t linguist-language=Text diff --git a/ngx_zstd/.gitignore b/ngx_zstd/.gitignore new file mode 100644 index 0000000..e3bcd3c --- /dev/null +++ b/ngx_zstd/.gitignore @@ -0,0 +1,54 @@ +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +t/servroot/* diff --git a/ngx_zstd/LICENSE b/ngx_zstd/LICENSE new file mode 100644 index 0000000..b4d1280 --- /dev/null +++ b/ngx_zstd/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2018, Alex Zhang +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ngx_zstd/README.md b/ngx_zstd/README.md new file mode 100644 index 0000000..6105896 --- /dev/null +++ b/ngx_zstd/README.md @@ -0,0 +1,155 @@ +# Name +zstd-nginx-module - Nginx module for the [Zstandard compression](https://facebook.github.io/zstd/). + +# Table of Contents + +* [Name](#name) +* [Status](#status) +* [Synopsis](#synopsis) +* [Installation](#installation) +* [Directives](#directives) + * [ngx_http_zstd_filter_module](#ngx_http_zstd_filter_module) + * [zstd_dict_file](#zstd_dict_file) + * [zstd](#zstd) + * [zstd_comp_level](#zstd_comp_level) + * [zstd_min_length](#zstd_min_length) + * [zstd_types](#zstd_types) + * [zstd_buffers](#zstd_buffers) + * [ngx_http_zstd_static_module](#ngx_http_zstd_static_module) + * [zstd_static](#zstd_static) +* [Variables](#variables) + * [ngx_http_zstd_filter_module](#ngx_http_zstd_filter_module) + * [$zstd_ratio](#$zstd_ratio) +* [Author](#author) + +# Status + +This Nginx module is currently considered experimental. Issues and PRs are welcome if you encounter any problems. 
+ +# Synopsis + +```nginx + +# specify the dictionary +zstd_dict_file /path/to/dict; + +server { + listen 127.0.0.1:8080; + server_name localhost; + + location / { + # enable zstd compression + zstd on; + zstd_min_length 256; # no less than 256 bytes + zstd_comp_level 3; # set the level to 3 + + proxy_pass http://foo.com; + } +} + +server { + listen 127.0.0.1:8081; + server_name localhost; + + location / { + zstd_static on; + root html; + } +} +``` + +# Installation + +To use these modules, configure your nginx branch with `--add-module=/path/to/zstd-nginx-module`. Several points should be taken care of. + +* You can set environment variables `ZSTD_INC` and `ZSTD_LIB` to specify the path to `zstd.h` and the path to the zstd library respectively. +* The static library is tried before the dynamic library, since this Nginx module uses some **advanced APIs** where static linking is recommended. +* System's zstd bundle will be linked if `ZSTD_INC` and `ZSTD_LIB` are not specified. +* Both `ngx_http_zstd_static_module` and `ngx_http_zstd_filter_module` will be configured. + +# Directives + +## ngx_http_zstd_filter_module + +The `ngx_http_zstd_filter_module` module is a filter that compresses responses using the _"zstd"_ method. This often helps to reduce the size of transmitted data by half or even more. + +### zstd_dict_file + +**Syntax:** *zstd_dict_file /path/to/dict;* +**Default:** *-* +**Context:** *http* + +Specifies the external dictionary. + +**WARNING:** Be careful! The content-coding registration only specifies a means to signal the use of the zstd format, and does not additionally specify any mechanism for advertising/negotiating/synchronizing the use of a specific dictionary between client and server. Use the `zstd_dict_file` only if you can ensure that both ends _(server and client)_ are capable of using the same dictionary (e.g. advertise with an HTTP header). See https://github.com/tokers/zstd-nginx-module/issues/2 for the details. 
+ +### zstd + +**Syntax:** *zstd on | off;* +**Default:** *zstd off;* +**Context:** *http, server, location, if in location* + +Enables or disables zstd compression for response. + +### zstd_comp_level + +**Syntax:** *zstd_comp_level level;* +**Default:** *zstd_comp_level 1;* +**Context:** *http, server, location* + +Sets a zstd compression level of a response. Acceptable values are in the range from 1 to `ZSTD_maxCLevel()`. + +### zstd_min_length + +**Syntax:** *zstd_min_length length;* +**Default:** *zstd_min_length 20;* +**Context:** *http, server, location* + +Sets the minimum length of a response that will be compressed by zstd. The length is determined only from the `Content-Length` response header field. + +### zstd_types + +**Syntax:** *zstd_types mime-type ...;* +**Default:** *zstd_types text/html;* +**Context:** *http, server, location* + +Enables zstd of responses for the specified MIME types in addition to `text/html`. The special value `*` matches any MIME type. + +### zstd_buffers + +**Syntax:** *zstd_buffers number size;* +**Default:** *zstd_buffers 32 4k | 16 8k;* +**Context:** *http, server, location* + +Sets the number and size of buffers used to compress a response. By default the buffer size is equal to one memory page. This is either 4K or 8K, depending on a platform. + +## ngx_http_zstd_static_module + +The `ngx_http_zstd_static_module` module allows sending precompressed files with the `.zst` filename extension instead of regular files. + +### zstd_static + +**Syntax:** *zstd_static on | off | always;* +**Default:** *zstd_static off;* +**Context:** *http, server, location* + +Enables ("on") or disables ("off") checking the existence of precompressed files. The following directives are also taken into account: `gzip_vary`. + +With the _"always"_ value, "zstd" file is used in all cases, without checking if the client supports it. 
+ + +# Variables + +## ngx_http_zstd_filter_module + +### $zstd_ratio + +Achieved compression ratio, computed as the ratio between the original and compressed response sizes. + +# Author + +Alex Zhang (张超) zchao1995@gmail.com, UPYUN Inc. + +# License + +This Nginx module is licensed under [BSD 2-Clause License](LICENSE). diff --git a/ngx_zstd/config b/ngx_zstd/config new file mode 100644 index 0000000..3d13883 --- /dev/null +++ b/ngx_zstd/config @@ -0,0 +1,11 @@ +# Make sure the module knows it is a submodule. +ngx_addon_name=ngx_zstd +. $ngx_addon_dir/filter/config + +# Make sure the module knows it is a submodule. +ngx_addon_name=ngx_zstd +. $ngx_addon_dir/static/config + +# The final name for reporting. +ngx_addon_name=ngx_zstd + diff --git a/ngx_zstd/filter/config b/ngx_zstd/filter/config new file mode 100644 index 0000000..2942125 --- /dev/null +++ b/ngx_zstd/filter/config @@ -0,0 +1,144 @@ +ngx_feature_incs="#include " +ngx_feature_test="(void) ZSTD_createCCtx();" +ngx_feature_libs= +ngx_feature_run=yes + +ngx_zstd_opt_I= +ngx_zstd_opt_L= + +if [ -n "$ZSTD_INC" -o -n "$ZSTD_LIB" ]; then + ngx_feature="ZStandard static library in $ZSTD_INC and $ZSTD_LIB" + ngx_feature_path=$ZSTD_INC + + # we try the static shared library firstly + ngx_zstd_opt_I="-I$ZSTD_INC -DZSTD_STATIC_LINKING_ONLY" + ngx_zstd_opt_L="$ZSTD_LIB/libzstd.a" + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . 
auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + # then try the dynamic shared library + ngx_feature="ZStandard dynamic library in $ZSTD_INC and $ZSTD_LIB" + ngx_zstd_opt_L="-L$ZSTD_LIB -lzstd -Wl,-rpath,$ZSTD_LIB" # no space after "-rpath,": the path must stay inside the -Wl argument + + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + cat << END + $0: error: ngx_http_zstd_filter_module requires the ZStandard library, please be sure that "\$ZSTD_INC" and "\$ZSTD_LIB" are set correctly. +END + exit 1 + fi + + fi +else + # auto-discovery + ngx_feature="ZStandard static library" + ngx_zstd_opt_I="-DZSTD_STATIC_LINKING_ONLY" + ngx_zstd_opt_L="-l:libzstd.a" + + # still we consider the static library firstly + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + + ngx_feature="ZStandard dynamic library" + ngx_zstd_opt_L="-lzstd" + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + if [ $ngx_found = no ]; then + cat << END + $0: error: ngx_http_zstd_filter_module requires the ZStandard library. 
+END + exit 1 + fi + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + cat << END + $0: warning: ngx_http_zstd_filter_module uses advanced ZStandard APIs (which are still considered experimental) while you are trying to link the dynamic shared library. +END + fi + + # TODO we need more tries for the different OS port. +fi + +NGX_LD_OPT="$ngx_zstd_opt_L $NGX_LD_OPT" + +HTTP_ZSTD_SRCS="$ngx_addon_dir/filter/ngx_http_zstd_filter_module.c" + +ngx_addon_name=ngx_http_zstd_filter_module +ngx_module_type=HTTP_FILTER +ngx_module_name=ngx_http_zstd_filter_module +ngx_module_incs="$ngx_zstd_opt_I" +ngx_module_srcs=$HTTP_ZSTD_SRCS +ngx_module_libs=$NGX_LD_OPT +ngx_module_order="$ngx_module_name \ + ngx_pagespeed \ + ngx_http_postpone_filter_module \ + ngx_http_ssi_filter_module \ + ngx_http_charset_filter_module \ + ngx_http_xslt_filter_module \ + ngx_http_image_filter_module \ + ngx_http_sub_filter_module \ + ngx_http_addition_filter_module \ + ngx_http_gunzip_filter_module \ + ngx_http_userid_filter_module \ + ngx_http_headers_filter_module \ + ngx_http_copy_filter_module \ + ngx_http_range_body_filter_module \ + ngx_http_not_modified_filter_module \ + ngx_http_slice_filter_module" + +. auto/module + +if [ "$ngx_module_link" != DYNAMIC ]; then + # ngx_module_order doesn't work with static modules, + # so we must re-order filters here. 
+ + if [ "$HTTP_GZIP" = YES ]; then + next=ngx_http_gzip_filter_module + elif echo $HTTP_FILTER_MODULES | grep pagespeed_etag_filter >/dev/null; then + next=ngx_pagespeed_etag_filter + else + next=ngx_http_range_header_filter_module + fi + + HTTP_FILTER_MODULES=`echo $HTTP_FILTER_MODULES \ + | sed "s/$ngx_module_name//" \ + | sed "s/$next/$next $ngx_module_name/"` +fi + diff --git a/ngx_zstd/filter/ngx_http_zstd_filter_module.c b/ngx_zstd/filter/ngx_http_zstd_filter_module.c new file mode 100644 index 0000000..50ec55f --- /dev/null +++ b/ngx_zstd/filter/ngx_http_zstd_filter_module.c @@ -0,0 +1,1035 @@ + +/* + * Copyright (C) Alex Zhang + */ + + +#include +#include +#include + +#include + + +#define NGX_HTTP_ZSTD_FILTER_COMPRESS 0 +#define NGX_HTTP_ZSTD_FILTER_FLUSH 1 +#define NGX_HTTP_ZSTD_FILTER_END 2 + + +typedef struct { + ngx_str_t dict_file; +} ngx_http_zstd_main_conf_t; + + +typedef struct { + ngx_flag_t enable; + ngx_int_t level; + ssize_t min_length; + + ngx_hash_t types; + + ngx_bufs_t bufs; + + ngx_array_t *types_keys; + + ZSTD_CDict *dict; +} ngx_http_zstd_loc_conf_t; + + +typedef struct { + ngx_chain_t *in; + ngx_chain_t *free; + ngx_chain_t *busy; + ngx_chain_t *out; + ngx_chain_t **last_out; + + ngx_buf_t *in_buf; + ngx_buf_t *out_buf; + ngx_int_t bufs; + + ZSTD_inBuffer buffer_in; + ZSTD_outBuffer buffer_out; + + ZSTD_CStream *cstream; + + ngx_http_request_t *request; + + size_t bytes_in; + size_t bytes_out; + + unsigned action:2; + unsigned last:1; + unsigned redo:1; + unsigned flush:1; + unsigned done:1; + unsigned nomem:1; +} ngx_http_zstd_ctx_t; + + +typedef struct { + ngx_conf_post_handler_pt post_handler; +} ngx_http_zstd_comp_level_bounds_t; + + +static ngx_http_output_header_filter_pt ngx_http_next_header_filter; +static ngx_http_output_body_filter_pt ngx_http_next_body_filter; + +static ngx_str_t ngx_http_zstd_ratio = ngx_string("zstd_ratio"); + + +static ngx_int_t ngx_http_zstd_header_filter(ngx_http_request_t *r); +static ngx_int_t 
ngx_http_zstd_body_filter(ngx_http_request_t *r, + ngx_chain_t *in); +static ngx_int_t ngx_http_zstd_filter_add_data(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx); +static ngx_int_t ngx_http_zstd_filter_get_buf(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx); +static ZSTD_CStream *ngx_http_zstd_filter_create_cstream(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx); +static ngx_int_t ngx_http_zstd_filter_compress(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx); +static ngx_int_t ngx_http_zstd_accept_encoding(ngx_str_t *ae); +static ngx_int_t ngx_http_zstd_ok(ngx_http_request_t *r); +static ngx_int_t ngx_http_zstd_filter_init(ngx_conf_t *cf); +static void * ngx_http_zstd_create_main_conf(ngx_conf_t *cf); +static char *ngx_http_zstd_init_main_conf(ngx_conf_t *cf, void *conf); +static void *ngx_http_zstd_create_loc_conf(ngx_conf_t *cf); +static char *ngx_http_zstd_merge_loc_conf(ngx_conf_t *cf, void *parent, + void *child); +static ngx_int_t ngx_http_zstd_add_variables(ngx_conf_t *cf); +static ngx_int_t ngx_http_zstd_ratio_variable(ngx_http_request_t *r, + ngx_http_variable_value_t *vv, uintptr_t data); +static void * ngx_http_zstd_filter_alloc(void *opaque, size_t size); +static void ngx_http_zstd_filter_free(void *opaque, void *address); +static char *ngx_http_zstd_comp_level(ngx_conf_t *cf, void *post, void *data); +static char *ngx_conf_zstd_set_num_slot_with_negatives(ngx_conf_t *cf, ngx_command_t *cmd, void *conf); + + +static ngx_http_zstd_comp_level_bounds_t ngx_http_zstd_comp_level_bounds = { + ngx_http_zstd_comp_level +}; + + +static ngx_command_t ngx_http_zstd_filter_commands[] = { + + { ngx_string("zstd"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_HTTP_LIF_CONF + |NGX_CONF_FLAG, + ngx_conf_set_flag_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, enable), + NULL }, + + { ngx_string("zstd_comp_level"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1, + 
ngx_conf_zstd_set_num_slot_with_negatives, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, level), + &ngx_http_zstd_comp_level_bounds }, + + { ngx_string("zstd_types"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE, + ngx_http_types_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, types_keys), + &ngx_http_html_default_types[0] }, + + { ngx_string("zstd_buffers"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE2, + ngx_conf_set_bufs_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, bufs), + NULL }, + + { ngx_string("zstd_min_length"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE, + ngx_conf_set_size_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, min_length), + NULL }, + + { ngx_string("zstd_dict_file"), + NGX_HTTP_MAIN_CONF|NGX_CONF_TAKE1, + ngx_conf_set_str_slot, + NGX_HTTP_MAIN_CONF_OFFSET, + offsetof(ngx_http_zstd_main_conf_t, dict_file), + NULL }, + + ngx_null_command +}; + + +static ngx_http_module_t ngx_http_zstd_filter_module_ctx = { + ngx_http_zstd_add_variables, /* preconfiguration */ + ngx_http_zstd_filter_init, /* postconfiguration */ + + ngx_http_zstd_create_main_conf, /* create main configuration */ + ngx_http_zstd_init_main_conf, /* init main configuration */ + + NULL, /* create server configuration */ + NULL, /* merge server configuration */ + + ngx_http_zstd_create_loc_conf, /* create location configuration */ + ngx_http_zstd_merge_loc_conf, /* merge location configuration */ +}; + + +ngx_module_t ngx_http_zstd_filter_module = { + NGX_MODULE_V1, + &ngx_http_zstd_filter_module_ctx, /* module context */ + ngx_http_zstd_filter_commands, /* module directives */ + NGX_HTTP_MODULE, /* module type */ + NULL, /* init master */ + NULL, /* init module */ + NULL, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + NULL, /* exit process */ + NULL, /* exit master */ + 
NGX_MODULE_V1_PADDING +}; + + +static ngx_int_t +ngx_http_zstd_header_filter(ngx_http_request_t *r) +{ + ngx_table_elt_t *h; + ngx_http_zstd_loc_conf_t *zlcf; + ngx_http_zstd_ctx_t *ctx; + + zlcf = ngx_http_get_module_loc_conf(r, ngx_http_zstd_filter_module); + + if (!zlcf->enable + || (r->headers_out.status != NGX_HTTP_OK + && r->headers_out.status != NGX_HTTP_FORBIDDEN + && r->headers_out.status != NGX_HTTP_NOT_FOUND) + || (r->headers_out.content_encoding + && r->headers_out.content_encoding->value.len) + || (r->headers_out.content_length_n != -1 + && r->headers_out.content_length_n < zlcf->min_length) + || ngx_http_test_content_type(r, &zlcf->types) == NULL + || r->header_only) + { + return ngx_http_next_header_filter(r); + } + + r->gzip_vary = 1; + + if (ngx_http_zstd_ok(r) != NGX_OK) { + return ngx_http_next_header_filter(r); + } + + ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_zstd_ctx_t)); + if (ctx == NULL) { + return NGX_ERROR; + } + + ngx_http_set_ctx(r, ctx, ngx_http_zstd_filter_module); + + ctx->request = r; + ctx->last_out = &ctx->out; + + h = ngx_list_push(&r->headers_out.headers); + if (h == NULL) { + return NGX_ERROR; + } + + h->hash = 1; + ngx_str_set(&h->key, "Content-Encoding"); + ngx_str_set(&h->value, "zstd"); + r->headers_out.content_encoding = h; + + r->main_filter_need_in_memory = 1; + + ngx_http_clear_content_length(r); + ngx_http_clear_accept_ranges(r); + ngx_http_weak_etag(r); + + return ngx_http_next_header_filter(r); +} + + +static ngx_int_t +ngx_http_zstd_body_filter(ngx_http_request_t *r, ngx_chain_t *in) +{ + size_t rv; + ngx_int_t flush, rc; + ngx_chain_t *cl; + ngx_http_zstd_ctx_t *ctx; + + + ctx = ngx_http_get_module_ctx(r, ngx_http_zstd_filter_module); + + if (ctx == NULL || ctx->done || r->header_only) { + return ngx_http_next_body_filter(r, in); + } + + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "http zstd filter"); + + if (ctx->cstream == NULL) { + ctx->cstream = ngx_http_zstd_filter_create_cstream(r, ctx); + if 
(ctx->cstream == NULL) { + goto failed; + } + } + + if (in) { + if (ngx_chain_add_copy(r->pool, &ctx->in, in) != NGX_OK) { + goto failed; + } + + r->connection->buffered |= NGX_HTTP_GZIP_BUFFERED; + } + + if (ctx->nomem) { + + /* flush busy buffers */ + + if (ngx_http_next_body_filter(r, NULL) == NGX_ERROR) { + goto failed; + } + + cl = NULL; + + ngx_chain_update_chains(r->pool, &ctx->free, &ctx->busy, &cl, + (ngx_buf_tag_t) &ngx_http_zstd_filter_module); + + flush = 0; + ctx->nomem = 0; + + } else { + flush = ctx->busy ? 1 : 0; + } + + for ( ;; ) { + + /* cycle while we can write to a client */ + + for ( ;; ) { + + rc = ngx_http_zstd_filter_add_data(r, ctx); + + if (rc == NGX_DECLINED) { + break; + } + + if (rc == NGX_AGAIN) { + continue; + } + + rc = ngx_http_zstd_filter_get_buf(r, ctx); + + if (rc == NGX_ERROR) { + goto failed; + } + + if (rc == NGX_DECLINED) { + break; + } + + rc = ngx_http_zstd_filter_compress(r, ctx); + + if (rc == NGX_ERROR) { + goto failed; + } + + if (rc == NGX_OK) { + break; + } + + /* rc == NGX_AGAIN */ + } + + if (ctx->out == NULL && !flush) { + return ctx->busy ? 
NGX_AGAIN : NGX_OK; + } + + rc = ngx_http_next_body_filter(r, ctx->out); + + if (rc == NGX_ERROR) { + goto failed; + } + + ngx_chain_update_chains(r->pool, &ctx->free, &ctx->busy, &ctx->out, + (ngx_buf_tag_t) &ngx_http_zstd_filter_module); + + ctx->last_out = &ctx->out; + ctx->nomem = 0; + flush = 0; + + if (ctx->done) { + rv = ZSTD_freeCStream(ctx->cstream); + if (ZSTD_isError(rv)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_freeCStream() failed: %s", + ZSTD_getErrorName(rv)); /* rv is the zstd result code; rc holds the nginx filter status */ + + rc = NGX_ERROR; + } + + return rc; + } + } + +failed: + + ctx->done = 1; + rv = ZSTD_freeCStream(ctx->cstream); + if (ZSTD_isError(rv)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_freeCStream() failed: %s", ZSTD_getErrorName(rv)); + } + + return NGX_ERROR; +} + + +static ngx_int_t +ngx_http_zstd_filter_compress(ngx_http_request_t *r, ngx_http_zstd_ctx_t *ctx) +{ + size_t rc, pos_in, pos_out; + char *hint; + ngx_chain_t *cl; + ngx_buf_t *b; + + ngx_log_debug8(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "zstd compress in: src:%p pos:%ud size: %ud, " + "dst:%p pos:%ud size:%ud flush:%d redo:%d", + ctx->buffer_in.src, ctx->buffer_in.pos, ctx->buffer_in.size, + ctx->buffer_out.dst, ctx->buffer_out.pos, + ctx->buffer_out.size, ctx->flush, ctx->redo); + + pos_in = ctx->buffer_in.pos; + pos_out = ctx->buffer_out.pos; + + switch (ctx->action) { + + case NGX_HTTP_ZSTD_FILTER_FLUSH: + hint = "ZSTD_flushStream() "; + rc = ZSTD_flushStream(ctx->cstream, &ctx->buffer_out); + break; + + case NGX_HTTP_ZSTD_FILTER_END: + hint = "ZSTD_endStream() "; + rc = ZSTD_endStream(ctx->cstream, &ctx->buffer_out); + break; + + default: + hint = "ZSTD_compressStream() "; + rc = ZSTD_compressStream(ctx->cstream, &ctx->buffer_out, + &ctx->buffer_in); + break; + } + + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "%s failed: %s", hint, ZSTD_getErrorName(rc)); + + return NGX_ERROR; + } + + ngx_log_debug6(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, 
+ "zstd compress out: src:%p pos:%ud size: %ud, " + "dst:%p pos:%ud size:%ud", + ctx->buffer_in.src, ctx->buffer_in.pos, ctx->buffer_in.size, + ctx->buffer_out.dst, ctx->buffer_out.pos, + ctx->buffer_out.size); + + ctx->in_buf->pos += ctx->buffer_in.pos - pos_in; + ctx->out_buf->last += ctx->buffer_out.pos - pos_out; + ctx->redo = 0; + + if (rc > 0) { + if (ctx->action == NGX_HTTP_ZSTD_FILTER_COMPRESS) { + ctx->action = NGX_HTTP_ZSTD_FILTER_FLUSH; + } + + ctx->redo = 1; + + } else if (ctx->last && ctx->action != NGX_HTTP_ZSTD_FILTER_END) { + ctx->redo = 1; + ctx->action = NGX_HTTP_ZSTD_FILTER_END; + + /* pending to call the ZSTD_endStream() */ + + return NGX_AGAIN; + + } else { + ctx->action = NGX_HTTP_ZSTD_FILTER_COMPRESS; /* restore */ + } + + if (ngx_buf_size(ctx->out_buf) == 0) { + return NGX_AGAIN; + } + + cl = ngx_alloc_chain_link(r->pool); + if (cl == NULL) { + return NGX_ERROR; + } + + b = ctx->out_buf; + + if (rc == 0 && (ctx->flush || ctx->last)) { + r->connection->buffered &= ~NGX_HTTP_GZIP_BUFFERED; + + b->flush = ctx->flush; + b->last_buf = ctx->last; + + ctx->done = ctx->last; + ctx->flush = 0; + } + + ctx->bytes_out += ngx_buf_size(b); + + cl->next = NULL; + cl->buf = b; + + *ctx->last_out = cl; + ctx->last_out = &cl->next; + + ngx_memzero(&ctx->buffer_out, sizeof(ZSTD_outBuffer)); + + return ctx->last && rc == 0 ? 
NGX_OK : NGX_AGAIN; +} + + +static ngx_int_t +ngx_http_zstd_filter_add_data(ngx_http_request_t *r, ngx_http_zstd_ctx_t *ctx) +{ + if (ctx->buffer_in.pos < ctx->buffer_in.size + || ctx->flush + || ctx->last + || ctx->redo) + { + return NGX_OK; + } + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "zstd in: %p", ctx->in); + + if (ctx->in == NULL) { + return NGX_DECLINED; + } + + ctx->in_buf = ctx->in->buf; + ctx->in = ctx->in->next; + + if (ctx->in_buf->flush) { + ctx->flush = 1; + + } else if (ctx->in_buf->last_buf) { + ctx->last = 1; + } + + ctx->buffer_in.src = ctx->in_buf->pos; + ctx->buffer_in.pos = 0; + ctx->buffer_in.size = ngx_buf_size(ctx->in_buf); + + ctx->bytes_in += ngx_buf_size(ctx->in_buf); + + if (ctx->buffer_in.size == 0) { + return NGX_AGAIN; + } + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_zstd_filter_get_buf(ngx_http_request_t *r, ngx_http_zstd_ctx_t *ctx) +{ + ngx_chain_t *cl; + ngx_http_zstd_loc_conf_t *zlcf; + + if (ctx->buffer_out.pos < ctx->buffer_out.size) { + return NGX_OK; + } + + zlcf = ngx_http_get_module_loc_conf(r, ngx_http_zstd_filter_module); + + if (ctx->free) { + cl = ctx->free; + ctx->free = ctx->free->next; + ctx->out_buf = cl->buf; + ngx_free_chain(r->pool, cl); + + } else if (ctx->bufs < zlcf->bufs.num) { + ctx->out_buf = ngx_create_temp_buf(r->pool, zlcf->bufs.size); + if (ctx->out_buf == NULL) { + return NGX_ERROR; + } + + ctx->out_buf->tag = (ngx_buf_tag_t) &ngx_http_zstd_filter_module; + ctx->out_buf->recycled = 1; + ctx->bufs++; + + } else { + ctx->nomem = 1; + return NGX_DECLINED; + } + + ctx->buffer_out.dst = ctx->out_buf->pos; + ctx->buffer_out.pos = 0; + ctx->buffer_out.size = ctx->out_buf->end - ctx->out_buf->start; + + return NGX_OK; +} + + +static ZSTD_CStream * +ngx_http_zstd_filter_create_cstream(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx) +{ + size_t rc; + ZSTD_CStream *cstream; + ZSTD_customMem cmem; + ngx_http_zstd_loc_conf_t *zlcf; + + zlcf = ngx_http_get_module_loc_conf(r, 
ngx_http_zstd_filter_module); + + cmem.customAlloc = ngx_http_zstd_filter_alloc; + cmem.customFree = ngx_http_zstd_filter_free; + cmem.opaque = ctx; + + cstream = ZSTD_createCStream_advanced(cmem); + if (cstream == NULL) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_createCStream_advanced() failed"); + + return NULL; + } + + /* TODO use the advanced initialize functions */ + + if (zlcf->dict) { +#if ZSTD_VERSION_NUMBER >= 10500 + rc = ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only); + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_CCtx_reset() failed: %s", + ZSTD_getErrorName(rc)); + goto failed; + } + + rc = ZSTD_CCtx_refCDict(cstream, zlcf->dict); + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_CCtx_refCDict() failed: %s", + ZSTD_getErrorName(rc)); + goto failed; + } +#else + rc = ZSTD_initCStream_usingCDict(cstream, zlcf->dict); +#endif + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_initCStream_usingCDict() failed: %s", + ZSTD_getErrorName(rc)); + + goto failed; + } + + } else { + rc = ZSTD_initCStream(cstream, zlcf->level); + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_initCStream() failed: %s", + ZSTD_getErrorName(rc)); + + goto failed; + } + } + + return cstream; + +failed: + rc = ZSTD_freeCStream(cstream); + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_freeCStream() failed: %s", ZSTD_getErrorName(rc)); + } + + return NULL; +} + + +static ngx_int_t +ngx_http_zstd_accept_encoding(ngx_str_t *ae) +{ + u_char *p; + + p = ngx_strcasestrn(ae->data, "zstd", sizeof("zstd") - 2); + if (p == NULL) { + return NGX_DECLINED; + } + + if (p == ae->data || (*(p - 1) == ',' || *(p - 1) == ' ')) { + + p += sizeof("zstd") - 1; + + if (p == ae->data + ae->len || *p == ',' || *p == ' ' || *p == ';') { + return NGX_OK; + } + } + + return NGX_DECLINED; +} + + 
+static ngx_int_t +ngx_http_zstd_ok(ngx_http_request_t *r) +{ + ngx_table_elt_t *ae; + + if (r != r->main) { + return NGX_DECLINED; + } + + ae = r->headers_in.accept_encoding; + if (ae == NULL) { + return NGX_DECLINED; + } + + if (ae->value.len < sizeof("zstd") - 1) { + return NGX_DECLINED; + } + + if (ngx_memcmp(ae->value.data, "zstd", 4) != 0 + && ngx_http_zstd_accept_encoding(&ae->value) != NGX_OK) + { + return NGX_DECLINED; + } + + + r->gzip_tested = 1; + r->gzip_ok = 0; + + return NGX_OK; +} + + +static void * +ngx_http_zstd_create_main_conf(ngx_conf_t *cf) +{ + ngx_http_zstd_main_conf_t *zmcf; + + zmcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_zstd_main_conf_t)); + if (zmcf == NULL) { + return NULL; + } + + return zmcf; +} + + +static char * +ngx_http_zstd_init_main_conf(ngx_conf_t *cf, void *conf) +{ + ngx_http_zstd_main_conf_t *zmcf = conf; + + if (zmcf->dict_file.len == 0) { + return NGX_CONF_OK; + } + + if (ngx_conf_full_name(cf->cycle, &zmcf->dict_file, 1) != NGX_OK) { + return NGX_CONF_ERROR; + } + + return NGX_CONF_OK; +} + + +static void * +ngx_http_zstd_create_loc_conf(ngx_conf_t *cf) +{ + ngx_http_zstd_loc_conf_t *conf; + + conf = ngx_pcalloc(cf->pool, sizeof(ngx_http_zstd_loc_conf_t)); + if (conf == NULL) { + return NULL; + } + + /* + * set by ngx_pcalloc(): + * + * conf->bufs.num = 0; + * conf->types = { NULL }; + * conf->types_keys = NULL; + * conf->dict = NULL; + */ + + conf->enable = NGX_CONF_UNSET; + conf->level = NGX_CONF_UNSET; + conf->min_length = NGX_CONF_UNSET; + + return conf; +} + + +static char * +ngx_http_zstd_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) +{ + ngx_http_zstd_loc_conf_t *prev = parent; + ngx_http_zstd_loc_conf_t *conf = child; + + ngx_fd_t fd; + size_t size; + ssize_t n; + char *rc; + u_char *buf; + ngx_file_info_t info; + ngx_http_zstd_main_conf_t *zmcf; + + rc = NGX_OK; + buf = NULL; + fd = NGX_INVALID_FILE; + + ngx_conf_merge_value(conf->enable, prev->enable, 0); + ngx_conf_merge_value(conf->level, 
prev->level, 1); + ngx_conf_merge_value(conf->min_length, prev->min_length, 20); + + if (ngx_http_merge_types(cf, &conf->types_keys, &conf->types, + &prev->types_keys, &prev->types, + ngx_http_html_default_types)) + { + return NGX_CONF_ERROR; + } + + ngx_conf_merge_ptr_value(conf->dict, prev->dict, NULL); + ngx_conf_merge_bufs_value(conf->bufs, prev->bufs, + (128 * 1024) / ngx_pagesize, ngx_pagesize); + + zmcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_zstd_filter_module); + + if (conf->enable && zmcf->dict_file.len > 0) { + + if (conf->level == prev->level) { + conf->dict = prev->dict; + + } else { + /* + * compression level is different from the outer block, + * so we should create a seperate dict object. + */ + + fd = ngx_open_file(zmcf->dict_file.data, NGX_FILE_RDONLY, + NGX_FILE_OPEN, 0); + + if (fd == NGX_INVALID_FILE) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, ngx_errno, + ngx_open_file_n " \"%V\" failed", + &zmcf->dict_file); + + return NGX_CONF_ERROR; + } + + if (ngx_fd_info(fd, &info) == NGX_FILE_ERROR) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, ngx_errno, + ngx_fd_info_n " \"%V\" failed", + &zmcf->dict_file); + + rc = NGX_CONF_ERROR; + goto close; + } + + size = ngx_file_size(&info); + buf = ngx_palloc(cf->pool, size); + if (buf == NULL) { + rc = NGX_CONF_ERROR; + goto close; + } + + n = ngx_read_fd(fd, (void *) buf, size); + if (n < 0) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, ngx_errno, + ngx_read_fd_n " %V\" failed", + &zmcf->dict_file); + + rc = NGX_CONF_ERROR; + goto close; + + } else if ((size_t) n != size) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, ngx_errno, + ngx_read_fd_n "\"%V incomplete\"", + &zmcf->dict_file); + + rc = NGX_CONF_ERROR; + goto close; + } + + conf->dict = ZSTD_createCDict_byReference(buf, size, conf->level); + if (conf->dict == NULL) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ZSTD_createCDict_byReference() failed"); + rc = NGX_CONF_ERROR; + goto close; + } + } + } + +close: + + if (fd != NGX_INVALID_FILE && 
ngx_close_file(fd) == NGX_FILE_ERROR) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, ngx_errno, + ngx_close_file_n " \"%V\" failed", + &zmcf->dict_file); + + rc = NGX_CONF_ERROR; + } + + return rc; +} + + +/* + * Postconfiguration hook: puts the zstd header and body filters at the top + * of the filter chains and remembers the previous top filters as next. + */ +static ngx_int_t +ngx_http_zstd_filter_init(ngx_conf_t *cf) +{ + ngx_http_next_header_filter = ngx_http_top_header_filter; + ngx_http_top_header_filter = ngx_http_zstd_header_filter; + + ngx_http_next_body_filter = ngx_http_top_body_filter; + ngx_http_top_body_filter = ngx_http_zstd_body_filter; + + return NGX_OK; +} + + +/* + * Allocation callback: takes size bytes from the pool of the request stored + * in the context passed as opaque. Returns the ngx_palloc() result as is. + */ +static void * +ngx_http_zstd_filter_alloc(void *opaque, size_t size) +{ + ngx_http_zstd_ctx_t *ctx = opaque; + + void *p; + + p = ngx_palloc(ctx->request->pool, size); + + ngx_log_debug2(NGX_LOG_DEBUG_HTTP, ctx->request->connection->log, 0, + "zstd alloc: %p, size: %uz", p, size); + + return p; +} + + +/* + * Registers the ratio variable as non-cacheable and attaches its handler. + * Returns NGX_ERROR when the variable cannot be added. + */ +static ngx_int_t +ngx_http_zstd_add_variables(ngx_conf_t *cf) +{ + ngx_http_variable_t *v; + + v = ngx_http_add_variable(cf, &ngx_http_zstd_ratio, + NGX_HTTP_VAR_NOCACHEABLE); + if (v == NULL) { + return NGX_ERROR; + } + + v->get_handler = ngx_http_zstd_ratio_variable; + + return NGX_OK; +} + + +/* + * Variable handler: formats bytes_in / bytes_out with three fractional + * digits. The variable is reported as not found when there is no module + * context, the context is not done yet, or no output bytes were counted. + * Returns NGX_ERROR only on allocation failure. + */ +static ngx_int_t +ngx_http_zstd_ratio_variable(ngx_http_request_t *r, + ngx_http_variable_value_t *vv, uintptr_t data) +{ + ngx_uint_t ratio_int, ratio_frac; + ngx_http_zstd_ctx_t *ctx; + + ctx = ngx_http_get_module_ctx(r, ngx_http_zstd_filter_module); + if (ctx == NULL || !ctx->done || ctx->bytes_out == 0) { + vv->not_found = 1; + return NGX_OK; + } + + /* room for a full-width ngx_uint_t, the dot and three fraction digits */ + vv->data = ngx_pnalloc(r->pool, NGX_INT_T_LEN + 4); + if (vv->data == NULL) { + return NGX_ERROR; + } + + ratio_int = (ngx_uint_t) ctx->bytes_in / ctx->bytes_out; + ratio_frac = (ngx_uint_t) (ctx->bytes_in * 1000 / ctx->bytes_out % 1000); + + vv->len = ngx_sprintf(vv->data, "%ui.%03ui", ratio_int, ratio_frac) + - vv->data; + + vv->valid = 1; + vv->no_cacheable = 1; + + return NGX_OK; +} + + +static void +ngx_http_zstd_filter_free(void *opaque, void *address) +{ +#if (NGX_DEBUG) + + ngx_http_zstd_ctx_t *ctx = 
opaque; + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, ctx->request->connection->log, 0, + "zstd free: %p", address); + +#endif +} + + +static char * +ngx_http_zstd_comp_level(ngx_conf_t *cf, void *post, void *data) +{ + ngx_int_t *np = data; + + if (*np == 0 || *np < (ngx_int_t)ZSTD_minCLevel() || *np > ZSTD_maxCLevel()) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "zstd compress level must between %i and %i excluding 0", + (ngx_int_t)ZSTD_minCLevel(), ZSTD_maxCLevel()); + + return NGX_CONF_ERROR; + } + + return NGX_CONF_OK; +} + +static char * +ngx_conf_zstd_set_num_slot_with_negatives(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) +{ + char *p = conf; + + ngx_int_t *np; + ngx_str_t *value; + ngx_conf_post_t *post; + + + np = (ngx_int_t *) (p + cmd->offset); + + if (*np != NGX_CONF_UNSET) { + return "is duplicate"; + } + + value = cf->args->elts; + + if (*(value[1].data) == '-') { + // Parse ignoring the leading '-' character + *np = ngx_atoi(value[1].data + 1, value[1].len - 1); + + // NGX_ERROR is -1 so we need to check for that before making the parsed + // result negative + if (*np == NGX_ERROR) { + return "invalid number"; + } + + *np = -*np; + } else { + *np = ngx_atoi(value[1].data, value[1].len); + + if (*np == NGX_ERROR) { + return "invalid number"; + } + } + + if (cmd->post) { + post = cmd->post; + return post->post_handler(cf, post, np); + } + + return NGX_CONF_OK; +} diff --git a/ngx_zstd/static/config b/ngx_zstd/static/config new file mode 100644 index 0000000..ed6e66e --- /dev/null +++ b/ngx_zstd/static/config @@ -0,0 +1,111 @@ +ngx_feature_incs="#include " +ngx_feature_test="(void) ZSTD_createCCtx();" +ngx_feature_libs= +ngx_feature_run=yes + +ngx_zstd_opt_I= +ngx_zstd_opt_L= + +if [ -n "$ZSTD_INC" -o -n "$ZSTD_LIB" ]; then + ngx_feature="ZStandard static library in $ZSTD_INC and $ZSTD_LIB" + ngx_feature_path=$ZSTD_INC + + # we try the static shared library firstly + ngx_zstd_opt_I="-I$ZSTD_INC -DZSTD_STATIC_LINKING_ONLY" + 
ngx_zstd_opt_L="$ZSTD_LIB/libzstd.a" + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + # then try the dynamic shared library + ngx_feature="ZStandard dynamic library in $ZSTD_INC and $ZSTD_LIB" + # the rpath value must follow the comma directly, without a space + ngx_zstd_opt_L="-L$ZSTD_LIB -lzstd -Wl,-rpath,$ZSTD_LIB" + + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + cat << END + $0: error: ngx_http_zstd_filter_module requires the ZStandard library, please be sure that "\$ZSTD_INC" and "\$ZSTD_LIB" are set correctly. +END + exit 1 + fi + + fi +else + # auto-discovery + ngx_feature="ZStandard static library" + ngx_zstd_opt_I="-DZSTD_STATIC_LINKING_ONLY" + ngx_zstd_opt_L="-l:libzstd.a" + + # still we consider the static library firstly + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + + # fall back to whatever libzstd the linker finds by default + ngx_feature="ZStandard dynamic library" + ngx_zstd_opt_L="-lzstd" + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + if [ $ngx_found = no ]; then + cat << END + $0: error: ngx_http_zstd_filter_module requires the ZStandard library. 
+END + exit 1 + fi + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + cat << END + $0: warning: ngx_http_zstd_filter_module uses advanced ZStandard APIs (which are still considered experimental) while you are trying to link the dynamic shared library. +END + fi + + # TODO we need more tries for the different OS port. +fi + +CFLAGS="$ngx_zstd_opt_I $CFLAGS" +NGX_LD_OPT="$ngx_zstd_opt_L $NGX_LD_OPT" + +# build the ngx_http_zstd_static_module +HTTP_ZSTD_SRCS="$ngx_addon_dir/static/ngx_http_zstd_static_module.c" + +ngx_addon_name=ngx_http_zstd_static_module +ngx_module_type=HTTP +ngx_module_name=ngx_http_zstd_static_module +ngx_module_incs="$ngx_zstd_opt_I" +ngx_module_srcs=$HTTP_ZSTD_SRCS + +. auto/module diff --git a/ngx_zstd/static/ngx_http_zstd_static_module.c b/ngx_zstd/static/ngx_http_zstd_static_module.c new file mode 100644 index 0000000..3b247e9 --- /dev/null +++ b/ngx_zstd/static/ngx_http_zstd_static_module.c @@ -0,0 +1,383 @@ + +/* + * Copyright (C) Alex Zhang + */ + + +#include +#include +#include + + +#define NGX_HTTP_ZSTD_STATIC_OFF 0 +#define NGX_HTTP_ZSTD_STATIC_ON 1 +#define NGX_HTTP_ZSTD_STATIC_ALWAYS 2 + + +/* per-location setting of the zstd_static directive */ +typedef struct { + ngx_uint_t enable; +} ngx_http_zstd_static_conf_t; + + +/* + * Values accepted by zstd_static. The list is scanned by + * ngx_conf_set_enum_slot() until an entry with an empty name, so it must + * end with the null entry. + */ +static ngx_conf_enum_t ngx_http_zstd_static[] = { + { ngx_string("off"), NGX_HTTP_ZSTD_STATIC_OFF }, + { ngx_string("on"), NGX_HTTP_ZSTD_STATIC_ON }, + { ngx_string("always"), NGX_HTTP_ZSTD_STATIC_ALWAYS }, + { ngx_null_string, 0 } +}; + + +static ngx_command_t ngx_http_zstd_static_commands[] = { + + { ngx_string("zstd_static"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1, + ngx_conf_set_enum_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_static_conf_t, enable), + &ngx_http_zstd_static }, + + ngx_null_command +}; + + +static ngx_int_t ngx_http_zstd_static_handler(ngx_http_request_t *r); +static ngx_int_t ngx_http_zstd_accept_encoding(ngx_str_t *ae); +static ngx_int_t ngx_http_zstd_ok(ngx_http_request_t *r); +static 
void * ngx_http_zstd_static_create_loc_conf(ngx_conf_t *cf); +static char * ngx_http_zstd_static_merge_loc_conf(ngx_conf_t *cf, void *parent, + void *child); +static ngx_int_t ngx_http_zstd_static_init(ngx_conf_t *cf); + + +/* HTTP module hooks: only postconfiguration and location conf are used */ +static ngx_http_module_t ngx_http_zstd_static_module_ctx = { + NULL, /* preconfiguration */ + ngx_http_zstd_static_init, /* postconfiguration */ + + NULL, /* create main configuration */ + NULL, /* init main configuration */ + + NULL, /* create server configuration */ + NULL, /* merge server configuration */ + + ngx_http_zstd_static_create_loc_conf, /* create location configuration */ + ngx_http_zstd_static_merge_loc_conf, /* merge location configuration */ +}; + + +ngx_module_t ngx_http_zstd_static_module = { + NGX_MODULE_V1, + &ngx_http_zstd_static_module_ctx, /* module context */ + ngx_http_zstd_static_commands, /* module directives */ + NGX_HTTP_MODULE, /* module type */ + NULL, /* init master */ + NULL, /* init module */ + NULL, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + NULL, /* exit process */ + NULL, /* exit master */ + NGX_MODULE_V1_PADDING +}; + + +/* + * Request handler that serves a pre-compressed file whose name is the + * mapped request path with .zst appended. It declines requests that are + * not GET or HEAD, URIs ending in '/', and locations where zstd_static + * is off. + */ +static ngx_int_t +ngx_http_zstd_static_handler(ngx_http_request_t *r) +{ + u_char *p; + ngx_int_t rc; + ngx_uint_t level; + size_t root; + ngx_str_t path; + ngx_buf_t *b; + ngx_log_t *log; + ngx_table_elt_t *h; + ngx_chain_t out; + ngx_open_file_info_t of; + ngx_http_core_loc_conf_t *clcf; + ngx_http_zstd_static_conf_t *zscf; + + if (!(r->method & (NGX_HTTP_GET|NGX_HTTP_HEAD))) { + return NGX_DECLINED; + } + + if (r->uri.data[r->uri.len - 1] == '/') { + return NGX_DECLINED; + } + + zscf = ngx_http_get_module_loc_conf(r, ngx_http_zstd_static_module); + + if (zscf->enable == NGX_HTTP_ZSTD_STATIC_OFF) { + return NGX_DECLINED; + } + + /* in the on mode the client must accept zstd; otherwise rc is forced OK */ + if (zscf->enable == NGX_HTTP_ZSTD_STATIC_ON) { + rc = ngx_http_zstd_ok(r); + + } else { + rc = NGX_OK; + } + + clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module); + + /* + * with gzip_vary enabled the lookup continues even for a client that + * did not accept zstd, because gzip_vary is set further down once the + * file is found + */ + if (!clcf->gzip_vary && rc != NGX_OK) { + return 
NGX_DECLINED; + } + + log = r->connection->log; + + /* map the URI to a path, reserving room for the .zst suffix */ + p = ngx_http_map_uri_to_path(r, &path, &root, sizeof(".zst") - 1); + if (p == NULL) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + /* append the suffix and terminate the string in the reserved space */ + *p++ = '.'; + *p++ = 'z'; + *p++ = 's'; + *p++ = 't'; + *p = '\0'; + + path.len = p - path.data; + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, log, 0, + "http filename: \"%s\"", path.data); + + ngx_memzero(&of, sizeof(ngx_open_file_info_t)); + + /* open-file parameters are copied from the core location configuration */ + of.read_ahead = clcf->read_ahead; + of.directio = clcf->directio; + of.valid = clcf->open_file_cache_valid; + of.min_uses = clcf->open_file_cache_min_uses; + of.errors = clcf->open_file_cache_errors; + of.events = clcf->open_file_cache_events; + + if (ngx_http_set_disable_symlinks(r, clcf, &path, &of) != NGX_OK) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + if (ngx_open_cached_file(clcf->open_file_cache, &path, &of, r->pool) + != NGX_OK) + { + /* + * a missing file is declined silently; other failures are logged + * at a level chosen by errno and then declined as well + */ + switch (of.err) { + + case 0: + return NGX_HTTP_INTERNAL_SERVER_ERROR; + + case NGX_ENOENT: + case NGX_ENOTDIR: + case NGX_ENAMETOOLONG: + + return NGX_DECLINED; + + case NGX_EACCES: +#if (NGX_HAVE_OPENAT) + case NGX_EMLINK: + case NGX_ELOOP: +#endif + + level = NGX_LOG_ERR; + break; + + default: + + level = NGX_LOG_CRIT; + break; + } + + ngx_log_error(level, log, of.err, + "%s \"%s\" failed", of.failed, path.data); + + return NGX_DECLINED; + } + + /* + * the .zst file exists: in the on mode flag the request with gzip_vary + * (presumably so that a Vary header is emitted - confirm), then decline + * if the client did not accept zstd + */ + if (zscf->enable == NGX_HTTP_ZSTD_STATIC_ON) { + r->gzip_vary = 1; + + if (rc != NGX_OK) { + return NGX_DECLINED; + } + } + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, log, 0, "http static fd: %d", of.fd); + + if (of.is_dir) { + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, log, 0, "http dir"); + return NGX_DECLINED; + } + +#if !(NGX_WIN32) /* the not regular files are probably Unix specific */ + + if (!of.is_file) { + ngx_log_error(NGX_LOG_CRIT, log, 0, + "\"%s\" is not a regular file", path.data); + + return NGX_HTTP_NOT_FOUND; + } + +#endif + + r->root_tested = !r->error_page; + + rc = ngx_http_discard_request_body(r); + if (rc != NGX_OK) { + return rc; + } + + log->action 
= "sending response to client"; + + /* response metadata comes from the opened .zst file */ + r->headers_out.status = NGX_HTTP_OK; + r->headers_out.content_length_n = of.size; + r->headers_out.last_modified_time = of.mtime; + + if (ngx_http_set_etag(r) != NGX_OK) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + if (ngx_http_set_content_type(r) != NGX_OK) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + h = ngx_list_push(&r->headers_out.headers); + if (h == NULL) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + h->hash = 1; +#if defined(nginx_version) && nginx_version >= 1023000 + /* header lines are linked lists since nginx 1.23.0; end the list here */ + h->next = NULL; +#endif + ngx_str_set(&h->key, "Content-Encoding"); + ngx_str_set(&h->value, "zstd"); + r->headers_out.content_encoding = h; + + /* allocate the buffer before sending the header */ + b = ngx_calloc_buf(r->pool); + if (b == NULL) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + b->file = ngx_pcalloc(r->pool, sizeof(ngx_file_t)); + if (b->file == NULL) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + rc = ngx_http_send_header(r); + + if (rc == NGX_ERROR || rc > NGX_OK || r->header_only) { + return rc; + } + + /* a single file buffer covering the whole .zst file */ + b->file_pos = 0; + b->file_last = of.size; + + b->in_file = b->file_last ? 1 : 0; + b->last_buf = (r == r->main) ? 
1 : 0; + b->last_in_chain = 1; + + b->file->fd = of.fd; + b->file->name = path; + b->file->log = log; + b->file->directio = of.is_directio; + + out.buf = b; + out.next = NULL; + + return ngx_http_output_filter(r, &out); +} + + +/* + * Decides whether the request may be answered with zstd content. + * Only main requests qualify, and the Accept-Encoding header must either + * begin with zstd or pass ngx_http_zstd_accept_encoding(). + * On success the request is also flagged as gzip-tested and not gzip-ok. + * Returns NGX_OK or NGX_DECLINED. + */ +static ngx_int_t +ngx_http_zstd_ok(ngx_http_request_t *r) +{ + ngx_table_elt_t *ae; + + if (r != r->main) { + return NGX_DECLINED; + } + + ae = r->headers_in.accept_encoding; + if (ae == NULL) { + return NGX_DECLINED; + } + + if (ae->value.len < sizeof("zstd") - 1) { + return NGX_DECLINED; + } + + if (ngx_memcmp(ae->value.data, "zstd", 4) != 0 + && ngx_http_zstd_accept_encoding(&ae->value) != NGX_OK) + { + return NGX_DECLINED; + } + + + r->gzip_tested = 1; + r->gzip_ok = 0; + + return NGX_OK; +} + + +/* + * Reports whether the Accept-Encoding value ae names zstd as a whole token: + * the match must sit at the start of the value or right after ',' or ' ', + * and must be followed by the end of the value, ',', ' ' or ';'. + * Returns NGX_OK on a match and NGX_DECLINED otherwise. + */ +static ngx_int_t +ngx_http_zstd_accept_encoding(ngx_str_t *ae) +{ + u_char *p; + + /* + * ngx_strcasestrn() takes the needle length minus one. Passing the + * full length also compares the terminating NUL, so zstd would only + * match at the very end of the header value. + */ + p = ngx_strcasestrn(ae->data, "zstd", sizeof("zstd") - 2); + if (p == NULL) { + return NGX_DECLINED; + } + + if (p == ae->data || (*(p - 1) == ',' || *(p - 1) == ' ')) { + + p += sizeof("zstd") - 1; + + if (p == ae->data + ae->len || *p == ',' || *p == ' ' || *p == ';') { + return NGX_OK; + } + } + + return NGX_DECLINED; +} + + +/* + * Allocates the location configuration with enable marked as unset. + * Returns NULL on allocation failure. + */ +static void * +ngx_http_zstd_static_create_loc_conf(ngx_conf_t *cf) +{ + ngx_http_zstd_static_conf_t *conf; + + conf = ngx_palloc(cf->pool, sizeof(ngx_http_zstd_static_conf_t)); + if (conf == NULL) { + return NULL; + } + + conf->enable = NGX_CONF_UNSET_UINT; + + return conf; +} + + +/* Inherits enable from the parent location; the default is off. */ +static char * +ngx_http_zstd_static_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) +{ + ngx_http_zstd_static_conf_t *prev = parent; + ngx_http_zstd_static_conf_t *conf = child; + + ngx_conf_merge_uint_value(conf->enable, prev->enable, + NGX_HTTP_ZSTD_STATIC_OFF); + + return NGX_CONF_OK; +} + + +/* Postconfiguration hook: registers the handler in the content phase. */ +static ngx_int_t +ngx_http_zstd_static_init(ngx_conf_t *cf) +{ + ngx_http_handler_pt *h; + ngx_http_core_main_conf_t *cmcf; + + cmcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_core_module); + + h = 
ngx_array_push(&cmcf->phases[NGX_HTTP_CONTENT_PHASE].handlers); + if (h == NULL) { + return NGX_ERROR; + } + + *h = ngx_http_zstd_static_handler; + + return NGX_OK; +} diff --git a/ngx_zstd/t/00-filter.t b/ngx_zstd/t/00-filter.t new file mode 100644 index 0000000..1e48098 --- /dev/null +++ b/ngx_zstd/t/00-filter.t @@ -0,0 +1,8 @@ +use Test::Nginx::Socket::Lua; + +no_long_string(); +run_tests(); + +__DATA__ + + diff --git a/ngx_zstd/t/01-static.t b/ngx_zstd/t/01-static.t new file mode 100644 index 0000000..74fa130 --- /dev/null +++ b/ngx_zstd/t/01-static.t @@ -0,0 +1,198 @@ +use Test::Nginx::Socket; +use lib 'lib'; + +no_long_string(); +log_level 'debug'; +repeat_each(3); +plan tests => repeat_each() * ((blocks() - 3) * 5 + 3); +run_tests(); + + +__DATA__ + + +=== TEST 1: zstd_static off +--- config + location /test { + zstd_static off; + root ../../t/suite; + } +--- request +GET /test +--- response_headers +Content-Length: 59738 +ETag: "5be17d33-e95a" +!Content-Encoding +--- no_error_log +[error] + + + +=== TEST 2: zstd_static off (with accept-encoding header) +--- config + location /test { + zstd_static off; + root ../../t/suite; + } +--- request +GET /test +--- more_headers +Accept-Encoding: gzip,zstd +--- response_headers +Content-Length: 59738 +ETag: "5be17d33-e95a" +!Content-Encoding +--- no_error_log +[error] + + + +=== TEST 3: zstd_static on +--- config + location /test { + zstd_static on; + root ../../t/suite; + } +--- request +GET /test +--- more_headers +Accept-Encoding: gzip, zstd +--- response_headers +Content-Length: 20706 +ETag: "5be17d33-50e2" +Content-Encoding: zstd +--- no_error_log +[error] + + + +=== TEST 4: zstd_static on (without accept-encoding header) +--- config + location /test { + zstd_static on; + root ../../t/suite; + } +--- request +GET /test +--- response_headers +Content-Length: 59738 +ETag: "5be17d33-e95a" +!Content-Encoding +--- no_error_log +[error] + + + +=== TEST 5: zstd_static on (without zstd component 
in accept-encoding header) +--- config + location /test { + zstd_static on; + root ../../t/suite; + } +--- request +GET /test +--- more_headers +Accept-Encoding: gzip, br +--- response_headers +Content-Length: 59738 +ETag: "5be17d33-e95a" +!Content-Encoding +--- no_error_log +[error] + + + +=== TEST 6: zstd_static always +--- config + location /test { + zstd_static always; + root ../../t/suite; + } +--- request +GET /test +--- more_headers +Accept-Encoding: gzip, br +--- response_headers +Content-Length: 20706 +ETag: "5be17d33-50e2" +Content-Encoding: zstd +--- no_error_log +[error] + + + +=== TEST 7: zstd_static always (without accept-encoding header) +--- config + location /test { + zstd_static always; + root ../../t/suite; + } +--- request +GET /test +--- response_headers +Content-Length: 20706 +ETag: "5be17d33-50e2" +Content-Encoding: zstd +--- no_error_log +[error] + + + +=== TEST 8: zstd_static always (without zstd component in accept-encoding header) +--- config + location /test { + zstd_static always; + root ../../t/suite; + } +--- request +GET /test +--- more_headers +Accept-Encoding: gzip, br +--- response_headers +Content-Length: 20706 +ETag: "5be17d33-50e2" +Content-Encoding: zstd +--- no_error_log +[error] + + +=== TEST 9: zstd_static always (file does not exist) +--- config + location /test2 { + zstd_static always; + root ../../t/suite; + } +--- request +GET /test2 +--- more_headers +Accept-Encoding: gzip, br +--- error_code: 404 + + + +=== TEST 10: zstd_static on (file does not exist) +--- config + location /test2 { + zstd_static on; + root ../../t/suite; + } +--- request +GET /test2 +--- more_headers +Accept-Encoding: gzip, br +--- error_code: 404 + + + +=== TEST 11: zstd_static off (file does not exist) +--- config + location /test2 { + zstd_static off; + root ../../t/suite; + } +--- request +GET /test2 +--- more_headers +Accept-Encoding: gzip, br +--- error_code: 404 diff --git a/ngx_zstd/t/suite/test b/ngx_zstd/t/suite/test new file mode 100644 
index 0000000..53ce3fa --- /dev/null +++ b/ngx_zstd/t/suite/test @@ -0,0 +1,2040 @@ + + + + +Regular Expression Matching Can Be Simple And Fast + + + + +

+Regular Expression Matching Can Be Simple And Fast +
+(but is slow in Java, Perl, PHP, Python, Ruby, ...) +

+

+Russ Cox +
+rsc@swtch.com +
+January 2007 +
+ +

+ + +

Introduction

+ +

+This is a tale of two approaches to regular expression matching. +One of them is in widespread use in the +standard interpreters for many languages, including Perl. +The other is used only in a few places, notably most implementations +of awk and grep. +The two approaches have wildly different +performance characteristics: +

+ +
+
+ +
Perl graphThompson NFA graph +
+
+Time to match $a?^{n}a^{n}$ against $a^{n}$ +
+
+
+ +

+Let's use superscripts to denote string repetition, +so that +$a?^{3}a^{3}$ +is shorthand for +a?a?a?aaa. +The two graphs plot the time required by each approach +to match the regular expression +$a?^{n}a^{n}$ +against the string $a^{n}$. +

+ +

+Notice that Perl requires over sixty seconds to match +a 29-character string. +The other approach, labeled Thompson NFA for +reasons that will be explained later, +requires twenty microseconds to match the string. +That's not a typo. The Perl graph plots time in seconds, +while the Thompson NFA graph plots time in microseconds: +the Thompson NFA implementation +is a million times faster than Perl +when running on a minuscule 29-character string. +The trends shown in the graph continue: the +Thompson NFA handles a 100-character string in under 200 microseconds, +while Perl would require over $10^{15}$ years. +(Perl is only the most conspicuous example of a large +number of popular programs that use the same algorithm; +the above graph could have been Python, or PHP, or Ruby, +or many other languages. A more detailed +graph later in this article presents data for other implementations.) +

+ +

+It may be hard to believe the graphs: perhaps you've used Perl, +and it never seemed like regular expression matching was +particularly slow. +Most of the time, in fact, regular expression matching in Perl +is fast enough. +As the graph shows, though, it is possible +to write so-called “pathological” regular expressions that +Perl matches very very slowly. +In contrast, there are no regular expressions that are +pathological for the Thompson NFA implementation. +Seeing the two graphs side by side prompts the question, +“why doesn't Perl use the Thompson NFA approach?” +It can, it should, and that's what the rest of this article is about. +

+ +

+Historically, regular expressions are one of computer science's +shining examples of how using good theory leads to good programs. +They were originally developed by theorists as a +simple computational model, +but Ken Thompson introduced them to +programmers in his implementation of the text editor QED +for CTSS. +Dennis Ritchie followed suit in his own implementation +of QED, for GE-TSS. +Thompson and Ritchie would go on to create Unix, +and they brought regular expressions with them. +By the late 1970s, regular expressions were a key +feature of the Unix landscape, in tools such as +ed, sed, grep, egrep, awk, and lex. +

+ +

+Today, regular expressions have also become a shining +example of how ignoring good theory leads to bad programs. +The regular expression implementations used by +today's popular tools are significantly slower +than the ones used in many of those thirty-year-old Unix tools. +

+ +

+This article reviews the good theory: +regular expressions, finite automata, +and a regular expression search algorithm +invented by Ken Thompson in the mid-1960s. +It also puts the theory into practice, describing +a simple implementation of Thompson's algorithm. +That implementation, less than 400 lines of C, +is the one that went head to head with Perl above. +It outperforms the more complex real-world +implementations used by +Perl, Python, PCRE, and others. +The article concludes with a discussion of how +theory might yet be converted into practice +in the real-world implementations. +

+ +

+Regular Expressions +

+ + +

+Regular expressions are a notation for +describing sets of character strings. +When a particular string is in the set +described by a regular expression, +we often say that the regular expression +matches +the string. +

+ +

+The simplest regular expression is a single literal character. +Except for the special metacharacters +*+?()|, +characters match themselves. +To match a metacharacter, escape it with +a backslash: +\+ +matches a literal plus character. +

+ +

+Two regular expressions can be alternated or concatenated to form a new +regular expression: +if e1 matches +s +and e2 matches +t, +then e1|e2 matches +s +or +t, +and +e1e2 +matches +st. +

+ +

+The metacharacters +*, ++, +and +? +are repetition operators: +e1* +matches a sequence of zero or more (possibly different) +strings, each of which match e1; +e1+ +matches one or more; +e1? +matches zero or one. +

+ +

+The operator precedence, from weakest to strongest binding, is +first alternation, then concatenation, and finally the +repetition operators. +Explicit parentheses can be used to force different meanings, +just as in arithmetic expressions. +Some examples: +ab|cd +is equivalent to +(ab)|(cd); +ab* +is equivalent to +a(b*). +

+ +

+The syntax described so far is a subset of the traditional Unix +egrep +regular expression syntax. +This subset suffices to describe all regular +languages: loosely speaking, a regular language is a set +of strings that can be matched in a single pass through +the text using only a fixed amount of memory. +Newer regular expression facilities (notably Perl and +those that have copied it) have added +many new operators +and escape sequences. These additions make the regular +expressions more concise, and sometimes more cryptic, but usually +not more powerful: +these fancy new regular expressions almost always have longer +equivalents using the traditional syntax. +

+ +

+One common regular expression extension that +does provide additional power is called +backreferences. +A backreference like +\1 +or +\2 +matches the string matched +by a previous parenthesized expression, and only that string: +(cat|dog)\1 +matches +catcat +and +dogdog +but not +catdog +nor +dogcat. +As far as the theoretical term is concerned, +regular expressions with backreferences +are not regular expressions. +The power that backreferences add comes at great cost: +in the worst case, the best known implementations require +exponential search algorithms, +like the one Perl uses. +Perl (and the other languages) +could not now remove backreference support, +of course, but they could employ much faster algorithms +when presented with regular expressions that don't have +backreferences, like the ones considered above. +This article is about those faster algorithms. +

+ +

+Finite Automata +

+ + + +

+Another way to describe sets of character strings is with +finite automata. +Finite automata are also known as state machines, +and we will use “automaton” and “machine” interchangeably. +

+ +

+As a simple example, here is a machine recognizing +the set of strings matched by the regular expression +a(bb)+a: +

+ +

DFA for a(bb)+a

+ +

+A finite automaton is always in one of its states, +represented in the diagram by circles. +(The numbers inside the circles are labels to make this +discussion easier; they are not part of the machine's operation.) +As it reads the string, it switches from state to state. +This machine has two special states: the start state s0 +and the matching state s4. +Start states are depicted with lone arrowheads pointing at them, +and matching states are drawn as a double circle. +

+ +

+The machine reads an input string one character at a time, +following arrows corresponding to the input to move from +state to state. +Suppose the input string is +abbbba. +When the machine reads the first letter of the string, the +a, +it is in the start state s0. It follows the +a +arrow to state s1. +This process repeats as the machine reads the rest of the string: +b +to +s2, +b +to +s3, +b +to +s2, +b +to +s3, +and finally +a +to +s4. +

+

DFA execution on abbbba

+

+The machine ends in s4, a matching state, so it +matches the string. +If the machine ends in a non-matching state, it does not +match the string. +If, at any point during the machine's execution, there is no +arrow for it to follow corresponding to the current +input character, the machine stops executing early. +

+ +

+The machine we have been considering is called a +deterministic +finite automaton (DFA), +because in any state, each possible input letter +leads to at most one new state. +We can also create machines +that must choose between multiple possible next states. +For example, this machine is equivalent to the previous +one but is not deterministic: +

+

NFA for a(bb)+a

+

+The machine is not deterministic because if it reads a +b +in state s2, it has multiple choices for the next state: +it can go back to s1 in hopes of seeing another +bb, +or it can go on to s3 in hopes of seeing the final +a. +Since the machine cannot peek ahead to see the rest of +the string, it has no way to know which is the correct decision. +In this situation, it turns out to be interesting to +let the machine +always guess correctly. +Such machines are called non-deterministic finite automata +(NFAs or NDFAs). +An NFA matches an input string if there is some way +it can read the string and follow arrows to a matching state. +

+ +

+Sometimes it is convenient to let NFAs have arrows with no +corresponding input character. We will leave these arrows unlabeled. +An NFA can, at any time, choose to follow an unlabeled arrow +without reading any input. +This NFA is equivalent to the previous two, but the unlabeled arrow +makes the correspondence with +a(bb)+a +clearest: +

+

Another NFA for a(bb)+a

+ +

+Converting Regular Expressions to NFAs +

+ +

+Regular expressions and NFAs turn out to be exactly +equivalent in power: every regular expression has an +equivalent NFA (they match the same strings) and vice versa. +(It turns out that DFAs are also equivalent in power +to NFAs and regular expressions; we will see this later.) +There are multiple ways to translate regular expressions into NFAs. +The method described here was first described by Thompson +in his 1968 CACM paper. +

+ +

+The NFA for a regular expression is built up from partial NFAs +for each subexpression, with a different construction for +each operator. The partial NFAs have +no matching states: instead they have one or more dangling arrows, +pointing to nothing. The construction process will finish by +connecting these arrows to a matching state. +

+ +

+The NFAs for matching single characters look like: +

+

Single-character NFA

+

+The NFA for the concatenation e1e2 +connects the final arrow of the e1 +machine to the start of the e2 machine: +

+

Concatenation NFA

+

+The NFA for the alternation e1|e2 +adds a new start state with a choice of either the +e1 machine or the e2 machine. +

+

Alternation NFA

+

+The NFA for e? alternates the e machine with an empty path: +

+

Zero or one NFA

+

+The NFA for e* uses the same alternation but loops a +matching e machine back to the start: +

+

Zero or more NFA

+

+The NFA for e+ also creates a loop, but one that +requires passing through e at least once: +

+

One or more NFA

+ +

+Counting the new states in the diagrams above, we can see +that this technique creates exactly one state per character +or metacharacter in the regular expression, +excluding parentheses. +Therefore the number of states in the final NFA is at most +equal to the length of the original regular expression. +

+ +

+Just as with the example NFA discussed earlier, it is always possible +to remove the unlabeled arrows, and it is also always possible to generate +the NFA without the unlabeled arrows in the first place. +Having the unlabeled arrows makes the NFA easier for us to read +and understand, and they also make the C representation +simpler, so we will keep them. +

+ +

+Regular Expression Search Algorithms +

+ +

+Now we have a way to test whether a regular expression +matches a string: convert the regular expression to an NFA +and then run the NFA using the string as input. +Remember that NFAs are endowed with the ability to guess +perfectly when faced with a choice of next state: +to run the NFA using an ordinary computer, we must find +a way to simulate this guessing. +

+ +

+One way to simulate perfect guessing is to guess +one option, and if that doesn't work, try the other. +For example, consider the NFA for +abab|abbb +run on the string +abbb: +

+

NFA for abab|abbb

+

Backtracking execution on abbb

+

+At step 0, the NFA must make a choice: try to match +abab +or +try to match +abbb? +In the diagram, the NFA tries +abab, +but that fails after step 3. +The NFA then tries the other choice, leading to step 4 and eventually a match. +This backtracking approach +has a simple recursive implementation +but can read the input string many times +before succeeding. +If the string does not match, +the machine must try +all +possible execution paths before +giving up. +The NFA tried only two different paths in the example, +but in the worst case, there can be exponentially +many possible execution paths, leading to very slow run times. +

+ +

+A more efficient but more complicated way to simulate perfect +guessing is to guess both options simultaneously. +In this approach, the simulation allows the machine +to be in multiple states at once. To process each letter, +it advances all the states along all the arrows that +match the letter. +

+

Parallel execution on abbb

+

+The machine starts in the start state and all the states +reachable from the start state by unlabeled arrows. +In steps 1 and 2, the NFA is in two states simultaneously. +Only at step 3 does the state set narrow down to a single state. +This multi-state approach tries both paths at the same time, +reading the input only once. +In the worst case, the NFA might be in +every +state at each step, but this results in at worst a constant amount +of work independent of the length of the string, +so arbitrarily +large input strings can be processed in linear time. +This is a dramatic improvement over the exponential time +required by the backtracking approach. +The efficiency comes from tracking the set of reachable +states but +not +which paths were used to reach them. +In an NFA with +n +nodes, there can only be +n +reachable states at any step, but there might be +$2^n$ paths through the NFA. +

+ +

+Implementation +

+ +

+Thompson introduced the multiple-state simulation approach +in his 1968 paper. +In his formulation, the states of the NFA were represented +by small machine-code sequences, and the list of possible states +was just a sequence of function call instructions. +In essence, Thompson compiled the regular expression into clever +machine code. +Forty years later, computers are much faster and the +machine code approach is not as necessary. +The following sections +present an implementation written in portable ANSI C. +The full source code (under 400 lines) +and the benchmarking scripts are +available online. +(Readers who are unfamiliar or uncomfortable with C or pointers should +feel free to read the descriptions and skip over the actual code.) +

+ +

+Implementation: Compiling to NFA +

+ +

+The first step is to compile the regular expression +into an equivalent NFA. +In our C program, we will represent an NFA as a +linked collection of +State +structures: +

+
+struct State	/* one node of the compiled NFA */
+{
+	int c;	/* selects which NFA fragment this State represents (a character to match, or Split) */
+	State *out;	/* outgoing arrow; step() follows it when c equals the input character */
+	State *out1;	/* second outgoing arrow, followed by addstate() for Split states */
+	int lastlist;	/* listid of the last list this State was added to (see addstate) */
+};
+

+Each +State +represents one of the following three NFA fragments, +depending on the value of +c. +

+

Possible per-State NFA fragments

+

+(Lastlist +is used during execution and is explained in the next section.) +

+ +

+Following Thompson's paper, +the compiler builds an NFA from a regular expression in +postfix +notation with dot +(.) added +as an explicit concatenation operator. +A separate function +re2post +rewrites infix regular expressions like +“a(bb)+a” +into equivalent postfix expressions like +“abb.+.a.”. +(A “real” implementation would certainly +need to use dot as the “any character” metacharacter +rather than as a concatenation operator. +A real implementation would also probably build the +NFA during parsing rather than build an explicit postfix expression. +However, the postfix version is convenient and follows +Thompson's paper more closely.) +

+ +

+As the compiler scans the postfix expression, it maintains +a stack of computed NFA fragments. +Literals push new NFA fragments onto the stack, while +operators pop fragments off the stack and then +push a new fragment. +For example, +after compiling the +abb in abb.+.a., +the stack contains NFA fragments for +a, +b, +and +b. +The compilation of the +. +that follows pops the two +b +NFA fragments from the stack and pushes an NFA fragment for the +concatenation +bb.. +Each NFA fragment is defined by its start state and its +outgoing arrows: +

+struct Frag	/* partially built NFA: a start state plus its dangling arrows */
+{
+	State *start;	/* start state of the fragment */
+	Ptrlist *out;	/* list of pointers to State* slots not yet connected to anything */
+};
+

+Start +points at the start state for the fragment, +and +out +is a list of pointers to +State* +pointers that are not yet connected to anything. +These are the dangling arrows in the NFA fragment. +

+ +

+Some helper functions manipulate pointer lists: +

+Ptrlist *list1(State **outp);	/* new pointer list containing the single pointer outp */
+Ptrlist *append(Ptrlist *l1, Ptrlist *l2);	/* concatenate l1 and l2, returning the result */
+
+void patch(Ptrlist *l, State *s);	/* set *outp = s for each pointer outp in l */
+

+List1 +creates a new pointer list containing the single pointer +outp. +Append +concatenates two pointer lists, returning the result. +Patch +connects the dangling arrows in the pointer list +l +to the state +s: +it sets +*outp += +s +for each pointer +outp +in +l. +

+ +

+Given these primitives and a fragment stack, +the compiler is a simple loop over the postfix expression. +At the end, there is a single fragment left: +patching in a matching state completes the NFA. +

+State*
+post2nfa(char *postfix)	/* compile a postfix regular expression into an NFA; returns its start state */
+{
+	char *p;
+	Frag stack[1000], *stackp, e1, e2, e;	/* stack of computed NFA fragments; capacity is fixed at 1000 and no overflow check is visible */
+	State *s;
+
+	#define push(s) *stackp++ = s
+	#define pop()   *--stackp
+
+	stackp = stack;
+	for(p=postfix; *p; p++){	/* each character of postfix is one literal or operator */
+		switch(*p){
+		/* compilation cases, described below */
+		}
+	}
+	
+	e = pop();	/* the single fragment left at the end */
+	patch(e.out, matchstate);	/* connect the remaining dangling arrows to the matching state */
+	return e.start;
+}
+

+The specific compilation cases mimic the translation +steps described earlier. +

+ + +

+Literal characters: +

+default:	/* literal character: push a one-state fragment that matches *p */
+	s = state(*p, NULL, NULL);	/* both arrows start out unconnected */
+	push(frag(s, list1(&s->out)));	/* s->out is the fragment's only dangling arrow */
+	break;
+
+
+ +

+Catenation: +

+case '.':	/* concatenation of e1 and e2 */
+	e2 = pop();	/* popped first, so e2 is the right-hand operand */
+	e1 = pop();
+	patch(e1.out, e2.start);	/* e1's dangling arrows now lead to the start of e2 */
+	push(frag(e1.start, e2.out));	/* result starts at e1 and dangles where e2 dangled */
+	break;
+
+
+ +

+Alternation: +

+case '|':	/* alternation of e1 and e2 */
+	e2 = pop();
+	e1 = pop();
+	s = state(Split, e1.start, e2.start);	/* new start state offering a choice of e1 or e2 */
+	push(frag(s, append(e1.out, e2.out)));	/* dangling arrows of both branches stay open */
+	break;
+
+
+ +

+Zero or one: +

+case '?':	/* zero or one */
+	e = pop();
+	s = state(Split, e.start, NULL);	/* out enters e; out1 is the empty path, left unconnected for now */
+	push(frag(s, append(e.out, list1(&s->out1))));	/* both e's exits and the empty path dangle */
+	break;
+
+
+ +

+Zero or more: +

+case '*':	/* zero or more */
+	e = pop();
+	s = state(Split, e.start, NULL);
+	patch(e.out, s);	/* loop e back to the Split state */
+	push(frag(s, list1(&s->out1)));	/* fragment starts at the Split, so e can be skipped entirely */
+	break;
+
+
+ +

+One or more: +

+case '+':	/* one or more */
+	e = pop();
+	s = state(Split, e.start, NULL);
+	patch(e.out, s);	/* after e, the Split chooses between looping back into e and leaving */
+	push(frag(e.start, list1(&s->out1)));	/* fragment starts at e itself, so e is passed through at least once */
+	break;
+
+
+
+ +

+Implementation: Simulating the NFA +

+ +

+Now that the NFA has been built, we need to simulate it. +The simulation requires tracking +State +sets, which are stored as a simple array list: +

+struct List	/* a set of NFA states, stored as a simple array list */
+{
+	State **s;	/* array of State pointers */
+	int n;	/* number of entries of s currently in use */
+};
+

+The simulation uses two lists: +clist +is the current set of states that the NFA is in, +and +nlist +is the next set of states that the NFA will be in, +after processing the current character. +The execution loop initializes +clist +to contain just the start state and then +runs the machine one step at a time. +

+int
+match(State *start, char *s)	/* run the NFA from start over the string s; returns ismatch() of the final state list */
+{
+	List *clist, *nlist, *t;
+
+	/* l1 and l2 are preallocated globals */
+	clist = startlist(start, &l1);	/* current set: the start state */
+	nlist = &l2;
+	for(; *s; s++){
+		step(clist, *s, nlist);	/* compute nlist from clist for this character */
+		t = clist; clist = nlist; nlist = t;	/* swap clist, nlist */
+	}
+	return ismatch(clist);
+}
+

+To avoid allocating on every iteration of the loop, +match +uses two preallocated lists +l1 +and +l2 +as +clist +and +nlist, +swapping the two after each step. +

+ +

+If the final state list contains the matching state, +then the string matches. +

+int
+ismatch(List *l)	/* returns 1 if l contains matchstate, 0 otherwise */
+{
+	int i;
+
+	for(i=0; i<l->n; i++)
+		if(l->s[i] == matchstate)	/* compared by pointer identity */
+			return 1;
+	return 0;
+}
+

+

+ +

+Addstate +adds a state to the list, +but not if it is already on the list. +Scanning the entire list for each add would be inefficient; +instead the variable +listid +acts as a list generation number. +When +addstate +adds +s +to a list, +it records +listid +in +s->lastlist. +If the two are already equal, +then +s +is already on the list being built. +Addstate +also follows unlabeled arrows: +if +s +is a +Split +state with two unlabeled arrows to new states, +addstate +adds those states to the list instead of +s. +

+void
+addstate(List *l, State *s)	/* add s to l unless it is already on it, following unlabeled arrows */
+{
+	if(s == NULL || s->lastlist == listid)	/* nothing to add, or s was already visited for the list being built */
+		return;
+	s->lastlist = listid;	/* mark s as visited for this list generation */
+	if(s->c == Split){
+		/* follow unlabeled arrows */
+		addstate(l, s->out);
+		addstate(l, s->out1);
+		return;	/* the Split state itself is not stored in l */
+	}
+	l->s[l->n++] = s;
+}
+

+

+ +

+Startlist +creates an initial state list by adding just the start state: +

+List*
+startlist(State *s, List *l)	/* reset l and add the start state s to it via addstate(); returns l */
+{
+	listid++;	/* new list generation, so lastlist marks left by earlier lists no longer match */
+	l->n = 0;
+	addstate(l, s);
+	return l;
+}
+

+

+ +

+Finally, +step +advances the NFA past a single character, using +the current list +clist +to compute the next list +nlist. +

+void
+step(List *clist, int c, List *nlist)	/* advance the NFA past character c: fill nlist from the states in clist */
+{
+	int i;
+	State *s;
+
+	listid++;	/* nlist is a new list generation for addstate's duplicate check */
+	nlist->n = 0;	/* discard whatever nlist held before */
+	for(i=0; i<clist->n; i++){
+		s = clist->s[i];
+		if(s->c == c)	/* only states whose character equals c advance */
+			addstate(nlist, s->out);
+	}
+}
+
+ +

+Performance +

+ +

+The C implementation just described was not written with performance in mind. +Even so, a slow implementation of a linear-time algorithm +can easily outperform a fast implementation of an +exponential-time algorithm once the exponent is large enough. +Testing a variety of popular regular expression engines on +a so-called pathological regular expression demonstrates this nicely. +

+ +

+Consider the regular expression +$\texttt{a?}^n\texttt{a}^n$. +It matches the string +$\texttt{a}^n$ +when the +a? +are chosen not to match any letters, +leaving the entire string to be matched by the +$\texttt{a}^n$. +Backtracking regular expression implementations +implement the zero-or-one +? +by first trying one and then zero. +There are +n +such choices to make, a total of +$2^n$ possibilities. +Only the very last +possibility—choosing zero for all the ?—will lead to a match. +The backtracking approach thus requires +$O(2^n)$ time, so it will not scale much beyond n=25. +

+ +

+In contrast, Thompson's algorithm maintains state lists of length +approximately n and processes the string, also of length n, +for a total of $O(n^2)$ time. +(The run time is superlinear, +because we are not keeping the regular expression constant +as the input grows. +For a regular expression of length m run on text of length n, +the Thompson NFA requires O(mn) time.) +

+ +

+The following graph plots time required to check whether +$\texttt{a?}^n\texttt{a}^n$ +matches +$\texttt{a}^n$: +

+ +
+
+
+
+
+Performance graph +
+regular expression and text size n +
+$\texttt{a?}^n\texttt{a}^n$ +matching +$\texttt{a}^n$ +
+
+
+
+
+ +

+Notice that the graph's y-axis has a logarithmic scale, +in order to be able to see a wide variety of times on a single graph. +

+ +

+From the graph it is clear that Perl, PCRE, Python, and Ruby are +all using recursive backtracking. +PCRE stops getting the right answer at +n=23, +because it aborts the recursive backtracking after a maximum number +of steps. +As of Perl 5.6, Perl's regular expression engine is +said to memoize +the recursive backtracking search, which should, at some memory cost, +keep the search from taking exponential amounts of time +unless backreferences are being used. +As the performance graph shows, the memoization is not complete: +Perl's run time grows exponentially even though there +are no backreferences +in the expression. +Although not benchmarked here, Java uses a backtracking +implementation too. +In fact, the +java.util.regex +interface requires a backtracking +implementation, because arbitrary Java code +can be substituted into the matching path. +PHP uses the PCRE library. +

+ +

+The thick blue line is the C implementation of Thompson's algorithm given above. +Awk, Tcl, GNU grep, and GNU awk +build DFAs, either precomputing them or using the on-the-fly +construction described in the next section. +

+ +

+Some might argue that this test is unfair to +the backtracking implementations, since it focuses on an +uncommon corner case. +This argument misses the point: +given a choice between an implementation +with a predictable, consistent, fast running time on all inputs +or one that usually runs quickly but can take +years of CPU time (or more) on some inputs, +the decision should be easy. +Also, while examples as dramatic as this one +rarely occur in practice, less dramatic ones do occur. +Examples include using +(.*) +(.*) +(.*) +(.*) +(.*) +to split five space-separated fields, or using +alternations where the common cases +are not listed first. +As a result, programmers often learn which constructs are +expensive and avoid them, or they turn to so-called +optimizers. +Using Thompson's NFA simulation does not require such adaptation: +there are no expensive regular expressions. +

+ +

+Caching the NFA to build a DFA +

+ +

+Recall that DFAs are more efficient to execute than NFAs, +because DFAs are only ever in one state at a time: they never +have a choice of multiple next states. +Any NFA can be converted into an equivalent DFA +in which each DFA state corresponds to a +list of NFA states. +

+ +

+For example, here is the NFA we used earlier for +abab|abbb, +with state numbers added: +

+

NFA for abab|abbb

+

+The equivalent DFA would be: +

+

DFA for abab|abbb

+

+Each state in the DFA corresponds to a list of +states from the NFA. +

+ +

+In a sense, Thompson's NFA simulation is +executing the equivalent DFA: each +List +corresponds to some DFA state, +and the +step +function is computing, given a list and a next character, +the next DFA state to enter. +Thompson's algorithm simulates the DFA by +reconstructing each DFA state as it is needed. +Rather than throw away this work after each step, +we could cache the +Lists +in spare memory, avoiding the cost of repeating the computation +in the future +and essentially computing the equivalent DFA as it is needed. +This section presents the implementation of such an approach. +Starting with the NFA implementation from the previous section, +we need to add less than 100 lines to build a DFA implementation. +

+ +

+To implement the cache, we first introduce a new data type +that represents a DFA state: +

+struct DState	/* DFA state: the cached copy of a List of NFA states */
+{
+	List l;	/* the NFA state list this DFA state stands for */
+	DState *next[256];	/* next[c] is the next state on input character c; NULL if not computed yet */
+	DState *left;	/* child holding lists that compare less than l in dstate()'s binary tree */
+	DState *right;	/* child holding lists that compare greater than l */
+};
+

+A +DState +is the cached copy of the list +l. +The array +next +contains pointers to the next state for each +possible input character: +if the current state is +d +and the next input character is +c, +then +d->next[c] +is the next state. +If +d->next[c] +is null, then the next state has not been computed yet. +Nextstate +computes, records, and returns the next state +for a given state and character. +

+ +

+The regular expression match follows +d->next[c] +repeatedly, calling +nextstate +to compute new states as needed. +

+int
+match(DState *start, char *s)	/* run the cached DFA from start over s; returns ismatch() of the final state's list */
+{
+	int c;
+	DState *d, *next;
+	
+	d = start;
+	for(; *s; s++){
+		c = *s & 0xFF;	/* mask to 0..255 so c is a valid index into next[256] */
+		if((next = d->next[c]) == NULL)
+			next = nextstate(d, c);	/* not cached yet: compute and record it */
+		d = next;
+	}
+	return ismatch(&d->l);
+}
+

+

+ +

+All the +DStates +that have been computed need to be saved in a +structure that lets us look up a +DState +by its +List. +To do this, we arrange them +in a binary tree +using the sorted +List +as the key. +The +dstate +function returns the +DState +for a given +List, +allocating one if necessary: +

+DState*
+dstate(List *l)	/* return the DState for l, allocating one if necessary; sorts l->s in place */
+{
+	int i;
+	DState **dp, *d;
+	static DState *alldstates;	/* root of the binary tree holding every DState created so far */
+
+	qsort(l->s, l->n, sizeof l->s[0], ptrcmp);	/* the sorted list is the tree key */
+
+	/* look in tree for existing DState */
+	dp = &alldstates;
+	while((d = *dp) != NULL){
+		i = listcmp(l, &d->l);
+		if(i < 0)
+			dp = &d->left;
+		else if(i > 0)
+			dp = &d->right;
+		else
+			return d;	/* already cached */
+	}
+	
+	/* allocate, initialize new DState */
+	d = malloc(sizeof *d + l->n*sizeof l->s[0]);	/* one block: the DState followed by its state array; NOTE(review): result is not checked for NULL */
+	memset(d, 0, sizeof *d);	/* zeroes next[], left and right */
+	d->l.s = (State**)(d+1);	/* the state array lives just past the struct */
+	memmove(d->l.s, l->s, l->n*sizeof l->s[0]);
+	d->l.n = l->n;
+
+	/* insert in tree */
+	*dp = d;	/* dp still addresses the NULL link where the search ended */
+	return d;
+}
+

+Nextstate runs the NFA +step +and returns the corresponding +DState: +

+DState*
+nextstate(DState *d, int c)	/* compute the state after d on input c, record it in d->next[c], and return it */
+{
+	step(&d->l, c, &l1);	/* run one NFA step from d's state list into l1 */
+	return d->next[c] = dstate(&l1);
+}
+

+Finally, the DFA's start state is the +DState +corresponding to the NFA's start list: +

+DState*
+startdstate(State *start)	/* the DFA's start state: the DState for the NFA's start list */
+{
+	return dstate(startlist(start, &l1));	/* l1 is used as the scratch list here */
+}
+

+(As in the NFA simulation, +l1 +is a preallocated +List.) +

+ +

+The +DStates +correspond to DFA states, but the DFA is only built as needed: +if a DFA state has not been encountered during the search, +it does not yet exist in the cache. +An alternative would be to compute the entire DFA at once. +Doing so would make +match +a little faster by removing the conditional branch, +but at the cost of increased startup time and +memory use. +

+ +

+One might also worry about bounding the amount of +memory used by the on-the-fly DFA construction. +Since the +DStates +are only a cache of the +step +function, the implementation of +dstate +could choose to throw away the entire DFA so far +if the cache grew too large. +This cache replacement policy +only requires a few extra lines of code in +dstate +and in +nextstate, +plus around 50 lines of code for memory management. +An implementation is +available online. +(Awk +uses a similar limited-size cache strategy, +with a fixed limit of 32 cached states; this explains the discontinuity +in its performance at n=28 in the graph above.) +

+ +

+NFAs derived from regular expressions +tend to exhibit good locality: they visit the same states +and follow the same transition arrows over and over +when run on most texts. +This makes the caching worthwhile: the first time an arrow +is followed, the next state must be computed as in the NFA +simulation, but future traversals of the arrow are just +a single memory access. +Real DFA-based implementations can make use +of additional optimizations to run even faster. +A companion article (not yet written) will explore +DFA-based regular expression implementations in more detail. +

+ + +

+Real world regular expressions +

+ +

+Regular expression usage in real programs +is somewhat more complicated than what the regular expression +implementations described above can handle. +This section briefly describes the common complications; +full treatment of any of these is beyond the scope of this +introductory article. +

+ +

+Character classes. +A character class, whether +[0-9] +or +\w +or +. (dot), +is just a concise representation of an alternation. +Character classes can be expanded into alternations +during compilation, though it is more efficient to add +a new kind of NFA node to represent them explicitly. +POSIX +defines special character classes +like [[:upper:]] that change meaning +depending on the current locale, but the hard part of +accommodating these is determining their meaning, +not encoding that meaning into an NFA. +

+ +

+Escape sequences. +Real regular expression syntaxes need to handle +escape sequences, both as a way to match metacharacters +(\(, +\), +\\, +etc.) +and to specify otherwise difficult-to-type characters such as +\n. +

+ +

+Counted repetition. +Many regular expression implementations provide a counted +repetition operator +{n} +to match exactly +n +strings matching a pattern; +{n,m} +to match at least +n +but no more than +m; +and +{n,} +to match +n +or more. +A recursive backtracking implementation can implement +counted repetition using a loop; an NFA or DFA-based +implementation must expand the repetition: +e{3} +expands to +eee; +e{3,5} +expands to +eeee?e?, +and +e{3,} +expands to +eee+. +

+ +

+Submatch extraction. +When regular expressions are used for splitting or parsing strings, +it is useful to be able to find out which sections of the input string +were matched by each subexpression. +After a regular expression like +([0-9]+-[0-9]+-[0-9]+) +([0-9]+:[0-9]+) +matches a string (say a date and time), +many regular expression engines make the +text matched by each parenthesized expression +available. +For example, one might write in Perl: +

+if(/([0-9]+-[0-9]+-[0-9]+) ([0-9]+:[0-9]+)/){
+	print "date: $1, time: $2\n";
+}
+

+The extraction of submatch boundaries has been mostly ignored +by computer science theorists, and it is perhaps the most +compelling argument for using recursive backtracking. +However, Thompson-style algorithms can be adapted to +track submatch boundaries without giving up efficient performance. +The Eighth Edition Unix +regexp(3) +library implemented such an algorithm as early as 1985, +though as explained below, +it was not very widely used or even noticed. +

+ +

+Unanchored matches. +This article has assumed that regular expressions +are matched against an entire input string. +In practice, one often wishes to find a substring +of the input that matches the regular expression. +Unix tools traditionally return the longest matching substring +that starts at the leftmost possible point in the input. +An unanchored search for +e +is a special case +of submatch extraction: it is like searching for +.*(e).* +where the first +.* +is constrained to match as short a string as possible. +

+ +

+Non-greedy operators. +In traditional Unix regular expressions, the repetition operators +?, +*, +and ++ +are defined to match as much of the string as possible while +still allowing the entire regular expression to match: +when matching +(.+)(.+) +against +abcd, +the first +(.+) +will match +abc, +and the second +will match +d. +These operators are now called +greedy. +Perl introduced +??, +*?, +and ++? +as non-greedy versions, which match as little of the string +as possible while preserving the overall match: +when matching +(.+?)(.+?) +against +abcd, +the first +(.+?) +will match only +a, +and the second +will match +bcd. +By definition, whether an operator is greedy +cannot affect whether a regular expression matches a +particular string as a whole; it only affects the +choice of submatch boundaries. +The backtracking algorithm admits a simple implementation +of non-greedy operators: +try the shorter match before the longer one. +For example, in a standard backtracking implementation, +e? +first tries using +e +and then tries not using it; +e?? +uses the other order. +The submatch-tracking variants of Thompson's algorithm +can be adapted to accommodate non-greedy operators. +

+ +

+Assertions. +The traditional regular expression metacharacters +^ +and +$ +can be viewed as +assertions +about the text around them: +^ +asserts that the previous character +is a newline (or the beginning of the string), +while +$ +asserts that the next character is a newline +(or the end of the string). +Perl added more assertions, like +the word boundary +\b, +which asserts that +the previous character is alphanumeric but the next +is not, or vice versa. +Perl also generalized the idea to arbitrary +conditions called lookahead assertions: +(?=re) +asserts that the text after the current input position matches +re, +but does not actually advance the input position; +(?!re) +is similar but +asserts that the text does not match +re. +The lookbehind assertions +(?<=re) +and +(?<!re) +are similar but make assertions about the text +before the current input position. +Simple assertions like +^, +$, +and +\b +are easy to accommodate in an NFA, +delaying the match one byte for forward assertions. +The generalized assertions +are harder to accommodate but in principle could +be encoded in the NFA. +

+ +

+Backreferences. +As mentioned earlier, no one knows how to +implement regular expressions with backreferences efficiently, +though no one can prove that it's impossible either. +(Specifically, the +problem is NP-complete, meaning that if +someone did find an efficient implementation, that would +be major news to computer scientists and would +win a million dollar prize.) +The simplest, most effective strategy for backreferences, +taken by the original awk and egrep, is not to implement them. +This strategy is no longer practical: users have come to +rely on backreferences for at least occasional use, +and backreferences are part of +the +POSIX standard for regular expressions. +Even so, it would be reasonable to use Thompson's NFA simulation +for most regular expressions, and only bring out +backtracking when it is needed. +A particularly clever implementation could combine the two, +resorting to backtracking only to accommodate the backreferences. +

+ +

+Backtracking with memoization. +Perl's approach of using memoization to avoid exponential blowup +during backtracking +when possible is a good one. At least in theory, it should make +Perl's regular expressions behave more like an NFA and +less like backtracking. +Memoization does not completely solve the problem, though: +the memoization itself requires a memory footprint roughly +equal to the size of the text times the size of the regular expression. +Memoization also does not address the issue of the stack space used +by backtracking, which is linear in the size of the text: +matching long strings typically causes a backtracking +implementation to run out of stack space: +

+$ perl -e '("a" x 100000) =~ /^(ab?)*$/;'
+Segmentation fault (core dumped)
+$
+
+ +

+Character sets. +Modern regular expression implementations must deal with +large non-ASCII character sets such as Unicode. +The +Plan 9 regular expression library +incorporates Unicode by running an NFA with a +single Unicode character as the input character for each step. +That library separates the running of the NFA from decoding +the input, so that the same regular expression matching code +is used for both +UTF-8 +and wide-character inputs. +

+ +

+History and References +

+ + +

+Michael Rabin and Dana Scott +introduced non-deterministic finite automata +and the concept of non-determinism in 1959 +[7], +showing that NFAs can be simulated by +(potentially much larger) DFAs in which +each DFA state corresponds to a set of NFA states. +(They won the Turing Award in 1976 for the introduction +of the concept of non-determinism in that paper.) +

+ +

+R. McNaughton and H. Yamada +[4] +and +Ken Thompson +[9] +are commonly credited with giving the first constructions +to convert regular expressions into NFAs, +even though neither paper mentions the +then-nascent concept of an NFA. +McNaughton and Yamada's construction +creates a DFA, +and Thompson's construction creates IBM 7094 machine code, +but reading between the lines one can +see latent NFA constructions underlying both. +Regular expression to NFA constructions differ only in how they encode +the choices that the NFA must make. +The approach used above, mimicking Thompson, +encodes the choices with explicit choice +nodes +(the +Split +nodes above) +and unlabeled arrows. +An alternative approach, +the one most commonly credited to McNaughton and Yamada, +is to avoid unlabeled arrows, instead allowing NFA states to +have multiple outgoing arrows with the same label. +McIlroy +[3] +gives a particularly elegant implementation of this approach +in Haskell. +

+ +

+Thompson's regular expression implementation +was for his QED editor running on the CTSS +[10] +operating +system on the IBM 7094. +A copy of the editor can be found in archived CTSS sources +[5]. +L. Peter Deutsch and Butler Lampson +[1] +developed the first QED, but +Thompson's reimplementation was the first to use +regular expressions. +Dennis Ritchie, author of yet another QED implementation, +has documented the early history of the QED editor +[8]. +(Thompson, Ritchie, and Lampson later won +Turing awards for work unrelated to QED or finite automata.) +

+ +

+Thompson's paper marked the +beginning of a long line of regular expression implementations. +Thompson chose not to use his algorithm when +implementing the text editor ed, which appeared in +First Edition Unix (1971), or in its descendant grep, +which first appeared in the Fourth Edition (1973). +Instead, these venerable Unix tools used +recursive backtracking! +Backtracking was justifiable because the +regular expression syntax was quite limited: +it omitted grouping parentheses and the +|, +?, +and ++ +operators. +Al Aho's egrep, +which first appeared in the Seventh Edition (1979), +was the first Unix tool to provide +the full regular expression syntax, using a +precomputed DFA. +By the Eighth Edition (1985), egrep computed the DFA on the fly, +like the implementation given above. +

+ +

+While writing the text editor sam +[6] +in the early 1980s, +Rob Pike wrote a new regular expression implementation, +which Dave Presotto extracted into a library that +appeared in the Eighth Edition. +Pike's implementation +incorporated submatch tracking into an efficient NFA simulation +but, like the rest of the Eighth Edition source, was not widely +distributed. +Pike himself did not realize that his technique was anything new. +Henry Spencer reimplemented the Eighth Edition library +interface from scratch, but using backtracking, +and +released his implementation +into the public domain. +It became very widely used, eventually serving as the basis +for the slow regular expression implementations +mentioned earlier: Perl, PCRE, Python, and so on. +(In his defense, +Spencer knew the routines could be slow, +and he didn't know that a more efficient algorithm existed. +He even warned in the documentation, +“Many users have found the speed perfectly adequate, +although replacing the insides of egrep with this code +would be a mistake.”) +Pike's regular expression implementation, extended to +support Unicode, was made freely available +with sam in +late 1992, +but the particularly efficient +regular expression search algorithm went unnoticed. +The code is now available in many forms: as +part of sam, +as +Plan 9's regular expression library, +or +packaged separately for Unix. +Ville Laurikari independently discovered Pike's algorithm +in 1999, developing a theoretical foundation as well +[2]. +

+ + +

+Finally, any discussion of regular expressions +would be incomplete without mentioning +Jeffrey Friedl's book +Mastering Regular Expressions, +perhaps the most popular reference among today's programmers. +Friedl's book teaches programmers how best to use today's +regular expression implementations, but not how best to implement them. +What little text it devotes to implementation +issues perpetuates the widespread belief that recursive backtracking +is the only way to simulate an NFA. +Friedl makes it clear that he +neither understands nor respects +the underlying theory. +

+ +

+Summary +

+ +

+Regular expression matching can be simple and fast, using +finite automata-based techniques that have been known for decades. +In contrast, Perl, PCRE, Python, Ruby, Java, +and many other languages +have regular expression implementations based on +recursive backtracking that are simple but can be +excruciatingly slow. +With the exception of backreferences, the features +provided by the slow backtracking implementations +can be provided by the automata-based implementations +at dramatically faster, more consistent speeds. +

+ +

+The next article in this series, +“Regular Expression Matching: the Virtual Machine Approach,” discusses NFA-based submatch extraction. +The third article, “Regular Expression Matching in the Wild,” examines a production implementation. +The fourth article, “Regular Expression Matching with a Trigram Index,” explains how Google Code Search was implemented. +

+ +

+Acknowledgements +

+ +

+Lee Feigenbaum, +James Grimmelmann, +Alex Healy, +William Josephson, +and +Arnold Robbins +read drafts of this article and made many helpful suggestions. +Rob Pike clarified some of the history surrounding his +regular expression implementation. +Thanks to all. +

+ +

+References +

+ +

+ +[1] +L. Peter Deutsch and Butler Lampson, +“An online editor,” +Communications of the ACM 10(12) (December 1967), pp. 793–799. +http://doi.acm.org/10.1145/363848.363863 +

+ +[2] +Ville Laurikari, +“NFAs with Tagged Transitions, +their Conversion to Deterministic Automata +and +Application to Regular Expressions,” +in Proceedings of the Symposium on String Processing and +Information Retrieval, September 2000. +http://laurikari.net/ville/spire2000-tnfa.ps +

+ +[3] +M. Douglas McIlroy, +“Enumerating the strings of regular languages,” +Journal of Functional Programming 14 (2004), pp. 503–518. +http://www.cs.dartmouth.edu/~doug/nfa.ps.gz (preprint) +

+ +[4] +R. McNaughton and H. Yamada, +“Regular expressions and state graphs for automata,” +IRE Transactions on Electronic Computers EC-9(1) (March 1960), pp. 39–47. +

+ +[5] +Paul Pierce, +“CTSS source listings.” +http://www.piercefuller.com/library/ctss.html +(Thompson's QED is in the file +com5 +in the source listings archive and is marked as +0QED) +

+ +[6] +Rob Pike, +“The text editor sam,” +Software—Practice & Experience 17(11) (November 1987), pp. 813–845. +http://plan9.bell-labs.com/sys/doc/sam/sam.html +

+ +[7] +Michael Rabin and Dana Scott, +“Finite automata and their decision problems,” +IBM Journal of Research and Development 3 (1959), pp. 114–125. +http://www.research.ibm.com/journal/rd/032/ibmrd0302C.pdf +

+ +[8] +Dennis Ritchie, +“An incomplete history of the QED text editor.” +http://plan9.bell-labs.com/~dmr/qed.html +

+ +[9] +Ken Thompson, +“Regular expression search algorithm,” +Communications of the ACM 11(6) (June 1968), pp. 419–422. +http://doi.acm.org/10.1145/363347.363387 +(PDF) +

+ +[10] +Tom Van Vleck, +“The IBM 7094 and CTSS.” +http://www.multicians.org/thvv/7094.html +

+ +
+

+Discussion on reddit and perlmonks and +LtU +

+ +
+

+Copyright © 2007 Russ Cox. All Rights Reserved. +
+http://swtch.com/~rsc/regexp/ +

+
+ + + + + diff --git a/ngx_zstd/t/suite/test.zst b/ngx_zstd/t/suite/test.zst new file mode 100644 index 0000000..9d4e0c4 Binary files /dev/null and b/ngx_zstd/t/suite/test.zst differ diff --git a/ngx_zstd/valgrind.suppress b/ngx_zstd/valgrind.suppress new file mode 100644 index 0000000..fe4f255 --- /dev/null +++ b/ngx_zstd/valgrind.suppress @@ -0,0 +1,218 @@ +{ +<insert_a_suppression_name_here> + Memcheck:Addr1 + fun:ngx_init_cycle + fun:ngx_master_process_cycle + fun:main +} +{ +<insert_a_suppression_name_here> + Memcheck:Addr4 + fun:ngx_init_cycle + fun:ngx_master_process_cycle + fun:main +} +{ +<insert_a_suppression_name_here> + Memcheck:Cond + fun:ngx_vslprintf + fun:ngx_snprintf + fun:ngx_sock_ntop + fun:ngx_event_accept + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ +<insert_a_suppression_name_here> + Memcheck:Addr1 + fun:ngx_vslprintf + fun:ngx_snprintf + fun:ngx_sock_ntop + fun:ngx_event_accept +} +{ +<insert_a_suppression_name_here> + exp-sgcheck:SorG + fun:ngx_http_lua_ndk_set_var_get +} +{ +<insert_a_suppression_name_here> + exp-sgcheck:SorG + fun:ngx_http_variables_init_vars + fun:ngx_http_block +} +{ +<insert_a_suppression_name_here> + exp-sgcheck:SorG + fun:ngx_conf_parse +} +{ +<insert_a_suppression_name_here> + exp-sgcheck:SorG + fun:ngx_vslprintf + fun:ngx_log_error_core +} +{ +<insert_a_suppression_name_here> + Memcheck:Param + epoll_ctl(event) + fun:epoll_ctl +} +{ +<insert_a_suppression_name_here> + Memcheck:Cond + fun:ngx_conf_flush_files + fun:ngx_single_process_cycle +} +{ +<insert_a_suppression_name_here> + Memcheck:Cond + fun:memcpy + fun:ngx_vslprintf + fun:ngx_log_error_core + fun:ngx_http_charset_header_filter +} +{ +<insert_a_suppression_name_here> + Memcheck:Param + socketcall.setsockopt(optval) + fun:setsockopt + fun:drizzle_state_connect +} +{ +<insert_a_suppression_name_here> + Memcheck:Cond + fun:ngx_conf_flush_files + fun:ngx_single_process_cycle + fun:main +} +{ +<insert_a_suppression_name_here> + Memcheck:Leak + fun:malloc + fun:ngx_alloc + fun:ngx_event_process_init +} +{ +<insert_a_suppression_name_here> + Memcheck:Param + sendmsg(mmsg[0].msg_hdr) + fun:sendmmsg + fun:__libc_res_nsend +} +{ +<insert_a_suppression_name_here> + Memcheck:Param + sendmsg(msg.msg_iov[0]) + fun:__sendmsg_nocancel + fun:ngx_write_channel + fun:ngx_pass_open_channel + fun:ngx_start_cache_manager_processes +} +{ +<insert_a_suppression_name_here> + Memcheck:Cond + fun:ngx_init_cycle + fun:ngx_master_process_cycle + fun:main +} +{ +<insert_a_suppression_name_here> + Memcheck:Cond + fun:index +
fun:expand_dynamic_string_token + fun:_dl_map_object + fun:map_doit + fun:_dl_catch_error + fun:do_preload + fun:dl_main + fun:_dl_sysdep_start + fun:_dl_start +} +{ +<insert_a_suppression_name_here> + Memcheck:Param + sendmsg(mmsg[0].msg_hdr) + fun:sendmmsg + fun:__libc_res_nsend + fun:__libc_res_nquery + fun:__libc_res_nquerydomain + fun:__libc_res_nsearch +} +{ +<insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:ngx_alloc + fun:ngx_set_environment + fun:ngx_single_process_cycle +} +{ +<insert_a_suppression_name_here> + Memcheck:Cond + obj:* +} +{ +<insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:ngx_alloc + fun:ngx_set_environment + fun:ngx_worker_process_init +} +{ +<insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:ngx_alloc + fun:ngx_create_pool + fun:main +} +{ +<insert_a_suppression_name_here> + Memcheck:Param + epoll_pwait(sigmask) + fun:epoll_pwait + fun:epoll_wait + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ +<insert_a_suppression_name_here> + Memcheck:Param + epoll_pwait(sigmask) + fun:epoll_pwait + fun:epoll_wait + fun:ngx_epoll_test_rdhup + fun:ngx_epoll_init + fun:ngx_event_process_init +} +{ +<insert_a_suppression_name_here> + Memcheck:Param + epoll_pwait(sigmask) + fun:epoll_pwait + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ +<insert_a_suppression_name_here> + Memcheck:Param + epoll_pwait(sigmask) + fun:epoll_pwait + fun:ngx_epoll_test_rdhup + fun:ngx_epoll_init + fun:ngx_event_process_init +} +{ +<insert_a_suppression_name_here> + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + fun:ngx_alloc + fun:ngx_crc32_table_init + fun:main +}