root/trunk/pdns/pdns/pdns_recursor.cc @ 1333

Revision 1333, 63.7 KB (checked in by ahu, 4 years ago)

it is 2009!

  • Property svn:eol-style set to native
  • Property svn:keywords set to author date id revision
Line 
1/*
2    PowerDNS Versatile Database Driven Nameserver
3    Copyright (C) 2003 - 2009  PowerDNS.COM BV
4
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License version 2
7    as published by the Free Software Foundation
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17*/
18
19#ifndef WIN32
20# include <netdb.h>
21# include <unistd.h>
22#else
23 #include "ntservice.hh"
24 #include "recursorservice.hh"
25#endif // WIN32
26
27#include "utility.hh"
28#include "dns_random.hh"
29#include <iostream>
30#include <errno.h>
31#include <map>
32#include <set>
33#include "recursor_cache.hh"
34#include <stdio.h>
35#include <signal.h>
36#include <stdlib.h>
37
38#include "mtasker.hh"
39#include <utility>
40#include "arguments.hh"
41#include "syncres.hh"
42#include <fcntl.h>
43#include <fstream>
44#include "sstuff.hh"
45#include <boost/tuple/tuple.hpp>
46#include <boost/tuple/tuple_comparison.hpp>
47#include <boost/shared_array.hpp>
48#include <boost/lexical_cast.hpp>
49#include <boost/function.hpp>
50#include <boost/algorithm/string.hpp>
51#include "dnsparser.hh"
52#include "dnswriter.hh"
53#include "dnsrecords.hh"
54#include "zoneparser-tng.hh"
55#include "rec_channel.hh"
56#include "logger.hh"
57#include "iputils.hh"
58#include "mplexer.hh"
59#include "config.h"
60#include "lua-pdns-recursor.hh"
61
62#ifndef RECURSOR
63#include "statbag.hh"
64StatBag S;
65#endif
66
67FDMultiplexer* g_fdm;
68unsigned int g_maxTCPPerClient;
69bool g_logCommonErrors;
70shared_ptr<PowerDNSLua> g_pdl;
71using namespace boost;
72
73#ifdef __FreeBSD__           // see cvstrac ticket #26
74#include <pthread.h>
75#include <semaphore.h>
76#endif
77
78MemRecursorCache RC;
79RecursorStats g_stats;
80bool g_quiet;
81NetmaskGroup* g_allowFrom;
82NetmaskGroup* g_dontQuery;
83string s_programname="pdns_recursor";
84typedef vector<int> g_tcpListenSockets_t;
85g_tcpListenSockets_t g_tcpListenSockets;
86int g_tcpTimeout;
87map<int, ComboAddress> g_listenSocketsAddresses;
88struct DNSComboWriter {
89  DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now), d_tcp(false), d_socket(-1)
90  {}
91  MOADNSParser d_mdp;
92  void setRemote(ComboAddress* sa)
93  {
94    d_remote=*sa;
95  }
96
97  void setSocket(int sock)
98  {
99    d_socket=sock;
100  }
101
102  string getRemote() const
103  {
104    return d_remote.toString();
105  }
106
107  struct timeval d_now;
108  ComboAddress d_remote;
109  bool d_tcp;
110  int d_socket;
111};
112
113
114#ifndef WIN32
115#ifndef __FreeBSD__
// Do-nothing definitions of the semaphore and mutex primitives: each one simply reports success.
extern "C"
{
  int sem_init(sem_t*, int, unsigned int)
  {
    return 0;
  }

  int sem_wait(sem_t*)
  {
    return 0;
  }

  int sem_trywait(sem_t*)
  {
    return 0;
  }

  int sem_post(sem_t*)
  {
    return 0;
  }

  int sem_getvalue(sem_t*, int*)
  {
    return 0;
  }

  pthread_t pthread_self(void)
  {
    return (pthread_t) 0;
  }

  int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*)
  {
    return 0;
  }

  int pthread_mutex_lock(pthread_mutex_t*)
  {
    return 0;
  }

  int pthread_mutex_unlock(pthread_mutex_t*)
  {
    return 0;
  }

  int pthread_mutex_destroy(pthread_mutex_t*)
  {
    return 0;
  }
}
128#endif // __FreeBSD__
129#endif // WIN32
130
131ArgvMap &arg()
132{
133  static ArgvMap theArg;
134  return theArg;
135}
136
137struct timeval g_now;
138typedef vector<int> tcpserversocks_t;
139
140MT_t* MT; // the big MTasker
141
142void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
143
144// -1 is error, 0 is timeout, 1 is success
145int asendtcp(const string& data, Socket* sock) 
146{
147  PacketID pident;
148  pident.sock=sock;
149  pident.outMSG=data;
150 
151  g_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
152  string packet;
153
154  int ret=MT->waitEvent(pident, &packet, 1);
155
156  if(!ret || ret==-1) { // timeout
157    g_fdm->removeWriteFD(sock->getHandle());
158  }
159  else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
160    return -1;
161  }
162  return ret;
163}
164
165void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
166
167// -1 is error, 0 is timeout, 1 is success
168int arecvtcp(string& data, int len, Socket* sock) 
169{
170  data.clear();
171  PacketID pident;
172  pident.sock=sock;
173  pident.inNeeded=len;
174  g_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
175
176  int ret=MT->waitEvent(pident,&data,1);
177  if(!ret || ret==-1) { // timeout
178    g_fdm->removeReadFD(sock->getHandle());
179  }
180  else if(data.empty()) {// error, EOF or other
181    return -1;
182  }
183
184  return ret;
185}
186
187
188void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
189
190// you can ask this class for a UDP socket to send a query from
191// this socket is not yours, don't even think about deleting it
192// but after you call 'returnSocket' on it, don't assume anything anymore
193class UDPClientSocks
194{
195  unsigned int d_numsocks;
196  unsigned int d_maxsocks;
197
198public:
199  UDPClientSocks() : d_numsocks(0), d_maxsocks(5000)
200  {
201  }
202
203  typedef set<int> socks_t;
204  socks_t d_socks;
205
206  // returning -1 means: temporary OS error (ie, out of files), -2 means OS error
207  int getSocket(const ComboAddress& toaddr, int* fd)
208  {
209    *fd=makeClientSocket(toaddr.sin4.sin_family);
210    if(*fd < 0) // temporary error - receive exception otherwise
211      return -1;
212
213    if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
214      int err = errno;
215      //      returnSocket(*fd);
216      Utility::closesocket(*fd);
217      if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
218        return -2;
219      return -1;
220    }
221
222    d_socks.insert(*fd);
223    d_numsocks++;
224    return 0;
225  }
226
227  void returnSocket(int fd)
228  {
229    socks_t::iterator i=d_socks.find(fd);
230    if(i==d_socks.end()) {
231      throw AhuException("Trying to return a socket (fd="+lexical_cast<string>(fd)+") not in the pool");
232    }
233    returnSocket(i);
234  }
235
236  // return a socket to the pool, or simply erase it
237  void returnSocket(socks_t::iterator& i)
238  {
239    if(i==d_socks.end()) {
240      throw AhuException("Trying to return a socket not in the pool");
241    }
242    try {
243      g_fdm->removeReadFD(*i);
244    }
245    catch(FDMultiplexerException& e) {
246      // we sometimes return a socket that has not yet been assigned to g_fdm
247    }
248    Utility::closesocket(*i);
249   
250    d_socks.erase(i++);
251    --d_numsocks;
252  }
253
254  // returns -1 for errors which might go away, throws for ones that won't
255  int makeClientSocket(int family)
256  {
257    int ret=(int)socket(family, SOCK_DGRAM, 0);
258    if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
259      return ret;
260   
261    if(ret<0) 
262      throw AhuException("Making a socket for resolver: "+stringerror());
263   
264    static optional<ComboAddress> sin4;
265    if(!sin4) {
266      sin4=ComboAddress(::arg()["query-local-address"]);
267    }
268    static optional<ComboAddress> sin6;
269    if(!sin6) {
270      if(!::arg()["query-local-address6"].empty())
271        sin6=ComboAddress(::arg()["query-local-address6"]);
272    }
273   
274    int tries=10;
275    while(--tries) {
276      uint16_t port=1025+dns_random(64510);
277      if(tries==1)  // fall back to kernel 'random'
278        port=0;
279     
280      if(family==AF_INET) {
281        sin4->sin4.sin_port = htons(port); 
282       
283        if (::bind(ret, (struct sockaddr *)&*sin4, sin4->getSocklen()) >= 0) 
284          break;
285      }
286      else {
287        sin6->sin6.sin6_port = htons(port); 
288       
289        if (::bind(ret, (struct sockaddr *)&*sin6, sin6->getSocklen()) >= 0) 
290          break;
291      }
292    }
293    if(!tries)
294      throw AhuException("Resolver binding to local query client socket: "+stringerror());
295   
296    Utility::setNonBlocking(ret);
297    return ret;
298  }
299
300
301} g_udpclientsocks;
302
303
304/* these two functions are used by LWRes */
305// -2 is OS error, -1 is error that depends on the remote, > 0 is success
306int asendto(const char *data, int len, int flags, 
307            const ComboAddress& toaddr, uint16_t id, const string& domain, uint16_t qtype, int* fd) 
308{
309
310  PacketID pident;
311  pident.domain = domain;
312  pident.remote = toaddr;
313  pident.type = qtype;
314
315  // see if there is an existing outstanding request we can chain on to, using partial equivalence function
316  pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
317
318  for(; chain.first != chain.second; chain.first++) {
319    if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
320      /*
321      cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
322      cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
323          <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
324      */
325      chain.first->key.chain.insert(id); // we can chain
326      *fd=-1;                            // gets used in waitEvent / sendEvent later on
327      return 1;
328    }
329  }
330
331  int ret=g_udpclientsocks.getSocket(toaddr, fd);
332  if(ret < 0)
333    return ret;
334
335  pident.fd=*fd;
336  pident.id=id;
337 
338  g_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
339  ret=send(*fd, data, len, 0);
340  if(ret < 0)
341    g_udpclientsocks.returnSocket(*fd);
342  return ret;
343}
344
345// -1 is error, 0 is timeout, 1 is success
346int arecvfrom(char *data, int len, int flags, const ComboAddress& fromaddr, int *d_len, 
347              uint16_t id, const string& domain, uint16_t qtype, int fd, unsigned int now)
348{
349  static optional<unsigned int> nearMissLimit;
350  if(!nearMissLimit) 
351    nearMissLimit=::arg().asNum("spoof-nearmiss-max");
352
353  PacketID pident;
354  pident.fd=fd;
355  pident.id=id;
356  pident.domain=domain;
357  pident.type = qtype;
358  pident.remote=fromaddr;
359
360  string packet;
361  int ret=MT->waitEvent(pident, &packet, 1, now);
362
363  if(ret > 0) {
364    if(packet.empty()) // means "error"
365      return -1; 
366
367    *d_len=(int)packet.size();
368    memcpy(data,packet.c_str(),min(len,*d_len));
369    if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
370      L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
371      g_stats.spoofCount++;
372      return -1;
373    }
374  }
375  else {
376    if(fd >= 0)
377      g_udpclientsocks.returnSocket(fd);
378  }
379  return ret;
380}
381
382void setBuffer(int fd, int optname, uint32_t size)
383{
384  uint32_t psize=0;
385  socklen_t len=sizeof(psize);
386 
387  if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
388    L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
389    return; 
390  }
391
392  if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
393    L<<Logger::Error<<"Warning: unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
394}
395
396
397static void setReceiveBuffer(int fd, uint32_t size)
398{
399  setBuffer(fd, SO_RCVBUF, size);
400}
401
402static void setSendBuffer(int fd, uint32_t size)
403{
404  setBuffer(fd, SO_SNDBUF, size);
405}
406
407string s_pidfname;
408static void writePid(void)
409{
410  ofstream of(s_pidfname.c_str(), ios_base::app);
411  if(of)
412    of<< Utility::getpid() <<endl;
413  else
414    L<<Logger::Error<<"Requested to write pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
415}
416
417void primeHints(void)
418{
419  // prime root cache
420  set<DNSResourceRecord>nsset;
421
422  if(::arg()["hint-file"].empty()) {
423    static const char*ips[]={"198.41.0.4", "192.228.79.201", "192.33.4.12", "128.8.10.90", "192.203.230.10", "192.5.5.241", 
424                             "192.112.36.4", "128.63.2.53",
425                             "192.36.148.17","192.58.128.30", "193.0.14.129", "199.7.83.42", "202.12.27.33"};
426    static const char *ip6s[]={
427      "2001:503:ba3e::2:30", NULL, NULL, NULL, NULL,
428      "2001:500:2f::f", NULL, "2001:500:1::803f:235", NULL,
429      "2001:503:c27::2:30", NULL, NULL, NULL
430    };
431    DNSResourceRecord arr, aaaarr, nsrr;
432    arr.qtype=QType::A;
433    aaaarr.qtype=QType::AAAA;
434    nsrr.qtype=QType::NS;
435    arr.ttl=aaaarr.ttl=nsrr.ttl=time(0)+3600000;
436   
437    for(char c='a';c<='m';++c) {
438      static char templ[40];
439      strncpy(templ,"a.root-servers.net.", sizeof(templ) - 1);
440      *templ=c;
441      aaaarr.qname=arr.qname=nsrr.content=templ;
442      arr.content=ips[c-'a'];
443      set<DNSResourceRecord> aset;
444      aset.insert(arr);
445      RC.replace(time(0), string(templ), QType(QType::A), aset, true); // auth, nuke it all
446      if (ip6s[c-'a'] != NULL) {
447        aaaarr.content=ip6s[c-'a'];
448
449        set<DNSResourceRecord> aaaaset;
450        aaaaset.insert(aaaarr);
451        RC.replace(time(0), string(templ), QType(QType::AAAA), aaaaset, true);
452      }
453     
454      nsset.insert(nsrr);
455    }
456  }
457  else {
458    ZoneParserTNG zpt(::arg()["hint-file"]);
459    DNSResourceRecord rr;
460
461    while(zpt.get(rr)) {
462      rr.ttl+=time(0);
463      if(rr.qtype.getCode()==QType::A) {
464        set<DNSResourceRecord> aset;
465        aset.insert(rr);
466        RC.replace(time(0), rr.qname, QType(QType::A), aset, true); // auth, etc see above
467      } else if(rr.qtype.getCode()==QType::AAAA) {
468        set<DNSResourceRecord> aaaaset;
469        aaaaset.insert(rr);
470        RC.replace(time(0), rr.qname, QType(QType::AAAA), aaaaset, true);
471      } else if(rr.qtype.getCode()==QType::NS) {
472        rr.content=toLower(rr.content);
473        nsset.insert(rr);
474      }
475    }
476  }
477  RC.replace(time(0),".", QType(QType::NS), nsset, true); // and stuff in the cache (auth)
478}
479
480map<ComboAddress, uint32_t> g_tcpClientCounts;
481
482struct TCPConnection
483{
484  int fd;
485  enum stateenum {BYTE0, BYTE1, GETQUESTION, DONE} state;
486  int qlen;
487  int bytesread;
488  ComboAddress remote;
489  char data[65535];
490  time_t startTime;
491
492  static void closeAndCleanup(int fd, const ComboAddress& remote) 
493  {
494    Utility::closesocket(fd);
495    if(!g_tcpClientCounts[remote]--) 
496      g_tcpClientCounts.erase(remote);
497    s_currentConnections--;
498  }
499  void closeAndCleanup()
500  {
501    closeAndCleanup(fd, remote);
502  }
503  static unsigned int s_currentConnections; //!< total number of current TCP connections
504};
505
506unsigned int TCPConnection::s_currentConnections; 
507void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
508
509void startDoResolve(void *p)
510{
511  DNSComboWriter* dc=(DNSComboWriter *)p;
512
513  try {
514    uint16_t maxudpsize=512;
515    EDNSOpts edo;
516    if(getEDNSOpts(dc->d_mdp, &edo)) {
517      maxudpsize=max(edo.d_packetsize, (uint16_t)1280);
518    }
519   
520    vector<DNSResourceRecord> ret;
521    vector<uint8_t> packet;
522
523    DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass); 
524
525    pw.getHeader()->aa=0;
526    pw.getHeader()->ra=1;
527    pw.getHeader()->qr=1;
528    pw.getHeader()->id=dc->d_mdp.d_header.id;
529    pw.getHeader()->rd=dc->d_mdp.d_header.rd;
530
531    SyncRes sr(dc->d_now);
532    if(!g_quiet)
533      L<<Logger::Error<<"["<<MT->getTid()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
534       <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
535
536    sr.setId(MT->getTid());
537    if(!dc->d_mdp.d_header.rd)
538      sr.setCacheOnly();
539
540    int res;
541
542    if(!g_pdl.get() || !g_pdl->preresolve(dc->d_remote, g_listenSocketsAddresses[dc->d_socket], dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res)) {
543       res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
544
545       if(g_pdl.get()) {
546         if(res == RCode::NXDomain)
547           g_pdl->nxdomain(dc->d_remote, g_listenSocketsAddresses[dc->d_socket], dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res);
548       }
549    }
550
551    if(res<0) {
552      pw.getHeader()->rcode=RCode::ServFail;
553      // no commit here, because no record
554      g_stats.servFails++;
555    }
556    else {
557      pw.getHeader()->rcode=res;
558      switch(res) {
559      case RCode::ServFail:
560        g_stats.servFails++;
561        break;
562      case RCode::NXDomain:
563        g_stats.nxDomains++;
564        break;
565      case RCode::NoError:
566        g_stats.noErrors++;
567        break;
568      }
569     
570      if(ret.size()) {
571        shuffle(ret);
572       
573        for(vector<DNSResourceRecord>::const_iterator i=ret.begin(); i!=ret.end(); ++i) {
574          pw.startRecord(i->qname, i->qtype.getCode(), i->ttl, i->qclass, (DNSPacketWriter::Place)i->d_place); 
575         
576          if(i->qtype.getCode() == QType::A) { // blast out A record w/o doing whole dnswriter thing
577            uint32_t ip=0;
578            IpToU32(i->content, &ip);
579            pw.xfr32BitInt(htonl(ip));
580          } else {
581            shared_ptr<DNSRecordContent> drc(DNSRecordContent::mastermake(i->qtype.getCode(), i->qclass, i->content)); 
582            drc->toPacket(pw);
583          }
584          if(!dc->d_tcp && pw.size() > maxudpsize) {
585            pw.rollback();
586            if(i->d_place==DNSResourceRecord::ANSWER)  // only truncate if we actually omitted parts of the answer
587              pw.getHeader()->tc=1;
588            goto sendit; // need to jump over pw.commit
589          }
590        }
591
592        pw.commit();
593      }
594    }
595  sendit:;
596    if(!dc->d_tcp) {
597      sendto(dc->d_socket, (const char*)&*packet.begin(), packet.size(), 0, (struct sockaddr *)(&dc->d_remote), dc->d_remote.getSocklen());
598    }
599    else {
600      char buf[2];
601      buf[0]=packet.size()/256;
602      buf[1]=packet.size()%256;
603
604      Utility::iovec iov[2];
605
606      iov[0].iov_base=(void*)buf;              iov[0].iov_len=2;
607      iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
608
609      int ret=Utility::writev(dc->d_socket, iov, 2);
610      bool hadError=true;
611
612      if(ret == 0) 
613        L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
614      else if(ret < 0 ) 
615        L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
616      else if((unsigned int)ret != 2 + packet.size())
617        L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
618      else
619        hadError=false;
620     
621      // update tcp connection status, either by closing or moving to 'BYTE0'
622
623      if(hadError) {
624        g_fdm->removeReadFD(dc->d_socket);
625        TCPConnection::closeAndCleanup(dc->d_socket, dc->d_remote);
626      }
627      else {
628        TCPConnection tc;
629        tc.fd=dc->d_socket;
630        tc.state=TCPConnection::BYTE0;
631        tc.remote=dc->d_remote;
632        Utility::gettimeofday(&g_now, 0); // needs to be updated
633        tc.startTime=g_now.tv_sec;
634        g_fdm->addReadFD(tc.fd, handleRunningTCPQuestion, tc);
635        g_fdm->setReadTTD(tc.fd, g_now, g_tcpTimeout);
636      }
637    }
638   
639    if(!g_quiet) {
640      L<<Logger::Error<<"["<<MT->getTid()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
641      L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
642        sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
643    }
644
645    sr.d_outqueries ? RC.cacheMisses++ : RC.cacheHits++; 
646    float spent=makeFloat(sr.d_now-dc->d_now);
647    if(spent < 0.001)
648      g_stats.answers0_1++;
649    else if(spent < 0.010)
650      g_stats.answers1_10++;
651    else if(spent < 0.1)
652      g_stats.answers10_100++;
653    else if(spent < 1.0)
654      g_stats.answers100_1000++;
655    else
656      g_stats.answersSlow++;
657
658    uint64_t newLat=(uint64_t)(spent*1000000);
659    if(newLat < 1000000)  // outliers of several minutes exist..
660      g_stats.avgLatencyUsec=(uint64_t)((1-0.0001)*g_stats.avgLatencyUsec + 0.0001*newLat);
661
662    delete dc;
663  }
664  catch(AhuException &ae) {
665    L<<Logger::Error<<"startDoResolve problem: "<<ae.reason<<endl;
666  }
667  catch(MOADNSException& e) {
668    L<<Logger::Error<<"DNS parser error: "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
669  }
670  catch(std::exception& e) {
671    L<<Logger::Error<<"STL error: "<<e.what()<<endl;
672  }
673  catch(...) {
674    L<<Logger::Error<<"Any other exception in a resolver context"<<endl;
675  }
676}
677
678RecursorControlChannel s_rcc;
679
680void makeControlChannelSocket()
681{
682  string sockname=::arg()["socket-dir"]+"/pdns_recursor.controlsocket";
683  if(::arg().mustDo("fork")) {
684    sockname+="."+lexical_cast<string>(Utility::getpid());
685    L<<Logger::Warning<<"Forked control socket name: "<<sockname<<endl;
686  }
687  s_rcc.listen(sockname);
688}
689
690void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
691{
692  TCPConnection* conn=any_cast<TCPConnection>(&var);
693
694  if(conn->state==TCPConnection::BYTE0) {
695    int bytes=recv(conn->fd, conn->data, 2, 0);
696    if(bytes==1)
697      conn->state=TCPConnection::BYTE1;
698    if(bytes==2) { 
699      conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
700      conn->bytesread=0;
701      conn->state=TCPConnection::GETQUESTION;
702    }
703    if(!bytes || bytes < 0) {
704      TCPConnection tmp(*conn); 
705      g_fdm->removeReadFD(fd);
706      tmp.closeAndCleanup();
707      return;
708    }
709  }
710  else if(conn->state==TCPConnection::BYTE1) {
711    int bytes=recv(conn->fd, conn->data+1, 1, 0);
712    if(bytes==1) {
713      conn->state=TCPConnection::GETQUESTION;
714      conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
715      conn->bytesread=0;
716    }
717    if(!bytes || bytes < 0) {
718      if(g_logCommonErrors)
719        L<<Logger::Error<<"TCP client "<< conn->remote.toString() <<" disconnected after first byte"<<endl;
720      TCPConnection tmp(*conn); 
721      g_fdm->removeReadFD(fd);
722      tmp.closeAndCleanup();  // conn loses validity here..
723      return;
724    }
725  }
726  else if(conn->state==TCPConnection::GETQUESTION) {
727    int bytes=recv(conn->fd, conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
728    if(!bytes || bytes < 0) {
729      L<<Logger::Error<<"TCP client "<< conn->remote.toString() <<" disconnected while reading question body"<<endl;
730      TCPConnection tmp(*conn);
731      g_fdm->removeReadFD(fd);
732      tmp.closeAndCleanup();  // conn loses validity here..
733
734      return;
735    }
736    conn->bytesread+=bytes;
737    if(conn->bytesread==conn->qlen) {
738      TCPConnection tconn(*conn); 
739      g_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
740
741      DNSComboWriter* dc=0;
742      try {
743        dc=new DNSComboWriter(tconn.data, tconn.qlen, g_now);
744      }
745      catch(MOADNSException &mde) {
746        g_stats.clientParseError++; 
747        if(g_logCommonErrors)
748          L<<Logger::Error<<"Unable to parse packet from TCP client "<< tconn.remote.toString() <<endl;
749        tconn.closeAndCleanup();
750        return;
751      }
752     
753      dc->setSocket(tconn.fd);
754      dc->d_tcp=true;
755      dc->setRemote(&tconn.remote);
756      if(dc->d_mdp.d_header.qr) {
757        delete dc;
758        L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
759        tconn.closeAndCleanup();
760        return;
761      }
762      else {
763        ++g_stats.qcounter;
764        ++g_stats.tcpqcounter;
765        MT->makeThread(startDoResolve, dc); // deletes dc
766        return;
767      }
768    }
769  }
770}
771
772//! Handle new incoming TCP connection
773void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
774{
775  ComboAddress addr;
776  socklen_t addrlen=sizeof(addr);
777  int newsock=(int)accept(fd, (struct sockaddr*)&addr, &addrlen);
778  if(newsock>0) {
779    g_stats.addRemote(addr);
780    if(g_allowFrom && !g_allowFrom->match(&addr)) {
781      if(!g_quiet) 
782        L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
783
784      g_stats.unauthorizedTCP++;
785      Utility::closesocket(newsock);
786      return;
787    }
788   
789    if(g_maxTCPPerClient && g_tcpClientCounts.count(addr) && g_tcpClientCounts[addr] >= g_maxTCPPerClient) {
790      g_stats.tcpClientOverflow++;
791      Utility::closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
792      return;
793    }
794    g_tcpClientCounts[addr]++;
795    Utility::setNonBlocking(newsock);
796    TCPConnection tc;
797    tc.fd=newsock;
798    tc.state=TCPConnection::BYTE0;
799    tc.remote=addr;
800    tc.startTime=g_now.tv_sec;
801    TCPConnection::s_currentConnections++;
802    g_fdm->addReadFD(tc.fd, handleRunningTCPQuestion, tc);
803
804    struct timeval now;
805    Utility::gettimeofday(&now, 0);
806    g_fdm->setReadTTD(tc.fd, now, g_tcpTimeout);
807  }
808}
809 
//! Extracts the question name and type from a raw DNS packet, without a full parse.
/*! \param packet  the raw packet; the question is read starting right after the 12-byte header
    \param len     number of valid bytes in packet
    \param qname   receives the name as NUL-terminated text, every label followed by a dot
                   ("www.ds9a.nl."); the root is "."
    \param maxlen  size of the qname buffer in bytes
    \param type    receives the question type, or 0 if the packet ends before the type
    \throws std::runtime_error if the name plus terminating NUL does not fit in maxlen bytes

    Label bytes are copied as they are; a length byte is never interpreted as a compression
    pointer. If the packet ends in the middle of the name, expansion stops there: qname holds
    the labels that were complete and type is 0. */
void questionExpand(const char* packet, uint16_t len, char* qname, int maxlen, uint16_t& type)
{
  type=0;
  if(maxlen < 1)
    throw std::runtime_error("Label length exceeded destination length");
  *qname=0;

  if(len <= 12)  // nothing beyond the header, so no question to expand
    return;

  const unsigned char* const end=(const unsigned char*)packet+len;
  const unsigned char* pos=(const unsigned char*)packet+12;

  // 3www4ds9a2nl0
  char* dst=qname;
  char* const lend=qname + maxlen;

  if(!*pos) {
    if(lend - dst < 2)  // "." plus NUL
      throw std::runtime_error("Label length exceeded destination length");
    *dst++='.';
  }

  bool complete=false;
  while(pos < end) { // "scan and copy"
    unsigned int labellen=*pos++;
    if(!labellen) {
      complete=true;
      break;
    }
    if(labellen > (unsigned int)(end - pos))  // label claims more bytes than the packet has
      break;
    // room is needed for the label, its dot and the terminating NUL
    if(labellen + 2 > (unsigned int)(lend - dst))
      throw std::runtime_error("Label length exceeded destination length");

    for(;labellen;--labellen)
      *dst++ = *pos++;
    *dst++='.';
  }
  *dst=0;

  // the two type bytes directly follow the terminating zero label
  if(complete && end - pos >= 2)
    type=(*pos)*256 + *(pos+1);
}
840
841string questionExpand(const char* packet, uint16_t len, uint16_t& type)
842{
843  char tmp[512];
844  questionExpand(packet, len, tmp, sizeof(tmp), type);
845  return tmp;
846}
847
848void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
849{
850  //  static HTimer s_timer("udp new question processing");
851  //  HTimerSentinel hts=s_timer.getSentinel();
852  int len;
853  char data[1500];
854  ComboAddress fromaddr;
855  socklen_t addrlen=sizeof(fromaddr);
856  //  uint64_t tsc1, tsc2;
857
858  if((len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
859    //    RDTSC(tsc1);     
860    g_stats.addRemote(fromaddr);
861
862    if(g_allowFrom && !g_allowFrom->match(&fromaddr)) {
863      if(!g_quiet) 
864        L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
865
866      g_stats.unauthorizedUDP++;
867      return;
868    }
869    try {
870      dnsheader* dh=(dnsheader*)data;
871     
872      if(dh->qr) {
873        if(g_logCommonErrors)
874          L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
875      }
876      else {
877        ++g_stats.qcounter;
878#if 0
879        uint16_t type;
880        char qname[256];
881        try {
882           questionExpand(data, len, qname, sizeof(qname), type); 
883        }
884        catch(std::exception &e)
885        {
886           throw MOADNSException(e.what());
887        }
888       
889        // must all be same length answers right now!
890        if((type==QType::A || type==QType::AAAA) && dh->arcount==0 && dh->ancount==0 && dh->nscount ==0 && ntohs(dh->qdcount)==1 ) {
891          char *record[10];
892          uint16_t rlen[10];
893          uint32_t ttd[10];
894          int count;
895          if((count=RC.getDirect(g_now.tv_sec, qname, QType(type), ttd, record, rlen))) {
896            if(len + count*(sizeof(dnsrecordheader) + 2 + rlen[0]) > 512)
897              goto slow;
898
899            random_shuffle(record, &record[count]);
900            dh->qr=1;
901            dh->ra=1;
902            dh->ancount=ntohs(count);
903            for(int n=0; n < count ; ++n) {
904              memcpy(data+len, "\xc0\x0c", 2); // answer label pointer
905              len+=2;
906              struct dnsrecordheader drh;
907              drh.d_type=htons(type);
908              drh.d_class=htons(1);
909              drh.d_ttl=htonl(ttd[n] - g_now.tv_sec);
910              drh.d_clen=htons(rlen[n]);
911              memcpy(data+len, &drh, sizeof(drh));
912              len+=sizeof(drh);
913              memcpy(data+len, record[n], rlen[n]);
914              len+=rlen[n];
915            }
916            RDTSC(tsc2);           
917            g_stats.shunted++;
918            sendto(fd, data, len, 0, (struct sockaddr *)(&fromaddr), fromaddr.getSocklen());
919//          cerr<<"shunted: " << (tsc2-tsc1) / 3000.0 << endl;
920            return;
921          }
922        }
923        else {
924          if(type!=QType::A && type!=QType::AAAA)
925            g_stats.noShuntWrongType++;
926          else
927            g_stats.noShuntWrongQuestion++;
928        }
929      slow:
930#endif
931        DNSComboWriter* dc = new DNSComboWriter(data, len, g_now);
932        dc->setSocket(fd);
933        dc->setRemote(&fromaddr);
934
935        dc->d_tcp=false;
936
937        MT->makeThread(startDoResolve, (void*) dc); // deletes dc
938      }
939    }
940    catch(MOADNSException& mde) {
941      g_stats.clientParseError++; 
942      if(g_logCommonErrors)
943        L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
944    }
945  }
946}
947
948typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
949deferredAdd_t deferredAdd;
950
951void makeTCPServerSockets()
952{
953  int fd;
954  vector<string>locals;
955  stringtok(locals,::arg()["local-address"]," ,");
956
957  if(locals.empty())
958    throw AhuException("No local address specified");
959 
960  for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
961    ServiceTuple st;
962    st.port=::arg().asNum("local-port");
963    parseService(*i, st);
964   
965    ComboAddress sin;
966
967    memset((char *)&sin,0, sizeof(sin));
968    sin.sin4.sin_family = AF_INET;
969    if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
970      sin.sin6.sin6_family = AF_INET6;
971      if(Utility::inet_pton(AF_INET6, st.host.c_str(), &sin.sin6.sin6_addr) <= 0)
972        throw AhuException("Unable to resolve local address for TCP server on '"+ st.host +"'"); 
973    }
974
975    fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
976    if(fd<0) 
977      throw AhuException("Making a TCP server socket for resolver: "+stringerror());
978
979    int tmp=1;
980    if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
981      L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
982      exit(1);
983    }
984   
985#ifdef TCP_DEFER_ACCEPT
986    if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
987      if(i==locals.begin())
988        L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
989    }
990#endif
991
992    sin.sin4.sin_port = htons(st.port);
993    int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
994    if (::bind(fd, (struct sockaddr *)&sin, socklen )<0) 
995      throw AhuException("Binding TCP server socket for "+ st.host +": "+stringerror());
996   
997    Utility::setNonBlocking(fd);
998    setSendBuffer(fd, 65000);
999    listen(fd, 128);
1000    deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
1001    g_tcpListenSockets.push_back(fd);
1002
1003    if(sin.sin4.sin_family == AF_INET) 
1004      L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
1005    else
1006      L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1007  }
1008}
1009
1010void makeUDPServerSockets()
1011{
1012  vector<string>locals;
1013  stringtok(locals,::arg()["local-address"]," ,");
1014
1015  if(locals.empty())
1016    throw AhuException("No local address specified");
1017 
1018  if(::arg()["local-address"]=="0.0.0.0") {
1019    L<<Logger::Warning<<"It is advised to bind to explicit addresses with the --local-address option"<<endl;
1020  }
1021
1022  for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1023    ServiceTuple st;
1024    st.port=::arg().asNum("local-port");
1025    parseService(*i, st);
1026
1027    ComboAddress sin;
1028
1029    memset(&sin, 0, sizeof(sin));
1030    sin.sin4.sin_family = AF_INET;
1031    if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1032      sin.sin6.sin6_family = AF_INET6;
1033      if(Utility::inet_pton(AF_INET6, st.host.c_str(), &sin.sin6.sin6_addr) <= 0)
1034        throw AhuException("Unable to resolve local address for UDP server on '"+ st.host +"'"); 
1035    }
1036   
1037    int fd=socket(sin.sin4.sin_family, SOCK_DGRAM,0);
1038    if(fd < 0) {
1039      throw AhuException("Making a UDP server socket for resolver: "+netstringerror());
1040    }
1041
1042    setReceiveBuffer(fd, 200000);
1043    sin.sin4.sin_port = htons(st.port);
1044
1045    int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1046    if (::bind(fd, (struct sockaddr *)&sin, socklen)<0) 
1047      throw AhuException("Resolver binding to server socket on port "+ lexical_cast<string>(st.port) +" for "+ st.host+": "+stringerror());
1048   
1049    Utility::setNonBlocking(fd);
1050
1051    deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
1052    g_listenSocketsAddresses[fd]=sin;
1053    if(sin.sin4.sin_family == AF_INET) 
1054      L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
1055    else
1056      L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1057  }
1058}
1059
1060
1061#ifndef WIN32
1062void daemonize(void)
1063{
1064  if(fork())
1065    exit(0); // bye bye
1066 
1067  setsid(); 
1068
1069  int i=open("/dev/null",O_RDWR); /* open stdin */
1070  if(i < 0) 
1071    L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1072  else {
1073    dup2(i,0); /* stdin */
1074    dup2(i,1); /* stderr */
1075    dup2(i,2); /* stderr */
1076    close(i);
1077  }
1078}
1079#endif
1080
// Main loop iteration count; serviceMain resets it to 0 and uses it modulo 500 / 55 to pace
// housekeeping and the TCP timeout sweep.
uint64_t counter;
// Raised by usr1Handler, cleared at the end of doStats().
bool statsWanted;
1083
1084
1085void usr1Handler(int)
1086{
1087  statsWanted=true;
1088}
1089
1090
1091
1092void usr2Handler(int)
1093{
1094  SyncRes::setLog(true);
1095  g_quiet=false;
1096  ::arg().set("quiet")="no";
1097
1098}
1099
1100void doStats(void)
1101{
1102  if(g_stats.qcounter && (RC.cacheHits + RC.cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
1103    L<<Logger::Warning<<"stats: "<<g_stats.qcounter<<" questions, "<<RC.size()<<" cache entries, "<<SyncRes::s_negcache.size()<<" negative entries, "
1104     <<(int)((RC.cacheHits*100.0)/(RC.cacheHits+RC.cacheMisses))<<"% cache hits"<<endl;
1105    L<<Logger::Warning<<"stats: throttle map: "<<SyncRes::s_throttle.size()<<", ns speeds: "
1106     <<SyncRes::s_nsSpeeds.size()<<endl; // ", bytes: "<<RC.bytes()<<endl;
1107    L<<Logger::Warning<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
1108    L<<Logger::Warning<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
1109     <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
1110    L<<Logger::Warning<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<MT->numProcesses()<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
1111
1112    L<<Logger::Warning<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
1113      g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
1114  }
1115  else if(statsWanted) 
1116    L<<Logger::Warning<<"stats: no stats yet!"<<endl;
1117
1118  //  HTimer::listAll();
1119
1120  statsWanted=false;
1121}
1122
1123static void houseKeeping(void *)
1124try
1125{
1126  static time_t last_stat, last_rootupdate, last_prune;
1127  struct timeval now;
1128  Utility::gettimeofday(&now, 0);
1129
1130  if(now.tv_sec - last_prune > 300) { 
1131    DTime dt;
1132    dt.setTimeval(now);
1133    RC.doPrune();
1134   
1135    typedef SyncRes::negcache_t::nth_index<1>::type negcache_by_ttd_index_t;
1136    negcache_by_ttd_index_t& ttdindex=boost::multi_index::get<1>(SyncRes::s_negcache);
1137
1138    negcache_by_ttd_index_t::iterator i=ttdindex.lower_bound(now.tv_sec);
1139    ttdindex.erase(ttdindex.begin(), i);
1140
1141    time_t limit=now.tv_sec-300;
1142    for(SyncRes::nsspeeds_t::iterator i = SyncRes::s_nsSpeeds.begin() ; i!= SyncRes::s_nsSpeeds.end(); )
1143      if(i->second.stale(limit))
1144        SyncRes::s_nsSpeeds.erase(i++);
1145      else
1146        ++i;
1147
1148    //   cerr<<"Pruned "<<pruned<<" records, left "<<SyncRes::s_negcache.size()<<"\n";
1149//    cout<<"Prune took "<<dt.udiff()<<"usec\n";
1150    last_prune=time(0);
1151  }
1152  if(now.tv_sec - last_stat>1800) { 
1153    doStats();
1154    last_stat=time(0);
1155  }
1156  if(now.tv_sec - last_rootupdate > 7200) {
1157    SyncRes sr(now);
1158    sr.setDoEDNS0(true);
1159    vector<DNSResourceRecord> ret;
1160
1161    sr.setNoCache();
1162    int res=sr.beginResolve(".", QType(QType::NS), 1, ret);
1163    if(!res) {
1164      L<<Logger::Warning<<"Refreshed . records"<<endl;
1165      last_rootupdate=now.tv_sec;
1166    }
1167    else
1168      L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
1169  }
1170}
1171catch(AhuException& ae)
1172{
1173  L<<Logger::Error<<"Fatal error: "<<ae.reason<<endl;
1174  throw;
1175}
1176;
1177
1178
1179void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
1180{
1181  string remote;
1182  string msg=s_rcc.recv(&remote);
1183  RecursorControlParser rcp;
1184  RecursorControlParser::func_t* command;
1185  string answer=rcp.getAnswer(msg, &command);
1186  try {
1187    s_rcc.send(answer, &remote);
1188    command();
1189  }
1190  catch(std::exception& e) {
1191    L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
1192  }
1193  catch(AhuException& ae) {
1194    L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
1195  }
1196}
1197
1198void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
1199{
1200  PacketID* pident=any_cast<PacketID>(&var);
1201  //  cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
1202
1203  shared_array<char> buffer(new char[pident->inNeeded]);
1204
1205  int ret=recv(fd, buffer.get(), pident->inNeeded,0);
1206  if(ret > 0) {
1207    pident->inMSG.append(&buffer[0], &buffer[ret]);
1208    pident->inNeeded-=ret;
1209    if(!pident->inNeeded) {
1210      //      cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
1211      PacketID pid=*pident;
1212      string msg=pident->inMSG;
1213     
1214      g_fdm->removeReadFD(fd);
1215      MT->sendEvent(pid, &msg); 
1216    }
1217    else {
1218      //      cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
1219    }
1220  }
1221  else {
1222    PacketID tmp=*pident;
1223    g_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
1224    string empty;
1225    MT->sendEvent(tmp, &empty); // this conveys error status
1226  }
1227}
1228
1229void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
1230{
1231  PacketID* pid=any_cast<PacketID>(&var);
1232  int ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
1233  if(ret > 0) {
1234    pid->outPos+=ret;
1235    if(pid->outPos==pid->outMSG.size()) {
1236      PacketID tmp=*pid;
1237      g_fdm->removeWriteFD(fd);
1238      MT->sendEvent(tmp, &tmp.outMSG);  // send back what we sent to convey everything is ok
1239    }
1240  }
1241  else {  // error or EOF
1242    PacketID tmp(*pid);
1243    g_fdm->removeWriteFD(fd);
1244    string sent;
1245    MT->sendEvent(tmp, &sent);         // we convey error status by sending empty string
1246  }
1247}
1248
1249// resend event to everybody chained onto it
1250void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
1251{
1252  if(iter->key.chain.empty())
1253    return;
1254  //  cerr<<"doResends called!\n";
1255  for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
1256    resend.fd=-1;
1257    resend.id=*i;
1258    //    cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
1259
1260    MT->sendEvent(resend, &content);
1261    g_stats.chainResends++;
1262  }
1263}
1264
1265void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
1266{
1267  //  static HTimer s_timer("udp server response processing");
1268
1269  PacketID pid=any_cast<PacketID>(var);
1270  int len;
1271  char data[1500];
1272  ComboAddress fromaddr;
1273  socklen_t addrlen=sizeof(fromaddr);
1274
1275  len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
1276
1277  if(len < (int)sizeof(dnsheader)) {
1278    if(len < 0)
1279      ; //      cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
1280    else {
1281      g_stats.serverParseError++; 
1282      if(g_logCommonErrors)
1283        L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< sockAddrToString((struct sockaddr_in*) &fromaddr) <<
1284          ": packet smalller than DNS header"<<endl;
1285    }
1286
1287    g_udpclientsocks.returnSocket(fd);
1288    string empty;
1289
1290    MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
1291    if(iter != MT->d_waiters.end()) 
1292      doResends(iter, pid, empty);
1293   
1294    MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
1295    return;
1296  } 
1297
1298  dnsheader dh;
1299  memcpy(&dh, data, sizeof(dh));
1300 
1301  if(dh.qr) {
1302    PacketID pident;
1303    pident.remote=fromaddr;
1304    pident.id=dh.id;
1305    pident.fd=fd;
1306    if(!dh.qdcount) { // UPC, Nominum, very old BIND on FormErr, NSD
1307      pident.domain.clear();
1308      pident.type = 0;
1309    }
1310    else {
1311      pident.domain=questionExpand(data, len, pident.type); // don't copy this from above - we need to do the actual read
1312    }
1313    string packet;
1314    packet.assign(data, len);
1315
1316    MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
1317    if(iter != MT->d_waiters.end()) {
1318      doResends(iter, pident, packet);
1319    }
1320
1321  retryWithName:
1322
1323    if(!MT->sendEvent(pident, &packet)) {
1324//      if(g_logCommonErrors)
1325//      L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toString()<<": "<<pident.type<<endl;
1326      g_stats.unexpectedCount++;
1327     
1328      for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
1329        if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote &&  mthread->key.type == pident.type &&
1330           !Utility::strcasecmp(pident.domain.c_str(), mthread->key.domain.c_str())) {
1331          mthread->key.nearMisses++;
1332        }
1333
1334        // be a bit paranoid here since we're weakening our matching
1335        if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type && 
1336           pident.id  == mthread->key.id && mthread->key.remote == pident.remote) {
1337          //        cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
1338          pident.domain = mthread->key.domain;
1339          pident.type = mthread->key.type;
1340          g_stats.unexpectedCount--;
1341          goto retryWithName;
1342        }
1343      }
1344    }
1345    else if(fd >= 0) {
1346      g_udpclientsocks.returnSocket(fd);
1347    }
1348  }
1349  else
1350    L<<Logger::Warning<<"Ignoring question on outgoing socket from "<< sockAddrToString((struct sockaddr_in*) &fromaddr)  <<endl;
1351}
1352
1353FDMultiplexer* getMultiplexer()
1354{
1355  FDMultiplexer* ret;
1356  for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
1357      i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
1358    try {
1359      ret=i->second();
1360      L<<Logger::Error<<"Enabled '"<<ret->getName()<<"' multiplexer"<<endl;
1361      return ret;
1362    }
1363    catch(FDMultiplexerException &fe) {
1364      L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
1365    }
1366    catch(...) {
1367      L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
1368    }
1369  }
1370  L<<Logger::Error<<"No working multiplexer found!"<<endl;
1371  exit(1);
1372}
1373
1374static void makeNameToIPZone(const string& hostname, const string& ip)
1375{
1376  SyncRes::AuthDomain ad;
1377  DNSResourceRecord rr;
1378  rr.qname=toCanonic("", hostname);
1379  rr.d_place=DNSResourceRecord::ANSWER;
1380  rr.ttl=86400;
1381  rr.qtype=QType::SOA;
1382  rr.content="localhost. root 1 604800 86400 2419200 604800";
1383 
1384  ad.d_records.insert(rr);
1385
1386  rr.qtype=QType::NS;
1387  rr.content="localhost.";
1388
1389  ad.d_records.insert(rr);
1390 
1391  rr.qtype=QType::A;
1392  rr.content=ip;
1393  ad.d_records.insert(rr);
1394 
1395  if(SyncRes::s_domainmap.count(rr.qname)) {
1396    L<<Logger::Warning<<"Hosts file will not overwrite zone '"<<rr.qname<<"' already loaded"<<endl;
1397  }
1398  else {
1399    L<<Logger::Warning<<"Inserting forward zone '"<<rr.qname<<"' based on hosts file"<<endl;
1400    SyncRes::s_domainmap[rr.qname]=ad;
1401  }
1402}
1403
1404//! parts[0] must be an IP address, the rest must be host names
1405static void makeIPToNamesZone(const vector<string>& parts) 
1406{
1407  string address=parts[0];
1408  vector<string> ipparts;
1409  stringtok(ipparts, address,".");
1410 
1411  SyncRes::AuthDomain ad;
1412  DNSResourceRecord rr;
1413  for(int n=ipparts.size()-1; n>=0 ; --n) {
1414    rr.qname.append(ipparts[n]);
1415    rr.qname.append(1,'.');
1416  }
1417  rr.qname.append("in-addr.arpa.");
1418
1419  rr.d_place=DNSResourceRecord::ANSWER;
1420  rr.ttl=86400;
1421  rr.qtype=QType::SOA;
1422  rr.content="localhost. root. 1 604800 86400 2419200 604800";
1423 
1424  ad.d_records.insert(rr);
1425
1426  rr.qtype=QType::NS;
1427  rr.content="localhost.";
1428
1429  ad.d_records.insert(rr);
1430  rr.qtype=QType::PTR;
1431
1432  if(ipparts.size()==4)  // otherwise this is a partial zone
1433    for(unsigned int n=1; n < parts.size(); ++n) {
1434      rr.content=toCanonic("", parts[n]);
1435      ad.d_records.insert(rr);
1436    }
1437
1438  if(SyncRes::s_domainmap.count(rr.qname)) {
1439    L<<Logger::Warning<<"Will not overwrite zone '"<<rr.qname<<"' already loaded"<<endl;
1440  }
1441  else {
1442    if(ipparts.size()==4)
1443      L<<Logger::Warning<<"Inserting reverse zone '"<<rr.qname<<"' based on hosts file"<<endl;
1444    SyncRes::s_domainmap[rr.qname]=ad;
1445  }
1446}
1447
1448
1449void parseAuthAndForwards();
1450
1451void convertServersForAD(const std::string& input, SyncRes::AuthDomain& ad, const char* sepa, bool verbose=true)
1452{
1453  vector<string> servers;
1454  stringtok(servers, input, sepa);
1455  ad.d_servers.clear();
1456  for(vector<string>::const_iterator iter = servers.begin(); iter != servers.end(); ++iter) {
1457    if(verbose && iter != servers.begin()) 
1458      L<<", ";
1459    pair<string,string> ipport=splitField(*iter, ':');
1460    ComboAddress addr(ipport.first, ipport.second.empty() ? 53 : lexical_cast<uint16_t>(ipport.second));
1461    if(verbose)
1462      L<<addr.toStringWithPort();
1463    ad.d_servers.push_back(addr);
1464  }
1465  if(verbose)
1466    L<<endl;
1467}
1468
1469string reloadAuthAndForwards()
1470{
1471  SyncRes::domainmap_t original=SyncRes::s_domainmap;
1472 
1473  try {
1474    L<<Logger::Warning<<"Reloading zones, purging data from cache"<<endl;
1475 
1476    for(SyncRes::domainmap_t::const_iterator i = SyncRes::s_domainmap.begin(); i != SyncRes::s_domainmap.end(); ++i) {
1477      for(SyncRes::AuthDomain::records_t::const_iterator j = i->second.d_records.begin(); j != i->second.d_records.end(); ++j) 
1478        RC.doWipeCache(j->qname);
1479    }
1480
1481    string configname=::arg()["config-dir"]+"/recursor.conf";
1482    cleanSlashes(configname);
1483   
1484    if(!::arg().preParseFile(configname.c_str(), "forward-zones")) 
1485      L<<Logger::Warning<<"Unable to re-parse configuration file '"<<configname<<"'"<<endl;
1486   
1487    ::arg().preParseFile(configname.c_str(), "auth-zones");
1488    ::arg().preParseFile(configname.c_str(), "export-etc-hosts");
1489    ::arg().preParseFile(configname.c_str(), "serve-rfc1918");
1490   
1491    parseAuthAndForwards();
1492   
1493    // purge again - new zones need to blank out the cache
1494    for(SyncRes::domainmap_t::const_iterator i = SyncRes::s_domainmap.begin(); i != SyncRes::s_domainmap.end(); ++i) {
1495      for(SyncRes::AuthDomain::records_t::const_iterator j = i->second.d_records.begin(); j != i->second.d_records.end(); ++j) 
1496        RC.doWipeCache(j->qname);
1497    }
1498
1499    // this is pretty blunt
1500    SyncRes::s_negcache.clear(); 
1501    return "ok\n";
1502  }
1503  catch(std::exception& e) {
1504    L<<Logger::Error<<"Had error reloading zones, keeping original data: "<<e.what()<<endl;
1505  }
1506  catch(AhuException& ae) {
1507    L<<Logger::Error<<"Encountered error reloading zones, keeping original data: "<<ae.reason<<endl;
1508  }
1509  catch(...) {
1510    L<<Logger::Error<<"Encountered unknown error reloading zones, keeping original data"<<endl;
1511  }
1512  SyncRes::s_domainmap.swap(original);
1513  return "reloading failed, see log\n";
1514}
1515
1516void parseAuthAndForwards()
1517{
1518  SyncRes::s_domainmap.clear(); // this makes us idempotent
1519
1520  TXTRecordContent::report();
1521  OPTRecordContent::report();
1522
1523  typedef vector<string> parts_t;
1524  parts_t parts; 
1525  for(int n=0; n < 2 ; ++n ) {
1526    parts.clear();
1527    stringtok(parts, ::arg()[n ? "forward-zones" : "auth-zones"], ",\t\n\r");
1528    for(parts_t::const_iterator iter = parts.begin(); iter != parts.end(); ++iter) {
1529      SyncRes::AuthDomain ad;
1530      pair<string,string> headers=splitField(*iter, '=');
1531      trim(headers.first);
1532      trim(headers.second);
1533      headers.first=toCanonic("", headers.first);
1534      if(n==0) {
1535        L<<Logger::Error<<"Parsing authoritative data for zone '"<<headers.first<<"' from file '"<<headers.second<<"'"<<endl;
1536        ZoneParserTNG zpt(headers.second, headers.first);
1537        DNSResourceRecord rr;
1538        while(zpt.get(rr)) {
1539          try {
1540            string tmp=DNSRR2String(rr);
1541            rr=String2DNSRR(rr.qname, rr.qtype, tmp, rr.ttl);
1542          }
1543          catch(std::exception &e) {
1544            throw AhuException("Error parsing record '"+rr.qname+"' of type "+rr.qtype.getName()+" in zone '"+headers.first+"' from file '"+headers.second+"': "+e.what());
1545          }
1546          catch(...) {
1547            throw AhuException("Error parsing record '"+rr.qname+"' of type "+rr.qtype.getName()+" in zone '"+headers.first+"' from file '"+headers.second+"'");
1548          }
1549
1550          ad.d_records.insert(rr);
1551
1552        }
1553      }
1554      else {
1555        L<<Logger::Error<<"Redirecting queries for zone '"<<headers.first<<"' to: ";
1556        convertServersForAD(headers.second, ad, ";");
1557      }
1558     
1559      SyncRes::s_domainmap[headers.first]=ad;
1560    }
1561  }
1562 
1563  if(!::arg()["forward-zones-file"].empty()) {
1564    L<<Logger::Warning<<"Reading zone forwarding information from '"<<::arg()["forward-zones-file"]<<"'"<<endl;
1565    SyncRes::AuthDomain ad;
1566    FILE *rfp=fopen(::arg()["forward-zones-file"].c_str(), "r");
1567
1568    if(!rfp)
1569      throw AhuException("Error opening forward-zones-file '"+::arg()["forward-zones-file"]+"': "+stringerror());
1570
1571    shared_ptr<FILE> fp=shared_ptr<FILE>(rfp, fclose);
1572   
1573    char line[1024];
1574    int linenum=0;
1575    uint64_t before = SyncRes::s_domainmap.size();
1576    while(linenum++, fgets(line, sizeof(line)-1, fp.get())) {
1577      string domain, instructions;
1578      tie(domain, instructions)=splitField(line, '=');
1579      trim(domain);
1580      trim(instructions);
1581
1582      if(domain.empty()) 
1583        throw AhuException("Error parsing line "+lexical_cast<string>(linenum)+" of " +::arg()["forward-zones-file"]);
1584
1585      try {
1586        convertServersForAD(instructions, ad, ",; ", false);
1587      }
1588      catch(...) {
1589        throw AhuException("Conversion error parsing line "+lexical_cast<string>(linenum)+" of " +::arg()["forward-zones-file"]);
1590      }
1591
1592      SyncRes::s_domainmap[toCanonic("", domain)]=ad;
1593    }
1594    L<<Logger::Warning<<"Done parsing " << SyncRes::s_domainmap.size() - before<<" forwarding instructions from file '"<<::arg()["forward-zones-file"]<<"'"<<endl;
1595  }
1596
1597  if(::arg().mustDo("export-etc-hosts")) {
1598    string line;
1599    string fname;
1600   
1601    ifstream ifs("/etc/hosts");
1602    if(!ifs) {
1603      L<<Logger::Warning<<"Could not open /etc/hosts for reading"<<endl;
1604      return;
1605    }
1606   
1607    string::size_type pos;
1608    while(getline(ifs,line)) {
1609      pos=line.find('#');
1610      if(pos!=string::npos)
1611        line.resize(pos);
1612      trim(line);
1613      if(line.empty())
1614        continue;
1615      parts.clear();
1616      stringtok(parts, line, "\t\r\n ");
1617      if(parts[0].find(':')!=string::npos)
1618        continue;
1619     
1620      for(unsigned int n=1; n < parts.size(); ++n)
1621        makeNameToIPZone(parts[n], parts[0]);
1622      makeIPToNamesZone(parts);
1623    }
1624  }
1625  if(::arg().mustDo("serve-rfc1918")) {
1626    L<<Logger::Warning<<"Inserting rfc 1918 private space zones"<<endl;
1627    parts.clear();
1628    parts.push_back("127");
1629    makeIPToNamesZone(parts);
1630    parts[0]="10";
1631    makeIPToNamesZone(parts);
1632
1633    parts[0]="192.168";
1634    makeIPToNamesZone(parts);
1635    for(int n=16; n < 32; n++) {
1636      parts[0]="172."+lexical_cast<string>(n);
1637      makeIPToNamesZone(parts);
1638    }
1639  }
1640}
1641
1642string doReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
1643{
1644  string fname=::arg()["lua-dns-script"];
1645  try {
1646    if(begin==end) {
1647      if(!fname.empty()) 
1648        g_pdl = shared_ptr<PowerDNSLua>(new PowerDNSLua(fname));
1649      else
1650        throw runtime_error("Asked to reload lua scripts, but no name passed and no default ('lua-dns-script') defined");
1651    }
1652    else {
1653      fname=*begin;
1654      if(fname.empty()) {
1655        g_pdl.reset();
1656        L<<Logger::Error<<"Unloaded current lua script"<<endl;
1657        return "unloaded current lua script\n";
1658      }
1659      else {
1660        g_pdl = shared_ptr<PowerDNSLua>(new PowerDNSLua(fname));
1661        ::arg().set("lua-dns-script")=fname;
1662      }
1663    }
1664  }
1665  catch(std::exception& e) {
1666    L<<Logger::Error<<"Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
1667    return string("Retaining current script, error from '"+fname+"': "+string(e.what())+"\n");
1668  }
1669  L<<Logger::Warning<<"(Re)loaded lua script from '"<<fname<<"'"<<endl;
1670  return "ok - loaded script from '"+fname+"'\n";
1671}
1672
1673
1674
1675int serviceMain(int argc, char*argv[])
1676{
1677  L.setName("pdns_recursor");
1678
1679  L.setLoglevel((Logger::Urgency)(6)); // info and up
1680
1681  if(!::arg()["logging-facility"].empty()) {
1682    boost::optional<int> val=logFacilityToLOG(::arg().asNum("logging-facility") );
1683    if(val)
1684      theL().setFacility(*val);
1685    else
1686      L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
1687  }
1688
1689  L<<Logger::Warning<<"PowerDNS recursor "<<VERSION<<" (C) 2001-2009 PowerDNS.COM BV ("<<__DATE__", "__TIME__;
1690#ifdef __GNUC__
1691  L<<", gcc "__VERSION__;
1692#endif // add other compilers here
1693#ifdef _MSC_VER
1694  L<<", MSVC "<<_MSC_VER;
1695#endif
1696  L<<") starting up"<<endl;
1697 
1698  L<<Logger::Warning<<"PowerDNS comes with ABSOLUTELY NO WARRANTY. "
1699    "This is free software, and you are welcome to redistribute it "
1700    "according to the terms of the GPL version 2."<<endl;
1701 
1702  L<<Logger::Warning<<"Operating in "<<(sizeof(unsigned long)*8) <<" bits mode"<<endl;
1703 
1704  seedRandom(::arg()["entropy-source"]);
1705
1706  if(!::arg()["allow-from-file"].empty()) {
1707    string line;
1708    g_allowFrom=new NetmaskGroup;
1709    ifstream ifs(::arg()["allow-from-file"].c_str());
1710    if(!ifs) {
1711        throw AhuException("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
1712    }
1713
1714    string::size_type pos;
1715    while(getline(ifs,line)) {
1716      pos=line.find('#');
1717      if(pos!=string::npos)
1718        line.resize(pos);
1719      trim(line);
1720      if(line.empty())
1721        continue;
1722
1723      g_allowFrom->addMask(line);
1724    }
1725    L<<Logger::Warning<<"Done parsing " << g_allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
1726  }
1727  else if(!::arg()["allow-from"].empty()) {
1728    g_allowFrom=new NetmaskGroup;
1729    vector<string> ips;
1730    stringtok(ips, ::arg()["allow-from"], ", ");
1731    L<<Logger::Warning<<"Only allowing queries from: ";
1732    for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
1733      g_allowFrom->addMask(*i);
1734      if(i!=ips.begin())
1735        L<<Logger::Warning<<", ";
1736      L<<Logger::Warning<<*i;
1737    }
1738    L<<Logger::Warning<<endl;
1739  }
1740  else if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
1741    L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
1742 
1743
1744  if(!::arg()["dont-query"].empty()) {
1745    g_dontQuery=new NetmaskGroup;
1746    vector<string> ips;
1747    stringtok(ips, ::arg()["dont-query"], ", ");
1748    L<<Logger::Warning<<"Will not send queries to: ";
1749    for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
1750      g_dontQuery->addMask(*i);
1751      if(i!=ips.begin())
1752        L<<Logger::Warning<<", ";
1753      L<<Logger::Warning<<*i;
1754    }
1755    L<<Logger::Warning<<endl;
1756  }
1757
1758  g_quiet=::arg().mustDo("quiet");
1759  if(::arg().mustDo("trace")) {
1760    SyncRes::setLog(true);
1761    ::arg().set("quiet")="no";
1762    g_quiet=false;
1763  }
1764
1765  RC.d_followRFC2181=::arg().mustDo("auth-can-lower-ttl");
1766 
1767  if(!::arg()["query-local-address6"].empty()) {
1768    SyncRes::s_doIPv6=true;
1769    L<<Logger::Error<<"Enabling IPv6 transport for outgoing queries"<<endl;
1770  }
1771 
1772  SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
1773  SyncRes::s_serverID=::arg()["server-id"];
1774  if(SyncRes::s_serverID.empty()) {
1775    char tmp[128];
1776    gethostname(tmp, sizeof(tmp)-1);
1777    SyncRes::s_serverID=tmp;
1778  }
1779 
1780  parseAuthAndForwards();
1781
1782  try {
1783    if(!::arg()["lua-dns-script"].empty()) {
1784      g_pdl = shared_ptr<PowerDNSLua>(new PowerDNSLua(::arg()["lua-dns-script"]));
1785      L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
1786    }
1787   
1788  }
1789  catch(std::exception &e) {
1790    L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
1791    exit(99);
1792  }
1793
1794 
1795  g_stats.remotes.resize(::arg().asNum("remotes-ringbuffer-entries"));
1796  if(!g_stats.remotes.empty())
1797    memset(&g_stats.remotes[0], 0, g_stats.remotes.size() * sizeof(RecursorStats::remotes_t::value_type));
1798  g_logCommonErrors=::arg().mustDo("log-common-errors");
1799 
1800  makeUDPServerSockets();
1801  makeTCPServerSockets();
1802
1803  s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
1804  if(!s_pidfname.empty())
1805    unlink(s_pidfname.c_str()); // remove possible old pid file
1806 
1807#ifndef WIN32
1808  if(::arg().mustDo("fork")) {
1809    fork();
1810    L<<Logger::Warning<<"This is forked pid "<<getpid()<<endl;
1811  }
1812#endif
1813 
1814  MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
1815  PacketID pident;
1816  primeHints();   
1817  L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
1818#ifndef WIN32
1819  if(::arg().mustDo("daemon")) {
1820    L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
1821    L.toConsole(Logger::Critical);
1822    daemonize();
1823  }
1824  signal(SIGUSR1,usr1Handler);
1825  signal(SIGUSR2,usr2Handler);
1826  signal(SIGPIPE,SIG_IGN);
1827  writePid();
1828#endif
1829  makeControlChannelSocket();       
1830  g_fdm=getMultiplexer();
1831 
1832  for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i) 
1833    g_fdm->addReadFD(i->first, i->second);
1834 
1835  int newgid=0;
1836  if(!::arg()["setgid"].empty())
1837    newgid=Utility::makeGidNumeric(::arg()["setgid"]);
1838  int newuid=0;
1839  if(!::arg()["setuid"].empty())
1840    newuid=Utility::makeUidNumeric(::arg()["setuid"]);
1841 
1842#ifndef WIN32
1843  if (!::arg()["chroot"].empty()) {
1844    if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
1845      L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
1846      exit(1);
1847    }
1848  }
1849 
1850  Utility::dropPrivs(newuid, newgid);
1851  g_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
1852#endif
1853 
1854  counter=0;
1855  unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
1856  g_tcpTimeout=::arg().asNum("client-tcp-timeout");
1857 
1858  g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
1859 
1860 
1861  bool listenOnTCP(true);
1862 
1863  for(;;) {
1864    while(MT->schedule(g_now.tv_sec)); // housekeeping, let threads do their thing
1865     
1866    if(!(counter%500)) {
1867      MT->makeThread(houseKeeping,0);
1868    }
1869
1870    if(!(counter%55)) {
1871      typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
1872      expired_t expired=g_fdm->getTimeouts(g_now);
1873       
1874      for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
1875        TCPConnection conn=any_cast<TCPConnection>(i->second);
1876        if(g_logCommonErrors)
1877          L<<Logger::Warning<<"Timeout from remote TCP client "<< conn.remote.toString() <<endl;
1878        g_fdm->removeReadFD(i->first);
1879        conn.closeAndCleanup();
1880      }
1881    }
1882     
1883    counter++;
1884
1885    if(statsWanted) {
1886      doStats();
1887    }
1888
1889    Utility::gettimeofday(&g_now, 0);
1890    g_fdm->run(&g_now);
1891    Utility::gettimeofday(&g_now, 0);
1892
1893    if(listenOnTCP) {
1894      if(TCPConnection::s_currentConnections > maxTcpClients) {  // shutdown
1895        for(g_tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
1896          g_fdm->removeReadFD(*i);
1897        listenOnTCP=false;
1898      }
1899    }
1900    else {
1901      if(TCPConnection::s_currentConnections <= maxTcpClients) {  // reenable
1902        for(g_tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
1903          g_fdm->addReadFD(*i, handleNewTCPQuestion);
1904        listenOnTCP=true;
1905      }
1906    }
1907  }
1908}
1909#ifdef WIN32
1910void doWindowsServiceArguments(RecursorService& recursor)
1911{
1912  if(::arg().mustDo( "register-service" )) {
1913    if ( !recursor.registerService( "The PowerDNS Recursor.", true )) {
1914      cerr << "Could not register service." << endl;
1915      exit( 99 );
1916    }
1917   
1918    exit( 0 );
1919  }
1920
1921  if ( ::arg().mustDo( "unregister-service" )) {
1922    recursor.unregisterService();
1923    exit( 0 );
1924  }
1925}
1926#endif
1927
1928
1929int main(int argc, char **argv) 
1930{
1931  //  HTimer mtimer("main");
1932  //  mtimer.start();
1933
1934
1935  g_stats.startupTime=time(0);
1936  reportBasicTypes();
1937
1938  int ret = EXIT_SUCCESS;
1939#ifdef WIN32
1940  RecursorService service;
1941  WSADATA wsaData;
1942  if(WSAStartup( MAKEWORD( 2, 2 ), &wsaData )) {
1943    cerr<<"Unable to initialize winsock\n";
1944    exit(1);
1945  }
1946#endif // WIN32
1947
1948  try {
1949    ::arg().set("stack-size","stack size per mthread")="200000";
1950    ::arg().set("soa-minimum-ttl","Don't change")="0";
1951    ::arg().set("soa-serial-offset","Don't change")="0";
1952    ::arg().set("no-shuffle","Don't change")="off";
1953    ::arg().set("aaaa-additional-processing","turn on to do AAAA additional processing (slow)")="off";
1954    ::arg().set("local-port","port to listen on")="53";
1955    ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
1956    ::arg().set("trace","if we should output heaps of logging")="off";
1957    ::arg().set("daemon","Operate as a daemon")="yes";
1958    ::arg().set("log-common-errors","If we should log rather common errors")="yes";
1959    ::arg().set("chroot","switch to chroot jail")="";
1960    ::arg().set("setgid","If set, change group id to this gid for more security")="";
1961    ::arg().set("setuid","If set, change user id to this uid for more security")="";
1962#ifdef WIN32
1963    ::arg().set("quiet","Suppress logging of questions and answers")="off";
1964    ::arg().setSwitch( "register-service", "Register the service" )= "no";
1965    ::arg().setSwitch( "unregister-service", "Unregister the service" )= "no";
1966    ::arg().setSwitch( "ntservice", "Run as service" )= "no";
1967    ::arg().setSwitch( "use-ntlog", "Use the NT logging facilities" )= "yes"; 
1968    ::arg().setSwitch( "use-logfile", "Use a log file" )= "no"; 
1969    ::arg().setSwitch( "logfile", "Filename of the log file" )= "recursor.log"; 
1970#else
1971    ::arg().set("quiet","Suppress logging of questions and answers")="";
1972    ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
1973#endif
1974    ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
1975#ifndef WIN32
1976    ::arg().set("socket-owner","Owner of socket")="";
1977    ::arg().set("socket-group","Group of socket")="";
1978    ::arg().set("socket-mode", "Permissions for socket")="";
1979#endif
1980   
1981    ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
1982    ::arg().set("delegation-only","Which domains we only accept delegations from")="";
1983    ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
1984    ::arg().set("query-local-address6","Source IPv6 address for sending queries")="";
1985    ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
1986    ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
1987    ::arg().set("hint-file", "If set, load root hints from this file")="";
1988    ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="0";
1989    ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
1990    ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
1991    ::arg().set("remotes-ringbuffer-entries", "maximum number of packets to store statistics for")="0";
1992    ::arg().set("version-string", "string reported on version.pdns or version.bind")="PowerDNS Recursor "VERSION" $Id$";
1993    ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")="127.0.0.0/8, 10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fe80::/10";
1994    ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
1995    ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
1996    ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")="127.0.0.0/8, 10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fe80::/10";
1997    ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
1998    ::arg().set("fork", "If set, fork the daemon for possible double performance")="no";
1999    ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
2000    ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
2001    ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
2002    ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
2003    ::arg().set("forward-zones-file", "File with domain=ip pairs for forwarding")="";
2004    ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
2005    ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
2006    ::arg().set("auth-can-lower-ttl", "If we follow RFC 2181 to the letter, an authoritative server can lower the TTL of NS records")="off";
2007    ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
2008    ::arg().setSwitch( "ignore-rd-bit", "Assume each packet requires recursion, for compatability" )= "off"; 
2009
2010    ::arg().setCmd("help","Provide a helpful message");
2011    ::arg().setCmd("version","Print version string ("VERSION")");
2012    ::arg().setCmd("config","Output blank configuration");
2013    L.toConsole(Logger::Info);
2014    ::arg().laxParse(argc,argv); // do a lax parse
2015
2016    string configname=::arg()["config-dir"]+"/recursor.conf";
2017    cleanSlashes(configname);
2018
2019    if(!::arg().file(configname.c_str())) 
2020      L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
2021
2022    ::arg().parse(argc,argv);
2023
2024    ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
2025
2026    if(::arg().mustDo("help")) {
2027      cerr<<"syntax:"<<endl<<endl;
2028      cerr<<::arg().helpstring(::arg()["help"])<<endl;
2029      exit(99);
2030    }
2031    if(::arg().mustDo("version")) {
2032      cerr<<"version: "VERSION<<endl;
2033      exit(99);
2034    }
2035
2036    if(::arg().mustDo("config")) {
2037      cout<<::arg().configstring()<<endl;
2038      exit(0);
2039    }
2040
2041
2042#ifndef WIN32
2043    serviceMain(argc, argv);
2044#else
2045    doWindowsServiceArguments(service);
2046        L.toNTLog();
2047    RecursorService::instance()->start( argc, argv, ::arg().mustDo( "ntservice" )); 
2048#endif
2049
2050  }
2051  catch(AhuException &ae) {
2052    L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2053    ret=EXIT_FAILURE;
2054  }
2055  catch(std::exception &e) {
2056    L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2057    ret=EXIT_FAILURE;
2058  }
2059  catch(...) {
2060    L<<Logger::Error<<"any other exception in main: "<<endl;
2061    ret=EXIT_FAILURE;
2062  }
2063 
2064#ifdef WIN32
2065  WSACleanup();
2066#endif // WIN32
2067
2068  return ret;
2069}
Note: See TracBrowser for help on using the browser.