root/trunk/pdns/pdns/pdns_recursor.cc @ 616

Revision 616, 31.0 KB (checked in by ahu, 7 years ago)

improve truncating behaviour: don't truncate because of additional records. See  http://mailman.powerdns.com/pipermail/pdns-users/2006-March/003178.html and onwards
Thanks to Alan Hodgson for reporting this

  • Property svn:eol-style set to native
  • Property svn:keywords set to author date id revision
Line 
1/*
2    PowerDNS Versatile Database Driven Nameserver
3    Copyright (C) 2003 - 2006  PowerDNS.COM BV
4
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License version 2
7    as published by the Free Software Foundation
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17*/
18
19#include "utility.hh"
20#include <iostream>
21#include <errno.h>
22#include <map>
23#include <set>
24#ifndef WIN32
25#include <netdb.h>
26#endif // WIN32
27#include "recursor_cache.hh"
28#include <stdio.h>
29#include <signal.h>
30#include <stdlib.h>
31#include <unistd.h>
32#include "mtasker.hh"
33#include <utility>
34#include "dnspacket.hh"
35#include "statbag.hh"
36#include "arguments.hh"
37#include "syncres.hh"
38#include <fcntl.h>
39#include <fstream>
40#include "sstuff.hh"
41#include <boost/tuple/tuple.hpp>
42#include <boost/tuple/tuple_comparison.hpp>
43#include <boost/shared_array.hpp>
44#include <boost/lexical_cast.hpp>
45#include "dnsparser.hh"
46#include "dnswriter.hh"
47#include "dnsrecords.hh"
48#include "zoneparser-tng.hh"
49#include "rec_channel.hh"
50
51// using namespace boost;
52
53#ifdef __FreeBSD__           // see cvstrac ticket #26
54#include <pthread.h>
55#include <semaphore.h>
56#endif
57
58MemRecursorCache RC;
59RecursorStats g_stats;
60bool g_quiet;
61string s_programname="pdns_recursor";
62
63struct DNSComboWriter {
64  DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now), d_tcp(false), d_socket(-1)
65  {}
66  MOADNSParser d_mdp;
67  void setRemote(struct sockaddr* sa, socklen_t len)
68  {
69    memcpy((void *)d_remote, (void *)sa, len);
70    d_socklen=len;
71  }
72
73  void setSocket(int sock)
74  {
75    d_socket=sock;
76  }
77
78  string getRemote() const
79  {
80    return sockAddrToString((struct sockaddr_in *)d_remote, d_socklen);
81  }
82
83  struct timeval d_now;
84  char d_remote[sizeof(sockaddr_in6)];
85  socklen_t d_socklen;
86  bool d_tcp;
87  int d_socket;
88};
89
90
91#ifndef WIN32
92#ifndef __FreeBSD__
// Do-nothing replacements for the semaphore and pthread mutex entry points.
// Every stub ignores its arguments and reports success (0); pthread_self()
// yields a zero handle. Only compiled when neither WIN32 nor __FreeBSD__ is
// defined. NOTE(review): presumably this keeps the real thread library out of
// a single-threaded binary - confirm against cvstrac ticket #26.
extern "C" {
  int sem_init(sem_t*, int, unsigned int)
  {
    return 0;
  }

  int sem_wait(sem_t*)
  {
    return 0;
  }

  int sem_trywait(sem_t*)
  {
    return 0;
  }

  int sem_post(sem_t*)
  {
    return 0;
  }

  int sem_getvalue(sem_t*, int*)
  {
    return 0;
  }

  pthread_t pthread_self(void)
  {
    return (pthread_t) 0;
  }

  int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*)
  {
    return 0;
  }

  int pthread_mutex_lock(pthread_mutex_t*)
  {
    return 0;
  }

  int pthread_mutex_unlock(pthread_mutex_t*)
  {
    return 0;
  }

  int pthread_mutex_destroy(pthread_mutex_t*)
  {
    return 0;
  }
}
105#endif // __FreeBSD__
106#endif // WIN32
107
108StatBag S;
109ArgvMap &arg()
110{
111  static ArgvMap theArg;
112  return theArg;
113}
114static int d_clientsock;
115static vector<int> d_udpserversocks;
116
117typedef vector<int> tcpserversocks_t;
118static tcpserversocks_t s_tcpserversocks;
119
120static map<int,PacketID> d_tcpclientreadsocks, d_tcpclientwritesocks;
121
122MTasker<PacketID,string>* MT;
123
124int asendtcp(const string& data, Socket* sock) 
125{
126  PacketID pident;
127  pident.sock=sock;
128  pident.outMSG=data;
129  string packet;
130
131  d_tcpclientwritesocks[sock->getHandle()]=pident;
132
133  int ret=MT->waitEvent(pident,&packet,1);
134  if(!ret || ret==-1) { // timeout
135    d_tcpclientwritesocks.erase(sock->getHandle());
136  }
137  return ret;
138}
139
140// -1 is error, 0 is timeout, 1 is success
141int arecvtcp(string& data, int len, Socket* sock) 
142{
143  data="";
144  PacketID pident;
145  pident.sock=sock;
146  pident.inNeeded=len;
147
148  d_tcpclientreadsocks[sock->getHandle()]=pident;
149
150  int ret=MT->waitEvent(pident,&data,1);
151  if(!ret || ret==-1) { // timeout
152    d_tcpclientreadsocks.erase(sock->getHandle());
153  }
154  return ret;
155}
156
157
158/* these two functions are used by LWRes */
159// -1 is error, > 1 is success
160int asendto(const char *data, int len, int flags, struct sockaddr *toaddr, int addrlen, int id) 
161{
162  return sendto(d_clientsock, data, len, flags, toaddr, addrlen);
163}
164
165// -1 is error, 0 is timeout, 1 is success
166int arecvfrom(char *data, int len, int flags, struct sockaddr *toaddr, Utility::socklen_t *addrlen, int *d_len, int id)
167{
168  PacketID pident;
169  pident.id=id;
170  memcpy(&pident.remote, toaddr, sizeof(pident.remote));
171
172  string packet;
173  int ret=MT->waitEvent(pident, &packet, 1);
174  if(ret > 0) {
175    *d_len=packet.size();
176    memcpy(data,packet.c_str(),min(len,*d_len));
177  }
178  return ret;
179}
180
181void setReceiveBuffer(int fd, uint32_t size)
182{
183  uint32_t psize;
184  socklen_t len;
185  getsockopt(fd, SOL_SOCKET, SO_RCVBUF, (char*)&psize, &len);
186  if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (char*)&size, sizeof(size)) < 0 )
187    L<<Logger::Error<<"Warning: unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<"\n";
188}
189
190
191static void writePid(void)
192{
193  string fname=::arg()["socket-dir"]+"/"+s_programname+".pid";
194  ofstream of(fname.c_str());
195  if(of)
196    of<< getpid() <<endl;
197  else
198    L<<Logger::Error<<"Requested to write pid for "<<getpid()<<" to "<<fname<<" failed: "<<strerror(errno)<<endl;
199}
200
201void primeHints(void)
202{
203  // prime root cache
204  set<DNSResourceRecord>nsset;
205
206  if(::arg()["hint-file"].empty()) {
207    static char*ips[]={"198.41.0.4", "192.228.79.201", "192.33.4.12", "128.8.10.90", "192.203.230.10", "192.5.5.241", "192.112.36.4", "128.63.2.53", 
208                       "192.36.148.17","192.58.128.30", "193.0.14.129", "198.32.64.12", "202.12.27.33"};
209    DNSResourceRecord arr, nsrr;
210    arr.qtype=QType::A;
211    arr.ttl=time(0)+3600000;
212    nsrr.qtype=QType::NS;
213    nsrr.ttl=time(0)+3600000;
214   
215    for(char c='a';c<='m';++c) {
216      static char templ[40];
217      strncpy(templ,"a.root-servers.net", sizeof(templ) - 1);
218      *templ=c;
219      arr.qname=nsrr.content=templ;
220      arr.content=ips[c-'a'];
221      set<DNSResourceRecord> aset;
222      aset.insert(arr);
223      RC.replace(string(templ), QType(QType::A), aset);
224     
225      nsset.insert(nsrr);
226    }
227  }
228  else {
229    ZoneParserTNG zpt(::arg()["hint-file"]);
230    DNSResourceRecord rr;
231    set<DNSResourceRecord> aset;
232
233    while(zpt.get(rr)) {
234      rr.ttl+=time(0);
235      if(rr.qtype.getCode()==QType::A) {
236        set<DNSResourceRecord> aset;
237        aset.insert(rr);
238        RC.replace(rr.qname, QType(QType::A), aset);
239      }
240      if(rr.qtype.getCode()==QType::NS) {
241        nsset.insert(rr);
242      }
243    }
244  }
245  RC.replace("", QType(QType::NS), nsset); // and stuff in the cache
246}
247
248void startDoResolve(void *p)
249{
250  try {
251    DNSComboWriter* dc=(DNSComboWriter *)p;
252
253    uint16_t maxudpsize=512;
254    MOADNSParser::EDNSOpts edo;
255    if(dc->d_mdp.getEDNSOpts(&edo)) {
256      maxudpsize=edo.d_packetsize;
257    }
258
259    vector<DNSResourceRecord> ret;
260   
261    vector<uint8_t> packet;
262    DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
263
264    pw.getHeader()->aa=0;
265    pw.getHeader()->ra=1;
266    pw.getHeader()->qr=1;
267    pw.getHeader()->id=dc->d_mdp.d_header.id;
268    pw.getHeader()->rd=dc->d_mdp.d_header.rd;
269
270    //    MT->setTitle("udp question for "+P.qdomain+"|"+P.qtype.getName());
271    SyncRes sr(dc->d_now);
272    if(!g_quiet)
273      L<<Logger::Error<<"["<<MT->getTid()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
274       <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
275
276    sr.setId(MT->getTid());
277    if(!dc->d_mdp.d_header.rd)
278      sr.setCacheOnly();
279
280    int res=sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret);
281    if(res<0) {
282      pw.getHeader()->rcode=RCode::ServFail;
283      g_stats.servFails++;
284    }
285    else {
286      pw.getHeader()->rcode=res;
287      switch(res) {
288      case RCode::ServFail:
289        g_stats.servFails++;
290        break;
291      case RCode::NXDomain:
292        g_stats.nxDomains++;
293        break;
294      case RCode::NoError:
295        g_stats.noErrors++;
296        break;
297      }
298     
299      if(ret.size()) {
300        shuffle(ret);
301        for(vector<DNSResourceRecord>::const_iterator i=ret.begin();i!=ret.end();++i) {
302          pw.startRecord(i->qname, i->qtype.getCode(), i->ttl, 1, (DNSPacketWriter::Place)i->d_place);
303          shared_ptr<DNSRecordContent> drc(DNSRecordContent::mastermake(i->qtype.getCode(), 1, i->content)); 
304          drc->toPacket(pw);
305          if(!dc->d_tcp && pw.size() > maxudpsize) {
306            pw.rollback();
307            if(i->d_place==DNSResourceRecord::ANSWER)  // only truncate if we actually omitted parts of the answer
308              pw.getHeader()->tc=1;
309            goto sendit; // need to jump over pw.commit
310          }
311        }
312        pw.commit();
313      }
314    }
315  sendit:;
316    if(!dc->d_tcp) {
317      sendto(dc->d_socket, &*packet.begin(), packet.size(), 0, (struct sockaddr *)(dc->d_remote), dc->d_socklen);
318    }
319    else {
320      char buf[2];
321      buf[0]=packet.size()/256;
322      buf[1]=packet.size()%256;
323
324      struct iovec iov[2];
325
326      iov[0].iov_base=(void*)buf;              iov[0].iov_len=2;
327      iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
328
329      int ret=writev(dc->d_socket, iov, 2);
330
331      if(ret <= 0 ) 
332        L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< (ret ? strerror(errno) : "EOF") <<endl;
333      else if((unsigned int)ret != 2 + packet.size())
334        L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" - probably would have trouble receiving our answer anyhow (size="<<packet.size()<<")"<<endl;
335    }
336
337    //    MT->setTitle("DONE! udp question for "+P.qdomain+"|"+P.qtype.getName());
338    if(!g_quiet) {
339      L<<Logger::Error<<"["<<MT->getTid()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
340      L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
341        sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
342    }
343   
344    sr.d_outqueries ? RC.cacheMisses++ : RC.cacheHits++; 
345    float spent=makeFloat(sr.d_now-dc->d_now);
346    if(spent < 0.001)
347      g_stats.answers0_1++;
348    else if(spent < 0.010)
349      g_stats.answers1_10++;
350    else if(spent < 0.1)
351      g_stats.answers10_100++;
352    else if(spent < 1.0)
353      g_stats.answers100_1000++;
354    else
355      g_stats.answersSlow++;
356
357    uint64_t newLat=(uint64_t)(spent*1000000);
358    if(newLat < 1000000)  // outliers of several minutes exist..
359      g_stats.avgLatencyUsec=(uint64_t)((1-0.0001)*g_stats.avgLatencyUsec + 0.0001*newLat);
360    delete dc;
361  }
362  catch(AhuException &ae) {
363    L<<Logger::Error<<"startDoResolve problem: "<<ae.reason<<endl;
364  }
365  catch(exception& e) {
366    L<<Logger::Error<<"STL error: "<<e.what()<<endl;
367  }
368  catch(...) {
369    L<<Logger::Error<<"Any other exception in a resolver context"<<endl;
370  }
371}
372
373RecursorControlChannel s_rcc;
374
375void makeControlChannelSocket()
376{
377  s_rcc.listen("pdns_recursor.controlsocket");
378}
379
380void makeClientSocket()
381{
382  d_clientsock=socket(AF_INET, SOCK_DGRAM,0);
383  if(d_clientsock<0) 
384    throw AhuException("Making a socket for resolver: "+stringerror());
385  setReceiveBuffer(d_clientsock, 250000); 
386  struct sockaddr_in sin;
387  memset((char *)&sin,0, sizeof(sin));
388 
389  sin.sin_family = AF_INET;
390
391  if(!IpToU32(::arg()["query-local-address"], &sin.sin_addr.s_addr))
392    throw AhuException("Unable to resolve local address '"+ ::arg()["query-local-address"] +"'"); 
393
394  int tries=10;
395  while(--tries) {
396    uint16_t port=10000+Utility::random()%10000;
397    sin.sin_port = htons(port); 
398   
399    if (::bind(d_clientsock, (struct sockaddr *)&sin, sizeof(sin)) >= 0) 
400      break;
401   
402  }
403  if(!tries)
404    throw AhuException("Resolver binding to local socket: "+stringerror());
405
406  Utility::setNonBlocking(d_clientsock);
407
408  L<<Logger::Error<<"Sending UDP queries from "<<inet_ntoa(sin.sin_addr)<<":"<< ntohs(sin.sin_port)  <<endl;
409}
410
411void makeTCPServerSockets()
412{
413  vector<string>locals;
414  stringtok(locals,::arg()["local-address"]," ,");
415
416  if(locals.empty())
417    throw AhuException("No local address specified");
418 
419  for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
420    int fd=socket(AF_INET, SOCK_STREAM,0);
421    if(fd<0) 
422      throw AhuException("Making a server socket for resolver: "+stringerror());
423 
424    struct sockaddr_in sin;
425    memset((char *)&sin,0, sizeof(sin));
426   
427    sin.sin_family = AF_INET;
428    if(!IpToU32(*i, &sin.sin_addr.s_addr))
429      throw AhuException("Unable to resolve local address '"+ *i +"'"); 
430
431    int tmp=1;
432    if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
433      L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
434      exit(1); 
435    }
436   
437    sin.sin_port = htons(::arg().asNum("local-port")); 
438   
439    if (::bind(fd, (struct sockaddr *)&sin, sizeof(sin))<0) 
440      throw AhuException("Binding TCP server socket for "+*i+": "+stringerror());
441   
442    Utility::setNonBlocking(fd);
443    listen(fd, 128);
444    s_tcpserversocks.push_back(fd);
445    L<<Logger::Error<<"Listening for TCP queries on "<<inet_ntoa(sin.sin_addr)<<":"<<::arg().asNum("local-port")<<endl;
446  }
447}
448
449void makeUDPServerSockets()
450{
451  vector<string>locals;
452  stringtok(locals,::arg()["local-address"]," ,");
453
454  if(locals.empty())
455    throw AhuException("No local address specified");
456 
457  if(::arg()["local-address"]=="0.0.0.0") {
458    L<<Logger::Warning<<"It is advised to bind to explicit addresses with the --local-address option"<<endl;
459  }
460
461  for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
462    int fd=socket(AF_INET, SOCK_DGRAM,0);
463    if(fd<0) 
464      throw AhuException("Making a server socket for resolver: "+stringerror());
465    setReceiveBuffer(fd, 250000);
466    struct sockaddr_in sin;
467    memset((char *)&sin,0, sizeof(sin));
468   
469    sin.sin_family = AF_INET;
470    if(!IpToU32(*i, &sin.sin_addr.s_addr))
471      throw AhuException("Unable to resolve local address '"+ *i +"'"); 
472   
473    sin.sin_port = htons(::arg().asNum("local-port")); 
474   
475    if (::bind(fd, (struct sockaddr *)&sin, sizeof(sin))<0) 
476      throw AhuException("Resolver binding to server socket for "+*i+": "+stringerror());
477   
478    Utility::setNonBlocking(fd);
479    d_udpserversocks.push_back(fd);
480    L<<Logger::Error<<"Listening for UDP queries on "<<inet_ntoa(sin.sin_addr)<<":"<<::arg().asNum("local-port")<<endl;
481  }
482}
483
484
485#ifndef WIN32
486void daemonize(void)
487{
488  if(fork())
489    exit(0); // bye bye
490 
491  setsid(); 
492
493  // cleanup open fds, but skip sockets
494  close(0);
495  close(1);
496  close(2);
497}
498#endif
499
500uint64_t counter, qcounter;
501bool statsWanted;
502
503
504void usr1Handler(int)
505{
506  statsWanted=true;
507}
508
509
510
511void usr2Handler(int)
512{
513  SyncRes::setLog(true);
514  g_quiet=false;
515  ::arg().set("quiet")="no";
516
517}
518
519void doStats(void)
520{
521  if(qcounter) {
522    L<<Logger::Error<<"stats: "<<qcounter<<" questions, "<<RC.size()<<" cache entries, "<<SyncRes::s_negcache.size()<<" negative entries, "
523     <<(int)((RC.cacheHits*100.0)/(RC.cacheHits+RC.cacheMisses))<<"% cache hits"<<endl;
524    L<<Logger::Error<<"stats: throttle map: "<<SyncRes::s_throttle.size()<<", ns speeds: "
525     <<SyncRes::s_nsSpeeds.size()<<endl; // ", bytes: "<<RC.bytes()<<endl;
526    L<<Logger::Error<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
527    L<<Logger::Error<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
528     <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
529    L<<Logger::Error<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<MT->numProcesses()<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
530  }
531  else if(statsWanted) 
532    L<<Logger::Error<<"stats: no stats yet!"<<endl;
533
534  statsWanted=false;
535}
536
537static void houseKeeping(void *)
538{
539  static time_t last_stat, last_rootupdate, last_prune;
540  struct timeval now;
541  gettimeofday(&now, 0);
542
543  if(now.tv_sec - last_prune > 60) { 
544    DTime dt;
545    dt.setTimeval(now);
546    RC.doPrune();
547    int pruned=0;
548    for(SyncRes::negcache_t::iterator i = SyncRes::s_negcache.begin(); i != SyncRes::s_negcache.end();) 
549      if(i->second.ttd > now.tv_sec) {
550        SyncRes::s_negcache.erase(i++);
551        pruned++;
552      }
553      else
554        ++i;
555
556    time_t limit=now.tv_sec-300;
557    for(SyncRes::nsspeeds_t::iterator i = SyncRes::s_nsSpeeds.begin() ; i!= SyncRes::s_nsSpeeds.end(); )
558      if(i->second.stale(limit))
559        SyncRes::s_nsSpeeds.erase(i++);
560      else
561        ++i;
562
563    //    cerr<<"Pruned "<<pruned<<" records, left "<<SyncRes::s_negcache.size()<<"\n";
564//    cout<<"Prune took "<<dt.udiff()<<"usec\n";
565    last_prune=time(0);
566  }
567  if(now.tv_sec - last_stat>1800) { 
568    doStats();
569    last_stat=time(0);
570  }
571  if(now.tv_sec -last_rootupdate>7200) {
572    SyncRes sr(now);
573    vector<DNSResourceRecord> ret;
574
575    sr.setNoCache();
576    int res=sr.beginResolve("", QType(QType::NS), ret);
577    if(!res) {
578      L<<Logger::Error<<"Refreshed . records"<<endl;
579      last_rootupdate=now.tv_sec;
580    }
581    else
582      L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
583  }
584}
585
/** State of one accepted TCP client while its question is being read. */
struct TCPConnection
{
  int fd;                      // descriptor of the accepted connection

  // progress through the length-prefixed question
  enum {
    BYTE0,
    BYTE1,
    GETQUESTION
  } state;

  int qlen;                    // question length taken from the two prefix bytes
  int bytesread;
  struct sockaddr_in remote;   // address of the client, used in log messages
  char data[65535];            // receive buffer
  time_t startTime;            // accept time in seconds, checked against client-tcp-timeout
};
596
597#if 0
598#include <execinfo.h>
599
600  multimap<uint32_t,string> rev;
601  for(map<string,uint32_t>::const_iterator i=casesptr->begin(); i!=casesptr->end(); ++i) {
602    rev.insert(make_pair(i->second,i->first));
603  }
604  for(multimap<uint32_t,string>::const_iterator i=rev.begin(); i!= rev.end(); ++i)
605    cout<<i->first<<" times: \n"<<i->second<<"\n";
606
607  cout.flush();
608
609map<string,uint32_t>* casesptr;
/** Returns the symbolised call stack of the caller, at most five frames,
    one frame per line. Returns an empty string when the symbols cannot be
    obtained. Needs the -rdynamic linker flag for useful function names. */
static string maketrace()
{
  void *array[20];

  int size = backtrace (array, 5);
  char **strings = backtrace_symbols (array, size);

  string ret;
  if(!strings)
    return ret;

  for (int i = 0; i < size; i++)
    ret+=string(strings[i])+"\n";

  free(strings);  // backtrace_symbols() hands over a malloc()ed array; only the array itself is freed
  return ret;
}
626
627extern "C" {
628
629int gettimeofday (struct timeval *__restrict __tv,
630                  __timezone_ptr_t __tz)
631{
632  static map<string, uint32_t> s_cases;
633  casesptr=&s_cases;
634  s_cases[maketrace()]++;
635  __tv->tv_sec=time(0);
636  return 0;
637}
638
639}
640#endif
641
642int main(int argc, char **argv) 
643{
644  reportBasicTypes();
645
646  int ret = EXIT_SUCCESS;
647#ifdef WIN32
648    WSADATA wsaData;
649    WSAStartup( MAKEWORD( 2, 0 ), &wsaData );
650#endif // WIN32
651
652  try {
653    Utility::srandom(time(0));
654    ::arg().set("soa-minimum-ttl","Don't change")="0";
655    ::arg().set("soa-serial-offset","Don't change")="0";
656    ::arg().set("no-shuffle","Don't change")="off";
657    ::arg().set("aaaa-additional-processing","turn on to do AAAA additional processing (slow)")="off";
658    ::arg().set("local-port","port to listen on")="53";
659    ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas")="0.0.0.0";
660    ::arg().set("trace","if we should output heaps of logging")="off";
661    ::arg().set("daemon","Operate as a daemon")="yes";
662    ::arg().set("chroot","switch to chroot jail")="";
663    ::arg().set("setgid","If set, change group id to this gid for more security")="";
664    ::arg().set("setuid","If set, change user id to this uid for more security")="";
665    ::arg().set("quiet","Suppress logging of questions and answers")="true";
666    ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
667    ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
668    ::arg().set("delegation-only","Which domains we only accept delegations from")="";
669    ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
670    ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
671    ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
672    ::arg().set("hint-file", "If set, load root hints from this file")="";
673
674    ::arg().setCmd("help","Provide a helpful message");
675    L.toConsole(Logger::Warning);
676    ::arg().laxParse(argc,argv); // do a lax parse
677
678    string configname=::arg()["config-dir"]+"/recursor.conf";
679    cleanSlashes(configname);
680
681    if(!::arg().file(configname.c_str())) 
682      L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
683
684    ::arg().parse(argc,argv);
685
686    ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
687
688    if(::arg().mustDo("help")) {
689      cerr<<"syntax:"<<endl<<endl;
690      cerr<<::arg().helpstring(::arg()["help"])<<endl;
691      exit(99);
692    }
693
694    L.setName("pdns_recursor");
695
696    L<<Logger::Warning<<"PowerDNS recursor "<<VERSION<<" (C) 2001-2006 PowerDNS.COM BV ("<<__DATE__", "__TIME__;
697#ifdef __GNUC__
698    L<<", gcc "__VERSION__;
699#endif // add other compilers here
700    L<<") starting up"<<endl;
701
702    L<<Logger::Warning<<"Operating in "<<(sizeof(unsigned long)*8) <<" bits mode"<<endl;
703  L<<Logger::Warning<<"PowerDNS comes with ABSOLUTELY NO WARRANTY. "
704    "This is free software, and you are welcome to redistribute it "
705    "according to the terms of the GPL version 2."<<endl;
706
707
708  g_quiet=::arg().mustDo("quiet");
709  if(::arg().mustDo("trace")) {
710      SyncRes::setLog(true);
711      ::arg().set("quiet")="no";
712      g_quiet=false;
713  }
714
715    makeClientSocket();
716    makeUDPServerSockets();
717    makeTCPServerSockets();
718    makeControlChannelSocket();
719   
720    MT=new MTasker<PacketID,string>(100000);
721
722    char data[1500];
723    struct sockaddr_in fromaddr;
724   
725    PacketID pident;
726    primeHints();   
727    L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
728#ifndef WIN32
729    if(::arg().mustDo("daemon")) {
730      L.toConsole(Logger::Critical);
731      daemonize();
732    }
733    signal(SIGUSR1,usr1Handler);
734    signal(SIGUSR2,usr2Handler);
735    signal(SIGPIPE,SIG_IGN);
736
737    writePid();
738#endif
739
740    int newgid=0;
741    if(!::arg()["setgid"].empty())
742      newgid=Utility::makeGidNumeric(::arg()["setgid"]);
743    int newuid=0;
744    if(!::arg()["setuid"].empty())
745      newuid=Utility::makeUidNumeric(::arg()["setuid"]);
746
747
748    if (!::arg()["chroot"].empty()) {
749        if (chroot(::arg()["chroot"].c_str())<0) {
750            L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
751            exit(1);
752        }
753    }
754
755    Utility::dropPrivs(newuid, newgid);
756
757    vector<TCPConnection> tcpconnections;
758    counter=0;
759    struct timeval now;
760    unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
761    int tcpLimit=::arg().asNum("client-tcp-timeout");
762    for(;;) {
763      while(MT->schedule()); // housekeeping, let threads do their thing
764     
765      if(!((counter++)%500)) 
766        MT->makeThread(houseKeeping,0,"housekeeping");
767      if(statsWanted) {
768        doStats();
769      }
770
771      Utility::socklen_t addrlen=sizeof(fromaddr);
772      int d_len;
773     
774      struct timeval tv;
775      tv.tv_sec=0;
776      tv.tv_usec=500000;
777     
778      fd_set readfds, writefds;
779      FD_ZERO( &readfds );
780      FD_ZERO( &writefds );
781      FD_SET( d_clientsock, &readfds );
782      FD_SET( s_rcc.d_fd, &readfds);
783      int fdmax=max(d_clientsock, s_rcc.d_fd);
784
785      if(!tcpconnections.empty())
786        gettimeofday(&now, 0);
787
788      vector<TCPConnection> sweeped;
789
790      for(vector<TCPConnection>::iterator i=tcpconnections.begin();i!=tcpconnections.end();++i) {
791        if(now.tv_sec < i->startTime + tcpLimit) {
792          FD_SET(i->fd, &readfds);
793          fdmax=max(fdmax,i->fd);
794          sweeped.push_back(*i);
795        }
796        else {
797          L<<Logger::Error<<"TCP timeout from client "<<inet_ntoa(i->remote.sin_addr)<<endl;
798          close(i->fd);
799        }
800      }
801      sweeped.swap(tcpconnections);
802
803      for(vector<int>::const_iterator i=d_udpserversocks.begin(); i!=d_udpserversocks.end(); ++i) {
804        FD_SET( *i, &readfds );
805        fdmax=max(fdmax,*i);
806      }
807      if(tcpconnections.size() < maxTcpClients) 
808        for(tcpserversocks_t::const_iterator i=s_tcpserversocks.begin(); i!=s_tcpserversocks.end(); ++i) {
809          FD_SET(*i, &readfds );
810          fdmax=max(fdmax,*i);
811        }
812
813      for(map<int,PacketID>::const_iterator i=d_tcpclientreadsocks.begin(); i!=d_tcpclientreadsocks.end(); ++i) {
814        // cerr<<"Adding TCP socket "<<i->first<<" to read select set"<<endl;
815        FD_SET( i->first, &readfds );
816        fdmax=max(fdmax,i->first);
817      }
818
819      for(map<int,PacketID>::const_iterator i=d_tcpclientwritesocks.begin(); i!=d_tcpclientwritesocks.end(); ++i) {
820        // cerr<<"Adding TCP socket "<<i->first<<" to write select set"<<endl;
821        FD_SET( i->first, &writefds );
822        fdmax=max(fdmax,i->first);
823      }
824
825      int selret = select(  fdmax + 1, &readfds, &writefds, NULL, &tv );
826      gettimeofday(&now, 0);
827      if(selret<=0) 
828        if (selret == -1 && errno!=EINTR) 
829          throw AhuException("Select returned: "+stringerror());
830        else
831          continue;
832
833      if(FD_ISSET(s_rcc.d_fd, &readfds)) {
834        string remote;
835        string msg=s_rcc.recv(&remote);
836        RecursorControlParser rcp;
837        s_rcc.send(rcp.getAnswer(msg), &remote);
838      }
839
840      if(FD_ISSET(d_clientsock,&readfds)) { // do we have a UDP question response?
841        d_len=recvfrom(d_clientsock, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);   
842        if(d_len<0) 
843          continue;
844
845        try {
846          DNSComboWriter dc(data, d_len, now);
847          dc.setRemote((struct sockaddr *)&fromaddr, addrlen);
848
849          if(dc.d_mdp.d_header.qr) {
850            pident.remote=fromaddr;
851            pident.id=dc.d_mdp.d_header.id;
852            string packet;
853            packet.assign(data, d_len);
854            MT->sendEvent(pident, &packet);
855          }
856          else 
857            L<<Logger::Warning<<"Ignoring question on outgoing socket from "<<dc.getRemote()<<endl;
858        }
859        catch(MOADNSException& mde) {
860          L<<Logger::Error<<"Unparseable packet from remote server "<< sockAddrToString((struct sockaddr_in*) &fromaddr, addrlen) <<": "<<mde.what()<<endl;
861        }
862      }
863     
864      for(vector<int>::const_iterator i=d_udpserversocks.begin(); i!=d_udpserversocks.end(); ++i) {
865        if(FD_ISSET(*i,&readfds)) { // do we have a new question on udp?
866          d_len=recvfrom(*i, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);   
867          if(d_len<0) 
868            continue;
869
870          g_stats.queryrate.pulse(now);
871
872          try {
873            DNSComboWriter* dc = new DNSComboWriter(data, d_len, now);
874
875            dc->setRemote((struct sockaddr *)&fromaddr, addrlen);
876
877            if(dc->d_mdp.d_header.qr)
878              L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
879            else {
880              ++qcounter;
881              dc->setSocket(*i);
882              dc->d_tcp=false;
883              MT->makeThread(startDoResolve, (void*) dc, "udp");
884            }
885          }
886          catch(MOADNSException& mde) {
887            L<<Logger::Error<<"Unparseable packet from remote server "<< sockAddrToString((struct sockaddr_in*) &fromaddr, addrlen) <<": "<<mde.what()<<endl;
888          }
889        }
890      }
891
892      for(tcpserversocks_t::const_iterator i=s_tcpserversocks.begin(); i!=s_tcpserversocks.end(); ++i) { 
893        if(FD_ISSET(*i ,&readfds)) { // do we have a new TCP connection?
894          struct sockaddr_in addr;
895          socklen_t addrlen=sizeof(addr);
896          int newsock=accept(*i, (struct sockaddr*)&addr, &addrlen);
897         
898          if(newsock>0) {
899            Utility::setNonBlocking(newsock);
900            TCPConnection tc;
901            tc.fd=newsock;
902            tc.state=TCPConnection::BYTE0;
903            tc.remote=addr;
904            tc.startTime=now.tv_sec;
905            tcpconnections.push_back(tc);
906          }
907        }
908      }
909
910      // have any question answers come in over TCP?
911      for(map<int,PacketID>::iterator i=d_tcpclientreadsocks.begin(); i!=d_tcpclientreadsocks.end();) { 
912        bool haveErased=false;
913        if(FD_ISSET(i->first, &readfds)) { // can we receive
914          shared_array<char> buffer(new char[i->second.inNeeded]);
915
916          int ret=read(i->first, buffer.get(), min(i->second.inNeeded,200));
917          // cerr<<"Read returned "<<ret<<endl;
918          if(ret > 0) {
919            i->second.inMSG.append(&buffer[0], &buffer[ret]);
920            i->second.inNeeded-=ret;
921            if(!i->second.inNeeded) {
922              // cerr<<"Got entire load of "<<i->second.inMSG.size()<<" bytes"<<endl;
923              PacketID pid=i->second;
924              string msg=i->second.inMSG;
925             
926              d_tcpclientreadsocks.erase((i++));
927              haveErased=true;
928              MT->sendEvent(pid, &msg);   // XXX DODGY
929            }
930            else {
931              // cerr<<"Still have "<<i->second.inNeeded<<" left to go"<<endl;
932            }
933          }
934          else {
935            //      cerr<<"when reading ret="<<ret<<endl;
936            // XXX FIXME I think some stuff needs to happen here - like send an EOF event
937          }
938        }
939        if(!haveErased)
940          ++i;
941      }
942     
943      // is there data we can send to remote nameservers over TCP?
944      for(map<int,PacketID>::iterator i=d_tcpclientwritesocks.begin(); i!=d_tcpclientwritesocks.end(); ) { 
945        bool haveErased=false;
946        if(FD_ISSET(i->first, &writefds)) { // can we send over TCP
947          // cerr<<"Socket "<<i->first<<" available for writing"<<endl;
948          int ret=write(i->first, i->second.outMSG.c_str(), i->second.outMSG.size() - i->second.outPos);
949          if(ret > 0) {
950            i->second.outPos+=ret;
951            if(i->second.outPos==i->second.outMSG.size()) {
952              // cerr<<"Sent out entire load of "<<i->second.outMSG.size()<<" bytes"<<endl;
953              PacketID pid=i->second;
954              d_tcpclientwritesocks.erase(i++);   // erase!
955              haveErased=true;
956              MT->sendEvent(pid, 0);
957            }
958
959          }
960          else { 
961            //      cerr<<"ret="<<ret<<" when writing"<<endl;
962            // XXX FIXME I think some stuff needs to happen here - like send an EOF event
963          }
964        }
965        if(!haveErased)
966          ++i;
967      }
968     
969      // very braindead TCP incoming question parser
970      for(vector<TCPConnection>::iterator i=tcpconnections.begin();i!=tcpconnections.end();++i) {
971        if(FD_ISSET(i->fd, &readfds)) {
972          if(i->state==TCPConnection::BYTE0) {
973            int bytes=read(i->fd,i->data,2);
974            if(bytes==1)
975              i->state=TCPConnection::BYTE1;
976            if(bytes==2) { 
977              i->qlen=(i->data[0]<<8)+i->data[1];
978              i->bytesread=0;
979              i->state=TCPConnection::GETQUESTION;
980            }
981            if(!bytes || bytes < 0) {
982              close(i->fd);
983              tcpconnections.erase(i);
984              break;
985            }
986          }
987          else if(i->state==TCPConnection::BYTE1) {
988            int bytes=read(i->fd,i->data+1,1);
989            if(bytes==1) {
990              i->state=TCPConnection::GETQUESTION;
991              i->qlen=(i->data[0]<<8)+i->data[1];
992              i->bytesread=0;
993            }
994            if(!bytes || bytes < 0) {
995              L<<Logger::Error<<"TCP Remote "<<sockAddrToString(&i->remote,sizeof(i->remote))<<" disconnected after first byte"<<endl;
996              close(i->fd);
997              tcpconnections.erase(i);
998              break;
999            }
1000           
1001          }
1002          else if(i->state==TCPConnection::GETQUESTION) {
1003            int bytes=read(i->fd,i->data + i->bytesread,i->qlen - i->bytesread);
1004            if(!bytes || bytes < 0) {
1005              L<<Logger::Error<<"TCP Remote "<<sockAddrToString(&i->remote,sizeof(i->remote))<<" disconnected while reading question body"<<endl;
1006              close(i->fd);
1007              tcpconnections.erase(i);
1008              break;
1009            }
1010            i->bytesread+=bytes;
1011            if(i->bytesread==i->qlen) {
1012              i->state=TCPConnection::BYTE0;
1013              DNSComboWriter* dc=0;
1014              try {
1015                dc=new DNSComboWriter(i->data, i->qlen, now);
1016              }
1017              catch(MOADNSException &mde) {
1018                L<<Logger::Error<<"Unparseable packet from remote client "<<sockAddrToString(&i->remote,sizeof(i->remote))<<endl;
1019                close(i->fd);
1020                tcpconnections.erase(i);
1021                break;
1022              }
1023
1024              dc->setSocket(i->fd);
1025              dc->d_tcp=true;
1026              dc->setRemote((struct sockaddr *)&i->remote,sizeof(i->remote));
1027              if(dc->d_mdp.d_header.qr)
1028                L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
1029              else {
1030                ++qcounter;
1031                MT->makeThread(startDoResolve, dc, "tcp");
1032              }
1033            }
1034          }
1035        }
1036      }
1037    }
1038  }
1039  catch(AhuException &ae) {
1040    L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
1041    ret=EXIT_FAILURE;
1042  }
1043  catch(exception &e) {
1044    L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
1045    ret=EXIT_FAILURE;
1046  }
1047  catch(...) {
1048    L<<Logger::Error<<"any other exception in main: "<<endl;
1049    ret=EXIT_FAILURE;
1050  }
1051 
1052#ifdef WIN32
1053  WSACleanup();
1054#endif // WIN32
1055
1056  return ret;
1057}
Note: See TracBrowser for help on using the browser.